summary | refs | log | tree | commit | diff
path: root/compiler/optimizing/code_generator_vector_x86_64.cc
diff options
context:
space:
mode:
author: Aart Bik <ajcbik@google.com> 2017-08-31 09:08:13 -0700
committer: Aart Bik <ajcbik@google.com> 2017-09-01 10:32:50 -0700
commit: cfa59b49cde265dc5329a7e6956445f9f7a75f15 (patch)
tree: eed953f62e796f7e64252520a40d7e77d1f117af /compiler/optimizing/code_generator_vector_x86_64.cc
parent: 82a63734d3067ea0c96f8ba15bc40caaf798c625 (diff)
Basic SIMD reduction support.
Rationale:
Enables vectorization of x += .... for very basic (simple, same-type) constructs. Paves the way for more complex (narrower and/or mixed-type) constructs, which will be handled by the next CL.

This is a revert^2 of I7880c135aee3ed0a39da9ae5b468cbf80e613766 and thus a revert of I1c1c87b6323e01442e8fbd94869ddc9e760ea1fc.

PS1-2 shows what needed to change, with regression tests.

Test: test-art-host test-art-target
Bug: 64091002, 65212948
Change-Id: I2454778dd0ef1da915c178c7274e1cf33e271d0f
Diffstat (limited to 'compiler/optimizing/code_generator_vector_x86_64.cc')
-rw-r--r-- compiler/optimizing/code_generator_vector_x86_64.cc 252
1 files changed, 222 insertions, 30 deletions
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index c7ee81c60d..7051ba041f 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -27,6 +27,8 @@ namespace x86_64 {
void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -34,13 +36,16 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(is_zero ? Location::RequiresFpuRegister()
+ : Location::SameAsFirstInput());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -50,42 +55,49 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ __ xorps(dst, dst);
+ return;
+ }
+
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklbw(reg, reg);
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklbw(dst, dst);
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
- __ punpcklqdq(reg, reg);
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ __ punpcklqdq(dst, dst);
break;
case Primitive::kPrimFloat:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(reg, reg, Immediate(0));
+ __ shufps(dst, dst, Immediate(0));
break;
case Primitive::kPrimDouble:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(reg, reg, Immediate(0));
+ __ shufpd(dst, dst, Immediate(0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -93,20 +105,57 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar
}
}
-void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {  // Sets up operand locations for extracting a scalar from a vector.
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);  // arena-allocated summary for this instruction
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());  // integral types: vector input in an FPU (XMM) register
+ locations->SetOut(Location::RequiresRegister());  // scalar result goes to a core register
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());  // float/double: output shares the input register (code generator emits nothing)
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {  // Emits code that moves a scalar out of the input vector register.
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();  // vector operand
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: up to here, and?
+ LOG(FATAL) << "Unsupported SIMD type";  // sub-int packed types are rejected here even though the locations builder accepts them
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(locations->Out().AsRegister<CpuRegister>(), src);  // XMM -> core register move
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(locations->Out().AsRegister<CpuRegister>(), src); // is 64-bit
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());  // 2 <= vector length <= 4 covers both the double and float cases
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
@@ -130,6 +179,73 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
+void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {  // Sets up operand locations for a vector reduction.
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);  // start from the shared unary-operation location setup
+ // Long reduction or min/max require a temporary.
+ if (instruction->GetPackedType() == Primitive::kPrimLong ||
+ instruction->GetKind() == HVecReduce::kMin ||
+ instruction->GetKind() == HVecReduce::kMax) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());  // read back as GetTemp(0) by the code generator
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {  // Emits a horizontal reduction of src; the reduced value ends up in lane 0 of dst.
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();  // vector to reduce, lanes {s0, s1, ...}
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ movaps(dst, src);
+ __ phaddd(dst, dst);  // pairwise sums: {s0+s1, s2+s3, ...}
+ __ phaddd(dst, dst);  // full sum s0+s1+s2+s3
+ break;
+ case HVecReduce::kMin: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));  // tmp = {s2, s3, 0, 0}
+ __ pminsd(dst, tmp);  // dst[0] = min(s0, s2), dst[1] = min(s1, s3)
+ __ pshufd(tmp, dst, Immediate(1));  // tmp[0] = dst[1]; shifting the old tmp again would drop s1
+ __ pminsd(dst, tmp);  // dst[0] = min(s0, s1, s2, s3)
+ break;
+ }
+ case HVecReduce::kMax: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));  // tmp = {s2, s3, 0, 0}
+ __ pmaxsd(dst, tmp);  // dst[0] = max(s0, s2), dst[1] = max(s1, s3)
+ __ pshufd(tmp, dst, Immediate(1));  // tmp[0] = dst[1]; shifting the old tmp again would drop s1
+ __ pmaxsd(dst, tmp);  // dst[0] = max(s0, s1, s2, s3)
+ break;
+ }
+ }
+ break;
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ punpckhqdq(tmp, tmp);  // tmp = {s1, s1}
+ __ paddq(dst, tmp);  // dst[0] = s0 + s1
+ break;
+ case HVecReduce::kMin:
+ case HVecReduce::kMax:
+ LOG(FATAL) << "Unsupported SIMD type";  // no 64-bit min/max sequence is implemented here
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {  // Location setup for vector conversion; delegates to the shared unary-operation helper.
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -814,6 +930,81 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
}
}
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {  // Sets up operand locations for building a vector from scalar input(s).
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);  // a zero input is kept as a constant so it needs no register
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());  // integral scalar arrives in a core register
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());  // float/double scalar arrives in an FPU register
+ locations->SetOut(Location::RequiresFpuRegister());  // separate output request, unlike the SameAsFirstInput used by VisitVecReplicateScalar
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {  // Emits code that zeroes dst and then places the single scalar input into it.
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ xorps(dst, dst);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;  // dst is already all zeros, nothing more to emit
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: up to here, and?
+ LOG(FATAL) << "Unsupported SIMD type";  // sub-int packed types are rejected here even though the locations builder accepts them
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());  // core register -> XMM move
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());  // scalar single-precision move from the input XMM register
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());  // scalar double-precision move from the input XMM register
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {  // Not implemented in this file: reaching this visitor aborts.
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -861,6 +1052,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin
case 8: scale = TIMES_8; break;
default: break;
}
+ // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -895,7 +1087,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
__ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+ __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);