diff options
author | Aart Bik <ajcbik@google.com> | 2017-08-31 09:08:13 -0700 |
---|---|---|
committer | Aart Bik <ajcbik@google.com> | 2017-09-01 10:32:50 -0700 |
commit | cfa59b49cde265dc5329a7e6956445f9f7a75f15 (patch) | |
tree | eed953f62e796f7e64252520a40d7e77d1f117af /compiler/optimizing/code_generator_vector_arm64.cc | |
parent | 82a63734d3067ea0c96f8ba15bc40caaf798c625 (diff) |
Basic SIMD reduction support.
Rationale:
Enables vectorization of x += ... for very basic (simple, same-type)
constructs. Paves the way for more complex (narrower and/or mixed-type)
constructs, which will be handled by the next CL.
This is a revert^2 of I7880c135aee3ed0a39da9ae5b468cbf80e613766,
and thus a revert of I1c1c87b6323e01442e8fbd94869ddc9e760ea1fc.
PS1-2 shows what needed to change, with regression tests.
Test: test-art-host test-art-target
Bug: 64091002, 65212948
Change-Id: I2454778dd0ef1da915c178c7274e1cf33e271d0f
Diffstat (limited to 'compiler/optimizing/code_generator_vector_arm64.cc')
-rw-r--r-- | compiler/optimizing/code_generator_vector_arm64.cc | 172 |
1 files changed, 158 insertions, 14 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 9095ecdf16..18a55c8b09 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -27,12 +27,13 @@ namespace arm64 { using helpers::ARM64EncodableConstantOrRegister; using helpers::Arm64CanEncodeConstantAsImmediate; using helpers::DRegisterFrom; -using helpers::VRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; using helpers::Int64ConstantFrom; -using helpers::XRegisterFrom; +using helpers::OutputRegister; +using helpers::VRegisterFrom; using helpers::WRegisterFrom; +using helpers::XRegisterFrom; #define __ GetVIXLAssembler()-> @@ -127,20 +128,51 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* } } -void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << 
"Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister src = VRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Umov(OutputRegister(instruction), src.V4S(), 0); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Umov(OutputRegister(instruction), src.V2D(), 0); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. 
@@ -169,6 +201,46 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister src = VRegisterFrom(locations->InAt(0)); + VRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Addv(dst.S(), src.V4S()); + break; + case HVecReduce::kMin: + __ Sminv(dst.S(), src.V4S()); + break; + case HVecReduce::kMax: + __ Smaxv(dst.S(), src.V4S()); + break; + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Addp(dst.D(), src.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD min/max"; + UNREACHABLE(); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } @@ -263,6 +335,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { break; default: LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); } } @@ -805,6 +878,77 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case 
Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister dst = VRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ Movi(dst.V16B(), 0); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. 
+ switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); switch (instr->GetPackedType()) { |