diff options
author | Nicolas Geoffray <ngeoffray@google.com> | 2017-09-02 12:54:16 +0000 |
---|---|---|
committer | Nicolas Geoffray <ngeoffray@google.com> | 2017-09-02 12:54:16 +0000 |
commit | 982334cef17d47ef2477d88a97203a9587a4b86f (patch) | |
tree | 7e65d03a4533f21286cf68e66696bd0a7a54ef54 /compiler/optimizing/loop_optimization.cc | |
parent | cfa59b49cde265dc5329a7e6956445f9f7a75f15 (diff) |
Revert "Basic SIMD reduction support."
Fails 530-checker-lse on arm64.
Bug: 64091002, 65212948
This reverts commit cfa59b49cde265dc5329a7e6956445f9f7a75f15.
Change-Id: Icb5d6c805516db0a1d911c3ede9a246ccef89a22
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 216 |
1 files changed, 41 insertions, 175 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index f8f4eb2ae3..027ba7741c 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -285,19 +285,6 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { return false; } -// Translates operation to reduction kind. -static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) { - if (reduction->IsVecAdd() || reduction->IsVecSub()) { - return HVecReduce::kSum; - } else if (reduction->IsVecMin()) { - return HVecReduce::kMin; - } else if (reduction->IsVecMax()) { - return HVecReduce::kMax; - } - LOG(FATAL) << "Unsupported SIMD reduction"; - UNREACHABLE(); -} - // Test vector restrictions. static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; @@ -347,8 +334,7 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_peeling_candidate_(nullptr), vector_runtime_test_a_(nullptr), vector_runtime_test_b_(nullptr), - vector_map_(nullptr), - vector_permanent_map_(nullptr) { + vector_map_(nullptr) { } void HLoopOptimization::Run() { @@ -402,14 +388,11 @@ void HLoopOptimization::LocalRun() { ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); ArenaSafeMap<HInstruction*, HInstruction*> map( std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); - ArenaSafeMap<HInstruction*, HInstruction*> perm( - std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); // Attach. iset_ = &iset; reductions_ = &reds; vector_refs_ = &refs; vector_map_ = ↦ - vector_permanent_map_ = &perm; // Traverse. TraverseLoopsInnerToOuter(top_loop_); // Detach. @@ -417,7 +400,6 @@ void HLoopOptimization::LocalRun() { reductions_ = nullptr; vector_refs_ = nullptr; vector_map_ = nullptr; - vector_permanent_map_ = nullptr; } } @@ -621,6 +603,7 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { // Vectorize loop, if possible and valid. if (kEnableVectorization && TrySetSimpleLoopHeader(header, &main_phi) && + reductions_->empty() && // TODO: possible with some effort ShouldVectorize(node, body, trip_count) && TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { Vectorize(node, body, exit, trip_count); @@ -819,13 +802,6 @@ void HLoopOptimization::Vectorize(LoopNode* node, /*unroll*/ 1); } - // Link reductions to their final uses. - for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { - if (i->first->IsPhi()) { - i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second)); - } - } - // Remove the original loop by disconnecting the body block // and removing all instructions from the header. block->DisconnectAndDelete(); @@ -865,10 +841,21 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, vector_header_->AddInstruction(cond); vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); vector_index_ = phi; - vector_permanent_map_->clear(); // preserved over unrolling for (uint32_t u = 0; u < unroll; u++) { + // Clear map, leaving loop invariants setup during unrolling. + if (u == 0) { + vector_map_->clear(); + } else { + for (auto i = vector_map_->begin(); i != vector_map_->end(); ) { + if (i->second->IsVecReplicateScalar()) { + DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first)); + ++i; + } else { + i = vector_map_->erase(i); + } + } + } // Generate instruction map. - vector_map_->clear(); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); DCHECK(vectorized_def); @@ -885,17 +872,9 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, } } } - // Generate the induction. vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); Insert(vector_body_, vector_index_); } - // Finalize phi inputs for the reductions (if any). - for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { - if (!i->first->IsPhi()) { - DCHECK(i->second->IsPhi()); - GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first); - } - } // Finalize phi inputs for the loop index. phi->AddInput(lo); phi->AddInput(vector_index_); @@ -931,23 +910,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, } return false; } - // Accept a left-hand-side reduction for - // (1) supported vector type, - // (2) vectorizable right-hand-side value. - auto redit = reductions_->find(instruction); - if (redit != reductions_->end()) { - Primitive::Type type = instruction->GetType(); - if (TrySetVectorType(type, &restrictions) && - VectorizeUse(node, instruction, generate_code, type, restrictions)) { - if (generate_code) { - HInstruction* new_red = vector_map_->Get(instruction); - vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second)); - vector_permanent_map_->Overwrite(redit->second, new_red); - } - return true; - } - return false; - } // Branch back okay. if (instruction->IsGoto()) { return true; @@ -1003,21 +965,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } - } else if (instruction->IsPhi()) { - // Accept particular phi operations. - if (reductions_->find(instruction) != reductions_->end()) { - // Deal with vector restrictions. - if (HasVectorRestrictions(restrictions, kNoReduction)) { - return false; - } - // Accept a reduction. - if (generate_code) { - GenerateVecReductionPhi(instruction->AsPhi()); - } - return true; - } - // TODO: accept right-hand-side induction? - return false; } else if (instruction->IsTypeConversion()) { // Accept particular type conversions. HTypeConversion* conversion = instruction->AsTypeConversion(); @@ -1208,14 +1155,14 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(8); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt; return TrySetVectorLength(4); case Primitive::kPrimInt: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(2); default: break; @@ -1227,11 +1174,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1240,10 +1187,8 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoDiv | kNoMul | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: - *restrictions |= kNoReduction; return TrySetVectorLength(4); case Primitive::kPrimDouble: - *restrictions |= kNoReduction; return TrySetVectorLength(2); default: return false; @@ -1255,12 +1200,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= - kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction; + *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction; + *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1269,10 +1213,10 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: - *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0 + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(4); case Primitive::kPrimDouble: - *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0 + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(2); default: break; @@ -1284,23 +1228,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt; return TrySetVectorLength(8); case Primitive::kPrimInt: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(2); case Primitive::kPrimFloat: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoMinMax; // min/max(x, NaN) return TrySetVectorLength(4); case Primitive::kPrimDouble: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoMinMax; // min/max(x, NaN) return TrySetVectorLength(2); default: break; @@ -1312,23 +1256,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt; return TrySetVectorLength(8); case Primitive::kPrimInt: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(2); case Primitive::kPrimFloat: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoMinMax; // min/max(x, NaN) return TrySetVectorLength(4); case Primitive::kPrimDouble: - *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) + *restrictions |= kNoMinMax; // min/max(x, NaN) return TrySetVectorLength(2); default: break; @@ -1361,16 +1305,9 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) return; } // In vector code, explicit scalar expansion is needed. - HInstruction* vector = nullptr; - auto it = vector_permanent_map_->find(org); - if (it != vector_permanent_map_->end()) { - vector = it->second; // reuse during unrolling - } else { - vector = new (global_allocator_) HVecReplicateScalar( - global_allocator_, org, type, vector_length_); - vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); - } - vector_map_->Put(org, vector); + HInstruction* vector = new (global_allocator_) HVecReplicateScalar( + global_allocator_, org, type, vector_length_); + vector_map_->Put(org, Insert(vector_preheader_, vector)); } } @@ -1425,78 +1362,6 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, vector_map_->Put(org, vector); } -void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { - DCHECK(reductions_->find(phi) != reductions_->end()); - DCHECK(reductions_->Get(phi->InputAt(1)) == phi); - HInstruction* vector = nullptr; - if (vector_mode_ == kSequential) { - HPhi* new_phi = new (global_allocator_) HPhi( - global_allocator_, kNoRegNumber, 0, phi->GetType()); - vector_header_->AddPhi(new_phi); - vector = new_phi; - } else { - // Link vector reduction back to prior unrolled update, or a first phi. - auto it = vector_permanent_map_->find(phi); - if (it != vector_permanent_map_->end()) { - vector = it->second; - } else { - HPhi* new_phi = new (global_allocator_) HPhi( - global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType); - vector_header_->AddPhi(new_phi); - vector = new_phi; - } - } - vector_map_->Put(phi, vector); -} - -void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) { - HInstruction* new_phi = vector_map_->Get(phi); - HInstruction* new_init = reductions_->Get(phi); - HInstruction* new_red = vector_map_->Get(reduction); - // Link unrolled vector loop back to new phi. - for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) { - DCHECK(new_phi->IsVecOperation()); - } - // Prepare the new initialization. - if (vector_mode_ == kVector) { - // Generate a [initial, 0, .., 0] vector. - new_init = Insert( - vector_preheader_, - new (global_allocator_) HVecSetScalars( - global_allocator_, &new_init, phi->GetType(), vector_length_, 1)); - } else { - new_init = ReduceAndExtractIfNeeded(new_init); - } - // Set the phi inputs. - DCHECK(new_phi->IsPhi()); - new_phi->AsPhi()->AddInput(new_init); - new_phi->AsPhi()->AddInput(new_red); - // New feed value for next phi (safe mutation in iteration). - reductions_->find(phi)->second = new_phi; -} - -HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) { - if (instruction->IsPhi()) { - HInstruction* input = instruction->InputAt(1); - if (input->IsVecOperation()) { - Primitive::Type type = input->AsVecOperation()->GetPackedType(); - HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0]; - // Generate a vector reduction and scalar extract - // x = REDUCE( [x_1, .., x_n] ) - // y = x_1 - // along the exit of the defining loop. - HVecReduce::ReductionKind kind = GetReductionKind(input); - HInstruction* reduce = new (global_allocator_) HVecReduce( - global_allocator_, instruction, type, vector_length_, kind); - exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); - instruction = new (global_allocator_) HVecExtractScalar( - global_allocator_, reduce, type, vector_length_, 0); - exit->InsertInstructionAfter(instruction, reduce); - } - } - return instruction; -} - #define GENERATE_VEC(x, y) \ if (vector_mode_ == kVector) { \ vector = (x); \ @@ -1677,9 +1542,10 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 // (note whether the sign bit in wider precision is shifted in has no effect // on the narrow precision computed by the idiom). + int64_t distance = 0; if ((instruction->IsShr() || instruction->IsUShr()) && - IsInt64Value(instruction->InputAt(1), 1)) { + IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) { // Test for (a + b + c) >> 1 for optional constant c. HInstruction* a = nullptr; HInstruction* b = nullptr; |