diff options
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 134 |
1 files changed, 112 insertions, 22 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 7d66155b39..12b180d5ff 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -351,7 +351,10 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { // Translates vector operation to reduction kind. static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { - if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { + if (reduction->IsVecAdd() || + reduction->IsVecSub() || + reduction->IsVecSADAccumulate() || + reduction->IsVecDotProd()) { return HVecReduce::kSum; } LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); @@ -431,6 +434,23 @@ static void PeelByCount(HLoopInformation* loop_info, int count) { } } +// Returns the narrower type out of instructions a and b types. +static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { + DataType::Type type = a->GetType(); + if (DataType::Size(b->GetType()) < DataType::Size(type)) { + type = b->GetType(); + } + if (a->IsTypeConversion() && + DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(type)) { + type = a->InputAt(0)->GetType(); + } + if (b->IsTypeConversion() && + DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(type)) { + type = b->InputAt(0)->GetType(); + } + return type; +} + // // Public methods. // @@ -1289,6 +1309,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, DataType::Type type = instruction->GetType(); // Recognize SAD idiom or direct reduction. if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || + VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { if (generate_code) { @@ -1531,11 +1552,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoDotProd; return TrySetVectorLength(4); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoWideSAD; @@ -1580,12 +1601,23 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= - kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoMul | + kNoDiv | + kNoShift | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoDiv | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD| + kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoSAD; @@ -1610,11 +1642,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1639,11 +1671,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -2071,18 +2103,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* r = a; HInstruction* s = b; bool is_unsigned = false; - DataType::Type sub_type = a->GetType(); - if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) { - sub_type = b->GetType(); - } - if (a->IsTypeConversion() && - DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = a->InputAt(0)->GetType(); - } - if (b->IsTypeConversion() && - DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = b->InputAt(0)->GetType(); - } + DataType::Type sub_type = GetNarrowerType(a, b); if (reduction_type != sub_type && (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) { return false; @@ -2123,6 +2144,75 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, return false; } +// Method recognises the following dot product idiom: +// q += a * b for operands a, b whose type is narrower than the reduction one. +// Provided that the operands have the same type or are promoted to a wider form. +// Since this may involve a vector length change, the idiom is handled by going directly +// to a dot product node (rather than relying combining finer grained nodes later). +bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type reduction_type, + uint64_t restrictions) { + if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { + return false; + } + + HInstruction* q = instruction->InputAt(0); + HInstruction* v = instruction->InputAt(1); + if (!v->IsMul() || v->GetType() != reduction_type) { + return false; + } + + HInstruction* a = v->InputAt(0); + HInstruction* b = v->InputAt(1); + HInstruction* r = a; + HInstruction* s = b; + DataType::Type op_type = GetNarrowerType(a, b); + bool is_unsigned = false; + + if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) { + return false; + } + op_type = HVecOperation::ToProperType(op_type, is_unsigned); + + if (!TrySetVectorType(op_type, &restrictions) || + HasVectorRestrictions(restrictions, kNoDotProd)) { + return false; + } + + DCHECK(r != nullptr && s != nullptr); + // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand + // idiomatic operation. Sequential code uses the original scalar expressions. + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = a; + s = b; + } + if (VectorizeUse(node, q, generate_code, op_type, restrictions) && + VectorizeUse(node, r, generate_code, op_type, restrictions) && + VectorizeUse(node, s, generate_code, op_type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecDotProd( + global_allocator_, + vector_map_->Get(q), + vector_map_->Get(r), + vector_map_->Get(s), + reduction_type, + is_unsigned, + GetOtherVL(reduction_type, op_type, vector_length_), + kNoDexPc)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type); + GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + } + } + return true; + } + return false; +} + // // Vectorization heuristics. // |