diff options
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 152 |
1 files changed, 147 insertions, 5 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index d3b081e005..abd644ae9b 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -36,6 +36,10 @@ static constexpr bool kEnableVectorization = true; // No loop unrolling factor (just one copy of the loop-body). static constexpr uint32_t kNoUnrollingFactor = 1; +// Values that indicate unbounded end. +static constexpr int64_t kNoLo = std::numeric_limits<int64_t>::min(); +static constexpr int64_t kNoHi = std::numeric_limits<int64_t>::max(); + // // Static helpers. // @@ -331,6 +335,74 @@ static bool IsAddConst(HInstruction* instruction, return false; } +// Detect clipped [lo, hi] range for nested MIN-MAX operations on a clippee, +// such as MIN(hi, MAX(lo, clippee)) for an arbitrary clippee expression. +// Example: MIN(10, MIN(20, MAX(0, x))) yields [0, 10] with clippee x. +static bool IsClipped(HInstruction* instruction, + /*out*/ int64_t* lo, + /*out*/ int64_t* hi, + /*out*/ HInstruction** clippee) { + // Recurse into MIN-MAX expressions and 'tighten' the range [lo, hi]. + if (instruction->IsMin() || instruction->IsMax()) { + // Find MIN-MAX(const, ..) or MIN-MAX(.., const). + for (int i = 0; i < 2; i++) { + int64_t c = 0; + if (IsInt64AndGet(instruction->InputAt(i), &c)) { + if (instruction->IsMin()) { + *hi = std::min(*hi, c); + } else { + *lo = std::max(*lo, c); + } + return IsClipped(instruction->InputAt(1 - i), lo, hi, clippee); + } + } + // Recursion fails at any MIN/MAX that does not have one constant + // argument, e.g. MIN(x, y) or MAX(2 * x, f()). + return false; + } + // Recursion ends in any other expression. At this point we record the leaf + // expression as the clippee and report success on the range [lo, hi]. + DCHECK(*clippee == nullptr); + *clippee = instruction; + return true; +} + +// Accept various saturated addition forms. +static bool IsSaturatedAdd(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) { + // MIN(r + s, 255) => SAT_ADD_unsigned + // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed etc. + if (DataType::Size(type) == 1) { + return is_unsigned + ? (lo <= 0 && hi == std::numeric_limits<uint8_t>::max()) + : (lo == std::numeric_limits<int8_t>::min() && + hi == std::numeric_limits<int8_t>::max()); + } else if (DataType::Size(type) == 2) { + return is_unsigned + ? (lo <= 0 && hi == std::numeric_limits<uint16_t>::max()) + : (lo == std::numeric_limits<int16_t>::min() && + hi == std::numeric_limits<int16_t>::max()); + } + return false; +} + +// Accept various saturated subtraction forms. +static bool IsSaturatedSub(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) { + // MAX(r - s, 0) => SAT_SUB_unsigned + // MIN(MAX(r - s, -128), 127) => SAT_ADD_signed etc. + if (DataType::Size(type) == 1) { + return is_unsigned + ? (lo == 0 && hi >= std::numeric_limits<uint8_t>::max()) + : (lo == std::numeric_limits<int8_t>::min() && + hi == std::numeric_limits<int8_t>::max()); + } else if (DataType::Size(type) == 2) { + return is_unsigned + ? (lo == 0 && hi >= std::numeric_limits<uint16_t>::min()) + : (lo == std::numeric_limits<int16_t>::min() && + hi == std::numeric_limits<int16_t>::max()); + } + return false; +} + // Detect reductions of the following forms, // x = x_phi + .. // x = x_phi - .. @@ -1109,7 +1181,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); } -// TODO: saturation arithmetic. bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -1308,6 +1379,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } else if (instruction->IsMin() || instruction->IsMax()) { + // Recognize saturation arithmetic. + if (VectorizeSaturationIdiom(node, instruction, generate_code, type, restrictions)) { + return true; + } // Deal with vector restrictions. HInstruction* opa = instruction->InputAt(0); HInstruction* opb = instruction->InputAt(1); @@ -1439,11 +1514,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSaturation; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1468,11 +1543,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSaturation; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1811,6 +1886,73 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // Vectorization idioms. // +// Method recognizes single and double clipping saturation arithmetic. +bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions) { + // Deal with vector restrictions. + if (HasVectorRestrictions(restrictions, kNoSaturation)) { + return false; + } + // Search for clipping of a clippee. + int64_t lo = kNoLo; + int64_t hi = kNoHi; + HInstruction* clippee = nullptr; + if (!IsClipped(instruction, &lo, &hi, &clippee)) { + return false; + } + CHECK(clippee != nullptr); + // Clipped addition or subtraction? + bool is_add = true; + if (clippee->IsAdd()) { + is_add = true; + } else if (clippee->IsSub()) { + is_add = false; + } else { + return false; // clippee is not add/sub + } + // Addition or subtraction on narrower operands? + HInstruction* r = nullptr; + HInstruction* s = nullptr; + bool is_unsigned = false; + if (IsNarrowerOperands(clippee->InputAt(0), clippee->InputAt(1), type, &r, &s, &is_unsigned) && + (is_add ? IsSaturatedAdd(type, lo, hi, is_unsigned) + : IsSaturatedSub(type, lo, hi, is_unsigned))) { + DCHECK(r != nullptr); + DCHECK(s != nullptr); + } else { + return false; + } + // Accept saturation idiom for vectorizable operands. + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = instruction->InputAt(0); + s = instruction->InputAt(1); + restrictions &= ~(kNoHiBits | kNoMinMax); // allow narrow MIN/MAX in seq + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + DataType::Type vtype = HVecOperation::ToProperType(type, is_unsigned); + HInstruction* op1 = vector_map_->Get(r); + HInstruction* op2 = vector_map_->Get(s); + vector_map_->Put(instruction, is_add + ? reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationAdd( + global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc)) + : reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationSub( + global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc))); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); + } + } + return true; + } + return false; +} + // Method recognizes the following idioms: // rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b // truncated halving add (a + b) >> 1 for unsigned/signed operands a, b |