diff options
author | Aart Bik <ajcbik@google.com> | 2017-04-12 17:09:20 -0700 |
---|---|---|
committer | Aart Bik <ajcbik@google.com> | 2017-04-19 10:30:57 -0700 |
commit | f3e61ee363fe7f82ef56704f06d753e2034a67dd (patch) | |
tree | a00f1fce4a2e284b0a03f941f530afc5b5c56b59 /compiler/optimizing/loop_optimization.cc | |
parent | 741a81af441cbcb7255229bf250bc009d2894e92 (diff) |
Implement halving add idiom (with checker tests).
Rationale:
First of several idioms that map to very efficient SIMD instructions.
Note that the is-zero-ext and is-sign-ext are general-purpose utilities
that will be widely used in the vectorizer to detect low precision
idioms, so expect that code to be shared with many CLs to come.
Test: test-art-host, test-art-target
Change-Id: If7dc2926c72a2e4b5cea15c44ef68cf5503e9be9
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 204 |
1 files changed, 201 insertions, 3 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 8e88c1ec7f..5a95abdb50 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -63,12 +63,122 @@ static bool IsEarlyExit(HLoopInformation* loop_info) { return false; } +// Detect a sign extension from the given type. Returns the promoted operand on success. +static bool IsSignExtensionAndGet(HInstruction* instruction, + Primitive::Type type, + /*out*/ HInstruction** operand) { + // Accept any already wider constant that would be handled properly by sign + // extension when represented in the *width* of the given narrower data type + // (the fact that char normally zero extends does not matter here). + int64_t value = 0; + if (IsInt64AndGet(instruction, &value)) { + switch (type) { + case Primitive::kPrimByte: + if (std::numeric_limits<int8_t>::min() <= value && + std::numeric_limits<int8_t>::max() >= value) { + *operand = instruction; + return true; + } + return false; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + if (std::numeric_limits<int16_t>::min() <= value && + std::numeric_limits<int16_t>::max() <= value) { + *operand = instruction; + return true; + } + return false; + default: + return false; + } + } + // An implicit widening conversion of a signed integer to an integral type sign-extends + // the two's-complement representation of the integer value to fill the wider format. + if (instruction->GetType() == type && (instruction->IsArrayGet() || + instruction->IsStaticFieldGet() || + instruction->IsInstanceFieldGet())) { + switch (type) { + case Primitive::kPrimByte: + case Primitive::kPrimShort: + *operand = instruction; + return true; + default: + return false; + } + } + // TODO: perhaps explicit conversions later too? + // (this may return something different from instruction) + return false; +} + +// Detect a zero extension from the given type. Returns the promoted operand on success. +static bool IsZeroExtensionAndGet(HInstruction* instruction, + Primitive::Type type, + /*out*/ HInstruction** operand) { + // Accept any already wider constant that would be handled properly by zero + // extension when represented in the *width* of the given narrower data type + // (the fact that byte/short normally sign extend does not matter here). + int64_t value = 0; + if (IsInt64AndGet(instruction, &value)) { + switch (type) { + case Primitive::kPrimByte: + if (std::numeric_limits<uint8_t>::min() <= value && + std::numeric_limits<uint8_t>::max() >= value) { + *operand = instruction; + return true; + } + return false; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + if (std::numeric_limits<uint16_t>::min() <= value && + std::numeric_limits<uint16_t>::max() <= value) { + *operand = instruction; + return true; + } + return false; + default: + return false; + } + } + // An implicit widening conversion of a char to an integral type zero-extends + // the representation of the char value to fill the wider format. + if (instruction->GetType() == type && (instruction->IsArrayGet() || + instruction->IsStaticFieldGet() || + instruction->IsInstanceFieldGet())) { + if (type == Primitive::kPrimChar) { + *operand = instruction; + return true; + } + } + // A sign (or zero) extension followed by an explicit removal of just the + // higher sign bits is equivalent to a zero extension of the underlying operand. + if (instruction->IsAnd()) { + int64_t mask = 0; + HInstruction* a = instruction->InputAt(0); + HInstruction* b = instruction->InputAt(1); + // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask. + if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) || + IsZeroExtensionAndGet(b, type, /*out*/ operand))) || + (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) || + IsZeroExtensionAndGet(a, type, /*out*/ operand)))) { + switch ((*operand)->GetType()) { + case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max(); + case Primitive::kPrimChar: + case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max(); + default: return false; + } + } + } + // TODO: perhaps explicit conversions later too? + return false; +} + // Test vector restrictions. static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; } -// Inserts an instruction. +// Insert an instruction. static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); DCHECK(instruction != nullptr); @@ -713,6 +823,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) { + // Recognize vectorization idioms. + if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) { + return true; + } // Deal with vector restrictions. if ((HasVectorRestrictions(restrictions, kNoShift)) || (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) { @@ -806,11 +920,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs; + *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs; + *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1039,6 +1153,90 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, #undef GENERATE_VEC // +// Vectorization idioms. +// + +// Method recognizes the following idioms: +// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b +// regular halving add (a + b) >> 1 for unsigned/signed operands a, b +// Provided that the operands are promoted to a wider form to do the arithmetic and +// then cast back to narrower form, the idioms can be mapped into efficient SIMD +// implementation that operates directly in narrower form (plus one extra bit). +// TODO: current version recognizes implicit byte/short/char widening only; +// explicit widening from int to long could be added later. +bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions) { + // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 + // (note whether the sign bit in higher precision is shifted in has no effect + // on the narrow precision computed by the idiom). + int64_t value = 0; + if ((instruction->IsShr() || + instruction->IsUShr()) && + IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) { + // + // TODO: make following code less sensitive to associativity and commutativity differences. + // + HInstruction* x = instruction->InputAt(0); + // Test for an optional rounding part (x + 1) >> 1. + bool is_rounded = false; + if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) { + x = x->InputAt(0); + is_rounded = true; + } + // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed. + if (x->IsAdd()) { + HInstruction* a = x->InputAt(0); + HInstruction* b = x->InputAt(1); + HInstruction* r = nullptr; + HInstruction* s = nullptr; + bool is_unsigned = false; + if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) { + is_unsigned = true; + } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) { + is_unsigned = false; + } else { + return false; + } + // Deal with vector restrictions. + if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) || + (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) { + return false; + } + // Accept recognized halving add for vectorizable operands. Vectorized code uses the + // shorthand idiomatic operation. Sequential code uses the original scalar expressions. + DCHECK(r != nullptr && s != nullptr); + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd( + global_allocator_, + vector_map_->Get(r), + vector_map_->Get(s), + type, + vector_length_, + is_unsigned, + is_rounded)); + } else { + VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions); + VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions); + GenerateVecOp(instruction, + vector_map_->Get(instruction->InputAt(0)), + vector_map_->Get(instruction->InputAt(1)), + type); + } + } + return true; + } + } + } + return false; +} + +// // Helpers. // |