author    Artem Serov <artem.serov@linaro.org>  2020-04-27 21:02:28 +0100
committer Ulyana Trafimovich <skvadrik@google.com>  2021-02-05 11:34:38 +0000
commit    55ab7e84c4682c492b6fa18375b87ffc5d0b23bb (patch)
tree      5fcc2567a1a4e6ae73dead2f70c69bc03b0a64bb /compiler/optimizing/loop_optimization.cc
parent    ac27ac01490f53f9e2413dc9b66fbb2880904c96 (diff)
ARM64: Support SVE VL other than 128-bit.
Arm SVE register size is not fixed and can be any multiple of 128 bits.
To support this, the patch removes the explicit assumption that the SIMD
register size is 128 bits from the vectorizer and code generators, and
enables autovectorization for configurable SVE vector lengths, e.g. by
extending the SIMD register save/restore routines.

Test: art SIMD tests on VIXL simulator.
Test: art tests on FVP (steps in test/README.arm_fvp.md)
      with FVP arg: -C SVE.ScalableVectorExtension.veclen=[2,4]
      (SVE vector [128,256] bits wide)

Change-Id: Icb46e7eb17f21d3bd38b16dd50f735c29b316427
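[Editor's note] The core of the change below is to stop hard-coding the 128-bit
NEON lane counts (16/8/4/2 lanes) and instead derive the lane count from the
configurable SIMD register size. A minimal standalone sketch of that arithmetic,
using the hypothetical name VectorLengthFor (this is not ART's API, only an
illustration of the computation the patch introduces in TrySetVectorType):

#include <cassert>
#include <cstddef>
#include <cstdio>

// Lane count = register size in bytes / element size in bytes.
// For 128-bit NEON (16 bytes) this reproduces the old constants 16/8/4/2;
// for a 256-bit SVE implementation (32 bytes) it yields 32/16/8/4.
size_t VectorLengthFor(size_t simd_register_size, size_t elem_size) {
  // Mirrors the DCHECK_EQ on the modulus added by the patch.
  assert(simd_register_size % elem_size == 0);
  return simd_register_size / elem_size;
}

int main() {
  const size_t kSve256Bytes = 32;  // a 256-bit SVE register
  printf("int8 lanes:  %zu\n", VectorLengthFor(kSve256Bytes, 1));  // 32
  printf("int16 lanes: %zu\n", VectorLengthFor(kSve256Bytes, 2));  // 16
  printf("int32 lanes: %zu\n", VectorLengthFor(kSve256Bytes, 4));  // 8
  printf("int64 lanes: %zu\n", VectorLengthFor(kSve256Bytes, 8));  // 4
  return 0;
}

The same register size also drives the peeling-vote bookkeeping: the fixed
16-entry array becomes a vector sized by the desired (register-size) alignment.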
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
-rw-r--r--  compiler/optimizing/loop_optimization.cc | 33 ++++++++++++++-------------------
1 file changed, 14 insertions(+), 19 deletions(-)
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 1210dbe67b..02ee4ec057 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -946,9 +946,10 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
   //     make one particular reference aligned), never to exceed (1).
   // (3) variable to record how many references share same alignment.
   // (4) variable to record suitable candidate for dynamic loop peeling.
-  uint32_t desired_alignment = GetVectorSizeInBytes();
-  DCHECK_LE(desired_alignment, 16u);
-  uint32_t peeling_votes[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  size_t desired_alignment = GetVectorSizeInBytes();
+  ScopedArenaVector<uint32_t> peeling_votes(desired_alignment, 0u,
+                                            loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+
   uint32_t max_num_same_alignment = 0;
   const ArrayReference* peeling_candidate = nullptr;
@@ -1577,14 +1578,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
 }
 
 uint32_t HLoopOptimization::GetVectorSizeInBytes() {
-  if (kIsDebugBuild) {
-    InstructionSet isa = compiler_options_->GetInstructionSet();
-    // TODO: Remove this check when there are no implicit assumptions on the SIMD reg size.
-    DCHECK_EQ(simd_register_size_, (isa == InstructionSet::kArm || isa == InstructionSet::kThumb2)
-                                       ? 8u
-                                       : 16u);
-  }
-
   return simd_register_size_;
 }
@@ -1616,6 +1609,8 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
       if (IsInPredicatedVectorizationMode()) {
         // SVE vectorization.
         CHECK(features->AsArm64InstructionSetFeatures()->HasSVE());
+        size_t vector_length = simd_register_size_ / DataType::Size(type);
+        DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u);
         switch (type) {
           case DataType::Type::kBool:
           case DataType::Type::kUint8:
@@ -1625,7 +1620,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
                              kNoUnsignedHAdd |
                              kNoUnroundedHAdd |
                              kNoSAD;
-            return TrySetVectorLength(type, 16);
+            return TrySetVectorLength(type, vector_length);
           case DataType::Type::kUint16:
           case DataType::Type::kInt16:
             *restrictions |= kNoDiv |
@@ -1634,19 +1629,19 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
                              kNoUnroundedHAdd |
                              kNoSAD |
                              kNoDotProd;
-            return TrySetVectorLength(type, 8);
+            return TrySetVectorLength(type, vector_length);
           case DataType::Type::kInt32:
             *restrictions |= kNoDiv | kNoSAD;
-            return TrySetVectorLength(type, 4);
+            return TrySetVectorLength(type, vector_length);
           case DataType::Type::kInt64:
             *restrictions |= kNoDiv | kNoSAD;
-            return TrySetVectorLength(type, 2);
+            return TrySetVectorLength(type, vector_length);
           case DataType::Type::kFloat32:
             *restrictions |= kNoReduction;
-            return TrySetVectorLength(type, 4);
+            return TrySetVectorLength(type, vector_length);
           case DataType::Type::kFloat64:
             *restrictions |= kNoReduction;
-            return TrySetVectorLength(type, 2);
+            return TrySetVectorLength(type, vector_length);
           default:
             break;
         }
@@ -2311,12 +2306,12 @@ Alignment HLoopOptimization::ComputeAlignment(HInstruction* offset,
   return Alignment(DataType::Size(type), 0);
 }
 
-void HLoopOptimization::SetAlignmentStrategy(uint32_t peeling_votes[],
+void HLoopOptimization::SetAlignmentStrategy(const ScopedArenaVector<uint32_t>& peeling_votes,
                                              const ArrayReference* peeling_candidate) {
   // Current heuristic: pick the best static loop peeling factor, if any,
   // or otherwise use dynamic loop peeling on suggested peeling candidate.
   uint32_t max_vote = 0;
-  for (int32_t i = 0; i < 16; i++) {
+  for (size_t i = 0; i < peeling_votes.size(); i++) {
     if (peeling_votes[i] > max_vote) {
       max_vote = peeling_votes[i];
       vector_static_peeling_factor_ = i;