Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.cc  55
-rw-r--r--  compiler/optimizing/code_generator.h  14
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  658
-rw-r--r--  compiler/optimizing/code_generator_arm64.h  113
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  836
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  125
-rw-r--r--  compiler/optimizing/code_generator_mips.cc  658
-rw-r--r--  compiler/optimizing/code_generator_mips.h  13
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc  385
-rw-r--r--  compiler/optimizing/code_generator_mips64.h  12
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc  138
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc  126
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc  90
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc  90
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc  140
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc  140
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  425
-rw-r--r--  compiler/optimizing/code_generator_x86.h  15
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  410
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  15
-rw-r--r--  compiler/optimizing/codegen_test_utils.h  8
-rw-r--r--  compiler/optimizing/data_type.h  6
-rw-r--r--  compiler/optimizing/graph_checker.cc  92
-rw-r--r--  compiler/optimizing/graph_checker.h  6
-rw-r--r--  compiler/optimizing/graph_visualizer.cc  50
-rw-r--r--  compiler/optimizing/induction_var_range.cc  70
-rw-r--r--  compiler/optimizing/induction_var_range.h  18
-rw-r--r--  compiler/optimizing/inliner.cc  12
-rw-r--r--  compiler/optimizing/instruction_builder.cc  107
-rw-r--r--  compiler/optimizing/instruction_builder.h  7
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc  242
-rw-r--r--  compiler/optimizing/intrinsics.h  12
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc  170
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc  335
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc  720
-rw-r--r--  compiler/optimizing/intrinsics_mips.h  1
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc  374
-rw-r--r--  compiler/optimizing/intrinsics_mips64.h  2
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc  431
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc  298
-rw-r--r--  compiler/optimizing/loop_analysis.cc  141
-rw-r--r--  compiler/optimizing/loop_analysis.h  164
-rw-r--r--  compiler/optimizing/loop_optimization.cc  765
-rw-r--r--  compiler/optimizing/loop_optimization.h  26
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc  5
-rw-r--r--  compiler/optimizing/nodes.cc  16
-rw-r--r--  compiler/optimizing/nodes.h  372
-rw-r--r--  compiler/optimizing/nodes_vector.h  52
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc  10
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  34
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h  6
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h  3
-rw-r--r--  compiler/optimizing/parallel_move_test.cc  15
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.cc  4
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc  13
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc  14
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h  2
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc  35
-rw-r--r--  compiler/optimizing/scheduler.cc  5
-rw-r--r--  compiler/optimizing/select_generator.cc  47
-rw-r--r--  compiler/optimizing/sharpening.cc  79
-rw-r--r--  compiler/optimizing/sharpening.h  27
-rw-r--r--  compiler/optimizing/stack_map_stream.h  2
-rw-r--r--  compiler/optimizing/stack_map_test.cc  21
-rw-r--r--  compiler/optimizing/superblock_cloner.cc  348
-rw-r--r--  compiler/optimizing/superblock_cloner.h  112
-rw-r--r--  compiler/optimizing/superblock_cloner_test.cc  557
67 files changed, 6887 insertions, 3407 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 6abda9b3026..231017f55e6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -51,6 +51,8 @@
#include "dex/verified_method.h"
#include "driver/compiler_driver.h"
#include "graph_visualizer.h"
+#include "image.h"
+#include "gc/space/image_space.h"
#include "intern_table.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
@@ -447,6 +449,18 @@ void CodeGenerator::EmitLinkerPatches(
// No linker patches by default.
}
+bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const {
+ // Code generators that create patches requiring thunk compilation should override this function.
+ return false;
+}
+
+void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
+ /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED,
+ /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) {
+ // Code generators that create patches requiring thunk compilation should override this function.
+ LOG(FATAL) << "Unexpected call to EmitThunkCode().";
+}
+
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
@@ -722,6 +736,47 @@ void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
}
}
+static uint32_t GetBootImageOffsetImpl(const void* object, ImageHeader::ImageSections section) {
+ Runtime* runtime = Runtime::Current();
+ DCHECK(runtime->IsAotCompiler());
+ const std::vector<gc::space::ImageSpace*>& boot_image_spaces =
+ runtime->GetHeap()->GetBootImageSpaces();
+ // Check that the `object` is in the expected section of one of the boot image files.
+ DCHECK(std::any_of(boot_image_spaces.begin(),
+ boot_image_spaces.end(),
+ [object, section](gc::space::ImageSpace* space) {
+ uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
+ uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin;
+ return space->GetImageHeader().GetImageSection(section).Contains(offset);
+ }));
+ uintptr_t begin = reinterpret_cast<uintptr_t>(boot_image_spaces.front()->Begin());
+ uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin;
+ return dchecked_integral_cast<uint32_t>(offset);
+}
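// A sketch of the arithmetic above (annotation with hypothetical addresses):
// the boot image spaces are laid out contiguously for AOT compilation, so an
// object's global boot image offset is its distance from the first space's base.
//
//   uintptr_t begin  = 0x70000000;  // boot_image_spaces.front()->Begin()
//   uintptr_t object = 0x70401234;  // may live in any of the boot image spaces
//   uint32_t offset  = static_cast<uint32_t>(object - begin);  // 0x00401234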
+
+// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image classes are non-moveable.
+uint32_t CodeGenerator::GetBootImageOffset(HLoadClass* load_class) NO_THREAD_SAFETY_ANALYSIS {
+ DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kBootImageRelRo);
+ ObjPtr<mirror::Class> klass = load_class->GetClass().Get();
+ DCHECK(klass != nullptr);
+ return GetBootImageOffsetImpl(klass.Ptr(), ImageHeader::kSectionObjects);
+}
+
+// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image strings are non-moveable.
+uint32_t CodeGenerator::GetBootImageOffset(HLoadString* load_string) NO_THREAD_SAFETY_ANALYSIS {
+ DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kBootImageRelRo);
+ ObjPtr<mirror::String> string = load_string->GetString().Get();
+ DCHECK(string != nullptr);
+ return GetBootImageOffsetImpl(string.Ptr(), ImageHeader::kSectionObjects);
+}
+
+uint32_t CodeGenerator::GetBootImageOffset(HInvokeStaticOrDirect* invoke) {
+ DCHECK_EQ(invoke->GetMethodLoadKind(), HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo);
+ ArtMethod* method = invoke->GetResolvedMethod();
+ DCHECK(method != nullptr);
+ return GetBootImageOffsetImpl(method, ImageHeader::kSectionArtMethods);
+}
+
void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
// The DCHECKS below check that a register is not specified twice in
// the summary. The out location can overlap with an input, so we need
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index f784a1a8573..62cacebaa1e 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -21,15 +21,16 @@
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
+#include "base/array_ref.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
+#include "base/memory_region.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
-#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
@@ -74,6 +75,7 @@ class CodeAllocator {
virtual ~CodeAllocator() {}
virtual uint8_t* Allocate(size_t size) = 0;
+ virtual ArrayRef<const uint8_t> GetMemory() const = 0;
private:
DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
@@ -210,6 +212,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual void Initialize() = 0;
virtual void Finalize(CodeAllocator* allocator);
virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
+ virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
+ virtual void EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name);
virtual void GenerateFrameEntry() = 0;
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
@@ -438,6 +444,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
return false;
+ case TypeCheckKind::kBitstringCheck:
+ return true;
}
LOG(FATAL) << "Unreachable";
UNREACHABLE();
@@ -556,6 +564,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
Location runtime_return_location);
void GenerateLoadClassRuntimeCall(HLoadClass* cls);
+ uint32_t GetBootImageOffset(HLoadClass* load_class);
+ uint32_t GetBootImageOffset(HLoadString* load_string);
+ uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke);
+
static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 60f8f98757d..d4cfab82de3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -30,7 +30,6 @@
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
-#include "linker/arm64/relative_patcher_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
@@ -78,6 +77,7 @@ using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
@@ -1424,6 +1424,62 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
__ FinalizeCode();
CodeGenerator::Finalize(allocator);
+
+ // Verify Baker read barrier linker patches.
+ if (kIsDebugBuild) {
+ ArrayRef<const uint8_t> code = allocator->GetMemory();
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ DCHECK(info.label.IsBound());
+ uint32_t literal_offset = info.label.GetLocation();
+ DCHECK_ALIGNED(literal_offset, 4u);
+
+ auto GetInsn = [&code](uint32_t offset) {
+ DCHECK_ALIGNED(offset, 4u);
+ return
+ (static_cast<uint32_t>(code[offset + 0]) << 0) +
+ (static_cast<uint32_t>(code[offset + 1]) << 8) +
+ (static_cast<uint32_t>(code[offset + 2]) << 16) +
+ (static_cast<uint32_t>(code[offset + 3]) << 24);
+ };
+
+ const uint32_t encoded_data = info.custom_data;
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ // Check that the next instruction matches the expected LDR.
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn(literal_offset + 4u);
+ // LDR (immediate) with correct base_reg.
+ CheckValidReg(next_insn & 0x1fu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn(literal_offset + 4u);
+ // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+ // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+ CheckValidReg(next_insn & 0x1fu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
+ CheckValidReg((next_insn >> 16) & 0x1f); // Check index register.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn(literal_offset - 4u);
+ // LDR (immediate) with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+ }
+ }
}
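// Annotation: a hedged summary of the A64 encodings checked above.
//
//   LDR (immediate, 32-bit): 0xb9400000 | (imm12 << 10) | (Rn << 5) | Rt
//     Mask 0xffc003e0 keeps the opcode and Rn, pinning the base register while
//     leaving the destination register (checked separately) and offset free.
//   LDR (register, 32-bit, LSL #2): 0xb8607800 | (Rm << 16) | (Rn << 5) | Rt
//     Mask 0xffe0ffe0 additionally keeps the size/option/S bits, leaving the
//     index register Rm and destination Rt free.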
void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
@@ -2128,6 +2184,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+ // Extract the bitstring bits.
+ __ Ubfx(temp, temp, 0, mask_bits);
+ }
+ // Compare the bitstring bits to `path_to_root`.
+ __ Cmp(temp, path_to_root);
+}
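// A plain-C++ sketch of the check emitted above (values hypothetical; the
// bitstring itself is assigned elsewhere by the subtype check machinery):
//
//   bool IsSubtypeOf(uint32_t status, uint32_t path_to_root, uint32_t mask) {
//     return (status & mask) == path_to_root;  // Ldrh/Ubfx + Cmp above.
//   }
//
// E.g. with mask = 0xffff, a candidate class passes iff the low 16 bits of
// its status word equal the target class's path-to-root value.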
+
void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
BarrierType type = BarrierAll;
@@ -3865,6 +3941,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -3873,7 +3951,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The "out" register is used as a temporary, so it overlaps with the inputs.
// Note that TypeCheckSlowPathARM64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -3886,7 +3970,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
- Register cls = InputRegisterAt(instruction, 1);
+ Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Register()
+ : InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
Register out = OutputRegister(instruction);
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -4072,6 +4158,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Cset(out, eq);
+ if (zero.IsLinked()) {
+ __ B(&done);
+ }
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -4094,7 +4197,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -4104,7 +4213,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
- Register cls = InputRegisterAt(instruction, 1);
+ Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Register()
+ : InputRegisterAt(instruction, 1);
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
DCHECK_GE(num_temps, 1u);
DCHECK_LE(num_temps, 3u);
@@ -4285,6 +4396,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
__ B(ne, &start_loop);
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ B(ne, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
@@ -4459,12 +4584,23 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
// Load method address from literal pool.
__ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
+ EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
- // Add ADRP with its PC-relative DexCache access patch.
+ // Add ADRP with its PC-relative .bss entry patch.
MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
- // Add LDR with its PC-relative DexCache access patch.
+ // Add LDR with its PC-relative .bss entry patch.
vixl::aarch64::Label* ldr_label =
NewMethodBssEntryPatch(target_method, adrp_label);
EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
@@ -4559,6 +4695,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i
codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ vixl::aarch64::Label* adrp_label) {
+ return NewPcRelativePatch(
+ /* dex_file */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_);
+}
+
vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
MethodReference target_method,
vixl::aarch64::Label* adrp_label) {
@@ -4681,6 +4824,14 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
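// Annotation: this adapter exists because the generic template
// EmitPcRelativeLinkerPatches<>() always invokes its factory as
// Factory(literal_offset, dex_file, pc_insn_offset, offset_or_index); for
// .data.bimg.rel.ro patches the "index" is a boot image offset and no dex
// file is involved, hence the nullptr check.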
+
void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -4700,11 +4851,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -4719,6 +4869,44 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
DCHECK_EQ(size, linker_patches->size());
}
+bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
+ return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+ patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
+}
+
+void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) {
+ Arm64Assembler assembler(GetGraph()->GetAllocator());
+ switch (patch.GetType()) {
+ case linker::LinkerPatch::Type::kCallRelative: {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ *debug_name = "MethodCallThunk";
+ }
+ break;
+ }
+ case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
+ DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
+ CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected patch type " << patch.GetType();
+ UNREACHABLE();
+ }
+
+ // Ensure we emit the literal pool if any.
+ assembler.FinalizeCode();
+ code->resize(assembler.CodeSize());
+ MemoryRegion code_region(code->data(), code->size());
+ assembler.FinalizeInstructions(code_region);
+}
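// A hedged sketch of how a caller (e.g. the OAT writer) might drive this API;
// the names below are hypothetical:
//
//   for (const linker::LinkerPatch& patch : patches) {
//     if (codegen->NeedsThunkCode(patch)) {
//       ArenaVector<uint8_t> thunk_code(allocator.Adapter());
//       std::string debug_name;
//       codegen->EmitThunkCode(patch, &thunk_code, &debug_name);
//       // ... deduplicate thunks and place them within branch range of
//       // the patched instruction at patch.LiteralOffset() ...
//     }
//   }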
+
vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
return uint32_literals_.GetOrCreate(
value,
@@ -4779,7 +4967,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -4859,12 +5047,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
DCHECK(!cls->MustGenerateClinitCheck());
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
Register current_method = InputRegisterAt(cls, 0);
- GenerateGcRootFieldLoad(cls,
- out_loc,
- current_method,
- ArtMethod::DeclaringClassOffset().Int32Value(),
- /* fixup_label */ nullptr,
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ /* fixup_label */ nullptr,
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
@@ -4888,23 +5076,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // Add ADRP with its PC-relative type patch.
- const DexFile& dex_file = cls->GetDexFile();
- dex::TypeIndex type_index = cls->GetTypeIndex();
- vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
- // Add LDR with its PC-relative type patch.
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index)));
- if (masked_hash != 0) {
- __ Sub(out.W(), out.W(), Operand(masked_hash));
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -4914,16 +5095,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
vixl::aarch64::Register temp = XRegisterFrom(out_loc);
vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
- // Add LDR with its PC-relative Class patch.
+ // Add LDR with its PC-relative Class .bss entry patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(cls,
- out_loc,
- temp,
- /* offset placeholder */ 0u,
- ldr_label,
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ temp,
+ /* offset placeholder */ 0u,
+ ldr_label,
+ read_barrier_option);
generate_null_check = true;
break;
}
@@ -4931,12 +5112,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
__ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
cls->GetTypeIndex(),
cls->GetClass()));
- GenerateGcRootFieldLoad(cls,
- out_loc,
- out.X(),
- /* offset */ 0,
- /* fixup_label */ nullptr,
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ out.X(),
+ /* offset */ 0,
+ /* fixup_label */ nullptr,
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kRuntimeCall:
@@ -4989,7 +5170,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5055,16 +5236,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // Add ADRP with its PC-relative String patch.
- const DexFile& dex_file = load->GetDexFile();
- const dex::StringIndex string_index = load->GetStringIndex();
- vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
+ vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
- // Add LDR with its PC-relative String patch.
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
return;
}
@@ -5076,16 +5256,16 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
Register temp = XRegisterFrom(out_loc);
vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
- // Add LDR with its .bss entry String patch.
+ // Add LDR with its PC-relative String .bss entry patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
// /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(load,
- out_loc,
- temp,
- /* offset placeholder */ 0u,
- ldr_label,
- kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(load,
+ out_loc,
+ temp,
+ /* offset placeholder */ 0u,
+ ldr_label,
+ kCompilerReadBarrierOption);
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
codegen_->AddSlowPath(slow_path);
@@ -5098,12 +5278,12 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
__ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
load->GetStringIndex(),
load->GetString()));
- GenerateGcRootFieldLoad(load,
- out_loc,
- out.X(),
- /* offset */ 0,
- /* fixup_label */ nullptr,
- kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(load,
+ out_loc,
+ out.X(),
+ /* offset */ 0,
+ /* fixup_label */ nullptr,
+ kCompilerReadBarrierOption);
return;
}
default:
@@ -5462,6 +5642,153 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
}
}
+// TODO: integrate with HandleBinaryOp?
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1 = locations->InAt(0);
+ Location op2 = locations->InAt(1);
+ Location out = locations->Out();
+
+ Register op1_reg;
+ Register op2_reg;
+ Register out_reg;
+ if (type == DataType::Type::kInt64) {
+ op1_reg = XRegisterFrom(op1);
+ op2_reg = XRegisterFrom(op2);
+ out_reg = XRegisterFrom(out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ op1_reg = WRegisterFrom(op1);
+ op2_reg = WRegisterFrom(op2);
+ out_reg = WRegisterFrom(out);
+ }
+
+ __ Cmp(op1_reg, op2_reg);
+ __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
+}
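// In C terms, the Cmp/Csel pair computes, branch-free:
//
//   result = is_min ? (op1 < op2 ? op1 : op2)   // Csel ..., lt
//                   : (op1 > op2 ? op1 : op2);  // Csel ..., gt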
+
+void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1 = locations->InAt(0);
+ Location op2 = locations->InAt(1);
+ Location out = locations->Out();
+
+ FPRegister op1_reg;
+ FPRegister op2_reg;
+ FPRegister out_reg;
+ if (type == DataType::Type::kFloat64) {
+ op1_reg = DRegisterFrom(op1);
+ op2_reg = DRegisterFrom(op2);
+ out_reg = DRegisterFrom(out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ op1_reg = SRegisterFrom(op1);
+ op2_reg = SRegisterFrom(op2);
+ out_reg = SRegisterFrom(out);
+ }
+
+ if (is_min) {
+ __ Fmin(out_reg, op1_reg, op2_reg);
+ } else {
+ __ Fmax(out_reg, op1_reg, op2_reg);
+ }
+}
+
+// TODO: integrate with HandleBinaryOp?
+void InstructionCodeGeneratorARM64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderARM64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderARM64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
+void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ Register in_reg = InputRegisterAt(abs, 0);
+ Register out_reg = OutputRegister(abs);
+ __ Cmp(in_reg, Operand(0));
+ __ Cneg(out_reg, in_reg, lt);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FPRegister in_reg = InputFPRegisterAt(abs, 0);
+ FPRegister out_reg = OutputFPRegister(abs);
+ __ Fabs(out_reg, in_reg);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
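// In C terms, Cmp #0 followed by Cneg(lt) computes, branch-free:
//
//   int64_t abs = (in < 0) ? -in : in;
//
// Like Math.abs(), this leaves INT32_MIN/INT64_MIN unchanged; the
// floating-point path is a single Fabs instruction.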
+
void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
constructor_fence->SetLocations(nullptr);
}
@@ -5905,7 +6232,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
}
}
-void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
+void CodeGeneratorARM64::GenerateGcRootFieldLoad(
HInstruction* instruction,
Location root,
Register obj,
@@ -5939,9 +6266,8 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
DCHECK(temps.IsAvailable(ip0));
DCHECK(temps.IsAvailable(ip1));
temps.Exclude(ip0, ip1);
- uint32_t custom_data =
- linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
- vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
vixl::aarch64::Label return_address;
@@ -5970,14 +6296,14 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
// Slow path marking the GC root `root`. The entrypoint will
// be loaded by the slow path code.
SlowPathCodeARM64* slow_path =
- new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
- codegen_->AddSlowPath(slow_path);
+ new (GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
+ AddSlowPath(slow_path);
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
} else {
- codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
+ EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
}
static_assert(
sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
@@ -5997,10 +6323,10 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
if (fixup_label == nullptr) {
__ Add(root_reg.X(), obj.X(), offset);
} else {
- codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
+ EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
}
// /* mirror::Object* */ root = root->Read()
- codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ GenerateReadBarrierForRootSlow(instruction, root, root);
}
} else {
// Plain GC root load with no read barrier.
@@ -6008,12 +6334,12 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
} else {
- codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
+ EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
}
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6062,9 +6388,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
DCHECK(temps.IsAvailable(ip0));
DCHECK(temps.IsAvailable(ip1));
temps.Exclude(ip0, ip1);
- uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(),
- obj.GetCode());
+ uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode());
vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
{
@@ -6149,8 +6473,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
DCHECK(temps.IsAvailable(ip0));
DCHECK(temps.IsAvailable(ip1));
temps.Exclude(ip0, ip1);
- uint32_t custom_data =
- linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+ uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
__ Add(temp.X(), obj.X(), Operand(data_offset));
@@ -6510,5 +6833,176 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_
#undef __
#undef QUICK_ENTRY_POINT
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
+ vixl::aarch64::Register base_reg,
+ vixl::aarch64::MemOperand& lock_word,
+ vixl::aarch64::Label* slow_path,
+ vixl::aarch64::Label* throw_npe = nullptr) {
+ // Load the lock word containing the rb_state.
+ __ Ldr(ip0.W(), lock_word);
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
+ static_assert(
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
+ "Field and array LDR offsets must be the same to reuse the same code.");
+ // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
+ if (throw_npe != nullptr) {
+ __ Bind(throw_npe);
+ }
+ // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
+ __ Br(lr); // And return back to the function.
+ // Note: The fake dependency is unnecessary for the slow path.
+}
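// Annotation: the fake dependency, in pseudo-C. The lock word was loaded into
// the low 32 bits of ip0, so the shifted value is always zero, but the Add
// makes the base register's value depend on the lock word load, ordering it
// before the reference load without a memory barrier:
//
//   base_reg += (uint64_t{lock_word} >> 32);  // adds 0; address dependency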
+
+// Load the read barrier introspection entrypoint in register `entrypoint`.
+static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
+ vixl::aarch64::Register entrypoint) {
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+}
+
+void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name) {
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ auto holder_reg =
+ Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ CheckValidReg(holder_reg.GetCode());
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ // If base_reg differs from holder_reg, the offset was too large and we must have emitted
+ // an explicit null check before the load. Otherwise, for implicit null checks, we need to
+ // null-check the holder as we do not necessarily do that check before going to the thunk.
+ vixl::aarch64::Label throw_npe_label;
+ vixl::aarch64::Label* throw_npe = nullptr;
+ if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
+ throw_npe = &throw_npe_label;
+ __ Cbz(holder_reg.W(), throw_npe);
+ }
+ // Check if the holder is gray and, if not, add fake dependency to the base register
+ // and return to the LDR instruction to load the reference. Otherwise, use introspection
+ // to load the reference and call the entrypoint that performs further checks on the
+ // reference and marks it if needed.
+ vixl::aarch64::Label slow_path;
+ MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
+ __ Bind(&slow_path);
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset.
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+ __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset.
+ __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference.
+ // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+ __ Br(ip1); // Jump to the entrypoint.
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffset(), 0);
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ __ Bind(&slow_path);
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset.
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+ __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set).
+ __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create
+ // a switch case target based on the index register.
+ __ Mov(ip0, base_reg); // Move the base register to ip0.
+ __ Br(ip1); // Jump to the entrypoint's array switch case.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+ // and it does not have a forwarding address), call the correct introspection entrypoint;
+ // otherwise return the reference (or the extracted forwarding address).
+ // There is no gray bit check for GC roots.
+ auto root_reg =
+ Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(root_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label return_label, not_marked, forwarding_address;
+ __ Cbz(root_reg, &return_label);
+ MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
+ __ Ldr(ip0.W(), lock_word);
+ __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
+ __ Bind(&return_label);
+ __ Br(lr);
+ __ Bind(&not_marked);
+ __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
+ __ B(&forwarding_address, mi);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+ // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
+ // art_quick_read_barrier_mark_introspection_gc_roots.
+ __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+ __ Mov(ip0.W(), root_reg);
+ __ Br(ip1);
+ __ Bind(&forwarding_address);
+ __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
+ __ Br(lr);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+
+ if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ std::ostringstream oss;
+ oss << "BakerReadBarrierThunk";
+ switch (kind) {
+ case BakerReadBarrierKind::kField:
+ oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
+ << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
+ break;
+ case BakerReadBarrierKind::kArray:
+ oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ break;
+ case BakerReadBarrierKind::kGcRoot:
+ oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ break;
+ }
+ *debug_name = oss.str();
+ }
+}
+
+#undef __
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0654046de5d..aa343b1185d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#include "arch/arm64/quick_method_frame_info_arm64.h"
+#include "base/bit_field.h"
#include "code_generator.h"
#include "common_arm64.h"
#include "dex/dex_file_types.h"
@@ -36,6 +37,11 @@
#pragma GCC diagnostic pop
namespace art {
+
+namespace linker {
+class Arm64RelativePatcherTest;
+} // namespace linker
+
namespace arm64 {
class CodeGeneratorARM64;
@@ -264,6 +270,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
vixl::aarch64::Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ vixl::aarch64::Register temp);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
@@ -273,6 +281,10 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleCondition(HCondition* instruction);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -303,17 +315,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
uint32_t offset,
Location maybe_temp,
ReadBarrierOption read_barrier_option);
- // Generate a GC root reference load:
- //
- // root <- *(obj + offset)
- //
- // while honoring read barriers based on read_barrier_option.
- void GenerateGcRootFieldLoad(HInstruction* instruction,
- Location root,
- vixl::aarch64::Register obj,
- uint32_t offset,
- vixl::aarch64::Label* fixup_label,
- ReadBarrierOption read_barrier_option);
// Generate a floating-point comparison.
void GenerateFcmp(HInstruction* instruction);
@@ -561,7 +562,14 @@ class CodeGeneratorARM64 : public CodeGenerator {
UNIMPLEMENTED(FATAL);
}
- // Add a new PC-relative method patch for an instruction and return the label
+ // Add a new boot image relocation patch for an instruction and return the label
+ // to be bound before the instruction. The instruction will be either the
+ // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
+ // to the associated ADRP patch label).
+ vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ vixl::aarch64::Label* adrp_label = nullptr);
+
+ // Add a new boot image method patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -575,7 +583,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
vixl::aarch64::Label* adrp_label = nullptr);
- // Add a new PC-relative type patch for an instruction and return the label
+ // Add a new boot image type patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -591,7 +599,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
dex::TypeIndex type_index,
vixl::aarch64::Label* adrp_label = nullptr);
- // Add a new PC-relative string patch for an instruction and return the label
+ // Add a new boot image string patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -628,9 +636,24 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Register base);
void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+ bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE;
+ void EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers based on read_barrier_option.
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ vixl::aarch64::Register obj,
+ uint32_t offset,
+ vixl::aarch64::Label* fixup_label,
+ ReadBarrierOption read_barrier_option);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -765,6 +788,62 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
private:
+ // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
+
+ enum class BakerReadBarrierKind : uint8_t {
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kArray, // Array get with index in register.
+ kGcRoot, // GC root load.
+ kLast = kGcRoot
+ };
+
+ static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
+ static constexpr size_t kBitsForBakerReadBarrierKind =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+ static constexpr size_t kBakerReadBarrierBitsForRegister =
+ MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
+ using BakerReadBarrierKindField =
+ BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+ using BakerReadBarrierFirstRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
+ using BakerReadBarrierSecondRegField =
+ BitField<uint32_t,
+ kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
+ kBakerReadBarrierBitsForRegister>;
+
+ static void CheckValidReg(uint32_t reg) {
+ DCHECK(reg < vixl::aarch64::lr.GetCode() &&
+ reg != vixl::aarch64::ip0.GetCode() &&
+ reg != vixl::aarch64::ip1.GetCode()) << reg;
+ }
+
+ static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+ CheckValidReg(base_reg);
+ CheckValidReg(holder_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(holder_reg);
+ }
+
+ static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+ CheckValidReg(base_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
+ }
+
+ static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+ CheckValidReg(root_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
+ }
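// Annotation: layout of the encoded data, following from the bit fields above
// (2 kind bits, then two 5-bit register fields, low to high):
//
//   bits [0, 2)  : BakerReadBarrierKind
//   bits [2, 7)  : first register (base or root)
//   bits [7, 12) : second register (holder), or 31 when unused
//
// e.g. EncodeBakerReadBarrierFieldData(/* base */ 1, /* holder */ 2)
//      == kField | (1 << 2) | (2 << 7) == 0x104.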
+
+ void CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name);
+
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
@@ -820,7 +899,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
Uint32ToLiteralMap uint32_literals_;
// Deduplication map for 64-bit literals, used for non-patchable method address or method code.
Uint64ToLiteralMap uint64_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -828,7 +908,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
@@ -840,6 +920,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Patches for class literals in JIT compiled code.
TypeToLiteralMap jit_class_patches_;
+ friend class linker::Arm64RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2f495fc15fd..7350b146f95 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -29,7 +29,6 @@
#include "gc/accounting/card_table.h"
#include "heap_poisoning.h"
#include "intrinsics_arm_vixl.h"
-#include "linker/arm/relative_patcher_thumb2.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
@@ -94,9 +93,6 @@ constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-// The reserved entrypoint register for link-time generated thunks.
-const vixl32::Register kBakerCcEntrypointRegister = r4;
-
// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
@@ -116,8 +112,6 @@ static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope
DCHECK(temps->IsAvailable(ip));
temps->Exclude(ip);
DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
- DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
- linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
@@ -2422,6 +2416,80 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
FixJumpTables();
GetAssembler()->FinalizeCode();
CodeGenerator::Finalize(allocator);
+
+ // Verify Baker read barrier linker patches.
+ if (kIsDebugBuild) {
+ ArrayRef<const uint8_t> code = allocator->GetMemory();
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ DCHECK(info.label.IsBound());
+ uint32_t literal_offset = info.label.GetLocation();
+ DCHECK_ALIGNED(literal_offset, 2u);
+
+ auto GetInsn16 = [&code](uint32_t offset) {
+ DCHECK_ALIGNED(offset, 2u);
+ return (static_cast<uint32_t>(code[offset + 0]) << 0) +
+ (static_cast<uint32_t>(code[offset + 1]) << 8);
+ };
+ auto GetInsn32 = [=](uint32_t offset) {
+ return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
+ };
+
+ uint32_t encoded_data = info.custom_data;
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ // Check that the next instruction matches the expected LDR.
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(literal_offset + 4u);
+ // LDR (immediate), encoding T3, with correct base_reg.
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ } else {
+ DCHECK_GE(code.size() - literal_offset, 6u);
+ uint32_t next_insn = GetInsn16(literal_offset + 4u);
+ // LDR (immediate), encoding T1, with correct base_reg.
+ CheckValidReg(next_insn & 0x7u); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+ }
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(literal_offset + 4u);
+ // LDR (register), encoding T2, with correct base_reg and LSL #2 (imm2 = 2).
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
+ CheckValidReg(next_insn & 0xf); // Check index register.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(literal_offset - 4u);
+ // LDR (immediate), encoding T3, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ } else {
+ DCHECK_GE(literal_offset, 2u);
+ uint32_t prev_insn = GetInsn16(literal_offset - 2u);
+ // LDR (immediate), encoding T1, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+ }
+ }
}
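The verification above reconstructs 32-bit Thumb2 instructions from two little-endian halfwords, most significant halfword first, before matching them against the expected LDR encodings. A minimal sketch of that decoding (the instruction bytes below are a hand-assembled LDR.W r3, [r5, #8], encoding T3):

#include <cassert>
#include <cstdint>

// Reassemble a 32-bit Thumb2 instruction from byte-addressed code, as the
// GetInsn16/GetInsn32 lambdas above do: each halfword is little-endian,
// but the first halfword holds the most significant 16 bits.
uint32_t GetInsn16(const uint8_t* code, uint32_t offset) {
  return static_cast<uint32_t>(code[offset]) |
         (static_cast<uint32_t>(code[offset + 1]) << 8);
}

uint32_t GetInsn32(const uint8_t* code, uint32_t offset) {
  return (GetInsn16(code, offset) << 16) | GetInsn16(code, offset + 2);
}

int main() {
  // LDR.W r3, [r5, #8]: encoding T3 is 0xf8d00000 | (Rn << 16) | (Rt << 12) | imm12.
  const uint8_t code[] = {0xd5, 0xf8, 0x08, 0x30};  // Halfwords 0xf8d5, 0x3008.
  uint32_t insn = GetInsn32(code, 0);
  assert(insn == 0xf8d53008u);
  assert((insn & 0xffff0000u) == (0xf8d00000u | (5u << 16)));  // Base reg r5.
  assert(((insn >> 12) & 0xfu) == 3u);                         // Dest reg r3.
}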
void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
@@ -4690,6 +4758,299 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ vixl32::Register op1 = RegisterFrom(op1_loc);
+ vixl32::Register op2 = RegisterFrom(op2_loc);
+ vixl32::Register out = RegisterFrom(out_loc);
+
+ __ Cmp(op1, op2);
+
+ {
+ ExactAssemblyScope aas(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+
+ __ ite(is_min ? lt : gt);
+ __ mov(is_min ? lt : gt, out, op1);
+ __ mov(is_min ? ge : le, out, op2);
+ }
+}
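The CMP plus ite/mov/mov sequence above is Thumb2 predication used as a branch-free select: the instruction under the first condition executes when it holds, the other one otherwise. A sketch of what the pair computes, in plain C++ terms:

#include <cassert>
#include <cstdint>

// What CMP + ITE + two conditional MOVs compute: a branch-free conditional
// select between op1 and op2 on a signed comparison.
int32_t SelectMinMax(int32_t op1, int32_t op2, bool is_min) {
  bool take_op1 = is_min ? (op1 < op2) : (op1 > op2);  // ITE condition lt/gt.
  return take_op1 ? op1 : op2;                         // movlt/movge pair.
}

int main() {
  assert(SelectMinMax(2, 7, /*is_min=*/true) == 2);
  assert(SelectMinMax(2, 7, /*is_min=*/false) == 7);
  assert(SelectMinMax(-3, -3, /*is_min=*/true) == -3);  // Equal inputs.
}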
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
+ vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
+ vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
+ vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
+ vixl32::Register out_lo = LowRegisterFrom(out_loc);
+ vixl32::Register out_hi = HighRegisterFrom(out_loc);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp = temps.Acquire();
+
+ DCHECK(op1_lo.Is(out_lo));
+ DCHECK(op1_hi.Is(out_hi));
+
+ // Compare op1 >= op2, or op1 < op2.
+ __ Cmp(out_lo, op2_lo);
+ __ Sbcs(temp, out_hi, op2_hi);
+
+ // Now GE/LT condition code is correct for the long comparison.
+ {
+ vixl32::ConditionType cond = is_min ? ge : lt;
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ itt(cond);
+ __ mov(cond, out_lo, op2_lo);
+ __ mov(cond, out_hi, op2_hi);
+ }
+}
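The CMP/SBCS pair implements a full 64-bit signed comparison on a 32-bit core: comparing the low words produces a borrow, and subtracting it along with the high words leaves the GE/LT conditions valid for the whole value. A model of that, computed exactly in wider arithmetic:

#include <cassert>
#include <cstdint>

// Model of the CMP lo / SBCS hi sequence above. The low-word compare yields
// a borrow; subtracting it with the high words makes the signed comparison
// exact for the full 64-bit values (hardware reads this off N and V).
bool GreaterEqual64(int64_t a, int64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a), b_lo = static_cast<uint32_t>(b);
  int32_t a_hi = static_cast<int32_t>(a >> 32), b_hi = static_cast<int32_t>(b >> 32);
  uint32_t borrow = (a_lo < b_lo) ? 1u : 0u;                // Borrow out of CMP a_lo, b_lo.
  int64_t hi = static_cast<int64_t>(a_hi) - b_hi - borrow;  // Exact SBCS result.
  return hi >= 0;                                           // GE condition.
}

int main() {
  assert(GreaterEqual64(5, 3) && !GreaterEqual64(3, 5));
  assert(GreaterEqual64(-1, -2) && !GreaterEqual64(-2, -1));
  assert(GreaterEqual64(1LL << 40, (1LL << 40) - 1));
  assert(GreaterEqual64(INT64_MIN, INT64_MIN));
  assert(!GreaterEqual64(INT64_MIN, INT64_MAX));
}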
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::SRegister op1 = SRegisterFrom(op1_loc);
+ vixl32::SRegister op2 = SRegisterFrom(op2_loc);
+ vixl32::SRegister out = SRegisterFrom(out_loc);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp1 = temps.Acquire();
+ vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
+ vixl32::Label nan, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F32, out, op2);
+ }
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
+
+ // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
+ __ Vmov(temp1, op1);
+ __ Vmov(temp2, op2);
+ if (is_min) {
+ __ Orr(temp1, temp1, temp2);
+ } else {
+ __ And(temp1, temp1, temp2);
+ }
+ __ Vmov(out, temp1);
+ __ B(final_label);
+
+ // handle NaN input.
+ __ Bind(&nan);
+ __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
+ __ Vmov(out, temp1);
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
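The ORR/AND of raw bit patterns in the equal-operands path is what gets the signed zeros right: min(+0.0f, -0.0f) must be -0.0f (OR keeps either sign bit) and max must be +0.0f (AND keeps it only if both are set). A sketch of just that fix-up:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Equal-operands fix-up: when op1 == op2 numerically they may still be
// +0.0f and -0.0f. OR of the bit patterns keeps a sign bit if either input
// has one (min); AND keeps it only if both do (max).
float FixSignedZero(float op1, float op2, bool is_min) {
  uint32_t b1, b2;
  std::memcpy(&b1, &op1, sizeof(b1));
  std::memcpy(&b2, &op2, sizeof(b2));
  uint32_t bits = is_min ? (b1 | b2) : (b1 & b2);
  float out;
  std::memcpy(&out, &bits, sizeof(out));
  return out;
}

int main() {
  assert(std::signbit(FixSignedZero(0.0f, -0.0f, /*is_min=*/true)));
  assert(!std::signbit(FixSignedZero(0.0f, -0.0f, /*is_min=*/false)));
  assert(FixSignedZero(1.5f, 1.5f, /*is_min=*/true) == 1.5f);  // Non-zero values pass through.
}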
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::DRegister op1 = DRegisterFrom(op1_loc);
+ vixl32::DRegister op2 = DRegisterFrom(op2_loc);
+ vixl32::DRegister out = DRegisterFrom(out_loc);
+ vixl32::Label handle_nan_eq, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F64, out, op2);
+ }
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
+
+ // handle op1 == op2, max(+0.0,-0.0).
+ if (!is_min) {
+ __ Vand(F64, out, op1, op2);
+ __ B(final_label);
+ }
+
+ // handle op1 == op2, min(+0.0,-0.0), NaN input.
+ __ Bind(&handle_nan_eq);
+ __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kInt64:
+ GenerateMinMaxLong(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ GenerateMinMaxFloat(minmax, is_min);
+ break;
+ case DataType::Type::kFloat64:
+ GenerateMinMaxDouble(minmax, is_min);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
+void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
+ vixl32::Register out_reg = RegisterFrom(locations->Out());
+ vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
+ __ Asr(mask, in_reg, 31);
+ __ Add(out_reg, in_reg, mask);
+ __ Eor(out_reg, out_reg, mask);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ Location in = locations->InAt(0);
+ vixl32::Register in_reg_lo = LowRegisterFrom(in);
+ vixl32::Register in_reg_hi = HighRegisterFrom(in);
+ Location output = locations->Out();
+ vixl32::Register out_reg_lo = LowRegisterFrom(output);
+ vixl32::Register out_reg_hi = HighRegisterFrom(output);
+ DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
+ vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
+ __ Asr(mask, in_reg_hi, 31);
+ __ Adds(out_reg_lo, in_reg_lo, mask);
+ __ Adc(out_reg_hi, in_reg_hi, mask);
+ __ Eor(out_reg_lo, out_reg_lo, mask);
+ __ Eor(out_reg_hi, out_reg_hi, mask);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
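The ASR/ADD(S)/EOR sequence is the standard branchless abs: the arithmetic shift produces a mask of 0 or -1, and (x + mask) ^ mask negates x exactly when it was negative. A sketch of the 32-bit case (like the generated code, it maps INT32_MIN to itself):

#include <cassert>
#include <cstdint>

// Branchless abs as emitted above: mask = x >> 31 (arithmetic shift) is 0
// for non-negative x and all-ones for negative x; (x + mask) ^ mask is then
// x or -x. The add/xor are done in unsigned arithmetic to avoid overflow.
int32_t Abs32(int32_t x) {
  uint32_t ux = static_cast<uint32_t>(x);
  uint32_t mask = static_cast<uint32_t>(x >> 31);  // ASR: 0 or 0xffffffff.
  return static_cast<int32_t>((ux + mask) ^ mask); // ADD then EOR.
}

int main() {
  assert(Abs32(5) == 5);
  assert(Abs32(-5) == 5);
  assert(Abs32(0) == 0);
  assert(Abs32(INT32_MIN) == INT32_MIN);  // Two's complement edge case.
}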
void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
@@ -7033,7 +7394,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7120,11 +7481,11 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
DCHECK(!cls->MustGenerateClinitCheck());
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
vixl32::Register current_method = InputRegisterAt(cls, 0);
- GenerateGcRootFieldLoad(cls,
- out_loc,
- current_method,
- ArtMethod::DeclaringClassOffset().Int32Value(),
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
@@ -7143,25 +7504,19 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
__ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
codegen_->EmitMovwMovtPlaceholder(labels, out);
__ Ldr(out, MemOperand(out, /* offset */ 0));
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Sub(out, out, Operand(masked_hash));
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
generate_null_check = true;
break;
}
@@ -7170,7 +7525,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
cls->GetTypeIndex(),
cls->GetClass()));
// /* GcRoot<mirror::Class> */ out = *out
- GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
break;
}
case HLoadClass::LoadKind::kRuntimeCall:
@@ -7236,11 +7591,72 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* check,
+ vixl32::Register temp,
+ vixl32::FlagsUpdate flags_update) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
+ // the Z flag for BNE. This is indicated by the `flags_update` parameter.
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+ // Check if the bitstring bits are equal to `path_to_root`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root);
+ } else {
+ __ Sub(temp, temp, path_to_root);
+ }
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+ if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
+ // Compare the bitstring bits using SUB.
+ __ Sub(temp, temp, path_to_root);
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ } else if (IsUint<16>(path_to_root)) {
+ if (temp.IsLow()) {
+ // Note: Optimized for size but contains one more dependent instruction than necessary.
+ // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
+ // macro assembler would use the high reg IP for the constant by default.
+ // Compare the bitstring bits using SUB.
+ __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2
+ __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ } else {
+ // Extract the bitstring bits.
+ __ Ubfx(temp, temp, 0, mask_bits);
+ // Check if the bitstring bits are equal to `path_to_root`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root);
+ } else {
+ __ Sub(temp, temp, path_to_root);
+ }
+ }
+ } else {
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root << (32u - mask_bits));
+ } else {
+ __ Sub(temp, temp, path_to_root << (32u - mask_bits));
+ }
+ }
+ }
+}
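All of the SUB/CMP/UBFX/LSL variants above compute the same predicate: whether the low mask_bits of the class's status word equal the target class's path_to_root. A standalone sketch of that predicate (status values hypothetical):

#include <cassert>
#include <cstdint>

// Sketch of the bitstring type check comparison: it passes iff the low
// mask_bits of the 32-bit status word equal path_to_root. mask + 1 is a
// power of two, as the DCHECK above requires.
bool BitstringMatch(uint32_t status, uint32_t path_to_root, uint32_t mask) {
  assert(((mask + 1u) & mask) == 0u);  // mask is a contiguous low-bit mask.
  return (status & mask) == path_to_root;
}

int main() {
  const uint32_t mask = 0xffu;  // Hypothetical 8-bit bitstring region.
  assert(BitstringMatch(/*status=*/0xabcd002au, /*path_to_root=*/0x2au, mask));
  assert(!BitstringMatch(/*status=*/0xabcd002bu, /*path_to_root=*/0x2au, mask));
}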
+
HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7304,10 +7720,10 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
__ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
codegen_->EmitMovwMovtPlaceholder(labels, out);
__ Ldr(out, MemOperand(out, /* offset */ 0));
return;
@@ -7317,7 +7733,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
- GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(
+ load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
LoadStringSlowPathARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
codegen_->AddSlowPath(slow_path);
@@ -7331,7 +7748,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
load->GetStringIndex(),
load->GetString()));
// /* GcRoot<mirror::String> */ out = *out
- GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(
+ load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
return;
}
default:
@@ -7427,6 +7845,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -7435,7 +7855,13 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The "out" register is used as a temporary, so it overlaps with the inputs.
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7450,7 +7876,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
vixl32::Register obj = InputRegisterAt(instruction, 0);
- vixl32::Register cls = InputRegisterAt(instruction, 1);
+ vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? vixl32::Register()
+ : InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(instruction);
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7690,6 +8118,26 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
__ B(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
+ // If `out` is a low reg and we would have another low reg temp, we could
+ // optimize this as RSBS+ADC, see GenerateConditionWithZero().
+ //
+ // Also, in some cases when `out` is a low reg and we're loading a constant to IP
+ // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
+ // would be the same and we would have fewer direct data dependencies.
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR
+ break;
+ }
}
if (done.IsReferenced()) {
@@ -7707,7 +8155,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -7716,7 +8170,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
vixl32::Register obj = InputRegisterAt(instruction, 0);
- vixl32::Register cls = InputRegisterAt(instruction, 1);
+ vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? vixl32::Register()
+ : InputRegisterAt(instruction, 1);
Location temp_loc = locations->GetTemp(0);
vixl32::Register temp = RegisterFrom(temp_loc);
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -7901,6 +8357,20 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
__ B(ne, &start_loop, /* far_target */ false);
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
+ __ B(ne, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
if (done.IsReferenced()) {
__ Bind(&done);
@@ -8330,7 +8800,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
}
}
-void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
+void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
HInstruction* instruction,
Location root,
vixl32::Register obj,
@@ -8361,9 +8831,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
- uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
- root_reg.GetCode(), narrow);
- vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
vixl32::Label return_address;
@@ -8374,7 +8843,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
- EmitPlaceholderBne(codegen_, bne_label);
+ EmitPlaceholderBne(this, bne_label);
__ Bind(&return_address);
DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
@@ -8394,8 +8863,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
// Slow path marking the GC root `root`. The entrypoint will
// be loaded by the slow path code.
SlowPathCodeARMVIXL* slow_path =
- new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
- codegen_->AddSlowPath(slow_path);
+ new (GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
+ AddSlowPath(slow_path);
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
@@ -8416,7 +8885,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
// /* GcRoot<mirror::Object>* */ root = obj + offset
__ Add(root_reg, obj, offset);
// /* mirror::Object* */ root = root->Read()
- codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ GenerateReadBarrierForRootSlow(instruction, root, root);
}
} else {
// Plain GC root load with no read barrier.
@@ -8425,7 +8894,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
+ MaybeGenerateMarkingRegisterCheck(/* code */ 18);
}
void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
@@ -8486,8 +8955,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
}
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(), obj.GetCode(), narrow);
+ uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
{
@@ -8573,8 +9041,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+ uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
__ Add(data_reg, obj, Operand(data_offset));
@@ -8711,7 +9178,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction
void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
Location ref,
- vixl::aarch32::Register obj,
+ vixl32::Register obj,
uint32_t offset,
Location index,
ScaleFactor scale_factor,
@@ -8901,6 +9368,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
+ vixl32::Register temp_reg = RegisterFrom(temp);
+ EmitMovwMovtPlaceholder(labels, temp_reg);
+ GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset */ 0);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -8998,6 +9473,13 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(
}
}
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset) {
+ return NewPcRelativePatch(/* dex_file */ nullptr,
+ boot_image_offset,
+ &boot_image_method_patches_);
+}
+
CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
MethodReference target_method) {
return NewPcRelativePatch(
@@ -9036,7 +9518,7 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa
return &patches->back();
}
-vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
+vixl32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
baker_read_barrier_patches_.emplace_back(custom_data);
return &baker_read_barrier_patches_.back().label;
}
@@ -9088,6 +9570,14 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
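The adapter exists only because EmitPcRelativeLinkerPatches is templated on a uniform four-argument factory while LinkerPatch::DataBimgRelRoPatch takes no dex file. A simplified model of the pattern, with hypothetical Patch/DexFile stand-ins:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified model of the adapter above: the emitter is templated on a
// uniform four-argument factory, so a patch type that does not use the dex
// file gets a thin adapter that asserts the unused argument is null.
struct Patch { size_t literal_offset; uint32_t pc_insn_offset; uint32_t target; };
struct DexFile;  // Opaque here.

using Factory = Patch (*)(size_t, const DexFile*, uint32_t, uint32_t);

Patch DataBimgRelRoPatch(size_t literal_offset, uint32_t pc_insn_offset, uint32_t target) {
  return Patch{literal_offset, pc_insn_offset, target};
}

Patch DataBimgRelRoPatchAdapter(size_t literal_offset,
                                const DexFile* target_dex_file,
                                uint32_t pc_insn_offset,
                                uint32_t target) {
  assert(target_dex_file == nullptr);  // Unused for this patch type.
  return DataBimgRelRoPatch(literal_offset, pc_insn_offset, target);
}

template <Factory factory>
void EmitPatch(size_t offset, uint32_t pc_offset, uint32_t target, std::vector<Patch>* out) {
  out->push_back(factory(offset, /*target_dex_file=*/nullptr, pc_offset, target));
}

int main() {
  std::vector<Patch> patches;
  EmitPatch<DataBimgRelRoPatchAdapter>(8u, 4u, 0x1000u, &patches);
  assert(patches.size() == 1u && patches[0].target == 0x1000u);
}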
+
void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -9107,11 +9597,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -9126,6 +9615,45 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
DCHECK_EQ(size, linker_patches->size());
}
+bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
+ return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+ patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
+}
+
+void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) {
+ arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
+ switch (patch.GetType()) {
+ case linker::LinkerPatch::Type::kCallRelative:
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ assembler.LoadFromOffset(
+ arm::kLoadWord,
+ vixl32::pc,
+ vixl32::r0,
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+ assembler.GetVIXLAssembler()->Bkpt(0);
+ if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ *debug_name = "MethodCallThunk";
+ }
+ break;
+ case linker::LinkerPatch::Type::kBakerReadBarrierBranch:
+ DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
+ CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected patch type " << patch.GetType();
+ UNREACHABLE();
+ }
+
+ // Ensure we emit the literal pool if any.
+ assembler.FinalizeCode();
+ code->resize(assembler.CodeSize());
+ MemoryRegion code_region(code->data(), code->size());
+ assembler.FinalizeInstructions(code_region);
+}
+
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
uint32_t value,
Uint32ToLiteralMap* map) {
@@ -9370,5 +9898,211 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
#undef QUICK_ENTRY_POINT
#undef TODO_VIXL32
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
+ vixl32::Register base_reg,
+ vixl32::MemOperand& lock_word,
+ vixl32::Label* slow_path,
+ int32_t raw_ldr_offset,
+ vixl32::Label* throw_npe = nullptr) {
+ // Load the lock word containing the rb_state.
+ __ Ldr(ip, lock_word);
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
+ __ B(ne, slow_path, /* is_far_target */ false);
+ // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
+ if (throw_npe != nullptr) {
+ __ Bind(throw_npe);
+ }
+ __ Add(lr, lr, raw_ldr_offset);
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
+ __ Bx(lr); // And return back to the function.
+ // Note: The fake dependency is unnecessary for the slow path.
+}
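Two details of this fast path are easy to miss: the gray check is a single TST of the read-barrier state bit in the just-loaded lock word, and the ADD of ip, LSR #32 always contributes zero on AArch32 yet creates an address dependency that orders the reference load after the lock word load without a memory barrier. A conceptual sketch of the control flow (the state-bit position below is an assumption, not ART's actual layout):

#include <cassert>
#include <cstdint>

// Conceptual model of EmitGrayCheckAndFastPath: if the lock word's
// read-barrier state bit is set (gray), take the slow path; otherwise fold
// a value derived from the lock word into the base address. `lock_word >> 32`
// is always zero on AArch32, so it changes nothing but keeps the two loads
// ordered through a data dependency.
constexpr uint32_t kReadBarrierStateMaskShifted = 0x10000000u;  // Hypothetical bit.

uint32_t FastPathBase(uint32_t base, uint32_t lock_word, bool* is_gray) {
  *is_gray = (lock_word & kReadBarrierStateMaskShifted) != 0u;
  uint32_t fake_dependency =
      static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);  // Always 0.
  return base + fake_dependency;
}

int main() {
  bool gray = false;
  assert(FastPathBase(0x1000u, 0u, &gray) == 0x1000u && !gray);
  assert(FastPathBase(0x1000u, kReadBarrierStateMaskShifted, &gray) == 0x1000u && gray);
}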
+
+// Load the read barrier introspection entrypoint in register `entrypoint`.
+static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler,
+ vixl32::Register entrypoint) {
+ // The register where the read barrier introspection entrypoint is loaded
+ // is fixed: `kBakerCcEntrypointRegister` (R4).
+ DCHECK(entrypoint.Is(kBakerCcEntrypointRegister));
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+}
+
+void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name) {
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ CheckValidReg(holder_reg.GetCode());
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ // If base_reg differs from holder_reg, the offset was too large and we must have emitted
+ // an explicit null check before the load. Otherwise, for implicit null checks, we need to
+ // null-check the holder as we do not necessarily do that check before going to the thunk.
+ vixl32::Label throw_npe_label;
+ vixl32::Label* throw_npe = nullptr;
+ if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
+ throw_npe = &throw_npe_label;
+ __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target */ false);
+ }
+ // Check if the holder is gray and, if not, add fake dependency to the base register
+ // and return to the LDR instruction to load the reference. Otherwise, use introspection
+ // to load the reference and call the entrypoint that performs further checks on the
+ // reference and marks it if needed.
+ vixl32::Label slow_path;
+ MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+ const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+ EmitGrayCheckAndFastPath(
+ assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
+ __ Bind(&slow_path);
+ const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+ raw_ldr_offset;
+ vixl32::Register ep_reg(kBakerCcEntrypointRegister);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ if (width == BakerReadBarrierWidth::kWide) {
+ MemOperand ldr_half_address(lr, ldr_offset + 2);
+ __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
+ __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
+ __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ } else {
+ MemOperand ldr_address(lr, ldr_offset);
+ __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
+ __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
+ ep_reg, // for narrow LDR.
+ Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+ __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
+ __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
+ }
+ // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+ __ Bx(ep_reg); // Jump to the entrypoint.
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ vixl32::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
+ const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+ __ Bind(&slow_path);
+ const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+ raw_ldr_offset;
+ MemOperand ldr_address(lr, ldr_offset + 2);
+ __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
+ // i.e. Rm+32 because the scale in imm2 is 2.
+ vixl32::Register ep_reg(kBakerCcEntrypointRegister);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create
+ // a switch case target based on the index register.
+ __ Mov(ip, base_reg); // Move the base register to ip.
+ __ Bx(ep_reg); // Jump to the entrypoint's array switch case.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+ // and it does not have a forwarding address), call the correct introspection entrypoint;
+ // otherwise return the reference (or the extracted forwarding address).
+ // There is no gray bit check for GC roots.
+ vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(root_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ vixl32::Label return_label, not_marked, forwarding_address;
+ __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false);
+ MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
+ __ Ldr(ip, lock_word);
+ __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
+ __ B(eq, &not_marked);
+ __ Bind(&return_label);
+ __ Bx(lr);
+ __ Bind(&not_marked);
+ static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
+ "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
+ " the highest bits and the 'forwarding address' state to have all bits set");
+ __ Cmp(ip, Operand(0xc0000000));
+ __ B(hs, &forwarding_address);
+ vixl32::Register ep_reg(kBakerCcEntrypointRegister);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
+ // to art_quick_read_barrier_mark_introspection_gc_roots.
+ int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+ __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
+ __ Mov(ip, root_reg);
+ __ Bx(ep_reg);
+ __ Bind(&forwarding_address);
+ __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
+ __ Bx(lr);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+
+ if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ std::ostringstream oss;
+ oss << "BakerReadBarrierThunk";
+ switch (kind) {
+ case BakerReadBarrierKind::kField:
+ oss << "Field";
+ if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
+ oss << "Wide";
+ }
+ oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
+ << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
+ break;
+ case BakerReadBarrierKind::kArray:
+ oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+ break;
+ case BakerReadBarrierKind::kGcRoot:
+ oss << "GcRoot";
+ if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
+ oss << "Wide";
+ }
+ oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ break;
+ }
+ *debug_name = oss.str();
+ }
+}
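The kGcRoot slow path relies on the lock word layout asserted above: the 2-bit state sits in bits 31:30 and the forwarding-address state is 3, so one unsigned compare against 0xc0000000 detects it, and shifting left by kForwardingAddressShift both drops the state bits and restores the 8-byte-aligned address. A sketch under those layout assumptions:

#include <cassert>
#include <cstdint>

// Sketch of the forwarding-address dispatch in the kGcRoot thunk, assuming
// the layout the static_assert checks: state in bits 31:30, forwarding
// state 3, and the address stored right-shifted by 3 in the low bits.
constexpr uint32_t kStateShift = 30u;
constexpr uint32_t kStateForwardingAddress = 3u;
constexpr uint32_t kForwardingAddressShift = 3u;

bool IsForwardingAddress(uint32_t lock_word) {
  // CMP ip, #0xc0000000; BHS: both state bits set means forwarding address.
  return lock_word >= (kStateForwardingAddress << kStateShift);
}

uint32_t DecodeForwardingAddress(uint32_t lock_word) {
  return lock_word << kForwardingAddressShift;  // Shifts the state bits out.
}

int main() {
  uint32_t fwd = (kStateForwardingAddress << kStateShift) |
                 (0x12340u >> kForwardingAddressShift);
  assert(IsForwardingAddress(fwd));
  assert(DecodeForwardingAddress(fwd) == 0x12340u);
  assert(!IsForwardingAddress(0x00000001u));  // Thin-locked / unmarked word.
}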
+
+#undef __
+
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 536da41d07f..6b9919ab15e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -36,6 +36,11 @@
#pragma GCC diagnostic pop
namespace art {
+
+namespace linker {
+class Thumb2RelativePatcherTest;
+} // namespace linker
+
namespace arm {
// This constant is used as an approximate margin when emission of veneer and literal pools
@@ -108,6 +113,9 @@ static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = {
static const size_t kRuntimeParameterFpuRegistersLengthVIXL =
arraysize(kRuntimeParameterFpuRegistersVIXL);
+// The reserved entrypoint register for link-time generated thunks.
+const vixl::aarch32::Register kBakerCcEntrypointRegister = vixl32::r4;
+
class LoadClassSlowPathARMVIXL;
class CodeGeneratorARMVIXL;
@@ -322,6 +330,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
vixl32::Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ vixl::aarch32::Register temp,
+ vixl::aarch32::FlagsUpdate flags_update);
void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
@@ -349,6 +360,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxLong(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFloat(HInstruction* minmax, bool is_min);
+ void GenerateMinMaxDouble(HInstruction* minmax, bool is_min);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -379,16 +396,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
uint32_t offset,
Location maybe_temp,
ReadBarrierOption read_barrier_option);
- // Generate a GC root reference load:
- //
- // root <- *(obj + offset)
- //
- // while honoring read barriers based on read_barrier_option.
- void GenerateGcRootFieldLoad(HInstruction* instruction,
- Location root,
- vixl::aarch32::Register obj,
- uint32_t offset,
- ReadBarrierOption read_barrier_option);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
vixl::aarch32::Label* true_target,
@@ -574,6 +581,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Label add_pc_label;
};
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
@@ -596,6 +604,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
Handle<mirror::Class> handle);
void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+ bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE;
+ void EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
@@ -603,6 +615,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// is added only for AOT compilation if link-time generated thunks for fields are enabled.
void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers based on read_barrier_option.
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ vixl::aarch32::Register obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -757,6 +779,83 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Register temp = vixl32::Register());
private:
+ // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
+
+ enum class BakerReadBarrierKind : uint8_t {
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kArray, // Array get with index in register.
+ kGcRoot, // GC root load.
+ kLast = kGcRoot
+ };
+
+ enum class BakerReadBarrierWidth : uint8_t {
+ kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+ kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+ kLast = kNarrow
+ };
+
+ static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* pc is invalid */ 15u;
+
+ static constexpr size_t kBitsForBakerReadBarrierKind =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+ static constexpr size_t kBakerReadBarrierBitsForRegister =
+ MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
+ using BakerReadBarrierKindField =
+ BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+ using BakerReadBarrierFirstRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
+ using BakerReadBarrierSecondRegField =
+ BitField<uint32_t,
+ kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
+ kBakerReadBarrierBitsForRegister>;
+ static constexpr size_t kBitsForBakerReadBarrierWidth =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+ using BakerReadBarrierWidthField =
+ BitField<BakerReadBarrierWidth,
+ kBitsForBakerReadBarrierKind + 2 * kBakerReadBarrierBitsForRegister,
+ kBitsForBakerReadBarrierWidth>;
+
+ static void CheckValidReg(uint32_t reg) {
+ DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != kBakerCcEntrypointRegister.GetCode()) << reg;
+ }
+
+ static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
+ CheckValidReg(base_reg);
+ CheckValidReg(holder_reg);
+ DCHECK(!narrow || base_reg < 8u) << base_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(holder_reg) |
+ BakerReadBarrierWidthField::Encode(width);
+ }
+
+ static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+ CheckValidReg(base_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
+ }
+
+ static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
+ CheckValidReg(root_reg);
+ DCHECK(!narrow || root_reg < 8u) << root_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(width);
+ }
+
+ void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name);
+
vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
vixl::aarch32::Register temp);
@@ -798,7 +897,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -806,7 +906,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
@@ -818,6 +918,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// Patches for class literals in JIT compiled code.
TypeToLiteralMap jit_class_patches_;
+ friend class linker::Thumb2RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL);
};
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 87e6d6834b7..25e2eddbfab 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1597,6 +1597,14 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1615,11 +1623,10 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -1630,6 +1637,13 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
DCHECK_EQ(size, linker_patches->size());
}
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_);
+}
+
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch(
MethodReference target_method,
const PcRelativePatchInfo* info_high) {
@@ -1936,6 +1950,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ LoadFromOffset(
+ kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ if (IsUint<16>(path_to_root)) {
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ __ LoadConst32(TMP, path_to_root);
+ __ Xor(temp, temp, TMP);
+ }
+ // Shift out bits that do not contribute to the comparison.
+ __ Sll(temp, temp, 32 - mask_bits);
+ }
+}
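+
+// Worked example for the comparison above, with arbitrarily chosen values:
+// for mask == 0xFFF (mask_bits == 12) the second path is taken, so `temp`
+// ends up holding (status ^ path_to_root) << 20, which is zero exactly when
+// the low 12 bits of the status word equal `path_to_root`; callers branch
+// to the slow path on a non-zero `temp`.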
+
void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
__ Sync(0); // Only stype 0 is supported.
}
@@ -3287,7 +3329,13 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -3296,7 +3344,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
- Register cls = locations->InAt(1).AsRegister<Register>();
+ Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -3335,7 +3383,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ Bne(temp, cls, slow_path->GetEntryLabel());
+ __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
break;
}
@@ -3361,7 +3409,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// exception.
__ Beqz(temp, slow_path->GetEntryLabel());
// Otherwise, compare the classes.
- __ Bne(temp, cls, &loop);
+ __ Bne(temp, cls.AsRegister<Register>(), &loop);
break;
}
@@ -3376,7 +3424,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// Walk over the class hierarchy to find a match.
MipsLabel loop;
__ Bind(&loop);
- __ Beq(temp, cls, &done);
+ __ Beq(temp, cls.AsRegister<Register>(), &done);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
temp_loc,
@@ -3399,7 +3447,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
maybe_temp2_loc,
kWithoutReadBarrier);
// Do an exact check.
- __ Beq(temp, cls, &done);
+ __ Beq(temp, cls.AsRegister<Register>(), &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -3458,7 +3506,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// Go to next interface.
__ Addiu(TMP, TMP, -2);
// Compare the classes and continue the loop if they do not match.
- __ Bne(AT, cls, &loop);
+ __ Bne(AT, cls.AsRegister<Register>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnez(temp, slow_path->GetEntryLabel());
break;
}
}
@@ -7401,6 +7463,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -7409,7 +7473,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7421,7 +7491,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
- Register cls = locations->InAt(1).AsRegister<Register>();
+ Location cls = locations->InAt(1);
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7453,7 +7523,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
read_barrier_option);
// Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
+ __ Xor(out, out, cls.AsRegister<Register>());
__ Sltiu(out, out, 1);
break;
}
@@ -7480,7 +7550,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqz(out, &done);
- __ Bne(out, cls, &loop);
+ __ Bne(out, cls.AsRegister<Register>(), &loop);
__ LoadConst32(out, 1);
break;
}
@@ -7498,7 +7568,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
// Walk over the class hierarchy to find a match.
MipsLabel loop, success;
__ Bind(&loop);
- __ Beq(out, cls, &success);
+ __ Beq(out, cls.AsRegister<Register>(), &success);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -7525,7 +7595,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// Do an exact check.
MipsLabel success;
- __ Beq(out, cls, &success);
+ __ Beq(out, cls.AsRegister<Register>(), &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -7557,7 +7627,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(
instruction, /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
- __ Bne(out, cls, slow_path->GetEntryLabel());
+ __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
__ LoadConst32(out, 1);
break;
}
@@ -7589,6 +7659,20 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
__ B(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Sltiu(out, out, 1);
+ break;
+ }
}
__ Bind(&done);
@@ -7725,7 +7809,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7748,7 +7832,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7835,6 +7919,15 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ Register temp_reg = temp.AsRegister<Register>();
+ EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+ __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
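+      // The linker back-patches the placeholder high instruction and the
+      // 0x5678 immediate above so that the Lw reads the 32-bit entry for
+      // this method from the boot image .data.bimg.rel.ro section.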
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -7956,7 +8049,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
// We need an extra register for PC-relative literals on R2.
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
if (isR6) {
break;
@@ -8008,7 +8101,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
// We need an extra register for PC-relative literals on R2.
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
base_or_current_method_reg =
(isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>();
@@ -8065,22 +8158,17 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
}
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
__ Lw(out, out, /* placeholder */ 0x5678, &info_low->label);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Addiu(out, out, -masked_hash);
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -8171,7 +8259,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
// We need an extra register for PC-relative literals on R2.
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
if (isR6) {
break;
@@ -8223,7 +8311,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
// We need an extra register for PC-relative literals on R2.
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
base_or_current_method_reg =
(isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>();
@@ -8259,12 +8347,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
}
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
@@ -8779,6 +8868,501 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ if (isR6) {
+ // Some architectures, such as ARM and MIPS (prior to r6), have a
+ // conditional move instruction which only changes the target
+ // (output) register if the condition is true (MIPS prior to r6 had
+ // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
+ // always change the target (output) register. If the condition is
+ // true the output register gets the contents of the "rs" register;
+ // otherwise, the output register is set to zero. One consequence
+ // of this is that to implement something like "rd = c==0 ? rs : rt"
+  // MIPSR6 needs to use a pair of SELEQZ/SELNEZ instructions.
+ // After executing this pair of instructions one of the output
+ // registers from the pair will necessarily contain zero. Then the
+ // code ORs the output registers from the SELEQZ/SELNEZ instructions
+ // to get the final result.
+ //
+  // The initial test to see if the two inputs are the same register is
+  // needed because, in that case, no comparison is necessary and the
+  // value only has to be copied to the output (unless it is already
+  // there). In the else clause, the SELEQZ/SELNEZ results are staged in
+  // AT and TMP, and the writes to the output registers are ordered so
+  // that an input register that aliases the output register is only
+  // overwritten once its value has been consumed.
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, b_hi, a_hi);
+ __ Bne(b_hi, a_hi, &compare_done);
+
+ __ Sltu(TMP, b_lo, a_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ __ Seleqz(AT, a_lo, TMP);
+ __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
+ // because at this point we're
+ // done using a_lo/b_lo.
+ } else {
+ __ Selnez(AT, a_lo, TMP);
+ __ Seleqz(out_lo, b_lo, TMP); // ditto
+ }
+ __ Or(out_lo, out_lo, AT);
+ if (is_min) {
+ __ Seleqz(AT, a_hi, TMP);
+ __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ } else {
+ __ Selnez(AT, a_hi, TMP);
+ __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ }
+ __ Or(out_hi, out_hi, AT);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, b, a);
+ if (is_min) {
+ __ Seleqz(TMP, a, AT);
+ __ Selnez(AT, b, AT);
+ } else {
+ __ Selnez(TMP, a, AT);
+ __ Seleqz(AT, b, AT);
+ }
+ __ Or(out, TMP, AT);
+ }
+ }
+ } else { // !isR6
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, a_hi, b_hi);
+ __ Bne(a_hi, b_hi, &compare_done);
+
+ __ Sltu(TMP, a_lo, b_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ if (out_lo != a_lo) {
+ __ Movn(out_hi, a_hi, TMP);
+ __ Movn(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movz(out_hi, b_hi, TMP);
+ __ Movz(out_lo, b_lo, TMP);
+ }
+ } else {
+ if (out_lo != a_lo) {
+ __ Movz(out_hi, a_hi, TMP);
+ __ Movz(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movn(out_hi, b_hi, TMP);
+ __ Movn(out_lo, b_lo, TMP);
+ }
+ }
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, a, b);
+ if (is_min) {
+ if (out != a) {
+ __ Movn(out, a, AT);
+ }
+ if (out != b) {
+ __ Movz(out, b, AT);
+ }
+ } else {
+ if (out != a) {
+ __ Movz(out, a, AT);
+ }
+ if (out != b) {
+ __ Movn(out, b, AT);
+ }
+ }
+ }
+ }
+ }
+}
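+
+// Example trace of the R6 32-bit path above, with arbitrary inputs: for a
+// min with a == 3 and b == 7, Slt leaves AT == 0 (7 < 3 is false), Seleqz
+// writes 3 into TMP, Selnez writes 0 into AT, and the final Or produces 3
+// in `out`, all without a branch.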
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
+
+ if (isR6) {
+ MipsLabel noNaNs;
+ MipsLabel done;
+ FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+ // When Java computes min/max it prefers a NaN to a number; the
+ // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+ // the inputs is a NaN and the other is a valid number, the MIPS
+ // instruction will return the number; Java wants the NaN value
+ // returned. This is why there is extra logic preceding the use of
+ // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
+ // NaN, return the NaN, otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+
+ } else { // !isR6
+ MipsLabel ordered;
+ MipsLabel compare;
+ MipsLabel select;
+ MipsLabel done;
+
+ if (type == DataType::Type::kFloat64) {
+ __ CunD(a, b);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CunS(a, b);
+ }
+ __ Bc1f(&ordered);
+
+    // a or b (or both) is a NaN. Return whichever one is a NaN.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(b, b);
+ } else {
+ __ CeqS(b, b);
+ }
+ __ B(&select);
+
+ __ Bind(&ordered);
+
+ // Neither is a NaN.
+ // a == b? (-0.0 compares equal with +0.0)
+ // If equal, handle zeroes, else compare further.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(a, b);
+ } else {
+ __ CeqS(a, b);
+ }
+ __ Bc1f(&compare);
+
+ // a == b either bit for bit or one is -0.0 and the other is +0.0.
+ if (type == DataType::Type::kFloat64) {
+ __ MoveFromFpuHigh(TMP, a);
+ __ MoveFromFpuHigh(AT, b);
+ } else {
+ __ Mfc1(TMP, a);
+ __ Mfc1(AT, b);
+ }
+
+ if (is_min) {
+ // -0.0 prevails over +0.0.
+ __ Or(TMP, TMP, AT);
+ } else {
+ // +0.0 prevails over -0.0.
+ __ And(TMP, TMP, AT);
+ }
+
+ if (type == DataType::Type::kFloat64) {
+ __ Mfc1(AT, a);
+ __ Mtc1(AT, out);
+ __ MoveToFpuHigh(TMP, out);
+ } else {
+ __ Mtc1(TMP, out);
+ }
+ __ B(&done);
+
+ __ Bind(&compare);
+
+ if (type == DataType::Type::kFloat64) {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeD(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeD(b, a); // b <= a
+ }
+ } else {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeS(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeS(b, a); // b <= a
+ }
+ }
+
+ __ Bind(&select);
+
+ if (type == DataType::Type::kFloat64) {
+ __ MovtD(out, a);
+ __ MovfD(out, b);
+ } else {
+ __ MovtS(out, a);
+ __ MovfS(out, b);
+ }
+
+ __ Bind(&done);
+ }
+}
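+
+// Example of the R6 NaN fix-up above: for min(NaN, 1.0) the CmpUnD
+// comparison is unordered, so the code falls through to the NaN block;
+// CmpEqD(ftmp, a, a) yields false because NaN != NaN, hence SelD picks `a`
+// and the NaN is returned, matching Java's Math.min() semantics.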
+
+void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderMIPS::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
+void LocationsBuilderMIPS::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations,
+ DataType::Type type,
+ bool isR2OrNewer,
+ bool isR6) {
+ FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+ // Note, as a "quality of implementation", rather than pure "spec compliance", we require that
+ // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN
+ // (signaling NaN may become quiet though).
+ //
+ // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case,
+ // both regular floating point numbers and NAN values are treated alike, only the sign bit is
+ // affected by this instruction.
+ // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any
+ // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be
+  // changed when doing abs(NaN). Because of that, we clear the sign bit in a different way.
+ if (isR6) {
+ if (type == DataType::Type::kFloat64) {
+ __ AbsD(out, in);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ AbsS(out, in);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ if (in != out) {
+ __ MovD(out, in);
+ }
+ __ MoveFromFpuHigh(TMP, in);
+ // ins instruction is not available for R1.
+ if (isR2OrNewer) {
+ __ Ins(TMP, ZERO, 31, 1);
+ } else {
+ __ Sll(TMP, TMP, 1);
+ __ Srl(TMP, TMP, 1);
+ }
+ __ MoveToFpuHigh(TMP, out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ Mfc1(TMP, in);
+ // ins instruction is not available for R1.
+ if (isR2OrNewer) {
+ __ Ins(TMP, ZERO, 31, 1);
+ } else {
+ __ Sll(TMP, TMP, 1);
+ __ Srl(TMP, TMP, 1);
+ }
+ __ Mtc1(TMP, out);
+ }
+ }
+}
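+
+// Example of the pre-R2 sign-bit clearing above: for the float bit pattern
+// 0xFFC00000 (a quiet NaN with the sign bit set), Sll gives 0xFF800000 and
+// Srl then yields 0x7FC00000, i.e. only bit 31 changed.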
+
+void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+ __ Sra(AT, in, 31);
+ __ Xor(out, in, AT);
+ __ Subu(out, out, AT);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+ // The comments in this section show the analogous operations which would
+      // be performed if we had 64-bit registers "in" and "out".
+ // __ Dsra32(AT, in, 31);
+ __ Sra(AT, in_hi, 31);
+ // __ Xor(out, in, AT);
+ __ Xor(TMP, in_lo, AT);
+ __ Xor(out_hi, in_hi, AT);
+ // __ Dsubu(out, out, AT);
+ __ Subu(out_lo, TMP, AT);
+ __ Sltu(TMP, out_lo, TMP);
+ __ Addu(out_hi, out_hi, TMP);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
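+
+// Example trace of the kInt64 case above, with an arbitrary input: for -5
+// the pair is in_hi == 0xFFFFFFFF, in_lo == 0xFFFFFFFB; AT becomes all ones,
+// the XORs give TMP == 4 and out_hi == 0, Subu yields out_lo == 5, and Sltu
+// finds no carry into the high word (5 < 4 is false), so the result is +5.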
+
void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) {
constructor_fence->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c91cb62eda5..2e7c736dbd3 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
void HandleShift(HBinaryOperation* operation);
@@ -246,6 +247,11 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation*, bool is_min);
+ void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -615,6 +621,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
};
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
@@ -689,7 +697,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -697,7 +706,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 985ac2ca554..5b07b55cbbb 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1509,6 +1509,14 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1527,11 +1535,10 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -1542,6 +1549,13 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
DCHECK_EQ(size, linker_patches->size());
}
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_);
+}
+
CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch(
MethodReference target_method,
const PcRelativePatchInfo* info_high) {
@@ -1780,6 +1794,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ GpuRegister temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ LoadFromOffset(
+ kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ if (IsUint<16>(path_to_root)) {
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ __ LoadConst32(TMP, path_to_root);
+ __ Xor(temp, temp, TMP);
+ }
+ // Shift out bits that do not contribute to the comparison.
+ __ Sll(temp, temp, 32 - mask_bits);
+ }
+}
+
void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
__ Sync(0); // only stype 0 is supported
}
@@ -2840,7 +2882,13 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -2849,7 +2897,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
- GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+ Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -2888,7 +2936,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ Bnec(temp, cls, slow_path->GetEntryLabel());
+ __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
break;
}
@@ -2914,7 +2962,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
// exception.
__ Beqzc(temp, slow_path->GetEntryLabel());
// Otherwise, compare the classes.
- __ Bnec(temp, cls, &loop);
+ __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop);
break;
}
@@ -2929,7 +2977,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
// Walk over the class hierarchy to find a match.
Mips64Label loop;
__ Bind(&loop);
- __ Beqc(temp, cls, &done);
+ __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
temp_loc,
@@ -2952,7 +3000,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
maybe_temp2_loc,
kWithoutReadBarrier);
// Do an exact check.
- __ Beqc(temp, cls, &done);
+ __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -3011,7 +3059,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
__ Daddiu(temp, temp, 2 * kHeapReferenceSize);
__ Addiu(TMP, TMP, -2);
// Compare the classes and continue the loop if they do not match.
- __ Bnec(AT, cls, &loop);
+ __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnezc(temp, slow_path->GetEntryLabel());
break;
}
}
@@ -5515,6 +5577,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -5523,7 +5587,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -5535,7 +5605,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
- GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+ Location cls = locations->InAt(1);
Location out_loc = locations->Out();
GpuRegister out = out_loc.AsRegister<GpuRegister>();
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -5567,7 +5637,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
read_barrier_option);
// Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
+ __ Xor(out, out, cls.AsRegister<GpuRegister>());
__ Sltiu(out, out, 1);
break;
}
@@ -5594,7 +5664,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqzc(out, &done);
- __ Bnec(out, cls, &loop);
+ __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop);
__ LoadConst32(out, 1);
break;
}
@@ -5612,7 +5682,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
// Walk over the class hierarchy to find a match.
Mips64Label loop, success;
__ Bind(&loop);
- __ Beqc(out, cls, &success);
+ __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -5639,7 +5709,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// Do an exact check.
Mips64Label success;
- __ Beqc(out, cls, &success);
+ __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -5671,7 +5741,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(
instruction, /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
- __ Bnec(out, cls, slow_path->GetEntryLabel());
+ __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
__ LoadConst32(out, 1);
break;
}
@@ -5703,6 +5773,20 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
__ Bc(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Sltiu(out, out, 1);
+ break;
+ }
}
__ Bind(&done);
@@ -5839,7 +5923,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
bool fallback_load = false;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5866,7 +5950,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5926,6 +6010,15 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(
kLoadDoubleword,
DeduplicateUint64Literal(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -6113,20 +6206,15 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
codegen_->DeduplicateBootImageAddressLiteral(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
__ Lwu(out, AT, /* placeholder */ 0x5678);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Daddiu(out, out, -masked_hash);
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -6248,12 +6336,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
__ Lwu(out, AT, /* placeholder */ 0x5678);
return;
@@ -6665,6 +6754,236 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+ if (lhs == rhs) {
+ if (out != lhs) {
+ __ Move(out, lhs);
+ }
+ } else {
+ // Some architectures, such as ARM and MIPS (prior to r6), have a
+ // conditional move instruction which only changes the target
+ // (output) register if the condition is true (MIPS prior to r6 had
+    // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions always
+ // change the target (output) register. If the condition is true the
+ // output register gets the contents of the "rs" register; otherwise,
+ // the output register is set to zero. One consequence of this is
+ // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
+ // needs to use a pair of SELEQZ/SELNEZ instructions. After
+ // executing this pair of instructions one of the output registers
+ // from the pair will necessarily contain zero. Then the code ORs the
+ // output registers from the SELEQZ/SELNEZ instructions to get the
+ // final result.
+ //
+ // The initial test to see if the output register is same as the
+ // first input register is needed to make sure that value in the
+ // first input register isn't clobbered before we've finished
+ // computing the output value. The logic in the corresponding else
+ // clause performs the same task but makes sure the second input
+ // register isn't clobbered in the event that it's the same register
+ // as the output register; the else clause also handles the case
+    // where the output register is distinct from both the first and the
+ // second input registers.
+ if (out == lhs) {
+ __ Slt(AT, rhs, lhs);
+ if (is_min) {
+ __ Seleqz(out, lhs, AT);
+ __ Selnez(AT, rhs, AT);
+ } else {
+ __ Selnez(out, lhs, AT);
+ __ Seleqz(AT, rhs, AT);
+ }
+ } else {
+ __ Slt(AT, lhs, rhs);
+ if (is_min) {
+ __ Seleqz(out, rhs, AT);
+ __ Selnez(AT, lhs, AT);
+ } else {
+ __ Selnez(out, rhs, AT);
+ __ Seleqz(AT, lhs, AT);
+ }
+ }
+ __ Or(out, out, AT);
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+ Mips64Label noNaNs;
+ Mips64Label done;
+ FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+ // When Java computes min/max it prefers a NaN to a number; the
+ // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+ // the inputs is a NaN and the other is a valid number, the MIPS
+ // instruction will return the number; Java wants the NaN value
+ // returned. This is why there is extra logic preceding the use of
+ // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
+ // NaN, return the NaN, otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderMIPS64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
+void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ __ Sra(AT, in, 31);
+ __ Xor(out, in, AT);
+ __ Subu(out, out, AT);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ __ Dsra32(AT, in, 31);
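+      // (Dsra32 shifts right arithmetically by 32 + 31 == 63 bits, so AT is
+      // zero for a non-negative input and all ones for a negative one.)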
+ __ Xor(out, in, AT);
+ __ Dsubu(out, out, AT);
+ break;
+ }
+ case DataType::Type::kFloat32: {
+ FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ __ AbsS(out, in);
+ break;
+ }
+ case DataType::Type::kFloat64: {
+ FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ __ AbsD(out, in);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) {
constructor_fence->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index e6b69c469fd..6e69e4611a7 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
@@ -242,6 +243,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -586,6 +591,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
};
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
@@ -655,7 +662,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
// Deduplication map for 64-bit literals, used for non-patchable method address or method code
// address.
Uint64ToLiteralMap uint64_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -663,7 +671,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 174efdf1155..6b0ec253e99 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -63,7 +63,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -125,7 +125,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -149,7 +149,7 @@ void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -173,7 +173,7 @@ void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* ins
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -200,7 +200,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -240,7 +240,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -259,7 +259,7 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Scvtf(dst.V4S(), src.V4S());
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -299,7 +299,7 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
__ Fneg(dst.V2D(), src.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -338,7 +338,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
__ Fabs(dst.V2D(), src.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -366,7 +366,7 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
__ Not(dst.V16B(), src.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -389,7 +389,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -431,7 +431,39 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
__ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
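Uqadd/Sqadd clamp each lane to the representable range instead of wrapping on overflow. Per lane, the semantics match this scalar sketch (function names are ours, for reference only):

    #include <cstdint>

    uint8_t UQAdd8(uint8_t a, uint8_t b) {
      uint32_t sum = uint32_t{a} + b;
      return sum > 255u ? uint8_t{255} : static_cast<uint8_t>(sum);  // clamp to [0, 255]
    }

    int8_t SQAdd8(int8_t a, int8_t b) {
      int32_t sum = int32_t{a} + b;
      if (sum > INT8_MAX) return INT8_MAX;  // clamp to [-128, 127]
      if (sum < INT8_MIN) return INT8_MIN;
      return static_cast<int8_t>(sum);
    }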
@@ -471,7 +503,7 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi
: __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -513,7 +545,39 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
__ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
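Uqsub/Sqsub are the subtractive counterparts: the unsigned form clamps at zero, the signed form at both range ends (again a scalar sketch with illustrative names):

    #include <cstdint>

    uint8_t UQSub8(uint8_t a, uint8_t b) {
      return a > b ? static_cast<uint8_t>(a - b) : uint8_t{0};
    }

    int8_t SQSub8(int8_t a, int8_t b) {
      int32_t diff = int32_t{a} - b;
      if (diff > INT8_MAX) return INT8_MAX;
      if (diff < INT8_MIN) return INT8_MIN;
      return static_cast<int8_t>(diff);
    }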
@@ -551,7 +615,7 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
__ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -575,7 +639,7 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
__ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -623,7 +687,7 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
__ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -671,7 +735,7 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
__ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -699,7 +763,7 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
__ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -735,7 +799,7 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
__ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -762,7 +826,7 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
__ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -782,7 +846,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -816,7 +880,7 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
__ Shl(dst.V2D(), lhs.V2D(), value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -850,7 +914,7 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
__ Sshr(dst.V2D(), lhs.V2D(), value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -884,7 +948,7 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
__ Ushr(dst.V2D(), lhs.V2D(), value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -916,7 +980,7 @@ void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -957,7 +1021,7 @@ void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instructi
__ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -978,7 +1042,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1026,7 +1090,7 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1139,7 +1203,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1167,7 +1231,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1188,7 +1252,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
__ Sabal2(acc.V2D(), left.V4S(), right.V4S());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1204,12 +1268,12 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -1237,7 +1301,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1331,7 +1395,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
__ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1362,7 +1426,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
__ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 7c3155ab73b..7b66b179839 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -46,7 +46,7 @@ void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instr
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -71,7 +71,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala
__ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -84,7 +84,7 @@ void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instructi
locations->SetOut(Location::RequiresRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -98,7 +98,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* i
__ Vmov(OutputRegister(instruction), DRegisterLane(src, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -122,7 +122,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -151,7 +151,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -188,7 +188,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
__ Vneg(DataTypeValue::S32, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -215,7 +215,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) {
__ Vabs(DataTypeValue::S32, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -242,7 +242,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) {
__ Vmvn(I8, dst, src); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -262,7 +262,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -292,7 +292,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
__ Vadd(I32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
+ vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
+ vixl32::DRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::U8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::S8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::U16, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::S16, dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
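Note that this ARM32 NEON path operates on 64-bit D registers, so the DCHECKed vector lengths are half of the ARM64 ones (8 x i8 and 4 x i16 versus 16 x i8 and 8 x i16); Vqadd and Vqsub otherwise have the same per-lane clamping behavior modeled in the scalar sketch above.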
@@ -332,7 +364,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc
: __ Vhadd(DataTypeValue::S16, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -362,7 +394,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) {
__ Vsub(I32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
+ vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
+ vixl32::DRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::U8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::S8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::U16, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::S16, dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -392,7 +456,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) {
__ Vmul(I32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -440,7 +504,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
__ Vmin(DataTypeValue::S32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -480,7 +544,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
__ Vmax(DataTypeValue::S32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -505,7 +569,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) {
__ Vand(I8, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -537,7 +601,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) {
__ Vorr(I8, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -561,7 +625,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) {
__ Veor(I8, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -580,7 +644,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -610,7 +674,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) {
__ Vshl(I32, dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -640,7 +704,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) {
__ Vshr(DataTypeValue::S32, dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -670,7 +734,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
__ Vshr(DataTypeValue::U32, dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -690,7 +754,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -716,7 +780,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruc
__ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -737,7 +801,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -780,12 +844,12 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -817,7 +881,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -923,7 +987,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -971,7 +1035,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index ed9de964965..df0e1485d69 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -42,7 +42,7 @@ void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruct
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -89,7 +89,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar*
/* is_double */ true);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -113,7 +113,7 @@ void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction)
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -138,7 +138,7 @@ void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* inst
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -170,7 +170,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
: Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -225,7 +225,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -244,7 +244,7 @@ void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Ffint_sW(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -290,7 +290,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) {
__ FsubD(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -337,7 +337,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) {
__ AndV(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -369,7 +369,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) {
__ NorV(dst, src, src); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -392,7 +392,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -434,11 +434,19 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
__ FaddD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
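Saturating add/sub are left unimplemented in the MIPS and MIPS64 back ends: the locations builders must still exist because the visitor interface requires an implementation, while the code generators abort, on the assumption that the vectorizer only emits these nodes for back ends that advertise support.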
void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -474,7 +482,7 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio
: __ Ave_sH(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -516,11 +524,19 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) {
__ FsubD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -558,7 +574,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) {
__ FmulD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -582,7 +598,7 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
__ FdivD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -640,7 +656,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
__ FminD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -698,7 +714,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
__ FmaxD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -727,7 +743,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) {
__ AndV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -764,7 +780,7 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) {
__ OrV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -793,7 +809,7 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) {
__ XorV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -813,7 +829,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -847,7 +863,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) {
__ SlliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -881,7 +897,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) {
__ SraiD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -915,7 +931,7 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
__ SrliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -947,7 +963,7 @@ void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -989,7 +1005,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instructio
__ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1010,7 +1026,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1060,7 +1076,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1162,7 +1178,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1201,7 +1217,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1231,7 +1247,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1247,13 +1263,13 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1282,7 +1298,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1357,7 +1373,7 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) {
__ LdD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1395,7 +1411,7 @@ void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) {
__ StD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 9ea55ec8d79..de354b63a15 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -47,7 +47,7 @@ void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instru
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -88,7 +88,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar
/* is_double */ true);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -112,7 +112,7 @@ void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instructio
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -136,7 +136,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* in
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -168,7 +168,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
: Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -223,7 +223,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -242,7 +242,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Ffint_sW(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -289,7 +289,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
__ FsubD(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -336,7 +336,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
__ AndV(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -368,7 +368,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
__ NorV(dst, src, src); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -391,7 +391,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -433,11 +433,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
__ FaddD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -473,7 +481,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct
: __ Ave_sH(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -515,11 +523,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
__ FsubD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -557,7 +573,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
__ FmulD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -581,7 +597,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
__ FdivD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -639,7 +655,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
__ FminD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -697,7 +713,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
__ FmaxD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -726,7 +742,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
__ AndV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -763,7 +779,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
__ OrV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -792,7 +808,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
__ XorV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -812,7 +828,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -846,7 +862,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
__ SlliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -880,7 +896,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
__ SraiD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -914,7 +930,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
__ SrliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -946,7 +962,7 @@ void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -987,7 +1003,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruct
__ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1008,7 +1024,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1058,7 +1074,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1160,7 +1176,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1199,7 +1215,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1229,7 +1245,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1245,13 +1261,13 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1280,7 +1296,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1355,7 +1371,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
__ LdD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1393,7 +1409,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
__ StD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index f2ffccc8879..086ae07a064 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -54,7 +54,7 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi
: Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -111,7 +111,7 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
__ shufpd(dst, dst, Immediate(0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -138,7 +138,7 @@ void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction)
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -152,7 +152,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_LE(4u, instruction->GetVectorLength());
@@ -174,7 +174,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -196,7 +196,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -258,12 +258,12 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
break;
case HVecReduce::kMin:
case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -282,7 +282,7 @@ void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ cvtdq2ps(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -328,7 +328,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -369,7 +369,7 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -418,7 +418,7 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -441,7 +441,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -483,7 +483,39 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
__ addpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
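On x86 the non-AVX SSE encodings are destructive, which is why the locations builder pins the output to the first input (the DCHECK above). The paddusb/paddsb/paddusw/paddsw instructions have the same clamping lane semantics; for reference, the standard SSE2 intrinsic equivalents (a sketch, not code from the patch):

    #include <emmintrin.h>

    __m128i SatAddU8(__m128i a, __m128i b)  { return _mm_adds_epu8(a, b); }   // paddusb
    __m128i SatAddS8(__m128i a, __m128i b)  { return _mm_adds_epi8(a, b); }   // paddsb
    __m128i SatAddU16(__m128i a, __m128i b) { return _mm_adds_epu16(a, b); }  // paddusw
    __m128i SatAddS16(__m128i a, __m128i b) { return _mm_adds_epi16(a, b); }  // paddsw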
@@ -503,14 +535,14 @@ void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ pavgb(dst, src);
- return;
+ __ pavgb(dst, src);
+ break;
case DataType::Type::kUint16:
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pavgw(dst, src);
- return;
+ break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -552,7 +584,39 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
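The subtract forms map the same way; psubusb/psubusw clamp at zero while the signed forms saturate at both ends (same caveats as the sketch above):

    #include <emmintrin.h>

    __m128i SatSubU8(__m128i a, __m128i b)  { return _mm_subs_epu8(a, b); }   // psubusb
    __m128i SatSubS8(__m128i a, __m128i b)  { return _mm_subs_epi8(a, b); }   // psubsb
    __m128i SatSubU16(__m128i a, __m128i b) { return _mm_subs_epu16(a, b); }  // psubusw
    __m128i SatSubS16(__m128i a, __m128i b) { return _mm_subs_epi16(a, b); }  // psubsw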
@@ -585,7 +649,7 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
__ mulpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -609,7 +673,7 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
__ divpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -658,7 +722,7 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
__ minpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -707,7 +771,7 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
__ maxpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -742,7 +806,7 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -777,7 +841,7 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
__ andnpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -812,7 +876,7 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
__ orpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -847,7 +911,7 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -865,7 +929,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -894,7 +958,7 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
__ psllq(dst, Immediate(static_cast<uint8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -919,7 +983,7 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
__ psrad(dst, Immediate(static_cast<uint8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -948,7 +1012,7 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
__ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -985,7 +1049,7 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1011,7 +1075,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -1035,7 +1099,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction
__ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1056,7 +1120,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1103,7 +1167,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1184,7 +1248,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1220,7 +1284,7 @@ void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index e2b0485f890..4d31ab68d11 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -49,7 +49,7 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
: Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -102,7 +102,7 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar
__ shufpd(dst, dst, Immediate(0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -126,7 +126,7 @@ void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instructio
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -140,7 +140,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -157,7 +157,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -179,7 +179,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -241,12 +241,12 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
break;
case HVecReduce::kMin:
case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -265,7 +265,7 @@ void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ cvtdq2ps(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -311,7 +311,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -352,7 +352,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -401,7 +401,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -424,7 +424,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -466,7 +466,39 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
__ addpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
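+// Same SSE2 saturating-add instructions as the x86 back end; the lane counts
+// (16 x 8-bit, 8 x 16-bit) reflect the 128-bit XMM width.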
+void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -486,14 +518,14 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ pavgb(dst, src);
- return;
+ __ pavgb(dst, src);
+ break;
case DataType::Type::kUint16:
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pavgw(dst, src);
- return;
+ break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -535,7 +567,39 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
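+// As with saturating add, psubs*/psubus* clamp instead of wrapping on underflow
+// or overflow.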
+void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -568,7 +632,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
__ mulpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -592,7 +656,7 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
__ divpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -641,7 +705,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
__ minpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -690,7 +754,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
__ maxpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -725,7 +789,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -760,7 +824,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
__ andnpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -795,7 +859,7 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
__ orpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -830,7 +894,7 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -848,7 +912,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -877,7 +941,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
__ psllq(dst, Immediate(static_cast<int8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -902,7 +966,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
__ psrad(dst, Immediate(static_cast<int8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -931,7 +995,7 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
__ psrlq(dst, Immediate(static_cast<int8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -963,7 +1027,7 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -989,7 +1053,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -1008,7 +1072,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct
__ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1029,7 +1093,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1076,7 +1140,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1157,7 +1221,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1193,7 +1257,7 @@ void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6bf045885d6..82d1fda8789 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400;
static constexpr int kFakeReturnRegister = Register(8);
+static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
+static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
@@ -3802,6 +3805,301 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ // Register to use to perform a long subtract to set cc.
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
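+      // GPR temp used by GenerateMinMaxFP to materialize the float NaN payload.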
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ if (type == DataType::Type::kInt64) {
+ // Need to perform a subtract to get the sign right.
+ // op1 is already in the same location as the output.
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
+ Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
+
+ // The comparison is performed by subtracting the second operand from
+ // the first operand and then setting the status flags in the same
+  // manner as the SUB instruction.
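+  // For example, with op1 = 0x1'00000000 and op2 = 0x0'FFFFFFFF: cmpl on the low
+  // words borrows (0 < 0xFFFFFFFF), and sbbl on the high words computes
+  // 1 - 0 - 1 = 0 without borrow, i.e. "greater or equal", as expected since op1 > op2.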
+ __ cmpl(output_lo, op2_lo);
+
+ // Now use a temp and the borrow to finish the subtraction of op2_hi.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(temp, output_hi);
+ __ sbbl(temp, op2_hi);
+
+ // Now the condition code is correct.
+ Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
+ __ cmovl(cond, output_lo, op2_lo);
+ __ cmovl(cond, output_hi, op2_hi);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register out = locations->Out().AsRegister<Register>();
+ Register op2 = op2_loc.AsRegister<Register>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ __ cmpl(out, op2);
+ Condition cond = is_min ? Condition::kGreater : Condition::kLess;
+ __ cmovl(cond, out, op2);
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
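+  //
+  // Two Java corner cases drive this shape: min(+0.0, -0.0) must be -0.0 (hence
+  // the OR of the sign bits) and max(+0.0, -0.0) must be +0.0 (hence the AND),
+  // while any NaN operand makes UCOMISS/UCOMISD report "unordered" via the
+  // parity flag, which is why kParityEven dispatches to the NaN block.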
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ // TODO: Use a constant from the constant table (requires extra input).
+ __ LoadLongConstant(out, kDoubleNaN);
+ } else {
+ Register constant = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(constant, Immediate(kFloatNaN));
+ __ movd(out, constant);
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderX86::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
+void LocationsBuilderX86::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
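+      // The cdq-based abs below pins the value to EAX and uses EDX for the sign,
+      // hence the fixed register locations and the EDX temp.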
+ locations->SetInAt(0, Location::RegisterLocation(EAX));
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RegisterLocation(EDX));
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ Register out = locations->Out().AsRegister<Register>();
+ DCHECK_EQ(out, EAX);
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ DCHECK_EQ(temp, EDX);
+ // Sign extend EAX into EDX.
+ __ cdq();
+ // XOR EAX with sign.
+ __ xorl(EAX, EDX);
+ // Subtract out sign to correct.
+ __ subl(EAX, EDX);
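+      // For example, x = -5: EDX = 0xFFFFFFFF, EAX ^ EDX = 4, and 4 - (-1) = 5.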
+ // The result is in EAX.
+ break;
+ }
+ case DataType::Type::kInt64: {
+ Location input = locations->InAt(0);
+ Register input_lo = input.AsRegisterPairLow<Register>();
+ Register input_hi = input.AsRegisterPairHigh<Register>();
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ // Compute the sign into the temporary.
+ __ movl(temp, input_hi);
+ __ sarl(temp, Immediate(31));
+ // Store the sign into the output.
+ __ movl(output_lo, temp);
+ __ movl(output_hi, temp);
+ // XOR the input to the output.
+ __ xorl(output_lo, input_lo);
+ __ xorl(output_hi, input_hi);
+ // Subtract the sign.
+ __ subl(output_lo, temp);
+ __ sbbl(output_hi, temp);
+ break;
+ }
+ case DataType::Type::kFloat32: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Register constant = locations->GetTemp(1).AsRegister<Register>();
+ __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
+ __ movd(temp, constant);
+ __ andps(out, temp);
+ break;
+ }
+ case DataType::Type::kFloat64: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // TODO: Use a constant from the constant table (requires extra input).
+ __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
+ __ andpd(out, temp);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
switch (instruction->GetType()) {
@@ -4534,6 +4832,15 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+ temp.AsRegister<Register>());
+ __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
+ RecordBootImageRelRoPatch(
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
+ GetBootImageOffset(invoke));
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
temp.AsRegister<Register>());
@@ -4595,6 +4902,13 @@ void CodeGeneratorX86::GenerateVirtualCall(
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
+void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t boot_image_offset) {
+ boot_image_method_patches_.emplace_back(
+ method_address, /* target_dex_file */ nullptr, boot_image_offset);
+ __ Bind(&boot_image_method_patches_.back().label);
+}
+
void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
@@ -4664,6 +4978,14 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
}
}
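+// Adapter: EmitPcRelativeLinkerPatches<> hands each patch a dex file and two
+// offsets, but DataBimgRelRoPatch() takes no dex file, so this shim checks and
+// drops it.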
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -4682,11 +5004,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -6055,7 +6376,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -6093,7 +6414,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
+ load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
load_kind == HLoadClass::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6169,17 +6490,12 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
__ movl(out, Immediate(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register method_address = locations->InAt(0).AsRegister<Register>();
__ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
- codegen_->RecordBootImageTypePatch(cls);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ subl(out, Immediate(masked_hash));
- }
+ codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
+ codegen_->GetBootImageOffset(cls));
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -6255,11 +6571,31 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
// No need for memory fence, thanks to the X86 memory model.
}
+void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Compare the bitstring in memory.
+ __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+ // Compare the bitstring bits using SUB.
+ __ subl(temp, Immediate(path_to_root));
+ // Shift out bits that do not contribute to the comparison.
+ __ shll(temp, Immediate(32u - mask_bits));
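+    // After the SUB/SHL pair, the zero flag is set iff the low mask_bits of
+    // status_ equal path_to_root; callers branch to the failure path on kNotEqual.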
+ }
+}
+
HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -6278,7 +6614,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kBootImageInternTable ||
+ load_kind == HLoadString::LoadKind::kBootImageRelRo ||
load_kind == HLoadString::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6332,11 +6668,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
__ movl(out, Immediate(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register method_address = locations->InAt(0).AsRegister<Register>();
__ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
- codegen_->RecordBootImageStringPatch(load);
+ codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
+ codegen_->GetBootImageOffset(load));
return;
}
case HLoadString::LoadKind::kBssEntry: {
@@ -6418,8 +6755,8 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
return 0;
}
-// Interface case has 3 temps, one for holding the number of interfaces, one for the current
-// interface pointer, one for loading the current interface.
+// Interface case has 2 temps, one for holding the number of interfaces, one for the current
+// interface pointer; the current interface is compared in memory.
// The other checks have one temp for loading the object's class.
static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
@@ -6447,6 +6784,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -6455,7 +6794,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
// Note that TypeCheckSlowPathX86 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for some cases.
@@ -6676,6 +7021,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ j(kNotEqual, &zero);
+ __ movl(out, Immediate(1));
+ __ jmp(&done);
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -6702,12 +7062,14 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
// Require a register for the interface check since there is a loop that compares the class to
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
+ } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
} else {
locations->SetInAt(1, Location::Any());
}
- // Note that TypeCheckSlowPathX86 uses this "temp" register too.
- locations->AddTemp(Location::RequiresRegister());
- // When read barriers are enabled, we need an additional temporary register for some cases.
+ // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -6921,6 +7283,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 51e5bca00b6..6c76e27d35f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -211,6 +211,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
// the suspend call.
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
void HandleBitwiseOperation(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -225,6 +226,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateShlLong(const Location& loc, int shift);
void GenerateShrLong(const Location& loc, int shift);
void GenerateUShrLong(const Location& loc, int shift);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
@@ -414,6 +418,8 @@ class CodeGeneratorX86 : public CodeGenerator {
void GenerateVirtualCall(
HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t boot_image_offset);
void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
void RecordBootImageTypePatch(HLoadClass* load_class);
@@ -631,17 +637,18 @@ class CodeGeneratorX86 : public CodeGenerator {
X86Assembler assembler_;
const X86InstructionSetFeatures& isa_features_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
- // Type patch locations for kBssEntry.
+ // PC-relative type patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_;
- // String patch locations for kBssEntry.
+ // PC-relative String patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;
// Patches for string root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 7be360536b2..322b0cfc4c1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -998,6 +998,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ __ movl(temp.AsRegister<CpuRegister>(),
+ Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+ RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
__ movq(temp.AsRegister<CpuRegister>(),
Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
@@ -1059,6 +1066,11 @@ void CodeGeneratorX86_64::GenerateVirtualCall(
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
+void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
+ boot_image_method_patches_.emplace_back(/* target_dex_file */ nullptr, boot_image_offset);
+ __ Bind(&boot_image_method_patches_.back().label);
+}
+
void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
boot_image_method_patches_.emplace_back(
invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
@@ -1110,6 +1122,14 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1128,11 +1148,10 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -3821,6 +3840,241 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+      // the second input as the output (we could simply swap the inputs).
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ if (type == DataType::Type::kInt64) {
+ __ cmpq(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ cmpl(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
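+  //
+  // Unlike the x86 back end, the NaN payloads below are loaded RIP-relative from
+  // the constant area (LiteralInt64Address/LiteralInt32Address) rather than
+  // materialized through a GPR.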
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
+ } else {
+ __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86_64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderX86_64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
+void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+ // Create mask.
+ __ movl(mask, out);
+ __ sarl(mask, Immediate(31));
+ // Add mask.
+ __ addl(out, mask);
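+      // XOR undoes the bias: (x + mask) ^ mask == |x| for mask = x >> 31 (and
+      // likewise with 63 in the 64-bit case below).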
+ __ xorl(out, mask);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+ // Create mask.
+ __ movq(mask, out);
+ __ sarq(mask, Immediate(63));
+ // Add mask.
+ __ addq(out, mask);
+ __ xorq(out, mask);
+ break;
+ }
+ case DataType::Type::kFloat32: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
+ __ andps(out, mask);
+ break;
+ }
+ case DataType::Type::kFloat64: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
+ __ andpd(out, mask);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::Any());
@@ -5462,6 +5716,26 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
// No need for memory fence, thanks to the x86-64 memory model.
}
+void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ CpuRegister temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Compare the bitstring in memory.
+ __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+ // Compare the bitstring bits using SUB.
+ __ subl(temp, Immediate(path_to_root));
+ // Shift out bits that do not contribute to the comparison.
+ __ shll(temp, Immediate(32u - mask_bits));
+ }
+}
+
HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
@@ -5471,7 +5745,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5579,16 +5853,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
__ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
__ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
- codegen_->RecordBootImageTypePatch(cls);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ subl(out, Immediate(masked_hash));
- }
+ codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -5653,7 +5921,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5719,10 +5987,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
__ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
__ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
- codegen_->RecordBootImageStringPatch(load);
+ codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
return;
}
case HLoadString::LoadKind::kBssEntry: {
@@ -5795,24 +6063,26 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
-static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
- if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
- // We need a temporary for holding the iftable length.
- return true;
- }
- return kEmitCompilerReadBarrier &&
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (kEmitCompilerReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck);
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
}
-static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
- return kEmitCompilerReadBarrier &&
- !kUseBakerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck);
+// The interface check needs two temps: one to hold the number of interfaces and one for the
+// current interface pointer; the interface itself is compared in memory.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ return 2;
+ }
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
}
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5834,6 +6104,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -5842,14 +6114,16 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
// Note that TypeCheckSlowPathX86_64 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
- // When read barriers are enabled, we need a temporary register for
- // some cases.
- if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
- locations->AddTemp(Location::RequiresRegister());
- }
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
}
void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5860,9 +6134,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
- locations->GetTemp(0) :
- Location::NoLocation();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6071,6 +6345,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ if (zero.IsLinked()) {
+ __ j(kNotEqual, &zero);
+ __ movl(out, Immediate(1));
+ __ jmp(&done);
+ } else {
+ __ setcc(kEqual, out);
+ // setcc only sets the low byte.
+ __ andl(out, Immediate(1));
+ }
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -6097,17 +6392,15 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
// Require a register for the interface check since there is a loop that compares the class to
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
+ } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
} else {
locations->SetInAt(1, Location::Any());
}
-
- // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
- locations->AddTemp(Location::RequiresRegister());
- // When read barriers are enabled, we need an additional temporary
- // register for some cases.
- if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
- locations->AddTemp(Location::RequiresRegister());
- }
+ // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
@@ -6118,9 +6411,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
- locations->GetTemp(1) :
- Location::NoLocation();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_GE(num_temps, 1u);
+ DCHECK_LE(num_temps, 2u);
+ Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6283,7 +6577,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
break;
}
- case TypeCheckKind::kInterfaceCheck:
+ case TypeCheckKind::kInterfaceCheck: {
// Fast path for the interface check. Try to avoid read barriers to improve the fast path.
// We cannot get false positives by doing this.
// /* HeapReference<Class> */ temp = obj->klass_
@@ -6319,6 +6613,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// If `cls` was poisoned above, unpoison it.
__ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
if (done.IsLinked()) {
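For illustration, a minimal standalone model of the comparison that GenerateBitstringTypeCheckCompare boils an instanceof/checkcast down to. FakeClass, status_ and BitstringTypeCheck below are assumptions made for the sketch, not ART's real mirror::Class layout or helpers.

#include <cstdint>
#include <iostream>

// The generated fast path reduces the type test to one masked compare:
//   (object->klass_->status_ & mask) == path_to_root
struct FakeClass {
  uint32_t status_;  // Pretend the low bits hold the SubtypeCheck bitstring.
};

bool BitstringTypeCheck(const FakeClass* klass, uint32_t path_to_root, uint32_t mask) {
  return (klass->status_ & mask) == path_to_root;
}

int main() {
  FakeClass c{0x2du};  // Pretend encoded path-to-root of the object's class.
  std::cout << BitstringTypeCheck(&c, 0x2du, 0xffu) << '\n';  // 1: on the target's path.
  std::cout << BitstringTypeCheck(&c, 0x15u, 0xffu) << '\n';  // 0: different branch of the tree.
}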
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1079e94dfc2..9a4c53b5240 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -208,6 +208,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
// the suspend call.
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
void HandleBitwiseOperation(HBinaryOperation* operation);
void GenerateRemFP(HRem* rem);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -222,6 +223,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -410,6 +415,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateVirtualCall(
HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
void RecordBootImageTypePatch(HLoadClass* load_class);
@@ -604,17 +610,18 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Used for fixups to the constant area.
int constant_area_start_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
- // Type patch locations for kBssEntry.
+ // PC-relative type patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
- // String patch locations for kBssEntry.
+ // PC-relative String patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
// Patches for string literals in JIT compiled code.
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index c41c290c8b4..792cfb539a6 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -195,7 +195,9 @@ class InternalCodeAllocator : public CodeAllocator {
}
size_t GetSize() const { return size_; }
- uint8_t* GetMemory() const { return memory_.get(); }
+ ArrayRef<const uint8_t> GetMemory() const OVERRIDE {
+ return ArrayRef<const uint8_t>(memory_.get(), size_);
+ }
private:
size_t size_;
@@ -269,8 +271,8 @@ static void Run(const InternalCodeAllocator& allocator,
InstructionSet target_isa = codegen.GetInstructionSet();
typedef Expected (*fptr)();
- CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
- fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+ CommonCompilerTest::MakeExecutable(allocator.GetMemory().data(), allocator.GetMemory().size());
+ fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(allocator.GetMemory().data()));
if (target_isa == InstructionSet::kThumb2) {
// For thumb we need the bottom bit set.
f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
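A minimal sketch of why GetMemory() now returns ArrayRef<const uint8_t> instead of a raw pointer paired with a separate GetSize(): the view carries its own bounds, so callers such as MakeExecutable cannot pair the wrong size with the pointer. The ArrayRef and CodeAllocator below are simplified stand-ins, not ART's classes.

#include <cstddef>
#include <cstdint>
#include <memory>

template <typename T>
class ArrayRef {
 public:
  ArrayRef(T* data, size_t size) : data_(data), size_(size) {}
  T* data() const { return data_; }
  size_t size() const { return size_; }
 private:
  T* data_;
  size_t size_;
};

class CodeAllocator {
 public:
  explicit CodeAllocator(size_t size) : size_(size), memory_(new uint8_t[size]) {}
  // Pointer and length travel together in one value.
  ArrayRef<const uint8_t> GetMemory() const {
    return ArrayRef<const uint8_t>(memory_.get(), size_);
  }
 private:
  size_t size_;
  std::unique_ptr<uint8_t[]> memory_;
};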
diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h
index 4a6c91459fc..be26e67af37 100644
--- a/compiler/optimizing/data_type.h
+++ b/compiler/optimizing/data_type.h
@@ -210,6 +210,12 @@ class DataType {
static bool IsTypeConversionImplicit(Type input_type, Type result_type);
static bool IsTypeConversionImplicit(int64_t value, Type result_type);
+ static bool IsZeroExtension(Type input_type, Type result_type) {
+ return IsIntOrLongType(result_type) &&
+ IsUnsignedType(input_type) &&
+ Size(result_type) > Size(input_type);
+ }
+
static const char* PrettyDescriptor(Type type);
private:
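A standalone sketch of the new IsZeroExtension() predicate, with simplified stand-ins for DataType's enum and helpers: a conversion is a zero extension exactly when an unsigned type is widened into int or long, so the high bits (including the sign bit) of the result are guaranteed clear.

#include <cstddef>
#include <iostream>

enum class Type { kUint8, kInt8, kUint16, kInt16, kInt32, kInt64 };

constexpr bool IsUnsigned(Type t) { return t == Type::kUint8 || t == Type::kUint16; }
constexpr bool IsIntOrLong(Type t) { return t == Type::kInt32 || t == Type::kInt64; }
constexpr size_t Size(Type t) {
  switch (t) {
    case Type::kUint8: case Type::kInt8: return 1;
    case Type::kUint16: case Type::kInt16: return 2;
    case Type::kInt32: return 4;
    case Type::kInt64: return 8;
  }
  return 0;
}

constexpr bool IsZeroExtension(Type input, Type result) {
  return IsIntOrLong(result) && IsUnsigned(input) && Size(result) > Size(input);
}

int main() {
  std::cout << IsZeroExtension(Type::kUint8, Type::kInt32) << '\n';  // 1: char -> int.
  std::cout << IsZeroExtension(Type::kInt8, Type::kInt32) << '\n';   // 0: sign extension.
}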
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c88baa8610f..fbcbe3608e6 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -25,6 +25,11 @@
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "obj_ptr-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "subtype_check.h"
namespace art {
@@ -548,30 +553,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {
}
}
-void GraphChecker::VisitCheckCast(HCheckCast* check) {
- VisitInstruction(check);
- HInstruction* input = check->InputAt(1);
- if (!input->IsLoadClass()) {
- AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
+void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+ size_t input_pos,
+ bool check_value,
+ uint32_t expected_value,
+ const char* name) {
+ if (!check->InputAt(input_pos)->IsIntConstant()) {
+ AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.",
check->DebugName(),
check->GetId(),
- input->DebugName(),
- input->GetId()));
+ input_pos,
+ name,
+ check->InputAt(input_pos)->DebugName(),
+ check->InputAt(input_pos)->GetId()));
+ } else if (check_value) {
+ uint32_t actual_value =
+ static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue());
+ if (actual_value != expected_value) {
+ AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.",
+ check->DebugName(),
+ check->GetId(),
+ name,
+ actual_value,
+ expected_value));
+ }
}
}
-void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
- VisitInstruction(instruction);
- HInstruction* input = instruction->InputAt(1);
- if (!input->IsLoadClass()) {
- AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
- instruction->DebugName(),
- instruction->GetId(),
- input->DebugName(),
- input->GetId()));
+void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+ VisitInstruction(check);
+ HInstruction* input = check->InputAt(1);
+ if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ if (!input->IsNullConstant()) {
+ AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.",
+ check->DebugName(),
+ check->GetId(),
+ input->DebugName(),
+ input->GetId()));
+ }
+ bool check_values = false;
+ BitString::StorageType expected_path_to_root = 0u;
+ BitString::StorageType expected_mask = 0u;
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = check->GetClass().Get();
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+ if (state == SubtypeCheckInfo::kAssigned) {
+ expected_path_to_root =
+ SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass);
+ expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass);
+ check_values = true;
+ } else {
+ AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.",
+ check->DebugName(),
+ check->GetId()));
+ }
+ }
+ CheckTypeCheckBitstringInput(
+ check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root");
+ CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask");
+ } else {
+ if (!input->IsLoadClass()) {
+ AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.",
+ check->DebugName(),
+ check->GetId(),
+ input->DebugName(),
+ input->GetId()));
+ }
}
}
+void GraphChecker::VisitCheckCast(HCheckCast* check) {
+ HandleTypeCheckInstruction(check);
+}
+
+void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
+ HandleTypeCheckInstruction(instruction);
+}
+
void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
int id = loop_header->GetBlockId();
HLoopInformation* loop_information = loop_header->GetLoopInformation();
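A sketch of the validation pattern CheckTypeCheckBitstringInput follows, with FakeInput standing in for an HInstruction: verify that a fixed input slot is an int constant with the expected value, and accumulate printf-style errors instead of aborting on the first failure.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct FakeInput { bool is_int_constant; uint32_t value; };

void CheckBitstringInput(const FakeInput& input,
                         size_t input_pos,
                         uint32_t expected_value,
                         const char* name,
                         std::vector<std::string>* errors) {
  char buf[128];
  if (!input.is_int_constant) {
    std::snprintf(buf, sizeof(buf),
                  "input %zu (%s) is not an int constant", input_pos, name);
    errors->push_back(buf);
  } else if (input.value != expected_value) {
    std::snprintf(buf, sizeof(buf), "%s is 0x%x, expected 0x%x",
                  name, static_cast<unsigned>(input.value),
                  static_cast<unsigned>(expected_value));
    errors->push_back(buf);
  }
}

int main() {
  std::vector<std::string> errors;
  CheckBitstringInput({true, 0x2d}, 2, 0x2d, "path_to_root", &errors);
  CheckBitstringInput({true, 0x00}, 3, 0xff, "mask", &errors);
  return static_cast<int>(errors.size());  // 1: only the mask mismatch is reported.
}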
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 0f0b49d240a..dbedc405185 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -71,6 +71,12 @@ class GraphChecker : public HGraphDelegateVisitor {
void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+ void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+ size_t input_pos,
+ bool check_value,
+ uint32_t expected_value,
+ const char* name);
+ void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction);
void HandleLoop(HBasicBlock* loop_header);
void HandleBooleanInput(HInstruction* instruction, size_t input_index);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 5ff31cead58..54d46445804 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -390,16 +390,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("load_kind") << load_string->GetLoadKind();
}
- void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
- StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
+ void HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+ StartAttributeStream("check_kind") << check->GetTypeCheckKind();
StartAttributeStream("must_do_null_check") << std::boolalpha
- << check_cast->MustDoNullCheck() << std::noboolalpha;
+ << check->MustDoNullCheck() << std::noboolalpha;
+ if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ StartAttributeStream("path_to_root") << std::hex
+ << "0x" << check->GetBitstringPathToRoot() << std::dec;
+ StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec;
+ }
+ }
+
+ void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
+ HandleTypeCheckInstruction(check_cast);
}
void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
- StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
- StartAttributeStream("must_do_null_check") << std::boolalpha
- << instance_of->MustDoNullCheck() << std::noboolalpha;
+ HandleTypeCheckInstruction(instance_of);
}
void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
@@ -576,6 +583,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
StartAttributeStream() << input_list;
}
+ if (instruction->GetDexPc() != kNoDexPc) {
+ StartAttributeStream("dex_pc") << instruction->GetDexPc();
+ } else {
+ StartAttributeStream("dex_pc") << "n/a";
+ }
instruction->Accept(this);
if (instruction->HasEnvironment()) {
StringList envs;
@@ -641,20 +653,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
<< std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
}
+ // For the builder and the inliner, we want to add extra information on HInstructions
+ // that have reference types, and also HInstanceOf/HCheckCast.
if ((IsPass(HGraphBuilder::kBuilderPassName)
|| IsPass(HInliner::kInlinerPassName))
- && (instruction->GetType() == DataType::Type::kReference)) {
- ReferenceTypeInfo info = instruction->IsLoadClass()
- ? instruction->AsLoadClass()->GetLoadedClassRTI()
- : instruction->GetReferenceTypeInfo();
+ && (instruction->GetType() == DataType::Type::kReference ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast())) {
+ ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference)
+ ? instruction->IsLoadClass()
+ ? instruction->AsLoadClass()->GetLoadedClassRTI()
+ : instruction->GetReferenceTypeInfo()
+ : instruction->IsInstanceOf()
+ ? instruction->AsInstanceOf()->GetTargetClassRTI()
+ : instruction->AsCheckCast()->GetTargetClassRTI();
ScopedObjectAccess soa(Thread::Current());
if (info.IsValid()) {
StartAttributeStream("klass")
<< mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
- StartAttributeStream("can_be_null")
- << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+ if (instruction->GetType() == DataType::Type::kReference) {
+ StartAttributeStream("can_be_null")
+ << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+ }
StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
- } else if (instruction->IsLoadClass()) {
+ } else if (instruction->IsLoadClass() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast()) {
StartAttributeStream("klass") << "unresolved";
} else {
// The NullConstant may be added to the graph during other passes that happen between
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 99dec112400..55eca2316a1 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) {
DCHECK(instruction != nullptr);
if (instruction->IsArrayLength()) {
return true;
- } else if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // Instruction MIN(>=0, >=0) is >= 0.
- return IsGEZero(instruction->InputAt(0)) &&
- IsGEZero(instruction->InputAt(1));
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- // Instruction ABS(>=0) is >= 0.
- // NOTE: ABS(minint) = minint prevents assuming
- // >= 0 without looking at the argument.
- return IsGEZero(instruction->InputAt(0));
- default:
- break;
- }
+ } else if (instruction->IsMin()) {
+ // Instruction MIN(>=0, >=0) is >= 0.
+ return IsGEZero(instruction->InputAt(0)) &&
+ IsGEZero(instruction->InputAt(1));
+ } else if (instruction->IsAbs()) {
+ // Instruction ABS(>=0) is >= 0.
+ // NOTE: ABS(minint) = minint prevents assuming
+ // >= 0 without looking at the argument.
+ return IsGEZero(instruction->InputAt(0));
}
int64_t value = -1;
return IsInt64AndGet(instruction, &value) && value >= 0;
@@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) {
/** Hunts "under the hood" for a suitable instruction at the hint. */
static bool IsMaxAtHint(
HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) {
- if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // For MIN(x, y), return most suitable x or y as maximum.
- return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
- IsMaxAtHint(instruction->InputAt(1), hint, suitable);
- default:
- break;
- }
+ if (instruction->IsMin()) {
+ // For MIN(x, y), return most suitable x or y as maximum.
+ return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
+ IsMaxAtHint(instruction->InputAt(1), hint, suitable);
} else {
*suitable = instruction;
return HuntForDeclaration(instruction) == hint;
}
- return false;
}
/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */
@@ -365,14 +351,16 @@ void InductionVarRange::Replace(HInstruction* instruction,
}
}
-bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const {
- HInductionVarAnalysis::InductionInfo *trip =
- induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
- if (trip != nullptr && !IsUnsafeTripCount(trip)) {
- IsConstant(trip->op_a, kExact, tc);
- return true;
- }
- return false;
+bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const {
+ bool is_constant_unused = false;
+ return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count);
+}
+
+bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop,
+ /*out*/ int64_t* trip_count) const {
+ bool is_constant = false;
+ CheckForFiniteAndConstantProps(loop, &is_constant, trip_count);
+ return is_constant;
}
bool InductionVarRange::IsUnitStride(HInstruction* context,
@@ -431,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,
// Private class methods.
//
+bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const {
+ HInductionVarAnalysis::InductionInfo *trip =
+ induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+ if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+ *is_constant = IsConstant(trip->op_a, kExact, trip_count);
+ return true;
+ }
+ return false;
+}
+
bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
ConstantRequest request,
/*out*/ int64_t* value) const {
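A compact model of the refactoring above, with LoopInfo standing in for the induction-analysis lookup: one private helper computes both finiteness and the constant trip count, and the two public queries each surface the part they need.

#include <cstdint>
#include <iostream>

struct LoopInfo { bool finite; bool has_constant_trip; int64_t trip; };

static bool CheckForFiniteAndConstantProps(const LoopInfo& loop,
                                           bool* is_constant,
                                           int64_t* trip_count) {
  if (!loop.finite) return false;
  *is_constant = loop.has_constant_trip;
  if (loop.has_constant_trip) *trip_count = loop.trip;
  return true;  // The loop is known to terminate.
}

bool IsFinite(const LoopInfo& loop, int64_t* trip_count) {
  bool is_constant_unused = false;
  return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count);
}

bool HasKnownTripCount(const LoopInfo& loop, int64_t* trip_count) {
  bool is_constant = false;
  CheckForFiniteAndConstantProps(loop, &is_constant, trip_count);
  return is_constant;
}

int main() {
  LoopInfo loop{true, true, 10};
  int64_t tc = 0;
  std::cout << IsFinite(loop, &tc) << ' ' << HasKnownTripCount(loop, &tc)
            << ' ' << tc << '\n';  // 1 1 10
}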
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 0b980f596a3..906dc6bb7b9 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -161,9 +161,15 @@ class InductionVarRange {
}
/**
- * Checks if header logic of a loop terminates. Sets trip-count tc if known.
+ * Checks if the header logic of a loop terminates. If the trip count is known, sets
+ * 'trip_count' to its value.
*/
- bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
+ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
+
+ /**
+ * Checks if the trip count of the loop is known and, if so, sets 'trip_count' to its value.
+ */
+ bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
/**
* Checks if the given instruction is a unit stride induction inside the closest enveloping
@@ -194,6 +200,14 @@ class InductionVarRange {
};
/**
+ * Checks if the header logic of a loop terminates. If the trip count is known (constant), sets
+ * 'is_constant' to true and 'trip_count' to the trip count value.
+ */
+ bool CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const;
+
+ /**
* Returns true if exact or upper/lower bound on the given induction
* information is known as a 64-bit constant, which is returned in value.
*/
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 4fc7262265e..8b10a78212e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -147,10 +147,11 @@ void HInliner::Run() {
// that this method is actually inlined;
// - if a method's name contains the substring "$noinline$", do not
// inline that method.
- // We limit this to AOT compilation, as the JIT may or may not inline
+ // We limit the latter to AOT compilation, as the JIT may or may not inline
// depending on the state of classes at runtime.
- const bool honor_inlining_directives =
- IsCompilingWithCoreImage() && Runtime::Current()->IsAotCompiler();
+ const bool honor_noinline_directives = IsCompilingWithCoreImage();
+ const bool honor_inline_directives =
+ honor_noinline_directives && Runtime::Current()->IsAotCompiler();
// Keep a copy of all blocks when starting the visit.
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
@@ -164,18 +165,19 @@ void HInliner::Run() {
HInvoke* call = instruction->AsInvoke();
// As long as the call is not intrinsified, it is worth trying to inline.
if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
- if (honor_inlining_directives) {
+ if (honor_noinline_directives) {
// Debugging case: directives in method names control or assert on inlining.
std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod(
call->GetDexMethodIndex(), /* with_signature */ false);
// Tests prevent inlining by having $noinline$ in their method names.
if (callee_name.find("$noinline$") == std::string::npos) {
- if (!TryInline(call)) {
+ if (!TryInline(call) && honor_inline_directives) {
bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos);
CHECK(!should_have_inlined) << "Could not inline " << callee_name;
}
}
} else {
+ DCHECK(!honor_inline_directives);
// Normal case: try to inline.
TryInline(call);
}
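A standalone sketch of the directive split, with TryInline stubbed out: $noinline$ is honored whenever a core image is used, while the $inline$ assertion only fires for AOT, since the JIT may legitimately decline to inline.

#include <iostream>
#include <string>

// Stand-in for the real inliner entry point; pretend inlining always fails.
static bool TryInline(const std::string& callee) { (void)callee; return false; }

static void MaybeInline(const std::string& callee_name, bool core_image, bool is_aot) {
  const bool honor_noinline = core_image;
  const bool honor_inline = honor_noinline && is_aot;
  if (!honor_noinline) {
    TryInline(callee_name);  // Normal case: just try.
    return;
  }
  if (callee_name.find("$noinline$") != std::string::npos) {
    return;  // Directive: never inline this callee.
  }
  if (!TryInline(callee_name) && honor_inline) {
    // Under AOT, $inline$ asserts that inlining must have succeeded.
    if (callee_name.find("$inline$") != std::string::npos) {
      std::cerr << "Could not inline " << callee_name << '\n';  // CHECK failure in the real code.
    }
  }
}

int main() {
  MaybeInline("Foo.$inline$bar", /* core_image */ true, /* is_aot */ true);
}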
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index c7aef3779d1..9647dd5d41c 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1815,29 +1815,6 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
}
}
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- if (cls == nullptr) {
- return TypeCheckKind::kUnresolvedCheck;
- } else if (cls->IsInterface()) {
- return TypeCheckKind::kInterfaceCheck;
- } else if (cls->IsArrayClass()) {
- if (cls->GetComponentType()->IsObjectClass()) {
- return TypeCheckKind::kArrayObjectCheck;
- } else if (cls->CannotBeAssignedFromOtherTypes()) {
- return TypeCheckKind::kExactCheck;
- } else {
- return TypeCheckKind::kArrayCheck;
- }
- } else if (cls->IsFinal()) {
- return TypeCheckKind::kExactCheck;
- } else if (cls->IsAbstract()) {
- return TypeCheckKind::kAbstractClassCheck;
- } else {
- return TypeCheckKind::kClassHierarchyCheck;
- }
-}
-
void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
HLoadString* load_string =
new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
@@ -1852,22 +1829,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_
HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
- Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
- Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
- soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
-
- bool needs_access_check = true;
- if (klass != nullptr) {
- if (klass->IsPublic()) {
- needs_access_check = false;
- } else {
- ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
- if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
- needs_access_check = false;
- }
- }
- }
-
+ Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+ bool needs_access_check = LoadClassNeedsAccessCheck(klass);
return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
}
@@ -1912,25 +1875,83 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
return load_class;
}
+Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa,
+ dex::TypeIndex type_index) {
+ Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
+ ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass(
+ soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_);
+ // TODO: Avoid creating excessive handles if the method references the same class repeatedly.
+ // (Use a map on the local_allocator_.)
+ return handles_->NewHandle(klass);
+}
+
+bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) {
+ if (klass == nullptr) {
+ return true;
+ } else if (klass->IsPublic()) {
+ return false;
+ } else {
+ ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
+ return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get());
+ }
+}
+
void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
uint8_t destination,
uint8_t reference,
dex::TypeIndex type_index,
uint32_t dex_pc) {
HInstruction* object = LoadLocal(reference, DataType::Type::kReference);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
ScopedObjectAccess soa(Thread::Current());
- TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass());
+ const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+ Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+ bool needs_access_check = LoadClassNeedsAccessCheck(klass);
+ TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind(
+ klass.Get(), code_generator_, compiler_driver_, needs_access_check);
+
+ HInstruction* class_or_null = nullptr;
+ HIntConstant* bitstring_path_to_root = nullptr;
+ HIntConstant* bitstring_mask = nullptr;
+ if (check_kind == TypeCheckKind::kBitstringCheck) {
+ // TODO: Allow using the bitstring check also if we need an access check.
+ DCHECK(!needs_access_check);
+ class_or_null = graph_->GetNullConstant(dex_pc);
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ uint32_t path_to_root =
+ SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get());
+ uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get());
+ bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc);
+ bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc);
+ } else {
+ class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
+ }
+ DCHECK(class_or_null != nullptr);
+
if (instruction.Opcode() == Instruction::INSTANCE_OF) {
- AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc));
+ AppendInstruction(new (allocator_) HInstanceOf(object,
+ class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator_,
+ bitstring_path_to_root,
+ bitstring_mask));
UpdateLocal(destination, current_block_->GetLastInstruction());
} else {
DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
// We emit a CheckCast followed by a BoundType. CheckCast is a statement
// which may throw. If it succeeds BoundType sets the new type of `object`
// for all subsequent uses.
- AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc));
+ AppendInstruction(
+ new (allocator_) HCheckCast(object,
+ class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator_,
+ bitstring_path_to_root,
+ bitstring_mask));
AppendInstruction(new (allocator_) HBoundType(object, dex_pc));
UpdateLocal(reference, current_block_->GetLastInstruction());
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 4428c532779..f78829232d4 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -39,6 +39,7 @@ class DexCompilationUnit;
class HBasicBlockBuilder;
class Instruction;
class OptimizingCompilerStats;
+class ScopedObjectAccess;
class SsaBuilder;
class VariableSizedHandleScope;
@@ -232,6 +233,12 @@ class HInstructionBuilder : public ValueObject {
bool needs_access_check)
REQUIRES_SHARED(Locks::mutator_lock_);
+ Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
// Returns the outer-most compiling method's class.
ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a42a85dc1d2..d3cf9568c2d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -67,7 +67,6 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
bool TryCombineVecMultiplyAccumulate(HVecMul* mul);
void VisitShift(HBinaryOperation* shift);
-
void VisitEqual(HEqual* equal) OVERRIDE;
void VisitNotEqual(HNotEqual* equal) OVERRIDE;
void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE;
@@ -78,6 +77,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void VisitNullCheck(HNullCheck* instruction) OVERRIDE;
void VisitArrayLength(HArrayLength* instruction) OVERRIDE;
void VisitCheckCast(HCheckCast* instruction) OVERRIDE;
+ void VisitAbs(HAbs* instruction) OVERRIDE;
void VisitAdd(HAdd* instruction) OVERRIDE;
void VisitAnd(HAnd* instruction) OVERRIDE;
void VisitCondition(HCondition* instruction) OVERRIDE;
@@ -120,6 +120,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void SimplifyReturnThis(HInvoke* invoke);
void SimplifyAllocationIntrinsic(HInvoke* invoke);
void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
+ void SimplifyMin(HInvoke* invoke, DataType::Type type);
+ void SimplifyMax(HInvoke* invoke, DataType::Type type);
+ void SimplifyAbs(HInvoke* invoke, DataType::Type type);
CodeGenerator* codegen_;
CompilerDriver* compiler_driver_;
@@ -576,7 +579,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst
// Returns whether doing a type test between the class of `object` against `klass` has
// a statically known outcome. The result of the test is stored in `outcome`.
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) {
+static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti,
+ HInstruction* object,
+ /*out*/bool* outcome) {
DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";
ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
ScopedObjectAccess soa(Thread::Current());
@@ -586,7 +591,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
return false;
}
- ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();
if (!class_rti.IsValid()) {
// Happens when the loaded class is unresolved.
return false;
@@ -611,8 +615,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
HInstruction* object = check_cast->InputAt(0);
- HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
- if (load_class->NeedsAccessCheck()) {
+ if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+ check_cast->GetTargetClass()->NeedsAccessCheck()) {
// If we need to perform an access check we cannot remove the instruction.
return;
}
@@ -630,15 +634,18 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
// Note: The `outcome` is initialized to please valgrind - the compiler can reorder
// the return value check with the `outcome` check, b/27651442 .
bool outcome = false;
- if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+ if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) {
if (outcome) {
check_cast->GetBlock()->RemoveInstruction(check_cast);
MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
- if (!load_class->HasUses()) {
- // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
- // However, here we know that it cannot because the checkcast was successfull, hence
- // the class was already loaded.
- load_class->GetBlock()->RemoveInstruction(load_class);
+ if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+ HLoadClass* load_class = check_cast->GetTargetClass();
+ if (!load_class->HasUses()) {
+ // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+ // However, here we know that it cannot because the checkcast was successful, hence
+ // the class was already loaded.
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
}
} else {
// Don't do anything for exceptional cases for now. Ideally we should remove
@@ -649,8 +656,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
HInstruction* object = instruction->InputAt(0);
- HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
- if (load_class->NeedsAccessCheck()) {
+ if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+ instruction->GetTargetClass()->NeedsAccessCheck()) {
// If we need to perform an access check we cannot remove the instruction.
return;
}
@@ -673,7 +680,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
// Note: The `outcome` is initialized to please valgrind - the compiler can reorder
// the return value check with the `outcome` check, b/27651442 .
bool outcome = false;
- if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+ if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) {
MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
if (outcome && can_be_null) {
// Type test will succeed, we just need a null test.
@@ -686,11 +693,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
}
RecordSimplification();
instruction->GetBlock()->RemoveInstruction(instruction);
- if (outcome && !load_class->HasUses()) {
- // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
- // However, here we know that it cannot because the instanceof check was successfull, hence
- // the class was already loaded.
- load_class->GetBlock()->RemoveInstruction(load_class);
+ if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+ HLoadClass* load_class = instruction->GetTargetClass();
+ if (!load_class->HasUses()) {
+ // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+ // However, here we know that it cannot because the instanceof check was successful, hence
+ // the class was already loaded.
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
}
}
}
@@ -849,35 +859,29 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) {
static HInstruction* NewIntegralAbs(ArenaAllocator* allocator,
HInstruction* x,
HInstruction* cursor) {
- DataType::Type type = x->GetType();
- DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
- // Construct a fake intrinsic with as much context as is needed to allocate one.
- // The intrinsic will always be lowered into code later anyway.
- // TODO: b/65164101 : moving towards a real HAbs node makes more sense.
- HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
- HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress,
- HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- 0u
- };
- HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect(
- allocator,
- 1,
- type,
- x->GetDexPc(),
- /*method_idx*/ -1,
- /*resolved_method*/ nullptr,
- dispatch_info,
- kStatic,
- MethodReference(nullptr, dex::kDexNoIndex),
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
- invoke->SetArgumentAt(0, x);
- invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt
- : Intrinsics::kMathAbsLong,
- kNoEnvironmentOrCache,
- kNoSideEffects,
- kNoThrow);
- cursor->GetBlock()->InsertInstructionBefore(invoke, cursor);
- return invoke;
+ DataType::Type type = DataType::Kind(x->GetType());
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+ HAbs* abs = new (allocator) HAbs(type, x, cursor->GetDexPc());
+ cursor->GetBlock()->InsertInstructionBefore(abs, cursor);
+ return abs;
+}
+
+// Constructs a new MIN/MAX(x, y) node in the HIR.
+static HInstruction* NewIntegralMinMax(ArenaAllocator* allocator,
+ HInstruction* x,
+ HInstruction* y,
+ HInstruction* cursor,
+ bool is_min) {
+ DataType::Type type = DataType::Kind(x->GetType());
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+ HBinaryOperation* minmax = nullptr;
+ if (is_min) {
+ minmax = new (allocator) HMin(type, x, y, cursor->GetDexPc());
+ } else {
+ minmax = new (allocator) HMax(type, x, y, cursor->GetDexPc());
+ }
+ cursor->GetBlock()->InsertInstructionBefore(minmax, cursor);
+ return minmax;
}
// Returns true if operands a and b consist of widening type conversions
@@ -899,6 +903,30 @@ static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInst
to_type == DataType::Type::kInt64);
}
+// Returns an acceptable substitution for "a" on the select
+// construct "a <cmp> b ? c : .." during MIN/MAX recognition.
+static HInstruction* AllowInMinMax(IfCondition cmp,
+ HInstruction* a,
+ HInstruction* b,
+ HInstruction* c) {
+ int64_t value = 0;
+ if (IsInt64AndGet(b, /*out*/ &value) &&
+ (((cmp == kCondLT || cmp == kCondLE) && c->IsMax()) ||
+ ((cmp == kCondGT || cmp == kCondGE) && c->IsMin()))) {
+ HConstant* other = c->AsBinaryOperation()->GetConstantRight();
+ if (other != nullptr && a == c->AsBinaryOperation()->GetLeastConstantLeft()) {
+ int64_t other_value = Int64FromConstant(other);
+ bool is_max = (cmp == kCondLT || cmp == kCondLE);
+ // Allow the max for a < 100 ? max(a, -100) : ..
+ // or the min for a > -100 ? min(a, 100) : ..
+ if (is_max ? (value >= other_value) : (value <= other_value)) {
+ return c;
+ }
+ }
+ }
+ return nullptr;
+}
+
void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
HInstruction* replace_with = nullptr;
HInstruction* condition = select->GetCondition();
@@ -942,23 +970,35 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
DataType::Type t_type = true_value->GetType();
DataType::Type f_type = false_value->GetType();
// Here we have a <cmp> b ? true_value : false_value.
- // Test if both values are same-typed int or long.
- if (t_type == f_type &&
- (t_type == DataType::Type::kInt32 || t_type == DataType::Type::kInt64)) {
- // Try to replace typical integral ABS constructs.
- if (true_value->IsNeg()) {
- HInstruction* negated = true_value->InputAt(0);
- if ((cmp == kCondLT || cmp == kCondLE) &&
- (a == negated && a == false_value && IsInt64Value(b, 0))) {
- // Found a < 0 ? -a : a which can be replaced by ABS(a).
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select);
- }
- } else if (false_value->IsNeg()) {
- HInstruction* negated = false_value->InputAt(0);
- if ((cmp == kCondGT || cmp == kCondGE) &&
- (a == true_value && a == negated && IsInt64Value(b, 0))) {
- // Found a > 0 ? a : -a which can be replaced by ABS(a).
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
+ // Test if both values are compatible integral types (resulting MIN/MAX/ABS
+ // type will be int or long, like the condition). Replacements are general,
+ // but assume conditions prefer constants on the right.
+ if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) {
+ // Allow a < 100 ? max(a, -100) : ..
+ // or a > -100 ? min(a, 100) : ..
+ // to use min/max instead of a to detect nested min/max expressions.
+ HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value);
+ if (new_a != nullptr) {
+ a = new_a;
+ }
+ // Try to replace typical integral MIN/MAX/ABS constructs.
+ if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) &&
+ ((a == true_value && b == false_value) ||
+ (b == true_value && a == false_value))) {
+ // Found a < b ? a : b (MIN) or a < b ? b : a (MAX)
+ // or a > b ? a : b (MAX) or a > b ? b : a (MIN).
+ bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value);
+ replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min);
+ } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) ||
+ ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) {
+ bool negLeft = (cmp == kCondLT || cmp == kCondLE);
+ HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0);
+ HInstruction* not_negated = negLeft ? false_value : true_value;
+ if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) {
+ // Found a < 0 ? -a : a
+ // or a > 0 ? a : -a
+ // which can be replaced by ABS(a).
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select);
}
} else if (true_value->IsSub() && false_value->IsSub()) {
HInstruction* true_sub1 = true_value->InputAt(0);
@@ -970,8 +1010,8 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
((cmp == kCondLT || cmp == kCondLE) &&
(a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) &&
AreLowerPrecisionArgs(t_type, a, b)) {
- // Found a > b ? a - b : b - a or
- // a < b ? b - a : a - b
+ // Found a > b ? a - b : b - a
+ // or a < b ? b - a : a - b
// which can be replaced by ABS(a - b) for lower precision operands a, b.
replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
}
@@ -1230,6 +1270,17 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct
}
}
+void InstructionSimplifierVisitor::VisitAbs(HAbs* instruction) {
+ HInstruction* input = instruction->GetInput();
+ if (DataType::IsZeroExtension(input->GetType(), instruction->GetResultType())) {
+ // Zero extension from narrow to wide can never set the sign bit in the wider
+ // operand, making the subsequent Abs redundant (e.g., abs(b & 0xff) for byte b).
+ instruction->ReplaceWith(input);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ RecordSimplification();
+ }
+}
+
void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
HInstruction* input_other = instruction->GetLeastConstantLeft();
@@ -2430,6 +2481,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke,
invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier);
}
+void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMin* min = new (GetGraph()->GetAllocator())
+ HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min);
+}
+
+void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMax* max = new (GetGraph()->GetAllocator())
+ HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max);
+}
+
+void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HAbs* abs = new (GetGraph()->GetAllocator())
+ HAbs(type, invoke->InputAt(0), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs);
+}
+
void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
switch (instruction->GetIntrinsic()) {
case Intrinsics::kStringEquals:
@@ -2513,6 +2585,42 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
case Intrinsics::kVarHandleStoreStoreFence:
SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore);
break;
+ case Intrinsics::kMathMinIntInt:
+ SimplifyMin(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMinLongLong:
+ SimplifyMin(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMinFloatFloat:
+ SimplifyMin(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMinDoubleDouble:
+ SimplifyMin(instruction, DataType::Type::kFloat64);
+ break;
+ case Intrinsics::kMathMaxIntInt:
+ SimplifyMax(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMaxLongLong:
+ SimplifyMax(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMaxFloatFloat:
+ SimplifyMax(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMaxDoubleDouble:
+ SimplifyMax(instruction, DataType::Type::kFloat64);
+ break;
+ case Intrinsics::kMathAbsInt:
+ SimplifyAbs(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathAbsLong:
+ SimplifyAbs(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathAbsFloat:
+ SimplifyAbs(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathAbsDouble:
+ SimplifyAbs(instruction, DataType::Type::kFloat64);
+ break;
default:
break;
}
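A standalone model of the core select-to-MIN/MAX recognition added above, with instructions modeled as opaque pointers: "a <cmp> b ? t : f" is a min/max when {t, f} is exactly {a, b}, and it is a MIN precisely when the true value sits on the "less" side of the comparison.

#include <iostream>

struct HInst {};  // Stand-in for an HInstruction node.
enum class Cmp { kLT, kLE, kGT, kGE };

bool RecognizeMinMax(Cmp cmp, const HInst* a, const HInst* b,
                     const HInst* t, const HInst* f, bool* is_min) {
  if (!((a == t && b == f) || (b == t && a == f))) {
    return false;  // The select does not choose between the compared operands.
  }
  bool less = (cmp == Cmp::kLT || cmp == Cmp::kLE);
  *is_min = (less == (a == t));  // a < b ? a : b is MIN; a < b ? b : a is MAX.
  return true;
}

int main() {
  HInst a, b;
  bool is_min = false;
  std::cout << RecognizeMinMax(Cmp::kLT, &a, &b, &a, &b, &is_min)
            << ' ' << is_min << '\n';  // 1 1: MIN(a, b)
}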
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 62991435c7a..1035cbc2c46 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -266,6 +266,18 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \
<< " should have been converted to HIR"; \
}
#define UNREACHABLE_INTRINSICS(Arch) \
+UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \
UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \
UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \
UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \
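A sketch of the UNREACHABLE_INTRINSIC idea outside ART's codegen: once an intrinsic is rewritten into a dedicated HIR node by the simplifier, its visitor must never be reached, so the macro stamps out an aborting visitor per name.

#include <cstdio>
#include <cstdlib>

#define UNREACHABLE_INTRINSIC(Name)                            \
  void Visit##Name() {                                         \
    std::fprintf(stderr, #Name " should be HIR by now\n");     \
    std::abort();                                              \
  }

struct IntrinsicVisitors {
  UNREACHABLE_INTRINSIC(MathMinIntInt)
  UNREACHABLE_INTRINSIC(MathAbsDouble)
};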
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 0e6485be9f7..c3d643a7d18 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
static void GenNumberOfLeadingZeros(LocationSummary* locations,
DataType::Type type,
MacroAssembler* masm) {
@@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
- Location in = locations->InAt(0);
- Location out = locations->Out();
-
- FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
- FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);
-
- __ Fabs(out_reg, in_reg);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
-}
-
-static void GenAbsInteger(LocationSummary* locations,
- bool is64bit,
- MacroAssembler* masm) {
- Location in = locations->InAt(0);
- Location output = locations->Out();
-
- Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
- Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);
-
- __ Cmp(in_reg, Operand(0));
- __ Cneg(out_reg, in_reg, lt);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
- FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
- FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
- if (is_min) {
- __ Fmin(out_reg, op1_reg, op2_reg);
- } else {
- __ Fmax(out_reg, op1_reg, op2_reg);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- bool is_long,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
- Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
- Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
-
- __ Cmp(op1_reg, op2_reg);
- __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
-}
-
void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
CreateFPToFPLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 97a145664c3..29aecbc0975 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -432,341 +432,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo
GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}
-static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
- __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke, GetAssembler());
-}
-
-static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-
- locations->AddTemp(Location::RequiresRegister());
-}
-
-static void GenAbsInteger(LocationSummary* locations,
- bool is64bit,
- ArmVIXLAssembler* assembler) {
- Location in = locations->InAt(0);
- Location output = locations->Out();
-
- vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
-
- if (is64bit) {
- vixl32::Register in_reg_lo = LowRegisterFrom(in);
- vixl32::Register in_reg_hi = HighRegisterFrom(in);
- vixl32::Register out_reg_lo = LowRegisterFrom(output);
- vixl32::Register out_reg_hi = HighRegisterFrom(output);
-
- DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
-
- __ Asr(mask, in_reg_hi, 31);
- __ Adds(out_reg_lo, in_reg_lo, mask);
- __ Adc(out_reg_hi, in_reg_hi, mask);
- __ Eor(out_reg_lo, mask, out_reg_lo);
- __ Eor(out_reg_hi, mask, out_reg_hi);
- } else {
- vixl32::Register in_reg = RegisterFrom(in);
- vixl32::Register out_reg = RegisterFrom(output);
-
- __ Asr(mask, in_reg, 31);
- __ Add(out_reg, in_reg, mask);
- __ Eor(out_reg, mask, out_reg);
- }
-}
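The Asr/Add/Eor sequence is the classic sign-mask abs; a hedged C++ sketch with illustrative names (the same identity reappears in the MIPS and x86 ports below):

    #include <cstdint>

    // mask = x >> 31 is all ones for negative x and zero otherwise, so
    // (x + mask) ^ mask leaves non-negative x alone and two's-complement
    // negates negative x. Unsigned arithmetic avoids signed overflow.
    inline int32_t AbsViaSignMask32(int32_t x) {
      uint32_t mask = static_cast<uint32_t>(x >> 31);        // Asr
      return static_cast<int32_t>(
          (static_cast<uint32_t>(x) + mask) ^ mask);         // Add, Eor
    }

    // The 64-bit variant above works on a register pair: Adds/Adc carry
    // the low-word addition into the high word before the two Eors.
    inline int64_t AbsViaSignMask64(int64_t x) {
      uint64_t mask = static_cast<uint64_t>(x >> 63);
      return static_cast<int64_t>((static_cast<uint64_t>(x) + mask) ^ mask);
    }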
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::SRegister op1 = SRegisterFrom(op1_loc);
- vixl32::SRegister op2 = SRegisterFrom(op2_loc);
- vixl32::SRegister out = OutputSRegister(invoke);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp1 = temps.Acquire();
- vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
- vixl32::Label nan, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F32, out, op2);
- }
- // For <> (not equal), we've already done the min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
- __ Vmov(temp1, op1);
- __ Vmov(temp2, op2);
- if (is_min) {
- __ Orr(temp1, temp1, temp2);
- } else {
- __ And(temp1, temp1, temp2);
- }
- __ Vmov(out, temp1);
- __ B(final_label);
-
- // handle NaN input.
- __ Bind(&nan);
- __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
- __ Vmov(out, temp1);
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
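The Orr/And merge is a bit-pattern trick for the signed-zero corner case: -0.0f is 0x80000000, so OR keeps the sign bit (min) and AND drops it unless both inputs carry it (max). A self-contained sketch under that assumption:

    #include <cstdint>
    #include <cstring>

    // Only reached when the operands compare equal; the interesting case
    // is +0.0f vs -0.0f, where the merged bit pattern picks the right one.
    inline float SignedZeroSelect(float a, float b, bool is_min) {
      uint32_t ua, ub;
      std::memcpy(&ua, &a, sizeof(ua));
      std::memcpy(&ub, &b, sizeof(ub));
      uint32_t ur = is_min ? (ua | ub) : (ua & ub);  // Orr / And
      float r;
      std::memcpy(&r, &ur, sizeof(r));
      return r;
    }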
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::DRegister op1 = DRegisterFrom(op1_loc);
- vixl32::DRegister op2 = DRegisterFrom(op2_loc);
- vixl32::DRegister out = OutputDRegister(invoke);
- vixl32::Label handle_nan_eq, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F64, out, op2);
- }
- // For <> (not equal), we've already done the min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0).
- if (!is_min) {
- __ Vand(F64, out, op1, op2);
- __ B(final_label);
- }
-
- // handle op1 == op2, min(+0.0,-0.0), NaN input.
- __ Bind(&handle_nan_eq);
- __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
- vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
- vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
- vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
- vixl32::Register out_lo = LowRegisterFrom(out_loc);
- vixl32::Register out_hi = HighRegisterFrom(out_loc);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp = temps.Acquire();
-
- DCHECK(op1_lo.Is(out_lo));
- DCHECK(op1_hi.Is(out_hi));
-
- // Compare op1 >= op2, or op1 < op2.
- __ Cmp(out_lo, op2_lo);
- __ Sbcs(temp, out_hi, op2_hi);
-
- // Now GE/LT condition code is correct for the long comparison.
- {
- vixl32::ConditionType cond = is_min ? ge : lt;
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ itt(cond);
- __ mov(cond, out_lo, op2_lo);
- __ mov(cond, out_hi, op2_hi);
- }
-}
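Cmp on the low words followed by Sbcs on the high words leaves the flags as if the full 64-bit subtraction had run, so a single GE/LT test decides the comparison. Semantically (a sketch, with illustrative names):

    #include <cstdint>

    // What Cmp(lo1, lo2); Sbcs(tmp, hi1, hi2) decides without 64-bit
    // registers: a signed comparison of the reassembled values.
    inline bool SignedLess64(uint32_t lo1, int32_t hi1,
                             uint32_t lo2, int32_t hi2) {
      int64_t a = (static_cast<int64_t>(hi1) << 32) | lo1;
      int64_t b = (static_cast<int64_t>(hi2) << 32) | lo2;
      return a < b;
    }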
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
-}
-
-static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- vixl32::Register op1 = InputRegisterAt(invoke, 0);
- vixl32::Register op2 = InputRegisterAt(invoke, 1);
- vixl32::Register out = OutputRegister(invoke);
-
- __ Cmp(op1, op2);
-
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ ite(is_min ? lt : gt);
- __ mov(is_min ? lt : gt, out, op1);
- __ mov(is_min ? ge : le, out, op2);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ false, GetAssembler());
-}
-
void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
CreateFPToFPLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index b7936b9c8ec..ae248a3e5c7 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
}
+inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
DataType::Type type,
bool isR6,
+ bool hasMsa,
MipsAssembler* assembler) {
Register out = locations->Out().AsRegister<Register>();
@@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations,
// instructions compared to a loop-based algorithm which required 47
// instructions.
- if (type == DataType::Type::kInt32) {
- Register in = locations->InAt(0).AsRegister<Register>();
-
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- if (isR6) {
- __ MulR6(out, out, TMP);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
} else {
- __ MulR2(out, out, TMP);
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ __ Mtc1(in_lo, FTMP);
+ __ Mthc1(in_hi, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
}
- __ Srl(out, out, 24);
} else {
- DCHECK_EQ(type, DataType::Type::kInt64);
- Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
- Register out_hi = locations->GetTemp(1).AsRegister<Register>();
- Register tmp_lo = TMP;
- Register out_lo = out;
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ if (isR6) {
+ __ MulR6(out, out, TMP);
+ } else {
+ __ MulR2(out, out, TMP);
+ }
+ __ Srl(out, out, 24);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
+ Register out_hi = locations->GetTemp(1).AsRegister<Register>();
+ Register tmp_lo = TMP;
+ Register out_lo = out;
- __ Srl(tmp_lo, in_lo, 1);
- __ Srl(tmp_hi, in_hi, 1);
+ __ Srl(tmp_lo, in_lo, 1);
+ __ Srl(tmp_hi, in_hi, 1);
- __ LoadConst32(AT, 0x55555555);
+ __ LoadConst32(AT, 0x55555555);
- __ And(tmp_lo, tmp_lo, AT);
- __ Subu(tmp_lo, in_lo, tmp_lo);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Subu(tmp_lo, in_lo, tmp_lo);
- __ And(tmp_hi, tmp_hi, AT);
- __ Subu(tmp_hi, in_hi, tmp_hi);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Subu(tmp_hi, in_hi, tmp_hi);
- __ LoadConst32(AT, 0x33333333);
+ __ LoadConst32(AT, 0x33333333);
- __ And(out_lo, tmp_lo, AT);
- __ Srl(tmp_lo, tmp_lo, 2);
- __ And(tmp_lo, tmp_lo, AT);
- __ Addu(tmp_lo, out_lo, tmp_lo);
+ __ And(out_lo, tmp_lo, AT);
+ __ Srl(tmp_lo, tmp_lo, 2);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Addu(tmp_lo, out_lo, tmp_lo);
- __ And(out_hi, tmp_hi, AT);
- __ Srl(tmp_hi, tmp_hi, 2);
- __ And(tmp_hi, tmp_hi, AT);
- __ Addu(tmp_hi, out_hi, tmp_hi);
+ __ And(out_hi, tmp_hi, AT);
+ __ Srl(tmp_hi, tmp_hi, 2);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Addu(tmp_hi, out_hi, tmp_hi);
- // Here we deviate from the original algorithm a bit. We've reached
- // the stage where the bitfields holding the subtotals are large
- // enough to hold the combined subtotals for both the low word, and
- // the high word. This means that we can add the subtotals for
- // the high, and low words into a single word, and compute the final
- // result for both the high, and low words using fewer instructions.
- __ LoadConst32(AT, 0x0F0F0F0F);
+ // Here we deviate from the original algorithm a bit. We've reached
+ // the stage where the bitfields holding the subtotals are large
+ // enough to hold the combined subtotals for both the low word, and
+ // the high word. This means that we can add the subtotals for
+ // the high, and low words into a single word, and compute the final
+ // result for both the high, and low words using fewer instructions.
+ __ LoadConst32(AT, 0x0F0F0F0F);
- __ Addu(TMP, tmp_hi, tmp_lo);
+ __ Addu(TMP, tmp_hi, tmp_lo);
- __ Srl(out, TMP, 4);
- __ And(out, out, AT);
- __ And(TMP, TMP, AT);
- __ Addu(out, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ And(out, out, AT);
+ __ And(TMP, TMP, AT);
+ __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x01010101);
+ __ LoadConst32(AT, 0x01010101);
- if (isR6) {
- __ MulR6(out, out, AT);
- } else {
- __ MulR2(out, out, AT);
- }
+ if (isR6) {
+ __ MulR6(out, out, AT);
+ } else {
+ __ MulR2(out, out, AT);
+ }
- __ Srl(out, out, 24);
+ __ Srl(out, out, 24);
+ }
}
}
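The scalar fallback is the classic SWAR population count (the "33 instructions" path); with MSA the whole reduction collapses into a single PcntW/PcntD. A C++ sketch of the 32-bit reduction:

    #include <cstdint>

    // Fold 2-bit, 4-bit, then 8-bit subtotals, and let a multiply by
    // 0x01010101 sum the four byte counts into the top byte.
    inline uint32_t PopCount32(uint32_t x) {
      x = x - ((x >> 1) & 0x55555555u);                  // 2-bit sums
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit sums
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit sums
      return (x * 0x01010101u) >> 24;                    // byte total
    }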
@@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(long)
@@ -739,575 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler());
-}
-
-static void MathAbsFP(LocationSummary* locations,
- bool is64bit,
- bool isR2OrNewer,
- bool isR6,
- MipsAssembler* assembler) {
- FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
-
- // Note, as a "quality of implementation", rather than pure "spec compliance", we require that
- // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN
- // (signaling NaN may become quiet though).
- //
- // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case,
- // both regular floating point numbers and NAN values are treated alike, only the sign bit is
- // affected by this instruction.
- // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any
- // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be
- // changed when doing abs(NaN). Because of that, we clear sign bit in a different way.
- if (isR6) {
- if (is64bit) {
- __ AbsD(out, in);
- } else {
- __ AbsS(out, in);
- }
- } else {
- if (is64bit) {
- if (in != out) {
- __ MovD(out, in);
- }
- __ MoveFromFpuHigh(TMP, in);
- // ins instruction is not available for R1.
- if (isR2OrNewer) {
- __ Ins(TMP, ZERO, 31, 1);
- } else {
- __ Sll(TMP, TMP, 1);
- __ Srl(TMP, TMP, 1);
- }
- __ MoveToFpuHigh(TMP, out);
- } else {
- __ Mfc1(TMP, in);
- // ins instruction is not available for R1.
- if (isR2OrNewer) {
- __ Ins(TMP, ZERO, 31, 1);
- } else {
- __ Sll(TMP, TMP, 1);
- __ Srl(TMP, TMP, 1);
- }
- __ Mtc1(TMP, out);
- }
- }
-}
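Pre-R6 the sign bit has to be cleared by hand so that abs(NaN) changes nothing but the sign; the Ins (or Sll/Srl pair) is equivalent to masking bit 31 of the raw representation. Sketch, assuming IEEE-754 single precision:

    #include <cstdint>
    #include <cstring>

    // Clear only the sign bit of the bit pattern; NaN payloads survive.
    inline float AbsBitwise(float in) {
      uint32_t bits;
      std::memcpy(&bits, &in, sizeof(bits));
      bits &= 0x7FFFFFFFu;  // Ins(TMP, ZERO, 31, 1) / the Sll+Srl pair
      float out;
      std::memcpy(&out, &bits, sizeof(out));
      return out;
    }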
-
-// double java.lang.Math.abs(double)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler());
-}
-
-// float java.lang.Math.abs(float)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler());
-}
-
-static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
- if (is64bit) {
- Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- // The comments in this section show the analogous operations which would
- // be performed if we had 64-bit registers "in", and "out".
- // __ Dsra32(AT, in, 31);
- __ Sra(AT, in_hi, 31);
- // __ Xor(out, in, AT);
- __ Xor(TMP, in_lo, AT);
- __ Xor(out_hi, in_hi, AT);
- // __ Dsubu(out, out, AT);
- __ Subu(out_lo, TMP, AT);
- __ Sltu(TMP, out_lo, TMP);
- __ Addu(out_hi, out_hi, TMP);
- } else {
- Register in = locations->InAt(0).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- __ Sra(AT, in, 31);
- __ Xor(out, in, AT);
- __ Subu(out, out, AT);
- }
-}
-
-// int java.lang.Math.abs(int)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-// long java.lang.Math.abs(long)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
- FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
- FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
-
- if (is_R6) {
- MipsLabel noNaNs;
- MipsLabel done;
- FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
- } else {
- MipsLabel ordered;
- MipsLabel compare;
- MipsLabel select;
- MipsLabel done;
-
- if (type == DataType::Type::kFloat64) {
- __ CunD(a, b);
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CunS(a, b);
- }
- __ Bc1f(&ordered);
-
- // a or b (or both) is a NaN. Return one, which is a NaN.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(b, b);
- } else {
- __ CeqS(b, b);
- }
- __ B(&select);
-
- __ Bind(&ordered);
-
- // Neither is a NaN.
- // a == b? (-0.0 compares equal with +0.0)
- // If equal, handle zeroes, else compare further.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(a, b);
- } else {
- __ CeqS(a, b);
- }
- __ Bc1f(&compare);
-
- // a == b either bit for bit or one is -0.0 and the other is +0.0.
- if (type == DataType::Type::kFloat64) {
- __ MoveFromFpuHigh(TMP, a);
- __ MoveFromFpuHigh(AT, b);
- } else {
- __ Mfc1(TMP, a);
- __ Mfc1(AT, b);
- }
-
- if (is_min) {
- // -0.0 prevails over +0.0.
- __ Or(TMP, TMP, AT);
- } else {
- // +0.0 prevails over -0.0.
- __ And(TMP, TMP, AT);
- }
-
- if (type == DataType::Type::kFloat64) {
- __ Mfc1(AT, a);
- __ Mtc1(AT, out);
- __ MoveToFpuHigh(TMP, out);
- } else {
- __ Mtc1(TMP, out);
- }
- __ B(&done);
-
- __ Bind(&compare);
-
- if (type == DataType::Type::kFloat64) {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeD(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeD(b, a); // b <= a
- }
- } else {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeS(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeS(b, a); // b <= a
- }
- }
-
- __ Bind(&select);
-
- if (type == DataType::Type::kFloat64) {
- __ MovtD(out, a);
- __ MovfD(out, b);
- } else {
- __ MovtS(out, a);
- __ MovfS(out, b);
- }
-
- __ Bind(&done);
- }
-}
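The R6 preamble exists because Java and the hardware disagree on NaN; a dispatch sketch of the semantics it restores (std::fmin stands in for MinD on the no-NaN path, illustrative only):

    #include <cmath>

    inline double JavaMin(double a, double b) {
      if (std::isnan(a)) return a;  // CmpEqD(a, a) fails -> select a
      if (std::isnan(b)) return b;  // otherwise b is the NaN
      return std::fmin(a, b);       // MinD/MinS on the no-NaN branch
    }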
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- if (is_R6) {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
- // always change the target (output) register. If the condition is
- // true the output register gets the contents of the "rs" register;
- // otherwise, the output register is set to zero. One consequence
- // of this is that to implement something like "rd = c==0 ? rs : rt"
- // MIPS32r6 needs to use a pair of SELEQZ/SELNEZ instructions.
- // After executing this pair of instructions one of the output
- // registers from the pair will necessarily contain zero. Then the
- // code ORs the output registers from the SELEQZ/SELNEZ instructions
- // to get the final result.
- //
- // The initial test to see if the output register is same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, b_hi, a_hi);
- __ Bne(b_hi, a_hi, &compare_done);
-
- __ Sltu(TMP, b_lo, a_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- __ Seleqz(AT, a_lo, TMP);
- __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
- // because at this point we're
- // done using a_lo/b_lo.
- } else {
- __ Selnez(AT, a_lo, TMP);
- __ Seleqz(out_lo, b_lo, TMP); // ditto
- }
- __ Or(out_lo, out_lo, AT);
- if (is_min) {
- __ Seleqz(AT, a_hi, TMP);
- __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- } else {
- __ Selnez(AT, a_hi, TMP);
- __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- }
- __ Or(out_hi, out_hi, AT);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, b, a);
- if (is_min) {
- __ Seleqz(TMP, a, AT);
- __ Selnez(AT, b, AT);
- } else {
- __ Selnez(TMP, a, AT);
- __ Seleqz(AT, b, AT);
- }
- __ Or(out, TMP, AT);
- }
- }
- } else {
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, a_hi, b_hi);
- __ Bne(a_hi, b_hi, &compare_done);
-
- __ Sltu(TMP, a_lo, b_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- if (out_lo != a_lo) {
- __ Movn(out_hi, a_hi, TMP);
- __ Movn(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movz(out_hi, b_hi, TMP);
- __ Movz(out_lo, b_lo, TMP);
- }
- } else {
- if (out_lo != a_lo) {
- __ Movz(out_hi, a_hi, TMP);
- __ Movz(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movn(out_hi, b_hi, TMP);
- __ Movn(out_lo, b_lo, TMP);
- }
- }
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, a, b);
- if (is_min) {
- if (out != a) {
- __ Movn(out, a, AT);
- }
- if (out != b) {
- __ Movz(out, b, AT);
- }
- } else {
- if (out != a) {
- __ Movz(out, a, AT);
- }
- if (out != b) {
- __ Movn(out, b, AT);
- }
- }
- }
- }
- }
-}
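The zeroing selects compose a branch-free conditional move: each of SELEQZ/SELNEZ writes either its source or zero, and the OR merges the pair. Sketch with illustrative names:

    #include <cstdint>

    // out = take_a ? a : b, in the SELNEZ/SELEQZ + Or style used above.
    inline uint32_t SelectViaZeroingMoves(uint32_t a, uint32_t b, bool take_a) {
      uint32_t sel_a = take_a ? a : 0u;  // Selnez
      uint32_t sel_b = take_a ? 0u : b;  // Seleqz
      return sel_a | sel_b;              // Or
    }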
-
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index 13397f11d4c..1c1ba401325 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
bool IsR2OrNewer() const;
bool IsR6() const;
bool Is32BitFPU() const;
+ bool HasMsa() const;
private:
MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 4668c561ed3..9a9ae714bc6 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() {
return codegen_->GetGraph()->GetAllocator();
}
+inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
const DataType::Type type,
+ const bool hasMsa,
Mips64Assembler* assembler) {
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
@@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations,
// bits are set but the algorithm here attempts to minimize the total
// number of instructions executed even when a large number of bits
// are set.
-
- if (type == DataType::Type::kInt32) {
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- __ MulR6(out, out, TMP);
- __ Srl(out, out, 24);
- } else if (type == DataType::Type::kInt64) {
- __ Dsrl(TMP, in, 1);
- __ LoadConst64(AT, 0x5555555555555555L);
- __ And(TMP, TMP, AT);
- __ Dsubu(TMP, in, TMP);
- __ LoadConst64(AT, 0x3333333333333333L);
- __ And(out, TMP, AT);
- __ Dsrl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Daddu(TMP, out, TMP);
- __ Dsrl(out, TMP, 4);
- __ Daddu(out, out, TMP);
- __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
- __ And(out, out, AT);
- __ LoadConst64(TMP, 0x0101010101010101L);
- __ Dmul(out, out, TMP);
- __ Dsrl32(out, out, 24);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
+ } else {
+ __ Dmtc1(in, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Dmfc1(out, FTMP);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ __ MulR6(out, out, TMP);
+ __ Srl(out, out, 24);
+ } else {
+ __ Dsrl(TMP, in, 1);
+ __ LoadConst64(AT, 0x5555555555555555L);
+ __ And(TMP, TMP, AT);
+ __ Dsubu(TMP, in, TMP);
+ __ LoadConst64(AT, 0x3333333333333333L);
+ __ And(out, TMP, AT);
+ __ Dsrl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Daddu(TMP, out, TMP);
+ __ Dsrl(out, TMP, 4);
+ __ Daddu(out, out, TMP);
+ __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
+ __ And(out, out, AT);
+ __ LoadConst64(TMP, 0x0101010101010101L);
+ __ Dmul(out, out, TMP);
+ __ Dsrl32(out, out, 24);
+ }
}
}
@@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(long)
@@ -467,291 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
-}
-
-static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
- FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
-
- if (is64bit) {
- __ AbsD(out, in);
- } else {
- __ AbsS(out, in);
- }
-}
-
-// double java.lang.Math.abs(double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-// float java.lang.Math.abs(float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
- GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
- if (is64bit) {
- __ Dsra32(AT, in, 31);
- __ Xor(out, in, AT);
- __ Dsubu(out, out, AT);
- } else {
- __ Sra(AT, in, 31);
- __ Xor(out, in, AT);
- __ Subu(out, out, AT);
- }
-}
-
-// int java.lang.Math.abs(int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToInt(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-// long java.lang.Math.abs(long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToInt(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- Mips64Assembler* assembler) {
- FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
- FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
-
- Mips64Label noNaNs;
- Mips64Label done;
- FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- Mips64Assembler* assembler) {
- GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
- if (lhs == rhs) {
- if (out != lhs) {
- __ Move(out, lhs);
- }
- } else {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions always
- // change the target (output) register. If the condition is true the
- // output register gets the contents of the "rs" register; otherwise,
- // the output register is set to zero. One consequence of this is
- // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
- // needs to use a pair of SELEQZ/SELNEZ instructions. After
- // executing this pair of instructions one of the output registers
- // from the pair will necessarily contain zero. Then the code ORs the
- // output registers from the SELEQZ/SELNEZ instructions to get the
- // final result.
- //
- // The initial test to see if the output register is same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (out == lhs) {
- __ Slt(AT, rhs, lhs);
- if (is_min) {
- __ Seleqz(out, lhs, AT);
- __ Selnez(AT, rhs, AT);
- } else {
- __ Selnez(out, lhs, AT);
- __ Seleqz(AT, rhs, AT);
- }
- } else {
- __ Slt(AT, lhs, rhs);
- if (is_min) {
- __ Seleqz(out, rhs, AT);
- __ Selnez(AT, lhs, AT);
- } else {
- __ Selnez(out, rhs, AT);
- __ Seleqz(AT, lhs, AT);
- }
- }
- __ Or(out, out, AT);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 6f40d90ddbf..748b0b02b2e 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -68,6 +68,8 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor {
#undef INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
+ bool HasMsa() const;
+
private:
Mips64Assembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 0763ef23529..f84a33bb8e3 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -40,11 +40,6 @@ namespace art {
namespace x86 {
-static constexpr int kDoubleNaNHigh = 0x7FF80000;
-static constexpr int kDoubleNaNLow = 0x00000000;
-static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
-static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
-
IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
: allocator_(codegen->GetGraph()->GetAllocator()),
codegen_(codegen) {
@@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-
-// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
-// need is 64b.
-
-static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) {
- // TODO: Enable memory operations when the assembler supports them.
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
- if (static_or_direct->HasSpecialInput() &&
- invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
- // We need addressibility for the constant area.
- locations->SetInAt(1, Location::RequiresRegister());
- // We need a temporary to hold the constant.
- locations->AddTemp(Location::RequiresFpuRegister());
- }
-}
-
-static void MathAbsFP(HInvoke* invoke,
- bool is64bit,
- X86Assembler* assembler,
- CodeGeneratorX86* codegen) {
- LocationSummary* locations = invoke->GetLocations();
- Location output = locations->Out();
-
- DCHECK(output.IsFpuRegister());
- if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
- HX86ComputeBaseMethodAddress* method_address =
- invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
- DCHECK(locations->InAt(1).IsRegister());
- // We also have a constant area pointer.
- Register constant_area = locations->InAt(1).AsRegister<Register>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- if (is64bit) {
- __ movsd(temp, codegen->LiteralInt64Address(
- INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
- __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
- } else {
- __ movss(temp, codegen->LiteralInt32Address(
- INT32_C(0x7FFFFFFF), method_address, constant_area));
- __ andps(output.AsFpuRegister<XmmRegister>(), temp);
- }
- } else {
- // Create the right constant on an aligned stack.
- if (is64bit) {
- __ subl(ESP, Immediate(8));
- __ pushl(Immediate(0x7FFFFFFF));
- __ pushl(Immediate(0xFFFFFFFF));
- __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
- } else {
- __ subl(ESP, Immediate(12));
- __ pushl(Immediate(0x7FFFFFFF));
- __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
- }
- __ addl(ESP, Immediate(16));
- }
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFloatToFloat(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFloatToFloat(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
-}
-
-static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RegisterLocation(EAX));
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RegisterLocation(EDX));
-}
-
-static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
- Location output = locations->Out();
- Register out = output.AsRegister<Register>();
- DCHECK_EQ(out, EAX);
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- DCHECK_EQ(temp, EDX);
-
- // Sign extend EAX into EDX.
- __ cdq();
-
- // XOR EAX with sign.
- __ xorl(EAX, EDX);
-
- // Subtract out sign to correct.
- __ subl(EAX, EDX);
-
- // The result is in EAX.
-}
-
-static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
- locations->AddTemp(Location::RequiresRegister());
-}
-
-static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
- Location input = locations->InAt(0);
- Register input_lo = input.AsRegisterPairLow<Register>();
- Register input_hi = input.AsRegisterPairHigh<Register>();
- Location output = locations->Out();
- Register output_lo = output.AsRegisterPairLow<Register>();
- Register output_hi = output.AsRegisterPairHigh<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
- // Compute the sign into the temporary.
- __ movl(temp, input_hi);
- __ sarl(temp, Immediate(31));
-
- // Store the sign into the output.
- __ movl(output_lo, temp);
- __ movl(output_hi, temp);
-
- // XOR the input to the output.
- __ xorl(output_lo, input_lo);
- __ xorl(output_hi, input_hi);
-
- // Subtract the sign.
- __ subl(output_lo, temp);
- __ sbbl(output_hi, temp);
-}
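// Illustrative sketch, not from this patch: the same sign-mask trick applied across a
// 32-bit register pair, with subl's borrow propagated into the high half by sbbl.
// A C++ rendering over explicit halves (two's complement assumed):
static void Abs64ViaHalves(uint32_t* lo, uint32_t* hi) {
  uint32_t sign = static_cast<uint32_t>(static_cast<int32_t>(*hi) >> 31);  // all ones iff negative
  uint32_t xlo = *lo ^ sign;
  uint32_t xhi = *hi ^ sign;
  uint32_t borrow = (xlo < sign) ? 1u : 0u;  // the carry flag produced by "subl"
  *lo = xlo - sign;
  *hi = xhi - sign - borrow;                 // what "sbbl" computes
}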
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
- CreateAbsIntLocation(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
- CreateAbsLongLocation(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsLong(invoke->GetLocations(), GetAssembler());
-}
-
-static void GenMinMaxFP(HInvoke* invoke,
- bool is_min,
- bool is_double,
- X86Assembler* assembler,
- CodeGeneratorX86* codegen) {
- LocationSummary* locations = invoke->GetLocations();
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- // Do we have a constant area pointer?
- if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
- HX86ComputeBaseMethodAddress* method_address =
- invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
- DCHECK(locations->InAt(2).IsRegister());
- Register constant_area = locations->InAt(2).AsRegister<Register>();
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
- } else {
- __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
- }
- } else {
- if (is_double) {
- __ pushl(Immediate(kDoubleNaNHigh));
- __ pushl(Immediate(kDoubleNaNLow));
- __ movsd(out, Address(ESP, 0));
- __ addl(ESP, Immediate(8));
- } else {
- __ pushl(Immediate(kFloatNaN));
- __ movss(out, Address(ESP, 0));
- __ addl(ESP, Immediate(4));
- }
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
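// Illustrative sketch, not from this patch: the label structure above exists because
// Java Math.min/max differ from a bare compare-and-move in two cases, NaN inputs and
// signed zeros. A scalar C++ model of the intended min semantics (needs <cstdint>,
// <cstring> and <limits>):
static float JavaMinFloat(float a, float b) {
  if (a != a || b != b) {                  // unordered compare: some input is NaN
    return std::numeric_limits<float>::quiet_NaN();  // the kFloatNaN literal above
  }
  if (a == b) {                            // +0.0f == -0.0f compare equal...
    uint32_t ba, bb;
    std::memcpy(&ba, &a, sizeof(ba));
    std::memcpy(&bb, &b, sizeof(bb));
    uint32_t bits = ba | bb;               // ...so orps keeps a -0.0f sign bit
    float r;
    std::memcpy(&r, &bits, sizeof(r));
    return r;
  }
  return a < b ? a : b;
}
// Max is symmetric: andps clears the sign bit when either zero is +0.0f.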
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
- HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
- if (static_or_direct->HasSpecialInput() &&
- invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
- locations->SetInAt(2, Location::RequiresRegister());
- }
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- if (is_long) {
- // Need to perform a subtract to get the sign right.
- // op1 is already in the same location as the output.
- Location output = locations->Out();
- Register output_lo = output.AsRegisterPairLow<Register>();
- Register output_hi = output.AsRegisterPairHigh<Register>();
-
- Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
- Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
-
- // Spare register to compute the subtraction to set condition code.
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
-  // Subtract off op2_lo.
- __ movl(temp, output_lo);
- __ subl(temp, op2_lo);
-
-  // Now use the same temp and the borrow to finish the subtraction of op2_hi.
- __ movl(temp, output_hi);
- __ sbbl(temp, op2_hi);
-
- // Now the condition code is correct.
- Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
- __ cmovl(cond, output_lo, op2_lo);
- __ cmovl(cond, output_hi, op2_hi);
- } else {
- Register out = locations->Out().AsRegister<Register>();
- Register op2 = op2_loc.AsRegister<Register>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- __ cmpl(out, op2);
- Condition cond = is_min ? Condition::kGreater : Condition::kLess;
- __ cmovl(cond, out, op2);
- }
-}
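// Illustrative sketch, not from this patch: both branches above are a compare followed
// by conditional moves. The sub/sbb pair only materializes the sign of (op1 - op2) in
// the flags without keeping the result, so the observable effect is simply:
static int64_t Min64(int64_t op1, int64_t op2) {
  return (op1 >= op2) ? op2 : op1;  // the kGreaterEqual cmovs pick op2 for min
}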
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- // Register to use to perform a long subtract to set cc.
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 91a505ede1a..7627dc9490a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-
-// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
-// need is 64b.
-
-static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
- // TODO: Enable memory operations when the assembler supports them.
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
-}
-
-static void MathAbsFP(LocationSummary* locations,
- bool is64bit,
- X86_64Assembler* assembler,
- CodeGeneratorX86_64* codegen) {
- Location output = locations->Out();
-
- DCHECK(output.IsFpuRegister());
- XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-
- // TODO: Can mask directly with constant area using pand if we can guarantee
- // that the literal is aligned on a 16 byte boundary. This will avoid a
- // temporary.
- if (is64bit) {
- __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
- } else {
- __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
- __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
- }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFloatToFloatPlusTemps(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFloatToFloatPlusTemps(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
-}
-
-static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
-}
-
-static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
- Location output = locations->Out();
- CpuRegister out = output.AsRegister<CpuRegister>();
- CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
-
- if (is64bit) {
- // Create mask.
- __ movq(mask, out);
- __ sarq(mask, Immediate(63));
- // Add mask.
- __ addq(out, mask);
- __ xorq(out, mask);
- } else {
- // Create mask.
- __ movl(mask, out);
- __ sarl(mask, Immediate(31));
- // Add mask.
- __ addl(out, mask);
- __ xorl(out, mask);
- }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- X86_64Assembler* assembler,
- CodeGeneratorX86_64* codegen) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
- } else {
- __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
-
-static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86_64Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- if (is_long) {
- __ cmpq(out, op2);
- } else {
- __ cmpl(out, op2);
- }
-
- __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
new file mode 100644
index 00000000000..a0760eff691
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_analysis.h"
+
+#include "base/bit_vector-inl.h"
+
+namespace art {
+
+void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results) {
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
+
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!loop_info->Contains(*successor)) {
+ analysis_results->exits_num_++;
+ }
+ }
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) {
+ analysis_results->has_instructions_preventing_scalar_peeling_ = true;
+ analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
+ }
+ analysis_results->instr_num_++;
+ }
+ analysis_results->bb_num_++;
+ }
+}
+
+bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
+ HGraph* graph = loop_info->GetHeader()->GetGraph();
+ for (uint32_t block_id : loop_info->GetBlocks().Indexes()) {
+ HBasicBlock* block = graph->GetBlocks()[block_id];
+ DCHECK(block != nullptr);
+ if (block->EndsWithIf()) {
+ HIf* hif = block->GetLastInstruction()->AsIf();
+ HInstruction* input = hif->InputAt(0);
+ if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+class Arm64LoopHelper : public ArchDefaultLoopHelper {
+ public:
+ // Scalar loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2;
+ // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+ // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+
+ // SIMD loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
+ // Loop's maximum instruction count. Loops with higher count will not be unrolled.
+ static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
+
+ bool IsLoopTooBigForScalarPeelingUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+ size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
+ size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
+ return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr ||
+ bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks);
+ }
+
+ uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
+ uint64_t trip_count) const OVERRIDE {
+ uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor;
+ if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
+ return kNoUnrollingFactor;
+ }
+
+ return desired_unrolling_factor;
+ }
+
+ bool IsLoopPeelingEnabled() const OVERRIDE { return true; }
+
+ uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
+ int64_t trip_count,
+ uint32_t max_peel,
+ uint32_t vector_length) const OVERRIDE {
+ // Don't unroll with insufficient iterations.
+ // TODO: Unroll loops with unknown trip count.
+ DCHECK_NE(vector_length, 0u);
+ if (trip_count < (2 * vector_length + max_peel)) {
+ return kNoUnrollingFactor;
+ }
+ // Don't unroll for large loop body size.
+ uint32_t instruction_count = block->GetInstructions().CountSize();
+ if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
+ return kNoUnrollingFactor;
+ }
+ // Find a beneficial unroll factor with the following restrictions:
+ // - At least one iteration of the transformed loop should be executed.
+ // - The loop body shouldn't be "too big" (heuristic).
+
+ uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count;
+ uint32_t uf2 = (trip_count - max_peel) / vector_length;
+ uint32_t unroll_factor =
+ TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor}));
+ DCHECK_GE(unroll_factor, 1u);
+ return unroll_factor;
+ }
+};
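// Illustrative sketch, not from this patch: the SIMD heuristic above as a standalone
// function (assumes TruncToPowerOfTwo rounds down to a power of two; needs
// <algorithm>). For trip_count = 100, max_peel = 4, vector_length = 4 and a body of
// 10 instructions: uf1 = 50/10 = 5, uf2 = 96/4 = 24, min(5, 24, 8) = 5, truncated to
// an unrolling factor of 4.
static uint32_t ExampleArm64SimdUnrollFactor(uint32_t instruction_count,
                                             int64_t trip_count,
                                             uint32_t max_peel,
                                             uint32_t vector_length) {
  constexpr uint32_t kMaxBodySize = 50;  // kArm64SimdHeuristicMaxBodySizeInstr
  constexpr uint32_t kMaxUnroll = 8;     // kArm64SimdMaxUnrollFactor
  if (instruction_count == 0 ||
      instruction_count >= kMaxBodySize ||
      trip_count < 2 * vector_length + max_peel) {
    return 1;  // kNoUnrollingFactor
  }
  uint32_t uf1 = kMaxBodySize / instruction_count;
  uint32_t uf2 = (static_cast<uint32_t>(trip_count) - max_peel) / vector_length;
  uint32_t uf = std::min({uf1, uf2, kMaxUnroll});
  while ((uf & (uf - 1)) != 0) {
    uf &= uf - 1;  // drop low bits until a single power of two remains
  }
  return uf;
}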
+
+ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa,
+ ArenaAllocator* allocator) {
+ switch (isa) {
+ case InstructionSet::kArm64: {
+ return new (allocator) Arm64LoopHelper;
+ }
+ default: {
+ return new (allocator) ArchDefaultLoopHelper;
+ }
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
new file mode 100644
index 00000000000..ece98581367
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+
+#include "nodes.h"
+
+namespace art {
+
+class LoopAnalysis;
+
+// No loop unrolling factor (just one copy of the loop-body).
+static constexpr uint32_t kNoUnrollingFactor = 1;
+
+// Class to hold cached information on properties of the loop.
+class LoopAnalysisInfo : public ValueObject {
+ public:
+ explicit LoopAnalysisInfo(HLoopInformation* loop_info)
+ : bb_num_(0),
+ instr_num_(0),
+ exits_num_(0),
+ has_instructions_preventing_scalar_peeling_(false),
+ has_instructions_preventing_scalar_unrolling_(false),
+ loop_info_(loop_info) {}
+
+ size_t GetNumberOfBasicBlocks() const { return bb_num_; }
+ size_t GetNumberOfInstructions() const { return instr_num_; }
+ size_t GetNumberOfExits() const { return exits_num_; }
+
+ bool HasInstructionsPreventingScalarPeeling() const {
+ return has_instructions_preventing_scalar_peeling_;
+ }
+
+ bool HasInstructionsPreventingScalarUnrolling() const {
+ return has_instructions_preventing_scalar_unrolling_;
+ }
+
+ const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+
+ private:
+ // Number of basic blocks in the loop body.
+ size_t bb_num_;
+ // Number of instructions in the loop body.
+ size_t instr_num_;
+  // Number of loop exits.
+ size_t exits_num_;
+ // Whether the loop has instructions which make scalar loop peeling non-beneficial.
+ bool has_instructions_preventing_scalar_peeling_;
+ // Whether the loop has instructions which make scalar loop unrolling non-beneficial.
+ bool has_instructions_preventing_scalar_unrolling_;
+
+ // Corresponding HLoopInformation.
+ const HLoopInformation* loop_info_;
+
+ friend class LoopAnalysis;
+};
+
+// Placeholder class for methods and routines used to analyse loops, calculate loop properties
+// and characteristics.
+class LoopAnalysis : public ValueObject {
+ public:
+  // Calculates the loop's basic properties, such as body size and number of exits, and
+  // fills 'analysis_results' with this information.
+ static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results);
+
+ // Returns whether the loop has at least one loop invariant exit.
+ static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info);
+
+ // Returns whether HIf's true or false successor is outside the specified loop.
+ //
+ // Prerequisite: HIf must be in the specified loop.
+ static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) {
+ DCHECK(loop_info->Contains(*hif->GetBlock()));
+ HBasicBlock* true_succ = hif->IfTrueSuccessor();
+ HBasicBlock* false_succ = hif->IfFalseSuccessor();
+ return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ));
+ }
+
+ private:
+ // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial.
+ //
+  // If the loop body contains a dex/runtime call, its cost will probably dominate the
+  // whole loop's performance, so peeling/unrolling will not bring any noticeable
+  // performance improvement and will only increase the code size.
+ static bool MakesScalarPeelingUnrollingNonBeneficial(HInstruction* instruction) {
+ return (instruction->IsNewArray() ||
+ instruction->IsNewInstance() ||
+ instruction->IsUnresolvedInstanceFieldGet() ||
+ instruction->IsUnresolvedInstanceFieldSet() ||
+ instruction->IsUnresolvedStaticFieldGet() ||
+ instruction->IsUnresolvedStaticFieldSet() ||
+ // TODO: Support loops with intrinsified invokes.
+ instruction->IsInvoke() ||
+ // TODO: Support loops with ClinitChecks.
+ instruction->IsClinitCheck());
+ }
+};
+
+//
+// Helper class which holds target-dependent methods and constants needed for loop optimizations.
+//
+// To support peeling/unrolling for a new architecture one needs to create new helper class,
+// inherit it from this and add implementation for the following methods.
+//
+class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> {
+ public:
+ virtual ~ArchDefaultLoopHelper() {}
+
+  // Creates an instance of a specialised helper for the target, or a default helper if
+  // the target doesn't support loop peeling and unrolling.
+ static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator);
+
+ // Returns whether the loop is too big for loop peeling/unrolling by checking its total number of
+ // basic blocks and instructions.
+ //
+  // If the loop body has too many instructions then peeling/unrolling optimization will
+  // not bring any noticeable performance improvement; it will, however, increase the code size.
+ //
+ // Returns 'true' by default, should be overridden by particular target loop helper.
+ virtual bool IsLoopTooBigForScalarPeelingUnrolling(
+ LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; }
+
+ // Returns optimal scalar unrolling factor for the loop.
+ //
+ // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
+ virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
+ uint64_t trip_count ATTRIBUTE_UNUSED) const {
+ return kNoUnrollingFactor;
+ }
+
+  // Returns whether scalar loop peeling is enabled.
+ //
+ // Returns 'false' by default, should be overridden by particular target loop helper.
+ virtual bool IsLoopPeelingEnabled() const { return false; }
+
+ // Returns optimal SIMD unrolling factor for the loop.
+ //
+ // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
+ virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED,
+ int64_t trip_count ATTRIBUTE_UNUSED,
+ uint32_t max_peel ATTRIBUTE_UNUSED,
+ uint32_t vector_length ATTRIBUTE_UNUSED) const {
+ return kNoUnrollingFactor;
+ }
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
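// Illustrative usage, not from this patch: a client pass is expected to combine the
// two classes above roughly as follows (names as declared in this header; 'isa',
// 'allocator' and 'trip_count' come from the caller):
//
//   LoopAnalysisInfo info(loop_info);
//   LoopAnalysis::CalculateLoopBasicProperties(loop_info, &info);
//   ArchDefaultLoopHelper* helper = ArchDefaultLoopHelper::Create(isa, allocator);
//   if (!helper->IsLoopTooBigForScalarPeelingUnrolling(&info) &&
//       !info.HasInstructionsPreventingScalarUnrolling()) {
//     uint32_t factor = helper->GetScalarUnrollingFactor(loop_info, trip_count);
//     // factor == kNoUnrollingFactor (1) means: leave the loop alone.
//   }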
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 899496328eb..1462404932e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -33,8 +33,8 @@ namespace art {
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
+// Enables scalar loop unrolling in the loop optimizer.
+static constexpr bool kEnableScalarPeelingUnrolling = false;
//
// Static helpers.
@@ -153,6 +153,18 @@ static bool IsSignExtensionAndGet(HInstruction* instruction,
return false;
}
}
+ // A MIN-MAX on narrower operands qualifies as well
+ // (returning the operator itself).
+ if (instruction->IsMin() || instruction->IsMax()) {
+ HBinaryOperation* min_max = instruction->AsBinaryOperation();
+ DCHECK(min_max->GetType() == DataType::Type::kInt32 ||
+ min_max->GetType() == DataType::Type::kInt64);
+ if (IsSignExtensionAndGet(min_max->InputAt(0), type, operand) &&
+ IsSignExtensionAndGet(min_max->InputAt(1), type, operand)) {
+ *operand = min_max;
+ return true;
+ }
+ }
return false;
}
@@ -216,6 +228,18 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction,
return false;
}
}
+ // A MIN-MAX on narrower operands qualifies as well
+ // (returning the operator itself).
+ if (instruction->IsMin() || instruction->IsMax()) {
+ HBinaryOperation* min_max = instruction->AsBinaryOperation();
+ DCHECK(min_max->GetType() == DataType::Type::kInt32 ||
+ min_max->GetType() == DataType::Type::kInt64);
+ if (IsZeroExtensionAndGet(min_max->InputAt(0), type, operand) &&
+ IsZeroExtensionAndGet(min_max->InputAt(1), type, operand)) {
+ *operand = min_max;
+ return true;
+ }
+ }
return false;
}
@@ -227,6 +251,7 @@ static bool IsNarrowerOperands(HInstruction* a,
/*out*/ HInstruction** r,
/*out*/ HInstruction** s,
/*out*/ bool* is_unsigned) {
+ DCHECK(a != nullptr && b != nullptr);
// Look for a matching sign extension.
DataType::Type stype = HVecOperation::ToSignedType(type);
if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) {
@@ -247,6 +272,7 @@ static bool IsNarrowerOperand(HInstruction* a,
DataType::Type type,
/*out*/ HInstruction** r,
/*out*/ bool* is_unsigned) {
+ DCHECK(a != nullptr);
// Look for a matching sign extension.
DataType::Type stype = HVecOperation::ToSignedType(type);
if (IsSignExtensionAndGet(a, stype, r)) {
@@ -270,20 +296,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type
return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type));
}
-// Detect up to two instructions a and b, and an accumulated constant c.
-static bool IsAddConstHelper(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ HInstruction** b,
- /*out*/ int64_t* c,
- int32_t depth) {
- static constexpr int32_t kMaxDepth = 8; // don't search too deep
+// Detect up to two added operands a and b and an accumulated constant c.
+static bool IsAddConst(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c,
+ int32_t depth = 8) { // don't search too deep
int64_t value = 0;
+ // Enter add/sub while still within reasonable depth.
+ if (depth > 0) {
+ if (instruction->IsAdd()) {
+ return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) &&
+ IsAddConst(instruction->InputAt(1), a, b, c, depth - 1);
+ } else if (instruction->IsSub() &&
+ IsInt64AndGet(instruction->InputAt(1), &value)) {
+ *c -= value;
+ return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1);
+ }
+ }
+ // Otherwise, deal with leaf nodes.
if (IsInt64AndGet(instruction, &value)) {
*c += value;
return true;
- } else if (instruction->IsAdd() && depth <= kMaxDepth) {
- return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
- IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
} else if (*a == nullptr) {
*a = instruction;
return true;
@@ -291,72 +325,170 @@ static bool IsAddConstHelper(HInstruction* instruction,
*b = instruction;
return true;
}
- return false; // too many non-const operands
+ return false; // too many operands
}
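// Illustrative sketch, not from this patch: the same decomposition on a toy expression
// tree ('Node' and its fields are hypothetical, for illustration only):
struct Node {
  enum Kind { kConst, kAdd, kSub, kOther } kind;
  int64_t value;  // only meaningful for kConst
  Node* l;
  Node* r;
};
static bool DecomposeAddConst(Node* n, Node** a, Node** b, int64_t* c, int depth = 8) {
  if (depth > 0) {
    if (n->kind == Node::kAdd) {
      return DecomposeAddConst(n->l, a, b, c, depth - 1) &&
             DecomposeAddConst(n->r, a, b, c, depth - 1);
    }
    if (n->kind == Node::kSub && n->r->kind == Node::kConst) {
      *c -= n->r->value;
      return DecomposeAddConst(n->l, a, b, c, depth - 1);
    }
  }
  if (n->kind == Node::kConst) { *c += n->value; return true; }
  if (*a == nullptr) { *a = n; return true; }
  if (*b == nullptr) { *b = n; return true; }
  return false;  // a third non-constant leaf rejects the match
}
// E.g. ((x + 3) + (y - 5)) yields *a = x, *b = y and *c = -2.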
-// Detect a + b + c for an optional constant c.
-static bool IsAddConst(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ HInstruction** b,
- /*out*/ int64_t* c) {
- if (instruction->IsAdd()) {
- // Try to find a + b and accumulated c.
- if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
- IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
- *b != nullptr) {
- return true;
+// Detect a + b + c with optional constant c.
+static bool IsAddConst2(HGraph* graph,
+ HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c) {
+ if (IsAddConst(instruction, a, b, c) && *a != nullptr) {
+ if (*b == nullptr) {
+ // Constant is usually already present, unless accumulated.
+ *b = graph->GetConstant(instruction->GetType(), (*c));
+ *c = 0;
}
- // Found a + b.
+ return true;
+ }
+ return false;
+}
+
+// Detect a direct a - b or a hidden a - (-c).
+static bool IsSubConst2(HGraph* graph,
+ HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b) {
+ int64_t c = 0;
+ if (instruction->IsSub()) {
*a = instruction->InputAt(0);
*b = instruction->InputAt(1);
- *c = 0;
+ return true;
+ } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) {
+ // Constant for the hidden subtraction.
+ *b = graph->GetConstant(instruction->GetType(), -c);
return true;
}
return false;
}
-// Detect a + c for constant c.
-static bool IsAddConst(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ int64_t* c) {
- if (instruction->IsAdd()) {
- if (IsInt64AndGet(instruction->InputAt(0), c)) {
- *a = instruction->InputAt(1);
- return true;
- } else if (IsInt64AndGet(instruction->InputAt(1), c)) {
- *a = instruction->InputAt(0);
- return true;
+// Detect clipped [lo, hi] range for nested MIN-MAX operations on a clippee,
+// such as MIN(hi, MAX(lo, clippee)) for an arbitrary clippee expression.
+// Example: MIN(10, MIN(20, MAX(0, x))) yields [0, 10] with clippee x.
+static HInstruction* FindClippee(HInstruction* instruction,
+ /*out*/ int64_t* lo,
+ /*out*/ int64_t* hi) {
+ // Iterate into MIN(.., c)-MAX(.., c) expressions and 'tighten' the range [lo, hi].
+ while (instruction->IsMin() || instruction->IsMax()) {
+ HBinaryOperation* min_max = instruction->AsBinaryOperation();
+ DCHECK(min_max->GetType() == DataType::Type::kInt32 ||
+ min_max->GetType() == DataType::Type::kInt64);
+ // Process the constant.
+ HConstant* right = min_max->GetConstantRight();
+ if (right == nullptr) {
+ break;
+ } else if (instruction->IsMin()) {
+ *hi = std::min(*hi, Int64FromConstant(right));
+ } else {
+ *lo = std::max(*lo, Int64FromConstant(right));
}
+ instruction = min_max->GetLeastConstantLeft();
+ }
+ // Iteration ends in any other expression (possibly MIN/MAX without constant).
+ // This leaf expression is the clippee with range [lo, hi].
+ return instruction;
+}
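// Illustrative sketch, not from this patch: the range gathered above describes the
// scalar clamp computed by the matched HIR, i.e. for clippee x (needs <algorithm>):
static int64_t Clamp(int64_t x, int64_t lo, int64_t hi) {
  return std::min(hi, std::max(lo, x));  // MIN(hi, MAX(lo, clippee))
}
// Redundant nested bounds only tighten [lo, hi]; they never widen it.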
+
+// Set value range for type (or fail).
+static bool CanSetRange(DataType::Type type,
+ /*out*/ int64_t* uhi,
+ /*out*/ int64_t* slo,
+ /*out*/ int64_t* shi) {
+ if (DataType::Size(type) == 1) {
+ *uhi = std::numeric_limits<uint8_t>::max();
+ *slo = std::numeric_limits<int8_t>::min();
+ *shi = std::numeric_limits<int8_t>::max();
+ return true;
+ } else if (DataType::Size(type) == 2) {
+ *uhi = std::numeric_limits<uint16_t>::max();
+ *slo = std::numeric_limits<int16_t>::min();
+ *shi = std::numeric_limits<int16_t>::max();
+ return true;
}
return false;
}
+// Accept various saturated addition forms.
+static bool IsSaturatedAdd(HInstruction* a,
+ HInstruction* b,
+ DataType::Type type,
+ int64_t lo,
+ int64_t hi,
+ bool is_unsigned) {
+ int64_t ulo = 0, uhi = 0, slo = 0, shi = 0;
+ if (!CanSetRange(type, &uhi, &slo, &shi)) {
+ return false;
+ }
+ // Tighten the range for signed single clipping on constant.
+ if (!is_unsigned) {
+ int64_t c = 0;
+ if (IsInt64AndGet(a, &c) || IsInt64AndGet(b, &c)) {
+ // For c in proper range and narrower operand r:
+ // MIN(r + c, 127) c > 0
+ // or MAX(r + c, -128) c < 0 (and possibly redundant bound).
+ if (0 < c && c <= shi && hi == shi) {
+ if (lo <= (slo + c)) {
+ return true;
+ }
+ } else if (slo <= c && c < 0 && lo == slo) {
+ if (hi >= (shi + c)) {
+ return true;
+ }
+ }
+ }
+ }
+ // Detect for narrower operands r and s:
+ // MIN(r + s, 255) => SAT_ADD_unsigned
+ // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed.
+ return is_unsigned ? (lo <= ulo && hi == uhi) : (lo == slo && hi == shi);
+}
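// Illustrative sketch, not from this patch: the double-clipping signed pattern
// accepted above, MAX(MIN(r + s, 127), -128) on int8 operands, is exactly scalar
// saturating addition (needs <algorithm>):
static int8_t SaturatedAdd8(int8_t r, int8_t s) {
  int32_t sum = static_cast<int32_t>(r) + static_cast<int32_t>(s);  // cannot overflow
  return static_cast<int8_t>(std::max(-128, std::min(127, sum)));
}
// The unsigned form clips only from above: MIN(r + s, 255) on uint8 operands.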
+
+// Accept various saturated subtraction forms.
+static bool IsSaturatedSub(HInstruction* a,
+ DataType::Type type,
+ int64_t lo,
+ int64_t hi,
+ bool is_unsigned) {
+ int64_t ulo = 0, uhi = 0, slo = 0, shi = 0;
+ if (!CanSetRange(type, &uhi, &slo, &shi)) {
+ return false;
+ }
+ // Tighten the range for signed single clipping on constant.
+ if (!is_unsigned) {
+ int64_t c = 0;
+ if (IsInt64AndGet(a, /*out*/ &c)) {
+ // For c in proper range and narrower operand r:
+ // MIN(c - r, 127) c > 0
+ // or MAX(c - r, -128) c < 0 (and possibly redundant bound).
+ if (0 < c && c <= shi && hi == shi) {
+ if (lo <= (c - shi)) {
+ return true;
+ }
+ } else if (slo <= c && c < 0 && lo == slo) {
+ if (hi >= (c - slo)) {
+ return true;
+ }
+ }
+ }
+ }
+ // Detect for narrower operands r and s:
+ // MAX(r - s, 0) => SAT_SUB_unsigned
+  //       MIN(MAX(r - s, -128), 127) => SAT_SUB_signed.
+ return is_unsigned ? (lo == ulo && hi >= uhi) : (lo == slo && hi == shi);
+}
+
// Detect reductions of the following forms,
// x = x_phi + ..
// x = x_phi - ..
-// x = max(x_phi, ..)
// x = min(x_phi, ..)
+// x = max(x_phi, ..)
static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
- if (reduction->IsAdd()) {
+ if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
(reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
} else if (reduction->IsSub()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
- } else if (reduction->IsInvokeStaticOrDirect()) {
- switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble:
- return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
- (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
- default:
- return false;
- }
}
return false;
}
@@ -401,6 +533,43 @@ static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) {
return true;
}
+// Tries to statically evaluate the condition of the specified HIf so that other checks of
+// the same condition can be replaced with constants.
+static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) {
+ HInstruction* cond = instruction->InputAt(0);
+
+ // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the
+ // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in
+ // FALSE_SUCC). Using that we can replace another evaluation (use) EVAL of the same 'cond'
+ // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the
+ // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC).
+ // if (cond) { if(cond) {
+ // if (cond) {} if (1) {}
+ // } else { =======> } else {
+ // if (cond) {} if (0) {}
+ // } }
+ if (!cond->IsConstant()) {
+ HBasicBlock* true_succ = instruction->IfTrueSuccessor();
+ HBasicBlock* false_succ = instruction->IfFalseSuccessor();
+
+ DCHECK_EQ(true_succ->GetPredecessors().size(), 1u);
+ DCHECK_EQ(false_succ->GetPredecessors().size(), 1u);
+
+ const HUseList<HInstruction*>& uses = cond->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ HBasicBlock* user_block = user->GetBlock();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (true_succ->Dominates(user_block)) {
+ user->ReplaceInput(graph->GetIntConstant(1), index);
+ } else if (false_succ->Dominates(user_block)) {
+ user->ReplaceInput(graph->GetIntConstant(0), index);
+ }
+ }
+ }
+}
+
//
// Public methods.
//
@@ -432,7 +601,11 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_preheader_(nullptr),
vector_header_(nullptr),
vector_body_(nullptr),
- vector_index_(nullptr) {
+ vector_index_(nullptr),
+ arch_loop_helper_(ArchDefaultLoopHelper::Create(compiler_driver_ != nullptr
+ ? compiler_driver_->GetInstructionSet()
+ : InstructionSet::kNone,
+ global_allocator_)) {
}
void HLoopOptimization::Run() {
@@ -643,7 +816,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
-bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
@@ -713,6 +886,103 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
return false;
}
+bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+ return TryOptimizeInnerLoopFinite(node) ||
+ TryPeelingForLoopInvariantExitsElimination(node) ||
+ TryUnrollingForBranchPenaltyReduction(node);
+}
+
+//
+// Loop peeling and unrolling: generic part methods.
+//
+
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
+ // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests)
+ // as InstructionSet is needed.
+ if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) {
+ return false;
+ }
+
+ HLoopInformation* loop_info = node->loop_info;
+ int64_t trip_count = 0;
+  // Only unroll loops with a known trip count.
+ if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+ return false;
+ }
+
+ uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
+ if (unrolling_factor == kNoUnrollingFactor) {
+ return false;
+ }
+
+ LoopAnalysisInfo loop_analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+
+ // Check "IsLoopClonable" last as it can be time-consuming.
+ if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) ||
+ (loop_analysis_info.GetNumberOfExits() > 1) ||
+ loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
+ !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
+ }
+
+ // TODO: support other unrolling factors.
+ DCHECK_EQ(unrolling_factor, 2u);
+
+ // Perform unrolling.
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoUnrolling();
+
+ // Remove the redundant loop check after unrolling.
+ HIf* copy_hif =
+ helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+ copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+
+ return true;
+}
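// Illustrative sketch, not from this patch: with a known trip count divisible by the
// factor (enforced by GetScalarUnrollingFactor above), the transformation is the
// classic 2x unroll; schematically:
//
//   for (int i = 0; i < n; ++i) {        for (int i = 0; i < n; i += 2) {
//     Body(i);                    ==>      Body(i);
//   }                                      Body(i + 1);  // cloned body; its copy of
//                                        }               // the exit check is folded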
+
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
+ // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests)
+ // as InstructionSet is needed.
+ if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) {
+ return false;
+ }
+
+ HLoopInformation* loop_info = node->loop_info;
+  // Check whether loop peeling is enabled for the target.
+ if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ return false;
+ }
+
+ LoopAnalysisInfo loop_analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+
+ // Check "IsLoopClonable" last as it can be time-consuming.
+ if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) ||
+ loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
+ !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
+ !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
+ }
+
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoPeeling();
+
+ const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+ for (auto entry : *hir_map) {
+ HInstruction* copy = entry.second;
+ if (copy->IsIf()) {
+ TryToEvaluateIfCondition(copy->AsIf(), graph_);
+ }
+ }
+
+ return true;
+}
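// Illustrative sketch, not from this patch: peeling one iteration makes the peeled
// copy of a loop-invariant check dominate the remaining loop, so the dominance-based
// rewrite in TryToEvaluateIfCondition() folds the in-loop copy; schematically
// (ignoring the peeled body itself):
//
//   loop {                               if (inv) goto exit_a;       // peeled check
//     if (inv) goto exit_a;              loop {
//     Body();                    ==>       if (false) goto exit_a;   // folded
//     if (done()) goto exit_b;             Body();
//   }                                      if (done()) goto exit_b;
//                                        }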
+
//
// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
@@ -843,7 +1113,8 @@ void HLoopOptimization::Vectorize(LoopNode* node,
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Pick a loop unrolling factor for the vector loop.
- uint32_t unroll = GetUnrollingFactor(block, trip_count);
+ uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor(
+ block, trip_count, MaxNumberPeeled(), vector_length_);
uint32_t chunk = vector_length_ * unroll;
DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk));
@@ -1082,6 +1353,11 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
HInstruction* index = instruction->InputAt(1);
HInstruction* value = instruction->InputAt(2);
HInstruction* offset = nullptr;
+ // For narrow types, explicit type conversion may have been
+    // optimized away, so set the no hi bits restriction here.
+ if (DataType::Size(type) <= 2) {
+ restrictions |= kNoHiBits;
+ }
if (TrySetVectorType(type, &restrictions) &&
node->loop_info->IsDefinedOutOfTheLoop(base) &&
induction_range_.IsUnitStride(instruction, index, graph_, &offset) &&
@@ -1124,7 +1400,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
}
-// TODO: saturation arithmetic.
bool HLoopOptimization::VectorizeUse(LoopNode* node,
HInstruction* instruction,
bool generate_code,
@@ -1297,80 +1572,62 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
}
- } else if (instruction->IsInvokeStaticOrDirect()) {
- // Accept particular intrinsics.
- HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathAbsDouble: {
- // Deal with vector restrictions.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* r = opa;
- bool is_unsigned = false;
- if (HasVectorRestrictions(restrictions, kNoAbs)) {
- return false;
- } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
- (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
- return false; // reject, unless operand is sign-extension narrower
- }
- // Accept ABS(x) for vectorizable operand.
- DCHECK(r != nullptr);
- if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = opa;
- }
- if (VectorizeUse(node, r, generate_code, type, restrictions)) {
- if (generate_code) {
- GenerateVecOp(instruction,
- vector_map_->Get(r),
- nullptr,
- HVecOperation::ToProperType(type, is_unsigned));
- }
- return true;
- }
- return false;
+ } else if (instruction->IsAbs()) {
+ // Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* r = opa;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoAbs)) {
+ return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+ return false; // reject, unless operand is sign-extension narrower
+ }
+ // Accept ABS(x) for vectorizable operand.
+ DCHECK(r != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction,
+ vector_map_->Get(r),
+ nullptr,
+ HVecOperation::ToProperType(type, is_unsigned));
}
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- // Deal with vector restrictions.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* opb = instruction->InputAt(1);
- HInstruction* r = opa;
- HInstruction* s = opb;
- bool is_unsigned = false;
- if (HasVectorRestrictions(restrictions, kNoMinMax)) {
- return false;
- } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
- !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
- return false; // reject, unless all operands are same-extension narrower
- }
- // Accept MIN/MAX(x, y) for vectorizable operands.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
- if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = opa;
- s = opb;
- }
- if (VectorizeUse(node, r, generate_code, type, restrictions) &&
- VectorizeUse(node, s, generate_code, type, restrictions)) {
- if (generate_code) {
- GenerateVecOp(
- instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
- }
- return true;
- }
- return false;
+ return true;
+ }
+ } else if (instruction->IsMin() || instruction->IsMax()) {
+ // Recognize saturation arithmetic.
+ if (VectorizeSaturationIdiom(node, instruction, generate_code, type, restrictions)) {
+ return true;
+ }
+ // Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ HInstruction* r = opa;
+ HInstruction* s = opb;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoMinMax)) {
+ return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+ return false; // reject, unless all operands are same-extension narrower
+ }
+ // Accept MIN/MAX(x, y) for vectorizable operands.
+ DCHECK(r != nullptr && s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ s = opb;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(
+ instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
}
- default:
- return false;
- } // switch
+ return true;
+ }
}
return false;
}
@@ -1475,11 +1732,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoSaturation;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv;
@@ -1504,11 +1761,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoSaturation;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv;
@@ -1811,83 +2068,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
GENERATE_VEC(
new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc),
new (global_allocator_) HUShr(org_type, opa, opb, dex_pc));
- case HInstruction::kInvokeStaticOrDirect: {
- HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
- if (vector_mode_ == kVector) {
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathAbsDouble:
- DCHECK(opb == nullptr);
- vector = new (global_allocator_)
- HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc);
- break;
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble: {
- vector = new (global_allocator_)
- HVecMin(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- vector = new (global_allocator_)
- HVecMax(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId();
- UNREACHABLE();
- } // switch invoke
- } else {
- // In scalar code, simply clone the method invoke, and replace its operands with the
- // corresponding new scalar instructions in the loop. The instruction will get an
- // environment while being inserted from the instruction map in original program order.
- DCHECK(vector_mode_ == kSequential);
- size_t num_args = invoke->GetNumberOfArguments();
- HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
- global_allocator_,
- num_args,
- invoke->GetType(),
- invoke->GetDexPc(),
- invoke->GetDexMethodIndex(),
- invoke->GetResolvedMethod(),
- invoke->GetDispatchInfo(),
- invoke->GetInvokeType(),
- invoke->GetTargetMethod(),
- invoke->GetClinitCheckRequirement());
- HInputsRef inputs = invoke->GetInputs();
- size_t num_inputs = inputs.size();
- DCHECK_LE(num_args, num_inputs);
- DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree
- for (size_t index = 0; index < num_inputs; ++index) {
- HInstruction* new_input = index < num_args
- ? vector_map_->Get(inputs[index])
- : inputs[index]; // beyond arguments: just pass through
- new_invoke->SetArgumentAt(index, new_input);
- }
- new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
- kNeedsEnvironmentOrCache,
- kNoSideEffects,
- kNoThrow);
- vector = new_invoke;
- }
- break;
- }
+ case HInstruction::kMin:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMin(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc),
+ new (global_allocator_) HMin(org_type, opa, opb, dex_pc));
+ case HInstruction::kMax:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMax(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc),
+ new (global_allocator_) HMax(org_type, opa, opb, dex_pc));
+ case HInstruction::kAbs:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
+ new (global_allocator_) HAbs(org_type, opa, dex_pc));
default:
break;
} // switch
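// For context, the GENERATE_VEC macro used above is defined outside this
// hunk; its assumed shape is to pick the vector or scalar form and break:
//
//   #define GENERATE_VEC(x, y)                 \
//     if (vector_mode_ == kVector) {           \
//       vector = (x);                          \
//     } else {                                 \
//       DCHECK(vector_mode_ == kSequential);   \
//       vector = (y);                          \
//     }                                        \
//     break;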
@@ -1901,6 +2104,79 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
// Vectorization idioms.
//
+// Method recognizes single- and double-clipping saturation arithmetic.
+bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ DataType::Type type,
+ uint64_t restrictions) {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoSaturation)) {
+ return false;
+ }
+ // Restrict the type (generalize this if the allowed MIN/MAX integral types are ever widened).
+ if (instruction->GetType() != DataType::Type::kInt32 &&
+ instruction->GetType() != DataType::Type::kInt64) {
+ return false;
+ }
+ // Clipped addition or subtraction on narrower operands? We try both forms,
+ // since, e.g., x+c can be interpreted as either x+c or x-(-c) depending on
+ // which clipping values are used, to get the most benefit.
+ int64_t lo = std::numeric_limits<int64_t>::min();
+ int64_t hi = std::numeric_limits<int64_t>::max();
+ HInstruction* clippee = FindClippee(instruction, &lo, &hi);
+ HInstruction* a = nullptr;
+ HInstruction* b = nullptr;
+ HInstruction* r = nullptr;
+ HInstruction* s = nullptr;
+ bool is_unsigned = false;
+ bool is_add = true;
+ int64_t c = 0;
+ // First try for saturated addition.
+ if (IsAddConst2(graph_, clippee, /*out*/ &a, /*out*/ &b, /*out*/ &c) && c == 0 &&
+ IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) &&
+ IsSaturatedAdd(r, s, type, lo, hi, is_unsigned)) {
+ is_add = true;
+ } else {
+ // Then try again for saturated subtraction.
+ a = b = r = s = nullptr;
+ if (IsSubConst2(graph_, clippee, /*out*/ &a, /*out*/ &b) &&
+ IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) &&
+ IsSaturatedSub(r, type, lo, hi, is_unsigned)) {
+ is_add = false;
+ } else {
+ return false;
+ }
+ }
+ // Accept saturation idiom for vectorizable operands.
+ DCHECK(r != nullptr && s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = instruction->InputAt(0);
+ s = instruction->InputAt(1);
+ restrictions &= ~(kNoHiBits | kNoMinMax); // allow narrow MIN/MAX in sequential code
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ DataType::Type vtype = HVecOperation::ToProperType(type, is_unsigned);
+ HInstruction* op1 = vector_map_->Get(r);
+ HInstruction* op2 = vector_map_->Get(s);
+ vector_map_->Put(instruction, is_add
+ ? reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationAdd(
+ global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc))
+ : reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationSub(
+ global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc)));
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
+ } else {
+ GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
+ }
+ }
+ return true;
+ }
+ return false;
+}
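// Illustrative only: a scalar shape this method accepts for int8 saturated
// addition (double clipping; the unsigned case needs just a single clip):
//
//   for (int i = 0; i < n; ++i) {
//     int32_t t = a[i] + b[i];  // dex arithmetic is at least int32
//     out[i] = static_cast<int8_t>(std::min(std::max(t, -128), 127));
//   }
//
// FindClippee presumably peels the MIN/MAX pair off `instruction` and
// returns the clipped expression with lo/hi set to -128/127 here.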
+
// Method recognizes the following idioms:
// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
// truncated halving add (a + b) >> 1 for unsigned/signed operands a, b
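// Worked example (illustrative): for uint8 a = 200, b = 101 the rounded form
// yields (200 + 101 + 1) >> 1 = 151 and the truncated form (200 + 101) >> 1 = 150;
// both are exact because the addition is evaluated in wider precision, so the
// intermediate value 302 cannot wrap around.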
@@ -1924,8 +2200,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
HInstruction* a = nullptr;
HInstruction* b = nullptr;
int64_t c = 0;
- if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
- DCHECK(a != nullptr && b != nullptr);
+ if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
// Accept c == 1 (rounded) or c == 0 (not rounded).
bool is_rounded = false;
if (c == 1) {
@@ -1947,8 +2222,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
}
// Accept recognized halving add for vectorizable operands. Vectorized code uses the
// shorthand idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = instruction->InputAt(0);
s = instruction->InputAt(1);
@@ -1998,21 +2272,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
HInstruction* v = instruction->InputAt(1);
HInstruction* a = nullptr;
HInstruction* b = nullptr;
- if (v->IsInvokeStaticOrDirect() &&
- (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
- v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
- HInstruction* x = v->InputAt(0);
- if (x->GetType() == reduction_type) {
- int64_t c = 0;
- if (x->IsSub()) {
- a = x->InputAt(0);
- b = x->InputAt(1);
- } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) {
- b = graph_->GetConstant(reduction_type, -c); // hidden SUB!
- }
- }
- }
- if (a == nullptr || b == nullptr) {
+ if (v->IsAbs() &&
+ v->GetType() == reduction_type &&
+ IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) {
+ DCHECK(a != nullptr && b != nullptr);
+ } else {
return false;
}
// Accept same-type or consistent sign extension for narrower-type on operands a and b.
@@ -2045,8 +2309,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
}
// Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand
// idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = s = v->InputAt(0);
}
@@ -2054,14 +2317,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
if (generate_code) {
- reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned);
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
global_allocator_,
vector_map_->Get(q),
vector_map_->Get(r),
vector_map_->Get(s),
- reduction_type,
+ HVecOperation::ToProperType(reduction_type, is_unsigned),
GetOtherVL(reduction_type, sub_type, vector_length_),
kNoDexPc));
MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
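// Illustrative only: the scalar reduction shape handled here, with int8 data
// widened to an int32 accumulator (sub_type narrower than reduction_type):
//
//   int32_t acc = 0;
//   for (int i = 0; i < n; ++i) {
//     acc += std::abs(static_cast<int32_t>(a[i]) - static_cast<int32_t>(b[i]));
//   }
//
// The constant form a[i] + c is treated as a[i] - (-c), the hidden SUB that
// IsSubConst2 presumably recovers above.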
@@ -2134,41 +2396,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
return true;
}
-static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8;
-static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
-
-uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
- uint32_t max_peel = MaxNumberPeeled();
- switch (compiler_driver_->GetInstructionSet()) {
- case InstructionSet::kArm64: {
- // Don't unroll with insufficient iterations.
- // TODO: Unroll loops with unknown trip count.
- DCHECK_NE(vector_length_, 0u);
- if (trip_count < (2 * vector_length_ + max_peel)) {
- return kNoUnrollingFactor;
- }
- // Don't unroll for large loop body size.
- uint32_t instruction_count = block->GetInstructions().CountSize();
- if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) {
- return kNoUnrollingFactor;
- }
- // Find a beneficial unroll factor with the following restrictions:
- // - At least one iteration of the transformed loop should be executed.
- // - The loop body shouldn't be "too big" (heuristic).
- uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count;
- uint32_t uf2 = (trip_count - max_peel) / vector_length_;
- uint32_t unroll_factor =
- TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
- DCHECK_GE(unroll_factor, 1u);
- return unroll_factor;
- }
- case InstructionSet::kX86:
- case InstructionSet::kX86_64:
- default:
- return kNoUnrollingFactor;
- }
-}
-
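// Worked example of the removed heuristic (illustrative numbers): with
// vector_length_ = 4, max_peel = 0, trip_count = 100 and a 10-instruction
// body, uf1 = 50 / 10 = 5 and uf2 = 100 / 4 = 25, so the result is
// TruncToPowerOfTwo(min(5, 25, 8)) = 4. The logic presumably moves to the
// new ArchDefaultLoopHelper in loop_analysis.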
//
// Helpers.
//
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index a707ad13580..f9a31a34d40 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -20,12 +20,15 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
+#include "loop_analysis.h"
#include "nodes.h"
#include "optimization.h"
+#include "superblock_cloner.h"
namespace art {
class CompilerDriver;
+class ArchDefaultLoopHelper;
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
@@ -80,6 +83,7 @@ class HLoopOptimization : public HOptimization {
kNoReduction = 1 << 10, // no reduction
kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening
+ kNoSaturation = 1 << 13, // no saturation arithmetic
};
/*
@@ -134,10 +138,21 @@ class HLoopOptimization : public HOptimization {
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- // Performs optimizations specific to inner loop (empty loop removal,
+ // Performs optimizations specific to an inner loop with finite header logic (empty loop removal,
// unrolling, vectorization). Returns true if anything changed.
+ bool TryOptimizeInnerLoopFinite(LoopNode* node);
+
+ // Performs optimizations specific to an inner loop. Returns true if anything changed.
bool OptimizeInnerLoop(LoopNode* node);
+ // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
+ // opportunities. Returns whether the transformation happened.
+ bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+
+ // Tries to apply loop peeling to eliminate loop-invariant exits. Returns whether the
+ // transformation happened.
+ bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node);
+
//
// Vectorization analysis and synthesis.
//
@@ -177,6 +192,11 @@ class HLoopOptimization : public HOptimization {
bool is_unsigned = false);
// Vectorization idioms.
+ bool VectorizeSaturationIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ DataType::Type type,
+ uint64_t restrictions);
bool VectorizeHalvingAddIdiom(LoopNode* node,
HInstruction* instruction,
bool generate_code,
@@ -197,7 +217,6 @@ class HLoopOptimization : public HOptimization {
const ArrayReference* peeling_candidate);
uint32_t MaxNumberPeeled();
bool IsVectorizationProfitable(int64_t trip_count);
- uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
//
// Helpers.
@@ -291,6 +310,9 @@ class HLoopOptimization : public HOptimization {
HBasicBlock* vector_body_; // body of the new loop
HInstruction* vector_index_; // normalized index of the new loop
+ // Helper for target-specific behaviour of loop optimizations.
+ ArchDefaultLoopHelper* arch_loop_helper_;
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index db8368986c9..c21bd65d97e 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -227,11 +227,14 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
graph_->ClearDominanceInformation();
graph_->BuildDominatorTree();
+ // BuildDominatorTree inserts a block between the loop header and the entry block.
+ EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_);
+
// Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs
// are still mapped correctly to the block predecessors.
for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
HInstruction* input = phi->InputAt(i);
- ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
+ EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
}
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f6ba19f22a8..f784f8f7f35 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2891,6 +2891,8 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind
return os << "BootImageLinkTimePcRelative";
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
return os << "DirectAddress";
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry:
return os << "BssEntry";
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
@@ -2925,7 +2927,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
}
switch (GetLoadKind()) {
case LoadKind::kBootImageAddress:
- case LoadKind::kBootImageClassTable:
+ case LoadKind::kBootImageRelRo:
case LoadKind::kJitTableAddress: {
ScopedObjectAccess soa(Thread::Current());
return GetClass().Get() == other_load_class->GetClass().Get();
@@ -2944,8 +2946,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
return os << "BootImageLinkTimePcRelative";
case HLoadClass::LoadKind::kBootImageAddress:
return os << "BootImageAddress";
- case HLoadClass::LoadKind::kBootImageClassTable:
- return os << "BootImageClassTable";
+ case HLoadClass::LoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HLoadClass::LoadKind::kBssEntry:
return os << "BssEntry";
case HLoadClass::LoadKind::kJitTableAddress:
@@ -2968,7 +2970,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
}
switch (GetLoadKind()) {
case LoadKind::kBootImageAddress:
- case LoadKind::kBootImageInternTable:
+ case LoadKind::kBootImageRelRo:
case LoadKind::kJitTableAddress: {
ScopedObjectAccess soa(Thread::Current());
return GetString().Get() == other_load_string->GetString().Get();
@@ -2984,8 +2986,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
return os << "BootImageLinkTimePcRelative";
case HLoadString::LoadKind::kBootImageAddress:
return os << "BootImageAddress";
- case HLoadString::LoadKind::kBootImageInternTable:
- return os << "BootImageInternTable";
+ case HLoadString::LoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HLoadString::LoadKind::kBssEntry:
return os << "BssEntry";
case HLoadString::LoadKind::kJitTableAddress:
@@ -3101,6 +3103,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
return os << "array_object_check";
case TypeCheckKind::kArrayCheck:
return os << "array_check";
+ case TypeCheckKind::kBitstringCheck:
+ return os << "bitstring_check";
default:
LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fe992a7f399..79d733060b3 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1338,6 +1338,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
M(Above, Condition) \
M(AboveOrEqual, Condition) \
+ M(Abs, UnaryOperation) \
M(Add, BinaryOperation) \
M(And, BinaryOperation) \
M(ArrayGet, Instruction) \
@@ -1383,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(LoadException, Instruction) \
M(LoadString, Instruction) \
M(LongConstant, Constant) \
+ M(Max, Instruction) \
M(MemoryBarrier, Instruction) \
+ M(Min, BinaryOperation) \
M(MonitorOperation, Instruction) \
M(Mul, BinaryOperation) \
M(NativeDebugInfo, Instruction) \
@@ -1437,6 +1440,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecAndNot, VecBinaryOperation) \
M(VecOr, VecBinaryOperation) \
M(VecXor, VecBinaryOperation) \
+ M(VecSaturationAdd, VecBinaryOperation) \
+ M(VecSaturationSub, VecBinaryOperation) \
M(VecShl, VecBinaryOperation) \
M(VecShr, VecBinaryOperation) \
M(VecUShr, VecBinaryOperation) \
@@ -4428,6 +4433,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
// Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
kDirectAddress,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for app->boot calls with relocatable image.
+ kBootImageRelRo,
+
// Load from an entry in the .bss section using a PC-relative load.
// Used for classes outside boot image when .bss is accessible with a PC-relative load.
kBssEntry,
@@ -4560,6 +4569,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
bool HasPcRelativeMethodLoadKind() const {
return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative ||
+ GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo ||
GetMethodLoadKind() == MethodLoadKind::kBssEntry;
}
bool HasCurrentMethodInput() const {
@@ -5016,6 +5026,117 @@ class HRem FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Rem);
};
+class HMin FINAL : public HBinaryOperation {
+ public:
+ HMin(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const OVERRIDE { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x <= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+ DECLARE_INSTRUCTION(Min);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Min);
+};
+
+class HMax FINAL : public HBinaryOperation {
+ public:
+ HMax(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const OVERRIDE { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x >= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+ DECLARE_INSTRUCTION(Max);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Max);
+};
+
+class HAbs FINAL : public HUnaryOperation {
+ public:
+ HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
+ : HUnaryOperation(kAbs, result_type, input, dex_pc) {}
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x) {
+ return x < 0 ? -x : x;
+ }
+
+ // Evaluation for floating-point values.
+ // Note, as a "quality of implementation", rather than pure "spec compliance",
+ // we require that Math.abs() clears the sign bit (but changes nothing else)
+ // for all floating-point numbers, including NaN (signaling NaN may become quiet though).
+ // http://b/30758343
+ template <typename T, typename S> static T ComputeFP(T x) {
+ S bits = bit_cast<S, T>(x);
+ return bit_cast<T, S>(bits & std::numeric_limits<S>::max());
+ }
+
+ HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HFloatConstant* x) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetFloatConstant(
+ ComputeFP<float, int32_t>(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetDoubleConstant(
+ ComputeFP<double, int64_t>(x->GetValue()), GetDexPc());
+ }
+
+ DECLARE_INSTRUCTION(Abs);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Abs);
+};
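// Sketch of the sign-bit trick ComputeFP relies on (float case), using
// memcpy in place of bit_cast so the example stands alone:
//
//   float abs_via_bits(float x) {
//     int32_t bits;
//     std::memcpy(&bits, &x, sizeof(bits));
//     bits &= std::numeric_limits<int32_t>::max();  // clear only the sign bit
//     std::memcpy(&x, &bits, sizeof(x));
//     return x;  // -0.0f -> 0.0f; NaN payload preserved, sign cleared
//   }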
+
class HDivZeroCheck FINAL : public HExpression<1> {
public:
// `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
@@ -6025,12 +6146,12 @@ class HLoadClass FINAL : public HInstruction {
kBootImageLinkTimePcRelative,
// Use a known boot image Class* address, embedded in the code by the codegen.
- // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
+ // Used for boot image classes referenced by apps in JIT- and AOT-compiled code (non-PIC).
kBootImageAddress,
- // Use a PC-relative load from a boot image ClassTable mmapped into the .bss
- // of the oat file.
- kBootImageClassTable,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for boot image classes referenced by apps in AOT-compiled code (PIC).
+ kBootImageRelRo,
// Load from an entry in the .bss section using a PC-relative load.
// Used for classes outside boot image when .bss is accessible with a PC-relative load.
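// As a rough illustration of kBootImageRelRo above (an assumption; the exact
// sequence is each backend's business), the PC-relative load on arm64 is a
// two-instruction pattern:
//
//   adrp x0, <page of the .data.bimg.rel.ro entry>
//   ldr  w0, [x0, <page offset of the entry>]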
@@ -6057,8 +6178,7 @@ class HLoadClass FINAL : public HInstruction {
special_input_(HUserRecord<HInstruction*>(current_method)),
type_index_(type_index),
dex_file_(dex_file),
- klass_(klass),
- loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+ klass_(klass) {
// Referrers class should not need access check. We never inline unverified
// methods so we can't possibly end up in this situation.
DCHECK(!is_referrers_class || !needs_access_check);
@@ -6068,6 +6188,7 @@ class HLoadClass FINAL : public HInstruction {
SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
SetPackedFlag<kFlagIsInBootImage>(false);
SetPackedFlag<kFlagGenerateClInitCheck>(false);
+ SetPackedFlag<kFlagValidLoadedClassRTI>(false);
}
bool IsClonable() const OVERRIDE { return true; }
@@ -6078,6 +6199,12 @@ class HLoadClass FINAL : public HInstruction {
return GetPackedField<LoadKindField>();
}
+ bool HasPcRelativeLoadKind() const {
+ return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry;
+ }
+
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(const HInstruction* other) const;
@@ -6116,13 +6243,18 @@ class HLoadClass FINAL : public HInstruction {
}
ReferenceTypeInfo GetLoadedClassRTI() {
- return loaded_class_rti_;
+ if (GetPackedFlag<kFlagValidLoadedClassRTI>()) {
+ // Note: The is_exact flag from the return value should not be used.
+ return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+ } else {
+ return ReferenceTypeInfo::CreateInvalid();
+ }
}
- void SetLoadedClassRTI(ReferenceTypeInfo rti) {
- // Make sure we only set exact types (the loaded class should never be merged).
- DCHECK(rti.IsExact());
- loaded_class_rti_ = rti;
+ // Loaded class RTI is marked as valid by RTP if klass_ is admissible.
+ void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(klass_ != nullptr);
+ SetPackedFlag<kFlagValidLoadedClassRTI>(true);
}
dex::TypeIndex GetTypeIndex() const { return type_index_; }
@@ -6175,14 +6307,14 @@ class HLoadClass FINAL : public HInstruction {
static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1;
static constexpr size_t kFieldLoadKindSize =
MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
- static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+ static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize;
+ static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1;
static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
static bool HasTypeReference(LoadKind load_kind) {
return load_kind == LoadKind::kReferrersClass ||
load_kind == LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == LoadKind::kBootImageClassTable ||
load_kind == LoadKind::kBssEntry ||
load_kind == LoadKind::kRuntimeCall;
}
@@ -6203,8 +6335,6 @@ class HLoadClass FINAL : public HInstruction {
const DexFile& dex_file_;
Handle<mirror::Class> klass_;
-
- ReferenceTypeInfo loaded_class_rti_;
};
std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
@@ -6228,7 +6358,7 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
GetLoadKind() == LoadKind::kBootImageAddress ||
- GetLoadKind() == LoadKind::kBootImageClassTable ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
DCHECK(special_input_.GetInstruction() == nullptr);
special_input_ = HUserRecord<HInstruction*>(special_input);
@@ -6244,12 +6374,12 @@ class HLoadString FINAL : public HInstruction {
kBootImageLinkTimePcRelative,
// Use a known boot image String* address, embedded in the code by the codegen.
- // Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
+ // Used for boot image strings referenced by apps in JIT- and AOT-compiled code (non-PIC).
kBootImageAddress,
- // Use a PC-relative load from a boot image InternTable mmapped into the .bss
- // of the oat file.
- kBootImageInternTable,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for boot image strings referenced by apps in AOT-compiled code (PIC).
+ kBootImageRelRo,
// Load from an entry in the .bss section using a PC-relative load.
// Used for strings outside boot image when .bss is accessible with a PC-relative load.
@@ -6284,6 +6414,12 @@ class HLoadString FINAL : public HInstruction {
return GetPackedField<LoadKindField>();
}
+ bool HasPcRelativeLoadKind() const {
+ return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry;
+ }
+
const DexFile& GetDexFile() const {
return dex_file_;
}
@@ -6312,7 +6448,7 @@ class HLoadString FINAL : public HInstruction {
LoadKind load_kind = GetLoadKind();
if (load_kind == LoadKind::kBootImageLinkTimePcRelative ||
load_kind == LoadKind::kBootImageAddress ||
- load_kind == LoadKind::kBootImageInternTable ||
+ load_kind == LoadKind::kBootImageRelRo ||
load_kind == LoadKind::kJitTableAddress) {
return false;
}
@@ -6390,7 +6526,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
GetLoadKind() == LoadKind::kBootImageAddress ||
- GetLoadKind() == LoadKind::kBootImageInternTable ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
// HLoadString::GetInputRecords() returns an empty array at this point,
// so use the GetInputRecords() from the base class to set the input record.
@@ -6750,72 +6886,159 @@ enum class TypeCheckKind {
kInterfaceCheck, // No optimization yet when checking against an interface.
kArrayObjectCheck, // Can just check if the array is not primitive.
kArrayCheck, // No optimization yet when checking against a generic array.
+ kBitstringCheck, // Compare the type check bitstring.
- kLast = kArrayCheck
+ kLast = kBitstringCheck
};
std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
-class HInstanceOf FINAL : public HExpression<2> {
+// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an
+// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.)
+class HTypeCheckInstruction : public HVariableInputSizeInstruction {
public:
- HInstanceOf(HInstruction* object,
- HLoadClass* target_class,
- TypeCheckKind check_kind,
- uint32_t dex_pc)
- : HExpression(kInstanceOf,
- DataType::Type::kBool,
- SideEffectsForArchRuntimeCalls(check_kind),
- dex_pc) {
+ HTypeCheckInstruction(InstructionKind kind,
+ HInstruction* object,
+ HInstruction* target_class_or_null,
+ TypeCheckKind check_kind,
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask,
+ SideEffects side_effects)
+ : HVariableInputSizeInstruction(
+ kind,
+ side_effects,
+ dex_pc,
+ allocator,
+ /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u,
+ kArenaAllocTypeCheckInputs),
+ klass_(klass) {
SetPackedField<TypeCheckKindField>(check_kind);
SetPackedFlag<kFlagMustDoNullCheck>(true);
+ SetPackedFlag<kFlagValidTargetClassRTI>(false);
SetRawInputAt(0, object);
- SetRawInputAt(1, target_class);
+ SetRawInputAt(1, target_class_or_null);
+ DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr);
+ DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr);
+ if (check_kind == TypeCheckKind::kBitstringCheck) {
+ DCHECK(target_class_or_null->IsNullConstant());
+ SetRawInputAt(2, bitstring_path_to_root);
+ SetRawInputAt(3, bitstring_mask);
+ } else {
+ DCHECK(target_class_or_null->IsLoadClass());
+ }
}
HLoadClass* GetTargetClass() const {
+ DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
HInstruction* load_class = InputAt(1);
DCHECK(load_class->IsLoadClass());
return load_class->AsLoadClass();
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
+ uint32_t GetBitstringPathToRoot() const {
+ DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+ HInstruction* path_to_root = InputAt(2);
+ DCHECK(path_to_root->IsIntConstant());
+ return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue());
+ }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
+ uint32_t GetBitstringMask() const {
+ DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+ HInstruction* mask = InputAt(3);
+ DCHECK(mask->IsIntConstant());
+ return static_cast<uint32_t>(mask->AsIntConstant()->GetValue());
}
- bool NeedsEnvironment() const OVERRIDE {
- return CanCallRuntime(GetTypeCheckKind());
+ bool IsClonable() const OVERRIDE { return true; }
+ bool CanBeMoved() const OVERRIDE { return true; }
+
+ bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName();
+ return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields();
}
- // Used only in code generation.
bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
- static bool CanCallRuntime(TypeCheckKind check_kind) {
- // Mips currently does runtime calls for any other checks.
- return check_kind != TypeCheckKind::kExactCheck;
+ ReferenceTypeInfo GetTargetClassRTI() {
+ if (GetPackedFlag<kFlagValidTargetClassRTI>()) {
+ // Note: The is_exact flag from the return value should not be used.
+ return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+ } else {
+ return ReferenceTypeInfo::CreateInvalid();
+ }
}
- static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
- return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+ // Target class RTI is marked as valid by RTP if klass_ is admissible.
+ void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(klass_ != nullptr);
+ SetPackedFlag<kFlagValidTargetClassRTI>(true);
}
- DECLARE_INSTRUCTION(InstanceOf);
+ Handle<mirror::Class> GetClass() const {
+ return klass_;
+ }
protected:
- DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
+ DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction);
private:
- static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
static constexpr size_t kFieldTypeCheckKindSize =
MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
- static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1;
+ static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1;
+ static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1;
static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
+
+ Handle<mirror::Class> klass_;
+};
+
+class HInstanceOf FINAL : public HTypeCheckInstruction {
+ public:
+ HInstanceOf(HInstruction* object,
+ HInstruction* target_class_or_null,
+ TypeCheckKind check_kind,
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask)
+ : HTypeCheckInstruction(kInstanceOf,
+ object,
+ target_class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator,
+ bitstring_path_to_root,
+ bitstring_mask,
+ SideEffectsForArchRuntimeCalls(check_kind)) {}
+
+ DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; }
+
+ bool NeedsEnvironment() const OVERRIDE {
+ return CanCallRuntime(GetTypeCheckKind());
+ }
+
+ static bool CanCallRuntime(TypeCheckKind check_kind) {
+ // MIPS currently makes runtime calls for all other check kinds.
+ return check_kind != TypeCheckKind::kExactCheck;
+ }
+
+ static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
+ return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+ }
+
+ DECLARE_INSTRUCTION(InstanceOf);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
};
class HBoundType FINAL : public HExpression<1> {
@@ -6865,31 +7088,26 @@ class HBoundType FINAL : public HExpression<1> {
ReferenceTypeInfo upper_bound_;
};
-class HCheckCast FINAL : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTypeCheckInstruction {
public:
HCheckCast(HInstruction* object,
- HLoadClass* target_class,
+ HInstruction* target_class_or_null,
TypeCheckKind check_kind,
- uint32_t dex_pc)
- : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) {
- SetPackedField<TypeCheckKindField>(check_kind);
- SetPackedFlag<kFlagMustDoNullCheck>(true);
- SetRawInputAt(0, object);
- SetRawInputAt(1, target_class);
- }
-
- HLoadClass* GetTargetClass() const {
- HInstruction* load_class = InputAt(1);
- DCHECK(load_class->IsLoadClass());
- return load_class->AsLoadClass();
- }
-
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
-
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
- }
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask)
+ : HTypeCheckInstruction(kCheckCast,
+ object,
+ target_class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator,
+ bitstring_path_to_root,
+ bitstring_mask,
+ SideEffects::CanTriggerGC()) {}
bool NeedsEnvironment() const OVERRIDE {
// Instruction may throw a CheckCastError.
@@ -6898,24 +7116,10 @@ class HCheckCast FINAL : public HTemplateInstruction<2> {
bool CanThrow() const OVERRIDE { return true; }
- bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
- void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
- TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
- bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
-
DECLARE_INSTRUCTION(CheckCast);
protected:
DEFAULT_COPY_CONSTRUCTOR(CheckCast);
-
- private:
- static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
- static constexpr size_t kFieldTypeCheckKindSize =
- MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
- static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
- static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1;
- static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
- using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
};
/**
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index d1eaf5c3666..1a484e1944f 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -328,7 +328,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation {
uint32_t dex_pc)
: HVecUnaryOperation(
kVecReplicateScalar, allocator, scalar, packed_type, vector_length, dex_pc) {
- DCHECK(!scalar->IsVecOperation());
+ DCHECK(!ReturnsSIMDValue(scalar));
}
// A replicate needs to stay in place, since SIMD registers are not
@@ -533,6 +533,31 @@ class HVecAdd FINAL : public HVecBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(VecAdd);
};
+// Adds every component in the two vectors using saturation arithmetic,
+// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 +_sat y1, .. , xn +_sat yn ]
+// for either both signed or both unsigned operands x, y (reflected in packed_type).
+class HVecSaturationAdd FINAL : public HVecBinaryOperation {
+ public:
+ HVecSaturationAdd(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecBinaryOperation(
+ kVecSaturationAdd, allocator, left, right, packed_type, vector_length, dex_pc) {
+ DCHECK(HasConsistentPackedTypes(left, packed_type));
+ DCHECK(HasConsistentPackedTypes(right, packed_type));
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+
+ DECLARE_INSTRUCTION(VecSaturationAdd);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecSaturationAdd);
+};
+
// Performs halving add on every component in the two vectors, viz.
// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
@@ -598,6 +623,31 @@ class HVecSub FINAL : public HVecBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(VecSub);
};
+// Subtracts every component in the two vectors using saturation arithmetic,
+// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 -_sat y1, .. , xn -_sat yn ]
+// for either both signed or both unsigned operands x, y (reflected in packed_type).
+class HVecSaturationSub FINAL : public HVecBinaryOperation {
+ public:
+ HVecSaturationSub(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecBinaryOperation(
+ kVecSaturationSub, allocator, left, right, packed_type, vector_length, dex_pc) {
+ DCHECK(HasConsistentPackedTypes(left, packed_type));
+ DCHECK(HasConsistentPackedTypes(right, packed_type));
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+
+ DECLARE_INSTRUCTION(VecSaturationSub);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecSaturationSub);
+};
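// Scalar reference semantics for the two saturation nodes above (sketch,
// uint8 lanes; signed lanes clamp to [-128, 127] instead):
//
//   uint8_t sat_add_u8(uint8_t x, uint8_t y) {
//     return static_cast<uint8_t>(std::min(int{x} + int{y}, 255));
//   }
//   uint8_t sat_sub_u8(uint8_t x, uint8_t y) {
//     return static_cast<uint8_t>(std::max(int{x} - int{y}, 0));
//   }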
+
// Multiplies every component in the two vectors,
// viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ].
class HVecMul FINAL : public HVecBinaryOperation {
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index d20b681b49a..2e189fdd141 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -105,15 +105,15 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
const std::vector<uint8_t>& expected_asm,
const std::vector<uint8_t>& expected_cfi) {
// Get the outputs.
- const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory();
+ ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory();
Assembler* opt_asm = code_gen_->GetAssembler();
- const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
+ ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data()));
if (kGenerateExpected) {
GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
} else {
- EXPECT_EQ(expected_asm, actual_asm);
- EXPECT_EQ(expected_cfi, actual_cfi);
+ EXPECT_EQ(ArrayRef<const uint8_t>(expected_asm), actual_asm);
+ EXPECT_EQ(ArrayRef<const uint8_t>(expected_cfi), actual_cfi);
}
}
@@ -140,7 +140,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
return memory_.data();
}
- const std::vector<uint8_t>& GetMemory() { return memory_; }
+ ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); }
private:
std::vector<uint8_t> memory_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index e42dfc10ba5..cadefc3b015 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -75,22 +75,18 @@ static constexpr const char* kPassNameSeparator = "$";
class CodeVectorAllocator FINAL : public CodeAllocator {
public:
explicit CodeVectorAllocator(ArenaAllocator* allocator)
- : memory_(allocator->Adapter(kArenaAllocCodeBuffer)),
- size_(0) {}
+ : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {}
virtual uint8_t* Allocate(size_t size) {
- size_ = size;
memory_.resize(size);
return &memory_[0];
}
- size_t GetSize() const { return size_; }
- const ArenaVector<uint8_t>& GetMemory() const { return memory_; }
+ ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); }
uint8_t* GetData() { return memory_.data(); }
private:
ArenaVector<uint8_t> memory_;
- size_t size_;
DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
};
@@ -647,15 +643,13 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
MaybeRunInliner(graph, codegen, dex_compilation_unit, pass_observer, handles);
OptimizationDef optimizations2[] = {
- // SelectGenerator depends on the InstructionSimplifier removing
- // redundant suspend checks to recognize empty blocks.
+ OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"),
+ OptDef(OptimizationPass::kGlobalValueNumbering),
OptDef(OptimizationPass::kSelectGenerator),
- // TODO: if we don't inline we can also skip fold2.
OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"),
OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_inlining"),
OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$after_inlining"),
- OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"),
- OptDef(OptimizationPass::kGlobalValueNumbering),
+ OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_licm"),
OptDef(OptimizationPass::kInvariantCodeMotion),
OptDef(OptimizationPass::kInductionVarAnalysis),
OptDef(OptimizationPass::kBoundsCheckElimination),
@@ -719,7 +713,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
GetCompilerDriver(),
codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(code_allocator->GetMemory()),
+ code_allocator->GetMemory(),
// Follow Quick's behavior and set the frame size to zero if it is
// considered "empty" (see the definition of
// art::CodeGenerator::HasEmptyFrame).
@@ -731,6 +725,16 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
ArrayRef<const linker::LinkerPatch>(linker_patches));
+ CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
+ for (const linker::LinkerPatch& patch : linker_patches) {
+ if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) {
+ ArenaVector<uint8_t> code(allocator->Adapter());
+ std::string debug_name;
+ codegen->EmitThunkCode(patch, &code, &debug_name);
+ storage->SetThunkCode(patch, ArrayRef<const uint8_t>(code), debug_name);
+ }
+ }
+
return compiled_method;
}
@@ -1339,7 +1343,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
code_allocator.GetMemory().data(),
- code_allocator.GetSize(),
+ code_allocator.GetMemory().size(),
data_size,
osr,
roots,
@@ -1369,7 +1373,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.is_optimized = true;
info.is_code_address_text_relative = false;
info.code_address = code_address;
- info.code_size = code_allocator.GetSize();
+ info.code_size = code_allocator.GetMemory().size();
info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
@@ -1378,7 +1382,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
if (jit_logger != nullptr) {
- jit_logger->WriteLog(code, code_allocator.GetSize(), method);
+ jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method);
}
if (kArenaAllocatorCountAllocations) {
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 00194ff1fe0..9a26f2f6c40 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -99,6 +99,7 @@ enum class MethodCompilationStat {
kConstructorFenceRemovedLSE,
kConstructorFenceRemovedPFRA,
kConstructorFenceRemovedCFRE,
+ kBitstringTypeCheck,
kJitOutOfMemoryForCommit,
kLastStat
};
@@ -124,11 +125,6 @@ class OptimizingCompilerStats {
}
void Log() const {
- if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
- // Log only in debug builds or if the compiler is verbose.
- return;
- }
-
uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic);
uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub);
uint32_t bytecode_attempts =
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 6dcbadba6ed..a9bc5664c09 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -20,6 +20,7 @@
#include <memory>
#include <vector>
+#include "base/malloc_arena_pool.h"
#include "base/scoped_arena_allocator.h"
#include "builder.h"
#include "common_compiler_test.h"
@@ -97,7 +98,7 @@ class ArenaPoolAndAllocator {
ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; }
private:
- ArenaPool pool_;
+ MallocArenaPool pool_;
ArenaAllocator allocator_;
ArenaStack arena_stack_;
ScopedArenaAllocator scoped_allocator_;
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index cb87cabe1cd..be352011668 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/malloc_arena_pool.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
@@ -180,7 +181,7 @@ TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes);
TYPED_TEST(ParallelMoveTest, Dependency) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -207,7 +208,7 @@ TYPED_TEST(ParallelMoveTest, Dependency) {
}
TYPED_TEST(ParallelMoveTest, Cycle) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -257,7 +258,7 @@ TYPED_TEST(ParallelMoveTest, Cycle) {
}
TYPED_TEST(ParallelMoveTest, ConstantLast) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
@@ -276,7 +277,7 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) {
}
TYPED_TEST(ParallelMoveTest, Pairs) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -453,7 +454,7 @@ TYPED_TEST(ParallelMoveTest, Pairs) {
}
TYPED_TEST(ParallelMoveTest, MultiCycles) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -551,7 +552,7 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) {
// Test that we do 64bits moves before 32bits moves.
TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -610,7 +611,7 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
}
TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index 9d5358514ee..01022542062 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -75,7 +75,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
switch (load_kind) {
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
@@ -91,7 +91,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
switch (load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
case HLoadString::LoadKind::kBootImageAddress:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index f92f4b274ae..647336b6b93 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -81,20 +81,14 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
- HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
- load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (load_class->HasPcRelativeLoadKind()) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class);
load_class->AddSpecialInput(method_address);
}
}
void VisitLoadString(HLoadString* load_string) OVERRIDE {
- HLoadString::LoadKind load_kind = load_string->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kBootImageInternTable ||
- load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (load_string->HasPcRelativeLoadKind()) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string);
load_string->AddSpecialInput(method_address);
}
@@ -238,6 +232,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMinDoubleDouble:
case Intrinsics::kMathMinFloatFloat:
+ LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered "
+ "to IR nodes by instruction simplifier";
+ UNREACHABLE();
case Intrinsics::kMathRoundFloat:
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f843c008d8b..59733397bfe 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -34,6 +34,20 @@ void PrepareForRegisterAllocation::Run() {
}
}
+void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) {
+ // Record only those bitstring type checks that make it to the codegen stage.
+ if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+ }
+}
+
+void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) {
+ // Record only those bitstring type checks that make it to the codegen stage.
+ if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+ }
+}
+
void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
check->ReplaceWith(check->InputAt(0));
}
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 2c64f016c17..f6e4d3ef99b 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
"prepare_for_register_allocation";
private:
+ void VisitCheckCast(HCheckCast* check_cast) OVERRIDE;
+ void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE;
void VisitNullCheck(HNullCheck* check) OVERRIDE;
void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 67a61fc01de..4030883a57e 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -87,6 +87,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE;
void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
+ void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE;
void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
void VisitLoadString(HLoadString* instr) OVERRIDE;
void VisitLoadException(HLoadException* instr) OVERRIDE;
@@ -171,6 +172,12 @@ void ReferenceTypePropagation::ValidateTypes() {
<< "NullCheck " << instr->GetReferenceTypeInfo()
<< "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
}
+ } else if (instr->IsInstanceOf()) {
+ HInstanceOf* iof = instr->AsInstanceOf();
+ DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact());
+ } else if (instr->IsCheckCast()) {
+ HCheckCast* check = instr->AsCheckCast();
+ DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact());
}
}
}
@@ -499,8 +506,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock*
return;
}
- HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
- ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI();
if (!class_rti.IsValid()) {
// We have loaded an unresolved class. Don't bother bounding the type.
return;
@@ -643,15 +649,20 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet(
void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
ScopedObjectAccess soa(Thread::Current());
- Handle<mirror::Class> resolved_class = instr->GetClass();
- if (IsAdmissible(resolved_class.Get())) {
- instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
- resolved_class, /* is_exact */ true));
+ if (IsAdmissible(instr->GetClass().Get())) {
+ instr->SetValidLoadedClassRTI();
}
instr->SetReferenceTypeInfo(
ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
}
+void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+ if (IsAdmissible(instr->GetClass().Get())) {
+ instr->SetValidTargetClassRTI();
+ }
+}
+
void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
}
@@ -719,8 +730,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
}
void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
- HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
- ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
// The next instruction is not an uninitialized BoundType. This must be
@@ -729,12 +738,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast
}
DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
- if (class_rti.IsValid()) {
+ ScopedObjectAccess soa(Thread::Current());
+ Handle<mirror::Class> klass = check_cast->GetClass();
+ if (IsAdmissible(klass.Get())) {
DCHECK(is_first_run_);
- ScopedObjectAccess soa(Thread::Current());
+ check_cast->SetValidTargetClassRTI();
// This is the first run of RTP and class is resolved.
- bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
- bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact),
+ bool is_exact = klass->CannotBeAssignedFromOtherTypes();
+ bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact),
/* CheckCast succeeds for nulls. */ true);
} else {
// This is the first run of RTP and class is unresolved. Remove the binding.
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index bb28d50b569..bca538fb170 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -667,7 +667,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
// HUnaryOperation (or HBinaryOperation), check in debug mode that we have
// the exhaustive lists here.
if (instruction->IsUnaryOperation()) {
- DCHECK(instruction->IsBooleanNot() ||
+ DCHECK(instruction->IsAbs() ||
+ instruction->IsBooleanNot() ||
instruction->IsNot() ||
instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
return true;
@@ -678,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
instruction->IsCompare() ||
instruction->IsCondition() ||
instruction->IsDiv() ||
+ instruction->IsMin() ||
+ instruction->IsMax() ||
instruction->IsMul() ||
instruction->IsOr() ||
instruction->IsRem() ||
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 66e51421ca3..f9acf5aa9a9 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -16,6 +16,7 @@
#include "select_generator.h"
+#include "base/scoped_arena_containers.h"
#include "reference_type_propagation.h"
namespace art {
@@ -43,12 +44,16 @@ static bool IsSimpleBlock(HBasicBlock* block) {
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
if (instruction->IsControlFlow()) {
- if (num_instructions > kMaxInstructionsInBranch) {
- return false;
- }
return instruction->IsGoto() || instruction->IsReturn();
} else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
- num_instructions++;
+ if (instruction->IsSelect() &&
+ instruction->AsSelect()->GetCondition()->GetBlock() == block) {
+ // Count one HCondition and HSelect in the same block as a single instruction.
+ // This enables finding nested selects.
+ continue;
+ } else if (++num_instructions > kMaxInstructionsInBranch) {
+ return false; // Bail as soon as we exceed the number of allowed instructions.
+ }
} else {
return false;
}
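The new continue branch matters for nested selects: an HSelect whose HCondition lives in the same block now counts as one instruction rather than two, so chained ternaries still fit under kMaxInstructionsInBranch. A minimal source-level shape this enables (illustrative only):

    // Each inner "cond ? a : b" becomes an HCondition + HSelect pair in one block;
    // counting the pair once lets the outer diamond still qualify as simple.
    int Clamp(int v, int lo, int hi) {
      return v < lo ? lo : (v > hi ? hi : v);
    }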
@@ -86,9 +91,13 @@ static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index
}
void HSelectGenerator::Run() {
+ // Select cache with local allocator.
+ ScopedArenaAllocator allocator(graph_->GetArenaStack());
+ ScopedArenaSafeMap<HInstruction*, HSelect*> cache(
+ std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator));
+
// Iterate in post order in the unlikely case that removing one occurrence of
// the selection pattern empties a branch block of another occurrence.
- // Otherwise the order does not matter.
for (HBasicBlock* block : graph_->GetPostOrder()) {
if (!block->EndsWithIf()) continue;
@@ -97,6 +106,7 @@ void HSelectGenerator::Run() {
HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
DCHECK_NE(true_block, false_block);
+
if (!IsSimpleBlock(true_block) ||
!IsSimpleBlock(false_block) ||
!BlocksMergeTogether(true_block, false_block)) {
@@ -107,10 +117,10 @@ void HSelectGenerator::Run() {
// If the branches are not empty, move instructions in front of the If.
// TODO(dbrazdil): This puts an instruction between If and its condition.
// Implement moving of conditions to first users if possible.
- if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
+ while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
true_block->GetFirstInstruction()->MoveBefore(if_instruction);
}
- if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
+ while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
false_block->GetFirstInstruction()->MoveBefore(if_instruction);
}
DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
@@ -138,7 +148,8 @@ void HSelectGenerator::Run() {
DCHECK(both_successors_return || phi != nullptr);
// Create the Select instruction and insert it in front of the If.
- HSelect* select = new (graph_->GetAllocator()) HSelect(if_instruction->InputAt(0),
+ HInstruction* condition = if_instruction->InputAt(0);
+ HSelect* select = new (graph_->GetAllocator()) HSelect(condition,
true_value,
false_value,
if_instruction->GetDexPc());
@@ -175,6 +186,26 @@ void HSelectGenerator::Run() {
MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
+ // Very simple way of finding common subexpressions in the generated HSelect statements
+ // (since this runs after GVN). Lookup by condition, and reuse latest one if possible
+ // (due to post order, latest select is most likely replacement). If needed, we could
+ // improve this by e.g. using the operands in the map as well.
+ auto it = cache.find(condition);
+ if (it == cache.end()) {
+ cache.Put(condition, select);
+ } else {
+ // Found cached value. See if latest can replace cached in the HIR.
+ HSelect* cached = it->second;
+ DCHECK_EQ(cached->GetCondition(), select->GetCondition());
+ if (cached->GetTrueValue() == select->GetTrueValue() &&
+ cached->GetFalseValue() == select->GetFalseValue() &&
+ select->StrictlyDominates(cached)) {
+ cached->ReplaceWith(select);
+ cached->GetBlock()->RemoveInstruction(cached);
+ }
+ it->second = select; // always cache latest
+ }
+
// No need to update dominance information, as we are simplifying
// a simple diamond shape, where the join block is merged with the
// entry block. Any following blocks would have had the join block
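The cache amounts to a narrow common-subexpression elimination keyed on the condition alone, needed because this pass runs after GVN. A self-contained sketch of the policy, using std::map in place of the arena map (names are illustrative):

    #include <map>

    struct Select { int* cond; int* true_val; int* false_val; };

    // Keep the latest select per condition; replace an older duplicate only when
    // both values match and (in the real pass) the new select strictly dominates it.
    void CacheSelect(std::map<int*, Select*>* cache, Select* latest) {
      auto it = cache->find(latest->cond);
      if (it != cache->end() &&
          it->second->true_val == latest->true_val &&
          it->second->false_val == latest->false_val) {
        // ... replace-and-remove the cached select here ...
      }
      (*cache)[latest->cond] = latest;  // Always cache the latest.
    }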
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1e49411c72d..70b45763af7 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -125,8 +125,12 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
BootImageAOTCanEmbedMethod(callee, compiler_driver)) {
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+ } else if (IsInBootImage(callee)) {
+ // Use PC-relative access to the .data.bimg.rel.ro methods array.
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo;
+ code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
} else {
- // Use PC-relative access to the .bss methods arrays.
+ // Use PC-relative access to the .bss methods array.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
}
@@ -207,7 +211,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
} else if (is_in_boot_image) {
// AOT app compilation, boot image class.
if (codegen->GetCompilerOptions().GetCompilePic()) {
- desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable;
+ desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo;
} else {
desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
}
@@ -236,6 +240,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
return load_kind;
}
+static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(!klass->IsProxyClass());
+ DCHECK(!klass->IsArrayClass());
+
+ if (Runtime::Current()->UseJitCompilation()) {
+ // If we're JITting, try to assign a type check bitstring (fall through).
+ } else if (codegen->GetCompilerOptions().IsBootImage()) {
+ const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex());
+ if (!compiler_driver->IsImageClass(descriptor)) {
+ return false;
+ }
+ // If the target is a boot image class, try to assign a type check bitstring (fall through).
+ // (If --force-determinism, this was already done; repeating is OK and yields the same result.)
+ } else {
+ // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring
+ // already assigned in the boot image.
+ return false;
+ }
+
+ // Try to assign a type check bitstring.
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ if ((false) && // FIXME: Inliner does not respect compiler_driver->IsClassToCompile()
+ // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569
+ kIsDebugBuild &&
+ codegen->GetCompilerOptions().IsBootImage() &&
+ codegen->GetCompilerOptions().IsForceDeterminism()) {
+ SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+ CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed)
+ << klass->PrettyDescriptor() << "/" << old_state
+ << " in " << codegen->GetGraph()->PrettyMethod();
+ }
+ SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+ return state == SubtypeCheckInfo::kAssigned;
+}
+
+TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ bool needs_access_check) {
+ if (klass == nullptr) {
+ return TypeCheckKind::kUnresolvedCheck;
+ } else if (klass->IsInterface()) {
+ return TypeCheckKind::kInterfaceCheck;
+ } else if (klass->IsArrayClass()) {
+ if (klass->GetComponentType()->IsObjectClass()) {
+ return TypeCheckKind::kArrayObjectCheck;
+ } else if (klass->CannotBeAssignedFromOtherTypes()) {
+ return TypeCheckKind::kExactCheck;
+ } else {
+ return TypeCheckKind::kArrayCheck;
+ }
+ } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes.
+ return TypeCheckKind::kExactCheck;
+ } else if (kBitstringSubtypeCheckEnabled &&
+ !needs_access_check &&
+ CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) {
+ // TODO: We should not need the `!needs_access_check` check but getting rid of that
+ // requires rewriting some optimizations in instruction simplifier.
+ return TypeCheckKind::kBitstringCheck;
+ } else if (klass->IsAbstract()) {
+ return TypeCheckKind::kAbstractClassCheck;
+ } else {
+ return TypeCheckKind::kClassHierarchyCheck;
+ }
+}
+
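Read top to bottom, ComputeTypeCheckKind picks the cheapest sound check first; a few illustrative inputs (not taken from this diff):

    // klass == nullptr                -> kUnresolvedCheck
    // java.lang.Runnable (interface)  -> kInterfaceCheck
    // java.lang.Object[]              -> kArrayObjectCheck (Object component type)
    // java.lang.String (final class)  -> kExactCheck
    // an abstract class               -> kBitstringCheck if CanUseTypeCheckBitstring()
    //                                    succeeds, otherwise kAbstractClassCheck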
void HSharpening::ProcessLoadString(
HLoadString* load_string,
CodeGenerator* codegen,
@@ -288,7 +361,7 @@ void HSharpening::ProcessLoadString(
string = class_linker->LookupString(string_index, dex_cache.Get());
if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
if (codegen->GetCompilerOptions().GetCompilePic()) {
- desired_load_kind = HLoadString::LoadKind::kBootImageInternTable;
+ desired_load_kind = HLoadString::LoadKind::kBootImageRelRo;
} else {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
}
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 6df7d6d91ed..fa3e948eeb5 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -44,12 +44,10 @@ class HSharpening : public HOptimization {
static constexpr const char* kSharpeningPassName = "sharpening";
- // Used by the builder.
- static void ProcessLoadString(HLoadString* load_string,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit,
- VariableSizedHandleScope* handles);
+ // Used by Sharpening and InstructionSimplifier.
+ static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver);
// Used by the builder and the inliner.
static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
@@ -58,10 +56,19 @@ class HSharpening : public HOptimization {
const DexCompilationUnit& dex_compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_);
- // Used by Sharpening and InstructionSimplifier.
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver);
+ // Used by the builder.
+ static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ bool needs_access_check)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Used by the builder.
+ static void ProcessLoadString(HLoadString* load_string,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ VariableSizedHandleScope* handles);
private:
CodeGenerator* codegen_;
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 579aabdb5f5..268e9bd6e0c 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -19,9 +19,9 @@
#include "base/bit_vector-inl.h"
#include "base/hash_map.h"
+#include "base/memory_region.h"
#include "base/scoped_arena_containers.h"
#include "base/value_object.h"
-#include "memory_region.h"
#include "method_info.h"
#include "nodes.h"
#include "stack_map.h"
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 7e517f34850..e36c5926622 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -18,6 +18,7 @@
#include "art_method.h"
#include "base/arena_bit_vector.h"
+#include "base/malloc_arena_pool.h"
#include "stack_map_stream.h"
#include "gtest/gtest.h"
@@ -46,7 +47,7 @@ static bool CheckStackMask(
using Kind = DexRegisterLocation::Kind;
TEST(StackMapTest, Test1) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -128,7 +129,7 @@ TEST(StackMapTest, Test1) {
}
TEST(StackMapTest, Test2) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -413,7 +414,7 @@ TEST(StackMapTest, Test2) {
}
TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -508,7 +509,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
}
TEST(StackMapTest, TestNonLiveDexRegisters) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -588,7 +589,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
// StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do
// not treat it as kNoDexRegisterMap.
TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -652,7 +653,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
}
TEST(StackMapTest, TestShareDexRegisterMap) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -711,7 +712,7 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
}
TEST(StackMapTest, TestNoDexRegisterMap) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -761,7 +762,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
}
TEST(StackMapTest, InlineTest) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -949,7 +950,7 @@ TEST(StackMapTest, CodeOffsetTest) {
}
TEST(StackMapTest, TestDeduplicateStackMask) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
@@ -978,7 +979,7 @@ TEST(StackMapTest, TestDeduplicateStackMask) {
}
TEST(StackMapTest, TestInvokeInfo) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index a7c23bef7e3..fad7729956b 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -70,20 +70,18 @@ static bool ArePhiInputsTheSame(const HPhi* phi) {
return true;
}
-// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
-// graph.
-static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
- if (loop1 == nullptr || loop2 == nullptr) {
- return nullptr;
+// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method).
+static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) {
+ if (set1->Size() != set2->Size()) {
+ return false;
}
- if (loop1->IsIn(*loop2)) {
- return loop2;
- } else if (loop2->IsIn(*loop1)) {
- return loop1;
+ for (auto e : *set1) {
+ if (set2->Find(e) == set2->end()) {
+ return false;
+ }
}
- HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
- return block->GetLoopInformation();
+ return true;
}
// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
@@ -95,6 +93,21 @@ static void OrderLoopsHeadersPredecessors(HGraph* graph) {
}
}
+// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while
+// traversing, the function removes basic blocks from 'bb_set' (instead of traditional DFS
+// 'marking'). So whatever is left in 'bb_set' after the traversal is not reachable from the
+// start block.
+static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) {
+ DCHECK(bb_set->IsBitSet(block->GetBlockId()));
+ bb_set->ClearBit(block->GetBlockId());
+
+ for (HBasicBlock* succ : block->GetSuccessors()) {
+ if (bb_set->IsBitSet(succ->GetBlockId())) {
+ TraverseSubgraphForConnectivity(succ, bb_set);
+ }
+ }
+}
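The bit-clearing doubles as the DFS visited set; a standalone sketch of the same connectivity check over a plain adjacency list (generic code, not ART types):

    #include <cstdint>
    #include <vector>

    // Clear a block's bit on visit; whatever stays set was never reached.
    void Traverse(uint32_t block,
                  const std::vector<std::vector<uint32_t>>& successors,
                  std::vector<bool>* in_set) {
      (*in_set)[block] = false;
      for (uint32_t succ : successors[block]) {
        if ((*in_set)[succ]) {
          Traverse(succ, successors, in_set);
        }
      }
    }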
+
//
// Helpers for CloneBasicBlock.
//
@@ -268,7 +281,6 @@ void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVect
}
void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
- // TODO: DCHECK that after the transformation the graph is connected.
HBasicBlock* block_entry = nullptr;
if (outer_loop_ == nullptr) {
@@ -424,6 +436,11 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
outer_loop_ = nullptr;
break;
}
+ if (outer_loop_ == nullptr) {
+ // We should not use the initial outer_loop_ value 'nullptr' when finding the outermost
+ // common loop.
+ outer_loop_ = loop_exit_loop_info;
+ }
outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
}
@@ -507,6 +524,34 @@ void SuperblockCloner::ResolveDataFlow() {
// Debug and logging methods.
//
+// Debug function to dump the graph's BasicBlocks info.
+void DumpBB(HGraph* graph) {
+ for (HBasicBlock* bb : graph->GetBlocks()) {
+ if (bb == nullptr) {
+ continue;
+ }
+ std::cout << bb->GetBlockId();
+ std::cout << " <- ";
+ for (HBasicBlock* pred : bb->GetPredecessors()) {
+ std::cout << pred->GetBlockId() << " ";
+ }
+ std::cout << " -> ";
+ for (HBasicBlock* succ : bb->GetSuccessors()) {
+ std::cout << succ->GetBlockId() << " ";
+ }
+
+ if (bb->GetDominator()) {
+ std::cout << " dom " << bb->GetDominator()->GetBlockId();
+ }
+
+ if (bb->GetLoopInformation()) {
+ std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
+ }
+
+ std::cout << std::endl;
+ }
+}
+
void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
DCHECK(!orig_instr->IsPhi());
HInstruction* copy_instr = GetInstrCopy(orig_instr);
@@ -542,6 +587,82 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr)
}
}
+bool SuperblockCloner::CheckRemappingInfoIsValid() {
+ for (HEdge edge : *remap_orig_internal_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ !IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ for (auto edge : *remap_copy_internal_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ !IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ for (auto edge : *remap_incoming_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void SuperblockCloner::VerifyGraph() {
+ for (auto it : *hir_map_) {
+ HInstruction* orig_instr = it.first;
+ HInstruction* copy_instr = it.second;
+ if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) {
+ DCHECK(it.first->GetBlock() != nullptr);
+ }
+ if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) {
+ DCHECK(it.second->GetBlock() != nullptr);
+ }
+ }
+
+ GraphChecker checker(graph_);
+ checker.Run();
+ if (!checker.IsValid()) {
+ for (const std::string& error : checker.GetErrors()) {
+ std::cout << error << std::endl;
+ }
+ LOG(FATAL) << "GraphChecker failed: superblock cloner\n";
+ }
+}
+
+void DumpBBSet(const ArenaBitVector* set) {
+ for (uint32_t idx : set->Indexes()) {
+ std::cout << idx << "\n";
+ }
+}
+
+void SuperblockCloner::DumpInputSets() {
+ std::cout << graph_->PrettyMethod() << "\n";
+ std::cout << "orig_bb_set:\n";
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ std::cout << idx << "\n";
+ }
+ std::cout << "remap_orig_internal:\n";
+ for (HEdge e : *remap_orig_internal_) {
+ std::cout << e << "\n";
+ }
+ std::cout << "remap_copy_internal:\n";
+ for (auto e : *remap_copy_internal_) {
+ std::cout << e << "\n";
+ }
+ std::cout << "remap_incoming:\n";
+ for (auto e : *remap_incoming_) {
+ std::cout << e << "\n";
+ }
+}
+
//
// Public methods.
//
@@ -569,6 +690,7 @@ void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_inte
remap_orig_internal_ = remap_orig_internal;
remap_copy_internal_ = remap_copy_internal;
remap_incoming_ = remap_incoming;
+ DCHECK(CheckRemappingInfoIsValid());
}
bool SuperblockCloner::IsSubgraphClonable() const {
@@ -602,6 +724,63 @@ bool SuperblockCloner::IsSubgraphClonable() const {
return true;
}
+bool SuperblockCloner::IsFastCase() const {
+ // Check that loop unrolling/loop peeling is being conducted.
+ // Check that all the basic blocks belong to the same loop.
+ bool flag = false;
+ HLoopInformation* common_loop_info = nullptr;
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+ HLoopInformation* block_loop_info = block->GetLoopInformation();
+ if (!flag) {
+ common_loop_info = block_loop_info;
+ flag = true;
+ } else {
+ if (block_loop_info != common_loop_info) {
+ return false;
+ }
+ }
+ }
+
+ // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
+ if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
+ return false;
+ }
+
+ bool peeling_or_unrolling = false;
+ HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ // Check whether remapping info corresponds to loop unrolling.
+ CollectRemappingInfoForPeelUnroll(/* to_unroll */ true,
+ common_loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+ EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+ EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+ remap_orig_internal.Clear();
+ remap_copy_internal.Clear();
+ remap_incoming.Clear();
+
+ // Check whether remapping info corresponds to loop peeling.
+ CollectRemappingInfoForPeelUnroll(/* to_unroll */ false,
+ common_loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+ EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+ EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+ return peeling_or_unrolling;
+}
+
void SuperblockCloner::Run() {
DCHECK(bb_map_ != nullptr);
DCHECK(hir_map_ != nullptr);
@@ -609,6 +788,11 @@ void SuperblockCloner::Run() {
remap_copy_internal_ != nullptr &&
remap_incoming_ != nullptr);
DCHECK(IsSubgraphClonable());
+ DCHECK(IsFastCase());
+
+ if (kSuperblockClonerLogging) {
+ DumpInputSets();
+ }
// Find an area in the graph for which control flow information should be adjusted.
FindAndSetLocalAreaForAdjustments();
@@ -618,6 +802,19 @@ void SuperblockCloner::Run() {
// Connect the blocks together/remap successors and fix phis which are directly affected by the
// remapping.
RemapEdgesSuccessors();
+
+ // Check that the subgraph is connected.
+ if (kIsDebugBuild) {
+ HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner);
+
+ // Add original and copy blocks of the subgraph to the work set.
+ for (auto iter : *bb_map_) {
+ work_set.SetBit(iter.first->GetBlockId()); // Original block.
+ work_set.SetBit(iter.second->GetBlockId()); // Copy block.
+ }
+ CHECK(IsSubgraphConnected(&work_set, graph_));
+ }
+
// Recalculate dominance and backedge information which is required by the next stage.
AdjustControlFlowInfo();
// Fix data flow of the graph.
@@ -650,6 +847,10 @@ void SuperblockCloner::CleanUp() {
}
}
}
+
+ if (kIsDebugBuild) {
+ VerifyGraph();
+ }
}
HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
@@ -701,4 +902,127 @@ void SuperblockCloner::CloneBasicBlocks() {
}
}
+//
+// Stand-alone methods.
+//
+
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+ HLoopInformation* loop_info,
+ HEdgeSet* remap_orig_internal,
+ HEdgeSet* remap_copy_internal,
+ HEdgeSet* remap_incoming) {
+ DCHECK(loop_info != nullptr);
+ HBasicBlock* loop_header = loop_info->GetHeader();
+ // Set up the remap_orig_internal edge set (it stays empty for peeling).
+ // Set up the remap_copy_internal edge set.
+ for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) {
+ HEdge e = HEdge(back_edge_block, loop_header);
+ if (to_unroll) {
+ remap_orig_internal->Insert(e);
+ remap_copy_internal->Insert(e);
+ } else {
+ remap_copy_internal->Insert(e);
+ }
+ }
+
+ // Set up remap_incoming edges set.
+ if (!to_unroll) {
+ remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header));
+ }
+}
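For a canonical loop with preheader P, header H and one back edge B->H, the sets produced above can be enumerated by hand:

    // to_unroll == true  (unrolling):
    //   remap_orig_internal = {B->H}, remap_copy_internal = {B->H}, remap_incoming = {}
    // to_unroll == false (peeling):
    //   remap_orig_internal = {},     remap_copy_internal = {B->H}, remap_incoming = {P->H}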
+
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) {
+ ArenaVector<HBasicBlock*> entry_blocks(
+ graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ // Find subgraph entry blocks.
+ for (uint32_t orig_block_id : work_set->Indexes()) {
+ HBasicBlock* block = graph->GetBlocks()[orig_block_id];
+ for (HBasicBlock* pred : block->GetPredecessors()) {
+ if (!work_set->IsBitSet(pred->GetBlockId())) {
+ entry_blocks.push_back(block);
+ break;
+ }
+ }
+ }
+
+ for (HBasicBlock* entry_block : entry_blocks) {
+ if (work_set->IsBitSet(entry_block->GetBlockId())) {
+ TraverseSubgraphForConnectivity(entry_block, work_set);
+ }
+ }
+
+ // Return whether every block was visited, i.e. there are no unvisited - unreachable - blocks.
+ return work_set->NumSetBits() == 0;
+}
+
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+ if (loop1 == nullptr || loop2 == nullptr) {
+ return nullptr;
+ }
+
+ if (loop1->IsIn(*loop2)) {
+ return loop2;
+ }
+
+ HLoopInformation* current = loop1;
+ while (current != nullptr && !loop2->IsIn(*current)) {
+ current = current->GetPreHeader()->GetLoopInformation();
+ }
+
+ return current;
+}
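The walk ascends the loop tree from loop1 through preheaders until it hits a loop containing loop2; for a nesting where loop A contains siblings B and C (illustrative):

    //   FindCommonLoop(B, B) == B        // IsIn() is reflexive
    //   FindCommonLoop(B, A) == A        // early loop1->IsIn(*loop2) return
    //   FindCommonLoop(B, C) == A        // walk B -> A; A is the first loop containing C
    //   FindCommonLoop(B, nullptr) == nullptr  // nullptr stands for the whole graph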
+
+bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) {
+ PeelUnrollHelper helper(loop_info, nullptr, nullptr);
+ return helper.IsLoopClonable();
+}
+
+HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
+ // For now do peeling only for natural loops.
+ DCHECK(!loop_info_->IsIrreducible());
+
+ HBasicBlock* loop_header = loop_info_->GetHeader();
+ // Check that loop info is up-to-date.
+ DCHECK(loop_info_ == loop_header->GetLoopInformation());
+
+ HGraph* graph = loop_header->GetGraph();
+ ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
+
+ HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ CollectRemappingInfoForPeelUnroll(to_unroll,
+ loop_info_,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+ cloner_.Run();
+ cloner_.CleanUp();
+
+ // Check that loop info is preserved.
+ DCHECK(loop_info_ == loop_header->GetLoopInformation());
+
+ return loop_header;
+}
+
+PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info)
+ : bb_map_(std::less<HBasicBlock*>(),
+ info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+ hir_map_(std::less<HInstruction*>(),
+ info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+ helper_(info, &bb_map_, &hir_map_) {}
+
} // namespace art
+
+namespace std {
+
+ostream& operator<<(ostream& os, const art::HEdge& e) {
+ e.Dump(os);
+ return os;
+}
+
+} // namespace std
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 23de6926735..e0931674cb3 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -25,7 +25,6 @@
namespace art {
static const bool kSuperblockClonerLogging = false;
-static const bool kSuperblockClonerVerify = false;
// Represents an edge between two HBasicBlocks.
//
@@ -152,6 +151,15 @@ class SuperblockCloner : public ValueObject {
// TODO: Start from small range of graph patterns then extend it.
bool IsSubgraphClonable() const;
+ // Returns whether the selected subgraph satisfies the criteria for fast data flow resolution
+ // when the iterative DF algorithm is not required and dominators/instructions inputs can be
+ // trivially adjusted.
+ //
+ // TODO: formally describe the criteria.
+ //
+ // Loop peeling and unrolling satisfy the criteria.
+ bool IsFastCase() const;
+
// Runs the copy algorithm according to the description.
void Run();
@@ -202,11 +210,17 @@ class SuperblockCloner : public ValueObject {
return IsInOrigBBSet(block->GetBlockId());
}
+ // Returns the area (the outermost loop) in the graph for which control flow (back edges, loops,
+ // dominators) needs to be adjusted.
+ HLoopInformation* GetRegionToBeAdjusted() const {
+ return outer_loop_;
+ }
+
private:
// Fills the 'exits' vector with the subgraph exits.
void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
- // Finds and records information about the area in the graph for which control-flow (back edges,
+ // Finds and records information about the area in the graph for which control flow (back edges,
// loops, dominators) needs to be adjusted.
void FindAndSetLocalAreaForAdjustments();
@@ -217,7 +231,7 @@ class SuperblockCloner : public ValueObject {
// phis' nor instructions' inputs values are resolved.
void RemapEdgesSuccessors();
- // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+ // Adjusts control flow (back edges, loops, dominators) for the local area defined by
// FindAndSetLocalAreaForAdjustments.
void AdjustControlFlowInfo();
@@ -272,6 +286,9 @@ class SuperblockCloner : public ValueObject {
// Debug and logging methods.
//
void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+ bool CheckRemappingInfoIsValid();
+ void VerifyGraph();
+ void DumpInputSets();
HBasicBlock* GetBlockById(uint32_t block_id) const {
DCHECK(block_id < graph_->GetBlocks().size());
@@ -295,15 +312,97 @@ class SuperblockCloner : public ValueObject {
HBasicBlockMap* bb_map_;
// Correspondence map for instructions: (original HInstruction, copy HInstruction).
HInstructionMap* hir_map_;
- // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+ // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
HLoopInformation* outer_loop_;
HBasicBlockSet outer_loop_bb_set_;
ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+ ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
};
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when a correspondence map between original and copied
+// basic blocks/instructions is demanded.
+class PeelUnrollHelper : public ValueObject {
+ public:
+ explicit PeelUnrollHelper(HLoopInformation* info,
+ SuperblockCloner::HBasicBlockMap* bb_map,
+ SuperblockCloner::HInstructionMap* hir_map) :
+ loop_info_(info),
+ cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map) {
+ // For now do peeling/unrolling only for natural loops.
+ DCHECK(!info->IsIrreducible());
+ }
+
+ // Returns whether the loop can be peeled/unrolled (static function).
+ static bool IsLoopClonable(HLoopInformation* loop_info);
+
+ // Returns whether the loop can be peeled/unrolled.
+ bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
+
+ HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll */ false); }
+ HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll */ true); }
+ HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
+
+ protected:
+ // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
+ //
+ // Depending on 'to_unroll' either unrolls the loop by 2 or peels one iteration from it.
+ HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
+
+ private:
+ HLoopInformation* loop_info_;
+ SuperblockCloner cloner_;
+
+ DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
+};
+
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when there is no need to get correspondence information between
+// original and copied basic blocks/instructions.
+class PeelUnrollSimpleHelper : public ValueObject {
+ public:
+ explicit PeelUnrollSimpleHelper(HLoopInformation* info);
+ bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
+ HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
+ HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
+ HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
+
+ const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
+ const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; }
+
+ private:
+ SuperblockCloner::HBasicBlockMap bb_map_;
+ SuperblockCloner::HInstructionMap hir_map_;
+ PeelUnrollHelper helper_;
+
+ DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
+};
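Typical call sequence, mirroring the tests added later in this change (a sketch, assuming loop_info comes from a graph with a built dominator tree):

    PeelUnrollSimpleHelper helper(loop_info);
    if (helper.IsLoopClonable()) {
      HBasicBlock* header = helper.DoPeeling();  // or helper.DoUnrolling()
      // Outermost region whose control flow info was recalculated, or nullptr
      // if nothing outside the transformed loop was affected.
      HLoopInformation* adjusted = helper.GetRegionToBeAdjusted();
    }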
+
+// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+ HLoopInformation* loop_info,
+ SuperblockCloner::HEdgeSet* remap_orig_internal,
+ SuperblockCloner::HEdgeSet* remap_copy_internal,
+ SuperblockCloner::HEdgeSet* remap_incoming);
+
+// Returns whether blocks from 'work_set' are reachable from the rest of the graph.
+//
+// Returns whether there exists a set 'outer_entries' of basic blocks such that:
+// - each block from 'outer_entries' is not from 'work_set'.
+// - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
+//
+// After the function returns work_set contains only blocks from the original 'work_set'
+// which are unreachable from the rest of the graph.
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
} // namespace art
namespace std {
@@ -312,11 +411,12 @@ template <>
struct hash<art::HEdge> {
size_t operator()(art::HEdge const& x) const noexcept {
// Use Cantor pairing function as the hash function.
- uint32_t a = x.GetFrom();
- uint32_t b = x.GetTo();
+ size_t a = x.GetFrom();
+ size_t b = x.GetTo();
return (a + b) * (a + b + 1) / 2 + b;
}
};
+ostream& operator<<(ostream& os, const art::HEdge& e);
} // namespace std
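Widening a and b to size_t keeps the intermediate (a + b) * (a + b + 1) product from overflowing 32 bits for large block ids. Since Cantor pairing is a bijection on pairs of naturals, distinct edges hash to distinct values, e.g.:

    // hash(HEdge(1, 2)) = (1 + 2) * (1 + 2 + 1) / 2 + 2 = 8
    // hash(HEdge(2, 1)) = (2 + 1) * (2 + 1 + 1) / 2 + 1 = 7  // direction matters
    // hash(HEdge(0, 3)) = (0 + 3) * (0 + 3 + 1) / 2 + 3 = 9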
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index f1b7bffdf5f..df2e517afff 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -25,52 +25,65 @@ namespace art {
using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
// This class provides methods and helpers for testing various cloning and copying routines:
// individual instruction cloning and cloning of the more coarse-grain structures.
class SuperblockClonerTest : public OptimizingUnitTest {
public:
- SuperblockClonerTest()
- : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {}
+ SuperblockClonerTest() : graph_(CreateGraph()),
+ entry_block_(nullptr),
+ return_block_(nullptr),
+ exit_block_(nullptr),
+ parameter_(nullptr) {}
- void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p,
- /* out */ HBasicBlock** body_p) {
+ void InitGraph() {
entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
graph_->AddBlock(entry_block_);
graph_->SetEntryBlock(entry_block_);
+ return_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(return_block_);
+
+ exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(exit_block_);
+ graph_->SetExitBlock(exit_block_);
+
+ entry_block_->AddSuccessor(return_block_);
+ return_block_->AddSuccessor(exit_block_);
+
+ parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ DataType::Type::kInt32);
+ entry_block_->AddInstruction(parameter_);
+ return_block_->AddInstruction(new (GetAllocator()) HReturnVoid());
+ exit_block_->AddInstruction(new (GetAllocator()) HExit());
+ }
+
+ void CreateBasicLoopControlFlow(HBasicBlock* position,
+ HBasicBlock* successor,
+ /* out */ HBasicBlock** header_p,
+ /* out */ HBasicBlock** body_p) {
HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_);
- HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_);
graph_->AddBlock(loop_preheader);
graph_->AddBlock(loop_header);
graph_->AddBlock(loop_body);
- graph_->AddBlock(loop_exit);
- exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(exit_block_);
- graph_->SetExitBlock(exit_block_);
+ position->ReplaceSuccessor(successor, loop_preheader);
- entry_block_->AddSuccessor(loop_preheader);
loop_preheader->AddSuccessor(loop_header);
// Loop exit first to have a proper exit condition/target for HIf.
- loop_header->AddSuccessor(loop_exit);
+ loop_header->AddSuccessor(successor);
loop_header->AddSuccessor(loop_body);
loop_body->AddSuccessor(loop_header);
- loop_exit->AddSuccessor(exit_block_);
*header_p = loop_header;
*body_p = loop_body;
-
- parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- DataType::Type::kInt32);
- entry_block_->AddInstruction(parameter_);
- loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid());
- exit_block_->AddInstruction(new (GetAllocator()) HExit());
}
void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) {
@@ -84,11 +97,12 @@ class SuperblockClonerTest : public OptimizingUnitTest {
// Header block.
HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck();
+ HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128);
loop_header->AddPhi(phi);
loop_header->AddInstruction(suspend_check);
- loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128));
- loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_));
+ loop_header->AddInstruction(loop_check);
+ loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check));
// Loop body block.
HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc);
@@ -97,8 +111,8 @@ class SuperblockClonerTest : public OptimizingUnitTest {
HInstruction* array_get =
new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc);
HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1);
- HInstruction* array_set =
- new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
+ HInstruction* array_set = new (GetAllocator()) HArraySet(
+ null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1);
loop_body->AddInstruction(null_check);
@@ -153,6 +167,7 @@ class SuperblockClonerTest : public OptimizingUnitTest {
HGraph* graph_;
HBasicBlock* entry_block_;
+ HBasicBlock* return_block_;
HBasicBlock* exit_block_;
HInstruction* parameter_;
@@ -162,10 +177,11 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
HBasicBlock* header = nullptr;
HBasicBlock* loop_body = nullptr;
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
- ASSERT_TRUE(CheckGraph());
+ EXPECT_TRUE(CheckGraph());
HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck();
CloneAndReplaceInstructionVisitor visitor(graph_);
@@ -193,7 +209,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
HBasicBlock* loop_body = nullptr;
ArenaAllocator* arena = graph_->GetAllocator();
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
ASSERT_TRUE(CheckGraph());
@@ -272,7 +289,8 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
HBasicBlock* loop_body = nullptr;
ArenaAllocator* arena = graph_->GetAllocator();
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
ASSERT_TRUE(CheckGraph());
@@ -303,4 +321,487 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
}
+// Tests the IsSubgraphConnected function for the negative case.
+TEST_F(SuperblockClonerTest, IsGraphConnected) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(unreachable_block);
+
+ HBasicBlockSet bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ bb_set.SetBit(header->GetBlockId());
+ bb_set.SetBit(loop_body->GetBlockId());
+ bb_set.SetBit(unreachable_block->GetBlockId());
+
+ EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_));
+ EXPECT_EQ(bb_set.NumSetBits(), 1u);
+ EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId()));
+}
+
+// Tests SuperblockCloner for loop peeling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+// Before After
+//
+// |B| |B|
+// | |
+// v v
+// |1| |1|
+// | |
+// v v
+// |2|<-\ (6) |2A|
+// / \ / / \
+// v v/ / v
+// |4| |3| / |3A| (7)
+// | / /
+// v | v
+// |E| \ |2|<-\
+// \ / \ /
+// v v /
+// |4| |3|
+// |
+// v
+// |E|
+TEST_F(SuperblockClonerTest, LoopPeeling) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollHelper helper(loop_info, &bb_map, &hir_map);
+ EXPECT_TRUE(helper.IsLoopClonable());
+ HBasicBlock* new_header = helper.DoPeeling();
+ HLoopInformation* new_loop_info = new_header->GetLoopInformation();
+
+ EXPECT_TRUE(CheckGraph());
+
+ // Check loop body successors.
+ EXPECT_EQ(loop_body->GetSingleSuccessor(), header);
+ EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+ // Check loop structure.
+ EXPECT_EQ(header, new_header);
+ EXPECT_EQ(new_loop_info->GetHeader(), header);
+ EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u);
+ EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body);
+}
+
+// Tests SuperblockCloner for loop unrolling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+// Before After
+//
+// |B| |B|
+// | |
+// v v
+// |1| |1|
+// | |
+// v v
+// |2|<-\ (6) |2A|<-\
+// / \ / / \ \
+// v v/ / v \
+// |4| |3| /(7)|3A| |
+// | / / /
+// v | v /
+// |E| \ |2| /
+// \ / \ /
+// v v/
+// |4| |3|
+// |
+// v
+// |E|
+TEST_F(SuperblockClonerTest, LoopUnrolling) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollHelper helper(loop_info, &bb_map, &hir_map);
+ EXPECT_TRUE(helper.IsLoopClonable());
+ HBasicBlock* new_header = helper.DoUnrolling();
+
+ EXPECT_TRUE(CheckGraph());
+
+ // Check loop body successors.
+ EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header));
+ EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+ // Check loop structure.
+ EXPECT_EQ(header, new_header);
+ EXPECT_EQ(loop_info, new_header->GetLoopInformation());
+ EXPECT_EQ(loop_info->GetHeader(), new_header);
+ EXPECT_EQ(loop_info->GetBackEdges().size(), 1u);
+ EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body));
+}
+
+// Checks that loop peeling works fine for a loop with multiple back edges. Tests that after
+// the transformation the loop has a single preheader.
+TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+
+ // Transform a basic loop to have multiple back edges.
+ HBasicBlock* latch = header->GetSuccessors()[1];
+ HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
+ HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(if_block);
+ graph_->AddBlock(temp1);
+ header->ReplaceSuccessor(latch, if_block);
+ if_block->AddSuccessor(latch);
+ if_block->AddSuccessor(temp1);
+ temp1->AddSuccessor(header);
+
+ if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+ HInstructionIterator it(header->GetPhis());
+ DCHECK(!it.Done());
+ HPhi* loop_phi = it.Current()->AsPhi();
+ HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32,
+ loop_phi,
+ graph_->GetIntConstant(2));
+ temp1->AddInstruction(temp_add);
+ temp1->AddInstruction(new (GetAllocator()) HGoto());
+ loop_phi->AddInput(temp_add);
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollSimpleHelper helper(loop_info);
+ HBasicBlock* new_header = helper.DoPeeling();
+ EXPECT_EQ(header, new_header);
+
+ EXPECT_TRUE(CheckGraph());
+ EXPECT_EQ(header->GetPredecessors().size(), 3u);
+}
+
+static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header,
+ HBasicBlock* loop2_header,
+ HBasicBlock* loop3_header) {
+ EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header);
+ EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header);
+ EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header);
+ EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(),
+ loop2_header);
+}
+
+TEST_F(SuperblockClonerTest, LoopPeelingNested) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3
+ // [ ], [ [ ] ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation();
+
+ // Check nested loops structure.
+ CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+ PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation());
+ helper.DoPeeling();
+ // Check that nested loops structure has not changed after the transformation.
+ CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+
+ // Test that the loop info is preserved.
+ EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation());
+ EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation());
+
+ EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before);
+ EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr);
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr);
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+// Checks that the loop population is correctly propagated after an inner loop is peeled.
+TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3 4
+ // [ [ [ ] ] ], [ ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop4_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation());
+ helper.DoPeeling();
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+ HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+
+ EXPECT_TRUE(loop1->Contains(*loop2_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+ // Check that loop4 info has not been touched after local run of AnalyzeLoops.
+ EXPECT_EQ(loop4, loop4_header->GetLoopInformation());
+
+ EXPECT_TRUE(loop1->IsIn(*loop1));
+ EXPECT_TRUE(loop2->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop2));
+ EXPECT_TRUE(!loop4->IsIn(*loop1));
+
+ EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr);
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2);
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+// Checks the case when an inner loop has an exit not to its immediate outer loop but to some
+// other loop in the hierarchy. Loop population information must be valid after loop peeling.
+TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops then peel loop3.
+ // Headers: 1 2 3
+ // [ [ [ ] ] ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+ HBasicBlock* loop_body1 = loop_body;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+ HBasicBlock* loop_body3 = loop_body;
+
+ // Change loop3: insert an exit which leads to loop1.
+ HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(loop3_extra_if_block);
+ loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+ loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block);
+ loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit.
+ loop3_extra_if_block->AddSuccessor(loop_body3);
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+ EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit));
+
+ PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation());
+ helper.DoPeeling();
+
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ // Check that after the transformation the local region for CF adjustments has been
+ // chosen correctly and the loop population has been updated.
+ loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+ EXPECT_TRUE(loop1->Contains(*loop3_long_exit));
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1);
+
+ EXPECT_TRUE(loop1->Contains(*loop3_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+ EXPECT_TRUE(CheckGraph());
+}
+
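+// Checks that SuperblockCloner::IsFastCase rejects a cloning configuration which
+// no longer describes a plain loop peel/unroll: here the loop pre-header is added
+// to the block set to be cloned and the incoming edge is remapped above it.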
+TEST_F(SuperblockClonerTest, FastCaseCheck) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+
+ ArenaBitVector orig_bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ orig_bb_set.Union(&loop_info->GetBlocks());
+
+ HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ CollectRemappingInfoForPeelUnroll(/* to_unroll */ true,
+ loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ // Insert some extra nodes and edges into the cloning configuration.
+ HBasicBlock* preheader = loop_info->GetPreHeader();
+ orig_bb_set.SetBit(preheader->GetBlockId());
+
+ // Adjust incoming edges.
+ remap_incoming.Clear();
+ remap_incoming.Insert(HEdge(preheader->GetSinglePredecessor(), preheader));
+
+ HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+ SuperblockCloner cloner(graph_,
+ &orig_bb_set,
+ &bb_map,
+ &hir_map);
+ cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+
+ EXPECT_FALSE(cloner.IsFastCase());
+}
+
+// Helper for FindCommonLoop, which also checks that FindCommonLoop is symmetric.
+static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) {
+ HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2);
+ HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1);
+ EXPECT_EQ(common_loop21, common_loop12);
+ return common_loop12;
+}
+
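+// A hedged sketch of the semantics exercised below, not necessarily how
+// FindCommonLoop is implemented in superblock_cloner.cc: the common loop of
+// two loops is their nearest common ancestor in the loop nest, where the
+// parent of a loop is the loop containing its pre-header and nullptr acts as
+// the "no loop" root. The function name is illustrative only.
+static HLoopInformation* NaiveFindCommonLoop(HLoopInformation* loop1,
+                                             HLoopInformation* loop2) {
+  // Walk up loop1's ancestor chain; IsIn() is reflexive, so the first
+  // ancestor of loop1 that contains loop2 is the nearest common one.
+  for (HLoopInformation* it = loop1; it != nullptr;
+       it = it->GetPreHeader()->GetLoopInformation()) {
+    if (loop2 != nullptr && loop2->IsIn(*it)) {
+      return it;
+    }
+  }
+  return nullptr;
+}
+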
+// Tests FindCommonLoop function on a loop nest.
+TEST_F(SuperblockClonerTest, FindCommonLoop) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops:
+ // Headers: 1 2 3 4 5
+ // [ [ [ ] ], [ ] ], [ ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop4_header = header;
+
+ CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop5_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+ HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+ HLoopInformation* loop5 = loop5_header->GetLoopInformation();
+
+ EXPECT_TRUE(loop1->IsIn(*loop1));
+ EXPECT_TRUE(loop2->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop2));
+ EXPECT_TRUE(loop4->IsIn(*loop1));
+
+ EXPECT_FALSE(loop5->IsIn(*loop1));
+ EXPECT_FALSE(loop4->IsIn(*loop2));
+ EXPECT_FALSE(loop4->IsIn(*loop3));
+
+ EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1);
+
+ EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5);
+}
+
} // namespace art