Diffstat (limited to 'compiler')
132 files changed, 7539 insertions, 10283 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index e42261c5561..cde64b058cb 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -39,7 +39,6 @@ art_cc_defaults { "linker/file_output_stream.cc", "linker/output_stream.cc", "linker/vector_output_stream.cc", - "linker/relative_patcher.cc", "jit/jit_compiler.cc", "jit/jit_logger.cc", "jni/quick/calling_convention.cc", @@ -70,6 +69,7 @@ art_cc_defaults { "optimizing/load_store_analysis.cc", "optimizing/load_store_elimination.cc", "optimizing/locations.cc", + "optimizing/loop_analysis.cc", "optimizing/loop_optimization.cc", "optimizing/nodes.cc", "optimizing/optimization.cc", @@ -101,8 +101,6 @@ art_cc_defaults { arm: { srcs: [ "jni/quick/arm/calling_convention_arm.cc", - "linker/arm/relative_patcher_arm_base.cc", - "linker/arm/relative_patcher_thumb2.cc", "optimizing/code_generator_arm_vixl.cc", "optimizing/code_generator_vector_arm_vixl.cc", "optimizing/instruction_simplifier_arm.cc", @@ -119,7 +117,6 @@ art_cc_defaults { arm64: { srcs: [ "jni/quick/arm64/calling_convention_arm64.cc", - "linker/arm64/relative_patcher_arm64.cc", "optimizing/code_generator_arm64.cc", "optimizing/code_generator_vector_arm64.cc", "optimizing/scheduler_arm64.cc", @@ -133,7 +130,6 @@ art_cc_defaults { mips: { srcs: [ "jni/quick/mips/calling_convention_mips.cc", - "linker/mips/relative_patcher_mips.cc", "optimizing/code_generator_mips.cc", "optimizing/code_generator_vector_mips.cc", "optimizing/instruction_simplifier_mips.cc", @@ -146,7 +142,6 @@ art_cc_defaults { mips64: { srcs: [ "jni/quick/mips64/calling_convention_mips64.cc", - "linker/mips64/relative_patcher_mips64.cc", "optimizing/code_generator_mips64.cc", "optimizing/code_generator_vector_mips64.cc", "optimizing/intrinsics_mips64.cc", @@ -157,8 +152,6 @@ art_cc_defaults { x86: { srcs: [ "jni/quick/x86/calling_convention_x86.cc", - "linker/x86/relative_patcher_x86.cc", - "linker/x86/relative_patcher_x86_base.cc", "optimizing/code_generator_x86.cc", "optimizing/code_generator_vector_x86.cc", "optimizing/intrinsics_x86.cc", @@ -172,7 +165,6 @@ art_cc_defaults { x86_64: { srcs: [ "jni/quick/x86_64/calling_convention_x86_64.cc", - "linker/x86_64/relative_patcher_x86_64.cc", "optimizing/intrinsics_x86_64.cc", "optimizing/code_generator_x86_64.cc", "optimizing/code_generator_vector_x86_64.cc", @@ -372,31 +364,25 @@ art_cc_test { codegen: { arm: { srcs: [ - "linker/arm/relative_patcher_thumb2_test.cc", "utils/arm/managed_register_arm_test.cc", ], }, arm64: { srcs: [ - "linker/arm64/relative_patcher_arm64_test.cc", "utils/arm64/managed_register_arm64_test.cc", ], }, mips: { srcs: [ - "linker/mips/relative_patcher_mips_test.cc", - "linker/mips/relative_patcher_mips32r6_test.cc", ], }, mips64: { srcs: [ - "linker/mips64/relative_patcher_mips64_test.cc", "utils/mips64/managed_register_mips64_test.cc", ], }, x86: { srcs: [ - "linker/x86/relative_patcher_x86_test.cc", "utils/x86/managed_register_x86_test.cc", // These tests are testing architecture-independent @@ -412,7 +398,8 @@ art_cc_test { }, x86_64: { srcs: [ - "linker/x86_64/relative_patcher_x86_64_test.cc", + // Is this test a bit-rotten copy of the x86 test? 
b/77951326 + // "utils/x86_64/managed_register_x86_64_test.cc", ], }, }, diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index 29ff235cea7..581edaa773c 100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -37,8 +37,8 @@ constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT; class CFITest : public dwarf::DwarfTest { public: void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, - const std::vector<uint8_t>& actual_asm, - const std::vector<uint8_t>& actual_cfi) { + ArrayRef<const uint8_t> actual_asm, + ArrayRef<const uint8_t> actual_cfi) { std::vector<std::string> lines; // Print the raw bytes. fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); @@ -50,11 +50,18 @@ class CFITest : public dwarf::DwarfTest { // Pretty-print CFI opcodes. constexpr bool is64bit = false; dwarf::DebugFrameOpCodeWriter<> initial_opcodes; - dwarf::WriteCIE(is64bit, dwarf::Reg(8), - initial_opcodes, kCFIFormat, &debug_frame_data_); + dwarf::WriteCIE(is64bit, dwarf::Reg(8), initial_opcodes, kCFIFormat, &debug_frame_data_); std::vector<uintptr_t> debug_frame_patches; - dwarf::WriteFDE(is64bit, 0, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi), - kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches); + dwarf::WriteFDE(is64bit, + /* section_address */ 0, + /* cie_address */ 0, + /* code_address */ 0, + actual_asm.size(), + actual_cfi, + kCFIFormat, + /* buffer_address */ 0, + &debug_frame_data_, + &debug_frame_patches); ReformatCfi(Objdump(false, "-W"), &lines); // Pretty-print assembly. const uint8_t* asm_base = actual_asm.data(); @@ -142,7 +149,7 @@ class CFITest : public dwarf::DwarfTest { } // Pretty-print byte array. 12 bytes per line. - static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + static void HexDump(FILE* f, ArrayRef<const uint8_t> data) { for (size_t i = 0; i < data.size(); i++) { fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. fprintf(f, "0x%02X,", data[i]); diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index d3e3a51f7a5..96a0c1be4db 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -138,20 +138,6 @@ std::unordered_set<std::string>* CommonCompilerTest::GetImageClasses() { return new std::unordered_set<std::string>(); } -// Get the set of compiled classes given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetCompiledClasses() { - // Null, no selection of compiled-classes. - return nullptr; -} - -// Get the set of compiled methods given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetCompiledMethods() { - // Null, no selection of compiled-methods. - return nullptr; -} - // Get ProfileCompilationInfo that should be passed to the driver. ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() { // Null, profile information will not be taken into account. 
@@ -190,8 +176,6 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, isa, instruction_set_features_.get(), GetImageClasses(), - GetCompiledClasses(), - GetCompiledMethods(), number_of_threads, /* swap_fd */ -1, GetProfileCompilationInfo())); diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 8af29d44f0c..39c8bd817bb 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -67,14 +67,6 @@ class CommonCompilerTest : public CommonRuntimeTest { // driver assumes ownership of the set, so the test should properly release the set. virtual std::unordered_set<std::string>* GetImageClasses(); - // Get the set of compiled classes given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. - virtual std::unordered_set<std::string>* GetCompiledClasses(); - - // Get the set of compiled methods given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. - virtual std::unordered_set<std::string>* GetCompiledMethods(); - virtual ProfileCompilationInfo* GetProfileCompilationInfo(); virtual CompilerFilter::Filter GetCompilerFilter() const { diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc index a26a985ff9b..aa8277edb4d 100644 --- a/compiler/driver/compiled_method_storage.cc +++ b/compiler/driver/compiled_method_storage.cc @@ -161,6 +161,46 @@ class CompiledMethodStorage::LengthPrefixedArrayAlloc { SwapSpace* const swap_space_; }; +class CompiledMethodStorage::ThunkMapKey { + public: + ThunkMapKey(linker::LinkerPatch::Type type, uint32_t custom_value1, uint32_t custom_value2) + : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) {} + + bool operator<(const ThunkMapKey& other) const { + if (custom_value1_ != other.custom_value1_) { + return custom_value1_ < other.custom_value1_; + } + if (custom_value2_ != other.custom_value2_) { + return custom_value2_ < other.custom_value2_; + } + return type_ < other.type_; + } + + private: + linker::LinkerPatch::Type type_; + uint32_t custom_value1_; + uint32_t custom_value2_; +}; + +class CompiledMethodStorage::ThunkMapValue { + public: + ThunkMapValue(std::vector<uint8_t, SwapAllocator<uint8_t>>&& code, + const std::string& debug_name) + : code_(std::move(code)), debug_name_(debug_name) {} + + ArrayRef<const uint8_t> GetCode() const { + return ArrayRef<const uint8_t>(code_); + } + + const std::string& GetDebugName() const { + return debug_name_; + } + + private: + std::vector<uint8_t, SwapAllocator<uint8_t>> code_; + std::string debug_name_; +}; + CompiledMethodStorage::CompiledMethodStorage(int swap_fd) : swap_space_(swap_fd == -1 ? 
nullptr : new SwapSpace(swap_fd, 10 * MB)), dedupe_enabled_(true), @@ -171,7 +211,9 @@ CompiledMethodStorage::CompiledMethodStorage(int swap_fd) LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), dedupe_linker_patches_("dedupe cfi info", - LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())) { + LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())), + thunk_map_lock_("thunk_map_lock"), + thunk_map_(std::less<ThunkMapKey>(), SwapAllocator<ThunkMapValueType>(swap_space_.get())) { } CompiledMethodStorage::~CompiledMethodStorage() { @@ -237,4 +279,55 @@ void CompiledMethodStorage::ReleaseLinkerPatches( ReleaseArrayIfNotDeduplicated(linker_patches); } +CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey( + const linker::LinkerPatch& linker_patch) { + uint32_t custom_value1 = 0u; + uint32_t custom_value2 = 0u; + switch (linker_patch.GetType()) { + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + custom_value1 = linker_patch.GetBakerCustomValue1(); + custom_value2 = linker_patch.GetBakerCustomValue2(); + break; + case linker::LinkerPatch::Type::kCallRelative: + // No custom values. + break; + default: + LOG(FATAL) << "Unexpected patch type: " << linker_patch.GetType(); + UNREACHABLE(); + } + return ThunkMapKey(linker_patch.GetType(), custom_value1, custom_value2); +} + +ArrayRef<const uint8_t> CompiledMethodStorage::GetThunkCode(const linker::LinkerPatch& linker_patch, + /*out*/ std::string* debug_name) { + ThunkMapKey key = GetThunkMapKey(linker_patch); + MutexLock lock(Thread::Current(), thunk_map_lock_); + auto it = thunk_map_.find(key); + if (it != thunk_map_.end()) { + const ThunkMapValue& value = it->second; + if (debug_name != nullptr) { + *debug_name = value.GetDebugName(); + } + return value.GetCode(); + } else { + if (debug_name != nullptr) { + *debug_name = std::string(); + } + return ArrayRef<const uint8_t>(); + } +} + +void CompiledMethodStorage::SetThunkCode(const linker::LinkerPatch& linker_patch, + ArrayRef<const uint8_t> code, + const std::string& debug_name) { + DCHECK(!code.empty()); + ThunkMapKey key = GetThunkMapKey(linker_patch); + std::vector<uint8_t, SwapAllocator<uint8_t>> code_copy( + code.begin(), code.end(), SwapAllocator<uint8_t>(swap_space_.get())); + ThunkMapValue value(std::move(code_copy), debug_name); + MutexLock lock(Thread::Current(), thunk_map_lock_); + // Note: Multiple threads can try and compile the same thunk, so this may not create a new entry. + thunk_map_.emplace(key, std::move(value)); +} + } // namespace art diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h index 249f06c20f3..1634facb7ca 100644 --- a/compiler/driver/compiled_method_storage.h +++ b/compiler/driver/compiled_method_storage.h @@ -18,6 +18,7 @@ #define ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_ #include <iosfwd> +#include <map> #include <memory> #include "base/array_ref.h" @@ -67,7 +68,29 @@ class CompiledMethodStorage { const ArrayRef<const linker::LinkerPatch>& linker_patches); void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches); + // Returns the code associated with the given patch. + // If the code has not been set, returns empty data. + // If `debug_name` is not null, stores the associated debug name in `*debug_name`. 
+ ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& linker_patch, + /*out*/ std::string* debug_name = nullptr); + + // Sets the code and debug name associated with the given patch. + void SetThunkCode(const linker::LinkerPatch& linker_patch, + ArrayRef<const uint8_t> code, + const std::string& debug_name); + private: + class ThunkMapKey; + class ThunkMapValue; + using ThunkMapValueType = std::pair<const ThunkMapKey, ThunkMapValue>; + using ThunkMap = std::map<ThunkMapKey, + ThunkMapValue, + std::less<ThunkMapKey>, + SwapAllocator<ThunkMapValueType>>; + static_assert(std::is_same<ThunkMapValueType, ThunkMap::value_type>::value, "Value type check."); + + static ThunkMapKey GetThunkMapKey(const linker::LinkerPatch& linker_patch); + template <typename T, typename DedupeSetType> const LengthPrefixedArray<T>* AllocateOrDeduplicateArray(const ArrayRef<const T>& data, DedupeSetType* dedupe_set); @@ -102,6 +125,9 @@ class CompiledMethodStorage { ArrayDedupeSet<uint8_t> dedupe_cfi_info_; ArrayDedupeSet<linker::LinkerPatch> dedupe_linker_patches_; + Mutex thunk_map_lock_; + ThunkMap thunk_map_ GUARDED_BY(thunk_map_lock_); + DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage); }; diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 0769561d0ed..42fbba5109e 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ b/compiler/driver/compiled_method_storage_test.cc @@ -34,8 +34,6 @@ TEST(CompiledMethodStorage, Deduplicate) { /* instruction_set_ */ InstructionSet::kNone, /* instruction_set_features */ nullptr, /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, /* thread_count */ 1u, /* swap_fd */ -1, /* profile_compilation_info */ nullptr); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 53604761d12..41b7e7be47f 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -264,8 +264,6 @@ CompilerDriver::CompilerDriver( InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, std::unordered_set<std::string>* image_classes, - std::unordered_set<std::string>* compiled_classes, - std::unordered_set<std::string>* compiled_methods, size_t thread_count, int swap_fd, const ProfileCompilationInfo* profile_compilation_info) @@ -279,8 +277,6 @@ CompilerDriver::CompilerDriver( requires_constructor_barrier_lock_("constructor barrier lock"), non_relative_linker_patch_count_(0u), image_classes_(image_classes), - classes_to_compile_(compiled_classes), - methods_to_compile_(compiled_methods), number_of_soft_verifier_failures_(0), had_hard_verifier_failure_(false), parallel_thread_count_(thread_count), @@ -638,7 +634,6 @@ static void CompileMethodQuick( (verified_method->GetEncounteredVerificationFailures() & (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 && // Is eligable for compilation by methods-to-compile filter. - driver->IsMethodToCompile(method_ref) && driver->ShouldCompileBasedOnProfile(method_ref); if (compile) { @@ -781,7 +776,8 @@ void CompilerDriver::Resolve(jobject class_loader, // TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a // stable order. 
-static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, +static void ResolveConstStrings(ClassLinker* class_linker, + Handle<mirror::DexCache> dex_cache, const DexFile& dex_file, const DexFile::CodeItem* code_item) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -790,7 +786,6 @@ static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, return; } - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { switch (inst->Opcode()) { case Instruction::CONST_STRING: @@ -838,22 +833,105 @@ static void ResolveConstStrings(CompilerDriver* driver, dex_file->StringByTypeIdx(class_def.class_idx_)); if (!compilation_enabled) { // Compilation is skipped, do not resolve const-string in code of this class. - // TODO: Make sure that inlining honors this. + // FIXME: Make sure that inlining honors this. b/26687569 continue; } // Direct and virtual methods. - int64_t previous_method_idx = -1; while (it.HasNextMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - if (method_idx == previous_method_idx) { - // smali can create dex files with two encoded_methods sharing the same method_idx - // http://code.google.com/p/smali/issues/detail?id=119 - it.Next(); - continue; + ResolveConstStrings(class_linker, dex_cache, *dex_file, it.GetMethodCodeItem()); + it.Next(); + } + DCHECK(!it.HasNext()); + } + } +} + +// Initialize type check bit strings for check-cast and instance-of in the code. Done to have +// deterministic allocation behavior. Right now this is single-threaded for simplicity. +// TODO: Collect the relevant type indices in parallel, then process them sequentially in a +// stable order. + +static void InitializeTypeCheckBitstrings(CompilerDriver* driver, + ClassLinker* class_linker, + Handle<mirror::DexCache> dex_cache, + const DexFile& dex_file, + const DexFile::CodeItem* code_item) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (code_item == nullptr) { + // Abstract or native method. + return; + } + + for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { + switch (inst->Opcode()) { + case Instruction::CHECK_CAST: + case Instruction::INSTANCE_OF: { + dex::TypeIndex type_index( + (inst->Opcode() == Instruction::CHECK_CAST) ? inst->VRegB_21c() : inst->VRegC_22c()); + const char* descriptor = dex_file.StringByTypeIdx(type_index); + // We currently do not use the bitstring type check for array or final (including + // primitive) classes. We may reconsider this in future if it's deemed to be beneficial. + // And we cannot use it for classes outside the boot image as we do not know the runtime + // value of their bitstring when compiling (it may not even get assigned at runtime). + if (descriptor[0] == 'L' && driver->IsImageClass(descriptor)) { + ObjPtr<mirror::Class> klass = + class_linker->LookupResolvedType(type_index, + dex_cache.Get(), + /* class_loader */ nullptr); + CHECK(klass != nullptr) << descriptor << " should have been previously resolved."; + // Now assign the bitstring if the class is not final. Keep this in sync with sharpening. 
+ if (!klass->IsFinal()) { + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + } } - previous_method_idx = method_idx; - ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem()); + break; + } + + default: + break; + } + } +} + +static void InitializeTypeCheckBitstrings(CompilerDriver* driver, + const std::vector<const DexFile*>& dex_files, + TimingLogger* timings) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); + + for (const DexFile* dex_file : dex_files) { + dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file)); + TimingLogger::ScopedTiming t("Initialize type check bitstrings", timings); + + size_t class_def_count = dex_file->NumClassDefs(); + for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) { + const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); + + const uint8_t* class_data = dex_file->GetClassData(class_def); + if (class_data == nullptr) { + // empty class, probably a marker interface + continue; + } + + ClassDataItemIterator it(*dex_file, class_data); + it.SkipAllFields(); + + bool compilation_enabled = driver->IsClassToCompile( + dex_file->StringByTypeIdx(class_def.class_idx_)); + if (!compilation_enabled) { + // Compilation is skipped, do not look for type checks in code of this class. + // FIXME: Make sure that inlining honors this. b/26687569 + continue; + } + + // Direct and virtual methods. + while (it.HasNextMethod()) { + InitializeTypeCheckBitstrings( + driver, class_linker, dex_cache, *dex_file, it.GetMethodCodeItem()); it.Next(); } DCHECK(!it.HasNext()); @@ -955,6 +1033,14 @@ void CompilerDriver::PreCompile(jobject class_loader, UpdateImageClasses(timings); VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false); + + if (kBitstringSubtypeCheckEnabled && + GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) { + // Initialize type check bit string used by check-cast and instanceof. + // Do this now to have a deterministic image. + // Note: This is done after UpdateImageClasses() at it relies on the image classes to be final. + InitializeTypeCheckBitstrings(this, dex_files, timings); + } } bool CompilerDriver::IsImageClass(const char* descriptor) const { @@ -974,15 +1060,6 @@ bool CompilerDriver::IsClassToCompile(const char* descriptor) const { return classes_to_compile_->find(descriptor) != classes_to_compile_->end(); } -bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const { - if (methods_to_compile_ == nullptr) { - return true; - } - - std::string tmp = method_ref.PrettyMethod(); - return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end(); -} - bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const { // Profile compilation info may be null if no profile is passed. 
if (!CompilerFilter::DependsOnProfile(compiler_options_->GetCompilerFilter())) { @@ -1555,7 +1632,7 @@ class ParallelCompilationManager { self->AssertNoPendingException(); CHECK_GT(work_units, 0U); - index_.StoreRelaxed(begin); + index_.store(begin, std::memory_order_relaxed); for (size_t i = 0; i < work_units; ++i) { thread_pool_->AddTask(self, new ForAllClosureLambda<Fn>(this, end, fn)); } @@ -1573,7 +1650,7 @@ class ParallelCompilationManager { } size_t NextIndex() { - return index_.FetchAndAddSequentiallyConsistent(1); + return index_.fetch_add(1, std::memory_order_seq_cst); } private: @@ -2838,7 +2915,8 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, /*expected*/ nullptr, compiled_method); CHECK(result == MethodTable::kInsertResultSuccess); - non_relative_linker_patch_count_.FetchAndAddRelaxed(non_relative_linker_patch_count); + non_relative_linker_patch_count_.fetch_add(non_relative_linker_patch_count, + std::memory_order_relaxed); DCHECK(GetCompiledMethod(method_ref) != nullptr) << method_ref.PrettyMethod(); } @@ -2949,7 +3027,7 @@ bool CompilerDriver::IsMethodVerifiedWithoutFailures(uint32_t method_idx, } size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const { - return non_relative_linker_patch_count_.LoadRelaxed(); + return non_relative_linker_patch_count_.load(std::memory_order_relaxed); } void CompilerDriver::SetRequiresConstructorBarrier(Thread* self, diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index a5462eefe2a..55f3561e3a8 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -100,8 +100,6 @@ class CompilerDriver { InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, std::unordered_set<std::string>* image_classes, - std::unordered_set<std::string>* compiled_classes, - std::unordered_set<std::string>* compiled_methods, size_t thread_count, int swap_fd, const ProfileCompilationInfo* profile_compilation_info); @@ -316,9 +314,6 @@ class CompilerDriver { // Checks whether the provided class should be compiled, i.e., is in classes_to_compile_. bool IsClassToCompile(const char* descriptor) const; - // Checks whether the provided method should be compiled, i.e., is in method_to_compile_. - bool IsMethodToCompile(const MethodReference& method_ref) const; - // Checks whether profile guided compilation is enabled and if the method should be compiled // according to the profile file. bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const; @@ -505,12 +500,8 @@ class CompilerDriver { // This option may be restricted to the boot image, depending on a flag in the implementation. std::unique_ptr<std::unordered_set<std::string>> classes_to_compile_; - // Specifies the methods that will be compiled. Note that if methods_to_compile_ is null, - // all methods are eligible for compilation (compilation filters etc. will still apply). - // This option may be restricted to the boot image, depending on a flag in the implementation. - std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_; - std::atomic<uint32_t> number_of_soft_verifier_failures_; + bool had_hard_verifier_failure_; // A thread pool that can (potentially) run tasks in parallel. 
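A rough standalone sketch of the thunk deduplication pattern that the new CompiledMethodStorage::GetThunkCode()/SetThunkCode() interface (added in compiled_method_storage.h above) enables: callers first look up a cached thunk for a (patch type, custom value1, custom value2) key and only compile and store one if the lookup comes back empty. This is a simplified, standard-library-only analogue; the ThunkCache class and its member names are illustrative and do not match ART's actual SwapAllocator-backed, ArrayRef-returning implementation.

#include <cstdint>
#include <map>
#include <mutex>
#include <string>
#include <tuple>
#include <vector>

// Simplified stand-in for the (patch type, custom value1, custom value2) key.
using ThunkKey = std::tuple<int, uint32_t, uint32_t>;

class ThunkCache {  // Hypothetical name; mirrors the thunk map pattern above.
 public:
  // Returns the cached code for `key`, or an empty vector if nothing was stored yet.
  // Optionally reports the stored debug name, mirroring GetThunkCode()'s out-parameter.
  std::vector<uint8_t> GetThunkCode(const ThunkKey& key, std::string* debug_name = nullptr) {
    std::lock_guard<std::mutex> lock(lock_);
    auto it = map_.find(key);
    if (it == map_.end()) {
      if (debug_name != nullptr) debug_name->clear();
      return {};
    }
    if (debug_name != nullptr) *debug_name = it->second.debug_name;
    return it->second.code;
  }

  // Stores code for `key`. Multiple threads may compile the same thunk concurrently;
  // emplace() keeps the first entry, matching the note in SetThunkCode() above.
  void SetThunkCode(const ThunkKey& key, std::vector<uint8_t> code, std::string debug_name) {
    std::lock_guard<std::mutex> lock(lock_);
    map_.emplace(key, Value{std::move(code), std::move(debug_name)});
  }

 private:
  struct Value {
    std::vector<uint8_t> code;
    std::string debug_name;
  };
  std::mutex lock_;
  std::map<ThunkKey, Value> map_;
};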
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 162904c0e73..1332280d20c 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -184,59 +184,6 @@ TEST_F(CompilerDriverTest, AbstractMethodErrorStub) { } } -class CompilerDriverMethodsTest : public CompilerDriverTest { - protected: - std::unordered_set<std::string>* GetCompiledMethods() OVERRIDE { - return new std::unordered_set<std::string>({ - "byte StaticLeafMethods.identity(byte)", - "int StaticLeafMethods.sum(int, int, int)", - "double StaticLeafMethods.sum(double, double, double, double)" - }); - } -}; - -TEST_F(CompilerDriverMethodsTest, Selection) { - Thread* self = Thread::Current(); - jobject class_loader; - { - ScopedObjectAccess soa(self); - class_loader = LoadDex("StaticLeafMethods"); - } - ASSERT_NE(class_loader, nullptr); - - // Need to enable dex-file writability. Methods rejected to be compiled will run through the - // dex-to-dex compiler. - for (const DexFile* dex_file : GetDexFiles(class_loader)) { - ASSERT_TRUE(dex_file->EnableWrite()); - } - - CompileAll(class_loader); - - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ScopedObjectAccess soa(self); - StackHandleScope<1> hs(self); - Handle<mirror::ClassLoader> h_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); - mirror::Class* klass = class_linker->FindClass(self, "LStaticLeafMethods;", h_loader); - ASSERT_NE(klass, nullptr); - - std::unique_ptr<std::unordered_set<std::string>> expected(GetCompiledMethods()); - - const auto pointer_size = class_linker->GetImagePointerSize(); - for (auto& m : klass->GetDirectMethods(pointer_size)) { - std::string name = m.PrettyMethod(true); - const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); - ASSERT_NE(code, nullptr); - if (expected->find(name) != expected->end()) { - expected->erase(name); - EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code)); - } else { - EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code)); - } - } - EXPECT_TRUE(expected->empty()); -} - class CompilerDriverProfileTest : public CompilerDriverTest { protected: ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE { diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index f582341b180..c139fcf1d8a 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -20,6 +20,7 @@ #include "base/callee_save_type.h" #include "base/enums.h" #include "base/leb128.h" +#include "base/malloc_arena_pool.h" #include "class_linker.h" #include "common_runtime_test.h" #include "dex/code_item_accessors-inl.h" @@ -67,7 +68,7 @@ class ExceptionTest : public CommonRuntimeTest { fake_code_.push_back(0x70 | i); } - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stack_maps(&allocator, kRuntimeISA); diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index ac5c6fb01f8..0de00a82fa4 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -142,8 +142,6 @@ JitCompiler::JitCompiler() { instruction_set, instruction_set_features_.get(), /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, /* thread_count */ 1, /* swap_fd */ -1, /* profile_compilation_info */ nullptr)); diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 236b5c0c2e3..920a3a8da63 100644 --- a/compiler/jni/jni_cfi_test.cc 
+++ b/compiler/jni/jni_cfi_test.cc @@ -20,6 +20,7 @@ #include "arch/instruction_set.h" #include "base/arena_allocator.h" #include "base/enums.h" +#include "base/malloc_arena_pool.h" #include "cfi_test.h" #include "gtest/gtest.h" #include "jni/quick/calling_convention.h" @@ -61,7 +62,7 @@ class JNICFITest : public CFITest { const bool is_synchronized = false; const char* shorty = "IIFII"; - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); std::unique_ptr<JniCallingConvention> jni_conv( @@ -94,7 +95,11 @@ class JNICFITest : public CFITest { const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); if (kGenerateExpected) { - GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + GenerateExpected(stdout, + isa, + isa_str, + ArrayRef<const uint8_t>(actual_asm), + ArrayRef<const uint8_t>(actual_cfi)); } else { EXPECT_EQ(expected_asm, actual_asm); EXPECT_EQ(expected_cfi, actual_cfi); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 451a9099651..730a1a63e8e 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -21,6 +21,7 @@ #include "art_method-inl.h" #include "base/bit_utils.h" +#include "base/mem_map.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiler.h" @@ -29,7 +30,6 @@ #include "indirect_reference_table.h" #include "java_vm_ext.h" #include "jni_internal.h" -#include "mem_map.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" #include "mirror/object-inl.h" diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index d001cfe4fc5..8cb1998f7f6 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -27,6 +27,8 @@ #include "base/enums.h" #include "base/logging.h" // For VLOG. #include "base/macros.h" +#include "base/malloc_arena_pool.h" +#include "base/memory_region.h" #include "base/utils.h" #include "calling_convention.h" #include "class_linker.h" @@ -36,7 +38,6 @@ #include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "jni_env_ext.h" -#include "memory_region.h" #include "thread.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm64/managed_register_arm64.h" @@ -174,7 +175,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, } } - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); // Calling conventions used to iterate over parameters to method diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc deleted file mode 100644 index 6e0286afac1..00000000000 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/arm/relative_patcher_arm_base.h" - -#include "base/stl_util.h" -#include "compiled_method-inl.h" -#include "debug/method_debug_info.h" -#include "dex/dex_file_types.h" -#include "linker/linker_patch.h" -#include "linker/output_stream.h" -#include "oat.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class ArmBaseRelativePatcher::ThunkData { - public: - ThunkData(std::vector<uint8_t> code, uint32_t max_next_offset) - : code_(std::move(code)), - offsets_(), - max_next_offset_(max_next_offset), - pending_offset_(0u) { - DCHECK(NeedsNextThunk()); // The data is constructed only when we expect to need the thunk. - } - - ThunkData(ThunkData&& src) = default; - - size_t CodeSize() const { - return code_.size(); - } - - ArrayRef<const uint8_t> GetCode() const { - return ArrayRef<const uint8_t>(code_); - } - - bool NeedsNextThunk() const { - return max_next_offset_ != 0u; - } - - uint32_t MaxNextOffset() const { - DCHECK(NeedsNextThunk()); - return max_next_offset_; - } - - void ClearMaxNextOffset() { - DCHECK(NeedsNextThunk()); - max_next_offset_ = 0u; - } - - void SetMaxNextOffset(uint32_t max_next_offset) { - DCHECK(!NeedsNextThunk()); - max_next_offset_ = max_next_offset; - } - - // Adjust the MaxNextOffset() down if needed to fit the code before the next thunk. - // Returns true if it was adjusted, false if the old value was kept. - bool MakeSpaceBefore(const ThunkData& next_thunk, size_t alignment) { - DCHECK(NeedsNextThunk()); - DCHECK(next_thunk.NeedsNextThunk()); - DCHECK_ALIGNED_PARAM(MaxNextOffset(), alignment); - DCHECK_ALIGNED_PARAM(next_thunk.MaxNextOffset(), alignment); - if (next_thunk.MaxNextOffset() - CodeSize() < MaxNextOffset()) { - max_next_offset_ = RoundDown(next_thunk.MaxNextOffset() - CodeSize(), alignment); - return true; - } else { - return false; - } - } - - uint32_t ReserveOffset(size_t offset) { - DCHECK(NeedsNextThunk()); - DCHECK_LE(offset, max_next_offset_); - max_next_offset_ = 0u; // The reserved offset should satisfy all pending references. - offsets_.push_back(offset); - return offset + CodeSize(); - } - - bool HasReservedOffset() const { - return !offsets_.empty(); - } - - uint32_t LastReservedOffset() const { - DCHECK(HasReservedOffset()); - return offsets_.back(); - } - - bool HasPendingOffset() const { - return pending_offset_ != offsets_.size(); - } - - uint32_t GetPendingOffset() const { - DCHECK(HasPendingOffset()); - return offsets_[pending_offset_]; - } - - void MarkPendingOffsetAsWritten() { - DCHECK(HasPendingOffset()); - ++pending_offset_; - } - - bool HasWrittenOffset() const { - return pending_offset_ != 0u; - } - - uint32_t LastWrittenOffset() const { - DCHECK(HasWrittenOffset()); - return offsets_[pending_offset_ - 1u]; - } - - size_t IndexOfFirstThunkAtOrAfter(uint32_t offset) const { - size_t number_of_thunks = NumberOfThunks(); - for (size_t i = 0; i != number_of_thunks; ++i) { - if (GetThunkOffset(i) >= offset) { - return i; - } - } - return number_of_thunks; - } - - size_t NumberOfThunks() const { - return offsets_.size(); - } - - uint32_t GetThunkOffset(size_t index) const { - DCHECK_LT(index, NumberOfThunks()); - return offsets_[index]; - } - - private: - std::vector<uint8_t> code_; // The code of the thunk. - std::vector<uint32_t> offsets_; // Offsets at which the thunk needs to be written. - uint32_t max_next_offset_; // The maximum offset at which the next thunk can be placed. - uint32_t pending_offset_; // The index of the next offset to write. 
-}; - -class ArmBaseRelativePatcher::PendingThunkComparator { - public: - bool operator()(const ThunkData* lhs, const ThunkData* rhs) const { - DCHECK(lhs->HasPendingOffset()); - DCHECK(rhs->HasPendingOffset()); - // The top of the heap is defined to contain the highest element and we want to pick - // the thunk with the smallest pending offset, so use the reverse ordering, i.e. ">". - return lhs->GetPendingOffset() > rhs->GetPendingOffset(); - } -}; - -uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) { - return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); -} - -uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - // For multi-oat compilations (boot image), ReserveSpaceEnd() is called for each oat file. - // Since we do not know here whether this is the last file or whether the next opportunity - // to place thunk will be soon enough, we need to reserve all needed thunks now. Code for - // subsequent oat files can still call back to them. - if (!unprocessed_method_call_patches_.empty()) { - ResolveMethodCalls(offset, MethodReference(nullptr, dex::kDexNoIndex)); - } - for (ThunkData* data : unreserved_thunks_) { - uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); - offset = data->ReserveOffset(thunk_offset); - } - unreserved_thunks_.clear(); - // We also need to delay initiating the pending_thunks_ until the call to WriteThunks(). - // Check that the `pending_thunks_.capacity()` indicates that no WriteThunks() has taken place. - DCHECK_EQ(pending_thunks_.capacity(), 0u); - return offset; -} - -uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { - if (pending_thunks_.capacity() == 0u) { - if (thunks_.empty()) { - return offset; - } - // First call to WriteThunks(), prepare the thunks for writing. - pending_thunks_.reserve(thunks_.size()); - for (auto& entry : thunks_) { - ThunkData* data = &entry.second; - if (data->HasPendingOffset()) { - pending_thunks_.push_back(data); - } - } - std::make_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - } - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - while (!pending_thunks_.empty() && - pending_thunks_.front()->GetPendingOffset() == aligned_offset) { - // Write alignment bytes and code. - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && UNLIKELY(!WriteCodeAlignment(out, aligned_code_delta))) { - return 0u; - } - if (UNLIKELY(!WriteThunk(out, pending_thunks_.front()->GetCode()))) { - return 0u; - } - offset = aligned_offset + pending_thunks_.front()->CodeSize(); - // Mark the thunk as written at the pending offset and update the `pending_thunks_` heap. 
- std::pop_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - pending_thunks_.back()->MarkPendingOffsetAsWritten(); - if (pending_thunks_.back()->HasPendingOffset()) { - std::push_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - } else { - pending_thunks_.pop_back(); - } - aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - } - DCHECK(pending_thunks_.empty() || pending_thunks_.front()->GetPendingOffset() > aligned_offset); - return offset; -} - -std::vector<debug::MethodDebugInfo> ArmBaseRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset) { - // For multi-oat compilation (boot image), `thunks_` records thunks for all oat files. - // To return debug info for the current oat file, we must ignore thunks before the - // `executable_offset` as they are in the previous oat files and this function must be - // called before reserving thunk positions for subsequent oat files. - size_t number_of_thunks = 0u; - for (auto&& entry : thunks_) { - const ThunkData& data = entry.second; - number_of_thunks += data.NumberOfThunks() - data.IndexOfFirstThunkAtOrAfter(executable_offset); - } - std::vector<debug::MethodDebugInfo> result; - result.reserve(number_of_thunks); - for (auto&& entry : thunks_) { - const ThunkKey& key = entry.first; - const ThunkData& data = entry.second; - size_t start = data.IndexOfFirstThunkAtOrAfter(executable_offset); - if (start == data.NumberOfThunks()) { - continue; - } - // Get the base name to use for the first occurrence of the thunk. - std::string base_name = GetThunkDebugName(key); - for (size_t i = start, num = data.NumberOfThunks(); i != num; ++i) { - debug::MethodDebugInfo info = {}; - if (i == 0u) { - info.custom_name = base_name; - } else { - // Add a disambiguating tag for subsequent identical thunks. Since the `thunks_` - // keeps records also for thunks in previous oat files, names based on the thunk - // index shall be unique across the whole multi-oat output. - info.custom_name = base_name + "_" + std::to_string(i); - } - info.isa = instruction_set_; - info.is_code_address_text_relative = true; - info.code_address = data.GetThunkOffset(i) - executable_offset; - info.code_size = data.CodeSize(); - result.push_back(std::move(info)); - } - } - return result; -} - -ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set) - : provider_(provider), - instruction_set_(instruction_set), - thunks_(), - unprocessed_method_call_patches_(), - method_call_thunk_(nullptr), - pending_thunks_() { -} - -ArmBaseRelativePatcher::~ArmBaseRelativePatcher() { - // All work done by member destructors. -} - -uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref, - uint32_t max_extra_space) { - // Adjust code size for extra space required by the subclass. 
- uint32_t max_code_size = compiled_method->GetQuickCode().size() + max_extra_space; - uint32_t code_offset; - uint32_t next_aligned_offset; - while (true) { - code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - next_aligned_offset = compiled_method->AlignCode(code_offset + max_code_size); - if (unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { - break; - } - ThunkData* thunk = unreserved_thunks_.front(); - if (thunk == method_call_thunk_) { - ResolveMethodCalls(code_offset, method_ref); - // This may have changed `method_call_thunk_` data, so re-check if we need to reserve. - if (unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { - break; - } - // We need to process the new `front()` whether it's still the `method_call_thunk_` or not. - thunk = unreserved_thunks_.front(); - } - unreserved_thunks_.pop_front(); - uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); - offset = thunk->ReserveOffset(thunk_offset); - if (thunk == method_call_thunk_) { - // All remaining method call patches will be handled by this thunk. - DCHECK(!unprocessed_method_call_patches_.empty()); - DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(), - MaxPositiveDisplacement(GetMethodCallKey())); - unprocessed_method_call_patches_.clear(); - } - } - - // Process patches and check that adding thunks for the current method did not push any - // thunks (previously existing or newly added) before `next_aligned_offset`. This is - // essentially a check that we never compile a method that's too big. The calls or branches - // from the method should be able to reach beyond the end of the method and over any pending - // thunks. (The number of different thunks should be relatively low and their code short.) - ProcessPatches(compiled_method, code_offset); - CHECK(unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset); - - return offset; -} - -uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_offset, - uint32_t target_offset) { - DCHECK(method_call_thunk_ != nullptr); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); - uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); - // NOTE: With unsigned arithmetic we do mean to use && rather than || below. - if (displacement > max_positive_displacement && displacement < -max_negative_displacement) { - // Unwritten thunks have higher offsets, check if it's within range. - DCHECK(!method_call_thunk_->HasPendingOffset() || - method_call_thunk_->GetPendingOffset() > patch_offset); - if (method_call_thunk_->HasPendingOffset() && - method_call_thunk_->GetPendingOffset() - patch_offset <= max_positive_displacement) { - displacement = method_call_thunk_->GetPendingOffset() - patch_offset; - } else { - // We must have a previous thunk then. 
- DCHECK(method_call_thunk_->HasWrittenOffset()); - DCHECK_LT(method_call_thunk_->LastWrittenOffset(), patch_offset); - displacement = method_call_thunk_->LastWrittenOffset() - patch_offset; - DCHECK_GE(displacement, -max_negative_displacement); - } - } - return displacement; -} - -uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset) { - auto it = thunks_.find(key); - CHECK(it != thunks_.end()); - const ThunkData& data = it->second; - if (data.HasWrittenOffset()) { - uint32_t offset = data.LastWrittenOffset(); - DCHECK_LT(offset, patch_offset); - if (patch_offset - offset <= MaxNegativeDisplacement(key)) { - return offset; - } - } - DCHECK(data.HasPendingOffset()); - uint32_t offset = data.GetPendingOffset(); - DCHECK_GT(offset, patch_offset); - DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key)); - return offset; -} - -ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() { - return ThunkKey(ThunkType::kMethodCall); -} - -ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey( - const LinkerPatch& patch) { - DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); - return ThunkKey(ThunkType::kBakerReadBarrier, - patch.GetBakerCustomValue1(), - patch.GetBakerCustomValue2()); -} - -void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method, - uint32_t code_offset) { - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - uint32_t patch_offset = code_offset + patch.LiteralOffset(); - ThunkKey key(static_cast<ThunkType>(-1)); - ThunkData* old_data = nullptr; - if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - key = GetMethodCallKey(); - unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod()); - if (method_call_thunk_ == nullptr) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); - auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset)); - method_call_thunk_ = &it->second; - AddUnreservedThunk(method_call_thunk_); - } else { - old_data = method_call_thunk_; - } - } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - key = GetBakerThunkKey(patch); - auto lb = thunks_.lower_bound(key); - if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); - auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset)); - AddUnreservedThunk(&it->second); - } else { - old_data = &lb->second; - } - } - if (old_data != nullptr) { - // Shared path where an old thunk may need an update. - DCHECK(key.GetType() != static_cast<ThunkType>(-1)); - DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset); - if (old_data->NeedsNextThunk()) { - // Patches for a method are ordered by literal offset, so if we still need to place - // this thunk for a previous patch, that thunk shall be in range for this patch. 
- DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key)); - } else { - if (!old_data->HasReservedOffset() || - patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) { - old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key)); - AddUnreservedThunk(old_data); - } - } - } - } -} - -void ArmBaseRelativePatcher::AddUnreservedThunk(ThunkData* data) { - DCHECK(data->NeedsNextThunk()); - size_t index = unreserved_thunks_.size(); - while (index != 0u && data->MaxNextOffset() < unreserved_thunks_[index - 1u]->MaxNextOffset()) { - --index; - } - unreserved_thunks_.insert(unreserved_thunks_.begin() + index, data); - // We may need to update the max next offset(s) if the thunk code would not fit. - size_t alignment = GetInstructionSetAlignment(instruction_set_); - if (index + 1u != unreserved_thunks_.size()) { - // Note: Ignore the return value as we need to process previous thunks regardless. - data->MakeSpaceBefore(*unreserved_thunks_[index + 1u], alignment); - } - // Make space for previous thunks. Once we find a pending thunk that does - // not need an adjustment, we can stop. - while (index != 0u && unreserved_thunks_[index - 1u]->MakeSpaceBefore(*data, alignment)) { - --index; - data = unreserved_thunks_[index]; - } -} - -void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, - MethodReference method_ref) { - DCHECK(!unreserved_thunks_.empty()); - DCHECK(!unprocessed_method_call_patches_.empty()); - DCHECK(method_call_thunk_ != nullptr); - uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); - uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); - // Process as many patches as possible, stop only on unresolved targets or calls too far back. - while (!unprocessed_method_call_patches_.empty()) { - MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod(); - uint32_t patch_offset = unprocessed_method_call_patches_.front().GetPatchOffset(); - DCHECK(!method_call_thunk_->HasReservedOffset() || - method_call_thunk_->LastReservedOffset() <= patch_offset); - if (!method_call_thunk_->HasReservedOffset() || - patch_offset - method_call_thunk_->LastReservedOffset() > max_negative_displacement) { - // No previous thunk in range, check if we can reach the target directly. - if (target_method == method_ref) { - DCHECK_GT(quick_code_offset, patch_offset); - if (quick_code_offset - patch_offset > max_positive_displacement) { - break; - } - } else { - auto result = provider_->FindMethodOffset(target_method); - if (!result.first) { - break; - } - uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_); - if (target_offset >= patch_offset) { - DCHECK_LE(target_offset - patch_offset, max_positive_displacement); - } else if (patch_offset - target_offset > max_negative_displacement) { - break; - } - } - } - unprocessed_method_call_patches_.pop_front(); - } - if (!unprocessed_method_call_patches_.empty()) { - // Try to adjust the max next offset in `method_call_thunk_`. Do this conservatively only if - // the thunk shall be at the end of the `unreserved_thunks_` to avoid dealing with overlaps. 
- uint32_t new_max_next_offset = - unprocessed_method_call_patches_.front().GetPatchOffset() + max_positive_displacement; - if (new_max_next_offset > - unreserved_thunks_.back()->MaxNextOffset() + unreserved_thunks_.back()->CodeSize()) { - method_call_thunk_->ClearMaxNextOffset(); - method_call_thunk_->SetMaxNextOffset(new_max_next_offset); - if (method_call_thunk_ != unreserved_thunks_.back()) { - RemoveElement(unreserved_thunks_, method_call_thunk_); - unreserved_thunks_.push_back(method_call_thunk_); - } - } - } else { - // We have resolved all method calls, we do not need a new thunk anymore. - method_call_thunk_->ClearMaxNextOffset(); - RemoveElement(unreserved_thunks_, method_call_thunk_); - } -} - -inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset, - const ThunkKey& key) { - return RoundDown(patch_offset + MaxPositiveDisplacement(key), - GetInstructionSetAlignment(instruction_set_)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h deleted file mode 100644 index ee09bf96b3d..00000000000 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ -#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ - -#include <deque> -#include <vector> - -#include "base/safe_map.h" -#include "dex/method_reference.h" -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class ArmBaseRelativePatcher : public RelativePatcher { - public: - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - protected: - ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set); - ~ArmBaseRelativePatcher(); - - enum class ThunkType { - kMethodCall, // Method call thunk. - kBakerReadBarrier, // Baker read barrier. 
- }; - - class ThunkKey { - public: - explicit ThunkKey(ThunkType type, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) - : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) { } - - ThunkType GetType() const { - return type_; - } - - uint32_t GetCustomValue1() const { - return custom_value1_; - } - - uint32_t GetCustomValue2() const { - return custom_value2_; - } - - private: - ThunkType type_; - uint32_t custom_value1_; - uint32_t custom_value2_; - }; - - class ThunkKeyCompare { - public: - bool operator()(const ThunkKey& lhs, const ThunkKey& rhs) const { - if (lhs.GetType() != rhs.GetType()) { - return lhs.GetType() < rhs.GetType(); - } - if (lhs.GetCustomValue1() != rhs.GetCustomValue1()) { - return lhs.GetCustomValue1() < rhs.GetCustomValue1(); - } - return lhs.GetCustomValue2() < rhs.GetCustomValue2(); - } - }; - - static ThunkKey GetMethodCallKey(); - static ThunkKey GetBakerThunkKey(const LinkerPatch& patch); - - uint32_t ReserveSpaceInternal(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref, - uint32_t max_extra_space); - uint32_t GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset); - - uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset, - uint32_t target_offset); - - virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0; - virtual std::string GetThunkDebugName(const ThunkKey& key) = 0; - virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0; - virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0; - - private: - class ThunkData; - - void ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset); - void AddUnreservedThunk(ThunkData* data); - - void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref); - - uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key); - - RelativePatcherTargetProvider* const provider_; - const InstructionSet instruction_set_; - - // The data for all thunks. - // SafeMap<> nodes don't move after being inserted, so we can use direct pointers to the data. - using ThunkMap = SafeMap<ThunkKey, ThunkData, ThunkKeyCompare>; - ThunkMap thunks_; - - // ReserveSpace() tracks unprocessed method call patches. These may be resolved later. - class UnprocessedMethodCallPatch { - public: - UnprocessedMethodCallPatch(uint32_t patch_offset, MethodReference target_method) - : patch_offset_(patch_offset), target_method_(target_method) { } - - uint32_t GetPatchOffset() const { - return patch_offset_; - } - - MethodReference GetTargetMethod() const { - return target_method_; - } - - private: - uint32_t patch_offset_; - MethodReference target_method_; - }; - std::deque<UnprocessedMethodCallPatch> unprocessed_method_call_patches_; - // Once we have compiled a method call thunk, cache pointer to the data. - ThunkData* method_call_thunk_; - - // Thunks - std::deque<ThunkData*> unreserved_thunks_; - - class PendingThunkComparator; - std::vector<ThunkData*> pending_thunks_; // Heap with the PendingThunkComparator. 
- - friend class Arm64RelativePatcherTest; - friend class Thumb2RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc deleted file mode 100644 index 78755176e43..00000000000 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm/relative_patcher_thumb2.h" - -#include <sstream> - -#include "arch/arm/asm_support_arm.h" -#include "art_method.h" -#include "base/bit_utils.h" -#include "compiled_method.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "linker/linker_patch.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "read_barrier.h" -#include "utils/arm/assembler_arm_vixl.h" - -namespace art { -namespace linker { - -// PC displacement from patch location; Thumb2 PC is always at instruction address + 4. -static constexpr int32_t kPcDisplacement = 4; - -// Maximum positive and negative displacement for method call measured from the patch location. -// (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from -// the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) -constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; -constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; - -// Maximum positive and negative displacement for a conditional branch measured from the patch -// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured -// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.) -constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement; -constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement; - -Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) - : ArmBaseRelativePatcher(provider, InstructionSet::kThumb2) { -} - -void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 1u, 0u); - DCHECK_EQ(patch_offset & 1u, 0u); - DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. - uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - DCHECK_EQ(displacement & 1u, 0u); - DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. 
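For reference, the displacement limits declared at the top of the deleted relative_patcher_thumb2.cc can be cross-checked with a couple of static_asserts. This is an illustrative sketch only, restating the same formulas as the deleted constants:

#include <cstdint>

constexpr uint32_t kPcDisplacement = 4u;  // Thumb2 PC = patch location + 4.

// BL: signed 25-bit, even displacement from the PC => [-2^24, 2^24 - 2].
constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2u + kPcDisplacement;
constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement;
static_assert(kMaxMethodCallPositiveDisplacement == 16u * 1024u * 1024u + 2u,
              "BL reaches 16 MiB + 2 forward, measured from the patch location");
static_assert(kMaxMethodCallNegativeDisplacement == 16u * 1024u * 1024u - 4u,
              "BL reaches 16 MiB - 4 backward, measured from the patch location");

// B<cond>.W: signed 21-bit, even displacement from the PC => [-2^20, 2^20 - 2].
constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement;
static_assert(kMaxBcondPositiveDisplacement == 1u * 1024u * 1024u + 2u,
              "B<cond>.W reaches 1 MiB + 2 forward, measured from the patch location");

These are the same bounds the deleted tests below exercise with their "almost too far" and "just too far" method layouts.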
- uint32_t signbit = (displacement >> 31) & 0x1; - uint32_t i1 = (displacement >> 23) & 0x1; - uint32_t i2 = (displacement >> 22) & 0x1; - uint32_t imm10 = (displacement >> 12) & 0x03ff; - uint32_t imm11 = (displacement >> 1) & 0x07ff; - uint32_t j1 = i1 ^ (signbit ^ 1); - uint32_t j2 = i2 ^ (signbit ^ 1); - uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; - value |= 0xf000d000; // BL - - // Check that we're just overwriting an existing BL. - DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000); - // Write the new BL. - SetInsn32(code, literal_offset, value); -} - -void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t literal_offset = patch.LiteralOffset(); - uint32_t pc_literal_offset = patch.PcInsnOffset(); - uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */; - uint32_t diff = target_offset - pc_base; - - uint32_t insn = GetInsn32(code, literal_offset); - DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0). - uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu); - uint32_t imm4 = (diff16 >> 12) & 0xfu; - uint32_t imm = (diff16 >> 11) & 0x1u; - uint32_t imm3 = (diff16 >> 8) & 0x7u; - uint32_t imm8 = diff16 & 0xffu; - insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8; - SetInsn32(code, literal_offset, insn); -} - -void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) { - DCHECK_ALIGNED(patch_offset, 2u); - uint32_t literal_offset = patch.LiteralOffset(); - DCHECK_ALIGNED(literal_offset, 2u); - DCHECK_LT(literal_offset, code->size()); - uint32_t insn = GetInsn32(code, literal_offset); - DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched) - ThunkKey key = GetBakerThunkKey(patch); - if (kIsDebugBuild) { - const uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - // Check that the next instruction matches the expected LDR. - switch (kind) { - case BakerReadBarrierKind::kField: { - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - if (width == BakerReadBarrierWidth::kWide) { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (immediate), encoding T3, with correct base_reg. - CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); - } else { - DCHECK_GE(code->size() - literal_offset, 6u); - uint32_t next_insn = GetInsn16(code, literal_offset + 4u); - // LDR (immediate), encoding T1, with correct base_reg. - CheckValidReg(next_insn & 0x7u); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); - } - break; - } - case BakerReadBarrierKind::kArray: { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). - CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. 
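Aside: the deleted PatchPcRelativeReference() above distributes a 32-bit PC-relative value across a MOVW/MOVT pair by rewriting the i/imm4/imm3/imm8 fields in place. A standalone sketch of the same splitting, producing fresh instruction words (in the patcher's half-word-swapped form) instead of patching existing ones; illustrative only, helper names are not from the ART sources:

#include <cstdint>

uint32_t EncodeMovImm16(uint32_t templ, uint32_t imm16) {
  uint32_t imm4 = (imm16 >> 12) & 0xfu;
  uint32_t i    = (imm16 >> 11) & 0x1u;
  uint32_t imm3 = (imm16 >> 8) & 0x7u;
  uint32_t imm8 = imm16 & 0xffu;
  return templ | (i << 26) | (imm4 << 16) | (imm3 << 12) | imm8;
}

void BuildMovwMovt(uint32_t value, uint32_t* movw, uint32_t* movt) {
  *movw = EncodeMovImm16(0xf2400000u, value & 0xffffu);  // MOVW Rd, #lo16 (Rd = r0 here).
  *movt = EncodeMovImm16(0xf2c00000u, value >> 16);      // MOVT Rd, #hi16.
}

The words produced this way match the expected_code that the deleted CheckPcRelativePatch() test helper builds further below.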
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); - CheckValidReg(next_insn & 0xf); // Check index register - break; - } - case BakerReadBarrierKind::kGcRoot: { - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - if (width == BakerReadBarrierWidth::kWide) { - DCHECK_GE(literal_offset, 4u); - uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); - // LDR (immediate), encoding T3, with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); - } else { - DCHECK_GE(literal_offset, 2u); - uint32_t prev_insn = GetInsn16(code, literal_offset - 2u); - // LDR (immediate), encoding T1, with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); - } - break; - } - default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); - UNREACHABLE(); - } - } - uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); - DCHECK_ALIGNED(target_offset, 4u); - uint32_t disp = target_offset - (patch_offset + kPcDisplacement); - DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu); // 21-bit signed. - insn |= ((disp << (26 - 20)) & 0x04000000u) | // Shift bit 20 to 26, "S". - ((disp >> (19 - 11)) & 0x00000800u) | // Shift bit 19 to 13, "J1". - ((disp >> (18 - 13)) & 0x00002000u) | // Shift bit 18 to 11, "J2". - ((disp << (16 - 12)) & 0x003f0000u) | // Shift bits 12-17 to 16-25, "imm6". - ((disp >> (1 - 0)) & 0x000007ffu); // Shift bits 1-12 to 0-11, "imm11". - SetInsn32(code, literal_offset, insn); -} - -#define __ assembler.GetVIXLAssembler()-> - -static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, - vixl::aarch32::Register base_reg, - vixl::aarch32::MemOperand& lock_word, - vixl::aarch32::Label* slow_path, - int32_t raw_ldr_offset) { - using namespace vixl::aarch32; // NOLINT(build/namespaces) - // Load the lock word containing the rb_state. - __ Ldr(ip, lock_word); - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); - __ B(ne, slow_path, /* is_far_target */ false); - __ Add(lr, lr, raw_ldr_offset); - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); - __ Bx(lr); // And return back to the function. - // Note: The fake dependency is unnecessary for the slow path. -} - -// Load the read barrier introspection entrypoint in register `entrypoint` -static void LoadReadBarrierMarkIntrospectionEntrypoint(arm::ArmVIXLAssembler& assembler, - vixl::aarch32::Register entrypoint) { - using vixl::aarch32::MemOperand; - using vixl::aarch32::ip; - // Thread Register. - const vixl::aarch32::Register tr = vixl::aarch32::r9; - - // The register where the read barrier introspection entrypoint is loaded - // is fixed: `Thumb2RelativePatcher::kBakerCcEntrypointRegister` (R4). 
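Aside: the field packing at the end of PatchBakerReadBarrierBranch() above writes the BNE.W (encoding T3) displacement bits into the half-word-swapped instruction word. The authoritative mapping is the one used by BneWWithOffset() in the deleted test further below: displacement bit 20 goes to "S" (word bit 26), bit 19 to "J2" (word bit 11), bit 18 to "J1" (word bit 13), bits 12-17 to "imm6" (word bits 16-21), and bits 1-11 to "imm11" (word bits 0-10). A standalone sketch, illustrative only:

#include <cstdint>

// BNE.W, encoding T3, in the half-word-swapped word form used by the patcher
// (first half-word in bits 16-31, second half-word in bits 0-15).
constexpr uint32_t kBneWPlus0 = 0xf0408000u;

// 'disp' is the byte displacement from the Thumb2 PC (patch location + 4);
// it must be even and fit the signed 21-bit B<cond> range.
uint32_t EncodeBneW(uint32_t disp) {
  return kBneWPlus0 |
         ((disp >> 1) & 0x7ffu) |          // imm11 <- disp bits 1-11.
         (((disp >> 12) & 0x3fu) << 16) |  // imm6  <- disp bits 12-17.
         (((disp >> 18) & 1u) << 13) |     // J1    <- disp bit 18.
         (((disp >> 19) & 1u) << 11) |     // J2    <- disp bit 19.
         (((disp >> 20) & 1u) << 26);      // S     <- disp bit 20 (sign).
}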
- DCHECK_EQ(entrypoint.GetCode(), Thumb2RelativePatcher::kBakerCcEntrypointRegister); - // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip.GetCode(), 12u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); - __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); -} - -void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, - uint32_t encoded_data) { - using namespace vixl::aarch32; // NOLINT(build/namespaces) - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - switch (kind) { - case BakerReadBarrierKind::kField: { - // Check if the holder is gray and, if not, add fake dependency to the base register - // and return to the LDR instruction to load the reference. Otherwise, use introspection - // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister) - // that performs further checks on the reference and marks it if needed. - Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); - CheckValidReg(holder_reg.GetCode()); - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - // If base_reg differs from holder_reg, the offset was too large and we must have - // emitted an explicit null check before the load. Otherwise, we need to null-check - // the holder as we do not necessarily do that check before going to the thunk. - vixl::aarch32::Label throw_npe; - if (holder_reg.Is(base_reg)) { - __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false); - } - vixl::aarch32::Label slow_path; - MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) - ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET - : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); - __ Bind(&slow_path); - const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - raw_ldr_offset; - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - if (width == BakerReadBarrierWidth::kWide) { - MemOperand ldr_half_address(lr, ldr_offset + 2); - __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". - __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. - __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. - } else { - MemOperand ldr_address(lr, ldr_offset); - __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. - __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint - ep_reg, // for narrow LDR. - Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); - __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. - __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. - } - // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Bx(ep_reg); // Jump to the entrypoint. - if (holder_reg.Is(base_reg)) { - // Add null check slow path. The stack map is at the address pointed to by LR. 
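Note on the "fake dependency" emitted by EmitGrayCheckAndFastPath() above: ADD base_reg, base_reg, ip, LSR #32 adds zero, but it makes the subsequent reference load address-depend on the lock word load, which is what orders the two loads without a memory barrier. A rough C-level analogue of the arithmetic only; the ordering guarantee comes from the hardware address dependency in the generated ARM code, not from C semantics:

#include <cstdint>

// base + (lock_word >> 32) with a 32-bit lock word is always base + 0, but the
// resulting address now depends on the loaded lock word. (Plain C makes no
// ordering promise here; the generated ARM code relies on the address
// dependency being preserved by the hardware.)
uint32_t AddFakeDependency(uint32_t base, uint32_t lock_word) {
  return base + static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);
}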
- __ Bind(&throw_npe); - int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset)); - __ Bx(ip); - } - break; - } - case BakerReadBarrierKind::kArray: { - Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - vixl::aarch32::Label slow_path; - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); - DCHECK_LT(lock_word.GetOffsetImmediate(), 0); - const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); - __ Bind(&slow_path); - const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - raw_ldr_offset; - MemOperand ldr_address(lr, ldr_offset + 2); - __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", - // i.e. Rm+32 because the scale in imm2 is 2. - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create - // a switch case target based on the index register. - __ Mov(ip, base_reg); // Move the base register to ip0. - __ Bx(ep_reg); // Jump to the entrypoint's array switch case. - break; - } - case BakerReadBarrierKind::kGcRoot: { - // Check if the reference needs to be marked and if so (i.e. not null, not marked yet - // and it does not have a forwarding address), call the correct introspection entrypoint; - // otherwise return the reference (or the extracted forwarding address). - // There is no gray bit check for GC roots. - Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(root_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - vixl::aarch32::Label return_label, not_marked, forwarding_address; - __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); - MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); - __ Ldr(ip, lock_word); - __ Tst(ip, LockWord::kMarkBitStateMaskShifted); - __ B(eq, ¬_marked); - __ Bind(&return_label); - __ Bx(lr); - __ Bind(¬_marked); - static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, - "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " - " the highest bits and the 'forwarding address' state to have all bits set"); - __ Cmp(ip, Operand(0xc0000000)); - __ B(hs, &forwarding_address); - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister - // to art_quick_read_barrier_mark_introspection_gc_roots. - int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) - ? 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET - : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; - __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); - __ Mov(ip, root_reg); - __ Bx(ep_reg); - __ Bind(&forwarding_address); - __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); - __ Bx(lr); - break; - } - default: - LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); - UNREACHABLE(); - } -} - -std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - arm::ArmVIXLAssembler assembler(&allocator); - - switch (key.GetType()) { - case ThunkType::kMethodCall: - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. - assembler.LoadFromOffset( - arm::kLoadWord, - vixl::aarch32::pc, - vixl::aarch32::r0, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - __ Bkpt(0); - break; - case ThunkType::kBakerReadBarrier: - CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1()); - break; - } - - assembler.FinalizeCode(); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; -} - -std::string Thumb2RelativePatcher::GetThunkDebugName(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return "MethodCallThunk"; - - case ThunkType::kBakerReadBarrier: { - uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - std::ostringstream oss; - oss << "BakerReadBarrierThunk"; - switch (kind) { - case BakerReadBarrierKind::kField: - oss << "Field"; - if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { - oss << "Wide"; - } - oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) - << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); - break; - case BakerReadBarrierKind::kArray: - oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); - break; - case BakerReadBarrierKind::kGcRoot: - oss << "GcRoot"; - if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { - oss << "Wide"; - } - oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - break; - } - return oss.str(); - } - } -} - -#undef __ - -uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallPositiveDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondPositiveDisplacement; - } -} - -uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallNegativeDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondNegativeDisplacement; - } -} - -void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { - DCHECK_LE(offset + 4u, code->size()); - DCHECK_ALIGNED(offset, 2u); - uint8_t* addr = &(*code)[offset]; - addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 
24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; -} - -uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 4u, code.size()); - DCHECK_ALIGNED(offset, 2u); - const uint8_t* addr = &code[offset]; - return - (static_cast<uint32_t>(addr[0]) << 16) + - (static_cast<uint32_t>(addr[1]) << 24) + - (static_cast<uint32_t>(addr[2]) << 0)+ - (static_cast<uint32_t>(addr[3]) << 8); -} - -template <typename Vector> -uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - return GetInsn32(ArrayRef<const uint8_t>(*code), offset); -} - -uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 2u, code.size()); - DCHECK_ALIGNED(offset, 2u); - const uint8_t* addr = &code[offset]; - return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8); -} - -template <typename Vector> -uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - return GetInsn16(ArrayRef<const uint8_t>(*code), offset); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h deleted file mode 100644 index 68386c00f4a..00000000000 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ -#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ - -#include "arch/arm/registers_arm.h" -#include "base/array_ref.h" -#include "base/bit_field.h" -#include "base/bit_utils.h" -#include "linker/arm/relative_patcher_arm_base.h" - -namespace art { - -namespace arm { -class ArmVIXLAssembler; -} // namespace arm - -namespace linker { - -class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { - public: - static constexpr uint32_t kBakerCcEntrypointRegister = 4u; - - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, - uint32_t holder_reg, - bool narrow) { - CheckValidReg(base_reg); - CheckValidReg(holder_reg); - DCHECK(!narrow || base_reg < 8u) << base_reg; - BakerReadBarrierWidth width = - narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg) | - BakerReadBarrierWidthField::Encode(width); - } - - static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { - CheckValidReg(base_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | - BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); - } - - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { - CheckValidReg(root_reg); - DCHECK(!narrow || root_reg < 8u) << root_reg; - BakerReadBarrierWidth width = - narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | - BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | - BakerReadBarrierWidthField::Encode(width); - } - - explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); - - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - - protected: - std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - std::string GetThunkDebugName(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; - uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; - - private: - static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u; - - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kArray, // Array get with index in register. - kGcRoot, // GC root load. - kLast = kGcRoot - }; - - enum class BakerReadBarrierWidth : uint8_t { - kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). - kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). 
- kLast = kNarrow - }; - - static constexpr size_t kBitsForBakerReadBarrierKind = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); - static constexpr size_t kBitsForRegister = 4u; - using BakerReadBarrierKindField = - BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; - using BakerReadBarrierFirstRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; - using BakerReadBarrierSecondRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; - static constexpr size_t kBitsForBakerReadBarrierWidth = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); - using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth, - kBitsForBakerReadBarrierKind + 2 * kBitsForRegister, - kBitsForBakerReadBarrierWidth>; - - static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg; - } - - void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); - - void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); - static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Vector> - static uint32_t GetInsn32(Vector* code, uint32_t offset); - - static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Vector> - static uint32_t GetInsn16(Vector* code, uint32_t offset); - - friend class Thumb2RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc deleted file mode 100644 index 2c22a352c23..00000000000 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ /dev/null @@ -1,1287 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm/relative_patcher_thumb2.h" - -#include "base/casts.h" -#include "linker/relative_patcher_test.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class Thumb2RelativePatcherTest : public RelativePatcherTest { - public: - Thumb2RelativePatcherTest() : RelativePatcherTest(InstructionSet::kThumb2, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kNopRawCode[]; - static const ArrayRef<const uint8_t> kNopCode; - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - static const uint32_t kPcInsnOffset; - - // The PC in Thumb mode is 4 bytes after the instruction location. 
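Aside: the encoded_data word produced by the Encode* helpers in the deleted header above is a small bit-field: kind in bits [0,2), first register in [2,6), second register in [6,10), width in bit 10, assuming kBitsForBakerReadBarrierKind == 2 (which MinimumBitsToStore(kGcRoot == 2) yields) and kBitsForRegister == 4. A hypothetical standalone decoder, with names that are not from the ART sources:

#include <cstdint>

struct BakerData {
  uint32_t kind;        // 0 = field, 1 = array, 2 = GC root.
  uint32_t first_reg;   // Base register (field/array) or root register.
  uint32_t second_reg;  // Holder register for fields; 15 (pc) when unused.
  bool narrow;          // 16-bit vs 32-bit LDR (kNarrow == 1 per the enum order above).
};

BakerData DecodeBakerData(uint32_t encoded_data) {
  return BakerData{
      encoded_data & 0x3u,
      (encoded_data >> 2) & 0xfu,
      (encoded_data >> 6) & 0xfu,
      ((encoded_data >> 10) & 0x1u) != 0u,
  };
}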
- static constexpr uint32_t kPcAdjustment = 4u; - - // Branches within range [-256, 256) can be created from these by adding the low 8 bits. - static constexpr uint32_t kBlPlus0 = 0xf000f800u; - static constexpr uint32_t kBlMinus256 = 0xf7ffff00u; - - // Special BL values. - static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu; - static constexpr uint32_t kBlMinusMax = 0xf400d000u; - - // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. - static constexpr uint32_t kBneWPlus0 = 0xf0408000u; - - // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn. - static constexpr uint32_t kLdrInsn = 0x6800u; - - // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. - static constexpr uint32_t kLdrWInsn = 0xf8d00000u; - - // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract. - static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u; - - // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift. - static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u; - - // NOP instructions. - static constexpr uint32_t kNopInsn = 0xbf00u; - static constexpr uint32_t kNopWInsn = 0xf3af8000u; - - void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { - CHECK_LE(pos, code->size()); - if (IsUint<16>(insn)) { - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - }; - static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } else { - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn >> 16), - static_cast<uint8_t>(insn >> 24), - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - }; - static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } - } - - void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { - InsertInsn(code, code->size(), insn); - } - - std::vector<uint8_t> GenNops(size_t num_nops) { - std::vector<uint8_t> result; - result.reserve(num_nops * 2u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - return result; - } - - std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { - std::vector<uint8_t> raw_code; - size_t number_of_16_bit_insns = - std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); }); - raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u); - for (uint32_t insn : insns) { - PushBackInsn(&raw_code, insn); - } - return raw_code; - } - - uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) { - if (!IsAligned<2u>(bne_offset)) { - LOG(ERROR) << "Unaligned bne_offset: " << bne_offset; - return 0xffffffffu; // Fails code diff later. - } - if (!IsAligned<2u>(target_offset)) { - LOG(ERROR) << "Unaligned target_offset: " << target_offset; - return 0xffffffffu; // Fails code diff later. - } - uint32_t diff = target_offset - bne_offset - kPcAdjustment; - DCHECK_ALIGNED(diff, 2u); - if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) { - LOG(ERROR) << "Target out of range: " << diff; - return 0xffffffffu; // Fails code diff later. 
- } - return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11 - | (((diff >> 12) & 0x3fu) << 16) // imm6 - | (((diff >> 18) & 1) << 13) // J1 - | (((diff >> 19) & 1) << 11) // J2 - | (((diff >> 20) & 1) << 26); // S - } - - bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, - const ArrayRef<const LinkerPatch>& method1_patches, - const ArrayRef<const uint8_t>& method3_code, - const ArrayRef<const LinkerPatch>& method3_patches, - uint32_t distance_without_thunks) { - CHECK_EQ(distance_without_thunks % kArmAlignment, 0u); - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); - - // We want to put the method3 at a very precise offset. - const uint32_t method3_offset = method1_offset + distance_without_thunks; - CHECK_ALIGNED(method3_offset, kArmAlignment); - - // Calculate size of method2 so that we put method3 at the correct place. - const uint32_t method1_end = method1_offset + method1_code.size(); - const uint32_t method2_offset = - method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader); - const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); - std::vector<uint8_t> method2_raw_code(method2_size); - ArrayRef<const uint8_t> method2_code(method2_raw_code); - AddCompiledMethod(MethodRef(2u), method2_code); - - AddCompiledMethod(MethodRef(3u), method3_code, method3_patches); - - Link(); - - // Check assumptions. - CHECK_EQ(GetMethodOffset(1), method1_offset); - CHECK_EQ(GetMethodOffset(2), method2_offset); - auto result3 = method_offset_map_.FindMethodOffset(MethodRef(3)); - CHECK(result3.first); - // There may be a thunk before method2. - if (result3.second == method3_offset + 1 /* thumb mode */) { - return false; // No thunk. - } else { - uint32_t thunk_end = - CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), - InstructionSet::kThumb2) + - MethodCallThunkSize(); - uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); - CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */); - return true; // Thunk present. - } - } - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - CHECK_NE(result.second & 1u, 0u); - return result.second - 1 /* thumb mode */; - } - - std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); - return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t MethodCallThunkSize() { - return CompileMethodCallThunk().size(); - } - - bool CheckThunk(uint32_t thunk_offset) { - const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); - if (output_.size() < thunk_offset + expected_code.size()) { - LOG(ERROR) << "output_.size() == " << output_.size() << " < " - << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); - return false; - } - ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == ArrayRef<const uint8_t>(expected_code)) { - return true; - } - // Log failure info. 
- DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); - return false; - } - - std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { - std::vector<uint8_t> result; - result.reserve(num_nops * 2u + 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - PushBackInsn(&result, bl); - return result; - } - - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, - uint32_t holder_reg, - bool narrow) { - const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t GetOutputInsn32(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 4u); - return (static_cast<uint32_t>(output_[offset]) << 16) | - (static_cast<uint32_t>(output_[offset + 1]) << 24) | - (static_cast<uint32_t>(output_[offset + 2]) << 0) | - (static_cast<uint32_t>(output_[offset + 3]) << 8); - } - - uint16_t GetOutputInsn16(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 2u); - return (static_cast<uint32_t>(output_[offset]) << 0) | - (static_cast<uint32_t>(output_[offset + 1]) << 8); - } - - void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg); - void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg); -}; - -const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { - 0x00, 0xf0, 0x00, 0xf8 -}; - -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t Thumb2RelativePatcherTest::kNopRawCode[] = { - 0x00, 0xbf -}; - -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode); - -const uint8_t Thumb2RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder) - 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder) - 0x78, 0x44, // ADD r0, pc -}; -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); -const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u; - -void Thumb2RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - const LinkerPatch patches[] = { - 
LinkerPatch::StringBssEntryPatch(0u, nullptr, kPcInsnOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(4u, nullptr, kPcInsnOffset, kStringIndex), - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex), - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t pc_base_offset = method1_offset + kPcInsnOffset + 4u /* PC adjustment */; - uint32_t diff = target_offset - pc_base_offset; - // Distribute the bits of the diff between the MOVW and MOVT: - uint32_t diffw = diff & 0xffffu; - uint32_t difft = diff >> 16; - uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder), - ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, - ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, - ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, - ((diffw & 0x00ffu)); // keep imm8 at bits 0-7. - uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder), - ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, - ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, - ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, - ((difft & 0x00ffu)); // keep imm8 at bits 0-7. - const uint8_t expected_code[] = { - static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24), - static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8), - static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24), - static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8), - 0x78, 0x44, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallSelf) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xff, 0xf7, 0xfe, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOther) { - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - const LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + 4u /* PC adjustment */); - ASSERT_EQ(diff_after & 1u, 0u); - ASSERT_LT(diff_after >> 1, 1u << 8); // Simple encoding, (diff_after >> 1) fits into 8 bits. 
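Side note on the byte order used by SetInsn32()/GetInsn32() earlier and by the expected_code arrays above: Thumb2 stores a 32-bit instruction as two consecutive little-endian half-words, and the patcher's 32-bit "instruction word" keeps the first half-word in its upper 16 bits. A small sketch of reading such a word back from memory, illustrative only:

#include <cstdint>

uint32_t ReadThumb2Insn32(const uint8_t* bytes) {
  // Each half-word is little-endian in memory; the first half-word becomes the
  // upper 16 bits of the instruction word, the second the lower 16 bits.
  uint32_t first = bytes[0] | (static_cast<uint32_t>(bytes[1]) << 8);
  uint32_t second = bytes[2] | (static_cast<uint32_t>(bytes[3]) << 8);
  return (first << 16) | second;
}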
- static const uint8_t method1_expected_code[] = { - 0x00, 0xf0, static_cast<uint8_t>(diff_after >> 1), 0xf8 - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + 4u /* PC adjustment */); - ASSERT_EQ(diff_before & 1u, 0u); - ASSERT_GE(diff_before, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0. - auto method2_expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff_before >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t diff = kTrampolineOffset - (method1_offset + 4u); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). - auto expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) { - constexpr uint32_t missing_method_index = 1024u; - auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. - ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first); - - // Check linked code. - uint32_t method3_offset = GetMethodOffset(3u); - uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), - InstructionSet::kThumb2); - uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. - auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. 
- ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), - }; - - constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + max_positive_disp); - ASSERT_FALSE(thunk_in_gap); // There should be no thunk. - - // Check linked code. - auto expected_code = GenNopsAndBl(3u, kBlPlusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { - auto method3_raw_code = GenNopsAndBl(2u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be no thunk. - - // Check linked code. - auto expected_code = GenNopsAndBl(2u, kBlMinusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(2u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), - }; - - constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + just_over_max_positive_disp); - ASSERT_TRUE(thunk_in_gap); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method3_offset = GetMethodOffset(3u); - ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset)); - uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); - uint32_t thunk_size = MethodCallThunkSize(); - uint32_t thunk_offset = RoundDown(method3_header_offset - thunk_size, kArmAlignment); - DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), - method3_header_offset); - ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); - uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, 16 * MB - (1u << 9)); // Simple encoding, unknown bits fit into the low 8 bits. 
- auto expected_code = GenNopsAndBl(2u, 0xf3ffd700 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - CheckThunk(thunk_offset); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { - auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. - - // Check linked code. - uint32_t method3_offset = GetMethodOffset(3u); - uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), - InstructionSet::kThumb2); - uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. - auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry1) { - TestStringBssEntry(0x00ff0000u, 0x00fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry2) { - TestStringBssEntry(0x02ff0000u, 0x05fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry3) { - TestStringBssEntry(0x08ff0000u, 0x08fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry4) { - TestStringBssEntry(0xd0ff0000u, 0x60fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference1) { - TestStringReference(0x00ff00fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference2) { - TestStringReference(0x02ff05fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference3) { - TestStringReference(0x08ff08fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference4) { - TestStringReference(0xd0ff60fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
- }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 4 * KB); - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base_reg, holder_reg, /* narrow */ false); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); - const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = - CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check uses the correct register, i.e. holder_reg. - if (holder_reg < 8) { - ASSERT_GE(output_.size() - gray_check_offset, 2u); - ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - gray_check_offset +=2u; - } else { - ASSERT_GE(output_.size() - gray_check_offset, 6u); - ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); - ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ - gray_check_offset += 6u; - } - } - // Verify that the lock word for gray bit check is loaded from the holder address. - ASSERT_GE(output_.size() - gray_check_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - const uint32_t load_lock_word = - kLdrWInsn | - (holder_reg << 16) | - (/* IP */ 12 << 12) | - mirror::Object::MonitorOffset().Uint32Value(); - ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). 
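Aside: the ror_shift computed above places the top bit of the Thumb2 modified-immediate pattern '1':imm8<6:0> onto the read barrier state bit of the lock word. Assuming LockWord::kReadBarrierStateShift == 28 (an assumption here, not stated in this diff), ror_shift = 7 + (32 - 28) = 11, and rotating bit 7 right by 11 lands on bit 28. A quick sanity check, illustrative only:

#include <cstdint>

constexpr uint32_t RotateRight(uint32_t value, uint32_t shift) {
  return (value >> shift) | (value << ((32u - shift) % 32u));
}

// With the assumed shift of 28, ror_shift is 11 and bit 7 rotated right by 11
// ends up at bit (7 - 11) mod 32 = 28, i.e. on the read barrier state bit
// tested by the TST instruction the code above verifies.
static_assert(RotateRight(0x80u, 7u + (32u - 28u)) == (1u << 28),
              "rotated immediate selects the read barrier state bit");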
- EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } - } -} - -void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 32u); - constexpr size_t kMethodCodeSize = 6u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - if (base_reg >= 8u) { - continue; - } - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base_reg, holder_reg, /* narrow */ true); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - if (base_reg >= 8u) { - continue; - } - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; - const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = - CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check uses the correct register, i.e. holder_reg. 
- if (holder_reg < 8) { - ASSERT_GE(output_.size() - gray_check_offset, 2u); - ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - gray_check_offset +=2u; - } else { - ASSERT_GE(output_.size() - gray_check_offset, 6u); - ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); - ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ - gray_check_offset += 6u; - } - } - // Verify that the lock word for gray bit check is loaded from the holder address. - ASSERT_GE(output_.size() - gray_check_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - const uint32_t load_lock_word = - kLdrWInsn | - (holder_reg << 16) | - (/* IP */ 12 << 12) | - mirror::Object::MonitorOffset().Uint32Value(); - ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). - EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } - } -} - -#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffsetWide##offset##_##ref_reg) { \ - TestBakerFieldWide(offset, ref_reg); \ - } - -TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3) -TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7) -TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11) - -#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffsetNarrow##offset##_##ref_reg) { \ - TestBakerFieldNarrow(offset, ref_reg); \ - } - -TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3) -TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7) - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { - // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
- constexpr uint32_t kLiteralOffset1 = 6u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - constexpr uint32_t kLiteralOffset2 = 4; - static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment), - "PC for BNE must be aligned."); - - // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch - // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
- size_t thunk_size = - CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); - size_t filler2_size = - 1 * MB - (kLiteralOffset2 + kPcAdjustment) - - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; - const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000; - const std::vector<uint8_t> expected_code1 = - RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { - // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction - // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 4u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - Link(); - - const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment)); - const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) { - // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded - // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. 
- // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 6u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - constexpr uint32_t kReachableFromOffset2 = 4; - constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2; - static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment), - "PC for BNE must be aligned."); - - // If not for the extra NOP, this would allow reaching the thunk from the BNE - // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take - // PC adjustment into account. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = - CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); - size_t filler2_size = - 1 * MB - (kReachableFromOffset2 + kPcAdjustment) - - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle. 
- const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; - const uint32_t bne_last = - BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment)); - const std::vector<uint8_t> expected_code1 = - RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = - RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerArray) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - auto ldr = [](uint32_t base_reg) { - uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; - uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; - return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12); - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the lock word for gray bit check is loaded from the correct address - // before the base_reg which points to the array data. 
- ASSERT_GE(output_.size() - thunk_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; - ASSERT_LT(offset, 0); - ASSERT_GT(offset, -256); - const uint32_t load_lock_word = - kLdrNegativeOffset | - (-offset & 0xffu) | - (base_reg << 16) | - (/* IP */ 12 << 12); - EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). - EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency. - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } -} - -TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 4u; - uint32_t method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); - const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
- method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
- ++method_idx;
- uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
- uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
- const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
- ASSERT_EQ(kMethodCodeSize, expected_code.size());
- EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
-
- std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
- ASSERT_GT(output_.size(), thunk_offset);
- ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
- ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
- expected_thunk.size());
- if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
- DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
- ASSERT_TRUE(false);
- }
-
- // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
- if (root_reg < 8) {
- ASSERT_GE(output_.size() - thunk_offset, 2u);
- ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
- } else {
- ASSERT_GE(output_.size() - thunk_offset, 6u);
- ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
- ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
- }
- // Do not check the rest of the implementation.
-
- // The next thunk follows on the next aligned offset.
- thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
- }
-}
-
-TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
- uint32_t valid_regs[] = {
- 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
- // Not applicable to high registers.
- };
- constexpr size_t kMethodCodeSize = 6u;
- constexpr size_t kLiteralOffset = 2u;
- uint32_t method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
- ++method_idx;
- uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
- const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
- ASSERT_EQ(kMethodCodeSize, raw_code.size());
- ArrayRef<const uint8_t> code(raw_code);
- const LinkerPatch patches[] = {
- LinkerPatch::BakerReadBarrierBranchPatch(
- kLiteralOffset,
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
- };
- AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
- }
- Link();
-
- // All thunks are at the end.
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; - const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. - ASSERT_GE(output_.size() - thunk_offset, 2u); - ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } -} - -TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { - // Test 1MiB of patches to the same thunk to stress-test different large offsets. - // (The low bits are not that important but the location of the high bits is easy to get wrong.) - std::vector<uint8_t> code; - code.reserve(1 * MB); - const size_t num_patches = 1 * MB / 8u; - std::vector<LinkerPatch> patches; - patches.reserve(num_patches); - const uint32_t ldr = - kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false); - for (size_t i = 0; i != num_patches; ++i) { - PushBackInsn(&code, ldr); - PushBackInsn(&code, kBneWPlus0); - patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); - } - ASSERT_EQ(1 * MB, code.size()); - ASSERT_EQ(num_patches, patches.size()); - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - // The thunk is right after the method code. - DCHECK_ALIGNED(1 * MB, kArmAlignment); - std::vector<uint8_t> expected_code; - for (size_t i = 0; i != num_patches; ++i) { - PushBackInsn(&expected_code, ldr); - PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB)); - patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); - } - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { - // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` - // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily - // hold when we're reserving thunks of different sizes. This test exposes the situation - // by using Baker thunks and a method call thunk. - - // Add a method call patch that can reach to method 1 offset + 16MiB. 
- uint32_t method_idx = 0u;
- constexpr size_t kMethodCallLiteralOffset = 2u;
- constexpr uint32_t kMissingMethodIdx = 2u;
- const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
- const LinkerPatch method1_patches[] = {
- LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u),
- };
- ArrayRef<const uint8_t> code1(raw_code1);
- ++method_idx;
- AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
-
- // Skip kMissingMethodIdx.
- ++method_idx;
- ASSERT_EQ(kMissingMethodIdx, method_idx);
- // Add a method with the right size so that the code for the next method starts 1MiB
- // after the code for method 1.
- size_t filler_size =
- 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
- - sizeof(OatQuickMethodHeader);
- std::vector<uint8_t> filler_code = GenNops(filler_size / 2u);
- ++method_idx;
- AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
- // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB
- // before the currently scheduled MaxNextOffset() for the method call thunk.
- for (uint32_t i = 0; i != 14; ++i) {
- filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
- filler_code = GenNops(filler_size / 2u);
- ++method_idx;
- AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
- }
-
- // Add 2 Baker GC root patches to the last method, one that would allow the thunk at
- // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the
- // second that needs it kArmAlignment after that. Given the size of the GC root thunk
- // is more than the space required by the method call thunk plus kArmAlignment,
- // this pushes the first GC root thunk's pending MaxNextOffset() before the method call
- // thunk's pending MaxNextOffset() which needs to be adjusted.
- ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
- CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size());
- static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
- constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
- constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
- // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`.
- const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12);
- const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12);
- const std::vector<uint8_t> last_method_raw_code = RawCode({
- kNopInsn, // Padding before first GC root read barrier.
- ldr1, kBneWPlus0, // First GC root LDR with read barrier.
- ldr2, kBneWPlus0, // Second GC root LDR with read barrier.
- });
- uint32_t encoded_data1 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false);
- uint32_t encoded_data2 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false);
- const LinkerPatch last_method_patches[] = {
- LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
- LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
- };
- ++method_idx;
- AddCompiledMethod(MethodRef(method_idx),
- ArrayRef<const uint8_t>(last_method_raw_code),
- ArrayRef<const LinkerPatch>(last_method_patches));
-
- // The main purpose of the test is to check that Link() does not cause a crash.
- Link(); - - ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc deleted file mode 100644 index 52a07965b92..00000000000 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ /dev/null @@ -1,683 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm64/relative_patcher_arm64.h" - -#include "arch/arm64/asm_support_arm64.h" -#include "arch/arm64/instruction_set_features_arm64.h" -#include "art_method.h" -#include "base/bit_utils.h" -#include "compiled_method-inl.h" -#include "driver/compiler_driver.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "heap_poisoning.h" -#include "linker/linker_patch.h" -#include "linker/output_stream.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat.h" -#include "oat_quick_method_header.h" -#include "read_barrier.h" -#include "utils/arm64/assembler_arm64.h" - -namespace art { -namespace linker { - -namespace { - -// Maximum positive and negative displacement for method call measured from the patch location. -// (Signed 28 bit displacement with the last two bits 0 has range [-2^27, 2^27-4] measured from -// the ARM64 PC pointing to the BL.) -constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 27) - 4u; -constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 27); - -// Maximum positive and negative displacement for a conditional branch measured from the patch -// location. (Signed 21 bit displacement with the last two bits 0 has range [-2^20, 2^20-4] -// measured from the ARM64 PC pointing to the B.cond.) -constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 4u; -constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20); - -// The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. 
-constexpr uint32_t kAdrpThunkSize = 8u; - -inline bool IsAdrpPatch(const LinkerPatch& patch) { - switch (patch.GetType()) { - case LinkerPatch::Type::kCall: - case LinkerPatch::Type::kCallRelative: - case LinkerPatch::Type::kBakerReadBarrierBranch: - return false; - case LinkerPatch::Type::kMethodRelative: - case LinkerPatch::Type::kMethodBssEntry: - case LinkerPatch::Type::kTypeRelative: - case LinkerPatch::Type::kTypeClassTable: - case LinkerPatch::Type::kTypeBssEntry: - case LinkerPatch::Type::kStringRelative: - case LinkerPatch::Type::kStringInternTable: - case LinkerPatch::Type::kStringBssEntry: - return patch.LiteralOffset() == patch.PcInsnOffset(); - } -} - -inline uint32_t MaxExtraSpace(size_t num_adrp, size_t code_size) { - if (num_adrp == 0u) { - return 0u; - } - uint32_t alignment_bytes = - CompiledMethod::AlignCode(code_size, InstructionSet::kArm64) - code_size; - return kAdrpThunkSize * num_adrp + alignment_bytes; -} - -} // anonymous namespace - -Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider, - const Arm64InstructionSetFeatures* features) - : ArmBaseRelativePatcher(provider, InstructionSet::kArm64), - fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()), - reserved_adrp_thunks_(0u), - processed_adrp_thunks_(0u) { - if (fix_cortex_a53_843419_) { - adrp_thunk_locations_.reserve(16u); - current_method_thunks_.reserve(16u * kAdrpThunkSize); - } -} - -uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) { - if (!fix_cortex_a53_843419_) { - DCHECK(adrp_thunk_locations_.empty()); - return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); - } - - // Add thunks for previous method if any. - if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { - size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; - offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) + - kAdrpThunkSize * num_adrp_thunks; - reserved_adrp_thunks_ = adrp_thunk_locations_.size(); - } - - // Count the number of ADRP insns as the upper bound on the number of thunks needed - // and use it to reserve space for other linker patches. - size_t num_adrp = 0u; - DCHECK(compiled_method != nullptr); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (IsAdrpPatch(patch)) { - ++num_adrp; - } - } - ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); - uint32_t max_extra_space = MaxExtraSpace(num_adrp, code.size()); - offset = ReserveSpaceInternal(offset, compiled_method, method_ref, max_extra_space); - if (num_adrp == 0u) { - return offset; - } - - // Now that we have the actual offset where the code will be placed, locate the ADRP insns - // that actually require the thunk. 
- uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); - DCHECK(compiled_method != nullptr); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (IsAdrpPatch(patch)) { - uint32_t patch_offset = quick_code_offset + patch.LiteralOffset(); - if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) { - adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset); - thunk_offset += kAdrpThunkSize; - } - } - } - return offset; -} - -uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { - if (!fix_cortex_a53_843419_) { - DCHECK(adrp_thunk_locations_.empty()); - } else { - // Add thunks for the last method if any. - if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { - size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; - offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) + - kAdrpThunkSize * num_adrp_thunks; - reserved_adrp_thunks_ = adrp_thunk_locations_.size(); - } - } - return ArmBaseRelativePatcher::ReserveSpaceEnd(offset); -} - -uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { - if (fix_cortex_a53_843419_) { - if (!current_method_thunks_.empty()) { - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64); - if (kIsDebugBuild) { - CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); - size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; - CHECK_LE(num_thunks, processed_adrp_thunks_); - for (size_t i = 0u; i != num_thunks; ++i) { - const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i]; - CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize); - } - } - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) { - return 0u; - } - offset = aligned_offset + current_method_thunks_.size(); - current_method_thunks_.clear(); - } - } - return ArmBaseRelativePatcher::WriteThunks(out, offset); -} - -void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, uint32_t - target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 3u, 0u); - DCHECK_EQ(patch_offset & 3u, 0u); - DCHECK_EQ(target_offset & 3u, 0u); - uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); - DCHECK_EQ(displacement & 3u, 0u); - DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. - uint32_t insn = (displacement & 0x0fffffffu) >> 2; - insn |= 0x94000000; // BL - - // Check that we're just overwriting an existing BL. - DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u); - // Write the new BL. 
- SetInsn(code, literal_offset, insn);
-}
-
-void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
- const LinkerPatch& patch,
- uint32_t patch_offset,
- uint32_t target_offset) {
- DCHECK_EQ(patch_offset & 3u, 0u);
- DCHECK_EQ(target_offset & 3u, 0u);
- uint32_t literal_offset = patch.LiteralOffset();
- uint32_t insn = GetInsn(code, literal_offset);
- uint32_t pc_insn_offset = patch.PcInsnOffset();
- uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu);
- bool wide = (insn & 0x40000000) != 0;
- uint32_t shift = wide ? 3u : 2u;
- if (literal_offset == pc_insn_offset) {
- // Check it's an ADRP with imm == 0 (unset).
- DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u)
- << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn;
- if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() &&
- adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) {
- DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code),
- literal_offset, patch_offset));
- uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second;
- uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu);
- uint32_t adrp = PatchAdrp(insn, adrp_disp);
-
- uint32_t out_disp = thunk_offset - patch_offset;
- DCHECK_EQ(out_disp & 3u, 0u);
- DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u); // 28-bit signed.
- insn = (out_disp & 0x0fffffffu) >> shift;
- insn |= 0x14000000; // B <thunk>
-
- uint32_t back_disp = -out_disp;
- DCHECK_EQ(back_disp & 3u, 0u);
- DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u); // 28-bit signed.
- uint32_t b_back = (back_disp & 0x0fffffffu) >> 2;
- b_back |= 0x14000000; // B <back>
- size_t thunks_code_offset = current_method_thunks_.size();
- current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize);
- SetInsn(&current_method_thunks_, thunks_code_offset, adrp);
- SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back);
- static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions");
-
- processed_adrp_thunks_ += 1u;
- } else {
- insn = PatchAdrp(insn, disp);
- }
- // Write the new ADRP (or B to the erratum 843419 thunk).
- SetInsn(code, literal_offset, insn);
- } else {
- if ((insn & 0xfffffc00) == 0x91000000) {
- // ADD immediate, 64-bit with imm12 == 0 (unset).
- if (!kEmitCompilerReadBarrier) {
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
- patch.GetType() == LinkerPatch::Type::kTypeRelative ||
- patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType();
- } else {
- // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry.
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
- patch.GetType() == LinkerPatch::Type::kTypeRelative ||
- patch.GetType() == LinkerPatch::Type::kStringRelative ||
- patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
- patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
- }
- shift = 0u; // No shift for ADD.
- } else {
- // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
- patch.GetType() == LinkerPatch::Type::kTypeClassTable ||
- patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
- patch.GetType() == LinkerPatch::Type::kStringInternTable ||
- patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
- DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
- }
- if (kIsDebugBuild) {
- uint32_t adrp = GetInsn(code, pc_insn_offset);
- if ((adrp & 0x9f000000u) != 0x90000000u) {
- CHECK(fix_cortex_a53_843419_);
- CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk>
- CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize);
- size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
- CHECK_LE(num_thunks, processed_adrp_thunks_);
- uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
- for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) {
- CHECK_NE(i, processed_adrp_thunks_);
- if (adrp_thunk_locations_[i].first == b_offset) {
- size_t idx = num_thunks - (processed_adrp_thunks_ - i);
- adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize);
- break;
- }
- }
- }
- CHECK_EQ(adrp & 0x9f00001fu, // Check that pc_insn_offset points
- 0x90000000 | ((insn >> 5) & 0x1fu)); // to ADRP with matching register.
- }
- uint32_t imm12 = (disp & 0xfffu) >> shift;
- insn = (insn & ~(0xfffu << 10)) | (imm12 << 10);
- SetInsn(code, literal_offset, insn);
- }
-}
-
-void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
- const LinkerPatch& patch,
- uint32_t patch_offset) {
- DCHECK_ALIGNED(patch_offset, 4u);
- uint32_t literal_offset = patch.LiteralOffset();
- DCHECK_ALIGNED(literal_offset, 4u);
- DCHECK_LT(literal_offset, code->size());
- uint32_t insn = GetInsn(code, literal_offset);
- DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched)
- ThunkKey key = GetBakerThunkKey(patch);
- if (kIsDebugBuild) {
- const uint32_t encoded_data = key.GetCustomValue1();
- BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
- // Check that the next instruction matches the expected LDR.
- switch (kind) {
- case BakerReadBarrierKind::kField: {
- DCHECK_GE(code->size() - literal_offset, 8u);
- uint32_t next_insn = GetInsn(code, literal_offset + 4u);
- // LDR (immediate) with correct base_reg.
- CheckValidReg(next_insn & 0x1fu); // Check destination register.
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
- break;
- }
- case BakerReadBarrierKind::kArray: {
- DCHECK_GE(code->size() - literal_offset, 8u);
- uint32_t next_insn = GetInsn(code, literal_offset + 4u);
- // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
- // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
- CheckValidReg(next_insn & 0x1fu); // Check destination register.
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
- CheckValidReg((next_insn >> 16) & 0x1f); // Check index register
- break;
- }
- case BakerReadBarrierKind::kGcRoot: {
- DCHECK_GE(literal_offset, 4u);
- uint32_t prev_insn = GetInsn(code, literal_offset - 4u);
- // LDR (immediate) with correct root_reg.
- const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); - break; - } - default: - LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); - UNREACHABLE(); - } - } - uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); - DCHECK_ALIGNED(target_offset, 4u); - uint32_t disp = target_offset - patch_offset; - DCHECK((disp >> 20) == 0u || (disp >> 20) == 4095u); // 21-bit signed. - insn |= (disp << (5 - 2)) & 0x00ffffe0u; // Shift bits 2-20 to 5-23. - SetInsn(code, literal_offset, insn); -} - -#define __ assembler.GetVIXLAssembler()-> - -static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, - vixl::aarch64::Register base_reg, - vixl::aarch64::MemOperand& lock_word, - vixl::aarch64::Label* slow_path) { - using namespace vixl::aarch64; // NOLINT(build/namespaces) - // Load the lock word containing the rb_state. - __ Ldr(ip0.W(), lock_word); - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); - static_assert( - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, - "Field and array LDR offsets must be the same to reuse the same code."); - // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); - __ Br(lr); // And return back to the function. - // Note: The fake dependency is unnecessary for the slow path. -} - -// Load the read barrier introspection entrypoint in register `entrypoint`. -static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, - vixl::aarch64::Register entrypoint) { - using vixl::aarch64::MemOperand; - using vixl::aarch64::ip0; - // Thread Register. - const vixl::aarch64::Register tr = vixl::aarch64::x19; - - // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip0.GetCode(), 16u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); - __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); -} - -void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, - uint32_t encoded_data) { - using namespace vixl::aarch64; // NOLINT(build/namespaces) - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - switch (kind) { - case BakerReadBarrierKind::kField: { - // Check if the holder is gray and, if not, add fake dependency to the base register - // and return to the LDR instruction to load the reference. Otherwise, use introspection - // to load the reference and call the entrypoint (in IP1) that performs further checks - // on the reference and marks it if needed. 
- auto base_reg = - Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - auto holder_reg = - Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); - CheckValidReg(holder_reg.GetCode()); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip0, ip1); - // If base_reg differs from holder_reg, the offset was too large and we must have - // emitted an explicit null check before the load. Otherwise, we need to null-check - // the holder as we do not necessarily do that check before going to the thunk. - vixl::aarch64::Label throw_npe; - if (holder_reg.Is(base_reg)) { - __ Cbz(holder_reg.W(), &throw_npe); - } - vixl::aarch64::Label slow_path; - MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); - __ Bind(&slow_path); - MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); - __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); - __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. - __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. - // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Br(ip1); // Jump to the entrypoint. - if (holder_reg.Is(base_reg)) { - // Add null check slow path. The stack map is at the address pointed to by LR. - __ Bind(&throw_npe); - int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset)); - __ Br(ip0); - } - break; - } - case BakerReadBarrierKind::kArray: { - auto base_reg = - Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip0, ip1); - vixl::aarch64::Label slow_path; - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); - DCHECK_LT(lock_word.GetOffset(), 0); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); - __ Bind(&slow_path); - MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); - __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); - __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). - __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create - // a switch case target based on the index register. - __ Mov(ip0, base_reg); // Move the base register to ip0. - __ Br(ip1); // Jump to the entrypoint's array switch case. - break; - } - case BakerReadBarrierKind::kGcRoot: { - // Check if the reference needs to be marked and if so (i.e. not null, not marked yet - // and it does not have a forwarding address), call the correct introspection entrypoint; - // otherwise return the reference (or the extracted forwarding address). - // There is no gray bit check for GC roots. 
- auto root_reg =
- Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
- CheckValidReg(root_reg.GetCode());
- DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
- UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
- temps.Exclude(ip0, ip1);
- vixl::aarch64::Label return_label, not_marked, forwarding_address;
- __ Cbz(root_reg, &return_label);
- MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
- __ Ldr(ip0.W(), lock_word);
- __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
- __ Bind(&return_label);
- __ Br(lr);
- __ Bind(&not_marked);
- __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
- __ B(&forwarding_address, mi);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
- // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
- // art_quick_read_barrier_mark_introspection_gc_roots.
- __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
- __ Mov(ip0.W(), root_reg);
- __ Br(ip1);
- __ Bind(&forwarding_address);
- __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
- __ Br(lr);
- break;
- }
- default:
- LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
- UNREACHABLE();
- }
-}
-
-std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- arm64::Arm64Assembler assembler(&allocator);
-
- switch (key.GetType()) {
- case ThunkType::kMethodCall: {
- // The thunk just uses the entry point in the ArtMethod. This works even for calls
- // to the generic JNI and interpreter trampolines.
- Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64PointerSize).Int32Value());
- assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
- break;
- }
- case ThunkType::kBakerReadBarrier: {
- CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
- break;
- }
- }
-
- // Ensure we emit the literal pool.
- assembler.FinalizeCode();
- std::vector<uint8_t> thunk_code(assembler.CodeSize());
- MemoryRegion code(thunk_code.data(), thunk_code.size());
- assembler.FinalizeInstructions(code);
- return thunk_code;
-}
-
-std::string Arm64RelativePatcher::GetThunkDebugName(const ThunkKey& key) {
- switch (key.GetType()) {
- case ThunkType::kMethodCall:
- return "MethodCallThunk";
-
- case ThunkType::kBakerReadBarrier: {
- uint32_t encoded_data = key.GetCustomValue1();
- BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
- std::ostringstream oss;
- oss << "BakerReadBarrierThunk";
- switch (kind) {
- case BakerReadBarrierKind::kField:
- oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
- << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
- break;
- case BakerReadBarrierKind::kArray:
- oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
- DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
- break;
- case BakerReadBarrierKind::kGcRoot:
- oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
- DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
- break;
- }
- return oss.str();
- }
- }
-}
-
-#undef __
-
-uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
- switch (key.GetType()) {
- case ThunkType::kMethodCall:
- return kMaxMethodCallPositiveDisplacement;
- case ThunkType::kBakerReadBarrier:
- return kMaxBcondPositiveDisplacement;
- }
-}
-
-uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
- switch (key.GetType()) {
- case ThunkType::kMethodCall:
- return kMaxMethodCallNegativeDisplacement;
- case ThunkType::kBakerReadBarrier:
- return kMaxBcondNegativeDisplacement;
- }
-}
-
-uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
- return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg.
- // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
- ((disp & 0x00003000u) << (29 - 12)) |
- // The next 16 bits are encoded in bits 5-22.
- ((disp & 0xffffc000u) >> (12 + 2 - 5)) |
- // Since the target_offset is based on the beginning of the oat file and the
- // image space precedes the oat file, the target_offset into image space will
- // be negative yet passed as uint32_t. Therefore we limit the displacement
- // to +-2GiB (rather than the maximum +-4GiB) and determine the sign bit from
- // the highest bit of the displacement. This is encoded in bit 23.
- ((disp & 0x80000000u) >> (31 - 23));
-}
-
-bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code,
- uint32_t literal_offset,
- uint32_t patch_offset) {
- DCHECK_EQ(patch_offset & 0x3u, 0u);
- if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc
- uint32_t adrp = GetInsn(code, literal_offset);
- DCHECK_EQ(adrp & 0x9f000000, 0x90000000);
- uint32_t next_offset = patch_offset + 4u;
- uint32_t next_insn = GetInsn(code, literal_offset + 4u);
-
- // Below we avoid patching sequences where the adrp is followed by a load which can easily
- // be proved to be aligned.
-
- // First check if the next insn is the LDR using the result of the ADRP.
- // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg.
- if ((next_insn & 0xffc00000) == 0xb9400000 && - (((next_insn >> 5) ^ adrp) & 0x1f) == 0) { - return false; - } - - // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result - // of the ADRP for an ADD immediate, check for that as well. We generalize a bit - // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination - // or stores the result to a different register. - if ((next_insn & 0x1f000000) == 0x11000000 && - ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) { - return false; - } - - // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing. - if ((next_insn & 0xff000000) == 0x18000000) { - return false; - } - - // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8. - if ((next_insn & 0xff000000) == 0x58000000) { - bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0; - return !is_aligned_load; - } - - // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is - // guaranteed to be 128-bits aligned and <pimm> is multiple of the load size. - if ((next_insn & 0xbfc003e0) == 0xb94003e0) { - return false; - } - return true; - } - return false; -} - -void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { - DCHECK_LE(offset + 4u, code->size()); - DCHECK_EQ(offset & 3u, 0u); - uint8_t* addr = &(*code)[offset]; - addr[0] = (value >> 0) & 0xff; - addr[1] = (value >> 8) & 0xff; - addr[2] = (value >> 16) & 0xff; - addr[3] = (value >> 24) & 0xff; -} - -uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 4u, code.size()); - DCHECK_EQ(offset & 3u, 0u); - const uint8_t* addr = &code[offset]; - return - (static_cast<uint32_t>(addr[0]) << 0) + - (static_cast<uint32_t>(addr[1]) << 8) + - (static_cast<uint32_t>(addr[2]) << 16)+ - (static_cast<uint32_t>(addr[3]) << 24); -} - -template <typename Alloc> -uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) { - return GetInsn(ArrayRef<const uint8_t>(*code), offset); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h deleted file mode 100644 index 8ba59976e7d..00000000000 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ -#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ - -#include "base/array_ref.h" -#include "base/bit_field.h" -#include "base/bit_utils.h" -#include "linker/arm/relative_patcher_arm_base.h" - -namespace art { - -namespace arm64 { -class Arm64Assembler; -} // namespace arm64 - -namespace linker { - -class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { - public: - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { - CheckValidReg(base_reg); - CheckValidReg(holder_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg); - } - - static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { - CheckValidReg(base_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); - } - - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { - CheckValidReg(root_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | - BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); - } - - Arm64RelativePatcher(RelativePatcherTargetProvider* provider, - const Arm64InstructionSetFeatures* features); - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - - protected: - std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - std::string GetThunkDebugName(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; - uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; - - private: - static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; - - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kArray, // Array get with index in register. - kGcRoot, // GC root load. 
- kLast = kGcRoot - }; - - static constexpr size_t kBitsForBakerReadBarrierKind = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); - static constexpr size_t kBitsForRegister = 5u; - using BakerReadBarrierKindField = - BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; - using BakerReadBarrierFirstRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; - using BakerReadBarrierSecondRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; - - static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg; - } - - void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); - - static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); - - static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, - uint32_t patch_offset); - void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); - static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Alloc> - static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset); - - const bool fix_cortex_a53_843419_; - // Map original patch_offset to thunk offset. - std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_; - size_t reserved_adrp_thunks_; - size_t processed_adrp_thunks_; - std::vector<uint8_t> current_method_thunks_; - - friend class Arm64RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc deleted file mode 100644 index 05459a2a823..00000000000 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ /dev/null @@ -1,1364 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm64/relative_patcher_arm64.h" - -#include "base/casts.h" -#include "linker/relative_patcher_test.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class Arm64RelativePatcherTest : public RelativePatcherTest { - public: - explicit Arm64RelativePatcherTest(const std::string& variant) - : RelativePatcherTest(InstructionSet::kArm64, variant) { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kNopRawCode[]; - static const ArrayRef<const uint8_t> kNopCode; - - // NOP instruction. - static constexpr uint32_t kNopInsn = 0xd503201f; - - // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits. - static constexpr uint32_t kBlPlus0 = 0x94000000u; - static constexpr uint32_t kBPlus0 = 0x14000000u; - - // Special BL values. 
- static constexpr uint32_t kBlPlusMax = 0x95ffffffu; - static constexpr uint32_t kBlMinusMax = 0x96000000u; - - // LDR immediate, 32-bit, unsigned offset. - static constexpr uint32_t kLdrWInsn = 0xb9400000u; - - // LDR register, 32-bit, LSL #2. - static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u; - - // LDUR, 32-bit. - static constexpr uint32_t kLdurWInsn = 0xb8400000u; - - // ADD/ADDS/SUB/SUBS immediate, 64-bit. - static constexpr uint32_t kAddXInsn = 0x91000000u; - static constexpr uint32_t kAddsXInsn = 0xb1000000u; - static constexpr uint32_t kSubXInsn = 0xd1000000u; - static constexpr uint32_t kSubsXInsn = 0xf1000000u; - - // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp). - static constexpr uint32_t kLdurInsn = 0xf840405fu; - - // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units. - static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu; - static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu; - - // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain displacement from SP - // in units of 4-bytes (for 32-bit load) or 8-bytes (for 64-bit load). - static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu; - static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; - - // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes. - static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u; - - void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { - CHECK_LE(pos, code->size()); - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - static_cast<uint8_t>(insn >> 16), - static_cast<uint8_t>(insn >> 24), - }; - static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } - - void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { - InsertInsn(code, code->size(), insn); - } - - std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { - std::vector<uint8_t> raw_code; - raw_code.reserve(insns.size() * 4u); - for (uint32_t insn : insns) { - PushBackInsn(&raw_code, insn); - } - return raw_code; - } - - uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, - const ArrayRef<const LinkerPatch>& method1_patches, - const ArrayRef<const uint8_t>& last_method_code, - const ArrayRef<const LinkerPatch>& last_method_patches, - uint32_t distance_without_thunks) { - CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u); - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); - const uint32_t gap_start = method1_offset + method1_code.size(); - - // We want to put the method3 at a very precise offset. - const uint32_t last_method_offset = method1_offset + distance_without_thunks; - CHECK_ALIGNED(last_method_offset, kArm64Alignment); - const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); - - // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB). - // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB - // offsets by this test. Making the first chunk bigger makes it easy to give all intermediate - // methods the same alignment of the end, so the thunk insertion adds a predictable size as - // long as it's after the first chunk.) 
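    // For illustration (hypothetical numbers): with a gap of 128 MiB + 16 B and
    // kSmallChunkSize = 2 MiB, the code below computes num_small_chunks = 64 - 1 = 63,
    // so the first chunk covers the remaining 2 MiB + 16 B (within [2 MiB, 4 MiB))
    // and every later chunk is exactly 2 MiB.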
- uint32_t method_idx = 2u; - constexpr uint32_t kSmallChunkSize = 2 * MB; - std::vector<uint8_t> gap_code; - uint32_t gap_size = gap_end - gap_start; - uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u; - uint32_t chunk_start = gap_start; - uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize; - for (uint32_t i = 0; i <= num_small_chunks; ++i) { // num_small_chunks+1 iterations. - uint32_t chunk_code_size = - chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader); - gap_code.resize(chunk_code_size, 0u); - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code)); - method_idx += 1u; - chunk_start += chunk_size; - chunk_size = kSmallChunkSize; // For all but the first chunk. - DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start)); - } - - // Add the last method and link - AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches); - Link(); - - // Check assumptions. - CHECK_EQ(GetMethodOffset(1), method1_offset); - auto last_result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(last_result.first); - // There may be a thunk before method2. - if (last_result.second != last_method_offset) { - // Thunk present. Check that there's only one. - uint32_t thunk_end = - CompiledCode::AlignCode(gap_end, InstructionSet::kArm64) + MethodCallThunkSize(); - uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); - CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader)); - } - return method_idx; - } - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - CHECK_ALIGNED(result.second, 4u); - return result.second; - } - - std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t MethodCallThunkSize() { - return CompileMethodCallThunk().size(); - } - - bool CheckThunk(uint32_t thunk_offset) { - const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); - if (output_.size() < thunk_offset + expected_code.size()) { - LOG(ERROR) << "output_.size() == " << output_.size() << " < " - << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); - return false; - } - ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == ArrayRef<const uint8_t>(expected_code)) { - return true; - } - // Log failure info. 
- DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); - return false; - } - - std::vector<uint8_t> GenNops(size_t num_nops) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - return result; - } - - std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - PushBackInsn(&result, bl); - return result; - } - - std::vector<uint8_t> GenNopsAndAdrpAndUse(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset, - uint32_t use_insn) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 8u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - CHECK_ALIGNED(method_offset, 4u); - CHECK_ALIGNED(target_offset, 4u); - uint32_t adrp_offset = method_offset + num_nops * 4u; - uint32_t disp = target_offset - (adrp_offset & ~0xfffu); - if (use_insn == kLdrWInsn) { - DCHECK_ALIGNED(disp, 1u << 2); - use_insn |= 1 | // LDR x1, [x0, #(imm12 << 2)] - ((disp & 0xfffu) << (10 - 2)); // imm12 = ((disp & 0xfffu) >> 2) is at bit 10. - } else if (use_insn == kAddXInsn) { - use_insn |= 1 | // ADD x1, x0, #imm - (disp & 0xfffu) << 10; // imm12 = (disp & 0xfffu) is at bit 10. - } else { - LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn; - } - uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) - ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, - ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, - // We take the sign bit from the disp, limiting disp to +- 2GiB. - ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23. - PushBackInsn(&result, adrp); - PushBackInsn(&result, use_insn); - return result; - } - - std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset) { - return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn); - } - - void TestNopsAdrpLdr(size_t num_nops, uint32_t bss_begin, uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. - const LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::StringBssEntryPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t target_offset = bss_begin_ + string_entry_offset; - auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - std::vector<uint8_t> GenNopsAndAdrpAdd(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset) { - return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kAddXInsn); - } - - void TestNopsAdrpAdd(size_t num_nops, uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. 
- const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::RelativeStringPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - auto expected_code = GenNopsAndAdrpAdd(num_nops, method1_offset, string_offset); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - void PrepareNopsAdrpInsn2Ldr(size_t num_nops, - uint32_t insn2, - uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. - InsertInsn(&code, num_nops * 4u + 4u, insn2); - const LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::StringBssEntryPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - } - - void PrepareNopsAdrpInsn2Add(size_t num_nops, uint32_t insn2, uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. - InsertInsn(&code, num_nops * 4u + 4u, insn2); - const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::RelativeStringPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - } - - void TestNopsAdrpInsn2AndUse(size_t num_nops, - uint32_t insn2, - uint32_t target_offset, - uint32_t use_insn) { - uint32_t method1_offset = GetMethodOffset(1u); - auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn); - InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - void TestNopsAdrpInsn2AndUseHasThunk(size_t num_nops, - uint32_t insn2, - uint32_t target_offset, - uint32_t use_insn) { - uint32_t method1_offset = GetMethodOffset(1u); - CHECK(!compiled_method_refs_.empty()); - CHECK_EQ(compiled_method_refs_[0].index, 1u); - CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size()); - uint32_t method1_size = compiled_methods_[0]->GetQuickCode().size(); - uint32_t thunk_offset = - CompiledCode::AlignCode(method1_offset + method1_size, InstructionSet::kArm64); - uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u); - CHECK_ALIGNED(b_diff, 4u); - ASSERT_LT(b_diff, 128 * MB); - uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu); - uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu); - - auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn); - InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); - // Replace adrp with bl. 
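    // (Rough context for the expected code built below: for an erratum 843419 fixup, the
    // patcher replaces the ADRP with a B to an out-of-line thunk (modeled by b_out); the
    // thunk re-does the ADRP for its own PC and branches back, via b_in, to the
    // instruction that followed the original ADRP.)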
- expected_code.erase(expected_code.begin() + num_nops * 4u, - expected_code.begin() + num_nops * 4u + 4u); - InsertInsn(&expected_code, num_nops * 4u, b_out); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - - auto expected_thunk_code = GenNopsAndAdrpLdr(0u, thunk_offset, target_offset); - ASSERT_EQ(expected_thunk_code.size(), 8u); - expected_thunk_code.erase(expected_thunk_code.begin() + 4u, expected_thunk_code.begin() + 8u); - InsertInsn(&expected_thunk_code, 4u, b_in); - ASSERT_EQ(expected_thunk_code.size(), 8u); - - uint32_t thunk_size = MethodCallThunkSize(); - ASSERT_EQ(thunk_offset + thunk_size, output_.size()); - ASSERT_EQ(thunk_size, expected_thunk_code.size()); - ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size); - if (ArrayRef<const uint8_t>(expected_thunk_code) != thunk_code) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk_code), thunk_code); - FAIL(); - } - } - - void TestAdrpInsn2Ldr(uint32_t insn2, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - ASSERT_LT(method1_offset, adrp_offset); - CHECK_ALIGNED(adrp_offset, 4u); - uint32_t num_nops = (adrp_offset - method1_offset) / 4u; - PrepareNopsAdrpInsn2Ldr(num_nops, insn2, bss_begin, string_entry_offset); - uint32_t target_offset = bss_begin_ + string_entry_offset; - if (has_thunk) { - TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn); - } else { - TestNopsAdrpInsn2AndUse(num_nops, insn2, target_offset, kLdrWInsn); - } - ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. - } - - void TestAdrpLdurLdr(uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, - int32_t pcrel_disp, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - ASSERT_LT(pcrel_disp, 0x100000); - ASSERT_GE(pcrel_disp, -0x100000); - ASSERT_EQ(pcrel_disp & 0x3, 0); - uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, - uint32_t sprel_disp_in_load_units, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - ASSERT_LT(sprel_disp_in_load_units, 0x1000u); - uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpInsn2Add(uint32_t insn2, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - ASSERT_LT(method1_offset, adrp_offset); - CHECK_ALIGNED(adrp_offset, 4u); - uint32_t num_nops = (adrp_offset - method1_offset) / 4u; - PrepareNopsAdrpInsn2Add(num_nops, insn2, string_offset); - if (has_thunk) { - TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, string_offset, kAddXInsn); - } else { - TestNopsAdrpInsn2AndUse(num_nops, insn2, string_offset, kAddXInsn); - } - ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. 
- } - - void TestAdrpLdurAdd(uint32_t adrp_offset, bool has_thunk, uint32_t string_offset) { - TestAdrpInsn2Add(kLdurInsn, adrp_offset, has_thunk, string_offset); - } - - void TestAdrpLdrPcRelAdd(uint32_t pcrel_ldr_insn, - int32_t pcrel_disp, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - ASSERT_LT(pcrel_disp, 0x100000); - ASSERT_GE(pcrel_disp, -0x100000); - ASSERT_EQ(pcrel_disp & 0x3, 0); - uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); - TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset); - } - - void TestAdrpLdrSpRelAdd(uint32_t sprel_ldr_insn, - uint32_t sprel_disp_in_load_units, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - ASSERT_LT(sprel_disp_in_load_units, 0x1000u); - uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); - TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset); - } - - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { - const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t GetOutputInsn(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 4u); - return (static_cast<uint32_t>(output_[offset]) << 0) | - (static_cast<uint32_t>(output_[offset + 1]) << 8) | - (static_cast<uint32_t>(output_[offset + 2]) << 16) | - (static_cast<uint32_t>(output_[offset + 3]) << 24); - } - - void TestBakerField(uint32_t offset, uint32_t ref_reg); -}; - -const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { - 0x00, 0x00, 0x00, 0x94 -}; - -const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t Arm64RelativePatcherTest::kNopRawCode[] = { - 0x1f, 0x20, 0x03, 0xd5 -}; - -const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kNopCode(kNopRawCode); - -class Arm64RelativePatcherTestDefault : public Arm64RelativePatcherTest { - public: - Arm64RelativePatcherTestDefault() : Arm64RelativePatcherTest("default") { } -}; - -class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest { - public: - Arm64RelativePatcherTestDenver64() : Arm64RelativePatcherTest("denver64") { } -}; - -TEST_F(Arm64RelativePatcherTestDefault, CallSelf) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - const std::vector<uint8_t> expected_code = RawCode({kBlPlus0}); - 
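  // (kBlPlus0 encodes BL .+0, so patching a RelativeCodePatch whose target is the method
  // itself should leave the displacement at zero, i.e. the expected code equals the input.)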
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOther) { - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - const LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - method1_offset; - CHECK_ALIGNED(diff_after, 4u); - ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits. - const std::vector<uint8_t> method1_expected_code = RawCode({kBlPlus0 + (diff_after >> 2)}); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - method2_offset; - CHECK_ALIGNED(diff_before, 4u); - ASSERT_GE(diff_before, -1u << 27); - auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t diff = kTrampolineOffset - method1_offset; - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). - auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 2) & 0x03ffffffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) { - constexpr uint32_t missing_method_index = 1024u; - auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index), - }; - - constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; - uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - just_over_max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, - last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); - ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first); - - // Check linked code. 
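  // (Here the trampoline itself is outside the 128 MiB backward range of BL, so a method
  // call thunk is expected right after the last method's code and the BL should be patched
  // to branch forward to that thunk instead of to the trampoline.)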
- uint32_t thunk_offset = - CompiledCode::AlignCode(last_method_offset + last_method_code.size(), InstructionSet::kArm64); - uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), - }; - - constexpr uint32_t max_positive_disp = 128 * MB - 4u; - uint32_t last_method_idx = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + max_positive_disp); - ASSERT_EQ(expected_last_method_idx, last_method_idx); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset + bl_offset_in_method1 + max_positive_disp, last_method_offset); - - // Check linked code. - auto expected_code = GenNopsAndBl(1u, kBlPlusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { - auto last_method_raw_code = GenNopsAndBl(0u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), - }; - - constexpr uint32_t max_negative_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, last_method_offset + bl_offset_in_last_method - max_negative_disp); - - // Check linked code. - auto expected_code = GenNopsAndBl(0u, kBlMinusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(0u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 0u * 4u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). 
- const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), - }; - - constexpr uint32_t just_over_max_positive_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap( - method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + just_over_max_positive_disp); - ASSERT_EQ(expected_last_method_idx, last_method_idx); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset)); - uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); - uint32_t thunk_size = MethodCallThunkSize(); - uint32_t thunk_offset = RoundDown(last_method_header_offset - thunk_size, kArm64Alignment); - DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), - last_method_header_offset); - uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - CheckThunk(thunk_offset); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { - auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; - uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - just_over_max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, - last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); - - // Check linked code. 
- uint32_t thunk_offset = - CompiledCode::AlignCode(last_method_offset + last_method_code.size(), InstructionSet::kArm64); - uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry1) { - TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry2) { - TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry3) { - TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry4) { - TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference1) { - TestNopsAdrpAdd(0u, 0x12345678u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference2) { - TestNopsAdrpAdd(0u, -0x12345678u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference3) { - TestNopsAdrpAdd(0u, 0x12345000u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference4) { - TestNopsAdrpAdd(0u, 0x12345ffcu); -} - -#define TEST_FOR_OFFSETS(test, disp1, disp2) \ - test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \ - test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2) - -#define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238) - -#define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDenver64, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ - TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \ - } - -TEST_FOR_OFFSETS(DENVER64_LDUR_LDR_TEST, 0x1234, 0x1238) - -// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. -#define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WPcRel ## disp) { \ - TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRW_PCREL_LDR_TEST, 0x1234, 0x1238) - -// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. -#define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XPcRel ## disp) { \ - bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \ - TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRX_PCREL_LDR_TEST, 0x1234, 0x1238) - -// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. 
-#define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WSpRel ## disp) { \ - TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4) - -#define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XSpRel ## disp) { \ - TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRX_SPREL_LDR_TEST, 0, 8) - -#define DEFAULT_LDUR_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## Ldur ## disp) { \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpLdurAdd(adrp_offset, has_thunk, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_LDUR_ADD_TEST, 0x12345678, 0xffffc840) - -#define DENVER64_LDUR_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDenver64, StringReference ## adrp_offset ## Ldur ## disp) { \ - TestAdrpLdurAdd(adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DENVER64_LDUR_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_SUBX3X2_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubX3X2 ## disp) { \ - /* SUB unrelated to "ADRP x0, addr". */ \ - uint32_t sub = kSubXInsn | (100 << 10) | (2u << 5) | 3u; /* SUB x3, x2, #100 */ \ - TestAdrpInsn2Add(sub, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_SUBX3X2_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_SUBSX3X0_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubsX3X0 ## disp) { \ - /* SUBS that uses the result of "ADRP x0, addr". */ \ - uint32_t subs = kSubsXInsn | (100 << 10) | (0u << 5) | 3u; /* SUBS x3, x0, #100 */ \ - TestAdrpInsn2Add(subs, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_SUBSX3X0_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_ADDX0X0_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddX0X0 ## disp) { \ - /* ADD that uses the result register of "ADRP x0, addr" as both source and destination. */ \ - uint32_t add = kSubXInsn | (100 << 10) | (0u << 5) | 0u; /* ADD x0, x0, #100 */ \ - TestAdrpInsn2Add(add, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_ADDX0X0_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_ADDSX0X2_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddsX0X2 ## disp) { \ - /* ADDS that does not use the result of "ADRP x0, addr" but overwrites that register. */ \ - uint32_t adds = kAddsXInsn | (100 << 10) | (2u << 5) | 0u; /* ADDS x0, x2, #100 */ \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpInsn2Add(adds, adrp_offset, has_thunk, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_ADDSX0X2_ADD_TEST, 0x12345678, 0xffffc840) - -// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. -#define LDRW_PCREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WPcRel ## disp) { \ - TestAdrpLdrPcRelAdd(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRW_PCREL_ADD_TEST, 0x1234, 0x1238) - -// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. 
-#define LDRX_PCREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XPcRel ## disp) { \ - bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \ - TestAdrpLdrPcRelAdd(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRX_PCREL_ADD_TEST, 0x1234, 0x1238) - -// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. -#define LDRW_SPREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WSpRel ## disp) { \ - TestAdrpLdrSpRelAdd(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4) - -#define LDRX_SPREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XSpRel ## disp) { \ - TestAdrpLdrSpRelAdd(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8) - -void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 16 * KB); - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; - const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. 
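  // (Each (base_reg, holder_reg) pair above has distinct encoded data, so a separate Baker
  // thunk per method is expected; every CBNZ can reach past the last method, so the thunks
  // are laid out after the code, each start rounded up to kArm64Alignment.)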
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; - const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check CBZ uses the correct register, i.e. holder_reg. - ASSERT_GE(output_.size() - gray_check_offset, 4u); - ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); - gray_check_offset +=4u; - } - // Verify that the lock word for gray bit check is loaded from the holder address. - static constexpr size_t kGrayCheckInsns = 5; - ASSERT_GE(output_.size() - gray_check_offset, 4u * kGrayCheckInsns); - const uint32_t load_lock_word = - kLdrWInsn | - (mirror::Object::MonitorOffset().Uint32Value() << (10 - 2)) | - (holder_reg << 5) | - /* ip0 */ 16; - EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset)); - // Verify the gray bit check. - const uint32_t check_gray_bit_without_offset = - 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu); - // Verify the fake dependency. - const uint32_t fake_dependency = - 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 - (/* ip0 */ 16 << 16) | // Xm = ip0 - (base_reg << 5) | // Xn = base_reg - base_reg; // Xd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn(gray_check_offset + 12u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } - } -} - -#define TEST_BAKER_FIELD(offset, ref_reg) \ - TEST_F(Arm64RelativePatcherTestDefault, \ - BakerOffset##offset##_##ref_reg) { \ - TestBakerField(offset, ref_reg); \ - } - -TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15) -TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29) - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { - // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
- constexpr uint32_t kLiteralOffset1 = 4; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - // Allow reaching the thunk from the very beginning of a method 1MiB away. Backward branch - // reaches the full 1MiB. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); - size_t filler2_size = - 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - constexpr uint32_t kLiteralOffset2 = 0; - const std::vector<uint8_t> raw_code2 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; - const uint32_t cbnz_max_backward = kCbnzIP1Plus0Insn | 0x00800000; - const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({cbnz_max_backward, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { - // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction - // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
- constexpr uint32_t kLiteralOffset1 = 0; - const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - Link(); - - const uint32_t cbnz_offset = RoundUp(raw_code1.size(), kArm64Alignment) - kLiteralOffset1; - const uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - const std::vector<uint8_t> expected_code1 = RawCode({cbnz, kLdrWInsn, kNopInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) { - // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded - // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 4; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - // If not for the extra NOP, this would allow reaching the thunk from the very beginning - // of a method 1MiB away. Backward branch reaches the full 1MiB. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
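    // (The exact filler2_size does not matter; the point is that the extra NOP pushes the
    // CBNZ in the last method just past the 1 MiB backward reach of the mid-gap thunk,
    // which should force a second thunk after the last method.)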
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); - size_t filler2_size = - 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - // Extra NOP compared to BakerOffsetThunkInTheMiddle. - constexpr uint32_t kLiteralOffset2 = 4; - const std::vector<uint8_t> raw_code2 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; - const uint32_t cbnz_last_offset = RoundUp(raw_code2.size(), kArm64Alignment) - kLiteralOffset2; - const uint32_t cbnz_last = kCbnzIP1Plus0Insn | (cbnz_last_offset << (5 - 2)); - const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({kNopInsn, cbnz_last, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerArray) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - auto ldr = [](uint32_t base_reg) { - uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; - uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; - return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg; - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the lock word for gray bit check is loaded from the correct address - // before the base_reg which points to the array data. - static constexpr size_t kGrayCheckInsns = 5; - ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns); - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; - ASSERT_LT(offset, 0); - const uint32_t load_lock_word = - kLdurWInsn | - ((offset & 0x1ffu) << 12) | - (base_reg << 5) | - /* ip0 */ 16; - EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset)); - // Verify the gray bit check. - const uint32_t check_gray_bit_without_offset = - 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu); - // Verify the fake dependency. - const uint32_t fake_dependency = - 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 - (/* ip0 */ 16 << 16) | // Xm = ip0 - (base_reg << 5) | // Xn = base_reg - base_reg; // Xd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 4u; - uint32_t method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; - const std::vector<uint8_t> raw_code = RawCode({ldr, kCbnzIP1Plus0Insn}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; - const std::vector<uint8_t> expected_code = RawCode({ldr, cbnz}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. - ASSERT_GE(output_.size() - thunk_offset, 4u); - ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerAndMethodCallInteraction) { - // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` - // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily - // hold when we're reserving thunks of different sizes. This test exposes the situation - // by using Baker thunks and a method call thunk. - - // Add a method call patch that can reach to method 1 offset + 128MiB. - uint32_t method_idx = 0u; - constexpr size_t kMethodCallLiteralOffset = 4u; - constexpr uint32_t kMissingMethodIdx = 2u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u), - }; - ArrayRef<const uint8_t> code1(raw_code1); - ++method_idx; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); - - // Skip kMissingMethodIdx. - ++method_idx; - ASSERT_EQ(kMissingMethodIdx, method_idx); - // Add a method with the right size that the method code for the next one starts 1MiB - // after code for method 1. - size_t filler_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> filler_code = GenNops(filler_size / 4u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - // Add 126 methods with 1MiB code+header, making the code for the next method start 1MiB - // before the currently scheduled MaxNextOffset() for the method call thunk. - for (uint32_t i = 0; i != 126; ++i) { - filler_size = 1 * MB - sizeof(OatQuickMethodHeader); - filler_code = GenNops(filler_size / 4u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - } - - // Add 2 Baker GC root patches to the last method, one that would allow the thunk at - // 1MiB + kArm64Alignment, i.e. 
kArm64Alignment after the method call thunk, and the - // second that needs it kArm64Alignment after that. Given the size of the GC root thunk - // is more than the space required by the method call thunk plus kArm64Alignment, - // this pushes the first GC root thunk's pending MaxNextOffset() before the method call - // thunk's pending MaxNextOffset() which needs to be adjusted. - ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArm64Alignment) + kArm64Alignment, - CompileBakerGcRootThunk(/* root_reg */ 0).size()); - static_assert(kArm64Alignment == 16, "Code below assumes kArm64Alignment == 16"); - constexpr size_t kBakerLiteralOffset1 = 4u + kArm64Alignment; - constexpr size_t kBakerLiteralOffset2 = 4u + 2 * kArm64Alignment; - // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | root_reg`. - const uint32_t ldr1 = kLdrWInsn | /* root_reg */ 1; - const uint32_t ldr2 = kLdrWInsn | /* root_reg */ 2; - const std::vector<uint8_t> last_method_raw_code = RawCode({ - kNopInsn, kNopInsn, kNopInsn, kNopInsn, // Padding before first GC root read barrier. - ldr1, kCbnzIP1Plus0Insn, // First GC root LDR with read barrier. - kNopInsn, kNopInsn, // Padding before second GC root read barrier. - ldr2, kCbnzIP1Plus0Insn, // Second GC root LDR with read barrier. - }); - uint32_t encoded_data1 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); - uint32_t encoded_data2 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); - const LinkerPatch last_method_patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), - ArrayRef<const uint8_t>(last_method_raw_code), - ArrayRef<const LinkerPatch>(last_method_patches)); - - // The main purpose of the test is to check that Link() does not cause a crash. - Link(); - - ASSERT_EQ(127 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h index a5f60992cae..3da7a437627 100644 --- a/compiler/linker/elf_builder.h +++ b/compiler/linker/elf_builder.h @@ -529,6 +529,8 @@ class ElfBuilder FINAL { stream_(output), rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0), + data_bimg_rel_ro_( + this, ".data.bimg.rel.ro", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), dex_(this, ".dex", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize), @@ -552,6 +554,7 @@ class ElfBuilder FINAL { loaded_size_(0u), virtual_address_(0) { text_.phdr_flags_ = PF_R | PF_X; + data_bimg_rel_ro_.phdr_flags_ = PF_R | PF_W; // Shall be made read-only at run time. 
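The new .data.bimg.rel.ro section above is mapped PF_R | PF_W so that its boot-image-relative entries can be relocated at load time and then sealed, as the added "Shall be made read-only at run time" comment notes. A minimal sketch of that load-time pattern, assuming 32-bit entries holding offsets from the boot image base and a hypothetical SealBootImageRelRo helper (not ART's actual loader code):

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdint>

    // Hypothetical helper, not part of ART: relocate each 32-bit entry from a
    // boot-image offset to an absolute address, then drop write permission.
    // Assumes `begin` is page-aligned, as a loaded ELF section would be.
    bool SealBootImageRelRo(uint8_t* begin, size_t size, uintptr_t boot_image_begin) {
      for (size_t pos = 0; pos + sizeof(uint32_t) <= size; pos += sizeof(uint32_t)) {
        uint32_t* entry = reinterpret_cast<uint32_t*>(begin + pos);
        *entry += static_cast<uint32_t>(boot_image_begin);  // Offset -> address (assumed layout).
      }
      return mprotect(begin, size, PROT_READ) == 0;  // PF_R | PF_W becomes read-only.
    }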
bss_.phdr_flags_ = PF_R | PF_W; dex_.phdr_flags_ = PF_R; dynamic_.phdr_flags_ = PF_R | PF_W; @@ -566,6 +569,7 @@ class ElfBuilder FINAL { BuildIdSection* GetBuildId() { return &build_id_; } Section* GetRoData() { return &rodata_; } Section* GetText() { return &text_; } + Section* GetDataBimgRelRo() { return &data_bimg_rel_ro_; } Section* GetBss() { return &bss_; } Section* GetDex() { return &dex_; } StringSection* GetStrTab() { return &strtab_; } @@ -694,6 +698,7 @@ class ElfBuilder FINAL { void PrepareDynamicSection(const std::string& elf_file_path, Elf_Word rodata_size, Elf_Word text_size, + Elf_Word data_bimg_rel_ro_size, Elf_Word bss_size, Elf_Word bss_methods_offset, Elf_Word bss_roots_offset, @@ -707,6 +712,9 @@ class ElfBuilder FINAL { // Allocate all pre-dynamic sections. rodata_.AllocateVirtualMemory(rodata_size); text_.AllocateVirtualMemory(text_size); + if (data_bimg_rel_ro_size != 0) { + data_bimg_rel_ro_.AllocateVirtualMemory(data_bimg_rel_ro_size); + } if (bss_size != 0) { bss_.AllocateVirtualMemory(bss_size); } @@ -735,6 +743,24 @@ class ElfBuilder FINAL { Elf_Word oatlastword_address = rodata_.GetAddress() + rodata_size - 4; dynsym_.Add(oatlastword, &rodata_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); } + if (data_bimg_rel_ro_size != 0u) { + Elf_Word oatdatabimgrelro = dynstr_.Add("oatdatabimgrelro"); + dynsym_.Add(oatdatabimgrelro, + &data_bimg_rel_ro_, + data_bimg_rel_ro_.GetAddress(), + data_bimg_rel_ro_size, + STB_GLOBAL, + STT_OBJECT); + Elf_Word oatdatabimgrelrolastword = dynstr_.Add("oatdatabimgrelrolastword"); + Elf_Word oatdatabimgrelrolastword_address = + data_bimg_rel_ro_.GetAddress() + data_bimg_rel_ro_size - 4; + dynsym_.Add(oatdatabimgrelrolastword, + &data_bimg_rel_ro_, + oatdatabimgrelrolastword_address, + 4, + STB_GLOBAL, + STT_OBJECT); + } DCHECK_LE(bss_roots_offset, bss_size); if (bss_size != 0u) { Elf_Word oatbss = dynstr_.Add("oatbss"); @@ -1010,6 +1036,7 @@ class ElfBuilder FINAL { Section rodata_; Section text_; + Section data_bimg_rel_ro_; Section bss_; Section dex_; CachedStringSection dynstr_; diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h index 77d689d4dbb..7b35fd9b0c3 100644 --- a/compiler/linker/linker_patch.h +++ b/compiler/linker/linker_patch.h @@ -41,19 +41,27 @@ class LinkerPatch { // choose to squeeze the Type into fewer than 8 bits, we'll have to declare // patch_type_ as an uintN_t and do explicit static_cast<>s. enum class Type : uint8_t { + kDataBimgRelRo, // NOTE: Actual patching is instruction_set-dependent. kMethodRelative, // NOTE: Actual patching is instruction_set-dependent. kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent. kCall, kCallRelative, // NOTE: Actual patching is instruction_set-dependent. kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. - kTypeClassTable, // NOTE: Actual patching is instruction_set-dependent. kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. kStringRelative, // NOTE: Actual patching is instruction_set-dependent. - kStringInternTable, // NOTE: Actual patching is instruction_set-dependent. kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. 
}; + static LinkerPatch DataBimgRelRoPatch(size_t literal_offset, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + LinkerPatch patch(literal_offset, Type::kDataBimgRelRo, /* target_dex_file */ nullptr); + patch.boot_image_offset_ = boot_image_offset; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + static LinkerPatch RelativeMethodPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -100,16 +108,6 @@ class LinkerPatch { return patch; } - static LinkerPatch TypeClassTablePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t target_type_idx) { - LinkerPatch patch(literal_offset, Type::kTypeClassTable, target_dex_file); - patch.type_idx_ = target_type_idx; - patch.pc_insn_offset_ = pc_insn_offset; - return patch; - } - static LinkerPatch TypeBssEntryPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -130,16 +128,6 @@ class LinkerPatch { return patch; } - static LinkerPatch StringInternTablePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t target_string_idx) { - LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file); - patch.string_idx_ = target_string_idx; - patch.pc_insn_offset_ = pc_insn_offset; - return patch; - } - static LinkerPatch StringBssEntryPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -153,7 +141,7 @@ class LinkerPatch { static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) { - LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, nullptr); + LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, /* target_dex_file */ nullptr); patch.baker_custom_value1_ = custom_value1; patch.baker_custom_value2_ = custom_value2; return patch; @@ -172,14 +160,13 @@ class LinkerPatch { bool IsPcRelative() const { switch (GetType()) { + case Type::kDataBimgRelRo: case Type::kMethodRelative: case Type::kMethodBssEntry: case Type::kCallRelative: case Type::kTypeRelative: - case Type::kTypeClassTable: case Type::kTypeBssEntry: case Type::kStringRelative: - case Type::kStringInternTable: case Type::kStringBssEntry: case Type::kBakerReadBarrierBranch: return true; @@ -188,6 +175,11 @@ class LinkerPatch { } } + uint32_t BootImageOffset() const { + DCHECK(patch_type_ == Type::kDataBimgRelRo); + return boot_image_offset_; + } + MethodReference TargetMethod() const { DCHECK(patch_type_ == Type::kMethodRelative || patch_type_ == Type::kMethodBssEntry || @@ -198,40 +190,35 @@ class LinkerPatch { const DexFile* TargetTypeDexFile() const { DCHECK(patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry); return target_dex_file_; } dex::TypeIndex TargetTypeIndex() const { DCHECK(patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry); return dex::TypeIndex(type_idx_); } const DexFile* TargetStringDexFile() const { DCHECK(patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return target_dex_file_; } dex::StringIndex TargetStringIndex() const { DCHECK(patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return dex::StringIndex(string_idx_); } uint32_t PcInsnOffset() const { - DCHECK(patch_type_ == 
Type::kMethodRelative || + DCHECK(patch_type_ == Type::kDataBimgRelRo || + patch_type_ == Type::kMethodRelative || patch_type_ == Type::kMethodBssEntry || patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry || patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return pc_insn_offset_; } @@ -263,10 +250,11 @@ class LinkerPatch { uint32_t literal_offset_ : 24; // Method code size up to 16MiB. Type patch_type_ : 8; union { - uint32_t cmp1_; // Used for relational operators. - uint32_t method_idx_; // Method index for Call/Method patches. - uint32_t type_idx_; // Type index for Type patches. - uint32_t string_idx_; // String index for String patches. + uint32_t cmp1_; // Used for relational operators. + uint32_t boot_image_offset_; // Data to write to the .data.bimg.rel.ro entry. + uint32_t method_idx_; // Method index for Call/Method patches. + uint32_t type_idx_; // Type index for Type patches. + uint32_t string_idx_; // String index for String patches. uint32_t baker_custom_value1_; static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators"); diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc deleted file mode 100644 index 69e0846cb7e..00000000000 --- a/compiler/linker/mips/relative_patcher_mips.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/mips/relative_patcher_mips.h" - -#include "compiled_method.h" -#include "debug/method_debug_info.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -uint32_t MipsRelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. 
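Tying back to the linker_patch.h hunk above: the new kDataBimgRelRo kind carries a boot-image offset in the union instead of a dex-file reference, so its factory takes no DexFile. A small illustrative use of the added factory and accessors, with invented offsets (a sketch, not code from this change):

    #include <cassert>
    #include "linker/linker_patch.h"  // Header shown in this diff.

    void IllustrateDataBimgRelRoPatch() {
      using art::linker::LinkerPatch;
      // Offsets below are invented for the example.
      LinkerPatch patch = LinkerPatch::DataBimgRelRoPatch(
          /* literal_offset */ 8u,           // Position in the method code to patch.
          /* pc_insn_offset */ 4u,           // Anchor instruction for the PC-relative math.
          /* boot_image_offset */ 0x1000u);  // Value destined for the .data.bimg.rel.ro entry.
      assert(patch.GetType() == LinkerPatch::Type::kDataBimgRelRo);
      assert(patch.IsPcRelative());
      assert(patch.BootImageOffset() == 0x1000u);
      assert(patch.PcInsnOffset() == 4u);
    }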
-} - -void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS"; -} - -void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - bool high_patch = ((*code)[literal_offset + 0] == 0x34) && ((*code)[literal_offset + 1] == 0x12); - - // Perform basic sanity checks. - if (high_patch) { - if (is_r6) { - // auipc reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); - DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); - } else { - // lui reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00); - DCHECK_EQ((*code)[literal_offset + 3], 0x3C); - } - } else { - // instr reg(s), offset_low - CHECK_EQ((*code)[literal_offset + 0], 0x78); - CHECK_EQ((*code)[literal_offset + 1], 0x56); - } - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - diff += (diff & 0x8000) << 1; // Account for sign extension in "instr reg(s), offset_low". - - if (high_patch) { - // lui reg, offset_high / auipc reg, offset_high - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); - } else { - // instr reg(s), offset_low - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 8); - } -} - -void MipsRelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -std::vector<debug::MethodDebugInfo> MipsRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h deleted file mode 100644 index 5714a7d1b0c..00000000000 --- a/compiler/linker/mips/relative_patcher_mips.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ -#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ - -#include "arch/mips/instruction_set_features_mips.h" -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class MipsRelativePatcher FINAL : public RelativePatcher { - public: - explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features) - : is_r6(features->IsR6()) {} - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - private: - bool is_r6; - - DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc deleted file mode 100644 index 629fdd535de..00000000000 --- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips/relative_patcher_mips.h" -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class Mips32r6RelativePatcherTest : public RelativePatcherTest { - public: - Mips32r6RelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips, "mips32r6") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t Mips32r6RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x8E, // lw s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetHigh = 0; // At auipc. -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetLow1 = 4; // At addiu. -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetLow2 = 8; // At lw. -const uint32_t Mips32r6RelativePatcherTest::kAnchorOffset = 0; // At auipc (where PC+0 points). -const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in addiu/lw. 
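The `diff += (diff & 0x8000) << 1` adjustment that recurs in these patchers and tests pre-compensates the high half for the sign extension the CPU applies to the low 16-bit immediate of addiu/lw (daddiu/lwu on MIPS64). A small self-contained check of that identity, independent of the deleted code:

    #include <cassert>
    #include <cstdint>

    // For any 32-bit displacement, the split below satisfies
    // (high << 16) + sign_extend16(low) == diff (mod 2^32), which is exactly
    // what lui/auipc followed by addiu/lw compute.
    void CheckHighLowSplit(uint32_t diff) {
      uint32_t adjusted = diff + ((diff & 0x8000u) << 1);  // Carry the borrow into the high half.
      uint32_t high = adjusted >> 16;
      int32_t low = static_cast<int16_t>(diff & 0xFFFFu);  // Sign-extended, as the hardware does.
      uint32_t recombined = (high << 16) + static_cast<uint32_t>(low);
      assert(recombined == diff);
    }

    int main() {
      CheckHighLowSplit(0x00001234u);
      CheckHighLowSplit(0x0000ABCDu);  // Low half has bit 15 set: needs the adjustment.
      CheckHighLowSplit(0xFFFF8000u);  // Negative displacement.
      return 0;
    }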
- - const uint8_t expected_code[] = { - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x8E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void Mips32r6RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -TEST_F(Mips32r6RelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -TEST_F(Mips32r6RelativePatcherTest, StringReference) { - TestStringReference(/* string_offset*/ 0x87651234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc deleted file mode 100644 index d876c76daae..00000000000 --- a/compiler/linker/mips/relative_patcher_mips_test.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips/relative_patcher_mips.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class MipsRelativePatcherTest : public RelativePatcherTest { - public: - MipsRelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips, "mips32r2") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t MipsRelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x00, 0x00, 0x10, 0x04, // nal - 0x34, 0x12, 0x12, 0x3C, // lui s2, high(diff); placeholder = 0x1234 - 0x21, 0x90, 0x5F, 0x02, // addu s2, s2, ra - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x8E, // lw s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t MipsRelativePatcherTest::kLiteralOffsetHigh = 4; // At lui. -const uint32_t MipsRelativePatcherTest::kLiteralOffsetLow1 = 12; // At addiu. -const uint32_t MipsRelativePatcherTest::kLiteralOffsetLow2 = 16; // At lw. -const uint32_t MipsRelativePatcherTest::kAnchorOffset = 8; // At addu (where PC+0 points). -const ArrayRef<const uint8_t> MipsRelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in addiu/lw. 
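The expected_code arrays in these tests hand-assemble the patched instructions byte by byte: MIPS instructions are stored little-endian here, so the 16-bit immediate occupies the first two bytes of each 4-byte instruction and the opcode/register fields sit in the upper bytes. A tiny helper expressing the same byte layout (a sketch, not code from this change):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Overwrite the 16-bit immediate of a little-endian MIPS instruction that
    // starts at `insn_offset` in `code`, leaving the opcode/register bytes intact.
    void SetImmediate16(std::vector<uint8_t>* code, size_t insn_offset, uint16_t imm) {
      (*code)[insn_offset + 0] = static_cast<uint8_t>(imm);       // Immediate bits 0..7.
      (*code)[insn_offset + 1] = static_cast<uint8_t>(imm >> 8);  // Immediate bits 8..15.
    }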
- - const uint8_t expected_code[] = { - 0x00, 0x00, 0x10, 0x04, - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C, - 0x21, 0x90, 0x5F, 0x02, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x8E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void MipsRelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -TEST_F(MipsRelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -TEST_F(MipsRelativePatcherTest, StringReference) { - TestStringReference(/* string_offset*/ 0x87651234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips64/relative_patcher_mips64.cc b/compiler/linker/mips64/relative_patcher_mips64.cc deleted file mode 100644 index aae5746278d..00000000000 --- a/compiler/linker/mips64/relative_patcher_mips64.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/mips64/relative_patcher_mips64.h" - -#include "compiled_method.h" -#include "debug/method_debug_info.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -uint32_t Mips64RelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t Mips64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. 
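As a concrete instance of what TestStringBssEntry above links against: the target of a kStringBssEntry patch is the .bss slot at bss_begin_ + string_entry_offset, and the patched displacement is measured from the anchor instruction. With the constants the test uses and an assumed anchor address, the arithmetic works out as follows (illustrative values only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t bss_begin = 0x12345678u;          // From TestStringBssEntry above.
      const uint32_t string_entry_offset = 0x1234u;    // From TestStringBssEntry above.
      const uint32_t anchor_address = 0x00001000u;     // Assumed method offset + kAnchorOffset.
      const uint32_t target_offset = bss_begin + string_entry_offset;
      assert(target_offset == 0x123468ACu);
      uint32_t diff = target_offset - anchor_address;  // What the patcher splits into high/low halves.
      assert(diff == 0x123458ACu);
      return 0;
    }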
-} - -uint32_t Mips64RelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. -} - -void Mips64RelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS64"; -} - -void Mips64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - bool high_patch = ((*code)[literal_offset + 0] == 0x34) && ((*code)[literal_offset + 1] == 0x12); - - // Perform basic sanity checks. - if (high_patch) { - // auipc reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); - DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); - } else { - // instr reg(s), offset_low - CHECK_EQ((*code)[literal_offset + 0], 0x78); - CHECK_EQ((*code)[literal_offset + 1], 0x56); - } - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - // Note that a combination of auipc with an instruction that adds a sign-extended - // 16-bit immediate operand (e.g. ld) provides a PC-relative range of - // PC-0x80000000 to PC+0x7FFF7FFF on MIPS64, that is, short of 2GB on one end - // by 32KB. - diff += (diff & 0x8000) << 1; // Account for sign extension in "instr reg(s), offset_low". - - if (high_patch) { - // auipc reg, offset_high - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); - } else { - // instr reg(s), offset_low - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 8); - } -} - -void Mips64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -std::vector<debug::MethodDebugInfo> Mips64RelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips64/relative_patcher_mips64.h b/compiler/linker/mips64/relative_patcher_mips64.h deleted file mode 100644 index 183bbedb396..00000000000 --- a/compiler/linker/mips64/relative_patcher_mips64.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ -#define ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ - -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class Mips64RelativePatcher FINAL : public RelativePatcher { - public: - Mips64RelativePatcher() {} - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - private: - DISALLOW_COPY_AND_ASSIGN(Mips64RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ diff --git a/compiler/linker/mips64/relative_patcher_mips64_test.cc b/compiler/linker/mips64/relative_patcher_mips64_test.cc deleted file mode 100644 index a02f5005e8c..00000000000 --- a/compiler/linker/mips64/relative_patcher_mips64_test.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips64/relative_patcher_mips64.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class Mips64RelativePatcherTest : public RelativePatcherTest { - public: - Mips64RelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips64, "default") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint8_t kUnpatchedPcRelativeCallRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t Mips64RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x66, // daddiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x9E, // lwu s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetHigh = 0; // At auipc. -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetLow1 = 4; // At daddiu. -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetLow2 = 8; // At lwu. -const uint32_t Mips64RelativePatcherTest::kAnchorOffset = 0; // At auipc (where PC+0 points). -const ArrayRef<const uint8_t> Mips64RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void Mips64RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in daddiu/lwu. 
- - const uint8_t expected_code[] = { - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x66, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x9E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void Mips64RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -TEST_F(Mips64RelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc deleted file mode 100644 index 13877f8f128..00000000000 --- a/compiler/linker/relative_patcher.cc +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/relative_patcher.h" - -#include "debug/method_debug_info.h" -#ifdef ART_ENABLE_CODEGEN_arm -#include "linker/arm/relative_patcher_thumb2.h" -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 -#include "linker/arm64/relative_patcher_arm64.h" -#endif -#ifdef ART_ENABLE_CODEGEN_mips -#include "linker/mips/relative_patcher_mips.h" -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 -#include "linker/mips64/relative_patcher_mips64.h" -#endif -#ifdef ART_ENABLE_CODEGEN_x86 -#include "linker/x86/relative_patcher_x86.h" -#endif -#ifdef ART_ENABLE_CODEGEN_x86_64 -#include "linker/x86_64/relative_patcher_x86_64.h" -#endif -#include "output_stream.h" - -namespace art { -namespace linker { - -std::unique_ptr<RelativePatcher> RelativePatcher::Create( - InstructionSet instruction_set, - const InstructionSetFeatures* features, - RelativePatcherTargetProvider* provider) { - class RelativePatcherNone FINAL : public RelativePatcher { - public: - RelativePatcherNone() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no patches expected. 
- } - - void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative call patch."; - } - - void PatchPcRelativeReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative dex cache array patch."; - } - - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unexpected baker read barrier branch patch."; - } - - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) OVERRIDE { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. - } - - private: - DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone); - }; - - UNUSED(features); - UNUSED(provider); - switch (instruction_set) { -#ifdef ART_ENABLE_CODEGEN_x86 - case InstructionSet::kX86: - return std::unique_ptr<RelativePatcher>(new X86RelativePatcher()); -#endif -#ifdef ART_ENABLE_CODEGEN_x86_64 - case InstructionSet::kX86_64: - return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher()); -#endif -#ifdef ART_ENABLE_CODEGEN_arm - case InstructionSet::kArm: - // Fall through: we generate Thumb2 code for "arm". - case InstructionSet::kThumb2: - return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider)); -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 - case InstructionSet::kArm64: - return std::unique_ptr<RelativePatcher>( - new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); -#endif -#ifdef ART_ENABLE_CODEGEN_mips - case InstructionSet::kMips: - return std::unique_ptr<RelativePatcher>( - new MipsRelativePatcher(features->AsMipsInstructionSetFeatures())); -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - case InstructionSet::kMips64: - return std::unique_ptr<RelativePatcher>(new Mips64RelativePatcher()); -#endif - default: - return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); - } -} - -bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { - static const uint8_t kPadding[] = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u - }; - DCHECK_LE(aligned_code_delta, sizeof(kPadding)); - if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { - return false; - } - size_code_alignment_ += aligned_code_delta; - return true; -} - -bool RelativePatcher::WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { - if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { - return false; - } - size_relative_call_thunks_ += thunk.size(); - return true; -} - -bool RelativePatcher::WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { - if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { - return false; - } - size_misc_thunks_ += thunk.size(); - return true; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h deleted file mode 100644 index b58e3dffbd6..00000000000 --- a/compiler/linker/relative_patcher.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file 
except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ -#define ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ - -#include <vector> - -#include "arch/instruction_set.h" -#include "arch/instruction_set_features.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "dex/method_reference.h" - -namespace art { - -class CompiledMethod; - -namespace debug { -struct MethodDebugInfo; -} // namespace debug - -namespace linker { - -class LinkerPatch; -class OutputStream; - -/** - * @class RelativePatcherTargetProvider - * @brief Interface for providing method offsets for relative call targets. - */ -class RelativePatcherTargetProvider { - public: - /** - * Find the offset of the target method of a relative call if known. - * - * The process of assigning target method offsets includes calls to the relative patcher's - * ReserveSpace() which in turn can use FindMethodOffset() to determine if a method already - * has an offset assigned and, if so, what's that offset. If the offset has not yet been - * assigned or if it's too far for the particular architecture's relative call, - * ReserveSpace() may need to allocate space for a special dispatch thunk. - * - * @param ref the target method of the relative call. - * @return true in the first element of the pair if the method was found, false otherwise; - * if found, the second element specifies the offset. - */ - virtual std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) = 0; - - protected: - virtual ~RelativePatcherTargetProvider() { } -}; - -/** - * @class RelativePatcher - * @brief Interface for architecture-specific link-time patching of PC-relative references. - */ -class RelativePatcher { - public: - static std::unique_ptr<RelativePatcher> Create( - InstructionSet instruction_set, const InstructionSetFeatures* features, - RelativePatcherTargetProvider* provider); - - virtual ~RelativePatcher() { } - - uint32_t CodeAlignmentSize() const { - return size_code_alignment_; - } - - uint32_t RelativeCallThunksSize() const { - return size_relative_call_thunks_; - } - - uint32_t MiscThunksSize() const { - return size_misc_thunks_; - } - - // Reserve space for thunks if needed before a method, return adjusted offset. - virtual uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) = 0; - - // Reserve space for thunks if needed after the last method, return adjusted offset. - // The caller may use this method to preemptively force thunk space reservation and - // then resume reservation for more methods. This is useful when there is a gap in - // the .text segment, for example when going to the next oat file for multi-image. - virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0; - - // Write relative call thunks if needed, return adjusted offset. Returns 0 on write failure. - virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; - - // Patch method code. The input displacement is relative to the patched location, - // the patcher may need to adjust it if the correct base is different. 
- virtual void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) = 0; - - // Patch a reference to a dex cache location. - virtual void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) = 0; - - // Patch a branch to a Baker read barrier thunk. - virtual void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) = 0; - - virtual std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo( - uint32_t executable_offset) = 0; - - protected: - RelativePatcher() - : size_code_alignment_(0u), - size_relative_call_thunks_(0u), - size_misc_thunks_(0u) { - } - - bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); - bool WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); - bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); - - private: - uint32_t size_code_alignment_; - uint32_t size_relative_call_thunks_; - uint32_t size_misc_thunks_; - - DISALLOW_COPY_AND_ASSIGN(RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h deleted file mode 100644 index d21f2795b98..00000000000 --- a/compiler/linker/relative_patcher_test.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ -#define ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ - -#include "arch/instruction_set.h" -#include "arch/instruction_set_features.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "compiled_method-inl.h" -#include "dex/verification_results.h" -#include "dex/method_reference.h" -#include "dex/string_reference.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_options.h" -#include "globals.h" -#include "gtest/gtest.h" -#include "linker/relative_patcher.h" -#include "oat.h" -#include "oat_quick_method_header.h" -#include "vector_output_stream.h" - -namespace art { -namespace linker { - -// Base class providing infrastructure for architecture-specific tests. 
-class RelativePatcherTest : public testing::Test { - protected: - RelativePatcherTest(InstructionSet instruction_set, const std::string& variant) - : compiler_options_(), - verification_results_(&compiler_options_), - driver_(&compiler_options_, - &verification_results_, - Compiler::kQuick, - instruction_set, - /* instruction_set_features*/ nullptr, - /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, - /* thread_count */ 1u, - /* swap_fd */ -1, - /* profile_compilation_info */ nullptr), - error_msg_(), - instruction_set_(instruction_set), - features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), - method_offset_map_(), - patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)), - bss_begin_(0u), - compiled_method_refs_(), - compiled_methods_(), - patched_code_(), - output_(), - out_("test output stream", &output_) { - CHECK(error_msg_.empty()) << instruction_set << "/" << variant; - patched_code_.reserve(16 * KB); - } - - MethodReference MethodRef(uint32_t method_idx) { - CHECK_NE(method_idx, 0u); - return MethodReference(nullptr, method_idx); - } - - void AddCompiledMethod( - MethodReference method_ref, - const ArrayRef<const uint8_t>& code, - const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>()) { - compiled_method_refs_.push_back(method_ref); - compiled_methods_.emplace_back(new CompiledMethod( - &driver_, - instruction_set_, - code, - /* frame_size_in_bytes */ 0u, - /* core_spill_mask */ 0u, - /* fp_spill_mask */ 0u, - /* method_info */ ArrayRef<const uint8_t>(), - /* vmap_table */ ArrayRef<const uint8_t>(), - /* cfi_info */ ArrayRef<const uint8_t>(), - patches)); - } - - uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) { - // We want to align the code rather than the preheader. - uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader); - uint32_t aligned_code_offset = - CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_); - return aligned_code_offset - unaligned_code_offset; - } - - void Link() { - // Reserve space. - static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset."); - uint32_t offset = kTrampolineSize; - size_t idx = 0u; - for (auto& compiled_method : compiled_methods_) { - offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]); - - uint32_t alignment_size = CodeAlignmentSize(offset); - offset += alignment_size; - - offset += sizeof(OatQuickMethodHeader); - uint32_t quick_code_offset = offset + compiled_method->CodeDelta(); - const auto code = compiled_method->GetQuickCode(); - offset += code.size(); - - method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset); - ++idx; - } - offset = patcher_->ReserveSpaceEnd(offset); - uint32_t output_size = offset; - output_.reserve(output_size); - - // Write data. 
- DCHECK(output_.empty()); - uint8_t dummy_trampoline[kTrampolineSize]; - memset(dummy_trampoline, 0, sizeof(dummy_trampoline)); - out_.WriteFully(dummy_trampoline, kTrampolineSize); - offset = kTrampolineSize; - static const uint8_t kPadding[] = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u - }; - uint8_t dummy_header[sizeof(OatQuickMethodHeader)]; - memset(dummy_header, 0, sizeof(dummy_header)); - for (auto& compiled_method : compiled_methods_) { - offset = patcher_->WriteThunks(&out_, offset); - - uint32_t alignment_size = CodeAlignmentSize(offset); - CHECK_LE(alignment_size, sizeof(kPadding)); - out_.WriteFully(kPadding, alignment_size); - offset += alignment_size; - - out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader)); - offset += sizeof(OatQuickMethodHeader); - ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); - if (!compiled_method->GetPatches().empty()) { - patched_code_.assign(code.begin(), code.end()); - code = ArrayRef<const uint8_t>(patched_code_); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod()); - uint32_t target_offset = - result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta(); - patcher_->PatchCall(&patched_code_, patch.LiteralOffset(), - offset + patch.LiteralOffset(), target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { - uint32_t target_offset = - bss_begin_ + string_index_to_offset_map_.Get(patch.TargetStringIndex().index_); - patcher_->PatchPcRelativeReference(&patched_code_, - patch, - offset + patch.LiteralOffset(), - target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kStringRelative) { - uint32_t target_offset = - string_index_to_offset_map_.Get(patch.TargetStringIndex().index_); - patcher_->PatchPcRelativeReference(&patched_code_, - patch, - offset + patch.LiteralOffset(), - target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - patcher_->PatchBakerReadBarrierBranch(&patched_code_, - patch, - offset + patch.LiteralOffset()); - } else { - LOG(FATAL) << "Bad patch type. " << patch.GetType(); - UNREACHABLE(); - } - } - } - out_.WriteFully(&code[0], code.size()); - offset += code.size(); - } - offset = patcher_->WriteThunks(&out_, offset); - CHECK_EQ(offset, output_size); - CHECK_EQ(output_.size(), output_size); - } - - bool CheckLinkedMethod(MethodReference method_ref, const ArrayRef<const uint8_t>& expected_code) { - // Sanity check: original code size must match linked_code.size(). - size_t idx = 0u; - for (auto ref : compiled_method_refs_) { - if (ref == method_ref) { - break; - } - ++idx; - } - CHECK_NE(idx, compiled_method_refs_.size()); - CHECK_EQ(compiled_methods_[idx]->GetQuickCode().size(), expected_code.size()); - - auto result = method_offset_map_.FindMethodOffset(method_ref); - CHECK(result.first); // Must have been linked. - size_t offset = result.second - compiled_methods_[idx]->CodeDelta(); - CHECK_LT(offset, output_.size()); - CHECK_LE(offset + expected_code.size(), output_.size()); - ArrayRef<const uint8_t> linked_code(&output_[offset], expected_code.size()); - if (linked_code == expected_code) { - return true; - } - // Log failure info. 
- DumpDiff(expected_code, linked_code); - return false; - } - - void DumpDiff(const ArrayRef<const uint8_t>& expected_code, - const ArrayRef<const uint8_t>& linked_code) { - std::ostringstream expected_hex; - std::ostringstream linked_hex; - std::ostringstream diff_indicator; - static const char digits[] = "0123456789abcdef"; - bool found_diff = false; - for (size_t i = 0; i != expected_code.size(); ++i) { - expected_hex << " " << digits[expected_code[i] >> 4] << digits[expected_code[i] & 0xf]; - linked_hex << " " << digits[linked_code[i] >> 4] << digits[linked_code[i] & 0xf]; - if (!found_diff) { - found_diff = (expected_code[i] != linked_code[i]); - diff_indicator << (found_diff ? " ^^" : " "); - } - } - CHECK(found_diff); - std::string expected_hex_str = expected_hex.str(); - std::string linked_hex_str = linked_hex.str(); - std::string diff_indicator_str = diff_indicator.str(); - if (diff_indicator_str.length() > 60) { - CHECK_EQ(diff_indicator_str.length() % 3u, 0u); - size_t remove = diff_indicator_str.length() / 3 - 5; - std::ostringstream oss; - oss << "[stripped " << remove << "]"; - std::string replacement = oss.str(); - expected_hex_str.replace(0u, remove * 3u, replacement); - linked_hex_str.replace(0u, remove * 3u, replacement); - diff_indicator_str.replace(0u, remove * 3u, replacement); - } - LOG(ERROR) << "diff expected_code linked_code"; - LOG(ERROR) << "<" << expected_hex_str; - LOG(ERROR) << ">" << linked_hex_str; - LOG(ERROR) << " " << diff_indicator_str; - } - - // Map method reference to assinged offset. - // Wrap the map in a class implementing RelativePatcherTargetProvider. - class MethodOffsetMap FINAL : public RelativePatcherTargetProvider { - public: - std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE { - auto it = map.find(ref); - if (it == map.end()) { - return std::pair<bool, uint32_t>(false, 0u); - } else { - return std::pair<bool, uint32_t>(true, it->second); - } - } - SafeMap<MethodReference, uint32_t> map; - }; - - static const uint32_t kTrampolineSize = 4u; - static const uint32_t kTrampolineOffset = 0u; - - CompilerOptions compiler_options_; - VerificationResults verification_results_; - CompilerDriver driver_; // Needed for constructing CompiledMethod. - std::string error_msg_; - InstructionSet instruction_set_; - std::unique_ptr<const InstructionSetFeatures> features_; - MethodOffsetMap method_offset_map_; - std::unique_ptr<RelativePatcher> patcher_; - uint32_t bss_begin_; - SafeMap<uint32_t, uint32_t> string_index_to_offset_map_; - std::vector<MethodReference> compiled_method_refs_; - std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_; - std::vector<uint8_t> patched_code_; - std::vector<uint8_t> output_; - VectorOutputStream out_; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc deleted file mode 100644 index cdd2cef13ab..00000000000 --- a/compiler/linker/x86/relative_patcher_x86.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86/relative_patcher_x86.h" - -#include "compiled_method.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -void X86RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - - // Check that the anchor points to pop in a "call +0; pop <reg>" sequence. - DCHECK_GE(anchor_literal_offset, 5u); - DCHECK_LT(anchor_literal_offset, code->size()); - DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u); - DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u); - - // Check that the patched data contains kDummy32BitOffset. - // Must match X86Mir2Lir::kDummy32BitOffset and CodeGeneratorX86_64::kDummy32BitOffset. - constexpr int kDummy32BitOffset = 256; - DCHECK_LE(literal_offset, code->size()); - DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0)); - DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8)); - DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16)); - DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24)); - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8); - (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24); -} - -void X86RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h deleted file mode 100644 index 63a83387223..00000000000 --- a/compiler/linker/x86/relative_patcher_x86.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ -#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ - -#include "linker/x86/relative_patcher_x86_base.h" - -namespace art { -namespace linker { - -class X86RelativePatcher FINAL : public X86BaseRelativePatcher { - public: - X86RelativePatcher() { } - - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc deleted file mode 100644 index 6a9690d7681..00000000000 --- a/compiler/linker/x86/relative_patcher_x86_base.cc +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86/relative_patcher_x86_base.h" - -#include "debug/method_debug_info.h" - -namespace art { -namespace linker { - -uint32_t X86BaseRelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t X86BaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. -} - -std::vector<debug::MethodDebugInfo> X86BaseRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. 
- - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h deleted file mode 100644 index 6097345657d..00000000000 --- a/compiler/linker/x86/relative_patcher_x86_base.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ -#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ - -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class X86BaseRelativePatcher : public RelativePatcher { - public: - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - protected: - X86BaseRelativePatcher() { } - - // PC displacement from patch location; the base address of x86/x86-64 relative - // calls and x86-64 RIP-relative addressing is the PC of the next instruction and - // the patch location is 4 bytes earlier. - static constexpr int32_t kPcDisplacement = 4; - - private: - DISALLOW_COPY_AND_ASSIGN(X86BaseRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc deleted file mode 100644 index b855dec91db..00000000000 --- a/compiler/linker/x86/relative_patcher_x86_test.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/x86/relative_patcher_x86.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class X86RelativePatcherTest : public RelativePatcherTest { - public: - X86RelativePatcherTest() : RelativePatcherTest(InstructionSet::kX86, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } -}; - -const uint8_t X86RelativePatcherTest::kCallRawCode[] = { - 0xe8, 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86RelativePatcherTest::kCallCode(kCallRawCode); - -TEST_F(X86RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xe8, 0xfb, 0xff, 0xff, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method1_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_after), - static_cast<uint8_t>(diff_after >> 8), - static_cast<uint8_t>(diff_after >> 16), - static_cast<uint8_t>(diff_after >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method2_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_before), - static_cast<uint8_t>(diff_before >> 8), - static_cast<uint8_t>(diff_before >> 16), - static_cast<uint8_t>(diff_before >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(X86RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1)); - ASSERT_TRUE(result.first); - uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); - static const uint8_t expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, StringBssEntry) { - bss_begin_ = 0x12345678; - constexpr size_t kStringEntryOffset = 0x1234; - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); - static const uint8_t 
raw_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8b, 0x83, 0x00, 0x01, 0x00, 0x00, // mov eax, [ebx + 256 (kDummy32BitValue)] - }; - constexpr uint32_t anchor_offset = 5u; // After call +0. - ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + anchor_offset); - static const uint8_t expected_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8b, 0x83, // mov eax, [ebx + diff] - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, StringReference) { - constexpr uint32_t kStringIndex = 1u; - constexpr uint32_t kStringOffset = 0x12345678; - string_index_to_offset_map_.Put(kStringIndex, kStringOffset); - static const uint8_t raw_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8d, 0x83, 0x00, 0x01, 0x00, 0x00, // lea eax, [ebx + 256 (kDummy32BitValue)] - }; - constexpr uint32_t anchor_offset = 5u; // After call +0. - ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kStringOffset - (result.second + anchor_offset); - static const uint8_t expected_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8d, 0x83, // lea eax, [ebx + diff] - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc deleted file mode 100644 index 96335649990..00000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/x86_64/relative_patcher_x86_64.h" - -#include "compiled_method.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(patch.LiteralOffset() + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; -} - -void X86_64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h deleted file mode 100644 index 4f3ec498cb8..00000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ -#define ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ - -#include "linker/x86/relative_patcher_x86_base.h" - -namespace art { -namespace linker { - -class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { - public: - X86_64RelativePatcher() { } - - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc deleted file mode 100644 index 6baa92de36c..00000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86_64/relative_patcher_x86_64.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class X86_64RelativePatcherTest : public RelativePatcherTest { - public: - X86_64RelativePatcherTest() : RelativePatcherTest(InstructionSet::kX86_64, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kDexCacheLoadRawCode[]; - static const ArrayRef<const uint8_t> kDexCacheLoadCode; - static const uint8_t kStringReferenceRawCode[]; - static const ArrayRef<const uint8_t> kStringReferenceCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } -}; - -const uint8_t X86_64RelativePatcherTest::kCallRawCode[] = { - 0xe8, 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t X86_64RelativePatcherTest::kDexCacheLoadRawCode[] = { - 0x8b, 0x05, // mov eax, [rip + <offset>] - 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode( - kDexCacheLoadRawCode); - -const uint8_t X86_64RelativePatcherTest::kStringReferenceRawCode[] = { - 0x8d, 0x05, // lea eax, [rip + <offset>] - 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kStringReferenceCode( - kStringReferenceRawCode); - -TEST_F(X86_64RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xe8, 0xfb, 0xff, 0xff, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method1_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_after), - static_cast<uint8_t>(diff_after >> 8), - static_cast<uint8_t>(diff_after >> 16), - static_cast<uint8_t>(diff_after >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method2_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_before), - static_cast<uint8_t>(diff_before >> 8), - static_cast<uint8_t>(diff_before >> 16), - static_cast<uint8_t>(diff_before >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { - 
LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); - static const uint8_t expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, StringBssEntry) { - bss_begin_ = 0x12345678; - constexpr size_t kStringEntryOffset = 0x1234; - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + kDexCacheLoadCode.size()); - static const uint8_t expected_code[] = { - 0x8b, 0x05, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, StringReference) { - constexpr uint32_t kStringIndex = 1u; - constexpr uint32_t kStringOffset = 0x12345678; - string_index_to_offset_map_.Put(kStringIndex, kStringOffset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch( - kStringReferenceCode.size() - 4u, nullptr, 0u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), kStringReferenceCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kStringOffset - (result.second + kStringReferenceCode.size()); - static const uint8_t expected_code[] = { - 0x8d, 0x05, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -} // namespace linker -} // namespace art diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 6abda9b3026..231017f55e6 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -51,6 +51,8 @@ #include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "graph_visualizer.h" +#include "image.h" +#include "gc/space/image_space.h" #include "intern_table.h" #include "intrinsics.h" #include "mirror/array-inl.h" @@ -447,6 +449,18 @@ void CodeGenerator::EmitLinkerPatches( // No linker patches by default. } +bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const { + // Code generators that create patches requiring thunk compilation should override this function. 
+ return false; +} + +void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, + /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED, + /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) { + // Code generators that create patches requiring thunk compilation should override this function. + LOG(FATAL) << "Unexpected call to EmitThunkCode()."; +} + void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_safepoint_spill_size, size_t number_of_out_slots, @@ -722,6 +736,47 @@ void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) { } } +static uint32_t GetBootImageOffsetImpl(const void* object, ImageHeader::ImageSections section) { + Runtime* runtime = Runtime::Current(); + DCHECK(runtime->IsAotCompiler()); + const std::vector<gc::space::ImageSpace*>& boot_image_spaces = + runtime->GetHeap()->GetBootImageSpaces(); + // Check that the `object` is in the expected section of one of the boot image files. + DCHECK(std::any_of(boot_image_spaces.begin(), + boot_image_spaces.end(), + [object, section](gc::space::ImageSpace* space) { + uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return space->GetImageHeader().GetImageSection(section).Contains(offset); + })); + uintptr_t begin = reinterpret_cast<uintptr_t>(boot_image_spaces.front()->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return dchecked_integral_cast<uint32_t>(offset); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image classes are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadClass* load_class) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kBootImageRelRo); + ObjPtr<mirror::Class> klass = load_class->GetClass().Get(); + DCHECK(klass != nullptr); + return GetBootImageOffsetImpl(klass.Ptr(), ImageHeader::kSectionObjects); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image strings are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadString* load_string) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kBootImageRelRo); + ObjPtr<mirror::String> string = load_string->GetString().Get(); + DCHECK(string != nullptr); + return GetBootImageOffsetImpl(string.Ptr(), ImageHeader::kSectionObjects); +} + +uint32_t CodeGenerator::GetBootImageOffset(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->GetMethodLoadKind(), HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo); + ArtMethod* method = invoke->GetResolvedMethod(); + DCHECK(method != nullptr); + return GetBootImageOffsetImpl(method, ImageHeader::kSectionArtMethods); +} + void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { // The DCHECKS below check that a register is not specified twice in // the summary. 
The out location can overlap with an input, so we need diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index f784a1a8573..62cacebaa1e 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -21,15 +21,16 @@ #include "arch/instruction_set_features.h" #include "base/arena_containers.h" #include "base/arena_object.h" +#include "base/array_ref.h" #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/enums.h" +#include "base/memory_region.h" #include "dex/string_reference.h" #include "dex/type_reference.h" #include "globals.h" #include "graph_visualizer.h" #include "locations.h" -#include "memory_region.h" #include "nodes.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" @@ -74,6 +75,7 @@ class CodeAllocator { virtual ~CodeAllocator() {} virtual uint8_t* Allocate(size_t size) = 0; + virtual ArrayRef<const uint8_t> GetMemory() const = 0; private: DISALLOW_COPY_AND_ASSIGN(CodeAllocator); @@ -210,6 +212,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void Initialize() = 0; virtual void Finalize(CodeAllocator* allocator); virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); + virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; + virtual void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name); virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; @@ -438,6 +444,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: return false; + case TypeCheckKind::kBitstringCheck: + return true; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); @@ -556,6 +564,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { Location runtime_return_location); void GenerateLoadClassRuntimeCall(HLoadClass* cls); + uint32_t GetBootImageOffset(HLoadClass* load_class); + uint32_t GetBootImageOffset(HLoadString* load_string); + uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke); + static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 60f8f98757d..d4cfab82de3 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -30,7 +30,6 @@ #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_arm64.h" -#include "linker/arm64/relative_patcher_arm64.h" #include "linker/linker_patch.h" #include "lock_word.h" #include "mirror/array-inl.h" @@ -78,6 +77,7 @@ using helpers::OutputFPRegister; using helpers::OutputRegister; using helpers::QRegisterFrom; using helpers::RegisterFrom; +using helpers::SRegisterFrom; using helpers::StackOperandFrom; using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; @@ -1424,6 +1424,62 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { __ FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. 
+ if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 4u); + + auto GetInsn = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 4u); + return + (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8) + + (static_cast<uint32_t>(code[offset + 2]) << 16)+ + (static_cast<uint32_t>(code[offset + 3]) << 24); + }; + + const uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + // LDR (immediate) with correct base_reg. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), + // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); + CheckValidReg((next_insn >> 16) & 0x1f); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn(literal_offset - 4u); + // LDR (immediate) with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { @@ -2128,6 +2184,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, vixl::aarch64::Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + } + // Compare the bitstring bits to `path_to_root`. 
+ __ Cmp(temp, path_to_root); +} + void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; @@ -3865,6 +3941,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -3873,7 +3951,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3886,7 +3970,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4072,6 +4158,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Cset(out, eq); + if (zero.IsLinked()) { + __ B(&done); + } + break; + } } if (zero.IsLinked()) { @@ -4094,7 +4197,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4104,7 +4213,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); @@ -4285,6 +4396,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); @@ -4459,12 +4584,23 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = GetBootImageOffset(invoke); + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { - // Add ADRP with its PC-relative DexCache access patch. + // Add ADRP with its PC-relative .bss entry patch. MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); - // Add LDR with its PC-relative DexCache access patch. + // Add LDR with its PC-relative .bss entry patch. vixl::aarch64::Label* ldr_label = NewMethodBssEntryPatch(target_method, adrp_label); EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); @@ -4559,6 +4695,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { @@ -4681,6 +4824,14 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. 
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4700,11 +4851,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4719,6 +4869,44 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + Arm64Assembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + } + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + } + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. 
+ assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { return uint32_literals_.GetOrCreate( value, @@ -4779,7 +4967,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -4859,12 +5047,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + /* fixup_label */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -4888,23 +5076,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative type patch. - const DexFile& dex_file = cls->GetDexFile(); - dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative type patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index))); - if (masked_hash != 0) { - __ Sub(out.W(), out.W(), Operand(masked_hash)); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -4914,16 +5095,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA vixl::aarch64::Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its PC-relative Class patch. + // Add LDR with its PC-relative Class .bss entry patch. 
vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(cls, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + read_barrier_option); generate_null_check = true; break; } @@ -4931,12 +5112,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass())); - GenerateGcRootFieldLoad(cls, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + out.X(), + /* offset */ 0, + /* fixup_label */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -4989,7 +5170,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5055,16 +5236,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative String patch. - const DexFile& dex_file = load->GetDexFile(); - const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative String patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); return; } @@ -5076,16 +5256,16 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its .bss entry String patch. + // Add LDR with its PC-relative String .bss entry patch. 
vixl::aarch64::Label* ldr_label = codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(load, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + kCompilerReadBarrierOption); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); @@ -5098,12 +5278,12 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), load->GetStringIndex(), load->GetString())); - GenerateGcRootFieldLoad(load, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + out.X(), + /* offset */ 0, + /* fixup_label */ nullptr, + kCompilerReadBarrierOption); return; } default: @@ -5462,6 +5642,153 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +// TODO: integrate with HandleBinaryOp? +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + Register op1_reg; + Register op2_reg; + Register out_reg; + if (type == DataType::Type::kInt64) { + op1_reg = XRegisterFrom(op1); + op2_reg = XRegisterFrom(op2); + out_reg = XRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + op1_reg = WRegisterFrom(op1); + op2_reg = WRegisterFrom(op2); + out_reg = WRegisterFrom(out); + } + + __ Cmp(op1_reg, op2_reg); + __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt); +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + FPRegister op1_reg; + FPRegister op2_reg; + FPRegister out_reg; + if (type == DataType::Type::kFloat64) { + op1_reg = DRegisterFrom(op1); + op2_reg = DRegisterFrom(op2); + out_reg = DRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + op1_reg = SRegisterFrom(op1); + op2_reg = SRegisterFrom(op2); + out_reg = SRegisterFrom(out); + } + + if (is_min) { + __ Fmin(out_reg, op1_reg, op2_reg); + } else { + __ Fmax(out_reg, op1_reg, op2_reg); + } +} + +// TODO: integrate with HandleBinaryOp? 
+void InstructionCodeGeneratorARM64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARM64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARM64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARM64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + Register in_reg = InputRegisterAt(abs, 0); + Register out_reg = OutputRegister(abs); + __ Cmp(in_reg, Operand(0)); + __ Cneg(out_reg, in_reg, lt); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FPRegister in_reg = InputFPRegisterAt(abs, 0); + FPRegister out_reg = OutputFPRegister(abs); + __ Fabs(out_reg, in_reg); + break; + } + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } @@ -5905,7 +6232,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( +void CodeGeneratorARM64::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, Register obj, @@ -5939,9 +6266,8 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( DCHECK(temps.IsAvailable(ip0)); DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); - vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; @@ -5970,14 +6296,14 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. The entrypoint will // be loaded by the slow path code. 
SlowPathCodeARM64* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); - codegen_->AddSlowPath(slow_path); + new (GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); + AddSlowPath(slow_path); // /* GcRoot<mirror::Object> */ root = *(obj + offset) if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); + EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); } static_assert( sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), @@ -5997,10 +6323,10 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Add(root_reg.X(), obj.X(), offset); } else { - codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); + EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); } // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -6008,12 +6334,12 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); + EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); } // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6062,9 +6388,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins DCHECK(temps.IsAvailable(ip0)); DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); - uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), - obj.GetCode()); + uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); { @@ -6149,8 +6473,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins DCHECK(temps.IsAvailable(ip0)); DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); __ Add(temp.X(), obj.X(), Operand(data_offset)); @@ -6510,5 +6833,176 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_ #undef __ #undef QUICK_ENTRY_POINT +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register base_reg, + vixl::aarch64::MemOperand& lock_word, + vixl::aarch64::Label* slow_path, + vixl::aarch64::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip0.W(), lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. 
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); + static_assert( + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, + "Field and array LDR offsets must be the same to reuse the same code."); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); + __ Br(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint`. +static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register entrypoint) { + // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + +void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + auto holder_reg = + Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch64::Label throw_npe_label; + vixl::aarch64::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ Cbz(holder_reg.W(), throw_npe); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. 
+ vixl::aarch64::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. + __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Br(ip1); // Jump to the entrypoint. + break; + } + case BakerReadBarrierKind::kArray: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffset(), 0); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). + __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip0, base_reg); // Move the base register to ip0. + __ Br(ip1); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + auto root_reg = + Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label return_label, not_marked, forwarding_address; + __ Cbz(root_reg, &return_label); + MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip0.W(), lock_word); + __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked); + __ Bind(&return_label); + __ Br(lr); + __ Bind(&not_marked); + __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); + __ B(&forwarding_address, mi); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to + // art_quick_read_barrier_mark_introspection_gc_roots.
+ __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + __ Mov(ip0.W(), root_reg); + __ Br(ip1); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift); + __ Br(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0654046de5d..aa343b1185d 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #include "arch/arm64/quick_method_frame_info_arm64.h" +#include "base/bit_field.h" #include "code_generator.h" #include "common_arm64.h" #include "dex/dex_file_types.h" @@ -36,6 +37,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Arm64RelativePatcherTest; +} // namespace linker + namespace arm64 { class CodeGeneratorARM64; @@ -264,6 +270,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::aarch64::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch64::Register temp); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); @@ -273,6 +281,10 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -303,17 +315,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch64::Register obj, - uint32_t offset, - vixl::aarch64::Label* fixup_label, - ReadBarrierOption read_barrier_option); // Generate a floating-point comparison. 
void GenerateFcmp(HInstruction* instruction); @@ -561,7 +562,14 @@ class CodeGeneratorARM64 : public CodeGenerator { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative method patch for an instruction and return the label + // Add a new boot image relocation patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new boot image method patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -575,7 +583,7 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative type patch for an instruction and return the label + // Add a new boot image type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -591,7 +599,7 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative string patch for an instruction and return the label + // Add a new boot image string patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -628,9 +636,24 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Register base); void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch64::Register obj, + uint32_t offset, + vixl::aarch64::Label* fixup_label, + ReadBarrierOption read_barrier_option); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -765,6 +788,62 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. 
+ kLast = kGcRoot + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch64::lr.GetCode() && + reg != vixl::aarch64::ip0.GetCode() && + reg != vixl::aarch64::ip1.GetCode()) << reg; + } + + static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + void CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; using StringToLiteralMap = ArenaSafeMap<StringReference, @@ -820,7 +899,8 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -828,7 +908,7 @@ class CodeGeneratorARM64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; @@ -840,6 +920,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; + friend class linker::Arm64RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64); }; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 2f495fc15fd..7350b146f95 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -29,7 +29,6 @@ #include "gc/accounting/card_table.h" #include "heap_poisoning.h" #include "intrinsics_arm_vixl.h" -#include "linker/arm/relative_patcher_thumb2.h" #include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" @@ -94,9 +93,6 @@ constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; -// The reserved entrypoint register for link-time generated thunks. -const vixl32::Register kBakerCcEntrypointRegister = r4; - // Using a base helps identify when we hit Marking Register check breakpoints. constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; @@ -116,8 +112,6 @@ static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope DCHECK(temps->IsAvailable(ip)); temps->Exclude(ip); DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); - DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), - linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); @@ -2422,6 +2416,80 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { FixJumpTables(); GetAssembler()->FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. + if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 2u); + + auto GetInsn16 = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 2u); + return (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8); + }; + auto GetInsn32 = [=](uint32_t offset) { + return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0); + }; + + uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. 
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code.size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); + CheckValidReg(next_insn & 0xf); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { @@ -4690,6 +4758,299 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + vixl32::Register op1 = 
RegisterFrom(op1_loc); + vixl32::Register op2 = RegisterFrom(op2_loc); + vixl32::Register out = RegisterFrom(out_loc); + + __ Cmp(op1, op2); + + { + ExactAssemblyScope aas(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? ge : le, out, op2); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::Register op1_lo = LowRegisterFrom(op1_loc); + vixl32::Register op1_hi = HighRegisterFrom(op1_loc); + vixl32::Register op2_lo = LowRegisterFrom(op2_loc); + vixl32::Register op2_hi = HighRegisterFrom(op2_loc); + vixl32::Register out_lo = LowRegisterFrom(out_loc); + vixl32::Register out_hi = HighRegisterFrom(out_loc); + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + DCHECK(op1_lo.Is(out_lo)); + DCHECK(op1_hi.Is(out_hi)); + + // Compare op1 >= op2, or op1 < op2. + __ Cmp(out_lo, op2_lo); + __ Sbcs(temp, out_hi, op2_hi); + + // Now GE/LT condition code is correct for the long comparison. + { + vixl32::ConditionType cond = is_min ? ge : lt; + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cond); + __ mov(cond, out_lo, op2_lo); + __ mov(cond, out_hi, op2_hi); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::SRegister op1 = SRegisterFrom(op1_loc); + vixl32::SRegister op2 = SRegisterFrom(op2_loc); + vixl32::SRegister out = SRegisterFrom(out_loc); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp1 = temps.Acquire(); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0)); + vixl32::Label nan, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F32, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). + __ Vmov(temp1, op1); + __ Vmov(temp2, op2); + if (is_min) { + __ Orr(temp1, temp1, temp2); + } else { + __ And(temp1, temp1, temp2); + } + __ Vmov(out, temp1); + __ B(final_label); + + // handle NaN input. + __ Bind(&nan); + __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
+ __ Vmov(out, temp1); + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. + return; + } + + vixl32::DRegister op1 = DRegisterFrom(op1_loc); + vixl32::DRegister op2 = DRegisterFrom(op2_loc); + vixl32::DRegister out = DRegisterFrom(out_loc); + vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F64, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0). + if (!is_min) { + __ Vand(F64, out, op1, op2); + __ B(final_label); + } + + // handle op1 == op2, min(+0.0,-0.0), NaN input. + __ Bind(&handle_nan_eq); + __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(minmax, is_min); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(minmax, is_min); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARMVIXL::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARMVIXL::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch 
(abs->GetResultType()) { + case DataType::Type::kInt32: { + vixl32::Register in_reg = RegisterFrom(locations->InAt(0)); + vixl32::Register out_reg = RegisterFrom(locations->Out()); + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, out_reg, mask); + break; + } + case DataType::Type::kInt64: { + Location in = locations->InAt(0); + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + Location output = locations->Out(); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, out_reg_lo, mask); + __ Eor(out_reg_hi, out_reg_hi, mask); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0)); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); @@ -7033,7 +7394,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7120,11 +7481,11 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ vixl32::Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -7143,25 +7504,19 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Sub(out, out, Operand(masked_hash)); - } break; } case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); generate_null_check = true; break; } @@ -7170,7 +7525,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ cls->GetTypeIndex(), cls->GetClass())); // /* GcRoot<mirror::Class> */ out = *out - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -7236,11 +7591,72 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, + vixl32::Register temp, + vixl32::FlagsUpdate flags_update) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs + // the Z flag for BNE. This is indicated by the `flags_update` parameter. + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root); + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else if (IsUint<16>(path_to_root)) { + if (temp.IsLow()) { + // Note: Optimized for size but contains one more dependent instruction than necessary. + // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the + // macro assembler would use the high reg IP for the constant by default. + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2 + __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3 + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else { + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } + } else { + // Shift out bits that do not contribute to the comparison. 
+ __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root << (32u - mask_bits)); + } else { + __ Sub(temp, temp, path_to_root << (32u - mask_bits)); + } + } + } +} + HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7304,10 +7720,10 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); return; @@ -7317,7 +7733,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); @@ -7331,7 +7748,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetStringIndex(), load->GetString())); // /* GcRoot<mirror::String> */ out = *out - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); return; } default: @@ -7427,6 +7845,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7435,7 +7855,13 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. 
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7450,7 +7876,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? vixl32::Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7690,6 +8118,26 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out, DontCare); + // If `out` is a low reg and we would have another low reg temp, we could + // optimize this as RSBS+ADC, see GenerateConditionWithZero(). + // + // Also, in some cases when `out` is a low reg and we're loading a constant to IP + // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size + // would be the same and we would have fewer direct data dependencies. + codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR + break; + } } if (done.IsReferenced()) { @@ -7707,7 +8155,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7716,7 +8170,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
vixl32::Register() + : InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -7901,6 +8357,20 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop, /* far_target */ false); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsReferenced()) { __ Bind(&done); @@ -8330,7 +8800,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( +void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, vixl32::Register obj, @@ -8361,9 +8831,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( - root_reg.GetCode(), narrow); - vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes); vixl32::Label return_address; @@ -8374,7 +8843,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( DCHECK_LT(offset, kReferenceLoadMinFarOffset); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); - EmitPlaceholderBne(codegen_, bne_label); + EmitPlaceholderBne(this, bne_label); __ Bind(&return_address); DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET @@ -8394,8 +8863,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. The entrypoint will // be loaded by the slow path code. SlowPathCodeARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); - codegen_->AddSlowPath(slow_path); + new (GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); + AddSlowPath(slow_path); // /* GcRoot<mirror::Object> */ root = *(obj + offset) GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); @@ -8416,7 +8885,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // /* GcRoot<mirror::Object>* */ root = obj + offset __ Add(root_reg, obj, offset); // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -8425,7 +8894,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. 
} - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18); + MaybeGenerateMarkingRegisterCheck(/* code */ 18); } void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { @@ -8486,8 +8955,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i } UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), obj.GetCode(), narrow); + uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); { @@ -8573,8 +9041,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode()); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); __ Add(data_reg, obj, Operand(data_offset)); @@ -8711,7 +9178,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, Location ref, - vixl::aarch32::Register obj, + vixl32::Register obj, uint32_t offset, Location index, ScaleFactor scale_factor, @@ -8901,6 +9368,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* labels = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -8998,6 +9473,13 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( } } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch( + uint32_t boot_image_offset) { + return NewPcRelativePatch(/* dex_file */ nullptr, + boot_image_offset, + &boot_image_method_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( MethodReference target_method) { return NewPcRelativePatch( @@ -9036,7 +9518,7 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } -vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { +vixl32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { baker_read_barrier_patches_.emplace_back(custom_data); return &baker_read_barrier_patches_.back().label; } @@ -9088,6 +9570,14 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be 
null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -9107,11 +9597,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -9126,6 +9615,45 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + assembler.LoadFromOffset( + arm::kLoadWord, + vixl32::pc, + vixl32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + assembler.GetVIXLAssembler()->Bkpt(0); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. + assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( uint32_t value, Uint32ToLiteralMap* map) { @@ -9370,5 +9898,211 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( #undef QUICK_ENTRY_POINT #undef TODO_VIXL32 +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler, + vixl32::Register base_reg, + vixl32::MemOperand& lock_word, + vixl32::Label* slow_path, + int32_t raw_ldr_offset, + vixl32::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. 
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target */ false); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + __ Add(lr, lr, raw_ldr_offset); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint` +static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler, + vixl32::Register entrypoint) { + // The register where the read barrier introspection entrypoint is loaded + // is fixed: `kBakerCcEntrypointRegister` (R4). + DCHECK(entrypoint.Is(kBakerCcEntrypointRegister)); + // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + +void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl32::Label throw_npe_label; + vixl32::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target */ false); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. + vixl32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath( + assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). 
*/ -1 + + raw_ldr_offset; + vixl32::Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Bx(ep_reg); // Jump to the entrypoint. + break; + } + case BakerReadBarrierKind::kArray: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffsetImmediate(), 0); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + MemOperand ldr_address(lr, ldr_offset + 2); + __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", + // i.e. Rm+32 because the scale in imm2 is 2. + vixl32::Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); + __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip, base_reg); // Move the base register to ip0. + __ Bx(ep_reg); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. 
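In the field and array thunks above, the slow path recovers the original load's offset straight from the instruction stream with UBFX: a 32-bit LDR carries "Rt | imm12" in its second halfword, while a 16-bit LDR (encoding T1) carries imm5 in bits [10:6], scaled by 4 for word loads. The same extraction in plain C++, with descriptive helper names of my own:

#include <cstdint>

inline uint32_t WideLdrFieldOffset(uint16_t second_halfword) {
  return second_halfword & 0xFFFu;    // imm12, byte offset used as-is
}

inline uint32_t NarrowLdrFieldOffset(uint16_t insn) {
  return ((insn >> 6) & 0x1Fu) << 2;  // imm5 * 4
}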
+ vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label return_label, not_marked, forwarding_address; + __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); + MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip, lock_word); + __ Tst(ip, LockWord::kMarkBitStateMaskShifted); + __ B(eq, ¬_marked); + __ Bind(&return_label); + __ Bx(lr); + __ Bind(¬_marked); + static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, + "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " + " the highest bits and the 'forwarding address' state to have all bits set"); + __ Cmp(ip, Operand(0xc0000000)); + __ B(hs, &forwarding_address); + vixl32::Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); + // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister + // to art_quick_read_barrier_mark_introspection_gc_roots. + int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); + __ Mov(ip, root_reg); + __ Bx(ep_reg); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); + __ Bx(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 536da41d07f..6b9919ab15e 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -36,6 +36,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Thumb2RelativePatcherTest; +} // namespace linker + namespace arm { // This constant is used as an approximate margin when emission of 
veneer and literal pools @@ -108,6 +113,9 @@ static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = { static const size_t kRuntimeParameterFpuRegistersLengthVIXL = arraysize(kRuntimeParameterFpuRegistersVIXL); +// The reserved entrypoint register for link-time generated thunks. +const vixl::aarch32::Register kBakerCcEntrypointRegister = vixl32::r4; + class LoadClassSlowPathARMVIXL; class CodeGeneratorARMVIXL; @@ -322,6 +330,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch32::Register temp, + vixl::aarch32::FlagsUpdate flags_update); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); @@ -349,6 +360,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxLong(LocationSummary* locations, bool is_min); + void GenerateMinMaxFloat(HInstruction* minmax, bool is_min); + void GenerateMinMaxDouble(HInstruction* minmax, bool is_min); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -379,16 +396,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch32::Register obj, - uint32_t offset, - ReadBarrierOption read_barrier_option); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, @@ -574,6 +581,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Label add_pc_label; }; + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); @@ -596,6 +604,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Handle<mirror::Class> handle); void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; @@ -603,6 +615,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // is added only for AOT compilation if link-time generated thunks for fields are enabled. 
void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch32::Register obj, + uint32_t offset, + ReadBarrierOption read_barrier_option); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -757,6 +779,83 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register temp = vixl32::Register()); private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kLast = kGcRoot + }; + + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast = kNarrow + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* pc is invalid */ 15u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = + BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBakerReadBarrierBitsForRegister, + kBitsForBakerReadBarrierWidth>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != kBakerCcEntrypointRegister.GetCode()) << reg; + } + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { + CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); + } + + void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); @@ -798,7 +897,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -806,7 +906,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; @@ -818,6 +918,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; + friend class linker::Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); }; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 87e6d6834b7..25e2eddbfab 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1597,6 +1597,14 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. 
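Backing up to the EncodeBakerReadBarrier*Data helpers in code_generator_arm_vixl.h above: the custom_data word packs the kind, two register numbers and the access width with BitField. With the widths MinimumBitsToStore works out (2 bits of kind, 4 bits per register, 1 bit of width), the layout reduces to plain shifts; the hard-coded positions below are for illustration only.

#include <cstdint>

enum class Kind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2 };

inline uint32_t EncodeFieldData(uint32_t base_reg, uint32_t holder_reg, bool narrow) {
  return static_cast<uint32_t>(Kind::kField)  // bits [1:0]
      | (base_reg << 2)                       // bits [5:2]
      | (holder_reg << 6)                     // bits [9:6]
      | ((narrow ? 1u : 0u) << 10);           // bit  [10]: width (0 = wide)
}

inline Kind DecodeKind(uint32_t data) { return static_cast<Kind>(data & 0x3u); }
inline uint32_t DecodeFirstReg(uint32_t data) { return (data >> 2) & 0xFu; }
inline uint32_t DecodeSecondReg(uint32_t data) { return (data >> 6) & 0xFu; }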
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1615,11 +1623,10 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1630,6 +1637,13 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1936,6 +1950,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // Only stype 0 is supported. 
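The GenerateBitstringTypeCheckCompare sequence above boils down to a masked equality test on the class status word. A plain C++ sketch of both the direct form and the shift-based form the generated code uses (helper names are illustrative; mask is 2^mask_bits - 1 with 0 < mask_bits < 32):

#include <cstdint>

inline bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask) {
  return (status & mask) == path_to_root;
}

// XOR zeroes the matching bits and the left shift discards everything above
// the bitstring, so any non-zero result means "not a subtype".
inline bool BitstringMatchesShifted(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
  return ((status ^ path_to_root) << (32u - mask_bits)) == 0u;
}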
} @@ -3287,7 +3329,13 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -3296,7 +3344,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3335,7 +3383,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bne(temp, cls, slow_path->GetEntryLabel()); + __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); break; } @@ -3361,7 +3409,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bne(temp, cls, &loop); + __ Bne(temp, cls.AsRegister<Register>(), &loop); break; } @@ -3376,7 +3424,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop; __ Bind(&loop); - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3399,7 +3447,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3458,7 +3506,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Go to next interface. __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. 
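The interface check walks the object's interface table two heap references per iteration (the loop's closing branch follows right below). A conceptual sketch, under the assumption that each table entry is an (interface class, method array) pair and only the class slot takes part in the comparison:

#include <cstdint>

struct HeapRef { const void* value; };

inline bool TableContainsInterface(const HeapRef* iftable, int32_t pair_count,
                                   const void* interface_class) {
  for (int32_t i = 0; i < pair_count; ++i) {
    if (iftable[2 * i].value == interface_class) {  // even slots hold the classes
      return true;
    }
  }
  return false;
}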
- __ Bne(AT, cls, &loop); + __ Bne(AT, cls.AsRegister<Register>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); break; } } @@ -7401,6 +7463,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7409,7 +7473,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7421,7 +7491,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7453,7 +7523,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<Register>()); __ Sltiu(out, out, 1); break; } @@ -7480,7 +7550,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); - __ Bne(out, cls, &loop); + __ Bne(out, cls.AsRegister<Register>(), &loop); __ LoadConst32(out, 1); break; } @@ -7498,7 +7568,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7525,7 +7595,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. MipsLabel success; - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7557,7 +7627,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bne(out, cls, slow_path->GetEntryLabel()); + __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -7589,6 +7659,20 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -7725,7 +7809,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7748,7 +7832,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7835,6 +7919,15 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -7956,7 +8049,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: if (isR6) { break; @@ -8008,7 +8101,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF // We need an extra register for PC-relative literals on R2. 
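Several hunks in this file switch method, class and string loads to kBootImageRelRo: instead of an intern-table or class-table lookup with hash masking, the code loads a 32-bit entry out of the boot image's .data.bimg.rel.ro section at a linker-patched address, and the boot image being mapped in the low 4 GiB makes a 32-bit entry sufficient. A rough model of that load; the function name and the direct pointer parameter are assumptions made for this sketch only:

#include <cstdint>

inline void* LoadBootImageRelRoEntry(const uint32_t* patched_entry_address) {
  uint32_t entry = *patched_entry_address;                   // the Lw/Ldr in the diff
  return reinterpret_cast<void*>(static_cast<uintptr_t>(entry));
}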
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>(); @@ -8065,22 +8158,17 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF } break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Addiu(out, out, -masked_hash); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -8171,7 +8259,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: if (isR6) { break; @@ -8223,7 +8311,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? 
ZERO : locations->InAt(0).AsRegister<Register>(); @@ -8259,12 +8347,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ } return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8779,6 +8868,501 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + if (isR6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. 
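The SELEQZ/SELNEZ discussion above translates directly into C++: each instruction either forwards its source or produces zero, so OR-ing the two results implements "rd = (c == 0) ? rs : rt" without a branch or a conditional move. A sketch with descriptive names of my own:

#include <cstdint>

inline uint32_t Seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }
inline uint32_t Selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }

inline uint32_t SelectIfZero(uint32_t c, uint32_t rs, uint32_t rt) {
  return Seleqz(rs, c) | Selnez(rt, c);  // one of the two terms is always zero
}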
+ if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. + } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { // !isR6 + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + FRegister out 
= locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (isR6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + + } else { // !isR6 + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == DataType::Type::kFloat64) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == DataType::Type::kFloat64) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == DataType::Type::kFloat64) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == DataType::Type::kFloat64) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == DataType::Type::kFloat64) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == DataType::Type::kFloat64) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? 
a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == DataType::Type::kFloat64) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations, + DataType::Type type, + bool isR2OrNewer, + bool isR6) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + // Note, as a "quality of implementation", rather than pure "spec compliance", we require that + // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN + // (signaling NaN may become quiet though). + // + // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, + // both regular floating point numbers and NAN values are treated alike, only the sign bit is + // affected by this instruction. + // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any + // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be + // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. + if (isR6) { + if (type == DataType::Type::kFloat64) { + __ AbsD(out, in); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ AbsS(out, in); + } + } else { + if (type == DataType::Type::kFloat64) { + if (in != out) { + __ MovD(out, in); + } + __ MoveFromFpuHigh(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ MoveToFpuHigh(TMP, out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ Mfc1(TMP, in); + // ins instruction is not available for R1. 
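The pre-R6 abs path above clears just the sign bit of the high word so that everything else, including a NaN payload, is preserved. The same operation on a whole double, as a standalone sketch:

#include <cstdint>
#include <cstring>

inline double BitwiseAbs(double in) {
  uint64_t bits;
  std::memcpy(&bits, &in, sizeof(bits));
  bits &= ~(UINT64_C(1) << 63);  // same effect as Ins(TMP, ZERO, 31, 1) on the high word
  double out;
  std::memcpy(&out, &bits, sizeof(out));
  return out;
}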
+ if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ Mtc1(TMP, out); + } + } +} + +void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". + // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index c91cb62eda5..2e7c736dbd3 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); @@ -246,6 +247,11 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMax(HBinaryOperation*, bool is_min); + void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -615,6 +621,8 @@ class CodeGeneratorMIPS : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference 
target_method, @@ -689,7 +697,8 @@ class CodeGeneratorMIPS : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -697,7 +706,7 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 985ac2ca554..5b07b55cbbb 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1509,6 +1509,14 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1527,11 +1535,10 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1542,6 +1549,13 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1780,6 +1794,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo __ 
Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + GpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // only stype 0 is supported } @@ -2840,7 +2882,13 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -2849,7 +2897,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2888,7 +2936,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bnec(temp, cls, slow_path->GetEntryLabel()); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); break; } @@ -2914,7 +2962,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bnec(temp, cls, &loop); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); break; } @@ -2929,7 +2977,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. 
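The loop right below this comment chases super_class_ links until it hits the target class or falls off the end of the chain. A conceptual C++ rendering, with a stand-in struct reduced to the one field the walk needs (mirror::Class itself is not modelled here):

struct FakeClass { const FakeClass* super_class; };

inline bool IsSameOrSubclassOf(const FakeClass* klass, const FakeClass* target) {
  for (const FakeClass* k = klass; k != nullptr; k = k->super_class) {
    if (k == target) {
      return true;
    }
  }
  return false;
}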
Mips64Label loop; __ Bind(&loop); - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -2952,7 +3000,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3011,7 +3059,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Daddiu(temp, temp, 2 * kHeapReferenceSize); __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bnec(AT, cls, &loop); + __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnezc(temp, slow_path->GetEntryLabel()); break; } } @@ -5515,6 +5577,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5523,7 +5587,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5535,7 +5605,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); GpuRegister out = out_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5567,7 +5637,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<GpuRegister>()); __ Sltiu(out, out, 1); break; } @@ -5594,7 +5664,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. 
__ Beqzc(out, &done); - __ Bnec(out, cls, &loop); + __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); __ LoadConst32(out, 1); break; } @@ -5612,7 +5682,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -5639,7 +5709,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. Mips64Label success; - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -5671,7 +5741,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls, slow_path->GetEntryLabel()); + __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -5703,6 +5773,20 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { __ Bc(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -5839,7 +5923,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( bool fallback_load = false; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5866,7 +5950,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5926,6 +6010,15 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( kLoadDoubleword, DeduplicateUint64Literal(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. 
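The kBootImageRelRo handling added here (for direct calls, and for HLoadClass/HLoadString further down) materialises the address of a 32-bit entry in the oat file's .data.bimg.rel.ro section with the usual hi/lo patch pair and then reads it with Lwu. A minimal sketch of what the patched sequence amounts to at runtime, with `relro_entry` standing in for the slot the linker patch resolves (an illustrative name, not an ART symbol):

// Zero-extending 32-bit load of the .data.bimg.rel.ro slot; the result is a
// usable pointer value because the boot image is mapped in the low 4GiB.
inline uint64_t LoadBootImageReference(const uint32_t* relro_entry) {
  return static_cast<uint64_t>(*relro_entry);
}

Because the same DataBimgRelRoPatch is emitted whether the referenced entity is a method, class or string, all three kinds share boot_image_method_patches_ (see the updated comments in code_generator_mips64.h below).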
+ __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -6113,20 +6206,15 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S codegen_->DeduplicateBootImageAddressLiteral(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Daddiu(out, out, -masked_hash); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6248,12 +6336,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA codegen_->DeduplicateBootImageAddressLiteral(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); return; @@ -6665,6 +6754,236 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = 
locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); + } + } else { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
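The block comment above spells out the SELEQZ/SELNEZ idiom; as a compact reference, here is a scalar model of the emitted branchless min for the `out == lhs` case (a sketch of the MIPS64r6 semantics, not ART code). Each SEL* yields either its operand or zero, and the final OR merges the two:

int64_t MinR6Model(int64_t lhs, int64_t rhs) {
  int64_t at   = (rhs < lhs) ? 1 : 0;  // Slt    AT,  rhs, lhs
  int64_t keep = (at == 0) ? lhs : 0;  // Seleqz out, lhs, AT
  int64_t take = (at != 0) ? rhs : 0;  // Selnez AT,  rhs, AT
  return keep | take;                  // Or     out, out, AT
}

The max variant swaps the Seleqz/Selnez roles, and the surrounding floating-point path adds the CmpUn/Bc1eqz pre-check because min.fmt/max.fmt prefer the numeric operand while Java requires the NaN to win.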
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + break; + } + case DataType::Type::kFloat32: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsS(out, in); + break; + } + case DataType::Type::kFloat64: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsD(out, in); + break; + } + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index e6b69c469fd..6e69e4611a7 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); + void 
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -242,6 +243,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -586,6 +591,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, @@ -655,7 +662,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -663,7 +671,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative type patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 174efdf1155..6b0ec253e99 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -63,7 +63,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -125,7 +125,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -149,7 +149,7 @@ void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -173,7 +173,7 @@ void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* ins DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -200,7 +200,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -240,7 +240,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -259,7 +259,7 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Scvtf(dst.V4S(), src.V4S()); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -299,7 +299,7 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { __ Fneg(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -338,7 +338,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { __ Fabs(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -366,7 +366,7 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { __ Not(dst.V16B(), src.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -389,7 +389,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -431,7 +431,39 @@ void 
InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -471,7 +503,7 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -513,7 +545,39 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -551,7 +615,7 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -575,7 +639,7 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { __ 
Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -623,7 +687,7 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -671,7 +735,7 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -699,7 +763,7 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -735,7 +799,7 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -762,7 +826,7 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -782,7 +846,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -816,7 +880,7 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { __ Shl(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -850,7 +914,7 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { __ Sshr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -884,7 +948,7 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { __ Ushr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -916,7 +980,7 @@ void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -957,7 +1021,7 @@ void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instructi __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -978,7 +1042,7 @@ static void CreateVecAccumLocations(ArenaAllocator* 
allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1026,7 +1090,7 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1139,7 +1203,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1167,7 +1231,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1188,7 +1252,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins __ Sabal2(acc.V2D(), left.V4S(), right.V4S()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1204,12 +1268,12 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -1237,7 +1301,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1331,7 +1395,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1362,7 +1426,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7c3155ab73b..7b66b179839 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -46,7 +46,7 @@ void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instr locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -71,7 +71,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala __ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -84,7 +84,7 @@ void 
LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instructi locations->SetOut(Location::RequiresRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -98,7 +98,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* i __ Vmov(OutputRegister(instruction), DRegisterLane(src, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -122,7 +122,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -151,7 +151,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -188,7 +188,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { __ Vneg(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -215,7 +215,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { __ Vabs(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { __ Vmvn(I8, dst, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -262,7 +262,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -292,7 +292,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { __ Vadd(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ 
Vqadd(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -332,7 +364,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -362,7 +394,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { __ Vsub(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +456,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { __ Vmul(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -440,7 +504,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { __ Vmin(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -480,7 +544,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { __ Vmax(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -505,7 +569,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { __ Vand(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -537,7 +601,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { __ Vorr(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -561,7 +625,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { __ 
Veor(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -580,7 +644,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -610,7 +674,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { __ Vshl(I32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +704,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { __ Vshr(DataTypeValue::S32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -670,7 +734,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { __ Vshr(DataTypeValue::U32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +754,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -716,7 +780,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruc __ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -737,7 +801,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -780,12 +844,12 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -817,7 +881,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -923,7 +987,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -971,7 +1035,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc 
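The HVecSaturationAdd/HVecSaturationSub lowerings introduced above map one-to-one onto UQADD/SQADD/UQSUB/SQSUB on ARM64 and VQADD/VQSUB on ARM (VIXL); per lane they clamp to the element type's range instead of wrapping. A scalar model of one unsigned 8-bit lane, for illustration only:

uint8_t SaturatingAddU8(uint8_t a, uint8_t b) {
  unsigned sum = static_cast<unsigned>(a) + b;
  return static_cast<uint8_t>(sum > 0xFFu ? 0xFFu : sum);  // clamp at 255
}

uint8_t SaturatingSubU8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>(a > b ? a - b : 0);  // clamp at 0
}

The MIPS and MIPS64 vector back ends, in the hunks that follow, only stub these visitors out with LOG(FATAL).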
index ed9de964965..df0e1485d69 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -42,7 +42,7 @@ void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruct locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -89,7 +89,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* /* is_double */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -113,7 +113,7 @@ void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* inst DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -170,7 +170,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -225,7 +225,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -244,7 +244,7 @@ void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -290,7 +290,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -337,7 +337,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +369,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +392,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -434,11 +434,19 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << 
"Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -474,7 +482,7 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio : __ Ave_sH(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -516,11 +524,19 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -558,7 +574,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -582,7 +598,7 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +656,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -698,7 +714,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -727,7 +743,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -764,7 +780,7 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -793,7 +809,7 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -813,7 +829,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, 
HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +863,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -881,7 +897,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -915,7 +931,7 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -947,7 +963,7 @@ void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1005,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instructio __ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1010,7 +1026,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1060,7 +1076,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1162,7 +1178,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1201,7 +1217,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1231,7 +1247,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1247,13 +1263,13 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1282,7 +1298,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << 
"Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1357,7 +1373,7 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1395,7 +1411,7 @@ void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 9ea55ec8d79..de354b63a15 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -47,7 +47,7 @@ void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instru locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -88,7 +88,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar /* is_double */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -112,7 +112,7 @@ void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -136,7 +136,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -168,7 +168,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -223,7 +223,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -289,7 +289,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -336,7 +336,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ 
-368,7 +368,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -391,7 +391,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -433,11 +433,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -473,7 +481,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct : __ Ave_sH(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -515,11 +523,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -557,7 +573,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -581,7 +597,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -639,7 +655,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -697,7 +713,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -726,7 +742,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); 
// lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -763,7 +779,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -792,7 +808,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +828,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -846,7 +862,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -880,7 +896,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -914,7 +930,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -946,7 +962,7 @@ void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -987,7 +1003,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruct __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1008,7 +1024,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1058,7 +1074,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1160,7 +1176,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1199,7 +1215,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << 
instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1229,7 +1245,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1245,13 +1261,13 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1280,7 +1296,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1355,7 +1371,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1393,7 +1409,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index f2ffccc8879..086ae07a064 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -54,7 +54,7 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -111,7 +111,7 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -152,7 +152,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_LE(4u, instruction->GetVectorLength()); @@ -174,7 +174,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -196,7 +196,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -258,12 +258,12 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -282,7 +282,7 @@ void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -328,7 +328,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +369,7 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -418,7 +418,7 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -441,7 +441,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -483,7 +483,39 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddusb(dst, src); + break; 
+ case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -503,14 +535,14 @@ void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -552,7 +584,39 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -585,7 +649,7 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -609,7 +673,7 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -658,7 +722,7 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -707,7 +771,7 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -742,7 +806,7 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported 
SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -777,7 +841,7 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +876,7 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { __ orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +911,7 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -865,7 +929,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -894,7 +958,7 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -919,7 +983,7 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -948,7 +1012,7 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -985,7 +1049,7 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1011,7 +1075,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1035,7 +1099,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1056,7 +1120,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1103,7 +1167,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1184,7 +1248,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1220,7 +1284,7 @@ void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index e2b0485f890..4d31ab68d11 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -49,7 +49,7 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -102,7 +102,7 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -126,7 +126,7 @@ void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -140,7 +140,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -157,7 +157,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -179,7 +179,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -241,12 +241,12 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -265,7 +265,7 @@ void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -311,7 +311,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -352,7 +352,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -401,7 +401,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -424,7 +424,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -466,7 +466,39 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ 
paddusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -486,14 +518,14 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -535,7 +567,39 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -568,7 +632,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -592,7 +656,7 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -641,7 +705,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +754,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -725,7 +789,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, 
src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -760,7 +824,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -795,7 +859,7 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { __ orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -830,7 +894,7 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -848,7 +912,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -877,7 +941,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -902,7 +966,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -931,7 +995,7 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -963,7 +1027,7 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1053,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1008,7 +1072,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1029,7 +1093,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1076,7 +1140,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1157,7 +1221,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1193,7 +1257,7 @@ void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 6bf045885d6..82d1fda8789 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() @@ -3802,6 +3805,301 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. 
+ locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (type == DataType::Type::kInt64) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // The comparison is performed by subtracting the second operand from + // the first operand and then setting the status flags in the same + // manner as the SUB instruction." + __ cmpl(output_lo, op2_lo); + + // Now use a temp and the borrow to finish the subtraction of op2_hi. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. + Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? 
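[Aside: the comment block just above sketches the control flow of the floating-point min/max lowering. As a hedged scalar illustration of what that SSE sequence computes — not ART code; the helper name and standard-library calls are assumptions made here — the semantics are as follows, which is also why the equal-operands path needs the orpd/andpd step to tell -0.0 and +0.0 apart:]

    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Scalar model of the FP min/max lowering (illustrative only).
    static double MinMaxModel(double a, double b, bool is_min) {
      if (std::isnan(a) || std::isnan(b)) {
        // ucomisd/ucomiss set PF on an unordered compare; the code jumps to the
        // NaN label and materializes a canonical quiet NaN.
        return std::numeric_limits<double>::quiet_NaN();
      }
      if (a != b) {
        // The two conditional jumps pick whichever operand wins the strict compare.
        return is_min ? (a < b ? a : b) : (a > b ? a : b);
      }
      // Equal operands: only +0.0 vs -0.0 can still differ. OR-ing the sign bits
      // (orpd/orps) makes min prefer -0.0; AND-ing them (andpd/andps) makes max
      // prefer +0.0.
      uint64_t bits_a;
      uint64_t bits_b;
      std::memcpy(&bits_a, &a, sizeof(bits_a));
      std::memcpy(&bits_b, &b, sizeof(bits_b));
      uint64_t bits = is_min ? (bits_a | bits_b) : (bits_a & bits_b);
      double result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }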
+ + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(out, kDoubleNaN); + } else { + Register constant = locations->GetTemp(0).AsRegister<Register>(); + __ movl(constant, Immediate(kFloatNaN)); + __ movd(out, constant); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RegisterLocation(EDX)); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register out = locations->Out().AsRegister<Register>(); + 
DCHECK_EQ(out, EAX); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(temp, EDX); + // Sign extend EAX into EDX. + __ cdq(); + // XOR EAX with sign. + __ xorl(EAX, EDX); + // Subtract out sign to correct. + __ subl(EAX, EDX); + // The result is in EAX. + break; + } + case DataType::Type::kInt64: { + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + // Compute the sign into the temporary. + __ movl(temp, input_hi); + __ sarl(temp, Immediate(31)); + // Store the sign into the output. + __ movl(output_lo, temp); + __ movl(output_hi, temp); + // XOR the input to the output. + __ xorl(output_lo, input_lo); + __ xorl(output_hi, input_hi); + // Subtract the sign. + __ subl(output_lo, temp); + __ sbbl(output_hi, temp); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Register constant = locations->GetTemp(1).AsRegister<Register>(); + __ movl(constant, Immediate(INT32_C(0x7FFFFFFF))); + __ movd(temp, constant); + __ andps(out, temp); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF)); + __ andpd(out, temp); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { @@ -4534,6 +4832,15 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); + RecordBootImageRelRoPatch( + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), + GetBootImageOffset(invoke)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); @@ -4595,6 +4902,13 @@ void CodeGeneratorX86::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back( + method_address, /* target_dex_file */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); HX86ComputeBaseMethodAddress* method_address = @@ -4664,6 +4978,14 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( } } 
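[Aside: the integer HAbs lowering above is the classic sign-mask identity — broadcast the sign bit (cdq, or sarl by 31 for the register-pair case), XOR it in, then subtract it. A minimal sketch of the identity, assuming plain C++ rather than ART's assembler macros:]

    #include <cstdint>

    // abs(x) == (x ^ sign) - sign, where sign is x shifted right arithmetically
    // by 31: all zeros for non-negative x, all ones for negative x.
    int32_t BranchlessAbs(int32_t x) {
      int32_t sign = x >> 31;    // cdq: replicate the sign bit into a helper register
      return (x ^ sign) - sign;  // xorl then subl (wraps for INT32_MIN, like the hardware)
    }

[The 64-bit path does the same thing on a register pair, propagating the borrow of the final subtract with sbbl.]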
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4682,11 +5004,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -6055,7 +6376,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6093,7 +6414,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || + load_kind == HLoadClass::LoadKind::kBootImageRelRo || load_kind == HLoadClass::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6169,17 +6490,12 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE __ movl(out, Immediate(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6255,11 +6571,31 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. 
} +void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6278,7 +6614,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || + load_kind == HLoadString::LoadKind::kBootImageRelRo || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6332,11 +6668,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S __ movl(out, Immediate(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageStringPatch(load); + codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { @@ -6418,8 +6755,8 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { return 0; } -// Interface case has 3 temps, one for holding the number of interfaces, one for the current -// interface pointer, one for loading the current interface. +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { @@ -6447,6 +6784,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -6455,7 +6794,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. @@ -6676,6 +7021,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + break; + } } if (zero.IsLinked()) { @@ -6702,12 +7062,14 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - // Note that TypeCheckSlowPathX86 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary register for some cases. + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -6921,6 +7283,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 51e5bca00b6..6c76e27d35f 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -211,6 +211,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // the suspend call. 
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); @@ -225,6 +226,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, @@ -414,6 +418,8 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); @@ -631,17 +637,18 @@ class CodeGeneratorX86 : public CodeGenerator { X86Assembler assembler_; const X86InstructionSetFeatures& isa_features_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 7be360536b2..322b0cfc4c1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -998,6 +998,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. 
+ __ movl(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + RecordBootImageRelRoPatch(GetBootImageOffset(invoke)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); @@ -1059,6 +1066,11 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back(/* target_dex_file */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { boot_image_method_patches_.emplace_back( invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); @@ -1110,6 +1122,14 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1128,11 +1148,10 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -3821,6 +3840,241 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). 
+ locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (type == DataType::Type::kInt64) { + __ cmpq(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ cmpl(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); + } else { + __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. 
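For readers unfamiliar with the ucomiss/orps/andps sequence above, here is a minimal standalone sketch (not ART code) of the float Math.min semantics it implements; the double case is identical with 64-bit patterns, and max uses AND of the bit patterns instead of OR.

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // Sketch only: NaN in either input produces the canonical NaN, and when the
    // inputs compare equal the only interesting case is +/-0.0, where OR-ing the
    // bit patterns keeps the sign bit set (min(-0.0f, +0.0f) is -0.0f).
    inline uint32_t Bits(float x) { uint32_t b; std::memcpy(&b, &x, sizeof(b)); return b; }
    inline float FromBits(uint32_t b) { float x; std::memcpy(&x, &b, sizeof(x)); return x; }

    inline float MinFloat(float a, float b) {
      if (std::isnan(a) || std::isnan(b)) {
        return FromBits(0x7FC00000u);  // canonical NaN, as loaded from the literal pool
      }
      if (a < b) return a;
      if (b < a) return b;
      return FromBits(Bits(a) | Bits(b));  // equal operands: handles -0.0 vs +0.0
    }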
+ __ Bind(&done); +} + +void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86_64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86_64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. + __ addl(out, mask); + __ xorl(out, mask); + break; + } + case DataType::Type::kInt64: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. + __ addq(out, mask); + __ xorq(out, mask); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); + __ andps(out, mask); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ andpd(out, mask); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::Any()); @@ -5462,6 +5716,26 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. 
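The HAbs lowering above uses the classic sign-mask trick; a standalone sketch of the same computation (illustrative only, not ART code):

    #include <cstdint>

    // Sketch only: mask is 0 for x >= 0 and all ones for x < 0, so (x + mask) ^ mask
    // yields x unchanged for non-negative inputs and -x otherwise, with no branch.
    // As with Math.abs, INT32_MIN maps to itself because its negation overflows.
    inline int32_t AbsInt32(int32_t x) {
      uint32_t mask = static_cast<uint32_t>(x >> 31);  // arithmetic shift: 0 or 0xFFFFFFFF
      return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);
    }
    // The floating-point cases above need no arithmetic at all: AND-ing with
    // 0x7FFFFFFF (float) or 0x7FFFFFFFFFFFFFFF (double) simply clears the sign bit.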
} +void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + CpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { switch (desired_class_load_kind) { @@ -5471,7 +5745,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5579,16 +5853,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -5653,7 +5921,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5719,10 +5987,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 
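A standalone sketch (not ART code) of the predicate GenerateBitstringTypeCheckCompare encodes; the layout assumptions are spelled out in the comments.

    #include <cstdint>

    // Sketch only. Assumes the low mask_bits bits of the 32-bit class status word
    // hold the class's path-to-root bitstring, mask == (1u << mask_bits) - 1, and
    // 0 < mask_bits < 32 (the codegen special-cases mask_bits == 16 with cmpw).
    // The object is an instance of the target iff those low bits equal the target's
    // path_to_root. The SUB + SHL form computes the same predicate in the flags:
    // the low mask_bits bits of (status - path_to_root) are zero exactly when they
    // match, and shifting left by (32 - mask_bits) discards the unrelated high bits.
    inline bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
      uint32_t mask = (1u << mask_bits) - 1u;
      return (status & mask) == path_to_root;
      // Equivalently: ((status - path_to_root) << (32u - mask_bits)) == 0u;
    }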
return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageStringPatch(load); + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { @@ -5795,24 +6063,26 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } -static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck) { - // We need a temporary for holding the iftable length. - return true; - } - return kEmitCompilerReadBarrier && +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; } -static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - return kEmitCompilerReadBarrier && - !kUseBakerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. +// The other checks have one temp for loading the object's class. +static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 2; + } + return 1 + NumberOfInstanceOfTemps(type_check_kind); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5834,6 +6104,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5842,14 +6114,16 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); - // When read barriers are enabled, we need a temporary register for - // some cases. 
- if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5860,9 +6134,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(0) : - Location::NoLocation(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6071,6 +6345,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + if (zero.IsLinked()) { + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + } else { + __ setcc(kEqual, out); + // setcc only sets the low byte. + __ andl(out, Immediate(1)); + } + break; + } } if (zero.IsLinked()) { @@ -6097,17 +6392,15 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - - // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary - // register for some cases. - if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { @@ -6118,9 +6411,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(1) : - Location::NoLocation(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_GE(num_temps, 1u); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2u) ? 
locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6283,7 +6577,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { break; } - case TypeCheckKind::kInterfaceCheck: + case TypeCheckKind::kInterfaceCheck: { // Fast path for the interface check. Try to avoid read barriers to improve the fast path. // We can not get false positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ @@ -6319,6 +6613,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If `cls` was poisoned above, unpoison it. __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsLinked()) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1079e94dfc2..9a4c53b5240 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -208,6 +208,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); @@ -222,6 +223,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -410,6 +415,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void RecordBootImageRelRoPatch(uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); @@ -604,17 +610,18 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Used for fixups to the constant area. int constant_area_start_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. 
ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; // Patches for string literals in JIT compiled code. diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index c41c290c8b4..792cfb539a6 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -195,7 +195,9 @@ class InternalCodeAllocator : public CodeAllocator { } size_t GetSize() const { return size_; } - uint8_t* GetMemory() const { return memory_.get(); } + ArrayRef<const uint8_t> GetMemory() const OVERRIDE { + return ArrayRef<const uint8_t>(memory_.get(), size_); + } private: size_t size_; @@ -269,8 +271,8 @@ static void Run(const InternalCodeAllocator& allocator, InstructionSet target_isa = codegen.GetInstructionSet(); typedef Expected (*fptr)(); - CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); - fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); + CommonCompilerTest::MakeExecutable(allocator.GetMemory().data(), allocator.GetMemory().size()); + fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(allocator.GetMemory().data())); if (target_isa == InstructionSet::kThumb2) { // For thumb we need the bottom bit set. 
f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 4a6c91459fc..be26e67af37 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -210,6 +210,12 @@ class DataType { static bool IsTypeConversionImplicit(Type input_type, Type result_type); static bool IsTypeConversionImplicit(int64_t value, Type result_type); + static bool IsZeroExtension(Type input_type, Type result_type) { + return IsIntOrLongType(result_type) && + IsUnsignedType(input_type) && + Size(result_type) > Size(input_type); + } + static const char* PrettyDescriptor(Type type); private: diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c88baa8610f..fbcbe3608e6 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,6 +25,11 @@ #include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "handle.h" +#include "mirror/class.h" +#include "obj_ptr-inl.h" +#include "scoped_thread_state_change-inl.h" +#include "subtype_check.h" namespace art { @@ -548,30 +553,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) { } } -void GraphChecker::VisitCheckCast(HCheckCast* check) { - VisitInstruction(check); - HInstruction* input = check->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", +void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name) { + if (!check->InputAt(input_pos)->IsIntConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.", check->DebugName(), check->GetId(), - input->DebugName(), - input->GetId())); + input_pos, + name, + check->InputAt(2)->DebugName(), + check->InputAt(2)->GetId())); + } else if (check_value) { + uint32_t actual_value = + static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); + if (actual_value != expected_value) { + AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", + check->DebugName(), + check->GetId(), + name, + actual_value, + expected_value)); + } } } -void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { - VisitInstruction(instruction); - HInstruction* input = instruction->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", - instruction->DebugName(), - instruction->GetId(), - input->DebugName(), - input->GetId())); +void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + VisitInstruction(check); + HInstruction* input = check->InputAt(1); + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + if (!input->IsNullConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } + bool check_values = false; + BitString::StorageType expected_path_to_root = 0u; + BitString::StorageType expected_mask = 0u; + { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = check->GetClass().Get(); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + if (state == 
SubtypeCheckInfo::kAssigned) { + expected_path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass); + expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass); + check_values = true; + } else { + AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.", + check->DebugName(), + check->GetId())); + } + } + CheckTypeCheckBitstringInput( + check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root"); + CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask"); + } else { + if (!input->IsLoadClass()) { + AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } } } +void GraphChecker::VisitCheckCast(HCheckCast* check) { + HandleTypeCheckInstruction(check); +} + +void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { + HandleTypeCheckInstruction(instruction); +} + void GraphChecker::HandleLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); HLoopInformation* loop_information = loop_header->GetLoopInformation(); diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 0f0b49d240a..dbedc405185 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -71,6 +71,12 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name); + void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction); void HandleLoop(HBasicBlock* loop_header); void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5ff31cead58..54d46445804 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -390,16 +390,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("load_kind") << load_string->GetLoadKind(); } - void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { - StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind(); + void HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + StartAttributeStream("check_kind") << check->GetTypeCheckKind(); StartAttributeStream("must_do_null_check") << std::boolalpha - << check_cast->MustDoNullCheck() << std::noboolalpha; + << check->MustDoNullCheck() << std::noboolalpha; + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + StartAttributeStream("path_to_root") << std::hex + << "0x" << check->GetBitstringPathToRoot() << std::dec; + StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec; + } + } + + void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { + HandleTypeCheckInstruction(check_cast); } void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE { - StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind(); - StartAttributeStream("must_do_null_check") << std::boolalpha - << instance_of->MustDoNullCheck() << std::noboolalpha; + HandleTypeCheckInstruction(instance_of); } void VisitArrayLength(HArrayLength* array_length) OVERRIDE { @@ -576,6 +583,11 @@ class HGraphVisualizerPrinter : 
public HGraphDelegateVisitor { } StartAttributeStream() << input_list; } + if (instruction->GetDexPc() != kNoDexPc) { + StartAttributeStream("dex_pc") << instruction->GetDexPc(); + } else { + StartAttributeStream("dex_pc") << "n/a"; + } instruction->Accept(this); if (instruction->HasEnvironment()) { StringList envs; @@ -641,20 +653,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha; } + // For the builder and the inliner, we want to add extra information on HInstructions + // that have reference types, and also HInstanceOf/HCheckcast. if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName)) - && (instruction->GetType() == DataType::Type::kReference)) { - ReferenceTypeInfo info = instruction->IsLoadClass() - ? instruction->AsLoadClass()->GetLoadedClassRTI() - : instruction->GetReferenceTypeInfo(); + && (instruction->GetType() == DataType::Type::kReference || + instruction->IsInstanceOf() || + instruction->IsCheckCast())) { + ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference) + ? instruction->IsLoadClass() + ? instruction->AsLoadClass()->GetLoadedClassRTI() + : instruction->GetReferenceTypeInfo() + : instruction->IsInstanceOf() + ? instruction->AsInstanceOf()->GetTargetClassRTI() + : instruction->AsCheckCast()->GetTargetClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (info.IsValid()) { StartAttributeStream("klass") << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); - StartAttributeStream("can_be_null") - << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + if (instruction->GetType() == DataType::Type::kReference) { + StartAttributeStream("can_be_null") + << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + } StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; - } else if (instruction->IsLoadClass()) { + } else if (instruction->IsLoadClass() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()) { StartAttributeStream("klass") << "unresolved"; } else { // The NullConstant may be added to the graph during other passes that happen between diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 99dec112400..55eca2316a1 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) { DCHECK(instruction != nullptr); if (instruction->IsArrayLength()) { return true; - } else if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // Instruction MIN(>=0, >=0) is >= 0. - return IsGEZero(instruction->InputAt(0)) && - IsGEZero(instruction->InputAt(1)); - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - // Instruction ABS(>=0) is >= 0. - // NOTE: ABS(minint) = minint prevents assuming - // >= 0 without looking at the argument. - return IsGEZero(instruction->InputAt(0)); - default: - break; - } + } else if (instruction->IsMin()) { + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); + } else if (instruction->IsAbs()) { + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. 
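The NOTE above is worth making concrete; a standalone sketch (not ART code) showing that ABS cannot be assumed non-negative without inspecting its argument:

    #include <cstdint>
    #include <limits>

    // Sketch only: two's-complement negation of the minimum value overflows, so
    // ABS(INT32_MIN) is still INT32_MIN, i.e. negative. Hence IsGEZero(ABS(x))
    // can only be concluded when IsGEZero(x) already holds.
    inline bool AbsOfMinIntIsStillNegative() {
      int32_t min = std::numeric_limits<int32_t>::min();
      int32_t abs_min = static_cast<int32_t>(0u - static_cast<uint32_t>(min));  // avoids signed overflow
      return abs_min < 0;  // true
    }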
+ return IsGEZero(instruction->InputAt(0)); } int64_t value = -1; return IsInt64AndGet(instruction, &value) && value >= 0; @@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) { /** Hunts "under the hood" for a suitable instruction at the hint. */ static bool IsMaxAtHint( HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { - if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // For MIN(x, y), return most suitable x or y as maximum. - return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || - IsMaxAtHint(instruction->InputAt(1), hint, suitable); - default: - break; - } + if (instruction->IsMin()) { + // For MIN(x, y), return most suitable x or y as maximum. + return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); } else { *suitable = instruction; return HuntForDeclaration(instruction) == hint; } - return false; } /** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ @@ -365,14 +351,16 @@ void InductionVarRange::Replace(HInstruction* instruction, } } -bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const { - HInductionVarAnalysis::InductionInfo *trip = - induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); - if (trip != nullptr && !IsUnsafeTripCount(trip)) { - IsConstant(trip->op_a, kExact, tc); - return true; - } - return false; +bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { + bool is_constant_unused = false; + return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count); +} + +bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop, + /*out*/ int64_t* trip_count) const { + bool is_constant = false; + CheckForFiniteAndConstantProps(loop, &is_constant, trip_count); + return is_constant; } bool InductionVarRange::IsUnitStride(HInstruction* context, @@ -431,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop, // Private class methods. // +bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const { + HInductionVarAnalysis::InductionInfo *trip = + induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); + if (trip != nullptr && !IsUnsafeTripCount(trip)) { + *is_constant = IsConstant(trip->op_a, kExact, trip_count); + return true; + } + return false; +} + bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, ConstantRequest request, /*out*/ int64_t* value) const { diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 0b980f596a3..906dc6bb7b9 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -161,9 +161,15 @@ class InductionVarRange { } /** - * Checks if header logic of a loop terminates. Sets trip-count tc if known. + * Checks if header logic of a loop terminates. If trip count is known sets 'trip_count' to its + * value. */ - bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; + bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; + + /** + * Checks if a trip count is known for the loop and sets 'trip_count' to its value in this case. 
+ */ + bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; /** * Checks if the given instruction is a unit stride induction inside the closest enveloping @@ -194,6 +200,14 @@ class InductionVarRange { }; /** + * Checks if header logic of a loop terminates. If trip count is known (constant) sets + * 'is_constant' to true and 'trip_count' to the trip count value. + */ + bool CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const; + + /** * Returns true if exact or upper/lower bound on the given induction * information is known as a 64-bit constant, which is returned in value. */ diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 4fc7262265e..8b10a78212e 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -147,10 +147,11 @@ void HInliner::Run() { // that this method is actually inlined; // - if a method's name contains the substring "$noinline$", do not // inline that method. - // We limit this to AOT compilation, as the JIT may or may not inline + // We limit the latter to AOT compilation, as the JIT may or may not inline // depending on the state of classes at runtime. - const bool honor_inlining_directives = - IsCompilingWithCoreImage() && Runtime::Current()->IsAotCompiler(); + const bool honor_noinline_directives = IsCompilingWithCoreImage(); + const bool honor_inline_directives = + honor_noinline_directives && Runtime::Current()->IsAotCompiler(); // Keep a copy of all blocks when starting the visit. ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); @@ -164,18 +165,19 @@ void HInliner::Run() { HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { - if (honor_inlining_directives) { + if (honor_noinline_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod( call->GetDexMethodIndex(), /* with_signature */ false); // Tests prevent inlining by having $noinline$ in their method names. if (callee_name.find("$noinline$") == std::string::npos) { - if (!TryInline(call)) { + if (!TryInline(call) && honor_inline_directives) { bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos); CHECK(!should_have_inlined) << "Could not inline " << callee_name; } } } else { + DCHECK(!honor_inline_directives); // Normal case: try to inline. 
TryInline(call); } diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index c7aef3779d1..9647dd5d41c 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -1815,29 +1815,6 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, } } -static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (cls == nullptr) { - return TypeCheckKind::kUnresolvedCheck; - } else if (cls->IsInterface()) { - return TypeCheckKind::kInterfaceCheck; - } else if (cls->IsArrayClass()) { - if (cls->GetComponentType()->IsObjectClass()) { - return TypeCheckKind::kArrayObjectCheck; - } else if (cls->CannotBeAssignedFromOtherTypes()) { - return TypeCheckKind::kExactCheck; - } else { - return TypeCheckKind::kArrayCheck; - } - } else if (cls->IsFinal()) { - return TypeCheckKind::kExactCheck; - } else if (cls->IsAbstract()) { - return TypeCheckKind::kAbstractClassCheck; - } else { - return TypeCheckKind::kClassHierarchyCheck; - } -} - void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) { HLoadString* load_string = new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); @@ -1852,22 +1829,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( - soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); - - bool needs_access_check = true; - if (klass != nullptr) { - if (klass->IsPublic()) { - needs_access_check = false; - } else { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); - if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { - needs_access_check = false; - } - } - } - + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); } @@ -1912,25 +1875,83 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, return load_class; } +Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa, + dex::TypeIndex type_index) { + Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); + ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass( + soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_); + // TODO: Avoid creating excessive handles if the method references the same class repeatedly. + // (Use a map on the local_allocator_.) 
+ return handles_->NewHandle(klass); +} + +bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) { + if (klass == nullptr) { + return true; + } else if (klass->IsPublic()) { + return false; + } else { + ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); + return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get()); + } +} + void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, dex::TypeIndex type_index, uint32_t dex_pc) { HInstruction* object = LoadLocal(reference, DataType::Type::kReference); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); ScopedObjectAccess soa(Thread::Current()); - TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); + TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( + klass.Get(), code_generator_, compiler_driver_, needs_access_check); + + HInstruction* class_or_null = nullptr; + HIntConstant* bitstring_path_to_root = nullptr; + HIntConstant* bitstring_mask = nullptr; + if (check_kind == TypeCheckKind::kBitstringCheck) { + // TODO: Allow using the bitstring check also if we need an access check. + DCHECK(!needs_access_check); + class_or_null = graph_->GetNullConstant(dex_pc); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + uint32_t path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get()); + uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get()); + bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc); + bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc); + } else { + class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); + } + DCHECK(class_or_null != nullptr); + if (instruction.Opcode() == Instruction::INSTANCE_OF) { - AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); + AppendInstruction(new (allocator_) HInstanceOf(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); UpdateLocal(destination, current_block_->GetLastInstruction()); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); // We emit a CheckCast followed by a BoundType. CheckCast is a statement // which may throw. If it succeeds BoundType sets the new type of `object` // for all subsequent uses. 
- AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); + AppendInstruction( + new (allocator_) HCheckCast(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); AppendInstruction(new (allocator_) HBoundType(object, dex_pc)); UpdateLocal(reference, current_block_->GetLastInstruction()); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 4428c532779..f78829232d4 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -39,6 +39,7 @@ class DexCompilationUnit; class HBasicBlockBuilder; class Instruction; class OptimizingCompilerStats; +class ScopedObjectAccess; class SsaBuilder; class VariableSizedHandleScope; @@ -232,6 +233,12 @@ class HInstructionBuilder : public ValueObject { bool needs_access_check) REQUIRES_SHARED(Locks::mutator_lock_); + Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index) + REQUIRES_SHARED(Locks::mutator_lock_); + + bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) + REQUIRES_SHARED(Locks::mutator_lock_); + // Returns the outer-most compiling method's class. ObjPtr<mirror::Class> GetOutermostCompilingClass() const; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index a42a85dc1d2..d3cf9568c2d 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -67,7 +67,6 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); - void VisitEqual(HEqual* equal) OVERRIDE; void VisitNotEqual(HNotEqual* equal) OVERRIDE; void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE; @@ -78,6 +77,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitNullCheck(HNullCheck* instruction) OVERRIDE; void VisitArrayLength(HArrayLength* instruction) OVERRIDE; void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + void VisitAbs(HAbs* instruction) OVERRIDE; void VisitAdd(HAdd* instruction) OVERRIDE; void VisitAnd(HAnd* instruction) OVERRIDE; void VisitCondition(HCondition* instruction) OVERRIDE; @@ -120,6 +120,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); + void SimplifyMin(HInvoke* invoke, DataType::Type type); + void SimplifyMax(HInvoke* invoke, DataType::Type type); + void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; CompilerDriver* compiler_driver_; @@ -576,7 +579,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst // Returns whether doing a type test between the class of `object` against `klass` has // a statically known outcome. The result of the test is stored in `outcome`. 
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { +static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, + HInstruction* object, + /*out*/bool* outcome) { DCHECK(!object->IsNullConstant()) << "Null constants should be special cased"; ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); @@ -586,7 +591,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo return false; } - ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); if (!class_rti.IsValid()) { // Happens when the loaded class is unresolved. return false; @@ -611,8 +615,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + check_cast->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -630,15 +634,18 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); - if (!load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the checkcast was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = check_cast->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the checkcast was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } else { // Don't do anything for exceptional cases for now. Ideally we should remove @@ -649,8 +656,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); - HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + instruction->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -673,7 +680,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . 
bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); if (outcome && can_be_null) { // Type test will succeed, we just need a null test. @@ -686,11 +693,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); - if (outcome && !load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the instanceof check was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = instruction->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the instanceof check was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } } @@ -849,35 +859,29 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { static HInstruction* NewIntegralAbs(ArenaAllocator* allocator, HInstruction* x, HInstruction* cursor) { - DataType::Type type = x->GetType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - // Construct a fake intrinsic with as much context as is needed to allocate one. - // The intrinsic will always be lowered into code later anyway. - // TODO: b/65164101 : moving towards a real HAbs node makes more sense. - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; - HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect( - allocator, - 1, - type, - x->GetDexPc(), - /*method_idx*/ -1, - /*resolved_method*/ nullptr, - dispatch_info, - kStatic, - MethodReference(nullptr, dex::kDexNoIndex), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - invoke->SetArgumentAt(0, x); - invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt - : Intrinsics::kMathAbsLong, - kNoEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - cursor->GetBlock()->InsertInstructionBefore(invoke, cursor); - return invoke; + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HAbs* abs = new (allocator) HAbs(type, x, cursor->GetDexPc()); + cursor->GetBlock()->InsertInstructionBefore(abs, cursor); + return abs; +} + +// Constructs a new MIN/MAX(x, y) node in the HIR. 
+static HInstruction* NewIntegralMinMax(ArenaAllocator* allocator, + HInstruction* x, + HInstruction* y, + HInstruction* cursor, + bool is_min) { + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HBinaryOperation* minmax = nullptr; + if (is_min) { + minmax = new (allocator) HMin(type, x, y, cursor->GetDexPc()); + } else { + minmax = new (allocator) HMax(type, x, y, cursor->GetDexPc()); + } + cursor->GetBlock()->InsertInstructionBefore(minmax, cursor); + return minmax; } // Returns true if operands a and b consists of widening type conversions @@ -899,6 +903,30 @@ static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInst to_type == DataType::Type::kInt64); } +// Returns an acceptable substitution for "a" on the select +// construct "a <cmp> b ? c : .." during MIN/MAX recognition. +static HInstruction* AllowInMinMax(IfCondition cmp, + HInstruction* a, + HInstruction* b, + HInstruction* c) { + int64_t value = 0; + if (IsInt64AndGet(b, /*out*/ &value) && + (((cmp == kCondLT || cmp == kCondLE) && c->IsMax()) || + ((cmp == kCondGT || cmp == kCondGE) && c->IsMin()))) { + HConstant* other = c->AsBinaryOperation()->GetConstantRight(); + if (other != nullptr && a == c->AsBinaryOperation()->GetLeastConstantLeft()) { + int64_t other_value = Int64FromConstant(other); + bool is_max = (cmp == kCondLT || cmp == kCondLE); + // Allow the max for a < 100 ? max(a, -100) : .. + // or the min for a > -100 ? min(a, 100) : .. + if (is_max ? (value >= other_value) : (value <= other_value)) { + return c; + } + } + } + return nullptr; +} + void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* replace_with = nullptr; HInstruction* condition = select->GetCondition(); @@ -942,23 +970,35 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are same-typed int or long. - if (t_type == f_type && - (t_type == DataType::Type::kInt32 || t_type == DataType::Type::kInt64)) { - // Try to replace typical integral ABS constructs. - if (true_value->IsNeg()) { - HInstruction* negated = true_value->InputAt(0); - if ((cmp == kCondLT || cmp == kCondLE) && - (a == negated && a == false_value && IsInt64Value(b, 0))) { - // Found a < 0 ? -a : a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select); - } - } else if (false_value->IsNeg()) { - HInstruction* negated = false_value->InputAt(0); - if ((cmp == kCondGT || cmp == kCondGE) && - (a == true_value && a == negated && IsInt64Value(b, 0))) { - // Found a > 0 ? a : -a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. + if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; + } + // Try to replace typical integral MIN/MAX/ABS constructs. 
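To make the AllowInMinMax case above concrete, here is a standalone sketch (illustrative constants, not ART code) of the clamp shape it recognizes and the nested MIN/MAX it is equivalent to:

    #include <algorithm>
    #include <cstdint>

    // Sketch only. For constants hi >= lo, "a < hi ? max(a, lo) : hi" is exactly
    // clamp(a, lo, hi) == min(max(a, lo), hi). Treating the inner max(a, lo) as the
    // select's "a" lets the outer select be recognized as a MIN, so the whole
    // expression collapses into nested MIN/MAX nodes.
    inline int32_t ClampViaSelect(int32_t a) {
      return a < 100 ? std::max(a, -100) : 100;
    }
    inline int32_t ClampViaMinMax(int32_t a) {
      return std::min(std::max(a, -100), 100);  // same result for every a
    }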
+ if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && + ((a == true_value && b == false_value) || + (b == true_value && a == false_value))) { + // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) + // or a > b ? a : b (MAX) or a > b ? b : a (MIN). + bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); + replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); } } else if (true_value->IsSub() && false_value->IsSub()) { HInstruction* true_sub1 = true_value->InputAt(0); @@ -970,8 +1010,8 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { ((cmp == kCondLT || cmp == kCondLE) && (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a or - // a < b ? b - a : a - b + // Found a > b ? a - b : b - a + // or a < b ? b - a : a - b // which can be replaced by ABS(a - b) for lower precision operands a, b. replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); } @@ -1230,6 +1270,17 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } +void InstructionSimplifierVisitor::VisitAbs(HAbs* instruction) { + HInstruction* input = instruction->GetInput(); + if (DataType::IsZeroExtension(input->GetType(), instruction->GetResultType())) { + // Zero extension from narrow to wide can never set sign bit in the wider + // operand, making the subsequent Abs redundant (e.g., abs(b & 0xff) for byte b). 
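The VisitAbs rule above rests on a simple fact: a zero-extended narrow value can never have the sign bit of the wider type set, so taking its absolute value is the identity. A small self-contained check of that claim (illustrative, not ART code; the Java-level shape is abs(b & 0xff) for a byte b):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Illustrative only: zero extension yields a non-negative wide value,
    // so abs() on it is redundant.
    int32_t AbsOfZeroExtendedByte(int8_t b) {
      int32_t widened = static_cast<int32_t>(static_cast<uint8_t>(b));  // b & 0xff
      return std::abs(widened);  // always equal to 'widened' itself
    }

    int main() {
      for (int v = -128; v <= 127; ++v) {
        int8_t b = static_cast<int8_t>(v);
        int32_t widened = static_cast<int32_t>(static_cast<uint8_t>(b));
        assert(AbsOfZeroExtendedByte(b) == widened);  // Abs adds nothing here
      }
      return 0;
    }
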
+ instruction->ReplaceWith(input); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + } +} + void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); @@ -2430,6 +2481,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } +void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMin* min = new (GetGraph()->GetAllocator()) + HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min); +} + +void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMax* max = new (GetGraph()->GetAllocator()) + HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max); +} + +void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HAbs* abs = new (GetGraph()->GetAllocator()) + HAbs(type, invoke->InputAt(0), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { switch (instruction->GetIntrinsic()) { case Intrinsics::kStringEquals: @@ -2513,6 +2585,42 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleStoreStoreFence: SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); break; + case Intrinsics::kMathMinIntInt: + SimplifyMin(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMinLongLong: + SimplifyMin(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMinFloatFloat: + SimplifyMin(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMinDoubleDouble: + SimplifyMin(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathMaxIntInt: + SimplifyMax(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMaxLongLong: + SimplifyMax(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMaxFloatFloat: + SimplifyMax(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMaxDoubleDouble: + SimplifyMax(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathAbsInt: + SimplifyAbs(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathAbsLong: + SimplifyAbs(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathAbsFloat: + SimplifyAbs(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathAbsDouble: + SimplifyAbs(instruction, DataType::Type::kFloat64); + break; default: break; } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 62991435c7a..1035cbc2c46 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -266,6 +266,18 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ << " should have been converted to HIR"; \ } #define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, 
MathMinDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \ UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 0e6485be9f7..c3d643a7d18 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { - Location in = locations->InAt(0); - Location out = locations->Out(); - - FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in); - FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out); - - __ Fabs(out_reg, in_reg); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - MacroAssembler* masm) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in); - Register out_reg = is64bit ? 
XRegisterFrom(output) : WRegisterFrom(output); - - __ Cmp(in_reg, Operand(0)); - __ Cneg(out_reg, in_reg, lt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); - FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); - FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out); - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - bool is_long, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? 
lt : gt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); -} - void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 97a145664c3..29aecbc0975 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -432,341 +432,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } -static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { - __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - ArmVIXLAssembler* assembler) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); - - if (is64bit) { - vixl32::Register in_reg_lo = LowRegisterFrom(in); - vixl32::Register in_reg_hi = HighRegisterFrom(in); - vixl32::Register out_reg_lo = LowRegisterFrom(output); - vixl32::Register out_reg_hi = HighRegisterFrom(output); - - DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; - - __ Asr(mask, in_reg_hi, 31); - __ Adds(out_reg_lo, in_reg_lo, mask); - __ Adc(out_reg_hi, in_reg_hi, mask); - __ Eor(out_reg_lo, mask, out_reg_lo); - __ Eor(out_reg_hi, mask, out_reg_hi); - } else { - 
vixl32::Register in_reg = RegisterFrom(in); - vixl32::Register out_reg = RegisterFrom(output); - - __ Asr(mask, in_reg, 31); - __ Add(out_reg, in_reg, mask); - __ Eor(out_reg, mask, out_reg); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::SRegister op1 = SRegisterFrom(op1_loc); - vixl32::SRegister op2 = SRegisterFrom(op2_loc); - vixl32::SRegister out = OutputSRegister(invoke); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp1 = temps.Acquire(); - vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); - vixl32::Label nan, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F32, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). - __ Vmov(temp1, op1); - __ Vmov(temp2, op2); - if (is_min) { - __ Orr(temp1, temp1, temp2); - } else { - __ And(temp1, temp1, temp2); - } - __ Vmov(out, temp1); - __ B(final_label); - - // handle NaN input. - __ Bind(&nan); - __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
- __ Vmov(out, temp1); - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. - return; - } - - vixl32::DRegister op1 = DRegisterFrom(op1_loc); - vixl32::DRegister op2 = DRegisterFrom(op2_loc); - vixl32::DRegister out = OutputDRegister(invoke); - vixl32::Label handle_nan_eq, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F64, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0). - if (!is_min) { - __ Vand(F64, out, op1, op2); - __ B(final_label); - } - - // handle op1 == op2, min(+0.0,-0.0), NaN input. - __ Bind(&handle_nan_eq); - __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 
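The removed ARM helpers above hand-code Java's Math.min/max semantics for floats and doubles: a NaN operand wins over any number, and when the operands compare equal the raw bit patterns are OR-ed (min) or AND-ed (max) so that -0.0 and +0.0 are ordered correctly. A compact C++ model of the float min case, offered only as a sketch (the function name is illustrative):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Illustrative only: Java-style Math.min(float, float). NaN beats any number,
    // and when the operands compare equal (+0.0 vs -0.0) the sign bits are OR-ed
    // so -0.0 prevails for min.
    float JavaStyleMinFloat(float a, float b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::numeric_limits<float>::quiet_NaN();  // 0x7FC00000-style pattern
      }
      if (a != b) {
        return a < b ? a : b;
      }
      // a == b: possibly +0.0 vs -0.0; OR the bit patterns so -0.0 wins.
      uint32_t ia, ib;
      std::memcpy(&ia, &a, sizeof(ia));
      std::memcpy(&ib, &b, sizeof(ib));
      uint32_t result_bits = ia | ib;
      float result;
      std::memcpy(&result, &result_bits, sizeof(result));
      return result;
    }

    int main() {
      assert(std::signbit(JavaStyleMinFloat(+0.0f, -0.0f)));  // min(+0.0, -0.0) == -0.0
      assert(std::isnan(JavaStyleMinFloat(1.0f, NAN)));       // NaN is preferred
      assert(JavaStyleMinFloat(2.0f, 3.0f) == 2.0f);
      return 0;
    }
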
- - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::Register op1_lo = LowRegisterFrom(op1_loc); - vixl32::Register op1_hi = HighRegisterFrom(op1_loc); - vixl32::Register op2_lo = LowRegisterFrom(op2_loc); - vixl32::Register op2_hi = HighRegisterFrom(op2_loc); - vixl32::Register out_lo = LowRegisterFrom(out_loc); - vixl32::Register out_hi = HighRegisterFrom(out_loc); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp = temps.Acquire(); - - DCHECK(op1_lo.Is(out_lo)); - DCHECK(op1_hi.Is(out_hi)); - - // Compare op1 >= op2, or op1 < op2. - __ Cmp(out_lo, op2_lo); - __ Sbcs(temp, out_hi, op2_hi); - - // Now GE/LT condition code is correct for the long comparison. - { - vixl32::ConditionType cond = is_min ? ge : lt; - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ itt(cond); - __ mov(cond, out_lo, op2_lo); - __ mov(cond, out_hi, op2_hi); - } -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); -} - -static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - vixl32::Register op1 = InputRegisterAt(invoke, 0); - vixl32::Register op2 = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); - - __ Cmp(op1, op2); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ ite(is_min ? lt : gt); - __ mov(is_min ? lt : gt, out, op1); - __ mov(is_min ? 
ge : le, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ false, GetAssembler()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index b7936b9c8ec..ae248a3e5c7 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); } +inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, DataType::Type type, bool isR6, + bool hasMsa, MipsAssembler* assembler) { Register out = locations->Out().AsRegister<Register>(); @@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations, // instructions compared to a loop-based algorithm which required 47 // instructions. 
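The non-MSA path below emits, instruction by instruction, the classic branch-free population count described in the comment above (the MSA path replaces all of it with a single PCNT instruction). The same reduction written directly in C++, as an illustrative sketch checked against a reference loop:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: the branch-free popcount the MIPS code emits by hand.
    uint32_t PopCount32(uint32_t x) {
      x = x - ((x >> 1) & 0x55555555u);                  // 2-bit subtotals
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit subtotals
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit subtotals
      return (x * 0x01010101u) >> 24;                    // sum the four bytes
    }

    int main() {
      for (uint32_t v : {0u, 1u, 0xFFu, 0x80000000u, 0xFFFFFFFFu, 0x12345678u}) {
        uint32_t expected = 0;
        for (uint32_t t = v; t != 0; t >>= 1) expected += t & 1u;  // reference loop
        assert(PopCount32(v) == expected);
      }
      return 0;
    }
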
- if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - if (isR6) { - __ MulR6(out, out, TMP); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } else { - __ MulR2(out, out, TMP); + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + __ Mtc1(in_lo, FTMP); + __ Mthc1(in_hi, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } - __ Srl(out, out, 24); } else { - DCHECK_EQ(type, DataType::Type::kInt64); - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); - Register out_hi = locations->GetTemp(1).AsRegister<Register>(); - Register tmp_lo = TMP; - Register out_lo = out; + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + if (isR6) { + __ MulR6(out, out, TMP); + } else { + __ MulR2(out, out, TMP); + } + __ Srl(out, out, 24); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); + Register out_hi = locations->GetTemp(1).AsRegister<Register>(); + Register tmp_lo = TMP; + Register out_lo = out; - __ Srl(tmp_lo, in_lo, 1); - __ Srl(tmp_hi, in_hi, 1); + __ Srl(tmp_lo, in_lo, 1); + __ Srl(tmp_hi, in_hi, 1); - __ LoadConst32(AT, 0x55555555); + __ LoadConst32(AT, 0x55555555); - __ And(tmp_lo, tmp_lo, AT); - __ Subu(tmp_lo, in_lo, tmp_lo); + __ And(tmp_lo, tmp_lo, AT); + __ Subu(tmp_lo, in_lo, tmp_lo); - __ And(tmp_hi, tmp_hi, AT); - __ Subu(tmp_hi, in_hi, tmp_hi); + __ And(tmp_hi, tmp_hi, AT); + __ Subu(tmp_hi, in_hi, tmp_hi); - __ LoadConst32(AT, 0x33333333); + __ LoadConst32(AT, 0x33333333); - __ And(out_lo, tmp_lo, AT); - __ Srl(tmp_lo, tmp_lo, 2); - __ And(tmp_lo, tmp_lo, AT); - __ Addu(tmp_lo, out_lo, tmp_lo); + __ And(out_lo, tmp_lo, AT); + __ Srl(tmp_lo, tmp_lo, 2); + __ And(tmp_lo, tmp_lo, AT); + __ Addu(tmp_lo, out_lo, tmp_lo); - __ And(out_hi, tmp_hi, AT); - __ Srl(tmp_hi, tmp_hi, 2); - __ And(tmp_hi, tmp_hi, AT); - __ Addu(tmp_hi, out_hi, tmp_hi); + __ And(out_hi, tmp_hi, AT); + __ Srl(tmp_hi, tmp_hi, 2); + __ And(tmp_hi, tmp_hi, AT); + __ Addu(tmp_hi, out_hi, tmp_hi); - // Here we deviate from the original algorithm a bit. 
We've reached - // the stage where the bitfields holding the subtotals are large - // enough to hold the combined subtotals for both the low word, and - // the high word. This means that we can add the subtotals for the - // the high, and low words into a single word, and compute the final - // result for both the high, and low words using fewer instructions. - __ LoadConst32(AT, 0x0F0F0F0F); + // Here we deviate from the original algorithm a bit. We've reached + // the stage where the bitfields holding the subtotals are large + // enough to hold the combined subtotals for both the low word, and + // the high word. This means that we can add the subtotals for the + // the high, and low words into a single word, and compute the final + // result for both the high, and low words using fewer instructions. + __ LoadConst32(AT, 0x0F0F0F0F); - __ Addu(TMP, tmp_hi, tmp_lo); + __ Addu(TMP, tmp_hi, tmp_lo); - __ Srl(out, TMP, 4); - __ And(out, out, AT); - __ And(TMP, TMP, AT); - __ Addu(out, out, TMP); + __ Srl(out, TMP, 4); + __ And(out, out, AT); + __ And(TMP, TMP, AT); + __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x01010101); + __ LoadConst32(AT, 0x01010101); - if (isR6) { - __ MulR6(out, out, AT); - } else { - __ MulR2(out, out, AT); - } + if (isR6) { + __ MulR6(out, out, AT); + } else { + __ MulR2(out, out, AT); + } - __ Srl(out, out, 24); + __ Srl(out, out, 24); + } } } @@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(int) @@ -739,575 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - bool isR2OrNewer, - bool isR6, - MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - // Note, as a "quality of implementation", rather than pure "spec compliance", we require that - // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN - // (signaling NaN may become quiet though). - // - // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, - // both regular floating point numbers and NAN values are treated alike, only the sign bit is - // affected by this instruction. - // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any - // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be - // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. - if (isR6) { - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } - } else { - if (is64bit) { - if (in != out) { - __ MovD(out, in); - } - __ MoveFromFpuHigh(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ MoveToFpuHigh(TMP, out); - } else { - __ Mfc1(TMP, in); - // ins instruction is not available for R1. 
- if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ Mtc1(TMP, out); - } - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - // The comments in this section show the analogous operations which would - // be performed if we had 64-bit registers "in", and "out". - // __ Dsra32(AT, in, 31); - __ Sra(AT, in_hi, 31); - // __ Xor(out, in, AT); - __ Xor(TMP, in_lo, AT); - __ Xor(out_hi, in_hi, AT); - // __ Dsubu(out, out, AT); - __ Subu(out_lo, TMP, AT); - __ Sltu(TMP, out_lo, TMP); - __ Addu(out_hi, out_hi, TMP); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (is_R6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
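The removed GenAbsInteger helpers above (ARM's Asr/Add/Eor sequence and the MIPS Sra/Xor/Subu sequence) both use the same branch-free two's-complement identity: with mask = x >> 31, abs(x) equals (x + mask) ^ mask, or equivalently (x ^ mask) - mask. A standalone sketch, assuming arithmetic right shift and two's-complement wrap-around, as the hardware instructions provide:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: branch-free integer abs. mask is 0 for x >= 0 and
    // all-ones for x < 0, so the result is x, or ~x + 1 == -x, respectively.
    int32_t BranchlessAbs(int32_t x) {
      uint32_t ux = static_cast<uint32_t>(x);
      uint32_t mask = static_cast<uint32_t>(x >> 31);  // arithmetic shift: 0 or 0xFFFFFFFF
      return static_cast<int32_t>((ux + mask) ^ mask);
    }

    int main() {
      assert(BranchlessAbs(5) == 5);
      assert(BranchlessAbs(-5) == 5);
      assert(BranchlessAbs(0) == 0);
      assert(BranchlessAbs(INT32_MIN) == INT32_MIN);  // wraps, like Math.abs(Integer.MIN_VALUE)
      return 0;
    }
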
- __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - } else { - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. - if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? 
a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - if (is_R6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. 
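The SELEQZ/SELNEZ description above can be modeled directly in C++; a sketch only, with seleqz/selnez standing in for the instructions (exactly one of the two intermediate results is guaranteed to be zero, so an OR merges them):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: seleqz(rs, c) == (c == 0 ? rs : 0),
    //                    selnez(rs, c) == (c != 0 ? rs : 0).
    uint32_t Seleqz(uint32_t rs, uint32_t c) { return c == 0 ? rs : 0; }
    uint32_t Selnez(uint32_t rs, uint32_t c) { return c != 0 ? rs : 0; }

    // rd = (c == 0) ? rs : rt, built from the pair plus an OR.
    uint32_t SelectEqz(uint32_t c, uint32_t rs, uint32_t rt) {
      return Seleqz(rs, c) | Selnez(rt, c);
    }

    int main() {
      assert(SelectEqz(0, 7, 9) == 7);
      assert(SelectEqz(1, 7, 9) == 9);
      // Min via an slt-style condition: c = (b < a), min = (c == 0) ? a : b.
      uint32_t a = 5, b = 3;
      uint32_t c = (b < a) ? 1u : 0u;
      assert(SelectEqz(c, a, b) == 3u);
      return 0;
    }
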
- // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. - } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = 
locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 13397f11d4c..1c1ba401325 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { bool IsR2OrNewer() const; bool IsR6() const; bool Is32BitFPU() const; + bool HasMsa() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 4668c561ed3..9a9ae714bc6 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { return codegen_->GetGraph()->GetAllocator(); } +inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, const DataType::Type type, + const bool hasMsa, Mips64Assembler* assembler) { GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); @@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations, // bits are set but the algorithm here attempts to minimize the total // number of instructions executed even when a large number of 
bits // are set. - - if (type == DataType::Type::kInt32) { - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - __ MulR6(out, out, TMP); - __ Srl(out, out, 24); - } else if (type == DataType::Type::kInt64) { - __ Dsrl(TMP, in, 1); - __ LoadConst64(AT, 0x5555555555555555L); - __ And(TMP, TMP, AT); - __ Dsubu(TMP, in, TMP); - __ LoadConst64(AT, 0x3333333333333333L); - __ And(out, TMP, AT); - __ Dsrl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Daddu(TMP, out, TMP); - __ Dsrl(out, TMP, 4); - __ Daddu(out, out, TMP); - __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); - __ And(out, out, AT); - __ LoadConst64(TMP, 0x0101010101010101L); - __ Dmul(out, out, TMP); - __ Dsrl32(out, out, 24); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); + } else { + __ Dmtc1(in, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Dmfc1(out, FTMP); + } + } else { + if (type == DataType::Type::kInt32) { + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + __ MulR6(out, out, TMP); + __ Srl(out, out, 24); + } else { + __ Dsrl(TMP, in, 1); + __ LoadConst64(AT, 0x5555555555555555L); + __ And(TMP, TMP, AT); + __ Dsubu(TMP, in, TMP); + __ LoadConst64(AT, 0x3333333333333333L); + __ And(out, TMP, AT); + __ Dsrl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Daddu(TMP, out, TMP); + __ Dsrl(out, TMP, 4); + __ Daddu(out, out, TMP); + __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); + __ And(out, out, AT); + __ LoadConst64(TMP, 0x0101010101010101L); + __ Dmul(out, out, TMP); + __ Dsrl32(out, out, 24); + } } } @@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(long) @@ -467,291 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, 
GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dsra32(AT, in, 31); - __ Xor(out, in, AT); - __ Dsubu(out, out, AT); - } else { - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - Mips64Assembler* assembler) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - Mips64Assembler* assembler) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 6f40d90ddbf..748b0b02b2e 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -68,6 +68,8 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + bool HasMsa() const; + private: Mips64Assembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 0763ef23529..f84a33bb8e3 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -40,11 +40,6 @@ namespace art { namespace x86 { -static constexpr int kDoubleNaNHigh = 0x7FF80000; -static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); -static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); - IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* 
codegen) : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { @@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - // We need addressibility for the constant area. - locations->SetInAt(1, Location::RequiresRegister()); - // We need a temporary to hold the constant. - locations->AddTemp(Location::RequiresFpuRegister()); - } -} - -static void MathAbsFP(HInvoke* invoke, - bool is64bit, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(1).IsRegister()); - // We also have a constant area pointer. - Register constant_area = locations->InAt(1).AsRegister<Register>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - if (is64bit) { - __ movsd(temp, codegen->LiteralInt64Address( - INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area)); - __ andpd(output.AsFpuRegister<XmmRegister>(), temp); - } else { - __ movss(temp, codegen->LiteralInt32Address( - INT32_C(0x7FFFFFFF), method_address, constant_area)); - __ andps(output.AsFpuRegister<XmmRegister>(), temp); - } - } else { - // Create the right constant on an aligned stack. 
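// A short aside on both MathAbsFP paths, with purely worked example values:
// float/double abs is implemented by clearing the IEEE-754 sign bit, hence the
// masks 0x7FFFFFFF and 0x7FFFFFFFFFFFFFFF, e.g. -2.0f = 0xC0000000 and
// 0xC0000000 & 0x7FFFFFFF = 0x40000000 = 2.0f. In the stack fallback below,
// pushing the high word 0x7FFFFFFF and then the low word 0xFFFFFFFF leaves the
// little-endian 64-bit mask 0x7FFFFFFFFFFFFFFF at (ESP) for the andpd.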
- if (is64bit) { - __ subl(ESP, Immediate(8)); - __ pushl(Immediate(0x7FFFFFFF)); - __ pushl(Immediate(0xFFFFFFFF)); - __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } else { - __ subl(ESP, Immediate(12)); - __ pushl(Immediate(0x7FFFFFFF)); - __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } - __ addl(ESP, Immediate(16)); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RegisterLocation(EDX)); -} - -static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { - Location output = locations->Out(); - Register out = output.AsRegister<Register>(); - DCHECK_EQ(out, EAX); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(temp, EDX); - - // Sign extend EAX into EDX. - __ cdq(); - - // XOR EAX with sign. - __ xorl(EAX, EDX); - - // Subtract out sign to correct. - __ subl(EAX, EDX); - - // The result is in EAX. -} - -static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { - Location input = locations->InAt(0); - Register input_lo = input.AsRegisterPairLow<Register>(); - Register input_hi = input.AsRegisterPairHigh<Register>(); - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Compute the sign into the temporary. - __ movl(temp, input_hi); - __ sarl(temp, Immediate(31)); - - // Store the sign into the output. - __ movl(output_lo, temp); - __ movl(output_hi, temp); - - // XOR the input to the output. - __ xorl(output_lo, input_lo); - __ xorl(output_hi, input_hi); - - // Subtract the sign. 
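// Worked example of the branch-free abs being emitted here: with
// s = x >> 63 (arithmetic shift, so s is 0 or all ones), |x| = (x ^ s) - s.
// For x = -5: s is all ones, x ^ s = 4, and 4 - (-1) = 5. Since x86-32 only
// has 32-bit GPRs, the final subtraction is split into subl on the low word
// followed by sbbl on the high word so the borrow carries across the 64 bits.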
- __ subl(output_lo, temp); - __ sbbl(output_hi, temp); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { - CreateAbsIntLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { - CreateAbsLongLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { - GenAbsLong(invoke->GetLocations(), GetAssembler()); -} - -static void GenMinMaxFP(HInvoke* invoke, - bool is_min, - bool is_double, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - // Do we have a constant area pointer? - if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(2).IsRegister()); - Register constant_area = locations->InAt(2).AsRegister<Register>(); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); - } else { - __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); - } - } else { - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); - } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
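// Two details of the FP min/max sequence above, spelled out for reference:
// ucomiss/ucomisd set PF only for an unordered compare (a NaN input), which is
// what j(kParityEven, &nan) keys on; and when the ordered inputs compare
// equal, orps/orpd for min keeps a set sign bit so min(+0.0, -0.0) == -0.0,
// while andps/andpd for max clears it so max(+0.0, -0.0) == +0.0, matching the
// Java definition without an extra branch.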
- locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - locations->SetInAt(2, Location::RequiresRegister()); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - if (is_long) { - // Need to perform a subtract to get the sign right. - // op1 is already in the same location as the output. - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - - Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); - Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); - - // Spare register to compute the subtraction to set condition code. - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Subtract off op2_low. - __ movl(temp, output_lo); - __ subl(temp, op2_lo); - - // Now use the same tempo and the borrow to finish the subtraction of op2_hi. - __ movl(temp, output_hi); - __ sbbl(temp, op2_hi); - - // Now the condition code is correct. - Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; - __ cmovl(cond, output_lo, op2_lo); - __ cmovl(cond, output_hi, op2_hi); - } else { - Register out = locations->Out().AsRegister<Register>(); - Register op2 = op2_loc.AsRegister<Register>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - __ cmpl(out, op2); - Condition cond = is_min ? 
Condition::kGreater : Condition::kLess; - __ cmovl(cond, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - // Register to use to perform a long subtract to set cc. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 91a505ede1a..7627dc9490a 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. 
-} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - - // TODO: Can mask directly with constant area using pand if we can guarantee - // that the literal is aligned on a 16 byte boundary. This will avoid a - // temporary. - if (is64bit) { - __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } else { - __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); - __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { - Location output = locations->Out(); - CpuRegister out = output.AsRegister<CpuRegister>(); - CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); - - if (is64bit) { - // Create mask. - __ movq(mask, out); - __ sarq(mask, Immediate(63)); - // Add mask. - __ addq(out, mask); - __ xorq(out, mask); - } else { - // Create mask. - __ movl(mask, out); - __ sarl(mask, Immediate(31)); - // Add mask. - __ addl(out, mask); - __ xorl(out, mask); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? 
op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); - } else { - __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86_64Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. 
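// For reference: the literals loaded in the NaN branch above,
// 0x7FF8000000000000 and 0x7FC00000, are the canonical quiet-NaN encodings
// (all-ones exponent, top fraction bit set) and correspond to Double.NaN and
// Float.NaN, i.e. the value Java requires min/max to return once either input
// is a NaN.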
- DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - if (is_long) { - __ cmpq(out, op2); - } else { - __ cmpl(out, op2); - } - - __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc new file mode 100644 index 00000000000..a0760eff691 --- /dev/null +++ b/compiler/optimizing/loop_analysis.cc @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "loop_analysis.h" + +#include "base/bit_vector-inl.h" + +namespace art { + +void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results) { + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); + + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + analysis_results->exits_num_++; + } + } + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) { + analysis_results->has_instructions_preventing_scalar_peeling_ = true; + analysis_results->has_instructions_preventing_scalar_unrolling_ = true; + } + analysis_results->instr_num_++; + } + analysis_results->bb_num_++; + } +} + +bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) { + HGraph* graph = loop_info->GetHeader()->GetGraph(); + for (uint32_t block_id : loop_info->GetBlocks().Indexes()) { + HBasicBlock* block = graph->GetBlocks()[block_id]; + DCHECK(block != nullptr); + if (block->EndsWithIf()) { + HIf* hif = block->GetLastInstruction()->AsIf(); + HInstruction* input = hif->InputAt(0); + if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) { + return true; + } + } + } + return false; +} + +class Arm64LoopHelper : public ArchDefaultLoopHelper { + public: + // Scalar loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2; + // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40; + // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8; + + // SIMD loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8; + // Loop's maximum instruction count. Loops with higher count will not be unrolled. + static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50; + + bool IsLoopTooBigForScalarPeelingUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE { + size_t instr_num = loop_analysis_info->GetNumberOfInstructions(); + size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks(); + return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr || + bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks); + } + + uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, + uint64_t trip_count) const OVERRIDE { + uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor; + if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) { + return kNoUnrollingFactor; + } + + return desired_unrolling_factor; + } + + bool IsLoopPeelingEnabled() const OVERRIDE { return true; } + + uint32_t GetSIMDUnrollingFactor(HBasicBlock* block, + int64_t trip_count, + uint32_t max_peel, + uint32_t vector_length) const OVERRIDE { + // Don't unroll with insufficient iterations. + // TODO: Unroll loops with unknown trip count. + DCHECK_NE(vector_length, 0u); + if (trip_count < (2 * vector_length + max_peel)) { + return kNoUnrollingFactor; + } + // Don't unroll for large loop body size. 
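// Worked numbers for the two heuristics (purely illustrative inputs): the
// scalar factor above unrolls a trip_count of 100 by 2 but returns
// kNoUnrollingFactor for 7, since 7 is not a multiple of 2. For the SIMD
// factor computed just below, instruction_count = 10, trip_count = 100,
// max_peel = 4 and vector_length = 4 give uf1 = 50 / 10 = 5,
// uf2 = (100 - 4) / 4 = 24, min(5, 24, 8) = 5, truncated to the power of
// two 4, so the vector body is unrolled four times.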
+ uint32_t instruction_count = block->GetInstructions().CountSize(); + if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) { + return kNoUnrollingFactor; + } + // Find a beneficial unroll factor with the following restrictions: + // - At least one iteration of the transformed loop should be executed. + // - The loop body shouldn't be "too big" (heuristic). + + uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count; + uint32_t uf2 = (trip_count - max_peel) / vector_length; + uint32_t unroll_factor = + TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor})); + DCHECK_GE(unroll_factor, 1u); + return unroll_factor; + } +}; + +ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa, + ArenaAllocator* allocator) { + switch (isa) { + case InstructionSet::kArm64: { + return new (allocator) Arm64LoopHelper; + } + default: { + return new (allocator) ArchDefaultLoopHelper; + } + } +} + +} // namespace art diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h new file mode 100644 index 00000000000..ece98581367 --- /dev/null +++ b/compiler/optimizing/loop_analysis.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ + +#include "nodes.h" + +namespace art { + +class LoopAnalysis; + +// No loop unrolling factor (just one copy of the loop-body). +static constexpr uint32_t kNoUnrollingFactor = 1; + +// Class to hold cached information on properties of the loop. +class LoopAnalysisInfo : public ValueObject { + public: + explicit LoopAnalysisInfo(HLoopInformation* loop_info) + : bb_num_(0), + instr_num_(0), + exits_num_(0), + has_instructions_preventing_scalar_peeling_(false), + has_instructions_preventing_scalar_unrolling_(false), + loop_info_(loop_info) {} + + size_t GetNumberOfBasicBlocks() const { return bb_num_; } + size_t GetNumberOfInstructions() const { return instr_num_; } + size_t GetNumberOfExits() const { return exits_num_; } + + bool HasInstructionsPreventingScalarPeeling() const { + return has_instructions_preventing_scalar_peeling_; + } + + bool HasInstructionsPreventingScalarUnrolling() const { + return has_instructions_preventing_scalar_unrolling_; + } + + const HLoopInformation* GetLoopInfo() const { return loop_info_; } + + private: + // Number of basic blocks in the loop body. + size_t bb_num_; + // Number of instructions in the loop body. + size_t instr_num_; + // Number of loop's exits. + size_t exits_num_; + // Whether the loop has instructions which make scalar loop peeling non-beneficial. + bool has_instructions_preventing_scalar_peeling_; + // Whether the loop has instructions which make scalar loop unrolling non-beneficial. + bool has_instructions_preventing_scalar_unrolling_; + + // Corresponding HLoopInformation. 
+ const HLoopInformation* loop_info_; + + friend class LoopAnalysis; +}; + +// Placeholder class for methods and routines used to analyse loops, calculate loop properties +// and characteristics. +class LoopAnalysis : public ValueObject { + public: + // Calculates loops basic properties like body size, exits number, etc. and fills + // 'analysis_results' with this information. + static void CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results); + + // Returns whether the loop has at least one loop invariant exit. + static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info); + + // Returns whether HIf's true or false successor is outside the specified loop. + // + // Prerequisite: HIf must be in the specified loop. + static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) { + DCHECK(loop_info->Contains(*hif->GetBlock())); + HBasicBlock* true_succ = hif->IfTrueSuccessor(); + HBasicBlock* false_succ = hif->IfFalseSuccessor(); + return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ)); + } + + private: + // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial. + // + // If in the loop body we have a dex/runtime call then its contribution to the whole + // loop performance will probably prevail. So peeling/unrolling optimization will not bring + // any noticeable performance improvement. It will increase the code size. + static bool MakesScalarPeelingUnrollingNonBeneficial(HInstruction* instruction) { + return (instruction->IsNewArray() || + instruction->IsNewInstance() || + instruction->IsUnresolvedInstanceFieldGet() || + instruction->IsUnresolvedInstanceFieldSet() || + instruction->IsUnresolvedStaticFieldGet() || + instruction->IsUnresolvedStaticFieldSet() || + // TODO: Support loops with intrinsified invokes. + instruction->IsInvoke() || + // TODO: Support loops with ClinitChecks. + instruction->IsClinitCheck()); + } +}; + +// +// Helper class which holds target-dependent methods and constants needed for loop optimizations. +// +// To support peeling/unrolling for a new architecture one needs to create new helper class, +// inherit it from this and add implementation for the following methods. +// +class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> { + public: + virtual ~ArchDefaultLoopHelper() {} + + // Creates an instance of specialised helper for the target or default helper if the target + // doesn't support loop peeling and unrolling. + static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator); + + // Returns whether the loop is too big for loop peeling/unrolling by checking its total number of + // basic blocks and instructions. + // + // If the loop body has too many instructions then peeling/unrolling optimization will not bring + // any noticeable performance improvement however will increase the code size. + // + // Returns 'true' by default, should be overridden by particular target loop helper. + virtual bool IsLoopTooBigForScalarPeelingUnrolling( + LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + + // Returns optimal scalar unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. 
+ virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, + uint64_t trip_count ATTRIBUTE_UNUSED) const { + return kNoUnrollingFactor; + } + + // Returns whether scalar loop peeling is enabled, + // + // Returns 'false' by default, should be overridden by particular target loop helper. + virtual bool IsLoopPeelingEnabled() const { return false; } + + // Returns optimal SIMD unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. + virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, + int64_t trip_count ATTRIBUTE_UNUSED, + uint32_t max_peel ATTRIBUTE_UNUSED, + uint32_t vector_length ATTRIBUTE_UNUSED) const { + return kNoUnrollingFactor; + } +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 899496328eb..1462404932e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -33,8 +33,8 @@ namespace art { // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; -// No loop unrolling factor (just one copy of the loop-body). -static constexpr uint32_t kNoUnrollingFactor = 1; +// Enables scalar loop unrolling in the loop optimizer. +static constexpr bool kEnableScalarPeelingUnrolling = false; // // Static helpers. @@ -153,6 +153,18 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, return false; } } + // A MIN-MAX on narrower operands qualifies as well + // (returning the operator itself). + if (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + if (IsSignExtensionAndGet(min_max->InputAt(0), type, operand) && + IsSignExtensionAndGet(min_max->InputAt(1), type, operand)) { + *operand = min_max; + return true; + } + } return false; } @@ -216,6 +228,18 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, return false; } } + // A MIN-MAX on narrower operands qualifies as well + // (returning the operator itself). + if (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + if (IsZeroExtensionAndGet(min_max->InputAt(0), type, operand) && + IsZeroExtensionAndGet(min_max->InputAt(1), type, operand)) { + *operand = min_max; + return true; + } + } return false; } @@ -227,6 +251,7 @@ static bool IsNarrowerOperands(HInstruction* a, /*out*/ HInstruction** r, /*out*/ HInstruction** s, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr && b != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) { @@ -247,6 +272,7 @@ static bool IsNarrowerOperand(HInstruction* a, DataType::Type type, /*out*/ HInstruction** r, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr); // Look for a matching sign extension. 
DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r)) { @@ -270,20 +296,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type)); } -// Detect up to two instructions a and b, and an acccumulated constant c. -static bool IsAddConstHelper(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c, - int32_t depth) { - static constexpr int32_t kMaxDepth = 8; // don't search too deep +// Detect up to two added operands a and b and an acccumulated constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c, + int32_t depth = 8) { // don't search too deep int64_t value = 0; + // Enter add/sub while still within reasonable depth. + if (depth > 0) { + if (instruction->IsAdd()) { + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) && + IsAddConst(instruction->InputAt(1), a, b, c, depth - 1); + } else if (instruction->IsSub() && + IsInt64AndGet(instruction->InputAt(1), &value)) { + *c -= value; + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1); + } + } + // Otherwise, deal with leaf nodes. if (IsInt64AndGet(instruction, &value)) { *c += value; return true; - } else if (instruction->IsAdd() && depth <= kMaxDepth) { - return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1); } else if (*a == nullptr) { *a = instruction; return true; @@ -291,72 +325,170 @@ static bool IsAddConstHelper(HInstruction* instruction, *b = instruction; return true; } - return false; // too many non-const operands + return false; // too many operands } -// Detect a + b + c for an optional constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - // Try to find a + b and accumulated c. - if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && - *b != nullptr) { - return true; +// Detect a + b + c with optional constant c. +static bool IsAddConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c) { + if (IsAddConst(instruction, a, b, c) && *a != nullptr) { + if (*b == nullptr) { + // Constant is usually already present, unless accumulated. + *b = graph->GetConstant(instruction->GetType(), (*c)); + *c = 0; } - // Found a + b. + return true; + } + return false; +} + +// Detect a direct a - b or a hidden a - (-c). +static bool IsSubConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b) { + int64_t c = 0; + if (instruction->IsSub()) { *a = instruction->InputAt(0); *b = instruction->InputAt(1); - *c = 0; + return true; + } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) { + // Constant for the hidden subtraction. + *b = graph->GetConstant(instruction->GetType(), -c); return true; } return false; } -// Detect a + c for constant c. 
-static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - if (IsInt64AndGet(instruction->InputAt(0), c)) { - *a = instruction->InputAt(1); - return true; - } else if (IsInt64AndGet(instruction->InputAt(1), c)) { - *a = instruction->InputAt(0); - return true; +// Detect clipped [lo, hi] range for nested MIN-MAX operations on a clippee, +// such as MIN(hi, MAX(lo, clippee)) for an arbitrary clippee expression. +// Example: MIN(10, MIN(20, MAX(0, x))) yields [0, 10] with clippee x. +static HInstruction* FindClippee(HInstruction* instruction, + /*out*/ int64_t* lo, + /*out*/ int64_t* hi) { + // Iterate into MIN(.., c)-MAX(.., c) expressions and 'tighten' the range [lo, hi]. + while (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + // Process the constant. + HConstant* right = min_max->GetConstantRight(); + if (right == nullptr) { + break; + } else if (instruction->IsMin()) { + *hi = std::min(*hi, Int64FromConstant(right)); + } else { + *lo = std::max(*lo, Int64FromConstant(right)); } + instruction = min_max->GetLeastConstantLeft(); + } + // Iteration ends in any other expression (possibly MIN/MAX without constant). + // This leaf expression is the clippee with range [lo, hi]. + return instruction; +} + +// Set value range for type (or fail). +static bool CanSetRange(DataType::Type type, + /*out*/ int64_t* uhi, + /*out*/ int64_t* slo, + /*out*/ int64_t* shi) { + if (DataType::Size(type) == 1) { + *uhi = std::numeric_limits<uint8_t>::max(); + *slo = std::numeric_limits<int8_t>::min(); + *shi = std::numeric_limits<int8_t>::max(); + return true; + } else if (DataType::Size(type) == 2) { + *uhi = std::numeric_limits<uint16_t>::max(); + *slo = std::numeric_limits<int16_t>::min(); + *shi = std::numeric_limits<int16_t>::max(); + return true; } return false; } +// Accept various saturated addition forms. +static bool IsSaturatedAdd(HInstruction* a, + HInstruction* b, + DataType::Type type, + int64_t lo, + int64_t hi, + bool is_unsigned) { + int64_t ulo = 0, uhi = 0, slo = 0, shi = 0; + if (!CanSetRange(type, &uhi, &slo, &shi)) { + return false; + } + // Tighten the range for signed single clipping on constant. + if (!is_unsigned) { + int64_t c = 0; + if (IsInt64AndGet(a, &c) || IsInt64AndGet(b, &c)) { + // For c in proper range and narrower operand r: + // MIN(r + c, 127) c > 0 + // or MAX(r + c, -128) c < 0 (and possibly redundant bound). + if (0 < c && c <= shi && hi == shi) { + if (lo <= (slo + c)) { + return true; + } + } else if (slo <= c && c < 0 && lo == slo) { + if (hi >= (shi + c)) { + return true; + } + } + } + } + // Detect for narrower operands r and s: + // MIN(r + s, 255) => SAT_ADD_unsigned + // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed. + return is_unsigned ? (lo <= ulo && hi == uhi) : (lo == slo && hi == shi); +} + +// Accept various saturated subtraction forms. +static bool IsSaturatedSub(HInstruction* a, + DataType::Type type, + int64_t lo, + int64_t hi, + bool is_unsigned) { + int64_t ulo = 0, uhi = 0, slo = 0, shi = 0; + if (!CanSetRange(type, &uhi, &slo, &shi)) { + return false; + } + // Tighten the range for signed single clipping on constant. 
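// Putting the pieces above together on an example: a signed byte saturating
// add written as MIN(MAX(r + s, -128), 127) leaves FindClippee with clippee
// r + s and range [-128, 127], which IsSaturatedAdd accepts as the signed
// form, while the unsigned byte shape MIN(r + s, 255) is accepted because the
// upper clip is exactly 255 and no lower clip raises the bound above zero.
// Ranges matching neither pattern (nor the single-clip constant cases) fall
// back to an ordinary vector MIN/MAX.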
+ if (!is_unsigned) { + int64_t c = 0; + if (IsInt64AndGet(a, /*out*/ &c)) { + // For c in proper range and narrower operand r: + // MIN(c - r, 127) c > 0 + // or MAX(c - r, -128) c < 0 (and possibly redundant bound). + if (0 < c && c <= shi && hi == shi) { + if (lo <= (c - shi)) { + return true; + } + } else if (slo <= c && c < 0 && lo == slo) { + if (hi >= (c - slo)) { + return true; + } + } + } + } + // Detect for narrower operands r and s: + // MAX(r - s, 0) => SAT_SUB_unsigned + // MIN(MAX(r - s, -128), 127) => SAT_ADD_signed. + return is_unsigned ? (lo == ulo && hi >= uhi) : (lo == slo && hi == shi); +} + // Detect reductions of the following forms, // x = x_phi + .. // x = x_phi - .. -// x = max(x_phi, ..) // x = min(x_phi, ..) +// x = max(x_phi, ..) static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { - if (reduction->IsAdd()) { + if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); } else if (reduction->IsSub()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); - } else if (reduction->IsInvokeStaticOrDirect()) { - switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: - return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || - (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); - default: - return false; - } } return false; } @@ -401,6 +533,43 @@ static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) { return true; } +// Tries to statically evaluate condition of the specified "HIf" for other condition checks. +static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { + HInstruction* cond = instruction->InputAt(0); + + // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the + // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in + // FALSE_SUCC). Using that we can replace another evaluation (use) EVAL of the same 'cond' + // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the + // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC). + // if (cond) { if(cond) { + // if (cond) {} if (1) {} + // } else { =======> } else { + // if (cond) {} if (0) {} + // } } + if (!cond->IsConstant()) { + HBasicBlock* true_succ = instruction->IfTrueSuccessor(); + HBasicBlock* false_succ = instruction->IfFalseSuccessor(); + + DCHECK_EQ(true_succ->GetPredecessors().size(), 1u); + DCHECK_EQ(false_succ->GetPredecessors().size(), 1u); + + const HUseList<HInstruction*>& uses = cond->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + HBasicBlock* user_block = user->GetBlock(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (true_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(1), index); + } else if (false_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(0), index); + } + } + } +} + // // Public methods. 
// @@ -432,7 +601,11 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_preheader_(nullptr), vector_header_(nullptr), vector_body_(nullptr), - vector_index_(nullptr) { + vector_index_(nullptr), + arch_loop_helper_(ArchDefaultLoopHelper::Create(compiler_driver_ != nullptr + ? compiler_driver_->GetInstructionSet() + : InstructionSet::kNone, + global_allocator_)) { } void HLoopOptimization::Run() { @@ -643,7 +816,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. @@ -713,6 +886,103 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { return false; } +bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { + return TryOptimizeInnerLoopFinite(node) || + TryPeelingForLoopInvariantExitsElimination(node) || + TryUnrollingForBranchPenaltyReduction(node); +} + + + +// +// Loop unrolling: generic part methods. +// + +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) { + // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) + // as InstructionSet is needed. + if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) { + return false; + } + + HLoopInformation* loop_info = node->loop_info; + int64_t trip_count = 0; + // Only unroll loops with a known tripcount. + if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) { + return false; + } + + uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count); + if (unrolling_factor == kNoUnrollingFactor) { + return false; + } + + LoopAnalysisInfo loop_analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + + // Check "IsLoopClonable" last as it can be time-consuming. + if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) || + (loop_analysis_info.GetNumberOfExits() > 1) || + loop_analysis_info.HasInstructionsPreventingScalarUnrolling() || + !PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; + } + + // TODO: support other unrolling factors. + DCHECK_EQ(unrolling_factor, 2u); + + // Perform unrolling. + PeelUnrollSimpleHelper helper(loop_info); + helper.DoUnrolling(); + + // Remove the redundant loop check after unrolling. + HIf* copy_hif = + helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; + copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + + return true; +} + +bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) { + // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) + // as InstructionSet is needed. + if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) { + return false; + } + + HLoopInformation* loop_info = node->loop_info; + // Check 'IsLoopClonable' the last as it might be time-consuming. + if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + return false; + } + + LoopAnalysisInfo loop_analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + + // Check "IsLoopClonable" last as it can be time-consuming. 
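// The payoff being checked for here, on an illustrative example: in a loop
// such as
//   for (int i = 0; i < n; ++i) { if (flag) break; a[i] = 0; }
// with loop-invariant 'flag', peeling evaluates the exit test once up front,
// and the TryToEvaluateIfCondition pass applied to the cloned HIfs afterwards
// rewrites the duplicated test inside the remaining loop to a constant, so the
// per-iteration branch folds away.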
+ if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) || + loop_analysis_info.HasInstructionsPreventingScalarPeeling() || + !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) || + !PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; + } + + // Perform peeling. + PeelUnrollSimpleHelper helper(loop_info); + helper.DoPeeling(); + + const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); + for (auto entry : *hir_map) { + HInstruction* copy = entry.second; + if (copy->IsIf()) { + TryToEvaluateIfCondition(copy->AsIf(), graph_); + } + } + + return true; +} + // // Loop vectorization. The implementation is based on the book by Aart J.C. Bik: // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance." @@ -843,7 +1113,8 @@ void HLoopOptimization::Vectorize(LoopNode* node, HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Pick a loop unrolling factor for the vector loop. - uint32_t unroll = GetUnrollingFactor(block, trip_count); + uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor( + block, trip_count, MaxNumberPeeled(), vector_length_); uint32_t chunk = vector_length_ * unroll; DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk)); @@ -1082,6 +1353,11 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, HInstruction* index = instruction->InputAt(1); HInstruction* value = instruction->InputAt(2); HInstruction* offset = nullptr; + // For narrow types, explicit type conversion may have been + // optimized way, so set the no hi bits restriction here. + if (DataType::Size(type) <= 2) { + restrictions |= kNoHiBits; + } if (TrySetVectorType(type, &restrictions) && node->loop_info->IsDefinedOutOfTheLoop(base) && induction_range_.IsUnitStride(instruction, index, graph_, &offset) && @@ -1124,7 +1400,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); } -// TODO: saturation arithmetic. bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -1297,80 +1572,62 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } - } else if (instruction->IsInvokeStaticOrDirect()) { - // Accept particular intrinsics. - HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* r = opa; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoAbs)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { - return false; // reject, unless operand is sign-extension narrower - } - // Accept ABS(x) for vectorizable operand. - DCHECK(r != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - } - if (VectorizeUse(node, r, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp(instruction, - vector_map_->Get(r), - nullptr, - HVecOperation::ToProperType(type, is_unsigned)); - } - return true; - } - return false; + } else if (instruction->IsAbs()) { + // Deal with vector restrictions. 
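// Context for the restriction checks that follow: kNoHiBits records that for
// sub-word types the bits above the element width are not guaranteed to be a
// valid sign/zero extension (the explicit conversion may have been optimized
// away), so ABS here and MIN/MAX below only accept sub-word operands once
// IsNarrowerOperand/IsNarrowerOperands prove they are explicit extensions of
// the packed element type.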
+ HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower + } + // Accept ABS(x) for vectorizable operand. + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - HInstruction* r = opa; - HInstruction* s = opb; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoMinMax)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { - return false; // reject, unless all operands are same-extension narrower - } - // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr); - DCHECK(s != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - s = opb; - } - if (VectorizeUse(node, r, generate_code, type, restrictions) && - VectorizeUse(node, s, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp( - instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); - } - return true; - } - return false; + return true; + } + } else if (instruction->IsMin() || instruction->IsMax()) { + // Recognize saturation arithmetic. + if (VectorizeSaturationIdiom(node, instruction, generate_code, type, restrictions)) { + return true; + } + // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + HInstruction* s = opb; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoMinMax)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { + return false; // reject, unless all operands are same-extension narrower + } + // Accept MIN/MAX(x, y) for vectorizable operands. 
+ DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + s = opb; + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp( + instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); } - default: - return false; - } // switch + return true; + } } return false; } @@ -1475,11 +1732,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSaturation; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1504,11 +1761,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSaturation; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1811,83 +2068,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); - case HInstruction::kInvokeStaticOrDirect: { - HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); - if (vector_mode_ == kVector) { - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: - DCHECK(opb == nullptr); - vector = new (global_allocator_) - HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); - break; - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: { - vector = new (global_allocator_) - HVecMin(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - vector = new (global_allocator_) - HVecMax(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); - UNREACHABLE(); - } // switch invoke - } else { - // In scalar code, simply clone the method invoke, and replace its operands with the - // corresponding new scalar instructions in the loop. The instruction will get an - // environment while being inserted from the instruction map in original program order. 
- DCHECK(vector_mode_ == kSequential); - size_t num_args = invoke->GetNumberOfArguments(); - HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( - global_allocator_, - num_args, - invoke->GetType(), - invoke->GetDexPc(), - invoke->GetDexMethodIndex(), - invoke->GetResolvedMethod(), - invoke->GetDispatchInfo(), - invoke->GetInvokeType(), - invoke->GetTargetMethod(), - invoke->GetClinitCheckRequirement()); - HInputsRef inputs = invoke->GetInputs(); - size_t num_inputs = inputs.size(); - DCHECK_LE(num_args, num_inputs); - DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree - for (size_t index = 0; index < num_inputs; ++index) { - HInstruction* new_input = index < num_args - ? vector_map_->Get(inputs[index]) - : inputs[index]; // beyond arguments: just pass through - new_invoke->SetArgumentAt(index, new_input); - } - new_invoke->SetIntrinsic(invoke->GetIntrinsic(), - kNeedsEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - vector = new_invoke; - } - break; - } + case HInstruction::kMin: + GENERATE_VEC( + new (global_allocator_) HVecMin(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMin(org_type, opa, opb, dex_pc)); + case HInstruction::kMax: + GENERATE_VEC( + new (global_allocator_) HVecMax(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMax(org_type, opa, opb, dex_pc)); + case HInstruction::kAbs: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HAbs(org_type, opa, dex_pc)); default: break; } // switch @@ -1901,6 +2104,79 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // Vectorization idioms. // +// Method recognizes single and double clipping saturation arithmetic. +bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions) { + // Deal with vector restrictions. + if (HasVectorRestrictions(restrictions, kNoSaturation)) { + return false; + } + // Restrict type (generalize if one day we generalize allowed MIN/MAX integral types). + if (instruction->GetType() != DataType::Type::kInt32 && + instruction->GetType() != DataType::Type::kInt64) { + return false; + } + // Clipped addition or subtraction on narrower operands? We will try both + // formats since, e.g., x+c can be interpreted as x+c and x-(-c), depending + // on what clipping values are used, to get most benefits. + int64_t lo = std::numeric_limits<int64_t>::min(); + int64_t hi = std::numeric_limits<int64_t>::max(); + HInstruction* clippee = FindClippee(instruction, &lo, &hi); + HInstruction* a = nullptr; + HInstruction* b = nullptr; + HInstruction* r = nullptr; + HInstruction* s = nullptr; + bool is_unsigned = false; + bool is_add = true; + int64_t c = 0; + // First try for saturated addition. + if (IsAddConst2(graph_, clippee, /*out*/ &a, /*out*/ &b, /*out*/ &c) && c == 0 && + IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && + IsSaturatedAdd(r, s, type, lo, hi, is_unsigned)) { + is_add = true; + } else { + // Then try again for saturated subtraction. 
+ a = b = r = s = nullptr; + if (IsSubConst2(graph_, clippee, /*out*/ &a, /*out*/ &b) && + IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && + IsSaturatedSub(r, type, lo, hi, is_unsigned)) { + is_add = false; + } else { + return false; + } + } + // Accept saturation idiom for vectorizable operands. + DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = instruction->InputAt(0); + s = instruction->InputAt(1); + restrictions &= ~(kNoHiBits | kNoMinMax); // allow narrow MIN/MAX in seq + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + DataType::Type vtype = HVecOperation::ToProperType(type, is_unsigned); + HInstruction* op1 = vector_map_->Get(r); + HInstruction* op2 = vector_map_->Get(s); + vector_map_->Put(instruction, is_add + ? reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationAdd( + global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc)) + : reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationSub( + global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc))); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); + } + } + return true; + } + return false; +} + // Method recognizes the following idioms: // rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b // truncated halving add (a + b) >> 1 for unsigned/signed operands a, b @@ -1924,8 +2200,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* a = nullptr; HInstruction* b = nullptr; int64_t c = 0; - if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { - DCHECK(a != nullptr && b != nullptr); + if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { // Accept c == 1 (rounded) or c == 0 (not rounded). bool is_rounded = false; if (c == 1) { @@ -1947,8 +2222,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, } // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -1998,21 +2272,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* v = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->IsInvokeStaticOrDirect() && - (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || - v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { - HInstruction* x = v->InputAt(0); - if (x->GetType() == reduction_type) { - int64_t c = 0; - if (x->IsSub()) { - a = x->InputAt(0); - b = x->InputAt(1); - } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) { - b = graph_->GetConstant(reduction_type, -c); // hidden SUB! - } - } - } - if (a == nullptr || b == nullptr) { + if (v->IsAbs() && + v->GetType() == reduction_type && + IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) { + DCHECK(a != nullptr && b != nullptr); + } else { return false; } // Accept same-type or consistent sign extension for narrower-type on operands a and b. 
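For readers unfamiliar with the idiom VectorizeSaturationIdiom matches, the scalar shape is an add (or sub) of two narrower operands wrapped in MIN/MAX clipping against the narrow type's bounds. The reference function below is only a sketch of those semantics for signed 16-bit lanes (SaturatedAddI16 is a made-up name, not ART code); the recognizer maps this shape to HVecSaturationAdd, and the unsigned and subtract variants to the corresponding nodes.

#include <algorithm>
#include <cstdint>

// Scalar reference for one lane: clip the widened sum back into the int16 range.
int16_t SaturatedAddI16(int16_t a, int16_t b) {
  int32_t wide = static_cast<int32_t>(a) + static_cast<int32_t>(b);  // cannot overflow int32
  wide = std::min<int32_t>(std::max<int32_t>(wide, -32768), 32767);  // the MIN/MAX clipping
  return static_cast<int16_t>(wide);
}

Lanes and types for which no saturating vector instructions are available opt out of this path through the new kNoSaturation restriction bit.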
@@ -2045,8 +2309,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, } // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand // idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = s = v->InputAt(0); } @@ -2054,14 +2317,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { - reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned); if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, vector_map_->Get(q), vector_map_->Get(r), vector_map_->Get(s), - reduction_type, + HVecOperation::ToProperType(reduction_type, is_unsigned), GetOtherVL(reduction_type, sub_type, vector_length_), kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); @@ -2134,41 +2396,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { return true; } -static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8; -static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; - -uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { - uint32_t max_peel = MaxNumberPeeled(); - switch (compiler_driver_->GetInstructionSet()) { - case InstructionSet::kArm64: { - // Don't unroll with insufficient iterations. - // TODO: Unroll loops with unknown trip count. - DCHECK_NE(vector_length_, 0u); - if (trip_count < (2 * vector_length_ + max_peel)) { - return kNoUnrollingFactor; - } - // Don't unroll for large loop body size. - uint32_t instruction_count = block->GetInstructions().CountSize(); - if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { - return kNoUnrollingFactor; - } - // Find a beneficial unroll factor with the following restrictions: - // - At least one iteration of the transformed loop should be executed. - // - The loop body shouldn't be "too big" (heuristic). - uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count; - uint32_t uf2 = (trip_count - max_peel) / vector_length_; - uint32_t unroll_factor = - TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); - DCHECK_GE(unroll_factor, 1u); - return unroll_factor; - } - case InstructionSet::kX86: - case InstructionSet::kX86_64: - default: - return kNoUnrollingFactor; - } -} - // // Helpers. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index a707ad13580..f9a31a34d40 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -20,12 +20,15 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "induction_var_range.h" +#include "loop_analysis.h" #include "nodes.h" #include "optimization.h" +#include "superblock_cloner.h" namespace art { class CompilerDriver; +class ArchDefaultLoopHelper; /** * Loop optimizations. 
Builds a loop hierarchy and applies optimizations to @@ -80,6 +83,7 @@ class HLoopOptimization : public HOptimization { kNoReduction = 1 << 10, // no reduction kNoSAD = 1 << 11, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening + kNoSaturation = 1 << 13, // no saturation arithmetic }; /* @@ -134,10 +138,21 @@ class HLoopOptimization : public HOptimization { void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - // Performs optimizations specific to inner loop (empty loop removal, + // Performs optimizations specific to inner loop with finite header logic (empty loop removal, // unrolling, vectorization). Returns true if anything changed. + bool TryOptimizeInnerLoopFinite(LoopNode* node); + + // Performs optimizations specific to inner loop. Returns true if anything changed. bool OptimizeInnerLoop(LoopNode* node); + // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling + // opportunities. Returns whether transformation happened. + bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node); + + // Tries to apply loop peeling for loop invariant exits elimination. Returns whether + // transformation happened. + bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node); + // // Vectorization analysis and synthesis. // @@ -177,6 +192,11 @@ class HLoopOptimization : public HOptimization { bool is_unsigned = false); // Vectorization idioms. + bool VectorizeSaturationIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); bool VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -197,7 +217,6 @@ class HLoopOptimization : public HOptimization { const ArrayReference* peeling_candidate); uint32_t MaxNumberPeeled(); bool IsVectorizationProfitable(int64_t trip_count); - uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count); // // Helpers. @@ -291,6 +310,9 @@ class HLoopOptimization : public HOptimization { HBasicBlock* vector_body_; // body of the new loop HInstruction* vector_index_; // normalized index of the new loop + // Helper for target-specific behaviour for loop optimizations. + ArchDefaultLoopHelper* arch_loop_helper_; + friend class LoopOptimizationTest; DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index db8368986c9..c21bd65d97e 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -227,11 +227,14 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { graph_->ClearDominanceInformation(); graph_->BuildDominatorTree(); + // BuildDominatorTree inserts a block beetween loop header and entry block. + EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_); + // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs // are still mapped correctly to the block predecessors. 
for (size_t i = 0, e = phi->InputCount(); i < e; i++) { HInstruction* input = phi->InputAt(i); - ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); + EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); } } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f6ba19f22a8..f784f8f7f35 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2891,6 +2891,8 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "BootImageLinkTimePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: return os << "DirectAddress"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: return os << "BssEntry"; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: @@ -2925,7 +2927,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: - case LoadKind::kBootImageClassTable: + case LoadKind::kBootImageRelRo: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetClass().Get() == other_load_class->GetClass().Get(); @@ -2944,8 +2946,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadClass::LoadKind::kBootImageAddress: return os << "BootImageAddress"; - case HLoadClass::LoadKind::kBootImageClassTable: - return os << "BootImageClassTable"; + case HLoadClass::LoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HLoadClass::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadClass::LoadKind::kJitTableAddress: @@ -2968,7 +2970,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: - case LoadKind::kBootImageInternTable: + case LoadKind::kBootImageRelRo: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetString().Get() == other_load_string->GetString().Get(); @@ -2984,8 +2986,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadString::LoadKind::kBootImageAddress: return os << "BootImageAddress"; - case HLoadString::LoadKind::kBootImageInternTable: - return os << "BootImageInternTable"; + case HLoadString::LoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HLoadString::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: @@ -3101,6 +3103,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { return os << "array_object_check"; case TypeCheckKind::kArrayCheck: return os << "array_check"; + case TypeCheckKind::kBitstringCheck: + return os << "bitstring_check"; default: LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fe992a7f399..79d733060b3 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1338,6 +1338,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Above, Condition) \ M(AboveOrEqual, Condition) \ + M(Abs, UnaryOperation) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -1383,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LoadException, 
Instruction) \ M(LoadString, Instruction) \ M(LongConstant, Constant) \ + M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(NativeDebugInfo, Instruction) \ @@ -1437,6 +1440,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecAndNot, VecBinaryOperation) \ M(VecOr, VecBinaryOperation) \ M(VecXor, VecBinaryOperation) \ + M(VecSaturationAdd, VecBinaryOperation) \ + M(VecSaturationSub, VecBinaryOperation) \ M(VecShl, VecBinaryOperation) \ M(VecShr, VecBinaryOperation) \ M(VecUShr, VecBinaryOperation) \ @@ -4428,6 +4433,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. kDirectAddress, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for app->boot calls with relocatable image. + kBootImageRelRo, + // Load from an entry in the .bss section using a PC-relative load. // Used for classes outside boot image when .bss is accessible with a PC-relative load. kBssEntry, @@ -4560,6 +4569,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } bool HasPcRelativeMethodLoadKind() const { return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo || GetMethodLoadKind() == MethodLoadKind::kBssEntry; } bool HasCurrentMethodInput() const { @@ -5016,6 +5026,117 @@ class HRem FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Rem); }; +class HMin FINAL : public HBinaryOperation { + public: + HMin(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x <= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Min); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Min); +}; + +class HMax FINAL : public HBinaryOperation { + public: + HMax(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x >= y) ? 
x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Max); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Max); +}; + +class HAbs FINAL : public HUnaryOperation { + public: + HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kAbs, result_type, input, dex_pc) {} + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x) { + return x < 0 ? -x : x; + } + + // Evaluation for floating-point values. + // Note, as a "quality of implementation", rather than pure "spec compliance", + // we require that Math.abs() clears the sign bit (but changes nothing else) + // for all floating-point numbers, including NaN (signaling NaN may become quiet though). + // http://b/30758343 + template <typename T, typename S> static T ComputeFP(T x) { + S bits = bit_cast<S, T>(x); + return bit_cast<T, S>(bits & std::numeric_limits<S>::max()); + } + + HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetFloatConstant( + ComputeFP<float, int32_t>(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetDoubleConstant( + ComputeFP<double, int64_t>(x->GetValue()), GetDexPc()); + } + + DECLARE_INSTRUCTION(Abs); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Abs); +}; + class HDivZeroCheck FINAL : public HExpression<1> { public: // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` @@ -6025,12 +6146,12 @@ class HLoadClass FINAL : public HInstruction { kBootImageLinkTimePcRelative, // Use a known boot image Class* address, embedded in the code by the codegen. - // Used for boot image classes referenced by apps in AOT- and JIT-compiled code. + // Used for boot image classes referenced by apps in JIT- and AOT-compiled code (non-PIC). kBootImageAddress, - // Use a PC-relative load from a boot image ClassTable mmapped into the .bss - // of the oat file. - kBootImageClassTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image classes referenced by apps in AOT-compiled code (PIC). + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. // Used for classes outside boot image when .bss is accessible with a PC-relative load. 
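The floating-point path of HAbs::Evaluate above folds constants with ComputeFP, which clears only the sign bit. A standalone illustration of the same trick (using memcpy instead of ART's bit_cast; AbsBits is an invented name): for IEEE-754 binary32 and binary64, max() of the same-width signed integer type is all ones except the sign bit, so ANDing with it strips the sign while leaving NaN payloads intact, matching the b/30758343 note.

#include <cstdint>
#include <cstring>
#include <limits>

// Clear the sign bit of a float without touching any other bits (NaNs stay NaNs).
float AbsBits(float x) {
  static_assert(sizeof(uint32_t) == sizeof(float), "expects 32-bit float");
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= static_cast<uint32_t>(std::numeric_limits<int32_t>::max());  // 0x7FFFFFFF
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}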
@@ -6057,8 +6178,7 @@ class HLoadClass FINAL : public HInstruction { special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), - klass_(klass), - loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { + klass_(klass) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. DCHECK(!is_referrers_class || !needs_access_check); @@ -6068,6 +6188,7 @@ class HLoadClass FINAL : public HInstruction { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); + SetPackedFlag<kFlagValidLoadedClassRTI>(false); } bool IsClonable() const OVERRIDE { return true; } @@ -6078,6 +6199,12 @@ class HLoadClass FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } + bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const; @@ -6116,13 +6243,18 @@ class HLoadClass FINAL : public HInstruction { } ReferenceTypeInfo GetLoadedClassRTI() { - return loaded_class_rti_; + if (GetPackedFlag<kFlagValidLoadedClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - void SetLoadedClassRTI(ReferenceTypeInfo rti) { - // Make sure we only set exact types (the loaded class should never be merged). - DCHECK(rti.IsExact()); - loaded_class_rti_ = rti; + // Loaded class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidLoadedClassRTI>(true); } dex::TypeIndex GetTypeIndex() const { return type_index_; } @@ -6175,14 +6307,14 @@ class HLoadClass FINAL : public HInstruction { static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); - static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1; static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields."); using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; static bool HasTypeReference(LoadKind load_kind) { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBootImageClassTable || load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kRuntimeCall; } @@ -6203,8 +6335,6 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file_; Handle<mirror::Class> klass_; - - ReferenceTypeInfo loaded_class_rti_; }; std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); @@ -6228,7 +6358,7 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { // including literal pool loads, which are PC-relative too. 
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageClassTable || + GetLoadKind() == LoadKind::kBootImageRelRo || GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); @@ -6244,12 +6374,12 @@ class HLoadString FINAL : public HInstruction { kBootImageLinkTimePcRelative, // Use a known boot image String* address, embedded in the code by the codegen. - // Used for boot image strings referenced by apps in AOT- and JIT-compiled code. + // Used for boot image strings referenced by apps in JIT- and AOT-compiled code (non-PIC). kBootImageAddress, - // Use a PC-relative load from a boot image InternTable mmapped into the .bss - // of the oat file. - kBootImageInternTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image strings referenced by apps in AOT-compiled code (PIC). + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. // Used for strings outside boot image when .bss is accessible with a PC-relative load. @@ -6284,6 +6414,12 @@ class HLoadString FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } + const DexFile& GetDexFile() const { return dex_file_; } @@ -6312,7 +6448,7 @@ class HLoadString FINAL : public HInstruction { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBootImageAddress || - load_kind == LoadKind::kBootImageInternTable || + load_kind == LoadKind::kBootImageRelRo || load_kind == LoadKind::kJitTableAddress) { return false; } @@ -6390,7 +6526,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageInternTable || + GetLoadKind() == LoadKind::kBootImageRelRo || GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. @@ -6750,72 +6886,159 @@ enum class TypeCheckKind { kInterfaceCheck, // No optimization yet when checking against an interface. kArrayObjectCheck, // Can just check if the array is not primitive. kArrayCheck, // No optimization yet when checking against a generic array. + kBitstringCheck, // Compare the type check bitstring. kLast = kArrayCheck }; std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); -class HInstanceOf FINAL : public HExpression<2> { +// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an +// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.) 
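Before the class definition that follows, a note on what the two extra inputs of a kBitstringCheck carry: conceptually, the check passes when the masked bits of the receiver class's type-check bitstring equal the target's path-to-root. The sketch below shows only that conceptual test (FakeClass, type_check_bitstring and BitstringInstanceOf are invented; the real code generators read the value from the object's mirror::Class, with per-backend details).

#include <cstdint>

// Hypothetical stand-in for the per-class bitstring storage.
struct FakeClass {
  uint32_t type_check_bitstring;
};

// Inputs 2 and 3 of the instruction correspond to path_to_root and mask below.
bool BitstringInstanceOf(const FakeClass* klass, uint32_t path_to_root, uint32_t mask) {
  return (klass->type_check_bitstring & mask) == path_to_root;
}

This is why a bitstring check carries a null constant in input 1 rather than an HLoadClass: the comparison needs only the two integer constants plus the receiver's class.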
+class HTypeCheckInstruction : public HVariableInputSizeInstruction { public: - HInstanceOf(HInstruction* object, - HLoadClass* target_class, - TypeCheckKind check_kind, - uint32_t dex_pc) - : HExpression(kInstanceOf, - DataType::Type::kBool, - SideEffectsForArchRuntimeCalls(check_kind), - dex_pc) { + HTypeCheckInstruction(InstructionKind kind, + HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask, + SideEffects side_effects) + : HVariableInputSizeInstruction( + kind, + side_effects, + dex_pc, + allocator, + /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u, + kArenaAllocTypeCheckInputs), + klass_(klass) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); + SetPackedFlag<kFlagValidTargetClassRTI>(false); SetRawInputAt(0, object); - SetRawInputAt(1, target_class); + SetRawInputAt(1, target_class_or_null); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr); + if (check_kind == TypeCheckKind::kBitstringCheck) { + DCHECK(target_class_or_null->IsNullConstant()); + SetRawInputAt(2, bitstring_path_to_root); + SetRawInputAt(3, bitstring_mask); + } else { + DCHECK(target_class_or_null->IsLoadClass()); + } } HLoadClass* GetTargetClass() const { + DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); HInstruction* load_class = InputAt(1); DCHECK(load_class->IsLoadClass()); return load_class->AsLoadClass(); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } + uint32_t GetBitstringPathToRoot() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* path_to_root = InputAt(2); + DCHECK(path_to_root->IsIntConstant()); + return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue()); + } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + uint32_t GetBitstringMask() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* mask = InputAt(3); + DCHECK(mask->IsIntConstant()); + return static_cast<uint32_t>(mask->AsIntConstant()->GetValue()); } - bool NeedsEnvironment() const OVERRIDE { - return CanCallRuntime(GetTypeCheckKind()); + bool IsClonable() const OVERRIDE { return true; } + bool CanBeMoved() const OVERRIDE { return true; } + + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName(); + return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields(); } - // Used only in code generation. bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - static bool CanCallRuntime(TypeCheckKind check_kind) { - // Mips currently does runtime calls for any other checks. 
- return check_kind != TypeCheckKind::kExactCheck; + ReferenceTypeInfo GetTargetClassRTI() { + if (GetPackedFlag<kFlagValidTargetClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { - return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None(); + // Target class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidTargetClassRTI>(true); } - DECLARE_INSTRUCTION(InstanceOf); + Handle<mirror::Class> GetClass() const { + return klass_; + } protected: - DEFAULT_COPY_CONSTRUCTOR(InstanceOf); + DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction); private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeCheckKindSize = MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; + static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1; + static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1; static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; + + Handle<mirror::Class> klass_; +}; + +class HInstanceOf FINAL : public HTypeCheckInstruction { + public: + HInstanceOf(HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kInstanceOf, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffectsForArchRuntimeCalls(check_kind)) {} + + DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; } + + bool NeedsEnvironment() const OVERRIDE { + return CanCallRuntime(GetTypeCheckKind()); + } + + static bool CanCallRuntime(TypeCheckKind check_kind) { + // Mips currently does runtime calls for any other checks. + return check_kind != TypeCheckKind::kExactCheck; + } + + static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { + return CanCallRuntime(check_kind) ? 
SideEffects::CanTriggerGC() : SideEffects::None(); + } + + DECLARE_INSTRUCTION(InstanceOf); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceOf); }; class HBoundType FINAL : public HExpression<1> { @@ -6865,31 +7088,26 @@ class HBoundType FINAL : public HExpression<1> { ReferenceTypeInfo upper_bound_; }; -class HCheckCast FINAL : public HTemplateInstruction<2> { +class HCheckCast FINAL : public HTypeCheckInstruction { public: HCheckCast(HInstruction* object, - HLoadClass* target_class, + HInstruction* target_class_or_null, TypeCheckKind check_kind, - uint32_t dex_pc) - : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { - SetPackedField<TypeCheckKindField>(check_kind); - SetPackedFlag<kFlagMustDoNullCheck>(true); - SetRawInputAt(0, object); - SetRawInputAt(1, target_class); - } - - HLoadClass* GetTargetClass() const { - HInstruction* load_class = InputAt(1); - DCHECK(load_class->IsLoadClass()); - return load_class->AsLoadClass(); - } - - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kCheckCast, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffects::CanTriggerGC()) {} bool NeedsEnvironment() const OVERRIDE { // Instruction may throw a CheckCastError. @@ -6898,24 +7116,10 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { bool CanThrow() const OVERRIDE { return true; } - bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } - void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } - TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } - bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - DECLARE_INSTRUCTION(CheckCast); protected: DEFAULT_COPY_CONSTRUCTOR(CheckCast); - - private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeCheckKindSize = - MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); - static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; - static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; }; /** diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index d1eaf5c3666..1a484e1944f 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -328,7 +328,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { uint32_t dex_pc) : HVecUnaryOperation( kVecReplicateScalar, allocator, scalar, packed_type, vector_length, dex_pc) { - DCHECK(!scalar->IsVecOperation()); + DCHECK(!ReturnsSIMDValue(scalar)); } // A replicate needs to stay in place, since SIMD registers are not @@ -533,6 +533,31 @@ class HVecAdd FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecAdd); }; +// Adds every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 +_sat y1, .. 
, xn +_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationAdd FINAL : public HVecBinaryOperation { + public: + HVecSaturationAdd(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationAdd, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(VecSaturationAdd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationAdd); +}; + // Performs halving add on every component in the two vectors, viz. // rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] // truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] @@ -598,6 +623,31 @@ class HVecSub FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecSub); }; +// Subtracts every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 -_sat y1, .. , xn -_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationSub FINAL : public HVecBinaryOperation { + public: + HVecSaturationSub(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationSub, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(VecSaturationSub); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationSub); +}; + // Multiplies every component in the two vectors, // viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ]. class HVecMul FINAL : public HVecBinaryOperation { diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index d20b681b49a..2e189fdd141 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -105,15 +105,15 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { const std::vector<uint8_t>& expected_asm, const std::vector<uint8_t>& expected_cfi) { // Get the outputs. 
- const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory(); + ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory(); Assembler* opt_asm = code_gen_->GetAssembler(); - const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data())); if (kGenerateExpected) { GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); } else { - EXPECT_EQ(expected_asm, actual_asm); - EXPECT_EQ(expected_cfi, actual_cfi); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_asm), actual_asm); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_cfi), actual_cfi); } } @@ -140,7 +140,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { return memory_.data(); } - const std::vector<uint8_t>& GetMemory() { return memory_; } + ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); } private: std::vector<uint8_t> memory_; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index e42dfc10ba5..cadefc3b015 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -75,22 +75,18 @@ static constexpr const char* kPassNameSeparator = "$"; class CodeVectorAllocator FINAL : public CodeAllocator { public: explicit CodeVectorAllocator(ArenaAllocator* allocator) - : memory_(allocator->Adapter(kArenaAllocCodeBuffer)), - size_(0) {} + : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {} virtual uint8_t* Allocate(size_t size) { - size_ = size; memory_.resize(size); return &memory_[0]; } - size_t GetSize() const { return size_; } - const ArenaVector<uint8_t>& GetMemory() const { return memory_; } + ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); } uint8_t* GetData() { return memory_.data(); } private: ArenaVector<uint8_t> memory_; - size_t size_; DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator); }; @@ -647,15 +643,13 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, MaybeRunInliner(graph, codegen, dex_compilation_unit, pass_observer, handles); OptimizationDef optimizations2[] = { - // SelectGenerator depends on the InstructionSimplifier removing - // redundant suspend checks to recognize empty blocks. + OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"), + OptDef(OptimizationPass::kGlobalValueNumbering), OptDef(OptimizationPass::kSelectGenerator), - // TODO: if we don't inline we can also skip fold2. 
OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"), OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_inlining"), OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$after_inlining"), - OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"), - OptDef(OptimizationPass::kGlobalValueNumbering), + OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_licm"), OptDef(OptimizationPass::kInvariantCodeMotion), OptDef(OptimizationPass::kInductionVarAnalysis), OptDef(OptimizationPass::kBoundsCheckElimination), @@ -719,7 +713,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( GetCompilerDriver(), codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(code_allocator->GetMemory()), + code_allocator->GetMemory(), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). @@ -731,6 +725,16 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const linker::LinkerPatch>(linker_patches)); + CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage(); + for (const linker::LinkerPatch& patch : linker_patches) { + if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) { + ArenaVector<uint8_t> code(allocator->Adapter()); + std::string debug_name; + codegen->EmitThunkCode(patch, &code, &debug_name); + storage->SetThunkCode(patch, ArrayRef<const uint8_t>(code), debug_name); + } + } + return compiled_method; } @@ -1339,7 +1343,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), - code_allocator.GetSize(), + code_allocator.GetMemory().size(), data_size, osr, roots, @@ -1369,7 +1373,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_optimized = true; info.is_code_address_text_relative = false; info.code_address = code_address; - info.code_size = code_allocator.GetSize(); + info.code_size = code_allocator.GetMemory().size(); info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); @@ -1378,7 +1382,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { - jit_logger->WriteLog(code, code_allocator.GetSize(), method); + jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method); } if (kArenaAllocatorCountAllocations) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 00194ff1fe0..9a26f2f6c40 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -99,6 +99,7 @@ enum class MethodCompilationStat { kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kBitstringTypeCheck, kJitOutOfMemoryForCommit, kLastStat }; @@ -124,11 +125,6 @@ class OptimizingCompilerStats { } void Log() const { - if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) { - // Log only in debug builds or if the compiler is verbose. 
- return; - } - uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic); uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub); uint32_t bytecode_attempts = diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6dcbadba6ed..a9bc5664c09 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -20,6 +20,7 @@ #include <memory> #include <vector> +#include "base/malloc_arena_pool.h" #include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" @@ -97,7 +98,7 @@ class ArenaPoolAndAllocator { ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; } private: - ArenaPool pool_; + MallocArenaPool pool_; ArenaAllocator allocator_; ArenaStack arena_stack_; ScopedArenaAllocator scoped_allocator_; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index cb87cabe1cd..be352011668 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -180,7 +181,7 @@ TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); TYPED_TEST(ParallelMoveTest, Dependency) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -207,7 +208,7 @@ TYPED_TEST(ParallelMoveTest, Dependency) { } TYPED_TEST(ParallelMoveTest, Cycle) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -257,7 +258,7 @@ TYPED_TEST(ParallelMoveTest, Cycle) { } TYPED_TEST(ParallelMoveTest, ConstantLast) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); @@ -276,7 +277,7 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) { } TYPED_TEST(ParallelMoveTest, Pairs) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -453,7 +454,7 @@ TYPED_TEST(ParallelMoveTest, Pairs) { } TYPED_TEST(ParallelMoveTest, MultiCycles) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -551,7 +552,7 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { // Test that we do 64bits moves before 32bits moves. TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -610,7 +611,7 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { } TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 9d5358514ee..01022542062 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -75,7 +75,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { switch (load_kind) { case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. 
InitializePCRelativeBasePointer(); @@ -91,7 +91,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. InitializePCRelativeBasePointer(); diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index f92f4b274ae..647336b6b93 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -81,20 +81,14 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitLoadClass(HLoadClass* load_class) OVERRIDE { - HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || - load_kind == HLoadClass::LoadKind::kBssEntry) { + if (load_class->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class); load_class->AddSpecialInput(method_address); } } void VisitLoadString(HLoadString* load_string) OVERRIDE { - HLoadString::LoadKind load_kind = load_string->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || - load_kind == HLoadString::LoadKind::kBssEntry) { + if (load_string->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string); load_string->AddSpecialInput(method_address); } @@ -238,6 +232,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMinDoubleDouble: case Intrinsics::kMathMinFloatFloat: + LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered " + "to IR nodes by instruction simplifier"; + UNREACHABLE(); case Intrinsics::kMathRoundFloat: if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f843c008d8b..59733397bfe 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -34,6 +34,20 @@ void PrepareForRegisterAllocation::Run() { } } +void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) { + // Record only those bitstring type checks that make it to the codegen stage. + if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + +void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) { + // Record only those bitstring type checks that make it to the codegen stage. 
+ if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) { check->ReplaceWith(check->InputAt(0)); } diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 2c64f016c17..f6e4d3ef99b 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { "prepare_for_register_allocation"; private: + void VisitCheckCast(HCheckCast* check_cast) OVERRIDE; + void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE; void VisitNullCheck(HNullCheck* check) OVERRIDE; void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 67a61fc01de..4030883a57e 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -87,6 +87,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE; void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; void VisitLoadString(HLoadString* instr) OVERRIDE; void VisitLoadException(HLoadException* instr) OVERRIDE; @@ -171,6 +172,12 @@ void ReferenceTypePropagation::ValidateTypes() { << "NullCheck " << instr->GetReferenceTypeInfo() << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); } + } else if (instr->IsInstanceOf()) { + HInstanceOf* iof = instr->AsInstanceOf(); + DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact()); + } else if (instr->IsCheckCast()) { + HCheckCast* check = instr->AsCheckCast(); + DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact()); } } } @@ -499,8 +506,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* return; } - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI(); if (!class_rti.IsValid()) { // He have loaded an unresolved class. Don't bother bounding the type. 
return; @@ -643,15 +649,20 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet( void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::Class> resolved_class = instr->GetClass(); - if (IsAdmissible(resolved_class.Get())) { - instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( - resolved_class, /* is_exact */ true)); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidLoadedClassRTI(); } instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true)); } +void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) { + ScopedObjectAccess soa(Thread::Current()); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidTargetClassRTI(); + } +} + void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) { instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo()); } @@ -719,8 +730,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be @@ -729,12 +738,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast } DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); - if (class_rti.IsValid()) { + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> klass = check_cast->GetClass(); + if (IsAdmissible(klass.Get())) { DCHECK(is_first_run_); - ScopedObjectAccess soa(Thread::Current()); + check_cast->SetValidTargetClassRTI(); // This is the first run of RTP and class is resolved. - bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); - bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), + bool is_exact = klass->CannotBeAssignedFromOtherTypes(); + bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact), /* CheckCast succeeds for nulls. */ true); } else { // This is the first run of RTP and class is unresolved. Remove the binding. diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index bb28d50b569..bca538fb170 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -667,7 +667,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HUnaryOperation (or HBinaryOperation), check in debug mode that we have // the exhaustive lists here. 
if (instruction->IsUnaryOperation()) { - DCHECK(instruction->IsBooleanNot() || + DCHECK(instruction->IsAbs() || + instruction->IsBooleanNot() || instruction->IsNot() || instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName(); return true; @@ -678,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCompare() || instruction->IsCondition() || instruction->IsDiv() || + instruction->IsMin() || + instruction->IsMax() || instruction->IsMul() || instruction->IsOr() || instruction->IsRem() || diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 66e51421ca3..f9acf5aa9a9 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -16,6 +16,7 @@ #include "select_generator.h" +#include "base/scoped_arena_containers.h" #include "reference_type_propagation.h" namespace art { @@ -43,12 +44,16 @@ static bool IsSimpleBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { - if (num_instructions > kMaxInstructionsInBranch) { - return false; - } return instruction->IsGoto() || instruction->IsReturn(); } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { - num_instructions++; + if (instruction->IsSelect() && + instruction->AsSelect()->GetCondition()->GetBlock() == block) { + // Count one HCondition and HSelect in the same block as a single instruction. + // This enables finding nested selects. + continue; + } else if (++num_instructions > kMaxInstructionsInBranch) { + return false; // bail as soon as we exceed number of allowed instructions + } } else { return false; } @@ -86,9 +91,13 @@ static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index } void HSelectGenerator::Run() { + // Select cache with local allocator. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + ScopedArenaSafeMap<HInstruction*, HSelect*> cache( + std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator)); + // Iterate in post order in the unlikely case that removing one occurrence of // the selection pattern empties a branch block of another occurrence. - // Otherwise the order does not matter. for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->EndsWithIf()) continue; @@ -97,6 +106,7 @@ void HSelectGenerator::Run() { HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); DCHECK_NE(true_block, false_block); + if (!IsSimpleBlock(true_block) || !IsSimpleBlock(false_block) || !BlocksMergeTogether(true_block, false_block)) { @@ -107,10 +117,10 @@ void HSelectGenerator::Run() { // If the branches are not empty, move instructions in front of the If. // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. 
- if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { + while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { true_block->GetFirstInstruction()->MoveBefore(if_instruction); } - if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { + while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { false_block->GetFirstInstruction()->MoveBefore(if_instruction); } DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); @@ -138,7 +148,8 @@ void HSelectGenerator::Run() { DCHECK(both_successors_return || phi != nullptr); // Create the Select instruction and insert it in front of the If. - HSelect* select = new (graph_->GetAllocator()) HSelect(if_instruction->InputAt(0), + HInstruction* condition = if_instruction->InputAt(0); + HSelect* select = new (graph_->GetAllocator()) HSelect(condition, true_value, false_value, if_instruction->GetDexPc()); @@ -175,6 +186,26 @@ void HSelectGenerator::Run() { MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); + // Very simple way of finding common subexpressions in the generated HSelect statements + // (since this runs after GVN). Lookup by condition, and reuse latest one if possible + // (due to post order, latest select is most likely replacement). If needed, we could + // improve this by e.g. using the operands in the map as well. + auto it = cache.find(condition); + if (it == cache.end()) { + cache.Put(condition, select); + } else { + // Found cached value. See if latest can replace cached in the HIR. + HSelect* cached = it->second; + DCHECK_EQ(cached->GetCondition(), select->GetCondition()); + if (cached->GetTrueValue() == select->GetTrueValue() && + cached->GetFalseValue() == select->GetFalseValue() && + select->StrictlyDominates(cached)) { + cached->ReplaceWith(select); + cached->GetBlock()->RemoveInstruction(cached); + } + it->second = select; // always cache latest + } + // No need to update dominance information, as we are simplifying // a simple diamond shape, where the join block is merged with the // entry block. Any following blocks would have had the join block diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 1e49411c72d..70b45763af7 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -125,8 +125,12 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, BootImageAOTCanEmbedMethod(callee, compiler_driver)) { method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (IsInBootImage(callee)) { + // Use PC-relative access to the .data.bimg.rel.ro methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - // Use PC-relative access to the .bss methods arrays. + // Use PC-relative access to the .bss methods array. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } @@ -207,7 +211,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( } else if (is_in_boot_image) { // AOT app compilation, boot image class. 
if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable; + desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo; } else { desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; } @@ -236,6 +240,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( return load_kind; } +static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(!klass->IsProxyClass()); + DCHECK(!klass->IsArrayClass()); + + if (Runtime::Current()->UseJitCompilation()) { + // If we're JITting, try to assign a type check bitstring (fall through). + } else if (codegen->GetCompilerOptions().IsBootImage()) { + const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); + if (!compiler_driver->IsImageClass(descriptor)) { + return false; + } + // If the target is a boot image class, try to assign a type check bitstring (fall through). + // (If --force-determinism, this was already done; repeating is OK and yields the same result.) + } else { + // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring + // already assigned in the boot image. + return false; + } + + // Try to assign a type check bitstring. + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + if ((false) && // FIXME: Inliner does not respect compiler_driver->IsClassToCompile() + // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569 + kIsDebugBuild && + codegen->GetCompilerOptions().IsBootImage() && + codegen->GetCompilerOptions().IsForceDeterminism()) { + SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed) + << klass->PrettyDescriptor() << "/" << old_state + << " in " << codegen->GetGraph()->PrettyMethod(); + } + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + return state == SubtypeCheckInfo::kAssigned; +} + +TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + bool needs_access_check) { + if (klass == nullptr) { + return TypeCheckKind::kUnresolvedCheck; + } else if (klass->IsInterface()) { + return TypeCheckKind::kInterfaceCheck; + } else if (klass->IsArrayClass()) { + if (klass->GetComponentType()->IsObjectClass()) { + return TypeCheckKind::kArrayObjectCheck; + } else if (klass->CannotBeAssignedFromOtherTypes()) { + return TypeCheckKind::kExactCheck; + } else { + return TypeCheckKind::kArrayCheck; + } + } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes. + return TypeCheckKind::kExactCheck; + } else if (kBitstringSubtypeCheckEnabled && + !needs_access_check && + CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) { + // TODO: We should not need the `!needs_access_check` check but getting rid of that + // requires rewriting some optimizations in instruction simplifier. 
+ return TypeCheckKind::kBitstringCheck; + } else if (klass->IsAbstract()) { + return TypeCheckKind::kAbstractClassCheck; + } else { + return TypeCheckKind::kClassHierarchyCheck; + } +} + void HSharpening::ProcessLoadString( HLoadString* load_string, CodeGenerator* codegen, @@ -288,7 +361,7 @@ void HSharpening::ProcessLoadString( string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadString::LoadKind::kBootImageInternTable; + desired_load_kind = HLoadString::LoadKind::kBootImageRelRo; } else { desired_load_kind = HLoadString::LoadKind::kBootImageAddress; } diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 6df7d6d91ed..fa3e948eeb5 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -44,12 +44,10 @@ class HSharpening : public HOptimization { static constexpr const char* kSharpeningPassName = "sharpening"; - // Used by the builder. - static void ProcessLoadString(HLoadString* load_string, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles); + // Used by Sharpening and InstructionSimplifier. + static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, + CodeGenerator* codegen, + CompilerDriver* compiler_driver); // Used by the builder and the inliner. static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, @@ -58,10 +56,19 @@ class HSharpening : public HOptimization { const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); - // Used by Sharpening and InstructionSimplifier. - static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver); + // Used by the builder. + static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + bool needs_access_check) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Used by the builder. 
+ static void ProcessLoadString(HLoadString* load_string, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); private: CodeGenerator* codegen_; diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 579aabdb5f5..268e9bd6e0c 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -19,9 +19,9 @@ #include "base/bit_vector-inl.h" #include "base/hash_map.h" +#include "base/memory_region.h" #include "base/scoped_arena_containers.h" #include "base/value_object.h" -#include "memory_region.h" #include "method_info.h" #include "nodes.h" #include "stack_map.h" diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 7e517f34850..e36c5926622 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -18,6 +18,7 @@ #include "art_method.h" #include "base/arena_bit_vector.h" +#include "base/malloc_arena_pool.h" #include "stack_map_stream.h" #include "gtest/gtest.h" @@ -46,7 +47,7 @@ static bool CheckStackMask( using Kind = DexRegisterLocation::Kind; TEST(StackMapTest, Test1) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -128,7 +129,7 @@ TEST(StackMapTest, Test1) { } TEST(StackMapTest, Test2) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -413,7 +414,7 @@ TEST(StackMapTest, Test2) { } TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -508,7 +509,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { } TEST(StackMapTest, TestNonLiveDexRegisters) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -588,7 +589,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { // StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do // not treat it as kNoDexRegisterMap. 
TEST(StackMapTest, DexRegisterMapOffsetOverflow) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -652,7 +653,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { } TEST(StackMapTest, TestShareDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -711,7 +712,7 @@ TEST(StackMapTest, TestShareDexRegisterMap) { } TEST(StackMapTest, TestNoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -761,7 +762,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { } TEST(StackMapTest, InlineTest) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -949,7 +950,7 @@ TEST(StackMapTest, CodeOffsetTest) { } TEST(StackMapTest, TestDeduplicateStackMask) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -978,7 +979,7 @@ TEST(StackMapTest, TestDeduplicateStackMask) { } TEST(StackMapTest, TestInvokeInfo) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index a7c23bef7e3..fad7729956b 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -70,20 +70,18 @@ static bool ArePhiInputsTheSame(const HPhi* phi) { return true; } -// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole -// graph. -static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { - if (loop1 != nullptr || loop2 != nullptr) { - return nullptr; +// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method). +static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) { + if (set1->Size() != set2->Size()) { + return false; } - if (loop1->IsIn(*loop2)) { - return loop2; - } else if (loop2->IsIn(*loop1)) { - return loop1; + for (auto e : *set1) { + if (set2->Find(e) == set2->end()) { + return false; + } } - HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader()); - return block->GetLoopInformation(); + return true; } // Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph. @@ -95,6 +93,21 @@ static void OrderLoopsHeadersPredecessors(HGraph* graph) { } } +// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while +// traversing the function removes basic blocks from the bb_set (instead of traditional DFS +// 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start +// block. 
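// The comment above describes a DFS that clears bits instead of setting marks: every block
// reachable from the start block is removed from the set, so whatever remains set afterwards
// is unreachable. A minimal standalone sketch of the same idea on plain STL containers is
// shown below; the function and parameter names are invented for illustration and are not
// ART APIs.

#include <vector>

// Clears from `in_set` every node reachable from `start`; bits still set on return belong to
// nodes that `start` cannot reach within the subgraph.
static void ClearReachable(size_t start,
                           const std::vector<std::vector<size_t>>& successors,
                           std::vector<bool>* in_set) {
  if (!(*in_set)[start]) {
    return;  // Not part of the subgraph, or already visited.
  }
  (*in_set)[start] = false;  // "Visit" by clearing the bit.
  for (size_t succ : successors[start]) {
    if ((*in_set)[succ]) {
      ClearReachable(succ, successors, in_set);
    }
  }
}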
+static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) { + DCHECK(bb_set->IsBitSet(block->GetBlockId())); + bb_set->ClearBit(block->GetBlockId()); + + for (HBasicBlock* succ : block->GetSuccessors()) { + if (bb_set->IsBitSet(succ->GetBlockId())) { + TraverseSubgraphForConnectivity(succ, bb_set); + } + } +} + // // Helpers for CloneBasicBlock. // @@ -268,7 +281,6 @@ void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVect } void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { - // TODO: DCHECK that after the transformation the graph is connected. HBasicBlock* block_entry = nullptr; if (outer_loop_ == nullptr) { @@ -424,6 +436,11 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { outer_loop_ = nullptr; break; } + if (outer_loop_ == nullptr) { + // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer + // common loop. + outer_loop_ = loop_exit_loop_info; + } outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); } @@ -507,6 +524,34 @@ void SuperblockCloner::ResolveDataFlow() { // Debug and logging methods. // +// Debug function to dump graph' BasicBlocks info. +void DumpBB(HGraph* graph) { + for (HBasicBlock* bb : graph->GetBlocks()) { + if (bb == nullptr) { + continue; + } + std::cout << bb->GetBlockId(); + std::cout << " <- "; + for (HBasicBlock* pred : bb->GetPredecessors()) { + std::cout << pred->GetBlockId() << " "; + } + std::cout << " -> "; + for (HBasicBlock* succ : bb->GetSuccessors()) { + std::cout << succ->GetBlockId() << " "; + } + + if (bb->GetDominator()) { + std::cout << " dom " << bb->GetDominator()->GetBlockId(); + } + + if (bb->GetLoopInformation()) { + std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId(); + } + + std::cout << std::endl; + } +} + void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { DCHECK(!orig_instr->IsPhi()); HInstruction* copy_instr = GetInstrCopy(orig_instr); @@ -542,6 +587,82 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) } } +bool SuperblockCloner::CheckRemappingInfoIsValid() { + for (HEdge edge : *remap_orig_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_copy_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_incoming_) { + if (!IsEdgeValid(edge, graph_) || + IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + return true; +} + +void SuperblockCloner::VerifyGraph() { + for (auto it : *hir_map_) { + HInstruction* orig_instr = it.first; + HInstruction* copy_instr = it.second; + if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) { + DCHECK(it.first->GetBlock() != nullptr); + } + if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) { + DCHECK(it.second->GetBlock() != nullptr); + } + } + + GraphChecker checker(graph_); + checker.Run(); + if (!checker.IsValid()) { + for (const std::string& error : checker.GetErrors()) { + std::cout << error << std::endl; + } + LOG(FATAL) << "GraphChecker failed: superblock cloner\n"; + } +} + +void DumpBBSet(const ArenaBitVector* set) { + for (uint32_t idx : set->Indexes()) { + std::cout << idx << "\n"; + } +} + +void SuperblockCloner::DumpInputSets() { + std::cout << graph_->PrettyMethod() << 
"\n"; + std::cout << "orig_bb_set:\n"; + for (uint32_t idx : orig_bb_set_.Indexes()) { + std::cout << idx << "\n"; + } + std::cout << "remap_orig_internal:\n"; + for (HEdge e : *remap_orig_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_copy_internal:\n"; + for (auto e : *remap_copy_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_incoming:\n"; + for (auto e : *remap_incoming_) { + std::cout << e << "\n"; + } +} + // // Public methods. // @@ -569,6 +690,7 @@ void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_inte remap_orig_internal_ = remap_orig_internal; remap_copy_internal_ = remap_copy_internal; remap_incoming_ = remap_incoming; + DCHECK(CheckRemappingInfoIsValid()); } bool SuperblockCloner::IsSubgraphClonable() const { @@ -602,6 +724,63 @@ bool SuperblockCloner::IsSubgraphClonable() const { return true; } +bool SuperblockCloner::IsFastCase() const { + // Check that loop unrolling/loop peeling is being conducted. + // Check that all the basic blocks belong to the same loop. + bool flag = false; + HLoopInformation* common_loop_info = nullptr; + for (uint32_t idx : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* block_loop_info = block->GetLoopInformation(); + if (!flag) { + common_loop_info = block_loop_info; + } else { + if (block_loop_info != common_loop_info) { + return false; + } + } + } + + // Check that orig_bb_set_ corresponds to loop peeling/unrolling. + if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) { + return false; + } + + bool peeling_or_unrolling = false; + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + + // Check whether remapping info corresponds to loop unrolling. + CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + remap_orig_internal.Clear(); + remap_copy_internal.Clear(); + remap_incoming.Clear(); + + // Check whether remapping info corresponds to loop peeling. + CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + return peeling_or_unrolling; +} + void SuperblockCloner::Run() { DCHECK(bb_map_ != nullptr); DCHECK(hir_map_ != nullptr); @@ -609,6 +788,11 @@ void SuperblockCloner::Run() { remap_copy_internal_ != nullptr && remap_incoming_ != nullptr); DCHECK(IsSubgraphClonable()); + DCHECK(IsFastCase()); + + if (kSuperblockClonerLogging) { + DumpInputSets(); + } // Find an area in the graph for which control flow information should be adjusted. FindAndSetLocalAreaForAdjustments(); @@ -618,6 +802,19 @@ void SuperblockCloner::Run() { // Connect the blocks together/remap successors and fix phis which are directly affected my the // remapping. 
RemapEdgesSuccessors(); + + // Check that the subgraph is connected. + if (kIsDebugBuild) { + HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner); + + // Add original and copy blocks of the subgraph to the work set. + for (auto iter : *bb_map_) { + work_set.SetBit(iter.first->GetBlockId()); // Original block. + work_set.SetBit(iter.second->GetBlockId()); // Copy block. + } + CHECK(IsSubgraphConnected(&work_set, graph_)); + } + // Recalculate dominance and backedge information which is required by the next stage. AdjustControlFlowInfo(); // Fix data flow of the graph. @@ -650,6 +847,10 @@ void SuperblockCloner::CleanUp() { } } } + + if (kIsDebugBuild) { + VerifyGraph(); + } } HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { @@ -701,4 +902,127 @@ void SuperblockCloner::CloneBasicBlocks() { } } +// +// Stand-alone methods. +// + +void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + HEdgeSet* remap_orig_internal, + HEdgeSet* remap_copy_internal, + HEdgeSet* remap_incoming) { + DCHECK(loop_info != nullptr); + HBasicBlock* loop_header = loop_info->GetHeader(); + // Set up remap_orig_internal edges set - set is empty. + // Set up remap_copy_internal edges set. + for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) { + HEdge e = HEdge(back_edge_block, loop_header); + if (to_unroll) { + remap_orig_internal->Insert(e); + remap_copy_internal->Insert(e); + } else { + remap_copy_internal->Insert(e); + } + } + + // Set up remap_incoming edges set. + if (!to_unroll) { + remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header)); + } +} + +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) { + ArenaVector<HBasicBlock*> entry_blocks( + graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + // Find subgraph entry blocks. + for (uint32_t orig_block_id : work_set->Indexes()) { + HBasicBlock* block = graph->GetBlocks()[orig_block_id]; + for (HBasicBlock* pred : block->GetPredecessors()) { + if (!work_set->IsBitSet(pred->GetBlockId())) { + entry_blocks.push_back(block); + break; + } + } + } + + for (HBasicBlock* entry_block : entry_blocks) { + if (work_set->IsBitSet(entry_block->GetBlockId())) { + TraverseSubgraphForConnectivity(entry_block, work_set); + } + } + + // Return whether there are unvisited - unreachable - blocks. + return work_set->NumSetBits() == 0; +} + +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { + if (loop1 == nullptr || loop2 == nullptr) { + return nullptr; + } + + if (loop1->IsIn(*loop2)) { + return loop2; + } + + HLoopInformation* current = loop1; + while (current != nullptr && !loop2->IsIn(*current)) { + current = current->GetPreHeader()->GetLoopInformation(); + } + + return current; +} + +bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) { + PeelUnrollHelper helper(loop_info, nullptr, nullptr); + return helper.IsLoopClonable(); +} + +HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) { + // For now do peeling only for natural loops. + DCHECK(!loop_info_->IsIrreducible()); + + HBasicBlock* loop_header = loop_info_->GetHeader(); + // Check that loop info is up-to-date. 
+ DCHECK(loop_info_ == loop_header->GetLoopInformation()); + + HGraph* graph = loop_header->GetGraph(); + ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool()); + + HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(to_unroll, + loop_info_, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + cloner_.Run(); + cloner_.CleanUp(); + + // Check that loop info is preserved. + DCHECK(loop_info_ == loop_header->GetLoopInformation()); + + return loop_header; +} + +PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info) + : bb_map_(std::less<HBasicBlock*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + hir_map_(std::less<HInstruction*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + helper_(info, &bb_map_, &hir_map_) {} + } // namespace art + +namespace std { + +ostream& operator<<(ostream& os, const art::HEdge& e) { + e.Dump(os); + return os; +} + +} // namespace std diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index 23de6926735..e0931674cb3 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -25,7 +25,6 @@ namespace art { static const bool kSuperblockClonerLogging = false; -static const bool kSuperblockClonerVerify = false; // Represents an edge between two HBasicBlocks. // @@ -152,6 +151,15 @@ class SuperblockCloner : public ValueObject { // TODO: Start from small range of graph patterns then extend it. bool IsSubgraphClonable() const; + // Returns whether selected subgraph satisfies the criteria for fast data flow resolution + // when iterative DF algorithm is not required and dominators/instructions inputs can be + // trivially adjusted. + // + // TODO: formally describe the criteria. + // + // Loop peeling and unrolling satisfy the criteria. + bool IsFastCase() const; + // Runs the copy algorithm according to the description. void Run(); @@ -202,11 +210,17 @@ class SuperblockCloner : public ValueObject { return IsInOrigBBSet(block->GetBlockId()); } + // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, + // dominators) needs to be adjusted. + HLoopInformation* GetRegionToBeAdjusted() const { + return outer_loop_; + } + private: // Fills the 'exits' vector with the subgraph exits. void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); - // Finds and records information about the area in the graph for which control-flow (back edges, + // Finds and records information about the area in the graph for which control flow (back edges, // loops, dominators) needs to be adjusted. void FindAndSetLocalAreaForAdjustments(); @@ -217,7 +231,7 @@ class SuperblockCloner : public ValueObject { // phis' nor instructions' inputs values are resolved. void RemapEdgesSuccessors(); - // Adjusts control-flow (back edges, loops, dominators) for the local area defined by + // Adjusts control flow (back edges, loops, dominators) for the local area defined by // FindAndSetLocalAreaForAdjustments. 
void AdjustControlFlowInfo(); @@ -272,6 +286,9 @@ class SuperblockCloner : public ValueObject { // Debug and logging methods. // void CheckInstructionInputsRemapping(HInstruction* orig_instr); + bool CheckRemappingInfoIsValid(); + void VerifyGraph(); + void DumpInputSets(); HBasicBlock* GetBlockById(uint32_t block_id) const { DCHECK(block_id < graph_->GetBlocks().size()); @@ -295,15 +312,97 @@ class SuperblockCloner : public ValueObject { HBasicBlockMap* bb_map_; // Correspondence map for instructions: (original HInstruction, copy HInstruction). HInstructionMap* hir_map_; - // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. + // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. HLoopInformation* outer_loop_; HBasicBlockSet outer_loop_bb_set_; ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); + ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); }; +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when correspondence map between original and copied +// basic blocks/instructions are demanded. +class PeelUnrollHelper : public ValueObject { + public: + explicit PeelUnrollHelper(HLoopInformation* info, + SuperblockCloner::HBasicBlockMap* bb_map, + SuperblockCloner::HInstructionMap* hir_map) : + loop_info_(info), + cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map) { + // For now do peeling/unrolling only for natural loops. + DCHECK(!info->IsIrreducible()); + } + + // Returns whether the loop can be peeled/unrolled (static function). + static bool IsLoopClonable(HLoopInformation* loop_info); + + // Returns whether the loop can be peeled/unrolled. + bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } + + HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll */ false); } + HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll */ true); } + HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } + + protected: + // Applies loop peeling/unrolling for the loop specified by 'loop_info'. + // + // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it. + HBasicBlock* DoPeelUnrollImpl(bool to_unroll); + + private: + HLoopInformation* loop_info_; + SuperblockCloner cloner_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper); +}; + +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when there is no need to get correspondence information between +// original and copied basic blocks/instructions. +class PeelUnrollSimpleHelper : public ValueObject { + public: + explicit PeelUnrollSimpleHelper(HLoopInformation* info); + bool IsLoopClonable() const { return helper_.IsLoopClonable(); } + HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } + HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } + HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } + + const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } + const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } + + private: + SuperblockCloner::HBasicBlockMap bb_map_; + SuperblockCloner::HInstructionMap hir_map_; + PeelUnrollHelper helper_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper); +}; + +// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. 
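// Worked example, using hypothetical block names (P, H, B): for a natural loop with
// preheader P, header H and a single back-edge block B (back edge B->H), the definition in
// superblock_cloner.cc above produces
//   unrolling (to_unroll == true):  remap_orig_internal = { B->H }, remap_copy_internal = { B->H },
//                                   remap_incoming = { }; the entry edge P->H is left untouched,
//                                   so the original header stays the loop header and the copied
//                                   blocks form the second half of the unrolled iteration;
//   peeling (to_unroll == false):   remap_orig_internal = { }, remap_copy_internal = { B->H },
//                                   remap_incoming = { P->H }; the entry edge is redirected to
//                                   the copy, which becomes the peeled first iteration in front
//                                   of the original loop (see the LoopPeeling and LoopUnrolling
//                                   tests below).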
+void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + SuperblockCloner::HEdgeSet* remap_orig_internal, + SuperblockCloner::HEdgeSet* remap_copy_internal, + SuperblockCloner::HEdgeSet* remap_incoming); + +// Returns whether blocks from 'work_set' are reachable from the rest of the graph. +// +// Returns whether such a set 'outer_entries' of basic blocks exists that: +// - each block from 'outer_entries' is not from 'work_set'. +// - each block from 'work_set' is reachable from at least one block from 'outer_entries'. +// +// After the function returns work_set contains only blocks from the original 'work_set' +// which are unreachable from the rest of the graph. +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); + +// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole +// graph. +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); } // namespace art namespace std { @@ -312,11 +411,12 @@ template <> struct hash<art::HEdge> { size_t operator()(art::HEdge const& x) const noexcept { // Use Cantor pairing function as the hash function. - uint32_t a = x.GetFrom(); - uint32_t b = x.GetTo(); + size_t a = x.GetFrom(); + size_t b = x.GetTo(); return (a + b) * (a + b + 1) / 2 + b; } }; +ostream& operator<<(ostream& os, const art::HEdge& e); } // namespace std diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index f1b7bffdf5f..df2e517afff 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -25,52 +25,65 @@ namespace art { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; +using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; +using HEdgeSet = SuperblockCloner::HEdgeSet; // This class provides methods and helpers for testing various cloning and copying routines: // individual instruction cloning and cloning of the more coarse-grain structures. 
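// Note on the std::hash<art::HEdge> specialization above: it uses the Cantor pairing function
// (a + b) * (a + b + 1) / 2 + b, which maps distinct ordered (from, to) pairs to distinct
// values. A quick standalone sanity sketch (CantorPair is an invented name, not an ART API):
//
//   #include <cstddef>
//   constexpr size_t CantorPair(size_t a, size_t b) { return (a + b) * (a + b + 1) / 2 + b; }
//   static_assert(CantorPair(2, 3) == 18, "edge 2->3");
//   static_assert(CantorPair(3, 2) == 17, "edge 3->2; order matters");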
class SuperblockClonerTest : public OptimizingUnitTest { public: - SuperblockClonerTest() - : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {} + SuperblockClonerTest() : graph_(CreateGraph()), + entry_block_(nullptr), + return_block_(nullptr), + exit_block_(nullptr), + parameter_(nullptr) {} - void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p, - /* out */ HBasicBlock** body_p) { + void InitGraph() { entry_block_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_block_); graph_->SetEntryBlock(entry_block_); + return_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(return_block_); + + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(exit_block_); + graph_->SetExitBlock(exit_block_); + + entry_block_->AddSuccessor(return_block_); + return_block_->AddSuccessor(exit_block_); + + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); + entry_block_->AddInstruction(parameter_); + return_block_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); + } + + void CreateBasicLoopControlFlow(HBasicBlock* position, + HBasicBlock* successor, + /* out */ HBasicBlock** header_p, + /* out */ HBasicBlock** body_p) { HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_); - HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_preheader); graph_->AddBlock(loop_header); graph_->AddBlock(loop_body); - graph_->AddBlock(loop_exit); - exit_block_ = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(exit_block_); - graph_->SetExitBlock(exit_block_); + position->ReplaceSuccessor(successor, loop_preheader); - entry_block_->AddSuccessor(loop_preheader); loop_preheader->AddSuccessor(loop_header); // Loop exit first to have a proper exit condition/target for HIf. - loop_header->AddSuccessor(loop_exit); + loop_header->AddSuccessor(successor); loop_header->AddSuccessor(loop_body); loop_body->AddSuccessor(loop_header); - loop_exit->AddSuccessor(exit_block_); *header_p = loop_header; *body_p = loop_body; - - parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - DataType::Type::kInt32); - entry_block_->AddInstruction(parameter_); - loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid()); - exit_block_->AddInstruction(new (GetAllocator()) HExit()); } void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) { @@ -84,11 +97,12 @@ class SuperblockClonerTest : public OptimizingUnitTest { // Header block. HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck(); + HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128); loop_header->AddPhi(phi); loop_header->AddInstruction(suspend_check); - loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128)); - loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_)); + loop_header->AddInstruction(loop_check); + loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check)); // Loop body block. 
HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc); @@ -97,8 +111,8 @@ class SuperblockClonerTest : public OptimizingUnitTest { HInstruction* array_get = new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc); HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1); - HInstruction* array_set = - new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); + HInstruction* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1); loop_body->AddInstruction(null_check); @@ -153,6 +167,7 @@ class SuperblockClonerTest : public OptimizingUnitTest { HGraph* graph_; HBasicBlock* entry_block_; + HBasicBlock* return_block_; HBasicBlock* exit_block_; HInstruction* parameter_; @@ -162,10 +177,11 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) { HBasicBlock* header = nullptr; HBasicBlock* loop_body = nullptr; - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); - ASSERT_TRUE(CheckGraph()); + EXPECT_TRUE(CheckGraph()); HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck(); CloneAndReplaceInstructionVisitor visitor(graph_); @@ -193,7 +209,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) { HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -272,7 +289,8 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -303,4 +321,487 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { EXPECT_TRUE(loop_info->IsBackEdge(*loop_body)); } +// Tests IsSubgraphConnected function for negative case. +TEST_F(SuperblockClonerTest, IsGraphConnected) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(unreachable_block); + + HBasicBlockSet bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + bb_set.SetBit(header->GetBlockId()); + bb_set.SetBit(loop_body->GetBlockId()); + bb_set.SetBit(unreachable_block->GetBlockId()); + + EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_)); + EXPECT_EQ(bb_set.NumSetBits(), 1u); + EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId())); +} + +// Tests SuperblockCloner for loop peeling case. +// +// Control Flow of the example (ignoring critical edges splitting). 
+// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A| +// / \ / / \ +// v v/ / v +// |4| |3| / |3A| (7) +// | / / +// v | v +// |E| \ |2|<-\ +// \ / \ / +// v v / +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopPeeling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoPeeling(); + HLoopInformation* new_loop_info = new_header->GetLoopInformation(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), header); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(new_loop_info->GetHeader(), header); + EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body); +} + +// Tests SuperblockCloner for loop unrolling case. +// +// Control Flow of the example (ignoring critical edges splitting). +// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A|<-\ +// / \ / / \ \ +// v v/ / v \ +// |4| |3| /(7)|3A| | +// | / / / +// v | v / +// |E| \ |2| / +// \ / \ / +// v v/ +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopUnrolling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoUnrolling(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header)); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(loop_info, new_header->GetLoopInformation()); + EXPECT_EQ(loop_info->GetHeader(), new_header); + EXPECT_EQ(loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body)); +} + +// Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after +// the transformation the loop has a single preheader. 
+TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + // Transform a basic loop to have multiple back edges. + HBasicBlock* latch = header->GetSuccessors()[1]; + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(if_block); + graph_->AddBlock(temp1); + header->ReplaceSuccessor(latch, if_block); + if_block->AddSuccessor(latch); + if_block->AddSuccessor(temp1); + temp1->AddSuccessor(header); + + if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + HInstructionIterator it(header->GetPhis()); + DCHECK(!it.Done()); + HPhi* loop_phi = it.Current()->AsPhi(); + HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32, + loop_phi, + graph_->GetIntConstant(2)); + temp1->AddInstruction(temp_add); + temp1->AddInstruction(new (GetAllocator()) HGoto()); + loop_phi->AddInput(temp_add); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollSimpleHelper helper(loop_info); + HBasicBlock* new_header = helper.DoPeeling(); + EXPECT_EQ(header, new_header); + + EXPECT_TRUE(CheckGraph()); + EXPECT_EQ(header->GetPredecessors().size(), 3u); +} + +static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header, + HBasicBlock* loop2_header, + HBasicBlock* loop3_header) { + EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header); + EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(), + loop2_header); +} + +TEST_F(SuperblockClonerTest, LoopPeelingNested) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 + // [ ], [ [ ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation(); + HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation(); + + // Check nested loops structure. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation()); + helper.DoPeeling(); + // Check that nested loops structure has not changed after the transformation. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + + // Test that the loop info is preserved. 
+ EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation()); + EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation()); + + EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before); + EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks that the loop population is correctly propagated after an inner loop is peeled. +TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 + // [ [ [ ] ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation()); + helper.DoPeeling(); + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->Contains(*loop2_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + // Check that loop4 info has not been touched after local run of AnalyzeLoops. + EXPECT_EQ(loop4, loop4_header->GetLoopInformation()); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(!loop4->IsIn(*loop1)); + + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks the case when inner loop have an exit not to its immediate outer_loop but some other loop +// in the hierarchy. Loop population information must be valid after loop peeling. +TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops then peel loop3. + // Headers: 1 2 3 + // [ [ [ ] ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + HBasicBlock* loop_body1 = loop_body; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + HBasicBlock* loop_body3 = loop_body; + + // Change the loop3 - insert an exit which leads to loop1. 
+ HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(loop3_extra_if_block); + loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block); + loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit. + loop3_extra_if_block->AddSuccessor(loop_body3); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit)); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation()); + helper.DoPeeling(); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + // Check that after the transformation the local area for CF adjustments has been chosen + // correctly and loop population has been updated. + loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1->Contains(*loop3_long_exit)); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1); + + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + EXPECT_TRUE(CheckGraph()); +} + +TEST_F(SuperblockClonerTest, FastCaseCheck) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + + HLoopInformation* loop_info = header->GetLoopInformation(); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + orig_bb_set.Union(&loop_info->GetBlocks()); + + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(true, + loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + // Insert some extra nodes and edges. + HBasicBlock* preheader = loop_info->GetPreHeader(); + orig_bb_set.SetBit(preheader->GetBlockId()); + + // Adjust incoming edges. + remap_incoming.Clear(); + remap_incoming.Insert(HEdge(preheader->GetSinglePredecessor(), preheader)); + + HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + &bb_map, + &hir_map); + cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + + EXPECT_FALSE(cloner.IsFastCase()); +} + +// Helper for FindCommonLoop which also check that FindCommonLoop is symmetric. +static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) { + HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2); + HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1); + EXPECT_EQ(common_loop21, common_loop12); + return common_loop12; +} + +// Tests FindCommonLoop function on a loop nest. 
+TEST_F(SuperblockClonerTest, FindCommonLoop) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 5 + // [ [ [ ] ], [ ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop5_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + HLoopInformation* loop5 = loop5_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(loop4->IsIn(*loop1)); + + EXPECT_FALSE(loop5->IsIn(*loop1)); + EXPECT_FALSE(loop4->IsIn(*loop2)); + EXPECT_FALSE(loop4->IsIn(*loop3)); + + EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1); + + EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr); + EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5); +} + } // namespace art diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index 921d4018492..57360e74a33 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -17,6 +17,7 @@ #include "trampoline_compiler.h" #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "jni_env_ext.h" #ifdef ART_ENABLE_CODEGEN_arm @@ -243,7 +244,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset64 offset) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); switch (isa) { #ifdef ART_ENABLE_CODEGEN_arm64 @@ -269,7 +270,7 @@ 
std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet is std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset32 offset) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); switch (isa) { #ifdef ART_ENABLE_CODEGEN_arm diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index 944c64b5918..421c1b60895 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -20,8 +20,8 @@ #include <vector> #include "base/casts.h" +#include "base/memory_region.h" #include "globals.h" -#include "memory_region.h" namespace art { diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 5b0cd6baa8d..379a6396eb6 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -29,10 +29,10 @@ #include "base/array_ref.h" #include "base/enums.h" #include "base/macros.h" +#include "base/memory_region.h" #include "debug/dwarf/debug_frame_opcode_writer.h" #include "label.h" #include "managed_register.h" -#include "memory_region.h" #include "mips/constants_mips.h" #include "offsets.h" #include "x86/constants_x86.h" diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 0cb8bbb2d54..7c800b355fe 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -26,6 +26,7 @@ #include <fstream> #include <iterator> +#include "base/malloc_arena_pool.h" #include "assembler_test_base.h" #include "common_runtime_test.h" // For ScratchFile @@ -1606,7 +1607,7 @@ class AssemblerTest : public testing::Test { static constexpr size_t kWarnManyCombinationsThreshold = 500; - ArenaPool pool_; + MallocArenaPool pool_; std::unique_ptr<ArenaAllocator> allocator_; std::unique_ptr<Ass> assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 655d17d4fbf..053e202523a 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -27,6 +27,7 @@ #include "utils/arm/jni_macro_assembler_arm_vixl.h" #include "base/hex_dump.h" +#include "base/malloc_arena_pool.h" #include "common_runtime_test.h" namespace art { @@ -169,7 +170,7 @@ class ArmVIXLAssemblerTest : public ::testing::Test { public: ArmVIXLAssemblerTest() : pool(), allocator(&pool), assembler(&allocator) { } - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator; ArmVIXLJNIMacroAssembler assembler; }; diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h index ce3302bb628..9915498acca 100644 --- a/compiler/utils/atomic_dex_ref_map-inl.h +++ b/compiler/utils/atomic_dex_ref_map-inl.h @@ -70,7 +70,7 @@ inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Get(const DexFileRefer if (array == nullptr) { return false; } - *out = (*array)[ref.index].LoadRelaxed(); + *out = (*array)[ref.index].load(std::memory_order_relaxed); return true; } @@ -81,7 +81,7 @@ inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Remove(const DexFileRe if (array == nullptr) { return false; } - *out = (*array)[ref.index].ExchangeSequentiallyConsistent(nullptr); + *out = (*array)[ref.index].exchange(nullptr, std::memory_order_seq_cst); return true; } @@ -120,7 +120,7 @@ inline void AtomicDexRefMap<DexFileReferenceType, Value>::Visit(const Visitor& v const DexFile* dex_file = pair.first; const ElementArray& elements = pair.second; for (size_t i = 0; i < elements.size(); 
++i) { - visitor(DexFileReference(dex_file, i), elements[i].LoadRelaxed()); + visitor(DexFileReference(dex_file, i), elements[i].load(std::memory_order_relaxed)); } } } @@ -129,7 +129,7 @@ template <typename DexFileReferenceType, typename Value> inline void AtomicDexRefMap<DexFileReferenceType, Value>::ClearEntries() { for (auto& it : arrays_) { for (auto& element : it.second) { - element.StoreRelaxed(nullptr); + element.store(nullptr, std::memory_order_relaxed); } } } diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 3f7691b6a86..0c34aa4f1dc 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -38,8 +38,8 @@ #include "x86_64/jni_macro_assembler_x86_64.h" #endif #include "base/casts.h" +#include "base/memory_region.h" #include "globals.h" -#include "memory_region.h" namespace art { diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h index 1aefc84c78c..b70c18b3e2f 100644 --- a/compiler/utils/jni_macro_assembler_test.h +++ b/compiler/utils/jni_macro_assembler_test.h @@ -20,6 +20,7 @@ #include "jni_macro_assembler.h" #include "assembler_test_base.h" +#include "base/malloc_arena_pool.h" #include "common_runtime_test.h" // For ScratchFile #include <sys/stat.h> @@ -139,7 +140,7 @@ class JNIMacroAssemblerTest : public testing::Test { test_helper_->Driver(*data, assembly_text, test_name); } - ArenaPool pool_; + MallocArenaPool pool_; std::unique_ptr<ArenaAllocator> allocator_; std::unique_ptr<Ass> assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 2218ef9af29..dce5b95fec3 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -18,9 +18,9 @@ #include "base/bit_utils.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -2793,6 +2793,26 @@ void MipsAssembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt); } +void MipsAssembler::PcntB(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntH(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + void MipsAssembler::ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 7de8e2e3665..c6ce62b4f4a 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -756,6 +756,11 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void PcntB(VectorRegister wd, VectorRegister 
ws); + void PcntH(VectorRegister wd, VectorRegister ws); + void PcntW(VectorRegister wd, VectorRegister ws); + void PcntD(VectorRegister wd, VectorRegister ws); + // Helper for replicating floating point value in all destination elements. void ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double); diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index 937ee25bcb1..691c33f3e7a 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -2277,6 +2277,22 @@ TEST_F(AssemblerMIPS32r6Test, FillW) { DriverStr(RepeatVR(&mips::MipsAssembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); } +TEST_F(AssemblerMIPS32r6Test, PcntB) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntH) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntW) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntD) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d"); +} + TEST_F(AssemblerMIPS32r6Test, LdiB) { DriverStr(RepeatVIb(&mips::MipsAssembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index e1b0e75108b..bb1bb82fa5d 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -18,9 +18,9 @@ #include "base/bit_utils.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -2279,6 +2279,26 @@ void Mips64Assembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegist EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15); } +void Mips64Assembler::PcntB(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntH(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e); +} + void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double) { diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 7a61f39e64a..542dbafc87d 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -863,6 +863,11 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void PcntB(VectorRegister wd, VectorRegister ws); + void PcntH(VectorRegister wd, VectorRegister ws); + void PcntW(VectorRegister wd, VectorRegister ws); + void PcntD(VectorRegister wd, VectorRegister ws); + // Helper for replicating floating point value in all destination elements. 
void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index b0e1d91c3f8..fb5f12be936 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -3529,6 +3529,22 @@ TEST_F(AssemblerMIPS64Test, FillD) { DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); } +TEST_F(AssemblerMIPS64Test, PcntB) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b"); +} + +TEST_F(AssemblerMIPS64Test, PcntH) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h"); +} + +TEST_F(AssemblerMIPS64Test, PcntW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w"); +} + +TEST_F(AssemblerMIPS64Test, PcntD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d"); +} + TEST_F(AssemblerMIPS64Test, LdiB) { DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index ea160c8993c..86f9010ea32 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -17,8 +17,8 @@ #include "assembler_x86.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -913,6 +913,78 @@ void X86Assembler::psubq(XmmRegister dst, XmmRegister src) { } +void X86Assembler::paddusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDD); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xED); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD9); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE9); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); diff --git 
a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index a0856770836..22eaedce612 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -449,6 +449,15 @@ class X86Assembler FINAL : public Assembler { void paddq(XmmRegister dst, XmmRegister src); void psubq(XmmRegister dst, XmmRegister src); + void paddusb(XmmRegister dst, XmmRegister src); + void paddsb(XmmRegister dst, XmmRegister src); + void paddusw(XmmRegister dst, XmmRegister src); + void paddsw(XmmRegister dst, XmmRegister src); + void psubusb(XmmRegister dst, XmmRegister src); + void psubsb(XmmRegister dst, XmmRegister src); + void psubusw(XmmRegister dst, XmmRegister src); + void psubsw(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, Register src); void cvtsi2sd(XmmRegister dst, Register src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 2fd1b271828..cd007b32d41 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -17,13 +17,14 @@ #include "assembler_x86.h" #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "utils/assembler_test.h" namespace art { TEST(AssemblerX86, CreateBuffer) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); AssemblerBuffer buffer(&allocator); AssemblerBuffer::EnsureCapacity ensured(&buffer); @@ -600,6 +601,38 @@ TEST_F(AssemblerX86Test, PSubQ) { DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); } +TEST_F(AssemblerX86Test, PAddUSB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb"); +} + +TEST_F(AssemblerX86Test, PAddSB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb"); +} + +TEST_F(AssemblerX86Test, PAddUSW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw"); +} + +TEST_F(AssemblerX86Test, PAddSW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddsw, "paddsw %{reg2}, %{reg1}"), "paddsw"); +} + +TEST_F(AssemblerX86Test, PSubUSB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb"); +} + +TEST_F(AssemblerX86Test, PSubSB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb"); +} + +TEST_F(AssemblerX86Test, PSubUSW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw"); +} + +TEST_F(AssemblerX86Test, PSubSW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw"); +} + TEST_F(AssemblerX86Test, XorPD) { DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index ff5a357c5e3..bd31561937d 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -17,8 +17,8 @@ #include "assembler_x86_64.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -1011,6 +1011,86 @@ void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) { } +void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDC); +
EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEC); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDD); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xED); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) { cvtsi2ss(dst, src, false); } diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 7a5fdb502f6..ab761fb1fc1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -485,6 +485,15 @@ class X86_64Assembler FINAL : public Assembler { void paddq(XmmRegister dst, XmmRegister src); void psubq(XmmRegister dst, XmmRegister src); + void paddusb(XmmRegister dst, XmmRegister src); + void paddsb(XmmRegister dst, XmmRegister src); + void paddusw(XmmRegister dst, XmmRegister src); + void paddsw(XmmRegister dst, XmmRegister src); + void psubusb(XmmRegister dst, XmmRegister src); + void psubsb(XmmRegister dst, XmmRegister src); + void psubusw(XmmRegister dst, XmmRegister src); + void psubsw(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version. 
void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit); void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 6b1e53c35ab..0589df55d23 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -21,6 +21,7 @@ #include <random> #include "base/bit_utils.h" +#include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "jni_macro_assembler_x86_64.h" #include "utils/assembler_test.h" @@ -29,7 +30,7 @@ namespace art { TEST(AssemblerX86_64, CreateBuffer) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); AssemblerBuffer buffer(&allocator); AssemblerBuffer::EnsureCapacity ensured(&buffer); @@ -1282,6 +1283,38 @@ TEST_F(AssemblerX86_64Test, Psubq) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); } +TEST_F(AssemblerX86_64Test, Paddusb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb"); +} + +TEST_F(AssemblerX86_64Test, Paddsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb"); +} + +TEST_F(AssemblerX86_64Test, Paddusw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw"); +} + +TEST_F(AssemblerX86_64Test, Paddsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsw, "paddsw %{reg2}, %{reg1}"), "paddsw"); +} + +TEST_F(AssemblerX86_64Test, Psubusb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb"); +} + +TEST_F(AssemblerX86_64Test, Psubsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb"); +} + +TEST_F(AssemblerX86_64Test, Psubusw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw"); +} + +TEST_F(AssemblerX86_64Test, Psubsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw"); +} + TEST_F(AssemblerX86_64Test, Cvtsi2ss) { DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss"); } diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 5766f9d44b9..9486cb44c5b 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -17,8 +17,8 @@ #include "jni_macro_assembler_x86_64.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 76448d819c2..553d131e2f5 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -18,6 +18,7 @@ #include "verifier/verifier_deps.h" #include "art_method-inl.h" +#include "base/indenter.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiler_callbacks.h" @@ -28,7 +29,6 @@ #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "handle_scope-inl.h" -#include "indenter.h" #include "mirror/class_loader.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" |
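Editor's note on the new MIPS MSA population-count instructions (PcntB/PcntH/PcntW/PcntD) added above: both the 32-bit and 64-bit assemblers emit them through EmitMsa2R(0xc1, df, ws, wd, 0x1e). The following minimal, self-contained sketch shows how a 2R-format MSA word is typically packed from those operands. The field layout (major opcode in bits 31:26, 8-bit operation in 25:18, data format in 17:16, ws in 15:11, wd in 10:6, minor opcode in 5:0) and the EncodeMsa2R helper are assumptions for illustration only, not the ART assemblers' own EmitMsa2R.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for EmitMsa2R: packs a 2R-format MSA instruction word under the
// assumed field layout described above.
static uint32_t EncodeMsa2R(uint32_t operation, uint32_t df, uint32_t ws, uint32_t wd,
                            uint32_t minor_opcode) {
  const uint32_t kMsaMajorOpcode = 0x1e;  // assumed major opcode, bits 31:26
  return (kMsaMajorOpcode << 26) | (operation << 18) | (df << 16) |
         (ws << 11) | (wd << 6) | minor_opcode;
}

int main() {
  // pcnt.b $w2, $w1 under these assumptions: operation 0xc1, df = 0 (byte elements),
  // minor opcode 0x1e, matching the operands passed to EmitMsa2R in the patch.
  std::printf("pcnt.b $w2, $w1 -> 0x%08x\n", EncodeMsa2R(0xc1, 0x0, 1, 2, 0x1e));
  return 0;
}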

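For readers unfamiliar with the x86/x86-64 instructions covered by the new paddusb/paddsb/paddusw/paddsw and psubusb/psubsb/psubusw/psubsw assembler tests: these are the SSE2 saturating packed adds and subtracts. The sketch below is a scalar reference model for one byte lane of the add forms only, illustrating the saturation behaviour (unsigned results clamp to [0, 255], signed results to [-128, 127]; the subtract forms clamp the same way). It illustrates the instruction semantics and is not code from this patch.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One-lane reference model of the SSE2 saturating byte adds (paddusb/paddsb semantics).
static uint8_t AddUnsignedSaturated(uint8_t a, uint8_t b) {
  int sum = int{a} + int{b};
  return static_cast<uint8_t>(std::min(sum, 255));  // unsigned saturation: clamp to [0, 255]
}

static int8_t AddSignedSaturated(int8_t a, int8_t b) {
  int sum = int{a} + int{b};
  return static_cast<int8_t>(std::clamp(sum, -128, 127));  // signed saturation: clamp to [-128, 127]
}

int main() {
  std::printf("paddusb lane: 200 + 100 -> %d\n", AddUnsignedSaturated(200, 100));  // prints 255
  std::printf("paddsb  lane: 100 + 100 -> %d\n", AddSignedSaturated(100, 100));    // prints 127
  return 0;
}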