Diffstat (limited to 'compiler')
132 files changed, 7539 insertions, 10283 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index e42261c5561..cde64b058cb 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -39,7 +39,6 @@ art_cc_defaults { "linker/file_output_stream.cc", "linker/output_stream.cc", "linker/vector_output_stream.cc", - "linker/relative_patcher.cc", "jit/jit_compiler.cc", "jit/jit_logger.cc", "jni/quick/calling_convention.cc", @@ -70,6 +69,7 @@ art_cc_defaults { "optimizing/load_store_analysis.cc", "optimizing/load_store_elimination.cc", "optimizing/locations.cc", + "optimizing/loop_analysis.cc", "optimizing/loop_optimization.cc", "optimizing/nodes.cc", "optimizing/optimization.cc", @@ -101,8 +101,6 @@ art_cc_defaults { arm: { srcs: [ "jni/quick/arm/calling_convention_arm.cc", - "linker/arm/relative_patcher_arm_base.cc", - "linker/arm/relative_patcher_thumb2.cc", "optimizing/code_generator_arm_vixl.cc", "optimizing/code_generator_vector_arm_vixl.cc", "optimizing/instruction_simplifier_arm.cc", @@ -119,7 +117,6 @@ art_cc_defaults { arm64: { srcs: [ "jni/quick/arm64/calling_convention_arm64.cc", - "linker/arm64/relative_patcher_arm64.cc", "optimizing/code_generator_arm64.cc", "optimizing/code_generator_vector_arm64.cc", "optimizing/scheduler_arm64.cc", @@ -133,7 +130,6 @@ art_cc_defaults { mips: { srcs: [ "jni/quick/mips/calling_convention_mips.cc", - "linker/mips/relative_patcher_mips.cc", "optimizing/code_generator_mips.cc", "optimizing/code_generator_vector_mips.cc", "optimizing/instruction_simplifier_mips.cc", @@ -146,7 +142,6 @@ art_cc_defaults { mips64: { srcs: [ "jni/quick/mips64/calling_convention_mips64.cc", - "linker/mips64/relative_patcher_mips64.cc", "optimizing/code_generator_mips64.cc", "optimizing/code_generator_vector_mips64.cc", "optimizing/intrinsics_mips64.cc", @@ -157,8 +152,6 @@ art_cc_defaults { x86: { srcs: [ "jni/quick/x86/calling_convention_x86.cc", - "linker/x86/relative_patcher_x86.cc", - "linker/x86/relative_patcher_x86_base.cc", "optimizing/code_generator_x86.cc", "optimizing/code_generator_vector_x86.cc", "optimizing/intrinsics_x86.cc", @@ -172,7 +165,6 @@ art_cc_defaults { x86_64: { srcs: [ "jni/quick/x86_64/calling_convention_x86_64.cc", - "linker/x86_64/relative_patcher_x86_64.cc", "optimizing/intrinsics_x86_64.cc", "optimizing/code_generator_x86_64.cc", "optimizing/code_generator_vector_x86_64.cc", @@ -372,31 +364,25 @@ art_cc_test { codegen: { arm: { srcs: [ - "linker/arm/relative_patcher_thumb2_test.cc", "utils/arm/managed_register_arm_test.cc", ], }, arm64: { srcs: [ - "linker/arm64/relative_patcher_arm64_test.cc", "utils/arm64/managed_register_arm64_test.cc", ], }, mips: { srcs: [ - "linker/mips/relative_patcher_mips_test.cc", - "linker/mips/relative_patcher_mips32r6_test.cc", ], }, mips64: { srcs: [ - "linker/mips64/relative_patcher_mips64_test.cc", "utils/mips64/managed_register_mips64_test.cc", ], }, x86: { srcs: [ - "linker/x86/relative_patcher_x86_test.cc", "utils/x86/managed_register_x86_test.cc", // These tests are testing architecture-independent @@ -412,7 +398,8 @@ art_cc_test { }, x86_64: { srcs: [ - "linker/x86_64/relative_patcher_x86_64_test.cc", + // Is this test a bit-rotten copy of the x86 test? 
b/77951326 + // "utils/x86_64/managed_register_x86_64_test.cc", ], }, }, diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index 29ff235cea7..581edaa773c 100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -37,8 +37,8 @@ constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT; class CFITest : public dwarf::DwarfTest { public: void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, - const std::vector<uint8_t>& actual_asm, - const std::vector<uint8_t>& actual_cfi) { + ArrayRef<const uint8_t> actual_asm, + ArrayRef<const uint8_t> actual_cfi) { std::vector<std::string> lines; // Print the raw bytes. fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); @@ -50,11 +50,18 @@ class CFITest : public dwarf::DwarfTest { // Pretty-print CFI opcodes. constexpr bool is64bit = false; dwarf::DebugFrameOpCodeWriter<> initial_opcodes; - dwarf::WriteCIE(is64bit, dwarf::Reg(8), - initial_opcodes, kCFIFormat, &debug_frame_data_); + dwarf::WriteCIE(is64bit, dwarf::Reg(8), initial_opcodes, kCFIFormat, &debug_frame_data_); std::vector<uintptr_t> debug_frame_patches; - dwarf::WriteFDE(is64bit, 0, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi), - kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches); + dwarf::WriteFDE(is64bit, + /* section_address */ 0, + /* cie_address */ 0, + /* code_address */ 0, + actual_asm.size(), + actual_cfi, + kCFIFormat, + /* buffer_address */ 0, + &debug_frame_data_, + &debug_frame_patches); ReformatCfi(Objdump(false, "-W"), &lines); // Pretty-print assembly. const uint8_t* asm_base = actual_asm.data(); @@ -142,7 +149,7 @@ class CFITest : public dwarf::DwarfTest { } // Pretty-print byte array. 12 bytes per line. - static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + static void HexDump(FILE* f, ArrayRef<const uint8_t> data) { for (size_t i = 0; i < data.size(); i++) { fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. fprintf(f, "0x%02X,", data[i]); diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index d3e3a51f7a5..96a0c1be4db 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -138,20 +138,6 @@ std::unordered_set<std::string>* CommonCompilerTest::GetImageClasses() { return new std::unordered_set<std::string>(); } -// Get the set of compiled classes given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetCompiledClasses() { - // Null, no selection of compiled-classes. - return nullptr; -} - -// Get the set of compiled methods given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetCompiledMethods() { - // Null, no selection of compiled-methods. - return nullptr; -} - // Get ProfileCompilationInfo that should be passed to the driver. ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() { // Null, profile information will not be taken into account. 
@@ -190,8 +176,6 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, isa, instruction_set_features_.get(), GetImageClasses(), - GetCompiledClasses(), - GetCompiledMethods(), number_of_threads, /* swap_fd */ -1, GetProfileCompilationInfo())); diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 8af29d44f0c..39c8bd817bb 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -67,14 +67,6 @@ class CommonCompilerTest : public CommonRuntimeTest { // driver assumes ownership of the set, so the test should properly release the set. virtual std::unordered_set<std::string>* GetImageClasses(); - // Get the set of compiled classes given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. - virtual std::unordered_set<std::string>* GetCompiledClasses(); - - // Get the set of compiled methods given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. - virtual std::unordered_set<std::string>* GetCompiledMethods(); - virtual ProfileCompilationInfo* GetProfileCompilationInfo(); virtual CompilerFilter::Filter GetCompilerFilter() const { diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc index a26a985ff9b..aa8277edb4d 100644 --- a/compiler/driver/compiled_method_storage.cc +++ b/compiler/driver/compiled_method_storage.cc @@ -161,6 +161,46 @@ class CompiledMethodStorage::LengthPrefixedArrayAlloc { SwapSpace* const swap_space_; }; +class CompiledMethodStorage::ThunkMapKey { + public: + ThunkMapKey(linker::LinkerPatch::Type type, uint32_t custom_value1, uint32_t custom_value2) + : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) {} + + bool operator<(const ThunkMapKey& other) const { + if (custom_value1_ != other.custom_value1_) { + return custom_value1_ < other.custom_value1_; + } + if (custom_value2_ != other.custom_value2_) { + return custom_value2_ < other.custom_value2_; + } + return type_ < other.type_; + } + + private: + linker::LinkerPatch::Type type_; + uint32_t custom_value1_; + uint32_t custom_value2_; +}; + +class CompiledMethodStorage::ThunkMapValue { + public: + ThunkMapValue(std::vector<uint8_t, SwapAllocator<uint8_t>>&& code, + const std::string& debug_name) + : code_(std::move(code)), debug_name_(debug_name) {} + + ArrayRef<const uint8_t> GetCode() const { + return ArrayRef<const uint8_t>(code_); + } + + const std::string& GetDebugName() const { + return debug_name_; + } + + private: + std::vector<uint8_t, SwapAllocator<uint8_t>> code_; + std::string debug_name_; +}; + CompiledMethodStorage::CompiledMethodStorage(int swap_fd) : swap_space_(swap_fd == -1 ? 
nullptr : new SwapSpace(swap_fd, 10 * MB)), dedupe_enabled_(true), @@ -171,7 +211,9 @@ CompiledMethodStorage::CompiledMethodStorage(int swap_fd) LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), dedupe_linker_patches_("dedupe cfi info", - LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())) { + LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())), + thunk_map_lock_("thunk_map_lock"), + thunk_map_(std::less<ThunkMapKey>(), SwapAllocator<ThunkMapValueType>(swap_space_.get())) { } CompiledMethodStorage::~CompiledMethodStorage() { @@ -237,4 +279,55 @@ void CompiledMethodStorage::ReleaseLinkerPatches( ReleaseArrayIfNotDeduplicated(linker_patches); } +CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey( + const linker::LinkerPatch& linker_patch) { + uint32_t custom_value1 = 0u; + uint32_t custom_value2 = 0u; + switch (linker_patch.GetType()) { + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + custom_value1 = linker_patch.GetBakerCustomValue1(); + custom_value2 = linker_patch.GetBakerCustomValue2(); + break; + case linker::LinkerPatch::Type::kCallRelative: + // No custom values. + break; + default: + LOG(FATAL) << "Unexpected patch type: " << linker_patch.GetType(); + UNREACHABLE(); + } + return ThunkMapKey(linker_patch.GetType(), custom_value1, custom_value2); +} + +ArrayRef<const uint8_t> CompiledMethodStorage::GetThunkCode(const linker::LinkerPatch& linker_patch, + /*out*/ std::string* debug_name) { + ThunkMapKey key = GetThunkMapKey(linker_patch); + MutexLock lock(Thread::Current(), thunk_map_lock_); + auto it = thunk_map_.find(key); + if (it != thunk_map_.end()) { + const ThunkMapValue& value = it->second; + if (debug_name != nullptr) { + *debug_name = value.GetDebugName(); + } + return value.GetCode(); + } else { + if (debug_name != nullptr) { + *debug_name = std::string(); + } + return ArrayRef<const uint8_t>(); + } +} + +void CompiledMethodStorage::SetThunkCode(const linker::LinkerPatch& linker_patch, + ArrayRef<const uint8_t> code, + const std::string& debug_name) { + DCHECK(!code.empty()); + ThunkMapKey key = GetThunkMapKey(linker_patch); + std::vector<uint8_t, SwapAllocator<uint8_t>> code_copy( + code.begin(), code.end(), SwapAllocator<uint8_t>(swap_space_.get())); + ThunkMapValue value(std::move(code_copy), debug_name); + MutexLock lock(Thread::Current(), thunk_map_lock_); + // Note: Multiple threads can try and compile the same thunk, so this may not create a new entry. + thunk_map_.emplace(key, std::move(value)); +} + } // namespace art diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h index 249f06c20f3..1634facb7ca 100644 --- a/compiler/driver/compiled_method_storage.h +++ b/compiler/driver/compiled_method_storage.h @@ -18,6 +18,7 @@ #define ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_ #include <iosfwd> +#include <map> #include <memory> #include "base/array_ref.h" @@ -67,7 +68,29 @@ class CompiledMethodStorage { const ArrayRef<const linker::LinkerPatch>& linker_patches); void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches); + // Returns the code associated with the given patch. + // If the code has not been set, returns empty data. + // If `debug_name` is not null, stores the associated debug name in `*debug_name`. 
+ ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& linker_patch, + /*out*/ std::string* debug_name = nullptr); + + // Sets the code and debug name associated with the given patch. + void SetThunkCode(const linker::LinkerPatch& linker_patch, + ArrayRef<const uint8_t> code, + const std::string& debug_name); + private: + class ThunkMapKey; + class ThunkMapValue; + using ThunkMapValueType = std::pair<const ThunkMapKey, ThunkMapValue>; + using ThunkMap = std::map<ThunkMapKey, + ThunkMapValue, + std::less<ThunkMapKey>, + SwapAllocator<ThunkMapValueType>>; + static_assert(std::is_same<ThunkMapValueType, ThunkMap::value_type>::value, "Value type check."); + + static ThunkMapKey GetThunkMapKey(const linker::LinkerPatch& linker_patch); + template <typename T, typename DedupeSetType> const LengthPrefixedArray<T>* AllocateOrDeduplicateArray(const ArrayRef<const T>& data, DedupeSetType* dedupe_set); @@ -102,6 +125,9 @@ class CompiledMethodStorage { ArrayDedupeSet<uint8_t> dedupe_cfi_info_; ArrayDedupeSet<linker::LinkerPatch> dedupe_linker_patches_; + Mutex thunk_map_lock_; + ThunkMap thunk_map_ GUARDED_BY(thunk_map_lock_); + DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage); }; diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 0769561d0ed..42fbba5109e 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ b/compiler/driver/compiled_method_storage_test.cc @@ -34,8 +34,6 @@ TEST(CompiledMethodStorage, Deduplicate) { /* instruction_set_ */ InstructionSet::kNone, /* instruction_set_features */ nullptr, /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, /* thread_count */ 1u, /* swap_fd */ -1, /* profile_compilation_info */ nullptr); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 53604761d12..41b7e7be47f 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -264,8 +264,6 @@ CompilerDriver::CompilerDriver( InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, std::unordered_set<std::string>* image_classes, - std::unordered_set<std::string>* compiled_classes, - std::unordered_set<std::string>* compiled_methods, size_t thread_count, int swap_fd, const ProfileCompilationInfo* profile_compilation_info) @@ -279,8 +277,6 @@ CompilerDriver::CompilerDriver( requires_constructor_barrier_lock_("constructor barrier lock"), non_relative_linker_patch_count_(0u), image_classes_(image_classes), - classes_to_compile_(compiled_classes), - methods_to_compile_(compiled_methods), number_of_soft_verifier_failures_(0), had_hard_verifier_failure_(false), parallel_thread_count_(thread_count), @@ -638,7 +634,6 @@ static void CompileMethodQuick( (verified_method->GetEncounteredVerificationFailures() & (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 && // Is eligable for compilation by methods-to-compile filter. - driver->IsMethodToCompile(method_ref) && driver->ShouldCompileBasedOnProfile(method_ref); if (compile) { @@ -781,7 +776,8 @@ void CompilerDriver::Resolve(jobject class_loader, // TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a // stable order. 
-static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, +static void ResolveConstStrings(ClassLinker* class_linker, + Handle<mirror::DexCache> dex_cache, const DexFile& dex_file, const DexFile::CodeItem* code_item) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -790,7 +786,6 @@ static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, return; } - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { switch (inst->Opcode()) { case Instruction::CONST_STRING: @@ -838,22 +833,105 @@ static void ResolveConstStrings(CompilerDriver* driver, dex_file->StringByTypeIdx(class_def.class_idx_)); if (!compilation_enabled) { // Compilation is skipped, do not resolve const-string in code of this class. - // TODO: Make sure that inlining honors this. + // FIXME: Make sure that inlining honors this. b/26687569 continue; } // Direct and virtual methods. - int64_t previous_method_idx = -1; while (it.HasNextMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - if (method_idx == previous_method_idx) { - // smali can create dex files with two encoded_methods sharing the same method_idx - // http://code.google.com/p/smali/issues/detail?id=119 - it.Next(); - continue; + ResolveConstStrings(class_linker, dex_cache, *dex_file, it.GetMethodCodeItem()); + it.Next(); + } + DCHECK(!it.HasNext()); + } + } +} + +// Initialize type check bit strings for check-cast and instance-of in the code. Done to have +// deterministic allocation behavior. Right now this is single-threaded for simplicity. +// TODO: Collect the relevant type indices in parallel, then process them sequentially in a +// stable order. + +static void InitializeTypeCheckBitstrings(CompilerDriver* driver, + ClassLinker* class_linker, + Handle<mirror::DexCache> dex_cache, + const DexFile& dex_file, + const DexFile::CodeItem* code_item) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (code_item == nullptr) { + // Abstract or native method. + return; + } + + for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { + switch (inst->Opcode()) { + case Instruction::CHECK_CAST: + case Instruction::INSTANCE_OF: { + dex::TypeIndex type_index( + (inst->Opcode() == Instruction::CHECK_CAST) ? inst->VRegB_21c() : inst->VRegC_22c()); + const char* descriptor = dex_file.StringByTypeIdx(type_index); + // We currently do not use the bitstring type check for array or final (including + // primitive) classes. We may reconsider this in future if it's deemed to be beneficial. + // And we cannot use it for classes outside the boot image as we do not know the runtime + // value of their bitstring when compiling (it may not even get assigned at runtime). + if (descriptor[0] == 'L' && driver->IsImageClass(descriptor)) { + ObjPtr<mirror::Class> klass = + class_linker->LookupResolvedType(type_index, + dex_cache.Get(), + /* class_loader */ nullptr); + CHECK(klass != nullptr) << descriptor << " should have been previously resolved."; + // Now assign the bitstring if the class is not final. Keep this in sync with sharpening. 
+ if (!klass->IsFinal()) { + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + } } - previous_method_idx = method_idx; - ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem()); + break; + } + + default: + break; + } + } +} + +static void InitializeTypeCheckBitstrings(CompilerDriver* driver, + const std::vector<const DexFile*>& dex_files, + TimingLogger* timings) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); + + for (const DexFile* dex_file : dex_files) { + dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file)); + TimingLogger::ScopedTiming t("Initialize type check bitstrings", timings); + + size_t class_def_count = dex_file->NumClassDefs(); + for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) { + const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); + + const uint8_t* class_data = dex_file->GetClassData(class_def); + if (class_data == nullptr) { + // empty class, probably a marker interface + continue; + } + + ClassDataItemIterator it(*dex_file, class_data); + it.SkipAllFields(); + + bool compilation_enabled = driver->IsClassToCompile( + dex_file->StringByTypeIdx(class_def.class_idx_)); + if (!compilation_enabled) { + // Compilation is skipped, do not look for type checks in code of this class. + // FIXME: Make sure that inlining honors this. b/26687569 + continue; + } + + // Direct and virtual methods. + while (it.HasNextMethod()) { + InitializeTypeCheckBitstrings( + driver, class_linker, dex_cache, *dex_file, it.GetMethodCodeItem()); it.Next(); } DCHECK(!it.HasNext()); @@ -955,6 +1033,14 @@ void CompilerDriver::PreCompile(jobject class_loader, UpdateImageClasses(timings); VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false); + + if (kBitstringSubtypeCheckEnabled && + GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) { + // Initialize type check bit string used by check-cast and instanceof. + // Do this now to have a deterministic image. + // Note: This is done after UpdateImageClasses() at it relies on the image classes to be final. + InitializeTypeCheckBitstrings(this, dex_files, timings); + } } bool CompilerDriver::IsImageClass(const char* descriptor) const { @@ -974,15 +1060,6 @@ bool CompilerDriver::IsClassToCompile(const char* descriptor) const { return classes_to_compile_->find(descriptor) != classes_to_compile_->end(); } -bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const { - if (methods_to_compile_ == nullptr) { - return true; - } - - std::string tmp = method_ref.PrettyMethod(); - return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end(); -} - bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const { // Profile compilation info may be null if no profile is passed. 
if (!CompilerFilter::DependsOnProfile(compiler_options_->GetCompilerFilter())) { @@ -1555,7 +1632,7 @@ class ParallelCompilationManager { self->AssertNoPendingException(); CHECK_GT(work_units, 0U); - index_.StoreRelaxed(begin); + index_.store(begin, std::memory_order_relaxed); for (size_t i = 0; i < work_units; ++i) { thread_pool_->AddTask(self, new ForAllClosureLambda<Fn>(this, end, fn)); } @@ -1573,7 +1650,7 @@ class ParallelCompilationManager { } size_t NextIndex() { - return index_.FetchAndAddSequentiallyConsistent(1); + return index_.fetch_add(1, std::memory_order_seq_cst); } private: @@ -2838,7 +2915,8 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, /*expected*/ nullptr, compiled_method); CHECK(result == MethodTable::kInsertResultSuccess); - non_relative_linker_patch_count_.FetchAndAddRelaxed(non_relative_linker_patch_count); + non_relative_linker_patch_count_.fetch_add(non_relative_linker_patch_count, + std::memory_order_relaxed); DCHECK(GetCompiledMethod(method_ref) != nullptr) << method_ref.PrettyMethod(); } @@ -2949,7 +3027,7 @@ bool CompilerDriver::IsMethodVerifiedWithoutFailures(uint32_t method_idx, } size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const { - return non_relative_linker_patch_count_.LoadRelaxed(); + return non_relative_linker_patch_count_.load(std::memory_order_relaxed); } void CompilerDriver::SetRequiresConstructorBarrier(Thread* self, diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index a5462eefe2a..55f3561e3a8 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -100,8 +100,6 @@ class CompilerDriver { InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, std::unordered_set<std::string>* image_classes, - std::unordered_set<std::string>* compiled_classes, - std::unordered_set<std::string>* compiled_methods, size_t thread_count, int swap_fd, const ProfileCompilationInfo* profile_compilation_info); @@ -316,9 +314,6 @@ class CompilerDriver { // Checks whether the provided class should be compiled, i.e., is in classes_to_compile_. bool IsClassToCompile(const char* descriptor) const; - // Checks whether the provided method should be compiled, i.e., is in method_to_compile_. - bool IsMethodToCompile(const MethodReference& method_ref) const; - // Checks whether profile guided compilation is enabled and if the method should be compiled // according to the profile file. bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const; @@ -505,12 +500,8 @@ class CompilerDriver { // This option may be restricted to the boot image, depending on a flag in the implementation. std::unique_ptr<std::unordered_set<std::string>> classes_to_compile_; - // Specifies the methods that will be compiled. Note that if methods_to_compile_ is null, - // all methods are eligible for compilation (compilation filters etc. will still apply). - // This option may be restricted to the boot image, depending on a flag in the implementation. - std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_; - std::atomic<uint32_t> number_of_soft_verifier_failures_; + bool had_hard_verifier_failure_; // A thread pool that can (potentially) run tasks in parallel. 
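A rough standalone sketch of the thunk deduplication pattern that the new CompiledMethodStorage::GetThunkCode()/SetThunkCode() interface (added in compiled_method_storage.h above) enables: callers first look up a cached thunk for a (patch type, custom value1, custom value2) key and only compile and store one if the lookup comes back empty. This is a simplified, standard-library-only analogue; the ThunkCache class and its member names are illustrative and do not match ART's actual SwapAllocator-backed, ArrayRef-returning implementation.

#include <cstdint>
#include <map>
#include <mutex>
#include <string>
#include <tuple>
#include <vector>

// Simplified stand-in for the (patch type, custom value1, custom value2) key.
using ThunkKey = std::tuple<int, uint32_t, uint32_t>;

class ThunkCache {  // Hypothetical name; mirrors the thunk map pattern above.
 public:
  // Returns the cached code for `key`, or an empty vector if nothing was stored yet.
  // Optionally reports the stored debug name, mirroring GetThunkCode()'s out-parameter.
  std::vector<uint8_t> GetThunkCode(const ThunkKey& key, std::string* debug_name = nullptr) {
    std::lock_guard<std::mutex> lock(lock_);
    auto it = map_.find(key);
    if (it == map_.end()) {
      if (debug_name != nullptr) debug_name->clear();
      return {};
    }
    if (debug_name != nullptr) *debug_name = it->second.debug_name;
    return it->second.code;
  }

  // Stores code for `key`. Multiple threads may compile the same thunk concurrently;
  // emplace() keeps the first entry, matching the note in SetThunkCode() above.
  void SetThunkCode(const ThunkKey& key, std::vector<uint8_t> code, std::string debug_name) {
    std::lock_guard<std::mutex> lock(lock_);
    map_.emplace(key, Value{std::move(code), std::move(debug_name)});
  }

 private:
  struct Value {
    std::vector<uint8_t> code;
    std::string debug_name;
  };
  std::mutex lock_;
  std::map<ThunkKey, Value> map_;
};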
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 162904c0e73..1332280d20c 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -184,59 +184,6 @@ TEST_F(CompilerDriverTest, AbstractMethodErrorStub) { } } -class CompilerDriverMethodsTest : public CompilerDriverTest { - protected: - std::unordered_set<std::string>* GetCompiledMethods() OVERRIDE { - return new std::unordered_set<std::string>({ - "byte StaticLeafMethods.identity(byte)", - "int StaticLeafMethods.sum(int, int, int)", - "double StaticLeafMethods.sum(double, double, double, double)" - }); - } -}; - -TEST_F(CompilerDriverMethodsTest, Selection) { - Thread* self = Thread::Current(); - jobject class_loader; - { - ScopedObjectAccess soa(self); - class_loader = LoadDex("StaticLeafMethods"); - } - ASSERT_NE(class_loader, nullptr); - - // Need to enable dex-file writability. Methods rejected to be compiled will run through the - // dex-to-dex compiler. - for (const DexFile* dex_file : GetDexFiles(class_loader)) { - ASSERT_TRUE(dex_file->EnableWrite()); - } - - CompileAll(class_loader); - - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ScopedObjectAccess soa(self); - StackHandleScope<1> hs(self); - Handle<mirror::ClassLoader> h_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); - mirror::Class* klass = class_linker->FindClass(self, "LStaticLeafMethods;", h_loader); - ASSERT_NE(klass, nullptr); - - std::unique_ptr<std::unordered_set<std::string>> expected(GetCompiledMethods()); - - const auto pointer_size = class_linker->GetImagePointerSize(); - for (auto& m : klass->GetDirectMethods(pointer_size)) { - std::string name = m.PrettyMethod(true); - const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); - ASSERT_NE(code, nullptr); - if (expected->find(name) != expected->end()) { - expected->erase(name); - EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code)); - } else { - EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code)); - } - } - EXPECT_TRUE(expected->empty()); -} - class CompilerDriverProfileTest : public CompilerDriverTest { protected: ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE { diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index f582341b180..c139fcf1d8a 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -20,6 +20,7 @@ #include "base/callee_save_type.h" #include "base/enums.h" #include "base/leb128.h" +#include "base/malloc_arena_pool.h" #include "class_linker.h" #include "common_runtime_test.h" #include "dex/code_item_accessors-inl.h" @@ -67,7 +68,7 @@ class ExceptionTest : public CommonRuntimeTest { fake_code_.push_back(0x70 | i); } - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stack_maps(&allocator, kRuntimeISA); diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index ac5c6fb01f8..0de00a82fa4 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -142,8 +142,6 @@ JitCompiler::JitCompiler() { instruction_set, instruction_set_features_.get(), /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, /* thread_count */ 1, /* swap_fd */ -1, /* profile_compilation_info */ nullptr)); diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 236b5c0c2e3..920a3a8da63 100644 --- a/compiler/jni/jni_cfi_test.cc 
+++ b/compiler/jni/jni_cfi_test.cc @@ -20,6 +20,7 @@ #include "arch/instruction_set.h" #include "base/arena_allocator.h" #include "base/enums.h" +#include "base/malloc_arena_pool.h" #include "cfi_test.h" #include "gtest/gtest.h" #include "jni/quick/calling_convention.h" @@ -61,7 +62,7 @@ class JNICFITest : public CFITest { const bool is_synchronized = false; const char* shorty = "IIFII"; - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); std::unique_ptr<JniCallingConvention> jni_conv( @@ -94,7 +95,11 @@ class JNICFITest : public CFITest { const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); if (kGenerateExpected) { - GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + GenerateExpected(stdout, + isa, + isa_str, + ArrayRef<const uint8_t>(actual_asm), + ArrayRef<const uint8_t>(actual_cfi)); } else { EXPECT_EQ(expected_asm, actual_asm); EXPECT_EQ(expected_cfi, actual_cfi); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 451a9099651..730a1a63e8e 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -21,6 +21,7 @@ #include "art_method-inl.h" #include "base/bit_utils.h" +#include "base/mem_map.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiler.h" @@ -29,7 +30,6 @@ #include "indirect_reference_table.h" #include "java_vm_ext.h" #include "jni_internal.h" -#include "mem_map.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" #include "mirror/object-inl.h" diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index d001cfe4fc5..8cb1998f7f6 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -27,6 +27,8 @@ #include "base/enums.h" #include "base/logging.h" // For VLOG. #include "base/macros.h" +#include "base/malloc_arena_pool.h" +#include "base/memory_region.h" #include "base/utils.h" #include "calling_convention.h" #include "class_linker.h" @@ -36,7 +38,6 @@ #include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "jni_env_ext.h" -#include "memory_region.h" #include "thread.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm64/managed_register_arm64.h" @@ -174,7 +175,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, } } - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); // Calling conventions used to iterate over parameters to method diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc deleted file mode 100644 index 6e0286afac1..00000000000 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/arm/relative_patcher_arm_base.h" - -#include "base/stl_util.h" -#include "compiled_method-inl.h" -#include "debug/method_debug_info.h" -#include "dex/dex_file_types.h" -#include "linker/linker_patch.h" -#include "linker/output_stream.h" -#include "oat.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class ArmBaseRelativePatcher::ThunkData { - public: - ThunkData(std::vector<uint8_t> code, uint32_t max_next_offset) - : code_(std::move(code)), - offsets_(), - max_next_offset_(max_next_offset), - pending_offset_(0u) { - DCHECK(NeedsNextThunk()); // The data is constructed only when we expect to need the thunk. - } - - ThunkData(ThunkData&& src) = default; - - size_t CodeSize() const { - return code_.size(); - } - - ArrayRef<const uint8_t> GetCode() const { - return ArrayRef<const uint8_t>(code_); - } - - bool NeedsNextThunk() const { - return max_next_offset_ != 0u; - } - - uint32_t MaxNextOffset() const { - DCHECK(NeedsNextThunk()); - return max_next_offset_; - } - - void ClearMaxNextOffset() { - DCHECK(NeedsNextThunk()); - max_next_offset_ = 0u; - } - - void SetMaxNextOffset(uint32_t max_next_offset) { - DCHECK(!NeedsNextThunk()); - max_next_offset_ = max_next_offset; - } - - // Adjust the MaxNextOffset() down if needed to fit the code before the next thunk. - // Returns true if it was adjusted, false if the old value was kept. - bool MakeSpaceBefore(const ThunkData& next_thunk, size_t alignment) { - DCHECK(NeedsNextThunk()); - DCHECK(next_thunk.NeedsNextThunk()); - DCHECK_ALIGNED_PARAM(MaxNextOffset(), alignment); - DCHECK_ALIGNED_PARAM(next_thunk.MaxNextOffset(), alignment); - if (next_thunk.MaxNextOffset() - CodeSize() < MaxNextOffset()) { - max_next_offset_ = RoundDown(next_thunk.MaxNextOffset() - CodeSize(), alignment); - return true; - } else { - return false; - } - } - - uint32_t ReserveOffset(size_t offset) { - DCHECK(NeedsNextThunk()); - DCHECK_LE(offset, max_next_offset_); - max_next_offset_ = 0u; // The reserved offset should satisfy all pending references. - offsets_.push_back(offset); - return offset + CodeSize(); - } - - bool HasReservedOffset() const { - return !offsets_.empty(); - } - - uint32_t LastReservedOffset() const { - DCHECK(HasReservedOffset()); - return offsets_.back(); - } - - bool HasPendingOffset() const { - return pending_offset_ != offsets_.size(); - } - - uint32_t GetPendingOffset() const { - DCHECK(HasPendingOffset()); - return offsets_[pending_offset_]; - } - - void MarkPendingOffsetAsWritten() { - DCHECK(HasPendingOffset()); - ++pending_offset_; - } - - bool HasWrittenOffset() const { - return pending_offset_ != 0u; - } - - uint32_t LastWrittenOffset() const { - DCHECK(HasWrittenOffset()); - return offsets_[pending_offset_ - 1u]; - } - - size_t IndexOfFirstThunkAtOrAfter(uint32_t offset) const { - size_t number_of_thunks = NumberOfThunks(); - for (size_t i = 0; i != number_of_thunks; ++i) { - if (GetThunkOffset(i) >= offset) { - return i; - } - } - return number_of_thunks; - } - - size_t NumberOfThunks() const { - return offsets_.size(); - } - - uint32_t GetThunkOffset(size_t index) const { - DCHECK_LT(index, NumberOfThunks()); - return offsets_[index]; - } - - private: - std::vector<uint8_t> code_; // The code of the thunk. - std::vector<uint32_t> offsets_; // Offsets at which the thunk needs to be written. - uint32_t max_next_offset_; // The maximum offset at which the next thunk can be placed. - uint32_t pending_offset_; // The index of the next offset to write. 
-}; - -class ArmBaseRelativePatcher::PendingThunkComparator { - public: - bool operator()(const ThunkData* lhs, const ThunkData* rhs) const { - DCHECK(lhs->HasPendingOffset()); - DCHECK(rhs->HasPendingOffset()); - // The top of the heap is defined to contain the highest element and we want to pick - // the thunk with the smallest pending offset, so use the reverse ordering, i.e. ">". - return lhs->GetPendingOffset() > rhs->GetPendingOffset(); - } -}; - -uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) { - return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); -} - -uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - // For multi-oat compilations (boot image), ReserveSpaceEnd() is called for each oat file. - // Since we do not know here whether this is the last file or whether the next opportunity - // to place thunk will be soon enough, we need to reserve all needed thunks now. Code for - // subsequent oat files can still call back to them. - if (!unprocessed_method_call_patches_.empty()) { - ResolveMethodCalls(offset, MethodReference(nullptr, dex::kDexNoIndex)); - } - for (ThunkData* data : unreserved_thunks_) { - uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); - offset = data->ReserveOffset(thunk_offset); - } - unreserved_thunks_.clear(); - // We also need to delay initiating the pending_thunks_ until the call to WriteThunks(). - // Check that the `pending_thunks_.capacity()` indicates that no WriteThunks() has taken place. - DCHECK_EQ(pending_thunks_.capacity(), 0u); - return offset; -} - -uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { - if (pending_thunks_.capacity() == 0u) { - if (thunks_.empty()) { - return offset; - } - // First call to WriteThunks(), prepare the thunks for writing. - pending_thunks_.reserve(thunks_.size()); - for (auto& entry : thunks_) { - ThunkData* data = &entry.second; - if (data->HasPendingOffset()) { - pending_thunks_.push_back(data); - } - } - std::make_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - } - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - while (!pending_thunks_.empty() && - pending_thunks_.front()->GetPendingOffset() == aligned_offset) { - // Write alignment bytes and code. - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && UNLIKELY(!WriteCodeAlignment(out, aligned_code_delta))) { - return 0u; - } - if (UNLIKELY(!WriteThunk(out, pending_thunks_.front()->GetCode()))) { - return 0u; - } - offset = aligned_offset + pending_thunks_.front()->CodeSize(); - // Mark the thunk as written at the pending offset and update the `pending_thunks_` heap. 
- std::pop_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - pending_thunks_.back()->MarkPendingOffsetAsWritten(); - if (pending_thunks_.back()->HasPendingOffset()) { - std::push_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - } else { - pending_thunks_.pop_back(); - } - aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - } - DCHECK(pending_thunks_.empty() || pending_thunks_.front()->GetPendingOffset() > aligned_offset); - return offset; -} - -std::vector<debug::MethodDebugInfo> ArmBaseRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset) { - // For multi-oat compilation (boot image), `thunks_` records thunks for all oat files. - // To return debug info for the current oat file, we must ignore thunks before the - // `executable_offset` as they are in the previous oat files and this function must be - // called before reserving thunk positions for subsequent oat files. - size_t number_of_thunks = 0u; - for (auto&& entry : thunks_) { - const ThunkData& data = entry.second; - number_of_thunks += data.NumberOfThunks() - data.IndexOfFirstThunkAtOrAfter(executable_offset); - } - std::vector<debug::MethodDebugInfo> result; - result.reserve(number_of_thunks); - for (auto&& entry : thunks_) { - const ThunkKey& key = entry.first; - const ThunkData& data = entry.second; - size_t start = data.IndexOfFirstThunkAtOrAfter(executable_offset); - if (start == data.NumberOfThunks()) { - continue; - } - // Get the base name to use for the first occurrence of the thunk. - std::string base_name = GetThunkDebugName(key); - for (size_t i = start, num = data.NumberOfThunks(); i != num; ++i) { - debug::MethodDebugInfo info = {}; - if (i == 0u) { - info.custom_name = base_name; - } else { - // Add a disambiguating tag for subsequent identical thunks. Since the `thunks_` - // keeps records also for thunks in previous oat files, names based on the thunk - // index shall be unique across the whole multi-oat output. - info.custom_name = base_name + "_" + std::to_string(i); - } - info.isa = instruction_set_; - info.is_code_address_text_relative = true; - info.code_address = data.GetThunkOffset(i) - executable_offset; - info.code_size = data.CodeSize(); - result.push_back(std::move(info)); - } - } - return result; -} - -ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set) - : provider_(provider), - instruction_set_(instruction_set), - thunks_(), - unprocessed_method_call_patches_(), - method_call_thunk_(nullptr), - pending_thunks_() { -} - -ArmBaseRelativePatcher::~ArmBaseRelativePatcher() { - // All work done by member destructors. -} - -uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref, - uint32_t max_extra_space) { - // Adjust code size for extra space required by the subclass. 
- uint32_t max_code_size = compiled_method->GetQuickCode().size() + max_extra_space; - uint32_t code_offset; - uint32_t next_aligned_offset; - while (true) { - code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - next_aligned_offset = compiled_method->AlignCode(code_offset + max_code_size); - if (unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { - break; - } - ThunkData* thunk = unreserved_thunks_.front(); - if (thunk == method_call_thunk_) { - ResolveMethodCalls(code_offset, method_ref); - // This may have changed `method_call_thunk_` data, so re-check if we need to reserve. - if (unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { - break; - } - // We need to process the new `front()` whether it's still the `method_call_thunk_` or not. - thunk = unreserved_thunks_.front(); - } - unreserved_thunks_.pop_front(); - uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); - offset = thunk->ReserveOffset(thunk_offset); - if (thunk == method_call_thunk_) { - // All remaining method call patches will be handled by this thunk. - DCHECK(!unprocessed_method_call_patches_.empty()); - DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(), - MaxPositiveDisplacement(GetMethodCallKey())); - unprocessed_method_call_patches_.clear(); - } - } - - // Process patches and check that adding thunks for the current method did not push any - // thunks (previously existing or newly added) before `next_aligned_offset`. This is - // essentially a check that we never compile a method that's too big. The calls or branches - // from the method should be able to reach beyond the end of the method and over any pending - // thunks. (The number of different thunks should be relatively low and their code short.) - ProcessPatches(compiled_method, code_offset); - CHECK(unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset); - - return offset; -} - -uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_offset, - uint32_t target_offset) { - DCHECK(method_call_thunk_ != nullptr); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); - uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); - // NOTE: With unsigned arithmetic we do mean to use && rather than || below. - if (displacement > max_positive_displacement && displacement < -max_negative_displacement) { - // Unwritten thunks have higher offsets, check if it's within range. - DCHECK(!method_call_thunk_->HasPendingOffset() || - method_call_thunk_->GetPendingOffset() > patch_offset); - if (method_call_thunk_->HasPendingOffset() && - method_call_thunk_->GetPendingOffset() - patch_offset <= max_positive_displacement) { - displacement = method_call_thunk_->GetPendingOffset() - patch_offset; - } else { - // We must have a previous thunk then. 
- DCHECK(method_call_thunk_->HasWrittenOffset()); - DCHECK_LT(method_call_thunk_->LastWrittenOffset(), patch_offset); - displacement = method_call_thunk_->LastWrittenOffset() - patch_offset; - DCHECK_GE(displacement, -max_negative_displacement); - } - } - return displacement; -} - -uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset) { - auto it = thunks_.find(key); - CHECK(it != thunks_.end()); - const ThunkData& data = it->second; - if (data.HasWrittenOffset()) { - uint32_t offset = data.LastWrittenOffset(); - DCHECK_LT(offset, patch_offset); - if (patch_offset - offset <= MaxNegativeDisplacement(key)) { - return offset; - } - } - DCHECK(data.HasPendingOffset()); - uint32_t offset = data.GetPendingOffset(); - DCHECK_GT(offset, patch_offset); - DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key)); - return offset; -} - -ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() { - return ThunkKey(ThunkType::kMethodCall); -} - -ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey( - const LinkerPatch& patch) { - DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); - return ThunkKey(ThunkType::kBakerReadBarrier, - patch.GetBakerCustomValue1(), - patch.GetBakerCustomValue2()); -} - -void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method, - uint32_t code_offset) { - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - uint32_t patch_offset = code_offset + patch.LiteralOffset(); - ThunkKey key(static_cast<ThunkType>(-1)); - ThunkData* old_data = nullptr; - if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - key = GetMethodCallKey(); - unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod()); - if (method_call_thunk_ == nullptr) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); - auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset)); - method_call_thunk_ = &it->second; - AddUnreservedThunk(method_call_thunk_); - } else { - old_data = method_call_thunk_; - } - } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - key = GetBakerThunkKey(patch); - auto lb = thunks_.lower_bound(key); - if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); - auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset)); - AddUnreservedThunk(&it->second); - } else { - old_data = &lb->second; - } - } - if (old_data != nullptr) { - // Shared path where an old thunk may need an update. - DCHECK(key.GetType() != static_cast<ThunkType>(-1)); - DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset); - if (old_data->NeedsNextThunk()) { - // Patches for a method are ordered by literal offset, so if we still need to place - // this thunk for a previous patch, that thunk shall be in range for this patch. 
- DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key)); - } else { - if (!old_data->HasReservedOffset() || - patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) { - old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key)); - AddUnreservedThunk(old_data); - } - } - } - } -} - -void ArmBaseRelativePatcher::AddUnreservedThunk(ThunkData* data) { - DCHECK(data->NeedsNextThunk()); - size_t index = unreserved_thunks_.size(); - while (index != 0u && data->MaxNextOffset() < unreserved_thunks_[index - 1u]->MaxNextOffset()) { - --index; - } - unreserved_thunks_.insert(unreserved_thunks_.begin() + index, data); - // We may need to update the max next offset(s) if the thunk code would not fit. - size_t alignment = GetInstructionSetAlignment(instruction_set_); - if (index + 1u != unreserved_thunks_.size()) { - // Note: Ignore the return value as we need to process previous thunks regardless. - data->MakeSpaceBefore(*unreserved_thunks_[index + 1u], alignment); - } - // Make space for previous thunks. Once we find a pending thunk that does - // not need an adjustment, we can stop. - while (index != 0u && unreserved_thunks_[index - 1u]->MakeSpaceBefore(*data, alignment)) { - --index; - data = unreserved_thunks_[index]; - } -} - -void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, - MethodReference method_ref) { - DCHECK(!unreserved_thunks_.empty()); - DCHECK(!unprocessed_method_call_patches_.empty()); - DCHECK(method_call_thunk_ != nullptr); - uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); - uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); - // Process as many patches as possible, stop only on unresolved targets or calls too far back. - while (!unprocessed_method_call_patches_.empty()) { - MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod(); - uint32_t patch_offset = unprocessed_method_call_patches_.front().GetPatchOffset(); - DCHECK(!method_call_thunk_->HasReservedOffset() || - method_call_thunk_->LastReservedOffset() <= patch_offset); - if (!method_call_thunk_->HasReservedOffset() || - patch_offset - method_call_thunk_->LastReservedOffset() > max_negative_displacement) { - // No previous thunk in range, check if we can reach the target directly. - if (target_method == method_ref) { - DCHECK_GT(quick_code_offset, patch_offset); - if (quick_code_offset - patch_offset > max_positive_displacement) { - break; - } - } else { - auto result = provider_->FindMethodOffset(target_method); - if (!result.first) { - break; - } - uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_); - if (target_offset >= patch_offset) { - DCHECK_LE(target_offset - patch_offset, max_positive_displacement); - } else if (patch_offset - target_offset > max_negative_displacement) { - break; - } - } - } - unprocessed_method_call_patches_.pop_front(); - } - if (!unprocessed_method_call_patches_.empty()) { - // Try to adjust the max next offset in `method_call_thunk_`. Do this conservatively only if - // the thunk shall be at the end of the `unreserved_thunks_` to avoid dealing with overlaps. 
- uint32_t new_max_next_offset = - unprocessed_method_call_patches_.front().GetPatchOffset() + max_positive_displacement; - if (new_max_next_offset > - unreserved_thunks_.back()->MaxNextOffset() + unreserved_thunks_.back()->CodeSize()) { - method_call_thunk_->ClearMaxNextOffset(); - method_call_thunk_->SetMaxNextOffset(new_max_next_offset); - if (method_call_thunk_ != unreserved_thunks_.back()) { - RemoveElement(unreserved_thunks_, method_call_thunk_); - unreserved_thunks_.push_back(method_call_thunk_); - } - } - } else { - // We have resolved all method calls, we do not need a new thunk anymore. - method_call_thunk_->ClearMaxNextOffset(); - RemoveElement(unreserved_thunks_, method_call_thunk_); - } -} - -inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset, - const ThunkKey& key) { - return RoundDown(patch_offset + MaxPositiveDisplacement(key), - GetInstructionSetAlignment(instruction_set_)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h deleted file mode 100644 index ee09bf96b3d..00000000000 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ -#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ - -#include <deque> -#include <vector> - -#include "base/safe_map.h" -#include "dex/method_reference.h" -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class ArmBaseRelativePatcher : public RelativePatcher { - public: - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - protected: - ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set); - ~ArmBaseRelativePatcher(); - - enum class ThunkType { - kMethodCall, // Method call thunk. - kBakerReadBarrier, // Baker read barrier. 
- }; - - class ThunkKey { - public: - explicit ThunkKey(ThunkType type, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) - : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) { } - - ThunkType GetType() const { - return type_; - } - - uint32_t GetCustomValue1() const { - return custom_value1_; - } - - uint32_t GetCustomValue2() const { - return custom_value2_; - } - - private: - ThunkType type_; - uint32_t custom_value1_; - uint32_t custom_value2_; - }; - - class ThunkKeyCompare { - public: - bool operator()(const ThunkKey& lhs, const ThunkKey& rhs) const { - if (lhs.GetType() != rhs.GetType()) { - return lhs.GetType() < rhs.GetType(); - } - if (lhs.GetCustomValue1() != rhs.GetCustomValue1()) { - return lhs.GetCustomValue1() < rhs.GetCustomValue1(); - } - return lhs.GetCustomValue2() < rhs.GetCustomValue2(); - } - }; - - static ThunkKey GetMethodCallKey(); - static ThunkKey GetBakerThunkKey(const LinkerPatch& patch); - - uint32_t ReserveSpaceInternal(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref, - uint32_t max_extra_space); - uint32_t GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset); - - uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset, - uint32_t target_offset); - - virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0; - virtual std::string GetThunkDebugName(const ThunkKey& key) = 0; - virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0; - virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0; - - private: - class ThunkData; - - void ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset); - void AddUnreservedThunk(ThunkData* data); - - void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref); - - uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key); - - RelativePatcherTargetProvider* const provider_; - const InstructionSet instruction_set_; - - // The data for all thunks. - // SafeMap<> nodes don't move after being inserted, so we can use direct pointers to the data. - using ThunkMap = SafeMap<ThunkKey, ThunkData, ThunkKeyCompare>; - ThunkMap thunks_; - - // ReserveSpace() tracks unprocessed method call patches. These may be resolved later. - class UnprocessedMethodCallPatch { - public: - UnprocessedMethodCallPatch(uint32_t patch_offset, MethodReference target_method) - : patch_offset_(patch_offset), target_method_(target_method) { } - - uint32_t GetPatchOffset() const { - return patch_offset_; - } - - MethodReference GetTargetMethod() const { - return target_method_; - } - - private: - uint32_t patch_offset_; - MethodReference target_method_; - }; - std::deque<UnprocessedMethodCallPatch> unprocessed_method_call_patches_; - // Once we have compiled a method call thunk, cache pointer to the data. - ThunkData* method_call_thunk_; - - // Thunks - std::deque<ThunkData*> unreserved_thunks_; - - class PendingThunkComparator; - std::vector<ThunkData*> pending_thunks_; // Heap with the PendingThunkComparator. 
- - friend class Arm64RelativePatcherTest; - friend class Thumb2RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc deleted file mode 100644 index 78755176e43..00000000000 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm/relative_patcher_thumb2.h" - -#include <sstream> - -#include "arch/arm/asm_support_arm.h" -#include "art_method.h" -#include "base/bit_utils.h" -#include "compiled_method.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "linker/linker_patch.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "read_barrier.h" -#include "utils/arm/assembler_arm_vixl.h" - -namespace art { -namespace linker { - -// PC displacement from patch location; Thumb2 PC is always at instruction address + 4. -static constexpr int32_t kPcDisplacement = 4; - -// Maximum positive and negative displacement for method call measured from the patch location. -// (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from -// the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) -constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; -constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; - -// Maximum positive and negative displacement for a conditional branch measured from the patch -// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured -// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.) -constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement; -constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement; - -Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) - : ArmBaseRelativePatcher(provider, InstructionSet::kThumb2) { -} - -void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 1u, 0u); - DCHECK_EQ(patch_offset & 1u, 0u); - DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. - uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - DCHECK_EQ(displacement & 1u, 0u); - DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. 
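For reference, the displacement limits declared at the top of the deleted relative_patcher_thumb2.cc can be cross-checked with a couple of static_asserts. This is an illustrative sketch only, restating the same formulas as the deleted constants:

#include <cstdint>

constexpr uint32_t kPcDisplacement = 4u;  // Thumb2 PC = patch location + 4.

// BL: signed 25-bit, even displacement from the PC => [-2^24, 2^24 - 2].
constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2u + kPcDisplacement;
constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement;
static_assert(kMaxMethodCallPositiveDisplacement == 16u * 1024u * 1024u + 2u,
              "BL reaches 16 MiB + 2 forward, measured from the patch location");
static_assert(kMaxMethodCallNegativeDisplacement == 16u * 1024u * 1024u - 4u,
              "BL reaches 16 MiB - 4 backward, measured from the patch location");

// B<cond>.W: signed 21-bit, even displacement from the PC => [-2^20, 2^20 - 2].
constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement;
static_assert(kMaxBcondPositiveDisplacement == 1u * 1024u * 1024u + 2u,
              "B<cond>.W reaches 1 MiB + 2 forward, measured from the patch location");

These are the same bounds the deleted tests below exercise with their "almost too far" and "just too far" method layouts.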
- uint32_t signbit = (displacement >> 31) & 0x1; - uint32_t i1 = (displacement >> 23) & 0x1; - uint32_t i2 = (displacement >> 22) & 0x1; - uint32_t imm10 = (displacement >> 12) & 0x03ff; - uint32_t imm11 = (displacement >> 1) & 0x07ff; - uint32_t j1 = i1 ^ (signbit ^ 1); - uint32_t j2 = i2 ^ (signbit ^ 1); - uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; - value |= 0xf000d000; // BL - - // Check that we're just overwriting an existing BL. - DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000); - // Write the new BL. - SetInsn32(code, literal_offset, value); -} - -void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t literal_offset = patch.LiteralOffset(); - uint32_t pc_literal_offset = patch.PcInsnOffset(); - uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */; - uint32_t diff = target_offset - pc_base; - - uint32_t insn = GetInsn32(code, literal_offset); - DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0). - uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu); - uint32_t imm4 = (diff16 >> 12) & 0xfu; - uint32_t imm = (diff16 >> 11) & 0x1u; - uint32_t imm3 = (diff16 >> 8) & 0x7u; - uint32_t imm8 = diff16 & 0xffu; - insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8; - SetInsn32(code, literal_offset, insn); -} - -void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) { - DCHECK_ALIGNED(patch_offset, 2u); - uint32_t literal_offset = patch.LiteralOffset(); - DCHECK_ALIGNED(literal_offset, 2u); - DCHECK_LT(literal_offset, code->size()); - uint32_t insn = GetInsn32(code, literal_offset); - DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched) - ThunkKey key = GetBakerThunkKey(patch); - if (kIsDebugBuild) { - const uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - // Check that the next instruction matches the expected LDR. - switch (kind) { - case BakerReadBarrierKind::kField: { - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - if (width == BakerReadBarrierWidth::kWide) { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (immediate), encoding T3, with correct base_reg. - CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); - } else { - DCHECK_GE(code->size() - literal_offset, 6u); - uint32_t next_insn = GetInsn16(code, literal_offset + 4u); - // LDR (immediate), encoding T1, with correct base_reg. - CheckValidReg(next_insn & 0x7u); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); - } - break; - } - case BakerReadBarrierKind::kArray: { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). - CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. 
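Aside: the deleted PatchPcRelativeReference() above distributes a 32-bit PC-relative value across a MOVW/MOVT pair by rewriting the i/imm4/imm3/imm8 fields in place. A standalone sketch of the same splitting, producing fresh instruction words (in the patcher's half-word-swapped form) instead of patching existing ones; illustrative only, helper names are not from the ART sources:

#include <cstdint>

uint32_t EncodeMovImm16(uint32_t templ, uint32_t imm16) {
  uint32_t imm4 = (imm16 >> 12) & 0xfu;
  uint32_t i    = (imm16 >> 11) & 0x1u;
  uint32_t imm3 = (imm16 >> 8) & 0x7u;
  uint32_t imm8 = imm16 & 0xffu;
  return templ | (i << 26) | (imm4 << 16) | (imm3 << 12) | imm8;
}

void BuildMovwMovt(uint32_t value, uint32_t* movw, uint32_t* movt) {
  *movw = EncodeMovImm16(0xf2400000u, value & 0xffffu);  // MOVW Rd, #lo16 (Rd = r0 here).
  *movt = EncodeMovImm16(0xf2c00000u, value >> 16);      // MOVT Rd, #hi16.
}

The words produced this way match the expected_code that the deleted CheckPcRelativePatch() test helper builds further below.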
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); - CheckValidReg(next_insn & 0xf); // Check index register - break; - } - case BakerReadBarrierKind::kGcRoot: { - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - if (width == BakerReadBarrierWidth::kWide) { - DCHECK_GE(literal_offset, 4u); - uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); - // LDR (immediate), encoding T3, with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); - } else { - DCHECK_GE(literal_offset, 2u); - uint32_t prev_insn = GetInsn16(code, literal_offset - 2u); - // LDR (immediate), encoding T1, with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); - } - break; - } - default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); - UNREACHABLE(); - } - } - uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); - DCHECK_ALIGNED(target_offset, 4u); - uint32_t disp = target_offset - (patch_offset + kPcDisplacement); - DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu); // 21-bit signed. - insn |= ((disp << (26 - 20)) & 0x04000000u) | // Shift bit 20 to 26, "S". - ((disp >> (19 - 11)) & 0x00000800u) | // Shift bit 19 to 13, "J1". - ((disp >> (18 - 13)) & 0x00002000u) | // Shift bit 18 to 11, "J2". - ((disp << (16 - 12)) & 0x003f0000u) | // Shift bits 12-17 to 16-25, "imm6". - ((disp >> (1 - 0)) & 0x000007ffu); // Shift bits 1-12 to 0-11, "imm11". - SetInsn32(code, literal_offset, insn); -} - -#define __ assembler.GetVIXLAssembler()-> - -static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, - vixl::aarch32::Register base_reg, - vixl::aarch32::MemOperand& lock_word, - vixl::aarch32::Label* slow_path, - int32_t raw_ldr_offset) { - using namespace vixl::aarch32; // NOLINT(build/namespaces) - // Load the lock word containing the rb_state. - __ Ldr(ip, lock_word); - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); - __ B(ne, slow_path, /* is_far_target */ false); - __ Add(lr, lr, raw_ldr_offset); - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); - __ Bx(lr); // And return back to the function. - // Note: The fake dependency is unnecessary for the slow path. -} - -// Load the read barrier introspection entrypoint in register `entrypoint` -static void LoadReadBarrierMarkIntrospectionEntrypoint(arm::ArmVIXLAssembler& assembler, - vixl::aarch32::Register entrypoint) { - using vixl::aarch32::MemOperand; - using vixl::aarch32::ip; - // Thread Register. - const vixl::aarch32::Register tr = vixl::aarch32::r9; - - // The register where the read barrier introspection entrypoint is loaded - // is fixed: `Thumb2RelativePatcher::kBakerCcEntrypointRegister` (R4). 
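Aside: the field packing at the end of PatchBakerReadBarrierBranch() above writes the BNE.W (encoding T3) displacement bits into the half-word-swapped instruction word. The authoritative mapping is the one used by BneWWithOffset() in the deleted test further below: displacement bit 20 goes to "S" (word bit 26), bit 19 to "J2" (word bit 11), bit 18 to "J1" (word bit 13), bits 12-17 to "imm6" (word bits 16-21), and bits 1-11 to "imm11" (word bits 0-10). A standalone sketch, illustrative only:

#include <cstdint>

// BNE.W, encoding T3, in the half-word-swapped word form used by the patcher
// (first half-word in bits 16-31, second half-word in bits 0-15).
constexpr uint32_t kBneWPlus0 = 0xf0408000u;

// 'disp' is the byte displacement from the Thumb2 PC (patch location + 4);
// it must be even and fit the signed 21-bit B<cond> range.
uint32_t EncodeBneW(uint32_t disp) {
  return kBneWPlus0 |
         ((disp >> 1) & 0x7ffu) |          // imm11 <- disp bits 1-11.
         (((disp >> 12) & 0x3fu) << 16) |  // imm6  <- disp bits 12-17.
         (((disp >> 18) & 1u) << 13) |     // J1    <- disp bit 18.
         (((disp >> 19) & 1u) << 11) |     // J2    <- disp bit 19.
         (((disp >> 20) & 1u) << 26);      // S     <- disp bit 20 (sign).
}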
- DCHECK_EQ(entrypoint.GetCode(), Thumb2RelativePatcher::kBakerCcEntrypointRegister); - // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip.GetCode(), 12u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); - __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); -} - -void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, - uint32_t encoded_data) { - using namespace vixl::aarch32; // NOLINT(build/namespaces) - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - switch (kind) { - case BakerReadBarrierKind::kField: { - // Check if the holder is gray and, if not, add fake dependency to the base register - // and return to the LDR instruction to load the reference. Otherwise, use introspection - // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister) - // that performs further checks on the reference and marks it if needed. - Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); - CheckValidReg(holder_reg.GetCode()); - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - // If base_reg differs from holder_reg, the offset was too large and we must have - // emitted an explicit null check before the load. Otherwise, we need to null-check - // the holder as we do not necessarily do that check before going to the thunk. - vixl::aarch32::Label throw_npe; - if (holder_reg.Is(base_reg)) { - __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false); - } - vixl::aarch32::Label slow_path; - MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) - ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET - : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); - __ Bind(&slow_path); - const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - raw_ldr_offset; - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - if (width == BakerReadBarrierWidth::kWide) { - MemOperand ldr_half_address(lr, ldr_offset + 2); - __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". - __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. - __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. - } else { - MemOperand ldr_address(lr, ldr_offset); - __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. - __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint - ep_reg, // for narrow LDR. - Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); - __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. - __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. - } - // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Bx(ep_reg); // Jump to the entrypoint. - if (holder_reg.Is(base_reg)) { - // Add null check slow path. The stack map is at the address pointed to by LR. 
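Note on the "fake dependency" emitted by EmitGrayCheckAndFastPath() above: ADD base_reg, base_reg, ip, LSR #32 adds zero, but it makes the subsequent reference load address-depend on the lock word load, which is what orders the two loads without a memory barrier. A rough C-level analogue of the arithmetic only; the ordering guarantee comes from the hardware address dependency in the generated ARM code, not from C semantics:

#include <cstdint>

// base + (lock_word >> 32) with a 32-bit lock word is always base + 0, but the
// resulting address now depends on the loaded lock word. (Plain C makes no
// ordering promise here; the generated ARM code relies on the address
// dependency being preserved by the hardware.)
uint32_t AddFakeDependency(uint32_t base, uint32_t lock_word) {
  return base + static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);
}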
- __ Bind(&throw_npe); - int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset)); - __ Bx(ip); - } - break; - } - case BakerReadBarrierKind::kArray: { - Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - vixl::aarch32::Label slow_path; - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); - DCHECK_LT(lock_word.GetOffsetImmediate(), 0); - const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); - __ Bind(&slow_path); - const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - raw_ldr_offset; - MemOperand ldr_address(lr, ldr_offset + 2); - __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", - // i.e. Rm+32 because the scale in imm2 is 2. - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create - // a switch case target based on the index register. - __ Mov(ip, base_reg); // Move the base register to ip0. - __ Bx(ep_reg); // Jump to the entrypoint's array switch case. - break; - } - case BakerReadBarrierKind::kGcRoot: { - // Check if the reference needs to be marked and if so (i.e. not null, not marked yet - // and it does not have a forwarding address), call the correct introspection entrypoint; - // otherwise return the reference (or the extracted forwarding address). - // There is no gray bit check for GC roots. - Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(root_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - vixl::aarch32::Label return_label, not_marked, forwarding_address; - __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); - MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); - __ Ldr(ip, lock_word); - __ Tst(ip, LockWord::kMarkBitStateMaskShifted); - __ B(eq, ¬_marked); - __ Bind(&return_label); - __ Bx(lr); - __ Bind(¬_marked); - static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, - "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " - " the highest bits and the 'forwarding address' state to have all bits set"); - __ Cmp(ip, Operand(0xc0000000)); - __ B(hs, &forwarding_address); - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister - // to art_quick_read_barrier_mark_introspection_gc_roots. - int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) - ? 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET - : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; - __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); - __ Mov(ip, root_reg); - __ Bx(ep_reg); - __ Bind(&forwarding_address); - __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); - __ Bx(lr); - break; - } - default: - LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); - UNREACHABLE(); - } -} - -std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - arm::ArmVIXLAssembler assembler(&allocator); - - switch (key.GetType()) { - case ThunkType::kMethodCall: - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. - assembler.LoadFromOffset( - arm::kLoadWord, - vixl::aarch32::pc, - vixl::aarch32::r0, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - __ Bkpt(0); - break; - case ThunkType::kBakerReadBarrier: - CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1()); - break; - } - - assembler.FinalizeCode(); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; -} - -std::string Thumb2RelativePatcher::GetThunkDebugName(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return "MethodCallThunk"; - - case ThunkType::kBakerReadBarrier: { - uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - std::ostringstream oss; - oss << "BakerReadBarrierThunk"; - switch (kind) { - case BakerReadBarrierKind::kField: - oss << "Field"; - if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { - oss << "Wide"; - } - oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) - << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); - break; - case BakerReadBarrierKind::kArray: - oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); - break; - case BakerReadBarrierKind::kGcRoot: - oss << "GcRoot"; - if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { - oss << "Wide"; - } - oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - break; - } - return oss.str(); - } - } -} - -#undef __ - -uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallPositiveDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondPositiveDisplacement; - } -} - -uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallNegativeDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondNegativeDisplacement; - } -} - -void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { - DCHECK_LE(offset + 4u, code->size()); - DCHECK_ALIGNED(offset, 2u); - uint8_t* addr = &(*code)[offset]; - addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 
24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; -} - -uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 4u, code.size()); - DCHECK_ALIGNED(offset, 2u); - const uint8_t* addr = &code[offset]; - return - (static_cast<uint32_t>(addr[0]) << 16) + - (static_cast<uint32_t>(addr[1]) << 24) + - (static_cast<uint32_t>(addr[2]) << 0)+ - (static_cast<uint32_t>(addr[3]) << 8); -} - -template <typename Vector> -uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - return GetInsn32(ArrayRef<const uint8_t>(*code), offset); -} - -uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 2u, code.size()); - DCHECK_ALIGNED(offset, 2u); - const uint8_t* addr = &code[offset]; - return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8); -} - -template <typename Vector> -uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - return GetInsn16(ArrayRef<const uint8_t>(*code), offset); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h deleted file mode 100644 index 68386c00f4a..00000000000 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ -#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ - -#include "arch/arm/registers_arm.h" -#include "base/array_ref.h" -#include "base/bit_field.h" -#include "base/bit_utils.h" -#include "linker/arm/relative_patcher_arm_base.h" - -namespace art { - -namespace arm { -class ArmVIXLAssembler; -} // namespace arm - -namespace linker { - -class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { - public: - static constexpr uint32_t kBakerCcEntrypointRegister = 4u; - - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, - uint32_t holder_reg, - bool narrow) { - CheckValidReg(base_reg); - CheckValidReg(holder_reg); - DCHECK(!narrow || base_reg < 8u) << base_reg; - BakerReadBarrierWidth width = - narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg) | - BakerReadBarrierWidthField::Encode(width); - } - - static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { - CheckValidReg(base_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | - BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); - } - - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { - CheckValidReg(root_reg); - DCHECK(!narrow || root_reg < 8u) << root_reg; - BakerReadBarrierWidth width = - narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | - BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | - BakerReadBarrierWidthField::Encode(width); - } - - explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); - - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - - protected: - std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - std::string GetThunkDebugName(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; - uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; - - private: - static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u; - - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kArray, // Array get with index in register. - kGcRoot, // GC root load. - kLast = kGcRoot - }; - - enum class BakerReadBarrierWidth : uint8_t { - kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). - kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). 
- kLast = kNarrow - }; - - static constexpr size_t kBitsForBakerReadBarrierKind = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); - static constexpr size_t kBitsForRegister = 4u; - using BakerReadBarrierKindField = - BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; - using BakerReadBarrierFirstRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; - using BakerReadBarrierSecondRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; - static constexpr size_t kBitsForBakerReadBarrierWidth = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); - using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth, - kBitsForBakerReadBarrierKind + 2 * kBitsForRegister, - kBitsForBakerReadBarrierWidth>; - - static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg; - } - - void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); - - void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); - static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Vector> - static uint32_t GetInsn32(Vector* code, uint32_t offset); - - static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Vector> - static uint32_t GetInsn16(Vector* code, uint32_t offset); - - friend class Thumb2RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc deleted file mode 100644 index 2c22a352c23..00000000000 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ /dev/null @@ -1,1287 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm/relative_patcher_thumb2.h" - -#include "base/casts.h" -#include "linker/relative_patcher_test.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class Thumb2RelativePatcherTest : public RelativePatcherTest { - public: - Thumb2RelativePatcherTest() : RelativePatcherTest(InstructionSet::kThumb2, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kNopRawCode[]; - static const ArrayRef<const uint8_t> kNopCode; - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - static const uint32_t kPcInsnOffset; - - // The PC in Thumb mode is 4 bytes after the instruction location. 
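Aside: the encoded_data word produced by the Encode* helpers in the deleted header above is a small bit-field: kind in bits [0,2), first register in [2,6), second register in [6,10), width in bit 10, assuming kBitsForBakerReadBarrierKind == 2 (which MinimumBitsToStore(kGcRoot == 2) yields) and kBitsForRegister == 4. A hypothetical standalone decoder, with names that are not from the ART sources:

#include <cstdint>

struct BakerData {
  uint32_t kind;        // 0 = field, 1 = array, 2 = GC root.
  uint32_t first_reg;   // Base register (field/array) or root register.
  uint32_t second_reg;  // Holder register for fields; 15 (pc) when unused.
  bool narrow;          // 16-bit vs 32-bit LDR (kNarrow == 1 per the enum order above).
};

BakerData DecodeBakerData(uint32_t encoded_data) {
  return BakerData{
      encoded_data & 0x3u,
      (encoded_data >> 2) & 0xfu,
      (encoded_data >> 6) & 0xfu,
      ((encoded_data >> 10) & 0x1u) != 0u,
  };
}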
- static constexpr uint32_t kPcAdjustment = 4u; - - // Branches within range [-256, 256) can be created from these by adding the low 8 bits. - static constexpr uint32_t kBlPlus0 = 0xf000f800u; - static constexpr uint32_t kBlMinus256 = 0xf7ffff00u; - - // Special BL values. - static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu; - static constexpr uint32_t kBlMinusMax = 0xf400d000u; - - // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. - static constexpr uint32_t kBneWPlus0 = 0xf0408000u; - - // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn. - static constexpr uint32_t kLdrInsn = 0x6800u; - - // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. - static constexpr uint32_t kLdrWInsn = 0xf8d00000u; - - // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract. - static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u; - - // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift. - static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u; - - // NOP instructions. - static constexpr uint32_t kNopInsn = 0xbf00u; - static constexpr uint32_t kNopWInsn = 0xf3af8000u; - - void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { - CHECK_LE(pos, code->size()); - if (IsUint<16>(insn)) { - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - }; - static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } else { - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn >> 16), - static_cast<uint8_t>(insn >> 24), - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - }; - static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } - } - - void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { - InsertInsn(code, code->size(), insn); - } - - std::vector<uint8_t> GenNops(size_t num_nops) { - std::vector<uint8_t> result; - result.reserve(num_nops * 2u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - return result; - } - - std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { - std::vector<uint8_t> raw_code; - size_t number_of_16_bit_insns = - std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); }); - raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u); - for (uint32_t insn : insns) { - PushBackInsn(&raw_code, insn); - } - return raw_code; - } - - uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) { - if (!IsAligned<2u>(bne_offset)) { - LOG(ERROR) << "Unaligned bne_offset: " << bne_offset; - return 0xffffffffu; // Fails code diff later. - } - if (!IsAligned<2u>(target_offset)) { - LOG(ERROR) << "Unaligned target_offset: " << target_offset; - return 0xffffffffu; // Fails code diff later. - } - uint32_t diff = target_offset - bne_offset - kPcAdjustment; - DCHECK_ALIGNED(diff, 2u); - if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) { - LOG(ERROR) << "Target out of range: " << diff; - return 0xffffffffu; // Fails code diff later. 
- } - return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11 - | (((diff >> 12) & 0x3fu) << 16) // imm6 - | (((diff >> 18) & 1) << 13) // J1 - | (((diff >> 19) & 1) << 11) // J2 - | (((diff >> 20) & 1) << 26); // S - } - - bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, - const ArrayRef<const LinkerPatch>& method1_patches, - const ArrayRef<const uint8_t>& method3_code, - const ArrayRef<const LinkerPatch>& method3_patches, - uint32_t distance_without_thunks) { - CHECK_EQ(distance_without_thunks % kArmAlignment, 0u); - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); - - // We want to put the method3 at a very precise offset. - const uint32_t method3_offset = method1_offset + distance_without_thunks; - CHECK_ALIGNED(method3_offset, kArmAlignment); - - // Calculate size of method2 so that we put method3 at the correct place. - const uint32_t method1_end = method1_offset + method1_code.size(); - const uint32_t method2_offset = - method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader); - const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); - std::vector<uint8_t> method2_raw_code(method2_size); - ArrayRef<const uint8_t> method2_code(method2_raw_code); - AddCompiledMethod(MethodRef(2u), method2_code); - - AddCompiledMethod(MethodRef(3u), method3_code, method3_patches); - - Link(); - - // Check assumptions. - CHECK_EQ(GetMethodOffset(1), method1_offset); - CHECK_EQ(GetMethodOffset(2), method2_offset); - auto result3 = method_offset_map_.FindMethodOffset(MethodRef(3)); - CHECK(result3.first); - // There may be a thunk before method2. - if (result3.second == method3_offset + 1 /* thumb mode */) { - return false; // No thunk. - } else { - uint32_t thunk_end = - CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), - InstructionSet::kThumb2) + - MethodCallThunkSize(); - uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); - CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */); - return true; // Thunk present. - } - } - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - CHECK_NE(result.second & 1u, 0u); - return result.second - 1 /* thumb mode */; - } - - std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); - return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t MethodCallThunkSize() { - return CompileMethodCallThunk().size(); - } - - bool CheckThunk(uint32_t thunk_offset) { - const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); - if (output_.size() < thunk_offset + expected_code.size()) { - LOG(ERROR) << "output_.size() == " << output_.size() << " < " - << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); - return false; - } - ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == ArrayRef<const uint8_t>(expected_code)) { - return true; - } - // Log failure info. 
- DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); - return false; - } - - std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { - std::vector<uint8_t> result; - result.reserve(num_nops * 2u + 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - PushBackInsn(&result, bl); - return result; - } - - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, - uint32_t holder_reg, - bool narrow) { - const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t GetOutputInsn32(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 4u); - return (static_cast<uint32_t>(output_[offset]) << 16) | - (static_cast<uint32_t>(output_[offset + 1]) << 24) | - (static_cast<uint32_t>(output_[offset + 2]) << 0) | - (static_cast<uint32_t>(output_[offset + 3]) << 8); - } - - uint16_t GetOutputInsn16(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 2u); - return (static_cast<uint32_t>(output_[offset]) << 0) | - (static_cast<uint32_t>(output_[offset + 1]) << 8); - } - - void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg); - void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg); -}; - -const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { - 0x00, 0xf0, 0x00, 0xf8 -}; - -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t Thumb2RelativePatcherTest::kNopRawCode[] = { - 0x00, 0xbf -}; - -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode); - -const uint8_t Thumb2RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder) - 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder) - 0x78, 0x44, // ADD r0, pc -}; -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); -const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u; - -void Thumb2RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - const LinkerPatch patches[] = { - 
LinkerPatch::StringBssEntryPatch(0u, nullptr, kPcInsnOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(4u, nullptr, kPcInsnOffset, kStringIndex), - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex), - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t pc_base_offset = method1_offset + kPcInsnOffset + 4u /* PC adjustment */; - uint32_t diff = target_offset - pc_base_offset; - // Distribute the bits of the diff between the MOVW and MOVT: - uint32_t diffw = diff & 0xffffu; - uint32_t difft = diff >> 16; - uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder), - ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, - ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, - ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, - ((diffw & 0x00ffu)); // keep imm8 at bits 0-7. - uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder), - ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, - ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, - ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, - ((difft & 0x00ffu)); // keep imm8 at bits 0-7. - const uint8_t expected_code[] = { - static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24), - static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8), - static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24), - static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8), - 0x78, 0x44, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallSelf) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xff, 0xf7, 0xfe, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOther) { - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - const LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + 4u /* PC adjustment */); - ASSERT_EQ(diff_after & 1u, 0u); - ASSERT_LT(diff_after >> 1, 1u << 8); // Simple encoding, (diff_after >> 1) fits into 8 bits. 
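Side note on the byte order used by SetInsn32()/GetInsn32() earlier and by the expected_code arrays above: Thumb2 stores a 32-bit instruction as two consecutive little-endian half-words, and the patcher's 32-bit "instruction word" keeps the first half-word in its upper 16 bits. A small sketch of reading such a word back from memory, illustrative only:

#include <cstdint>

uint32_t ReadThumb2Insn32(const uint8_t* bytes) {
  // Each half-word is little-endian in memory; the first half-word becomes the
  // upper 16 bits of the instruction word, the second the lower 16 bits.
  uint32_t first = bytes[0] | (static_cast<uint32_t>(bytes[1]) << 8);
  uint32_t second = bytes[2] | (static_cast<uint32_t>(bytes[3]) << 8);
  return (first << 16) | second;
}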
- static const uint8_t method1_expected_code[] = { - 0x00, 0xf0, static_cast<uint8_t>(diff_after >> 1), 0xf8 - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + 4u /* PC adjustment */); - ASSERT_EQ(diff_before & 1u, 0u); - ASSERT_GE(diff_before, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0. - auto method2_expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff_before >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t diff = kTrampolineOffset - (method1_offset + 4u); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). - auto expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) { - constexpr uint32_t missing_method_index = 1024u; - auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. - ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first); - - // Check linked code. - uint32_t method3_offset = GetMethodOffset(3u); - uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), - InstructionSet::kThumb2); - uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. - auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. 
- ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), - }; - - constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + max_positive_disp); - ASSERT_FALSE(thunk_in_gap); // There should be no thunk. - - // Check linked code. - auto expected_code = GenNopsAndBl(3u, kBlPlusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { - auto method3_raw_code = GenNopsAndBl(2u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be no thunk. - - // Check linked code. - auto expected_code = GenNopsAndBl(2u, kBlMinusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(2u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), - }; - - constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + just_over_max_positive_disp); - ASSERT_TRUE(thunk_in_gap); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method3_offset = GetMethodOffset(3u); - ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset)); - uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); - uint32_t thunk_size = MethodCallThunkSize(); - uint32_t thunk_offset = RoundDown(method3_header_offset - thunk_size, kArmAlignment); - DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), - method3_header_offset); - ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); - uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, 16 * MB - (1u << 9)); // Simple encoding, unknown bits fit into the low 8 bits. 
- auto expected_code = GenNopsAndBl(2u, 0xf3ffd700 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - CheckThunk(thunk_offset); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { - auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. - - // Check linked code. - uint32_t method3_offset = GetMethodOffset(3u); - uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), - InstructionSet::kThumb2); - uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. - auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry1) { - TestStringBssEntry(0x00ff0000u, 0x00fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry2) { - TestStringBssEntry(0x02ff0000u, 0x05fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry3) { - TestStringBssEntry(0x08ff0000u, 0x08fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry4) { - TestStringBssEntry(0xd0ff0000u, 0x60fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference1) { - TestStringReference(0x00ff00fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference2) { - TestStringReference(0x02ff05fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference3) { - TestStringReference(0x08ff08fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference4) { - TestStringReference(0xd0ff60fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
- }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 4 * KB); - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base_reg, holder_reg, /* narrow */ false); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); - const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = - CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check uses the correct register, i.e. holder_reg. - if (holder_reg < 8) { - ASSERT_GE(output_.size() - gray_check_offset, 2u); - ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - gray_check_offset +=2u; - } else { - ASSERT_GE(output_.size() - gray_check_offset, 6u); - ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); - ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ - gray_check_offset += 6u; - } - } - // Verify that the lock word for gray bit check is loaded from the holder address. - ASSERT_GE(output_.size() - gray_check_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - const uint32_t load_lock_word = - kLdrWInsn | - (holder_reg << 16) | - (/* IP */ 12 << 12) | - mirror::Object::MonitorOffset().Uint32Value(); - ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). 
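Aside: the ror_shift computed above places the top bit of the Thumb2 modified-immediate pattern '1':imm8<6:0> onto the read barrier state bit of the lock word. Assuming LockWord::kReadBarrierStateShift == 28 (an assumption here, not stated in this diff), ror_shift = 7 + (32 - 28) = 11, and rotating bit 7 right by 11 lands on bit 28. A quick sanity check, illustrative only:

#include <cstdint>

constexpr uint32_t RotateRight(uint32_t value, uint32_t shift) {
  return (value >> shift) | (value << ((32u - shift) % 32u));
}

// With the assumed shift of 28, ror_shift is 11 and bit 7 rotated right by 11
// ends up at bit (7 - 11) mod 32 = 28, i.e. on the read barrier state bit
// tested by the TST instruction the code above verifies.
static_assert(RotateRight(0x80u, 7u + (32u - 28u)) == (1u << 28),
              "rotated immediate selects the read barrier state bit");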
- EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } - } -} - -void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 32u); - constexpr size_t kMethodCodeSize = 6u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - if (base_reg >= 8u) { - continue; - } - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base_reg, holder_reg, /* narrow */ true); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - if (base_reg >= 8u) { - continue; - } - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; - const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = - CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check uses the correct register, i.e. holder_reg. 
- if (holder_reg < 8) { - ASSERT_GE(output_.size() - gray_check_offset, 2u); - ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - gray_check_offset +=2u; - } else { - ASSERT_GE(output_.size() - gray_check_offset, 6u); - ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); - ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ - gray_check_offset += 6u; - } - } - // Verify that the lock word for gray bit check is loaded from the holder address. - ASSERT_GE(output_.size() - gray_check_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - const uint32_t load_lock_word = - kLdrWInsn | - (holder_reg << 16) | - (/* IP */ 12 << 12) | - mirror::Object::MonitorOffset().Uint32Value(); - ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). - EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } - } -} - -#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffsetWide##offset##_##ref_reg) { \ - TestBakerFieldWide(offset, ref_reg); \ - } - -TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3) -TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7) -TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11) - -#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffsetNarrow##offset##_##ref_reg) { \ - TestBakerFieldNarrow(offset, ref_reg); \ - } - -TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3) -TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7) - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { - // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
- constexpr uint32_t kLiteralOffset1 = 6u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - constexpr uint32_t kLiteralOffset2 = 4; - static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment), - "PC for BNE must be aligned."); - - // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch - // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
- size_t thunk_size = - CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); - size_t filler2_size = - 1 * MB - (kLiteralOffset2 + kPcAdjustment) - - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; - const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000; - const std::vector<uint8_t> expected_code1 = - RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { - // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction - // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 4u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - Link(); - - const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment)); - const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) { - // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded - // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. 
- // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 6u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - constexpr uint32_t kReachableFromOffset2 = 4; - constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2; - static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment), - "PC for BNE must be aligned."); - - // If not for the extra NOP, this would allow reaching the thunk from the BNE - // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take - // PC adjustment into account. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = - CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); - size_t filler2_size = - 1 * MB - (kReachableFromOffset2 + kPcAdjustment) - - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle. 
- const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; - const uint32_t bne_last = - BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment)); - const std::vector<uint8_t> expected_code1 = - RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = - RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerArray) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - auto ldr = [](uint32_t base_reg) { - uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; - uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; - return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12); - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the lock word for gray bit check is loaded from the correct address - // before the base_reg which points to the array data. 
- ASSERT_GE(output_.size() - thunk_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; - ASSERT_LT(offset, 0); - ASSERT_GT(offset, -256); - const uint32_t load_lock_word = - kLdrNegativeOffset | - (-offset & 0xffu) | - (base_reg << 16) | - (/* IP */ 12 << 12); - EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). - EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency. - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } -} - -TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 4u; - uint32_t method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); - const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
- method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
- ++method_idx;
- uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
- uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
- const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
- ASSERT_EQ(kMethodCodeSize, expected_code.size());
- EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
-
- std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
- ASSERT_GT(output_.size(), thunk_offset);
- ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
- ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
- expected_thunk.size());
- if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
- DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
- ASSERT_TRUE(false);
- }
-
- // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
- if (root_reg < 8) {
- ASSERT_GE(output_.size() - thunk_offset, 2u);
- ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
- } else {
- ASSERT_GE(output_.size() - thunk_offset, 6u);
- ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
- ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
- }
- // Do not check the rest of the implementation.
-
- // The next thunk follows on the next aligned offset.
- thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
- }
-}
-
-TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
- uint32_t valid_regs[] = {
- 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
- // Not applicable to high registers.
- };
- constexpr size_t kMethodCodeSize = 6u;
- constexpr size_t kLiteralOffset = 2u;
- uint32_t method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
- ++method_idx;
- uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
- const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
- ASSERT_EQ(kMethodCodeSize, raw_code.size());
- ArrayRef<const uint8_t> code(raw_code);
- const LinkerPatch patches[] = {
- LinkerPatch::BakerReadBarrierBranchPatch(
- kLiteralOffset,
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
- };
- AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
- }
- Link();
-
- // All thunks are at the end.
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; - const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. - ASSERT_GE(output_.size() - thunk_offset, 2u); - ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } -} - -TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { - // Test 1MiB of patches to the same thunk to stress-test different large offsets. - // (The low bits are not that important but the location of the high bits is easy to get wrong.) - std::vector<uint8_t> code; - code.reserve(1 * MB); - const size_t num_patches = 1 * MB / 8u; - std::vector<LinkerPatch> patches; - patches.reserve(num_patches); - const uint32_t ldr = - kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false); - for (size_t i = 0; i != num_patches; ++i) { - PushBackInsn(&code, ldr); - PushBackInsn(&code, kBneWPlus0); - patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); - } - ASSERT_EQ(1 * MB, code.size()); - ASSERT_EQ(num_patches, patches.size()); - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - // The thunk is right after the method code. - DCHECK_ALIGNED(1 * MB, kArmAlignment); - std::vector<uint8_t> expected_code; - for (size_t i = 0; i != num_patches; ++i) { - PushBackInsn(&expected_code, ldr); - PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB)); - patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); - } - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { - // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` - // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily - // hold when we're reserving thunks of different sizes. This test exposes the situation - // by using Baker thunks and a method call thunk. - - // Add a method call patch that can reach to method 1 offset + 16MiB. 
- uint32_t method_idx = 0u;
- constexpr size_t kMethodCallLiteralOffset = 2u;
- constexpr uint32_t kMissingMethodIdx = 2u;
- const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
- const LinkerPatch method1_patches[] = {
- LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u),
- };
- ArrayRef<const uint8_t> code1(raw_code1);
- ++method_idx;
- AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
-
- // Skip kMissingMethodIdx.
- ++method_idx;
- ASSERT_EQ(kMissingMethodIdx, method_idx);
- // Add a method with the right size so that the code for the next method starts 1MiB
- // after the code for method 1.
- size_t filler_size =
- 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
- - sizeof(OatQuickMethodHeader);
- std::vector<uint8_t> filler_code = GenNops(filler_size / 2u);
- ++method_idx;
- AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
- // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB
- // before the currently scheduled MaxNextOffset() for the method call thunk.
- for (uint32_t i = 0; i != 14; ++i) {
- filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
- filler_code = GenNops(filler_size / 2u);
- ++method_idx;
- AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
- }
-
- // Add 2 Baker GC root patches to the last method, one that would allow the thunk at
- // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the
- // second that needs it kArmAlignment after that. Given the size of the GC root thunk
- // is more than the space required by the method call thunk plus kArmAlignment,
- // this pushes the first GC root thunk's pending MaxNextOffset() before the method call
- // thunk's pending MaxNextOffset() which needs to be adjusted.
- ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
- CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size());
- static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
- constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
- constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
- // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`.
- const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12);
- const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12);
- const std::vector<uint8_t> last_method_raw_code = RawCode({
- kNopInsn, // Padding before first GC root read barrier.
- ldr1, kBneWPlus0, // First GC root LDR with read barrier.
- ldr2, kBneWPlus0, // Second GC root LDR with read barrier.
- });
- uint32_t encoded_data1 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false);
- uint32_t encoded_data2 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false);
- const LinkerPatch last_method_patches[] = {
- LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
- LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
- };
- ++method_idx;
- AddCompiledMethod(MethodRef(method_idx),
- ArrayRef<const uint8_t>(last_method_raw_code),
- ArrayRef<const LinkerPatch>(last_method_patches));
-
- // The main purpose of the test is to check that Link() does not cause a crash.
- Link(); - - ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc deleted file mode 100644 index 52a07965b92..00000000000 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ /dev/null @@ -1,683 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm64/relative_patcher_arm64.h" - -#include "arch/arm64/asm_support_arm64.h" -#include "arch/arm64/instruction_set_features_arm64.h" -#include "art_method.h" -#include "base/bit_utils.h" -#include "compiled_method-inl.h" -#include "driver/compiler_driver.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "heap_poisoning.h" -#include "linker/linker_patch.h" -#include "linker/output_stream.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat.h" -#include "oat_quick_method_header.h" -#include "read_barrier.h" -#include "utils/arm64/assembler_arm64.h" - -namespace art { -namespace linker { - -namespace { - -// Maximum positive and negative displacement for method call measured from the patch location. -// (Signed 28 bit displacement with the last two bits 0 has range [-2^27, 2^27-4] measured from -// the ARM64 PC pointing to the BL.) -constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 27) - 4u; -constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 27); - -// Maximum positive and negative displacement for a conditional branch measured from the patch -// location. (Signed 21 bit displacement with the last two bits 0 has range [-2^20, 2^20-4] -// measured from the ARM64 PC pointing to the B.cond.) -constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 4u; -constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20); - -// The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. 
-constexpr uint32_t kAdrpThunkSize = 8u; - -inline bool IsAdrpPatch(const LinkerPatch& patch) { - switch (patch.GetType()) { - case LinkerPatch::Type::kCall: - case LinkerPatch::Type::kCallRelative: - case LinkerPatch::Type::kBakerReadBarrierBranch: - return false; - case LinkerPatch::Type::kMethodRelative: - case LinkerPatch::Type::kMethodBssEntry: - case LinkerPatch::Type::kTypeRelative: - case LinkerPatch::Type::kTypeClassTable: - case LinkerPatch::Type::kTypeBssEntry: - case LinkerPatch::Type::kStringRelative: - case LinkerPatch::Type::kStringInternTable: - case LinkerPatch::Type::kStringBssEntry: - return patch.LiteralOffset() == patch.PcInsnOffset(); - } -} - -inline uint32_t MaxExtraSpace(size_t num_adrp, size_t code_size) { - if (num_adrp == 0u) { - return 0u; - } - uint32_t alignment_bytes = - CompiledMethod::AlignCode(code_size, InstructionSet::kArm64) - code_size; - return kAdrpThunkSize * num_adrp + alignment_bytes; -} - -} // anonymous namespace - -Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider, - const Arm64InstructionSetFeatures* features) - : ArmBaseRelativePatcher(provider, InstructionSet::kArm64), - fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()), - reserved_adrp_thunks_(0u), - processed_adrp_thunks_(0u) { - if (fix_cortex_a53_843419_) { - adrp_thunk_locations_.reserve(16u); - current_method_thunks_.reserve(16u * kAdrpThunkSize); - } -} - -uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) { - if (!fix_cortex_a53_843419_) { - DCHECK(adrp_thunk_locations_.empty()); - return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); - } - - // Add thunks for previous method if any. - if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { - size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; - offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) + - kAdrpThunkSize * num_adrp_thunks; - reserved_adrp_thunks_ = adrp_thunk_locations_.size(); - } - - // Count the number of ADRP insns as the upper bound on the number of thunks needed - // and use it to reserve space for other linker patches. - size_t num_adrp = 0u; - DCHECK(compiled_method != nullptr); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (IsAdrpPatch(patch)) { - ++num_adrp; - } - } - ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); - uint32_t max_extra_space = MaxExtraSpace(num_adrp, code.size()); - offset = ReserveSpaceInternal(offset, compiled_method, method_ref, max_extra_space); - if (num_adrp == 0u) { - return offset; - } - - // Now that we have the actual offset where the code will be placed, locate the ADRP insns - // that actually require the thunk. 
- uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); - DCHECK(compiled_method != nullptr); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (IsAdrpPatch(patch)) { - uint32_t patch_offset = quick_code_offset + patch.LiteralOffset(); - if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) { - adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset); - thunk_offset += kAdrpThunkSize; - } - } - } - return offset; -} - -uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { - if (!fix_cortex_a53_843419_) { - DCHECK(adrp_thunk_locations_.empty()); - } else { - // Add thunks for the last method if any. - if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { - size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; - offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) + - kAdrpThunkSize * num_adrp_thunks; - reserved_adrp_thunks_ = adrp_thunk_locations_.size(); - } - } - return ArmBaseRelativePatcher::ReserveSpaceEnd(offset); -} - -uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { - if (fix_cortex_a53_843419_) { - if (!current_method_thunks_.empty()) { - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64); - if (kIsDebugBuild) { - CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); - size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; - CHECK_LE(num_thunks, processed_adrp_thunks_); - for (size_t i = 0u; i != num_thunks; ++i) { - const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i]; - CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize); - } - } - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) { - return 0u; - } - offset = aligned_offset + current_method_thunks_.size(); - current_method_thunks_.clear(); - } - } - return ArmBaseRelativePatcher::WriteThunks(out, offset); -} - -void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, uint32_t - target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 3u, 0u); - DCHECK_EQ(patch_offset & 3u, 0u); - DCHECK_EQ(target_offset & 3u, 0u); - uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); - DCHECK_EQ(displacement & 3u, 0u); - DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. - uint32_t insn = (displacement & 0x0fffffffu) >> 2; - insn |= 0x94000000; // BL - - // Check that we're just overwriting an existing BL. - DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u); - // Write the new BL. 
- SetInsn(code, literal_offset, insn);
-}
-
-void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
- const LinkerPatch& patch,
- uint32_t patch_offset,
- uint32_t target_offset) {
- DCHECK_EQ(patch_offset & 3u, 0u);
- DCHECK_EQ(target_offset & 3u, 0u);
- uint32_t literal_offset = patch.LiteralOffset();
- uint32_t insn = GetInsn(code, literal_offset);
- uint32_t pc_insn_offset = patch.PcInsnOffset();
- uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu);
- bool wide = (insn & 0x40000000) != 0;
- uint32_t shift = wide ? 3u : 2u;
- if (literal_offset == pc_insn_offset) {
- // Check it's an ADRP with imm == 0 (unset).
- DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u)
- << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn;
- if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() &&
- adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) {
- DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code),
- literal_offset, patch_offset));
- uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second;
- uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu);
- uint32_t adrp = PatchAdrp(insn, adrp_disp);
-
- uint32_t out_disp = thunk_offset - patch_offset;
- DCHECK_EQ(out_disp & 3u, 0u);
- DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u); // 28-bit signed.
- insn = (out_disp & 0x0fffffffu) >> shift;
- insn |= 0x14000000; // B <thunk>
-
- uint32_t back_disp = -out_disp;
- DCHECK_EQ(back_disp & 3u, 0u);
- DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u); // 28-bit signed.
- uint32_t b_back = (back_disp & 0x0fffffffu) >> 2;
- b_back |= 0x14000000; // B <back>
- size_t thunks_code_offset = current_method_thunks_.size();
- current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize);
- SetInsn(&current_method_thunks_, thunks_code_offset, adrp);
- SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back);
- static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions");
-
- processed_adrp_thunks_ += 1u;
- } else {
- insn = PatchAdrp(insn, disp);
- }
- // Write the new ADRP (or B to the erratum 843419 thunk).
- SetInsn(code, literal_offset, insn);
- } else {
- if ((insn & 0xfffffc00) == 0x91000000) {
- // ADD immediate, 64-bit with imm12 == 0 (unset).
- if (!kEmitCompilerReadBarrier) {
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
- patch.GetType() == LinkerPatch::Type::kTypeRelative ||
- patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType();
- } else {
- // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry.
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
- patch.GetType() == LinkerPatch::Type::kTypeRelative ||
- patch.GetType() == LinkerPatch::Type::kStringRelative ||
- patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
- patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
- }
- shift = 0u; // No shift for ADD.
- } else {
- // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
- patch.GetType() == LinkerPatch::Type::kTypeClassTable ||
- patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
- patch.GetType() == LinkerPatch::Type::kStringInternTable ||
- patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
- DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
- }
- if (kIsDebugBuild) {
- uint32_t adrp = GetInsn(code, pc_insn_offset);
- if ((adrp & 0x9f000000u) != 0x90000000u) {
- CHECK(fix_cortex_a53_843419_);
- CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk>
- CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize);
- size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
- CHECK_LE(num_thunks, processed_adrp_thunks_);
- uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
- for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) {
- CHECK_NE(i, processed_adrp_thunks_);
- if (adrp_thunk_locations_[i].first == b_offset) {
- size_t idx = num_thunks - (processed_adrp_thunks_ - i);
- adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize);
- break;
- }
- }
- }
- CHECK_EQ(adrp & 0x9f00001fu, // Check that pc_insn_offset points
- 0x90000000 | ((insn >> 5) & 0x1fu)); // to ADRP with matching register.
- }
- uint32_t imm12 = (disp & 0xfffu) >> shift;
- insn = (insn & ~(0xfffu << 10)) | (imm12 << 10);
- SetInsn(code, literal_offset, insn);
- }
-}
-
-void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
- const LinkerPatch& patch,
- uint32_t patch_offset) {
- DCHECK_ALIGNED(patch_offset, 4u);
- uint32_t literal_offset = patch.LiteralOffset();
- DCHECK_ALIGNED(literal_offset, 4u);
- DCHECK_LT(literal_offset, code->size());
- uint32_t insn = GetInsn(code, literal_offset);
- DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched)
- ThunkKey key = GetBakerThunkKey(patch);
- if (kIsDebugBuild) {
- const uint32_t encoded_data = key.GetCustomValue1();
- BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
- // Check that the next instruction matches the expected LDR.
- switch (kind) {
- case BakerReadBarrierKind::kField: {
- DCHECK_GE(code->size() - literal_offset, 8u);
- uint32_t next_insn = GetInsn(code, literal_offset + 4u);
- // LDR (immediate) with correct base_reg.
- CheckValidReg(next_insn & 0x1fu); // Check destination register.
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
- break;
- }
- case BakerReadBarrierKind::kArray: {
- DCHECK_GE(code->size() - literal_offset, 8u);
- uint32_t next_insn = GetInsn(code, literal_offset + 4u);
- // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
- // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
- CheckValidReg(next_insn & 0x1fu); // Check destination register.
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
- CheckValidReg((next_insn >> 16) & 0x1f); // Check index register
- break;
- }
- case BakerReadBarrierKind::kGcRoot: {
- DCHECK_GE(literal_offset, 4u);
- uint32_t prev_insn = GetInsn(code, literal_offset - 4u);
- // LDR (immediate) with correct root_reg.
- const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); - break; - } - default: - LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); - UNREACHABLE(); - } - } - uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); - DCHECK_ALIGNED(target_offset, 4u); - uint32_t disp = target_offset - patch_offset; - DCHECK((disp >> 20) == 0u || (disp >> 20) == 4095u); // 21-bit signed. - insn |= (disp << (5 - 2)) & 0x00ffffe0u; // Shift bits 2-20 to 5-23. - SetInsn(code, literal_offset, insn); -} - -#define __ assembler.GetVIXLAssembler()-> - -static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, - vixl::aarch64::Register base_reg, - vixl::aarch64::MemOperand& lock_word, - vixl::aarch64::Label* slow_path) { - using namespace vixl::aarch64; // NOLINT(build/namespaces) - // Load the lock word containing the rb_state. - __ Ldr(ip0.W(), lock_word); - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); - static_assert( - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, - "Field and array LDR offsets must be the same to reuse the same code."); - // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); - __ Br(lr); // And return back to the function. - // Note: The fake dependency is unnecessary for the slow path. -} - -// Load the read barrier introspection entrypoint in register `entrypoint`. -static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, - vixl::aarch64::Register entrypoint) { - using vixl::aarch64::MemOperand; - using vixl::aarch64::ip0; - // Thread Register. - const vixl::aarch64::Register tr = vixl::aarch64::x19; - - // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip0.GetCode(), 16u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); - __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); -} - -void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, - uint32_t encoded_data) { - using namespace vixl::aarch64; // NOLINT(build/namespaces) - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - switch (kind) { - case BakerReadBarrierKind::kField: { - // Check if the holder is gray and, if not, add fake dependency to the base register - // and return to the LDR instruction to load the reference. Otherwise, use introspection - // to load the reference and call the entrypoint (in IP1) that performs further checks - // on the reference and marks it if needed. 
- auto base_reg = - Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - auto holder_reg = - Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); - CheckValidReg(holder_reg.GetCode()); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip0, ip1); - // If base_reg differs from holder_reg, the offset was too large and we must have - // emitted an explicit null check before the load. Otherwise, we need to null-check - // the holder as we do not necessarily do that check before going to the thunk. - vixl::aarch64::Label throw_npe; - if (holder_reg.Is(base_reg)) { - __ Cbz(holder_reg.W(), &throw_npe); - } - vixl::aarch64::Label slow_path; - MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); - __ Bind(&slow_path); - MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); - __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); - __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. - __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. - // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Br(ip1); // Jump to the entrypoint. - if (holder_reg.Is(base_reg)) { - // Add null check slow path. The stack map is at the address pointed to by LR. - __ Bind(&throw_npe); - int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset)); - __ Br(ip0); - } - break; - } - case BakerReadBarrierKind::kArray: { - auto base_reg = - Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip0, ip1); - vixl::aarch64::Label slow_path; - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); - DCHECK_LT(lock_word.GetOffset(), 0); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); - __ Bind(&slow_path); - MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); - __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); - __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). - __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create - // a switch case target based on the index register. - __ Mov(ip0, base_reg); // Move the base register to ip0. - __ Br(ip1); // Jump to the entrypoint's array switch case. - break; - } - case BakerReadBarrierKind::kGcRoot: { - // Check if the reference needs to be marked and if so (i.e. not null, not marked yet - // and it does not have a forwarding address), call the correct introspection entrypoint; - // otherwise return the reference (or the extracted forwarding address). - // There is no gray bit check for GC roots. 
- auto root_reg =
- Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
- CheckValidReg(root_reg.GetCode());
- DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
- UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
- temps.Exclude(ip0, ip1);
- vixl::aarch64::Label return_label, not_marked, forwarding_address;
- __ Cbz(root_reg, &return_label);
- MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
- __ Ldr(ip0.W(), lock_word);
- __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
- __ Bind(&return_label);
- __ Br(lr);
- __ Bind(&not_marked);
- __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
- __ B(&forwarding_address, mi);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
- // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
- // art_quick_read_barrier_mark_introspection_gc_roots.
- __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
- __ Mov(ip0.W(), root_reg);
- __ Br(ip1);
- __ Bind(&forwarding_address);
- __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
- __ Br(lr);
- break;
- }
- default:
- LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
- UNREACHABLE();
- }
-}
-
-std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- arm64::Arm64Assembler assembler(&allocator);
-
- switch (key.GetType()) {
- case ThunkType::kMethodCall: {
- // The thunk just uses the entry point in the ArtMethod. This works even for calls
- // to the generic JNI and interpreter trampolines.
- Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64PointerSize).Int32Value());
- assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
- break;
- }
- case ThunkType::kBakerReadBarrier: {
- CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
- break;
- }
- }
-
- // Ensure we emit the literal pool.
- assembler.FinalizeCode();
- std::vector<uint8_t> thunk_code(assembler.CodeSize());
- MemoryRegion code(thunk_code.data(), thunk_code.size());
- assembler.FinalizeInstructions(code);
- return thunk_code;
-}
-
-std::string Arm64RelativePatcher::GetThunkDebugName(const ThunkKey& key) {
- switch (key.GetType()) {
- case ThunkType::kMethodCall:
- return "MethodCallThunk";
-
- case ThunkType::kBakerReadBarrier: {
- uint32_t encoded_data = key.GetCustomValue1();
- BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
- std::ostringstream oss;
- oss << "BakerReadBarrierThunk";
- switch (kind) {
- case BakerReadBarrierKind::kField:
- oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
- << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
- break;
- case BakerReadBarrierKind::kArray:
- oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
- DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
- break;
- case BakerReadBarrierKind::kGcRoot:
- oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
- DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
- break;
- }
- return oss.str();
- }
- }
-}
-
-#undef __
-
-uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
- switch (key.GetType()) {
- case ThunkType::kMethodCall:
- return kMaxMethodCallPositiveDisplacement;
- case ThunkType::kBakerReadBarrier:
- return kMaxBcondPositiveDisplacement;
- }
-}
-
-uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
- switch (key.GetType()) {
- case ThunkType::kMethodCall:
- return kMaxMethodCallNegativeDisplacement;
- case ThunkType::kBakerReadBarrier:
- return kMaxBcondNegativeDisplacement;
- }
-}
-
-uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
- return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg.
- // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
- ((disp & 0x00003000u) << (29 - 12)) |
- // The next 16 bits are encoded in bits 5-22.
- ((disp & 0xffffc000u) >> (12 + 2 - 5)) |
- // Since the target_offset is based on the beginning of the oat file and the
- // image space precedes the oat file, the target_offset into image space will
- // be negative yet passed as uint32_t. Therefore we limit the displacement
- // to +-2GiB (rather than the maximum +-4GiB) and determine the sign bit from
- // the highest bit of the displacement. This is encoded in bit 23.
- ((disp & 0x80000000u) >> (31 - 23));
-}
-
-bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code,
- uint32_t literal_offset,
- uint32_t patch_offset) {
- DCHECK_EQ(patch_offset & 0x3u, 0u);
- if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc
- uint32_t adrp = GetInsn(code, literal_offset);
- DCHECK_EQ(adrp & 0x9f000000, 0x90000000);
- uint32_t next_offset = patch_offset + 4u;
- uint32_t next_insn = GetInsn(code, literal_offset + 4u);
-
- // Below we avoid patching sequences where the adrp is followed by a load which can easily
- // be proved to be aligned.
-
- // First check if the next insn is the LDR using the result of the ADRP.
- // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg.
- if ((next_insn & 0xffc00000) == 0xb9400000 && - (((next_insn >> 5) ^ adrp) & 0x1f) == 0) { - return false; - } - - // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result - // of the ADRP for an ADD immediate, check for that as well. We generalize a bit - // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination - // or stores the result to a different register. - if ((next_insn & 0x1f000000) == 0x11000000 && - ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) { - return false; - } - - // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing. - if ((next_insn & 0xff000000) == 0x18000000) { - return false; - } - - // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8. - if ((next_insn & 0xff000000) == 0x58000000) { - bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0; - return !is_aligned_load; - } - - // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is - // guaranteed to be 128-bits aligned and <pimm> is multiple of the load size. - if ((next_insn & 0xbfc003e0) == 0xb94003e0) { - return false; - } - return true; - } - return false; -} - -void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { - DCHECK_LE(offset + 4u, code->size()); - DCHECK_EQ(offset & 3u, 0u); - uint8_t* addr = &(*code)[offset]; - addr[0] = (value >> 0) & 0xff; - addr[1] = (value >> 8) & 0xff; - addr[2] = (value >> 16) & 0xff; - addr[3] = (value >> 24) & 0xff; -} - -uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 4u, code.size()); - DCHECK_EQ(offset & 3u, 0u); - const uint8_t* addr = &code[offset]; - return - (static_cast<uint32_t>(addr[0]) << 0) + - (static_cast<uint32_t>(addr[1]) << 8) + - (static_cast<uint32_t>(addr[2]) << 16)+ - (static_cast<uint32_t>(addr[3]) << 24); -} - -template <typename Alloc> -uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) { - return GetInsn(ArrayRef<const uint8_t>(*code), offset); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h deleted file mode 100644 index 8ba59976e7d..00000000000 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ -#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ - -#include "base/array_ref.h" -#include "base/bit_field.h" -#include "base/bit_utils.h" -#include "linker/arm/relative_patcher_arm_base.h" - -namespace art { - -namespace arm64 { -class Arm64Assembler; -} // namespace arm64 - -namespace linker { - -class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { - public: - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { - CheckValidReg(base_reg); - CheckValidReg(holder_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg); - } - - static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { - CheckValidReg(base_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); - } - - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { - CheckValidReg(root_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | - BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); - } - - Arm64RelativePatcher(RelativePatcherTargetProvider* provider, - const Arm64InstructionSetFeatures* features); - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - - protected: - std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - std::string GetThunkDebugName(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; - uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; - - private: - static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; - - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kArray, // Array get with index in register. - kGcRoot, // GC root load. 
- kLast = kGcRoot - }; - - static constexpr size_t kBitsForBakerReadBarrierKind = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); - static constexpr size_t kBitsForRegister = 5u; - using BakerReadBarrierKindField = - BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; - using BakerReadBarrierFirstRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; - using BakerReadBarrierSecondRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; - - static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg; - } - - void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); - - static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); - - static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, - uint32_t patch_offset); - void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); - static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Alloc> - static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset); - - const bool fix_cortex_a53_843419_; - // Map original patch_offset to thunk offset. - std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_; - size_t reserved_adrp_thunks_; - size_t processed_adrp_thunks_; - std::vector<uint8_t> current_method_thunks_; - - friend class Arm64RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc deleted file mode 100644 index 05459a2a823..00000000000 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ /dev/null @@ -1,1364 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm64/relative_patcher_arm64.h" - -#include "base/casts.h" -#include "linker/relative_patcher_test.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class Arm64RelativePatcherTest : public RelativePatcherTest { - public: - explicit Arm64RelativePatcherTest(const std::string& variant) - : RelativePatcherTest(InstructionSet::kArm64, variant) { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kNopRawCode[]; - static const ArrayRef<const uint8_t> kNopCode; - - // NOP instruction. - static constexpr uint32_t kNopInsn = 0xd503201f; - - // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits. - static constexpr uint32_t kBlPlus0 = 0x94000000u; - static constexpr uint32_t kBPlus0 = 0x14000000u; - - // Special BL values. 
- static constexpr uint32_t kBlPlusMax = 0x95ffffffu; - static constexpr uint32_t kBlMinusMax = 0x96000000u; - - // LDR immediate, 32-bit, unsigned offset. - static constexpr uint32_t kLdrWInsn = 0xb9400000u; - - // LDR register, 32-bit, LSL #2. - static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u; - - // LDUR, 32-bit. - static constexpr uint32_t kLdurWInsn = 0xb8400000u; - - // ADD/ADDS/SUB/SUBS immediate, 64-bit. - static constexpr uint32_t kAddXInsn = 0x91000000u; - static constexpr uint32_t kAddsXInsn = 0xb1000000u; - static constexpr uint32_t kSubXInsn = 0xd1000000u; - static constexpr uint32_t kSubsXInsn = 0xf1000000u; - - // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp). - static constexpr uint32_t kLdurInsn = 0xf840405fu; - - // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units. - static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu; - static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu; - - // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain displacement from SP - // in units of 4-bytes (for 32-bit load) or 8-bytes (for 64-bit load). - static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu; - static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; - - // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes. - static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u; - - void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { - CHECK_LE(pos, code->size()); - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - static_cast<uint8_t>(insn >> 16), - static_cast<uint8_t>(insn >> 24), - }; - static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } - - void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { - InsertInsn(code, code->size(), insn); - } - - std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { - std::vector<uint8_t> raw_code; - raw_code.reserve(insns.size() * 4u); - for (uint32_t insn : insns) { - PushBackInsn(&raw_code, insn); - } - return raw_code; - } - - uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, - const ArrayRef<const LinkerPatch>& method1_patches, - const ArrayRef<const uint8_t>& last_method_code, - const ArrayRef<const LinkerPatch>& last_method_patches, - uint32_t distance_without_thunks) { - CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u); - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); - const uint32_t gap_start = method1_offset + method1_code.size(); - - // We want to put the method3 at a very precise offset. - const uint32_t last_method_offset = method1_offset + distance_without_thunks; - CHECK_ALIGNED(last_method_offset, kArm64Alignment); - const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); - - // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB). - // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB - // offsets by this test. Making the first chunk bigger makes it easy to give all intermediate - // methods the same alignment of the end, so the thunk insertion adds a predictable size as - // long as it's after the first chunk.) 
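    // For illustration (hypothetical numbers): with a gap of 128 MiB + 16 B and
    // kSmallChunkSize = 2 MiB, the code below computes num_small_chunks = 64 - 1 = 63,
    // so the first chunk covers the remaining 2 MiB + 16 B (within [2 MiB, 4 MiB))
    // and every later chunk is exactly 2 MiB.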
- uint32_t method_idx = 2u; - constexpr uint32_t kSmallChunkSize = 2 * MB; - std::vector<uint8_t> gap_code; - uint32_t gap_size = gap_end - gap_start; - uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u; - uint32_t chunk_start = gap_start; - uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize; - for (uint32_t i = 0; i <= num_small_chunks; ++i) { // num_small_chunks+1 iterations. - uint32_t chunk_code_size = - chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader); - gap_code.resize(chunk_code_size, 0u); - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code)); - method_idx += 1u; - chunk_start += chunk_size; - chunk_size = kSmallChunkSize; // For all but the first chunk. - DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start)); - } - - // Add the last method and link - AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches); - Link(); - - // Check assumptions. - CHECK_EQ(GetMethodOffset(1), method1_offset); - auto last_result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(last_result.first); - // There may be a thunk before method2. - if (last_result.second != last_method_offset) { - // Thunk present. Check that there's only one. - uint32_t thunk_end = - CompiledCode::AlignCode(gap_end, InstructionSet::kArm64) + MethodCallThunkSize(); - uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); - CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader)); - } - return method_idx; - } - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - CHECK_ALIGNED(result.second, 4u); - return result.second; - } - - std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t MethodCallThunkSize() { - return CompileMethodCallThunk().size(); - } - - bool CheckThunk(uint32_t thunk_offset) { - const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); - if (output_.size() < thunk_offset + expected_code.size()) { - LOG(ERROR) << "output_.size() == " << output_.size() << " < " - << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); - return false; - } - ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == ArrayRef<const uint8_t>(expected_code)) { - return true; - } - // Log failure info. 
- DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); - return false; - } - - std::vector<uint8_t> GenNops(size_t num_nops) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - return result; - } - - std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - PushBackInsn(&result, bl); - return result; - } - - std::vector<uint8_t> GenNopsAndAdrpAndUse(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset, - uint32_t use_insn) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 8u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - CHECK_ALIGNED(method_offset, 4u); - CHECK_ALIGNED(target_offset, 4u); - uint32_t adrp_offset = method_offset + num_nops * 4u; - uint32_t disp = target_offset - (adrp_offset & ~0xfffu); - if (use_insn == kLdrWInsn) { - DCHECK_ALIGNED(disp, 1u << 2); - use_insn |= 1 | // LDR x1, [x0, #(imm12 << 2)] - ((disp & 0xfffu) << (10 - 2)); // imm12 = ((disp & 0xfffu) >> 2) is at bit 10. - } else if (use_insn == kAddXInsn) { - use_insn |= 1 | // ADD x1, x0, #imm - (disp & 0xfffu) << 10; // imm12 = (disp & 0xfffu) is at bit 10. - } else { - LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn; - } - uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) - ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, - ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, - // We take the sign bit from the disp, limiting disp to +- 2GiB. - ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23. - PushBackInsn(&result, adrp); - PushBackInsn(&result, use_insn); - return result; - } - - std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset) { - return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn); - } - - void TestNopsAdrpLdr(size_t num_nops, uint32_t bss_begin, uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. - const LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::StringBssEntryPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t target_offset = bss_begin_ + string_entry_offset; - auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - std::vector<uint8_t> GenNopsAndAdrpAdd(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset) { - return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kAddXInsn); - } - - void TestNopsAdrpAdd(size_t num_nops, uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. 
- const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::RelativeStringPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - auto expected_code = GenNopsAndAdrpAdd(num_nops, method1_offset, string_offset); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - void PrepareNopsAdrpInsn2Ldr(size_t num_nops, - uint32_t insn2, - uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. - InsertInsn(&code, num_nops * 4u + 4u, insn2); - const LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::StringBssEntryPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - } - - void PrepareNopsAdrpInsn2Add(size_t num_nops, uint32_t insn2, uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. - InsertInsn(&code, num_nops * 4u + 4u, insn2); - const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::RelativeStringPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - } - - void TestNopsAdrpInsn2AndUse(size_t num_nops, - uint32_t insn2, - uint32_t target_offset, - uint32_t use_insn) { - uint32_t method1_offset = GetMethodOffset(1u); - auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn); - InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - void TestNopsAdrpInsn2AndUseHasThunk(size_t num_nops, - uint32_t insn2, - uint32_t target_offset, - uint32_t use_insn) { - uint32_t method1_offset = GetMethodOffset(1u); - CHECK(!compiled_method_refs_.empty()); - CHECK_EQ(compiled_method_refs_[0].index, 1u); - CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size()); - uint32_t method1_size = compiled_methods_[0]->GetQuickCode().size(); - uint32_t thunk_offset = - CompiledCode::AlignCode(method1_offset + method1_size, InstructionSet::kArm64); - uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u); - CHECK_ALIGNED(b_diff, 4u); - ASSERT_LT(b_diff, 128 * MB); - uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu); - uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu); - - auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn); - InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); - // Replace adrp with bl. 
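    // (Rough context for the expected code built below: for an erratum 843419 fixup, the
    // patcher replaces the ADRP with a B to an out-of-line thunk (modeled by b_out); the
    // thunk re-does the ADRP for its own PC and branches back, via b_in, to the
    // instruction that followed the original ADRP.)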
- expected_code.erase(expected_code.begin() + num_nops * 4u, - expected_code.begin() + num_nops * 4u + 4u); - InsertInsn(&expected_code, num_nops * 4u, b_out); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - - auto expected_thunk_code = GenNopsAndAdrpLdr(0u, thunk_offset, target_offset); - ASSERT_EQ(expected_thunk_code.size(), 8u); - expected_thunk_code.erase(expected_thunk_code.begin() + 4u, expected_thunk_code.begin() + 8u); - InsertInsn(&expected_thunk_code, 4u, b_in); - ASSERT_EQ(expected_thunk_code.size(), 8u); - - uint32_t thunk_size = MethodCallThunkSize(); - ASSERT_EQ(thunk_offset + thunk_size, output_.size()); - ASSERT_EQ(thunk_size, expected_thunk_code.size()); - ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size); - if (ArrayRef<const uint8_t>(expected_thunk_code) != thunk_code) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk_code), thunk_code); - FAIL(); - } - } - - void TestAdrpInsn2Ldr(uint32_t insn2, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - ASSERT_LT(method1_offset, adrp_offset); - CHECK_ALIGNED(adrp_offset, 4u); - uint32_t num_nops = (adrp_offset - method1_offset) / 4u; - PrepareNopsAdrpInsn2Ldr(num_nops, insn2, bss_begin, string_entry_offset); - uint32_t target_offset = bss_begin_ + string_entry_offset; - if (has_thunk) { - TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn); - } else { - TestNopsAdrpInsn2AndUse(num_nops, insn2, target_offset, kLdrWInsn); - } - ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. - } - - void TestAdrpLdurLdr(uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, - int32_t pcrel_disp, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - ASSERT_LT(pcrel_disp, 0x100000); - ASSERT_GE(pcrel_disp, -0x100000); - ASSERT_EQ(pcrel_disp & 0x3, 0); - uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, - uint32_t sprel_disp_in_load_units, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - ASSERT_LT(sprel_disp_in_load_units, 0x1000u); - uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpInsn2Add(uint32_t insn2, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - ASSERT_LT(method1_offset, adrp_offset); - CHECK_ALIGNED(adrp_offset, 4u); - uint32_t num_nops = (adrp_offset - method1_offset) / 4u; - PrepareNopsAdrpInsn2Add(num_nops, insn2, string_offset); - if (has_thunk) { - TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, string_offset, kAddXInsn); - } else { - TestNopsAdrpInsn2AndUse(num_nops, insn2, string_offset, kAddXInsn); - } - ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. 
- } - - void TestAdrpLdurAdd(uint32_t adrp_offset, bool has_thunk, uint32_t string_offset) { - TestAdrpInsn2Add(kLdurInsn, adrp_offset, has_thunk, string_offset); - } - - void TestAdrpLdrPcRelAdd(uint32_t pcrel_ldr_insn, - int32_t pcrel_disp, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - ASSERT_LT(pcrel_disp, 0x100000); - ASSERT_GE(pcrel_disp, -0x100000); - ASSERT_EQ(pcrel_disp & 0x3, 0); - uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); - TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset); - } - - void TestAdrpLdrSpRelAdd(uint32_t sprel_ldr_insn, - uint32_t sprel_disp_in_load_units, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - ASSERT_LT(sprel_disp_in_load_units, 0x1000u); - uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); - TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset); - } - - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { - const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t GetOutputInsn(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 4u); - return (static_cast<uint32_t>(output_[offset]) << 0) | - (static_cast<uint32_t>(output_[offset + 1]) << 8) | - (static_cast<uint32_t>(output_[offset + 2]) << 16) | - (static_cast<uint32_t>(output_[offset + 3]) << 24); - } - - void TestBakerField(uint32_t offset, uint32_t ref_reg); -}; - -const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { - 0x00, 0x00, 0x00, 0x94 -}; - -const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t Arm64RelativePatcherTest::kNopRawCode[] = { - 0x1f, 0x20, 0x03, 0xd5 -}; - -const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kNopCode(kNopRawCode); - -class Arm64RelativePatcherTestDefault : public Arm64RelativePatcherTest { - public: - Arm64RelativePatcherTestDefault() : Arm64RelativePatcherTest("default") { } -}; - -class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest { - public: - Arm64RelativePatcherTestDenver64() : Arm64RelativePatcherTest("denver64") { } -}; - -TEST_F(Arm64RelativePatcherTestDefault, CallSelf) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - const std::vector<uint8_t> expected_code = RawCode({kBlPlus0}); - 
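  // (kBlPlus0 encodes BL .+0, so patching a RelativeCodePatch whose target is the method
  // itself should leave the displacement at zero, i.e. the expected code equals the input.)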
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOther) { - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - const LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - method1_offset; - CHECK_ALIGNED(diff_after, 4u); - ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits. - const std::vector<uint8_t> method1_expected_code = RawCode({kBlPlus0 + (diff_after >> 2)}); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - method2_offset; - CHECK_ALIGNED(diff_before, 4u); - ASSERT_GE(diff_before, -1u << 27); - auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t diff = kTrampolineOffset - method1_offset; - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). - auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 2) & 0x03ffffffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) { - constexpr uint32_t missing_method_index = 1024u; - auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index), - }; - - constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; - uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - just_over_max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, - last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); - ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first); - - // Check linked code. 
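  // (Here the trampoline itself is outside the 128 MiB backward range of BL, so a method
  // call thunk is expected right after the last method's code and the BL should be patched
  // to branch forward to that thunk instead of to the trampoline.)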
- uint32_t thunk_offset = - CompiledCode::AlignCode(last_method_offset + last_method_code.size(), InstructionSet::kArm64); - uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), - }; - - constexpr uint32_t max_positive_disp = 128 * MB - 4u; - uint32_t last_method_idx = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + max_positive_disp); - ASSERT_EQ(expected_last_method_idx, last_method_idx); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset + bl_offset_in_method1 + max_positive_disp, last_method_offset); - - // Check linked code. - auto expected_code = GenNopsAndBl(1u, kBlPlusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { - auto last_method_raw_code = GenNopsAndBl(0u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), - }; - - constexpr uint32_t max_negative_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, last_method_offset + bl_offset_in_last_method - max_negative_disp); - - // Check linked code. - auto expected_code = GenNopsAndBl(0u, kBlMinusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(0u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 0u * 4u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). 
- const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), - }; - - constexpr uint32_t just_over_max_positive_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap( - method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + just_over_max_positive_disp); - ASSERT_EQ(expected_last_method_idx, last_method_idx); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset)); - uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); - uint32_t thunk_size = MethodCallThunkSize(); - uint32_t thunk_offset = RoundDown(last_method_header_offset - thunk_size, kArm64Alignment); - DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), - last_method_header_offset); - uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - CheckThunk(thunk_offset); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { - auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; - uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - just_over_max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, - last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); - - // Check linked code. 
- uint32_t thunk_offset = - CompiledCode::AlignCode(last_method_offset + last_method_code.size(), InstructionSet::kArm64); - uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry1) { - TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry2) { - TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry3) { - TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry4) { - TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference1) { - TestNopsAdrpAdd(0u, 0x12345678u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference2) { - TestNopsAdrpAdd(0u, -0x12345678u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference3) { - TestNopsAdrpAdd(0u, 0x12345000u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference4) { - TestNopsAdrpAdd(0u, 0x12345ffcu); -} - -#define TEST_FOR_OFFSETS(test, disp1, disp2) \ - test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \ - test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2) - -#define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238) - -#define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDenver64, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ - TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \ - } - -TEST_FOR_OFFSETS(DENVER64_LDUR_LDR_TEST, 0x1234, 0x1238) - -// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. -#define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WPcRel ## disp) { \ - TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRW_PCREL_LDR_TEST, 0x1234, 0x1238) - -// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. -#define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XPcRel ## disp) { \ - bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \ - TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRX_PCREL_LDR_TEST, 0x1234, 0x1238) - -// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. 
-#define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WSpRel ## disp) { \ - TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4) - -#define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XSpRel ## disp) { \ - TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRX_SPREL_LDR_TEST, 0, 8) - -#define DEFAULT_LDUR_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## Ldur ## disp) { \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpLdurAdd(adrp_offset, has_thunk, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_LDUR_ADD_TEST, 0x12345678, 0xffffc840) - -#define DENVER64_LDUR_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDenver64, StringReference ## adrp_offset ## Ldur ## disp) { \ - TestAdrpLdurAdd(adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DENVER64_LDUR_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_SUBX3X2_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubX3X2 ## disp) { \ - /* SUB unrelated to "ADRP x0, addr". */ \ - uint32_t sub = kSubXInsn | (100 << 10) | (2u << 5) | 3u; /* SUB x3, x2, #100 */ \ - TestAdrpInsn2Add(sub, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_SUBX3X2_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_SUBSX3X0_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubsX3X0 ## disp) { \ - /* SUBS that uses the result of "ADRP x0, addr". */ \ - uint32_t subs = kSubsXInsn | (100 << 10) | (0u << 5) | 3u; /* SUBS x3, x0, #100 */ \ - TestAdrpInsn2Add(subs, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_SUBSX3X0_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_ADDX0X0_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddX0X0 ## disp) { \ - /* ADD that uses the result register of "ADRP x0, addr" as both source and destination. */ \ - uint32_t add = kSubXInsn | (100 << 10) | (0u << 5) | 0u; /* ADD x0, x0, #100 */ \ - TestAdrpInsn2Add(add, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_ADDX0X0_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_ADDSX0X2_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddsX0X2 ## disp) { \ - /* ADDS that does not use the result of "ADRP x0, addr" but overwrites that register. */ \ - uint32_t adds = kAddsXInsn | (100 << 10) | (2u << 5) | 0u; /* ADDS x0, x2, #100 */ \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpInsn2Add(adds, adrp_offset, has_thunk, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_ADDSX0X2_ADD_TEST, 0x12345678, 0xffffc840) - -// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. -#define LDRW_PCREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WPcRel ## disp) { \ - TestAdrpLdrPcRelAdd(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRW_PCREL_ADD_TEST, 0x1234, 0x1238) - -// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. 
-#define LDRX_PCREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XPcRel ## disp) { \ - bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \ - TestAdrpLdrPcRelAdd(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRX_PCREL_ADD_TEST, 0x1234, 0x1238) - -// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. -#define LDRW_SPREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WSpRel ## disp) { \ - TestAdrpLdrSpRelAdd(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4) - -#define LDRX_SPREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XSpRel ## disp) { \ - TestAdrpLdrSpRelAdd(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8) - -void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 16 * KB); - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; - const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. 
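  // (Each (base_reg, holder_reg) pair above has distinct encoded data, so a separate Baker
  // thunk per method is expected; every CBNZ can reach past the last method, so the thunks
  // are laid out after the code, each start rounded up to kArm64Alignment.)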
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; - const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check CBZ uses the correct register, i.e. holder_reg. - ASSERT_GE(output_.size() - gray_check_offset, 4u); - ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); - gray_check_offset +=4u; - } - // Verify that the lock word for gray bit check is loaded from the holder address. - static constexpr size_t kGrayCheckInsns = 5; - ASSERT_GE(output_.size() - gray_check_offset, 4u * kGrayCheckInsns); - const uint32_t load_lock_word = - kLdrWInsn | - (mirror::Object::MonitorOffset().Uint32Value() << (10 - 2)) | - (holder_reg << 5) | - /* ip0 */ 16; - EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset)); - // Verify the gray bit check. - const uint32_t check_gray_bit_without_offset = - 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu); - // Verify the fake dependency. - const uint32_t fake_dependency = - 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 - (/* ip0 */ 16 << 16) | // Xm = ip0 - (base_reg << 5) | // Xn = base_reg - base_reg; // Xd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn(gray_check_offset + 12u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } - } -} - -#define TEST_BAKER_FIELD(offset, ref_reg) \ - TEST_F(Arm64RelativePatcherTestDefault, \ - BakerOffset##offset##_##ref_reg) { \ - TestBakerField(offset, ref_reg); \ - } - -TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15) -TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29) - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { - // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
- constexpr uint32_t kLiteralOffset1 = 4; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - // Allow reaching the thunk from the very beginning of a method 1MiB away. Backward branch - // reaches the full 1MiB. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); - size_t filler2_size = - 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - constexpr uint32_t kLiteralOffset2 = 0; - const std::vector<uint8_t> raw_code2 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; - const uint32_t cbnz_max_backward = kCbnzIP1Plus0Insn | 0x00800000; - const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({cbnz_max_backward, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { - // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction - // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
- constexpr uint32_t kLiteralOffset1 = 0; - const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - Link(); - - const uint32_t cbnz_offset = RoundUp(raw_code1.size(), kArm64Alignment) - kLiteralOffset1; - const uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - const std::vector<uint8_t> expected_code1 = RawCode({cbnz, kLdrWInsn, kNopInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) { - // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded - // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 4; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - // If not for the extra NOP, this would allow reaching the thunk from the very beginning - // of a method 1MiB away. Backward branch reaches the full 1MiB. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
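    // (The exact filler2_size does not matter; the point is that the extra NOP pushes the
    // CBNZ in the last method just past the 1 MiB backward reach of the mid-gap thunk,
    // which should force a second thunk after the last method.)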
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); - size_t filler2_size = - 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - // Extra NOP compared to BakerOffsetThunkInTheMiddle. - constexpr uint32_t kLiteralOffset2 = 4; - const std::vector<uint8_t> raw_code2 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; - const uint32_t cbnz_last_offset = RoundUp(raw_code2.size(), kArm64Alignment) - kLiteralOffset2; - const uint32_t cbnz_last = kCbnzIP1Plus0Insn | (cbnz_last_offset << (5 - 2)); - const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({kNopInsn, cbnz_last, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerArray) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - auto ldr = [](uint32_t base_reg) { - uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; - uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; - return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg; - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the lock word for gray bit check is loaded from the correct address - // before the base_reg which points to the array data. - static constexpr size_t kGrayCheckInsns = 5; - ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns); - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; - ASSERT_LT(offset, 0); - const uint32_t load_lock_word = - kLdurWInsn | - ((offset & 0x1ffu) << 12) | - (base_reg << 5) | - /* ip0 */ 16; - EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset)); - // Verify the gray bit check. - const uint32_t check_gray_bit_without_offset = - 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu); - // Verify the fake dependency. - const uint32_t fake_dependency = - 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 - (/* ip0 */ 16 << 16) | // Xm = ip0 - (base_reg << 5) | // Xn = base_reg - base_reg; // Xd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 4u; - uint32_t method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; - const std::vector<uint8_t> raw_code = RawCode({ldr, kCbnzIP1Plus0Insn}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; - const std::vector<uint8_t> expected_code = RawCode({ldr, cbnz}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. - ASSERT_GE(output_.size() - thunk_offset, 4u); - ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerAndMethodCallInteraction) { - // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` - // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily - // hold when we're reserving thunks of different sizes. This test exposes the situation - // by using Baker thunks and a method call thunk. - - // Add a method call patch that can reach to method 1 offset + 128MiB. - uint32_t method_idx = 0u; - constexpr size_t kMethodCallLiteralOffset = 4u; - constexpr uint32_t kMissingMethodIdx = 2u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u), - }; - ArrayRef<const uint8_t> code1(raw_code1); - ++method_idx; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); - - // Skip kMissingMethodIdx. - ++method_idx; - ASSERT_EQ(kMissingMethodIdx, method_idx); - // Add a method with the right size that the method code for the next one starts 1MiB - // after code for method 1. - size_t filler_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> filler_code = GenNops(filler_size / 4u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - // Add 126 methods with 1MiB code+header, making the code for the next method start 1MiB - // before the currently scheduled MaxNextOffset() for the method call thunk. - for (uint32_t i = 0; i != 126; ++i) { - filler_size = 1 * MB - sizeof(OatQuickMethodHeader); - filler_code = GenNops(filler_size / 4u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - } - - // Add 2 Baker GC root patches to the last method, one that would allow the thunk at - // 1MiB + kArm64Alignment, i.e. 
kArm64Alignment after the method call thunk, and the - // second that needs it kArm64Alignment after that. Given the size of the GC root thunk - // is more than the space required by the method call thunk plus kArm64Alignment, - // this pushes the first GC root thunk's pending MaxNextOffset() before the method call - // thunk's pending MaxNextOffset() which needs to be adjusted. - ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArm64Alignment) + kArm64Alignment, - CompileBakerGcRootThunk(/* root_reg */ 0).size()); - static_assert(kArm64Alignment == 16, "Code below assumes kArm64Alignment == 16"); - constexpr size_t kBakerLiteralOffset1 = 4u + kArm64Alignment; - constexpr size_t kBakerLiteralOffset2 = 4u + 2 * kArm64Alignment; - // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | root_reg`. - const uint32_t ldr1 = kLdrWInsn | /* root_reg */ 1; - const uint32_t ldr2 = kLdrWInsn | /* root_reg */ 2; - const std::vector<uint8_t> last_method_raw_code = RawCode({ - kNopInsn, kNopInsn, kNopInsn, kNopInsn, // Padding before first GC root read barrier. - ldr1, kCbnzIP1Plus0Insn, // First GC root LDR with read barrier. - kNopInsn, kNopInsn, // Padding before second GC root read barrier. - ldr2, kCbnzIP1Plus0Insn, // Second GC root LDR with read barrier. - }); - uint32_t encoded_data1 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); - uint32_t encoded_data2 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); - const LinkerPatch last_method_patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), - ArrayRef<const uint8_t>(last_method_raw_code), - ArrayRef<const LinkerPatch>(last_method_patches)); - - // The main purpose of the test is to check that Link() does not cause a crash. - Link(); - - ASSERT_EQ(127 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h index a5f60992cae..3da7a437627 100644 --- a/compiler/linker/elf_builder.h +++ b/compiler/linker/elf_builder.h @@ -529,6 +529,8 @@ class ElfBuilder FINAL { stream_(output), rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0), + data_bimg_rel_ro_( + this, ".data.bimg.rel.ro", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), dex_(this, ".dex", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize), @@ -552,6 +554,7 @@ class ElfBuilder FINAL { loaded_size_(0u), virtual_address_(0) { text_.phdr_flags_ = PF_R | PF_X; + data_bimg_rel_ro_.phdr_flags_ = PF_R | PF_W; // Shall be made read-only at run time. 
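The new .data.bimg.rel.ro section above is mapped PF_R | PF_W so that its boot-image-relative entries can be relocated at load time and then sealed, as the added "Shall be made read-only at run time" comment notes. A minimal sketch of that load-time pattern, assuming 32-bit entries holding offsets from the boot image base and a hypothetical SealBootImageRelRo helper (not ART's actual loader code):

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdint>

    // Hypothetical helper, not part of ART: relocate each 32-bit entry from a
    // boot-image offset to an absolute address, then drop write permission.
    // Assumes `begin` is page-aligned, as a loaded ELF section would be.
    bool SealBootImageRelRo(uint8_t* begin, size_t size, uintptr_t boot_image_begin) {
      for (size_t pos = 0; pos + sizeof(uint32_t) <= size; pos += sizeof(uint32_t)) {
        uint32_t* entry = reinterpret_cast<uint32_t*>(begin + pos);
        *entry += static_cast<uint32_t>(boot_image_begin);  // Offset -> address (assumed layout).
      }
      return mprotect(begin, size, PROT_READ) == 0;  // PF_R | PF_W becomes read-only.
    }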
bss_.phdr_flags_ = PF_R | PF_W; dex_.phdr_flags_ = PF_R; dynamic_.phdr_flags_ = PF_R | PF_W; @@ -566,6 +569,7 @@ class ElfBuilder FINAL { BuildIdSection* GetBuildId() { return &build_id_; } Section* GetRoData() { return &rodata_; } Section* GetText() { return &text_; } + Section* GetDataBimgRelRo() { return &data_bimg_rel_ro_; } Section* GetBss() { return &bss_; } Section* GetDex() { return &dex_; } StringSection* GetStrTab() { return &strtab_; } @@ -694,6 +698,7 @@ class ElfBuilder FINAL { void PrepareDynamicSection(const std::string& elf_file_path, Elf_Word rodata_size, Elf_Word text_size, + Elf_Word data_bimg_rel_ro_size, Elf_Word bss_size, Elf_Word bss_methods_offset, Elf_Word bss_roots_offset, @@ -707,6 +712,9 @@ class ElfBuilder FINAL { // Allocate all pre-dynamic sections. rodata_.AllocateVirtualMemory(rodata_size); text_.AllocateVirtualMemory(text_size); + if (data_bimg_rel_ro_size != 0) { + data_bimg_rel_ro_.AllocateVirtualMemory(data_bimg_rel_ro_size); + } if (bss_size != 0) { bss_.AllocateVirtualMemory(bss_size); } @@ -735,6 +743,24 @@ class ElfBuilder FINAL { Elf_Word oatlastword_address = rodata_.GetAddress() + rodata_size - 4; dynsym_.Add(oatlastword, &rodata_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); } + if (data_bimg_rel_ro_size != 0u) { + Elf_Word oatdatabimgrelro = dynstr_.Add("oatdatabimgrelro"); + dynsym_.Add(oatdatabimgrelro, + &data_bimg_rel_ro_, + data_bimg_rel_ro_.GetAddress(), + data_bimg_rel_ro_size, + STB_GLOBAL, + STT_OBJECT); + Elf_Word oatdatabimgrelrolastword = dynstr_.Add("oatdatabimgrelrolastword"); + Elf_Word oatdatabimgrelrolastword_address = + data_bimg_rel_ro_.GetAddress() + data_bimg_rel_ro_size - 4; + dynsym_.Add(oatdatabimgrelrolastword, + &data_bimg_rel_ro_, + oatdatabimgrelrolastword_address, + 4, + STB_GLOBAL, + STT_OBJECT); + } DCHECK_LE(bss_roots_offset, bss_size); if (bss_size != 0u) { Elf_Word oatbss = dynstr_.Add("oatbss"); @@ -1010,6 +1036,7 @@ class ElfBuilder FINAL { Section rodata_; Section text_; + Section data_bimg_rel_ro_; Section bss_; Section dex_; CachedStringSection dynstr_; diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h index 77d689d4dbb..7b35fd9b0c3 100644 --- a/compiler/linker/linker_patch.h +++ b/compiler/linker/linker_patch.h @@ -41,19 +41,27 @@ class LinkerPatch { // choose to squeeze the Type into fewer than 8 bits, we'll have to declare // patch_type_ as an uintN_t and do explicit static_cast<>s. enum class Type : uint8_t { + kDataBimgRelRo, // NOTE: Actual patching is instruction_set-dependent. kMethodRelative, // NOTE: Actual patching is instruction_set-dependent. kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent. kCall, kCallRelative, // NOTE: Actual patching is instruction_set-dependent. kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. - kTypeClassTable, // NOTE: Actual patching is instruction_set-dependent. kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. kStringRelative, // NOTE: Actual patching is instruction_set-dependent. - kStringInternTable, // NOTE: Actual patching is instruction_set-dependent. kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. 
}; + static LinkerPatch DataBimgRelRoPatch(size_t literal_offset, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + LinkerPatch patch(literal_offset, Type::kDataBimgRelRo, /* target_dex_file */ nullptr); + patch.boot_image_offset_ = boot_image_offset; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + static LinkerPatch RelativeMethodPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -100,16 +108,6 @@ class LinkerPatch { return patch; } - static LinkerPatch TypeClassTablePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t target_type_idx) { - LinkerPatch patch(literal_offset, Type::kTypeClassTable, target_dex_file); - patch.type_idx_ = target_type_idx; - patch.pc_insn_offset_ = pc_insn_offset; - return patch; - } - static LinkerPatch TypeBssEntryPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -130,16 +128,6 @@ class LinkerPatch { return patch; } - static LinkerPatch StringInternTablePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t target_string_idx) { - LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file); - patch.string_idx_ = target_string_idx; - patch.pc_insn_offset_ = pc_insn_offset; - return patch; - } - static LinkerPatch StringBssEntryPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -153,7 +141,7 @@ class LinkerPatch { static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) { - LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, nullptr); + LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, /* target_dex_file */ nullptr); patch.baker_custom_value1_ = custom_value1; patch.baker_custom_value2_ = custom_value2; return patch; @@ -172,14 +160,13 @@ class LinkerPatch { bool IsPcRelative() const { switch (GetType()) { + case Type::kDataBimgRelRo: case Type::kMethodRelative: case Type::kMethodBssEntry: case Type::kCallRelative: case Type::kTypeRelative: - case Type::kTypeClassTable: case Type::kTypeBssEntry: case Type::kStringRelative: - case Type::kStringInternTable: case Type::kStringBssEntry: case Type::kBakerReadBarrierBranch: return true; @@ -188,6 +175,11 @@ class LinkerPatch { } } + uint32_t BootImageOffset() const { + DCHECK(patch_type_ == Type::kDataBimgRelRo); + return boot_image_offset_; + } + MethodReference TargetMethod() const { DCHECK(patch_type_ == Type::kMethodRelative || patch_type_ == Type::kMethodBssEntry || @@ -198,40 +190,35 @@ class LinkerPatch { const DexFile* TargetTypeDexFile() const { DCHECK(patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry); return target_dex_file_; } dex::TypeIndex TargetTypeIndex() const { DCHECK(patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry); return dex::TypeIndex(type_idx_); } const DexFile* TargetStringDexFile() const { DCHECK(patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return target_dex_file_; } dex::StringIndex TargetStringIndex() const { DCHECK(patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return dex::StringIndex(string_idx_); } uint32_t PcInsnOffset() const { - DCHECK(patch_type_ == 
Type::kMethodRelative || + DCHECK(patch_type_ == Type::kDataBimgRelRo || + patch_type_ == Type::kMethodRelative || patch_type_ == Type::kMethodBssEntry || patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry || patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return pc_insn_offset_; } @@ -263,10 +250,11 @@ class LinkerPatch { uint32_t literal_offset_ : 24; // Method code size up to 16MiB. Type patch_type_ : 8; union { - uint32_t cmp1_; // Used for relational operators. - uint32_t method_idx_; // Method index for Call/Method patches. - uint32_t type_idx_; // Type index for Type patches. - uint32_t string_idx_; // String index for String patches. + uint32_t cmp1_; // Used for relational operators. + uint32_t boot_image_offset_; // Data to write to the .data.bimg.rel.ro entry. + uint32_t method_idx_; // Method index for Call/Method patches. + uint32_t type_idx_; // Type index for Type patches. + uint32_t string_idx_; // String index for String patches. uint32_t baker_custom_value1_; static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators"); diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc deleted file mode 100644 index 69e0846cb7e..00000000000 --- a/compiler/linker/mips/relative_patcher_mips.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/mips/relative_patcher_mips.h" - -#include "compiled_method.h" -#include "debug/method_debug_info.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -uint32_t MipsRelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. 
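Tying back to the linker_patch.h hunk above: the new kDataBimgRelRo kind carries a boot-image offset in the union instead of a dex-file reference, so its factory takes no DexFile. A small illustrative use of the added factory and accessors, with invented offsets (a sketch, not code from this change):

    #include <cassert>
    #include "linker/linker_patch.h"  // Header shown in this diff.

    void IllustrateDataBimgRelRoPatch() {
      using art::linker::LinkerPatch;
      // Offsets below are invented for the example.
      LinkerPatch patch = LinkerPatch::DataBimgRelRoPatch(
          /* literal_offset */ 8u,           // Position in the method code to patch.
          /* pc_insn_offset */ 4u,           // Anchor instruction for the PC-relative math.
          /* boot_image_offset */ 0x1000u);  // Value destined for the .data.bimg.rel.ro entry.
      assert(patch.GetType() == LinkerPatch::Type::kDataBimgRelRo);
      assert(patch.IsPcRelative());
      assert(patch.BootImageOffset() == 0x1000u);
      assert(patch.PcInsnOffset() == 4u);
    }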
-} - -void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS"; -} - -void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - bool high_patch = ((*code)[literal_offset + 0] == 0x34) && ((*code)[literal_offset + 1] == 0x12); - - // Perform basic sanity checks. - if (high_patch) { - if (is_r6) { - // auipc reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); - DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); - } else { - // lui reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00); - DCHECK_EQ((*code)[literal_offset + 3], 0x3C); - } - } else { - // instr reg(s), offset_low - CHECK_EQ((*code)[literal_offset + 0], 0x78); - CHECK_EQ((*code)[literal_offset + 1], 0x56); - } - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - diff += (diff & 0x8000) << 1; // Account for sign extension in "instr reg(s), offset_low". - - if (high_patch) { - // lui reg, offset_high / auipc reg, offset_high - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); - } else { - // instr reg(s), offset_low - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 8); - } -} - -void MipsRelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -std::vector<debug::MethodDebugInfo> MipsRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h deleted file mode 100644 index 5714a7d1b0c..00000000000 --- a/compiler/linker/mips/relative_patcher_mips.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ -#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ - -#include "arch/mips/instruction_set_features_mips.h" -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class MipsRelativePatcher FINAL : public RelativePatcher { - public: - explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features) - : is_r6(features->IsR6()) {} - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - private: - bool is_r6; - - DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc deleted file mode 100644 index 629fdd535de..00000000000 --- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips/relative_patcher_mips.h" -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class Mips32r6RelativePatcherTest : public RelativePatcherTest { - public: - Mips32r6RelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips, "mips32r6") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t Mips32r6RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x8E, // lw s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetHigh = 0; // At auipc. -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetLow1 = 4; // At addiu. -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetLow2 = 8; // At lw. -const uint32_t Mips32r6RelativePatcherTest::kAnchorOffset = 0; // At auipc (where PC+0 points). -const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in addiu/lw. 
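The `diff += (diff & 0x8000) << 1` adjustment that recurs in these patchers and tests pre-compensates the high half for the sign extension the CPU applies to the low 16-bit immediate of addiu/lw (daddiu/lwu on MIPS64). A small self-contained check of that identity, independent of the deleted code:

    #include <cassert>
    #include <cstdint>

    // For any 32-bit displacement, the split below satisfies
    // (high << 16) + sign_extend16(low) == diff (mod 2^32), which is exactly
    // what lui/auipc followed by addiu/lw compute.
    void CheckHighLowSplit(uint32_t diff) {
      uint32_t adjusted = diff + ((diff & 0x8000u) << 1);  // Carry the borrow into the high half.
      uint32_t high = adjusted >> 16;
      int32_t low = static_cast<int16_t>(diff & 0xFFFFu);  // Sign-extended, as the hardware does.
      uint32_t recombined = (high << 16) + static_cast<uint32_t>(low);
      assert(recombined == diff);
    }

    int main() {
      CheckHighLowSplit(0x00001234u);
      CheckHighLowSplit(0x0000ABCDu);  // Low half has bit 15 set: needs the adjustment.
      CheckHighLowSplit(0xFFFF8000u);  // Negative displacement.
      return 0;
    }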
- - const uint8_t expected_code[] = { - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x8E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void Mips32r6RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -TEST_F(Mips32r6RelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -TEST_F(Mips32r6RelativePatcherTest, StringReference) { - TestStringReference(/* string_offset*/ 0x87651234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc deleted file mode 100644 index d876c76daae..00000000000 --- a/compiler/linker/mips/relative_patcher_mips_test.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips/relative_patcher_mips.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class MipsRelativePatcherTest : public RelativePatcherTest { - public: - MipsRelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips, "mips32r2") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t MipsRelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x00, 0x00, 0x10, 0x04, // nal - 0x34, 0x12, 0x12, 0x3C, // lui s2, high(diff); placeholder = 0x1234 - 0x21, 0x90, 0x5F, 0x02, // addu s2, s2, ra - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x8E, // lw s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t MipsRelativePatcherTest::kLiteralOffsetHigh = 4; // At lui. -const uint32_t MipsRelativePatcherTest::kLiteralOffsetLow1 = 12; // At addiu. -const uint32_t MipsRelativePatcherTest::kLiteralOffsetLow2 = 16; // At lw. -const uint32_t MipsRelativePatcherTest::kAnchorOffset = 8; // At addu (where PC+0 points). -const ArrayRef<const uint8_t> MipsRelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in addiu/lw. 
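The expected_code arrays in these tests hand-assemble the patched instructions byte by byte: MIPS instructions are stored little-endian here, so the 16-bit immediate occupies the first two bytes of each 4-byte instruction and the opcode/register fields sit in the upper bytes. A tiny helper expressing the same byte layout (a sketch, not code from this change):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Overwrite the 16-bit immediate of a little-endian MIPS instruction that
    // starts at `insn_offset` in `code`, leaving the opcode/register bytes intact.
    void SetImmediate16(std::vector<uint8_t>* code, size_t insn_offset, uint16_t imm) {
      (*code)[insn_offset + 0] = static_cast<uint8_t>(imm);       // Immediate bits 0..7.
      (*code)[insn_offset + 1] = static_cast<uint8_t>(imm >> 8);  // Immediate bits 8..15.
    }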
- - const uint8_t expected_code[] = { - 0x00, 0x00, 0x10, 0x04, - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C, - 0x21, 0x90, 0x5F, 0x02, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x8E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void MipsRelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -TEST_F(MipsRelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -TEST_F(MipsRelativePatcherTest, StringReference) { - TestStringReference(/* string_offset*/ 0x87651234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips64/relative_patcher_mips64.cc b/compiler/linker/mips64/relative_patcher_mips64.cc deleted file mode 100644 index aae5746278d..00000000000 --- a/compiler/linker/mips64/relative_patcher_mips64.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/mips64/relative_patcher_mips64.h" - -#include "compiled_method.h" -#include "debug/method_debug_info.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -uint32_t Mips64RelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t Mips64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. 
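As a concrete instance of what TestStringBssEntry above links against: the target of a kStringBssEntry patch is the .bss slot at bss_begin_ + string_entry_offset, and the patched displacement is measured from the anchor instruction. With the constants the test uses and an assumed anchor address, the arithmetic works out as follows (illustrative values only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t bss_begin = 0x12345678u;          // From TestStringBssEntry above.
      const uint32_t string_entry_offset = 0x1234u;    // From TestStringBssEntry above.
      const uint32_t anchor_address = 0x00001000u;     // Assumed method offset + kAnchorOffset.
      const uint32_t target_offset = bss_begin + string_entry_offset;
      assert(target_offset == 0x123468ACu);
      uint32_t diff = target_offset - anchor_address;  // What the patcher splits into high/low halves.
      assert(diff == 0x123458ACu);
      return 0;
    }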
-} - -uint32_t Mips64RelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. -} - -void Mips64RelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS64"; -} - -void Mips64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - bool high_patch = ((*code)[literal_offset + 0] == 0x34) && ((*code)[literal_offset + 1] == 0x12); - - // Perform basic sanity checks. - if (high_patch) { - // auipc reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); - DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); - } else { - // instr reg(s), offset_low - CHECK_EQ((*code)[literal_offset + 0], 0x78); - CHECK_EQ((*code)[literal_offset + 1], 0x56); - } - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - // Note that a combination of auipc with an instruction that adds a sign-extended - // 16-bit immediate operand (e.g. ld) provides a PC-relative range of - // PC-0x80000000 to PC+0x7FFF7FFF on MIPS64, that is, short of 2GB on one end - // by 32KB. - diff += (diff & 0x8000) << 1; // Account for sign extension in "instr reg(s), offset_low". - - if (high_patch) { - // auipc reg, offset_high - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); - } else { - // instr reg(s), offset_low - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 8); - } -} - -void Mips64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -std::vector<debug::MethodDebugInfo> Mips64RelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips64/relative_patcher_mips64.h b/compiler/linker/mips64/relative_patcher_mips64.h deleted file mode 100644 index 183bbedb396..00000000000 --- a/compiler/linker/mips64/relative_patcher_mips64.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ -#define ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ - -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class Mips64RelativePatcher FINAL : public RelativePatcher { - public: - Mips64RelativePatcher() {} - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - private: - DISALLOW_COPY_AND_ASSIGN(Mips64RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ diff --git a/compiler/linker/mips64/relative_patcher_mips64_test.cc b/compiler/linker/mips64/relative_patcher_mips64_test.cc deleted file mode 100644 index a02f5005e8c..00000000000 --- a/compiler/linker/mips64/relative_patcher_mips64_test.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips64/relative_patcher_mips64.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class Mips64RelativePatcherTest : public RelativePatcherTest { - public: - Mips64RelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips64, "default") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint8_t kUnpatchedPcRelativeCallRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t Mips64RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x66, // daddiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x9E, // lwu s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetHigh = 0; // At auipc. -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetLow1 = 4; // At daddiu. -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetLow2 = 8; // At lwu. -const uint32_t Mips64RelativePatcherTest::kAnchorOffset = 0; // At auipc (where PC+0 points). -const ArrayRef<const uint8_t> Mips64RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void Mips64RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in daddiu/lwu. 
- - const uint8_t expected_code[] = { - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x66, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x9E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void Mips64RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -TEST_F(Mips64RelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc deleted file mode 100644 index 13877f8f128..00000000000 --- a/compiler/linker/relative_patcher.cc +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/relative_patcher.h" - -#include "debug/method_debug_info.h" -#ifdef ART_ENABLE_CODEGEN_arm -#include "linker/arm/relative_patcher_thumb2.h" -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 -#include "linker/arm64/relative_patcher_arm64.h" -#endif -#ifdef ART_ENABLE_CODEGEN_mips -#include "linker/mips/relative_patcher_mips.h" -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 -#include "linker/mips64/relative_patcher_mips64.h" -#endif -#ifdef ART_ENABLE_CODEGEN_x86 -#include "linker/x86/relative_patcher_x86.h" -#endif -#ifdef ART_ENABLE_CODEGEN_x86_64 -#include "linker/x86_64/relative_patcher_x86_64.h" -#endif -#include "output_stream.h" - -namespace art { -namespace linker { - -std::unique_ptr<RelativePatcher> RelativePatcher::Create( - InstructionSet instruction_set, - const InstructionSetFeatures* features, - RelativePatcherTargetProvider* provider) { - class RelativePatcherNone FINAL : public RelativePatcher { - public: - RelativePatcherNone() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no patches expected. 
- } - - void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative call patch."; - } - - void PatchPcRelativeReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative dex cache array patch."; - } - - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unexpected baker read barrier branch patch."; - } - - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) OVERRIDE { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. - } - - private: - DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone); - }; - - UNUSED(features); - UNUSED(provider); - switch (instruction_set) { -#ifdef ART_ENABLE_CODEGEN_x86 - case InstructionSet::kX86: - return std::unique_ptr<RelativePatcher>(new X86RelativePatcher()); -#endif -#ifdef ART_ENABLE_CODEGEN_x86_64 - case InstructionSet::kX86_64: - return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher()); -#endif -#ifdef ART_ENABLE_CODEGEN_arm - case InstructionSet::kArm: - // Fall through: we generate Thumb2 code for "arm". - case InstructionSet::kThumb2: - return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider)); -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 - case InstructionSet::kArm64: - return std::unique_ptr<RelativePatcher>( - new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); -#endif -#ifdef ART_ENABLE_CODEGEN_mips - case InstructionSet::kMips: - return std::unique_ptr<RelativePatcher>( - new MipsRelativePatcher(features->AsMipsInstructionSetFeatures())); -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - case InstructionSet::kMips64: - return std::unique_ptr<RelativePatcher>(new Mips64RelativePatcher()); -#endif - default: - return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); - } -} - -bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { - static const uint8_t kPadding[] = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u - }; - DCHECK_LE(aligned_code_delta, sizeof(kPadding)); - if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { - return false; - } - size_code_alignment_ += aligned_code_delta; - return true; -} - -bool RelativePatcher::WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { - if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { - return false; - } - size_relative_call_thunks_ += thunk.size(); - return true; -} - -bool RelativePatcher::WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { - if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { - return false; - } - size_misc_thunks_ += thunk.size(); - return true; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h deleted file mode 100644 index b58e3dffbd6..00000000000 --- a/compiler/linker/relative_patcher.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file 
except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ -#define ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ - -#include <vector> - -#include "arch/instruction_set.h" -#include "arch/instruction_set_features.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "dex/method_reference.h" - -namespace art { - -class CompiledMethod; - -namespace debug { -struct MethodDebugInfo; -} // namespace debug - -namespace linker { - -class LinkerPatch; -class OutputStream; - -/** - * @class RelativePatcherTargetProvider - * @brief Interface for providing method offsets for relative call targets. - */ -class RelativePatcherTargetProvider { - public: - /** - * Find the offset of the target method of a relative call if known. - * - * The process of assigning target method offsets includes calls to the relative patcher's - * ReserveSpace() which in turn can use FindMethodOffset() to determine if a method already - * has an offset assigned and, if so, what's that offset. If the offset has not yet been - * assigned or if it's too far for the particular architecture's relative call, - * ReserveSpace() may need to allocate space for a special dispatch thunk. - * - * @param ref the target method of the relative call. - * @return true in the first element of the pair if the method was found, false otherwise; - * if found, the second element specifies the offset. - */ - virtual std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) = 0; - - protected: - virtual ~RelativePatcherTargetProvider() { } -}; - -/** - * @class RelativePatcher - * @brief Interface for architecture-specific link-time patching of PC-relative references. - */ -class RelativePatcher { - public: - static std::unique_ptr<RelativePatcher> Create( - InstructionSet instruction_set, const InstructionSetFeatures* features, - RelativePatcherTargetProvider* provider); - - virtual ~RelativePatcher() { } - - uint32_t CodeAlignmentSize() const { - return size_code_alignment_; - } - - uint32_t RelativeCallThunksSize() const { - return size_relative_call_thunks_; - } - - uint32_t MiscThunksSize() const { - return size_misc_thunks_; - } - - // Reserve space for thunks if needed before a method, return adjusted offset. - virtual uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) = 0; - - // Reserve space for thunks if needed after the last method, return adjusted offset. - // The caller may use this method to preemptively force thunk space reservation and - // then resume reservation for more methods. This is useful when there is a gap in - // the .text segment, for example when going to the next oat file for multi-image. - virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0; - - // Write relative call thunks if needed, return adjusted offset. Returns 0 on write failure. - virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; - - // Patch method code. The input displacement is relative to the patched location, - // the patcher may need to adjust it if the correct base is different. 
- virtual void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) = 0; - - // Patch a reference to a dex cache location. - virtual void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) = 0; - - // Patch a branch to a Baker read barrier thunk. - virtual void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) = 0; - - virtual std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo( - uint32_t executable_offset) = 0; - - protected: - RelativePatcher() - : size_code_alignment_(0u), - size_relative_call_thunks_(0u), - size_misc_thunks_(0u) { - } - - bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); - bool WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); - bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); - - private: - uint32_t size_code_alignment_; - uint32_t size_relative_call_thunks_; - uint32_t size_misc_thunks_; - - DISALLOW_COPY_AND_ASSIGN(RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h deleted file mode 100644 index d21f2795b98..00000000000 --- a/compiler/linker/relative_patcher_test.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ -#define ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ - -#include "arch/instruction_set.h" -#include "arch/instruction_set_features.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "compiled_method-inl.h" -#include "dex/verification_results.h" -#include "dex/method_reference.h" -#include "dex/string_reference.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_options.h" -#include "globals.h" -#include "gtest/gtest.h" -#include "linker/relative_patcher.h" -#include "oat.h" -#include "oat_quick_method_header.h" -#include "vector_output_stream.h" - -namespace art { -namespace linker { - -// Base class providing infrastructure for architecture-specific tests. 
-class RelativePatcherTest : public testing::Test { - protected: - RelativePatcherTest(InstructionSet instruction_set, const std::string& variant) - : compiler_options_(), - verification_results_(&compiler_options_), - driver_(&compiler_options_, - &verification_results_, - Compiler::kQuick, - instruction_set, - /* instruction_set_features*/ nullptr, - /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, - /* thread_count */ 1u, - /* swap_fd */ -1, - /* profile_compilation_info */ nullptr), - error_msg_(), - instruction_set_(instruction_set), - features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), - method_offset_map_(), - patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)), - bss_begin_(0u), - compiled_method_refs_(), - compiled_methods_(), - patched_code_(), - output_(), - out_("test output stream", &output_) { - CHECK(error_msg_.empty()) << instruction_set << "/" << variant; - patched_code_.reserve(16 * KB); - } - - MethodReference MethodRef(uint32_t method_idx) { - CHECK_NE(method_idx, 0u); - return MethodReference(nullptr, method_idx); - } - - void AddCompiledMethod( - MethodReference method_ref, - const ArrayRef<const uint8_t>& code, - const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>()) { - compiled_method_refs_.push_back(method_ref); - compiled_methods_.emplace_back(new CompiledMethod( - &driver_, - instruction_set_, - code, - /* frame_size_in_bytes */ 0u, - /* core_spill_mask */ 0u, - /* fp_spill_mask */ 0u, - /* method_info */ ArrayRef<const uint8_t>(), - /* vmap_table */ ArrayRef<const uint8_t>(), - /* cfi_info */ ArrayRef<const uint8_t>(), - patches)); - } - - uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) { - // We want to align the code rather than the preheader. - uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader); - uint32_t aligned_code_offset = - CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_); - return aligned_code_offset - unaligned_code_offset; - } - - void Link() { - // Reserve space. - static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset."); - uint32_t offset = kTrampolineSize; - size_t idx = 0u; - for (auto& compiled_method : compiled_methods_) { - offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]); - - uint32_t alignment_size = CodeAlignmentSize(offset); - offset += alignment_size; - - offset += sizeof(OatQuickMethodHeader); - uint32_t quick_code_offset = offset + compiled_method->CodeDelta(); - const auto code = compiled_method->GetQuickCode(); - offset += code.size(); - - method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset); - ++idx; - } - offset = patcher_->ReserveSpaceEnd(offset); - uint32_t output_size = offset; - output_.reserve(output_size); - - // Write data. 
- DCHECK(output_.empty()); - uint8_t dummy_trampoline[kTrampolineSize]; - memset(dummy_trampoline, 0, sizeof(dummy_trampoline)); - out_.WriteFully(dummy_trampoline, kTrampolineSize); - offset = kTrampolineSize; - static const uint8_t kPadding[] = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u - }; - uint8_t dummy_header[sizeof(OatQuickMethodHeader)]; - memset(dummy_header, 0, sizeof(dummy_header)); - for (auto& compiled_method : compiled_methods_) { - offset = patcher_->WriteThunks(&out_, offset); - - uint32_t alignment_size = CodeAlignmentSize(offset); - CHECK_LE(alignment_size, sizeof(kPadding)); - out_.WriteFully(kPadding, alignment_size); - offset += alignment_size; - - out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader)); - offset += sizeof(OatQuickMethodHeader); - ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); - if (!compiled_method->GetPatches().empty()) { - patched_code_.assign(code.begin(), code.end()); - code = ArrayRef<const uint8_t>(patched_code_); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod()); - uint32_t target_offset = - result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta(); - patcher_->PatchCall(&patched_code_, patch.LiteralOffset(), - offset + patch.LiteralOffset(), target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { - uint32_t target_offset = - bss_begin_ + string_index_to_offset_map_.Get(patch.TargetStringIndex().index_); - patcher_->PatchPcRelativeReference(&patched_code_, - patch, - offset + patch.LiteralOffset(), - target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kStringRelative) { - uint32_t target_offset = - string_index_to_offset_map_.Get(patch.TargetStringIndex().index_); - patcher_->PatchPcRelativeReference(&patched_code_, - patch, - offset + patch.LiteralOffset(), - target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - patcher_->PatchBakerReadBarrierBranch(&patched_code_, - patch, - offset + patch.LiteralOffset()); - } else { - LOG(FATAL) << "Bad patch type. " << patch.GetType(); - UNREACHABLE(); - } - } - } - out_.WriteFully(&code[0], code.size()); - offset += code.size(); - } - offset = patcher_->WriteThunks(&out_, offset); - CHECK_EQ(offset, output_size); - CHECK_EQ(output_.size(), output_size); - } - - bool CheckLinkedMethod(MethodReference method_ref, const ArrayRef<const uint8_t>& expected_code) { - // Sanity check: original code size must match linked_code.size(). - size_t idx = 0u; - for (auto ref : compiled_method_refs_) { - if (ref == method_ref) { - break; - } - ++idx; - } - CHECK_NE(idx, compiled_method_refs_.size()); - CHECK_EQ(compiled_methods_[idx]->GetQuickCode().size(), expected_code.size()); - - auto result = method_offset_map_.FindMethodOffset(method_ref); - CHECK(result.first); // Must have been linked. - size_t offset = result.second - compiled_methods_[idx]->CodeDelta(); - CHECK_LT(offset, output_.size()); - CHECK_LE(offset + expected_code.size(), output_.size()); - ArrayRef<const uint8_t> linked_code(&output_[offset], expected_code.size()); - if (linked_code == expected_code) { - return true; - } - // Log failure info. 
- DumpDiff(expected_code, linked_code); - return false; - } - - void DumpDiff(const ArrayRef<const uint8_t>& expected_code, - const ArrayRef<const uint8_t>& linked_code) { - std::ostringstream expected_hex; - std::ostringstream linked_hex; - std::ostringstream diff_indicator; - static const char digits[] = "0123456789abcdef"; - bool found_diff = false; - for (size_t i = 0; i != expected_code.size(); ++i) { - expected_hex << " " << digits[expected_code[i] >> 4] << digits[expected_code[i] & 0xf]; - linked_hex << " " << digits[linked_code[i] >> 4] << digits[linked_code[i] & 0xf]; - if (!found_diff) { - found_diff = (expected_code[i] != linked_code[i]); - diff_indicator << (found_diff ? " ^^" : " "); - } - } - CHECK(found_diff); - std::string expected_hex_str = expected_hex.str(); - std::string linked_hex_str = linked_hex.str(); - std::string diff_indicator_str = diff_indicator.str(); - if (diff_indicator_str.length() > 60) { - CHECK_EQ(diff_indicator_str.length() % 3u, 0u); - size_t remove = diff_indicator_str.length() / 3 - 5; - std::ostringstream oss; - oss << "[stripped " << remove << "]"; - std::string replacement = oss.str(); - expected_hex_str.replace(0u, remove * 3u, replacement); - linked_hex_str.replace(0u, remove * 3u, replacement); - diff_indicator_str.replace(0u, remove * 3u, replacement); - } - LOG(ERROR) << "diff expected_code linked_code"; - LOG(ERROR) << "<" << expected_hex_str; - LOG(ERROR) << ">" << linked_hex_str; - LOG(ERROR) << " " << diff_indicator_str; - } - - // Map method reference to assinged offset. - // Wrap the map in a class implementing RelativePatcherTargetProvider. - class MethodOffsetMap FINAL : public RelativePatcherTargetProvider { - public: - std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE { - auto it = map.find(ref); - if (it == map.end()) { - return std::pair<bool, uint32_t>(false, 0u); - } else { - return std::pair<bool, uint32_t>(true, it->second); - } - } - SafeMap<MethodReference, uint32_t> map; - }; - - static const uint32_t kTrampolineSize = 4u; - static const uint32_t kTrampolineOffset = 0u; - - CompilerOptions compiler_options_; - VerificationResults verification_results_; - CompilerDriver driver_; // Needed for constructing CompiledMethod. - std::string error_msg_; - InstructionSet instruction_set_; - std::unique_ptr<const InstructionSetFeatures> features_; - MethodOffsetMap method_offset_map_; - std::unique_ptr<RelativePatcher> patcher_; - uint32_t bss_begin_; - SafeMap<uint32_t, uint32_t> string_index_to_offset_map_; - std::vector<MethodReference> compiled_method_refs_; - std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_; - std::vector<uint8_t> patched_code_; - std::vector<uint8_t> output_; - VectorOutputStream out_; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc deleted file mode 100644 index cdd2cef13ab..00000000000 --- a/compiler/linker/x86/relative_patcher_x86.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86/relative_patcher_x86.h" - -#include "compiled_method.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -void X86RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - - // Check that the anchor points to pop in a "call +0; pop <reg>" sequence. - DCHECK_GE(anchor_literal_offset, 5u); - DCHECK_LT(anchor_literal_offset, code->size()); - DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u); - DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u); - - // Check that the patched data contains kDummy32BitOffset. - // Must match X86Mir2Lir::kDummy32BitOffset and CodeGeneratorX86_64::kDummy32BitOffset. - constexpr int kDummy32BitOffset = 256; - DCHECK_LE(literal_offset, code->size()); - DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0)); - DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8)); - DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16)); - DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24)); - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8); - (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24); -} - -void X86RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h deleted file mode 100644 index 63a83387223..00000000000 --- a/compiler/linker/x86/relative_patcher_x86.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ -#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ - -#include "linker/x86/relative_patcher_x86_base.h" - -namespace art { -namespace linker { - -class X86RelativePatcher FINAL : public X86BaseRelativePatcher { - public: - X86RelativePatcher() { } - - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc deleted file mode 100644 index 6a9690d7681..00000000000 --- a/compiler/linker/x86/relative_patcher_x86_base.cc +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86/relative_patcher_x86_base.h" - -#include "debug/method_debug_info.h" - -namespace art { -namespace linker { - -uint32_t X86BaseRelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t X86BaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. -} - -std::vector<debug::MethodDebugInfo> X86BaseRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. 
- - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h deleted file mode 100644 index 6097345657d..00000000000 --- a/compiler/linker/x86/relative_patcher_x86_base.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ -#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ - -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class X86BaseRelativePatcher : public RelativePatcher { - public: - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - protected: - X86BaseRelativePatcher() { } - - // PC displacement from patch location; the base address of x86/x86-64 relative - // calls and x86-64 RIP-relative addressing is the PC of the next instruction and - // the patch location is 4 bytes earlier. - static constexpr int32_t kPcDisplacement = 4; - - private: - DISALLOW_COPY_AND_ASSIGN(X86BaseRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc deleted file mode 100644 index b855dec91db..00000000000 --- a/compiler/linker/x86/relative_patcher_x86_test.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/x86/relative_patcher_x86.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class X86RelativePatcherTest : public RelativePatcherTest { - public: - X86RelativePatcherTest() : RelativePatcherTest(InstructionSet::kX86, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } -}; - -const uint8_t X86RelativePatcherTest::kCallRawCode[] = { - 0xe8, 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86RelativePatcherTest::kCallCode(kCallRawCode); - -TEST_F(X86RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xe8, 0xfb, 0xff, 0xff, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method1_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_after), - static_cast<uint8_t>(diff_after >> 8), - static_cast<uint8_t>(diff_after >> 16), - static_cast<uint8_t>(diff_after >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method2_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_before), - static_cast<uint8_t>(diff_before >> 8), - static_cast<uint8_t>(diff_before >> 16), - static_cast<uint8_t>(diff_before >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(X86RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1)); - ASSERT_TRUE(result.first); - uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); - static const uint8_t expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, StringBssEntry) { - bss_begin_ = 0x12345678; - constexpr size_t kStringEntryOffset = 0x1234; - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); - static const uint8_t 
raw_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8b, 0x83, 0x00, 0x01, 0x00, 0x00, // mov eax, [ebx + 256 (kDummy32BitValue)] - }; - constexpr uint32_t anchor_offset = 5u; // After call +0. - ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + anchor_offset); - static const uint8_t expected_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8b, 0x83, // mov eax, [ebx + diff] - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, StringReference) { - constexpr uint32_t kStringIndex = 1u; - constexpr uint32_t kStringOffset = 0x12345678; - string_index_to_offset_map_.Put(kStringIndex, kStringOffset); - static const uint8_t raw_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8d, 0x83, 0x00, 0x01, 0x00, 0x00, // lea eax, [ebx + 256 (kDummy32BitValue)] - }; - constexpr uint32_t anchor_offset = 5u; // After call +0. - ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kStringOffset - (result.second + anchor_offset); - static const uint8_t expected_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8d, 0x83, // lea eax, [ebx + diff] - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc deleted file mode 100644 index 96335649990..00000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/x86_64/relative_patcher_x86_64.h" - -#include "compiled_method.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(patch.LiteralOffset() + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; -} - -void X86_64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h deleted file mode 100644 index 4f3ec498cb8..00000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ -#define ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ - -#include "linker/x86/relative_patcher_x86_base.h" - -namespace art { -namespace linker { - -class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { - public: - X86_64RelativePatcher() { } - - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc deleted file mode 100644 index 6baa92de36c..00000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86_64/relative_patcher_x86_64.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class X86_64RelativePatcherTest : public RelativePatcherTest { - public: - X86_64RelativePatcherTest() : RelativePatcherTest(InstructionSet::kX86_64, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kDexCacheLoadRawCode[]; - static const ArrayRef<const uint8_t> kDexCacheLoadCode; - static const uint8_t kStringReferenceRawCode[]; - static const ArrayRef<const uint8_t> kStringReferenceCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } -}; - -const uint8_t X86_64RelativePatcherTest::kCallRawCode[] = { - 0xe8, 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t X86_64RelativePatcherTest::kDexCacheLoadRawCode[] = { - 0x8b, 0x05, // mov eax, [rip + <offset>] - 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode( - kDexCacheLoadRawCode); - -const uint8_t X86_64RelativePatcherTest::kStringReferenceRawCode[] = { - 0x8d, 0x05, // lea eax, [rip + <offset>] - 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kStringReferenceCode( - kStringReferenceRawCode); - -TEST_F(X86_64RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xe8, 0xfb, 0xff, 0xff, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method1_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_after), - static_cast<uint8_t>(diff_after >> 8), - static_cast<uint8_t>(diff_after >> 16), - static_cast<uint8_t>(diff_after >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method2_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_before), - static_cast<uint8_t>(diff_before >> 8), - static_cast<uint8_t>(diff_before >> 16), - static_cast<uint8_t>(diff_before >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { - 
LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); - static const uint8_t expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, StringBssEntry) { - bss_begin_ = 0x12345678; - constexpr size_t kStringEntryOffset = 0x1234; - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + kDexCacheLoadCode.size()); - static const uint8_t expected_code[] = { - 0x8b, 0x05, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, StringReference) { - constexpr uint32_t kStringIndex = 1u; - constexpr uint32_t kStringOffset = 0x12345678; - string_index_to_offset_map_.Put(kStringIndex, kStringOffset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch( - kStringReferenceCode.size() - 4u, nullptr, 0u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), kStringReferenceCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kStringOffset - (result.second + kStringReferenceCode.size()); - static const uint8_t expected_code[] = { - 0x8d, 0x05, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -} // namespace linker -} // namespace art diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 6abda9b3026..231017f55e6 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -51,6 +51,8 @@ #include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "graph_visualizer.h" +#include "image.h" +#include "gc/space/image_space.h" #include "intern_table.h" #include "intrinsics.h" #include "mirror/array-inl.h" @@ -447,6 +449,18 @@ void CodeGenerator::EmitLinkerPatches( // No linker patches by default. } +bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const { + // Code generators that create patches requiring thunk compilation should override this function. 
+ return false; +} + +void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, + /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED, + /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) { + // Code generators that create patches requiring thunk compilation should override this function. + LOG(FATAL) << "Unexpected call to EmitThunkCode()."; +} + void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_safepoint_spill_size, size_t number_of_out_slots, @@ -722,6 +736,47 @@ void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) { } } +static uint32_t GetBootImageOffsetImpl(const void* object, ImageHeader::ImageSections section) { + Runtime* runtime = Runtime::Current(); + DCHECK(runtime->IsAotCompiler()); + const std::vector<gc::space::ImageSpace*>& boot_image_spaces = + runtime->GetHeap()->GetBootImageSpaces(); + // Check that the `object` is in the expected section of one of the boot image files. + DCHECK(std::any_of(boot_image_spaces.begin(), + boot_image_spaces.end(), + [object, section](gc::space::ImageSpace* space) { + uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return space->GetImageHeader().GetImageSection(section).Contains(offset); + })); + uintptr_t begin = reinterpret_cast<uintptr_t>(boot_image_spaces.front()->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return dchecked_integral_cast<uint32_t>(offset); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image classes are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadClass* load_class) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kBootImageRelRo); + ObjPtr<mirror::Class> klass = load_class->GetClass().Get(); + DCHECK(klass != nullptr); + return GetBootImageOffsetImpl(klass.Ptr(), ImageHeader::kSectionObjects); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image strings are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadString* load_string) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kBootImageRelRo); + ObjPtr<mirror::String> string = load_string->GetString().Get(); + DCHECK(string != nullptr); + return GetBootImageOffsetImpl(string.Ptr(), ImageHeader::kSectionObjects); +} + +uint32_t CodeGenerator::GetBootImageOffset(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->GetMethodLoadKind(), HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo); + ArtMethod* method = invoke->GetResolvedMethod(); + DCHECK(method != nullptr); + return GetBootImageOffsetImpl(method, ImageHeader::kSectionArtMethods); +} + void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { // The DCHECKS below check that a register is not specified twice in // the summary. 
The out location can overlap with an input, so we need diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index f784a1a8573..62cacebaa1e 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -21,15 +21,16 @@ #include "arch/instruction_set_features.h" #include "base/arena_containers.h" #include "base/arena_object.h" +#include "base/array_ref.h" #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/enums.h" +#include "base/memory_region.h" #include "dex/string_reference.h" #include "dex/type_reference.h" #include "globals.h" #include "graph_visualizer.h" #include "locations.h" -#include "memory_region.h" #include "nodes.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" @@ -74,6 +75,7 @@ class CodeAllocator { virtual ~CodeAllocator() {} virtual uint8_t* Allocate(size_t size) = 0; + virtual ArrayRef<const uint8_t> GetMemory() const = 0; private: DISALLOW_COPY_AND_ASSIGN(CodeAllocator); @@ -210,6 +212,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void Initialize() = 0; virtual void Finalize(CodeAllocator* allocator); virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); + virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; + virtual void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name); virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; @@ -438,6 +444,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: return false; + case TypeCheckKind::kBitstringCheck: + return true; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); @@ -556,6 +564,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { Location runtime_return_location); void GenerateLoadClassRuntimeCall(HLoadClass* cls); + uint32_t GetBootImageOffset(HLoadClass* load_class); + uint32_t GetBootImageOffset(HLoadString* load_string); + uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke); + static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 60f8f98757d..d4cfab82de3 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -30,7 +30,6 @@ #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_arm64.h" -#include "linker/arm64/relative_patcher_arm64.h" #include "linker/linker_patch.h" #include "lock_word.h" #include "mirror/array-inl.h" @@ -78,6 +77,7 @@ using helpers::OutputFPRegister; using helpers::OutputRegister; using helpers::QRegisterFrom; using helpers::RegisterFrom; +using helpers::SRegisterFrom; using helpers::StackOperandFrom; using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; @@ -1424,6 +1424,62 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { __ FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. 
+ if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 4u); + + auto GetInsn = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 4u); + return + (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8) + + (static_cast<uint32_t>(code[offset + 2]) << 16)+ + (static_cast<uint32_t>(code[offset + 3]) << 24); + }; + + const uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + // LDR (immediate) with correct base_reg. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), + // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); + CheckValidReg((next_insn >> 16) & 0x1f); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn(literal_offset - 4u); + // LDR (immediate) with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { @@ -2128,6 +2184,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, vixl::aarch64::Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + } + // Compare the bitstring bits to `path_to_root`. 
+ __ Cmp(temp, path_to_root); +} + void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; @@ -3865,6 +3941,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -3873,7 +3951,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3886,7 +3970,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4072,6 +4158,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Cset(out, eq); + if (zero.IsLinked()) { + __ B(&done); + } + break; + } } if (zero.IsLinked()) { @@ -4094,7 +4197,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4104,7 +4213,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); @@ -4285,6 +4396,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); @@ -4459,12 +4584,23 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = GetBootImageOffset(invoke); + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { - // Add ADRP with its PC-relative DexCache access patch. + // Add ADRP with its PC-relative .bss entry patch. MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); - // Add LDR with its PC-relative DexCache access patch. + // Add LDR with its PC-relative .bss entry patch. vixl::aarch64::Label* ldr_label = NewMethodBssEntryPatch(target_method, adrp_label); EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); @@ -4559,6 +4695,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { @@ -4681,6 +4824,14 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. 
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4700,11 +4851,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4719,6 +4869,44 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + Arm64Assembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + } + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + } + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. 
+ assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { return uint32_literals_.GetOrCreate( value, @@ -4779,7 +4967,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -4859,12 +5047,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + /* fixup_label */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -4888,23 +5076,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative type patch. - const DexFile& dex_file = cls->GetDexFile(); - dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative type patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index))); - if (masked_hash != 0) { - __ Sub(out.W(), out.W(), Operand(masked_hash)); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -4914,16 +5095,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA vixl::aarch64::Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its PC-relative Class patch. + // Add LDR with its PC-relative Class .bss entry patch. 
vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(cls, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + read_barrier_option); generate_null_check = true; break; } @@ -4931,12 +5112,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass())); - GenerateGcRootFieldLoad(cls, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + out.X(), + /* offset */ 0, + /* fixup_label */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -4989,7 +5170,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5055,16 +5236,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative String patch. - const DexFile& dex_file = load->GetDexFile(); - const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative String patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); return; } @@ -5076,16 +5256,16 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its .bss entry String patch. + // Add LDR with its PC-relative String .bss entry patch. 
vixl::aarch64::Label* ldr_label = codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(load, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + kCompilerReadBarrierOption); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); @@ -5098,12 +5278,12 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), load->GetStringIndex(), load->GetString())); - GenerateGcRootFieldLoad(load, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + out.X(), + /* offset */ 0, + /* fixup_label */ nullptr, + kCompilerReadBarrierOption); return; } default: @@ -5462,6 +5642,153 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +// TODO: integrate with HandleBinaryOp? +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + Register op1_reg; + Register op2_reg; + Register out_reg; + if (type == DataType::Type::kInt64) { + op1_reg = XRegisterFrom(op1); + op2_reg = XRegisterFrom(op2); + out_reg = XRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + op1_reg = WRegisterFrom(op1); + op2_reg = WRegisterFrom(op2); + out_reg = WRegisterFrom(out); + } + + __ Cmp(op1_reg, op2_reg); + __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt); +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + FPRegister op1_reg; + FPRegister op2_reg; + FPRegister out_reg; + if (type == DataType::Type::kFloat64) { + op1_reg = DRegisterFrom(op1); + op2_reg = DRegisterFrom(op2); + out_reg = DRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + op1_reg = SRegisterFrom(op1); + op2_reg = SRegisterFrom(op2); + out_reg = SRegisterFrom(out); + } + + if (is_min) { + __ Fmin(out_reg, op1_reg, op2_reg); + } else { + __ Fmax(out_reg, op1_reg, op2_reg); + } +} + +// TODO: integrate with HandleBinaryOp? 
+void InstructionCodeGeneratorARM64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARM64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARM64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARM64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + Register in_reg = InputRegisterAt(abs, 0); + Register out_reg = OutputRegister(abs); + __ Cmp(in_reg, Operand(0)); + __ Cneg(out_reg, in_reg, lt); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FPRegister in_reg = InputFPRegisterAt(abs, 0); + FPRegister out_reg = OutputFPRegister(abs); + __ Fabs(out_reg, in_reg); + break; + } + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } @@ -5905,7 +6232,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( +void CodeGeneratorARM64::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, Register obj, @@ -5939,9 +6266,8 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( DCHECK(temps.IsAvailable(ip0)); DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); - vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; @@ -5970,14 +6296,14 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. The entrypoint will // be loaded by the slow path code. 
SlowPathCodeARM64* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); - codegen_->AddSlowPath(slow_path); + new (GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); + AddSlowPath(slow_path); // /* GcRoot<mirror::Object> */ root = *(obj + offset) if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); + EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); } static_assert( sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), @@ -5997,10 +6323,10 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Add(root_reg.X(), obj.X(), offset); } else { - codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); + EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); } // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -6008,12 +6334,12 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); + EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); } // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6062,9 +6388,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins DCHECK(temps.IsAvailable(ip0)); DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); - uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), - obj.GetCode()); + uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); { @@ -6149,8 +6473,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins DCHECK(temps.IsAvailable(ip0)); DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); __ Add(temp.X(), obj.X(), Operand(data_offset)); @@ -6510,5 +6833,176 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_ #undef __ #undef QUICK_ENTRY_POINT +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register base_reg, + vixl::aarch64::MemOperand& lock_word, + vixl::aarch64::Label* slow_path, + vixl::aarch64::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip0.W(), lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. 
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); + static_assert( + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, + "Field and array LDR offsets must be the same to reuse the same code."); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); + __ Br(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint`. +static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register entrypoint) { + // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + +void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + auto holder_reg = + Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch64::Label throw_npe_label; + vixl::aarch64::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ Cbz(holder_reg.W(), throw_npe); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. 
+ vixl::aarch64::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. + __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Br(ip1); // Jump to the entrypoint. + break; + } + case BakerReadBarrierKind::kArray: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffset(), 0); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). + __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip0, base_reg); // Move the base register to ip0. + __ Br(ip1); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + auto root_reg = + Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label return_label, not_marked, forwarding_address; + __ Cbz(root_reg, &return_label); + MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip0.W(), lock_word); + __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked); + __ Bind(&return_label); + __ Br(lr); + __ Bind(&not_marked); + __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); + __ B(&forwarding_address, mi); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to + // art_quick_read_barrier_mark_introspection_gc_roots.
+ __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + __ Mov(ip0.W(), root_reg); + __ Br(ip1); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift); + __ Br(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0654046de5d..aa343b1185d 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #include "arch/arm64/quick_method_frame_info_arm64.h" +#include "base/bit_field.h" #include "code_generator.h" #include "common_arm64.h" #include "dex/dex_file_types.h" @@ -36,6 +37,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Arm64RelativePatcherTest; +} // namespace linker + namespace arm64 { class CodeGeneratorARM64; @@ -264,6 +270,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::aarch64::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch64::Register temp); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); @@ -273,6 +281,10 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -303,17 +315,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch64::Register obj, - uint32_t offset, - vixl::aarch64::Label* fixup_label, - ReadBarrierOption read_barrier_option); // Generate a floating-point comparison. 
void GenerateFcmp(HInstruction* instruction); @@ -561,7 +562,14 @@ class CodeGeneratorARM64 : public CodeGenerator { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative method patch for an instruction and return the label + // Add a new boot image relocation patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new boot image method patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -575,7 +583,7 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative type patch for an instruction and return the label + // Add a new boot image type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -591,7 +599,7 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative string patch for an instruction and return the label + // Add a new boot image string patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -628,9 +636,24 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Register base); void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch64::Register obj, + uint32_t offset, + vixl::aarch64::Label* fixup_label, + ReadBarrierOption read_barrier_option); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -765,6 +788,62 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. 
+ kLast = kGcRoot + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch64::lr.GetCode() && + reg != vixl::aarch64::ip0.GetCode() && + reg != vixl::aarch64::ip1.GetCode()) << reg; + } + + static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + void CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; using StringToLiteralMap = ArenaSafeMap<StringReference, @@ -820,7 +899,8 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -828,7 +908,7 @@ class CodeGeneratorARM64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; @@ -840,6 +920,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; + friend class linker::Arm64RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64); }; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 2f495fc15fd..7350b146f95 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -29,7 +29,6 @@ #include "gc/accounting/card_table.h" #include "heap_poisoning.h" #include "intrinsics_arm_vixl.h" -#include "linker/arm/relative_patcher_thumb2.h" #include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" @@ -94,9 +93,6 @@ constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; -// The reserved entrypoint register for link-time generated thunks. -const vixl32::Register kBakerCcEntrypointRegister = r4; - // Using a base helps identify when we hit Marking Register check breakpoints. constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; @@ -116,8 +112,6 @@ static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope DCHECK(temps->IsAvailable(ip)); temps->Exclude(ip); DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); - DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), - linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); @@ -2422,6 +2416,80 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { FixJumpTables(); GetAssembler()->FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. + if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 2u); + + auto GetInsn16 = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 2u); + return (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8); + }; + auto GetInsn32 = [=](uint32_t offset) { + return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0); + }; + + uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. 
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code.size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); + CheckValidReg(next_insn & 0xf); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { @@ -4690,6 +4758,299 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + vixl32::Register op1 = 
RegisterFrom(op1_loc); + vixl32::Register op2 = RegisterFrom(op2_loc); + vixl32::Register out = RegisterFrom(out_loc); + + __ Cmp(op1, op2); + + { + ExactAssemblyScope aas(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? ge : le, out, op2); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::Register op1_lo = LowRegisterFrom(op1_loc); + vixl32::Register op1_hi = HighRegisterFrom(op1_loc); + vixl32::Register op2_lo = LowRegisterFrom(op2_loc); + vixl32::Register op2_hi = HighRegisterFrom(op2_loc); + vixl32::Register out_lo = LowRegisterFrom(out_loc); + vixl32::Register out_hi = HighRegisterFrom(out_loc); + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + DCHECK(op1_lo.Is(out_lo)); + DCHECK(op1_hi.Is(out_hi)); + + // Compare op1 >= op2, or op1 < op2. + __ Cmp(out_lo, op2_lo); + __ Sbcs(temp, out_hi, op2_hi); + + // Now GE/LT condition code is correct for the long comparison. + { + vixl32::ConditionType cond = is_min ? ge : lt; + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cond); + __ mov(cond, out_lo, op2_lo); + __ mov(cond, out_hi, op2_hi); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::SRegister op1 = SRegisterFrom(op1_loc); + vixl32::SRegister op2 = SRegisterFrom(op2_loc); + vixl32::SRegister out = SRegisterFrom(out_loc); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp1 = temps.Acquire(); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0)); + vixl32::Label nan, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F32, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). + __ Vmov(temp1, op1); + __ Vmov(temp2, op2); + if (is_min) { + __ Orr(temp1, temp1, temp2); + } else { + __ And(temp1, temp1, temp2); + } + __ Vmov(out, temp1); + __ B(final_label); + + // handle NaN input. + __ Bind(&nan); + __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
+ __ Vmov(out, temp1); + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. + return; + } + + vixl32::DRegister op1 = DRegisterFrom(op1_loc); + vixl32::DRegister op2 = DRegisterFrom(op2_loc); + vixl32::DRegister out = DRegisterFrom(out_loc); + vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F64, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0). + if (!is_min) { + __ Vand(F64, out, op1, op2); + __ B(final_label); + } + + // handle op1 == op2, min(+0.0,-0.0), NaN input. + __ Bind(&handle_nan_eq); + __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(minmax, is_min); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(minmax, is_min); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARMVIXL::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARMVIXL::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch 
(abs->GetResultType()) { + case DataType::Type::kInt32: { + vixl32::Register in_reg = RegisterFrom(locations->InAt(0)); + vixl32::Register out_reg = RegisterFrom(locations->Out()); + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, out_reg, mask); + break; + } + case DataType::Type::kInt64: { + Location in = locations->InAt(0); + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + Location output = locations->Out(); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, out_reg_lo, mask); + __ Eor(out_reg_hi, out_reg_hi, mask); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0)); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); @@ -7033,7 +7394,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7120,11 +7481,11 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ vixl32::Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -7143,25 +7504,19 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Sub(out, out, Operand(masked_hash)); - } break; } case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); generate_null_check = true; break; } @@ -7170,7 +7525,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ cls->GetTypeIndex(), cls->GetClass())); // /* GcRoot<mirror::Class> */ out = *out - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -7236,11 +7591,72 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, + vixl32::Register temp, + vixl32::FlagsUpdate flags_update) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs + // the Z flag for BNE. This is indicated by the `flags_update` parameter. + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root); + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else if (IsUint<16>(path_to_root)) { + if (temp.IsLow()) { + // Note: Optimized for size but contains one more dependent instruction than necessary. + // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the + // macro assembler would use the high reg IP for the constant by default. + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2 + __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3 + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else { + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } + } else { + // Shift out bits that do not contribute to the comparison. 
+ __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root << (32u - mask_bits)); + } else { + __ Sub(temp, temp, path_to_root << (32u - mask_bits)); + } + } + } +} + HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7304,10 +7720,10 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); return; @@ -7317,7 +7733,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); @@ -7331,7 +7748,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetStringIndex(), load->GetString())); // /* GcRoot<mirror::String> */ out = *out - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); return; } default: @@ -7427,6 +7845,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7435,7 +7855,13 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. 
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7450,7 +7876,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? vixl32::Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7690,6 +8118,26 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out, DontCare); + // If `out` is a low reg and we would have another low reg temp, we could + // optimize this as RSBS+ADC, see GenerateConditionWithZero(). + // + // Also, in some cases when `out` is a low reg and we're loading a constant to IP + // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size + // would be the same and we would have fewer direct data dependencies. + codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR + break; + } } if (done.IsReferenced()) { @@ -7707,7 +8155,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7716,7 +8170,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
vixl32::Register() + : InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -7901,6 +8357,20 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop, /* far_target */ false); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsReferenced()) { __ Bind(&done); @@ -8330,7 +8800,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( +void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, vixl32::Register obj, @@ -8361,9 +8831,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( - root_reg.GetCode(), narrow); - vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes); vixl32::Label return_address; @@ -8374,7 +8843,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( DCHECK_LT(offset, kReferenceLoadMinFarOffset); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); - EmitPlaceholderBne(codegen_, bne_label); + EmitPlaceholderBne(this, bne_label); __ Bind(&return_address); DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET @@ -8394,8 +8863,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. The entrypoint will // be loaded by the slow path code. SlowPathCodeARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); - codegen_->AddSlowPath(slow_path); + new (GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); + AddSlowPath(slow_path); // /* GcRoot<mirror::Object> */ root = *(obj + offset) GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); @@ -8416,7 +8885,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // /* GcRoot<mirror::Object>* */ root = obj + offset __ Add(root_reg, obj, offset); // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -8425,7 +8894,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. 
} - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18); + MaybeGenerateMarkingRegisterCheck(/* code */ 18); } void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { @@ -8486,8 +8955,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i } UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), obj.GetCode(), narrow); + uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); { @@ -8573,8 +9041,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode()); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); __ Add(data_reg, obj, Operand(data_offset)); @@ -8711,7 +9178,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, Location ref, - vixl::aarch32::Register obj, + vixl32::Register obj, uint32_t offset, Location index, ScaleFactor scale_factor, @@ -8901,6 +9368,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* labels = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -8998,6 +9473,13 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( } } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch( + uint32_t boot_image_offset) { + return NewPcRelativePatch(/* dex_file */ nullptr, + boot_image_offset, + &boot_image_method_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( MethodReference target_method) { return NewPcRelativePatch( @@ -9036,7 +9518,7 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } -vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { +vixl32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { baker_read_barrier_patches_.emplace_back(custom_data); return &baker_read_barrier_patches_.back().label; } @@ -9088,6 +9570,14 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be 
null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -9107,11 +9597,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -9126,6 +9615,45 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + assembler.LoadFromOffset( + arm::kLoadWord, + vixl32::pc, + vixl32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + assembler.GetVIXLAssembler()->Bkpt(0); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. + assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( uint32_t value, Uint32ToLiteralMap* map) { @@ -9370,5 +9898,211 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( #undef QUICK_ENTRY_POINT #undef TODO_VIXL32 +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler, + vixl32::Register base_reg, + vixl32::MemOperand& lock_word, + vixl32::Label* slow_path, + int32_t raw_ldr_offset, + vixl32::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. 
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target */ false); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + __ Add(lr, lr, raw_ldr_offset); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint` +static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler, + vixl32::Register entrypoint) { + // The register where the read barrier introspection entrypoint is loaded + // is fixed: `kBakerCcEntrypointRegister` (R4). + DCHECK(entrypoint.Is(kBakerCcEntrypointRegister)); + // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + +void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl32::Label throw_npe_label; + vixl32::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target */ false); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. + vixl32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath( + assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). 
*/ -1 + + raw_ldr_offset; + vixl32::Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Bx(ep_reg); // Jump to the entrypoint. + break; + } + case BakerReadBarrierKind::kArray: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffsetImmediate(), 0); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + MemOperand ldr_address(lr, ldr_offset + 2); + __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", + // i.e. Rm+32 because the scale in imm2 is 2. + vixl32::Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); + __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip, base_reg); // Move the base register to ip0. + __ Bx(ep_reg); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. 
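In the field and array thunks above, the slow path recovers the original load's offset straight from the instruction stream with UBFX: a 32-bit LDR carries "Rt | imm12" in its second halfword, while a 16-bit LDR (encoding T1) carries imm5 in bits [10:6], scaled by 4 for word loads. The same extraction in plain C++, with descriptive helper names of my own:

#include <cstdint>

inline uint32_t WideLdrFieldOffset(uint16_t second_halfword) {
  return second_halfword & 0xFFFu;    // imm12, byte offset used as-is
}

inline uint32_t NarrowLdrFieldOffset(uint16_t insn) {
  return ((insn >> 6) & 0x1Fu) << 2;  // imm5 * 4
}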
+ vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label return_label, not_marked, forwarding_address; + __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); + MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip, lock_word); + __ Tst(ip, LockWord::kMarkBitStateMaskShifted); + __ B(eq, ¬_marked); + __ Bind(&return_label); + __ Bx(lr); + __ Bind(¬_marked); + static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, + "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " + " the highest bits and the 'forwarding address' state to have all bits set"); + __ Cmp(ip, Operand(0xc0000000)); + __ B(hs, &forwarding_address); + vixl32::Register ep_reg(kBakerCcEntrypointRegister); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); + // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister + // to art_quick_read_barrier_mark_introspection_gc_roots. + int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); + __ Mov(ip, root_reg); + __ Bx(ep_reg); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); + __ Bx(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 536da41d07f..6b9919ab15e 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -36,6 +36,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Thumb2RelativePatcherTest; +} // namespace linker + namespace arm { // This constant is used as an approximate margin when emission of 
veneer and literal pools @@ -108,6 +113,9 @@ static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = { static const size_t kRuntimeParameterFpuRegistersLengthVIXL = arraysize(kRuntimeParameterFpuRegistersVIXL); +// The reserved entrypoint register for link-time generated thunks. +const vixl::aarch32::Register kBakerCcEntrypointRegister = vixl32::r4; + class LoadClassSlowPathARMVIXL; class CodeGeneratorARMVIXL; @@ -322,6 +330,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch32::Register temp, + vixl::aarch32::FlagsUpdate flags_update); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); @@ -349,6 +360,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxLong(LocationSummary* locations, bool is_min); + void GenerateMinMaxFloat(HInstruction* minmax, bool is_min); + void GenerateMinMaxDouble(HInstruction* minmax, bool is_min); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -379,16 +396,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch32::Register obj, - uint32_t offset, - ReadBarrierOption read_barrier_option); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, @@ -574,6 +581,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Label add_pc_label; }; + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); @@ -596,6 +604,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Handle<mirror::Class> handle); void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; @@ -603,6 +615,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // is added only for AOT compilation if link-time generated thunks for fields are enabled. 
void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch32::Register obj, + uint32_t offset, + ReadBarrierOption read_barrier_option); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -757,6 +779,83 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register temp = vixl32::Register()); private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kLast = kGcRoot + }; + + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast = kNarrow + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* pc is invalid */ 15u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = + BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBakerReadBarrierBitsForRegister, + kBitsForBakerReadBarrierWidth>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != kBakerCcEntrypointRegister.GetCode()) << reg; + } + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { + CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); + } + + void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); @@ -798,7 +897,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -806,7 +906,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; @@ -818,6 +918,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; + friend class linker::Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); }; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 87e6d6834b7..25e2eddbfab 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1597,6 +1597,14 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. 
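Backing up to the EncodeBakerReadBarrier*Data helpers in code_generator_arm_vixl.h above: the custom_data word packs the kind, two register numbers and the access width with BitField. With the widths MinimumBitsToStore works out (2 bits of kind, 4 bits per register, 1 bit of width), the layout reduces to plain shifts; the hard-coded positions below are for illustration only.

#include <cstdint>

enum class Kind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2 };

inline uint32_t EncodeFieldData(uint32_t base_reg, uint32_t holder_reg, bool narrow) {
  return static_cast<uint32_t>(Kind::kField)  // bits [1:0]
      | (base_reg << 2)                       // bits [5:2]
      | (holder_reg << 6)                     // bits [9:6]
      | ((narrow ? 1u : 0u) << 10);           // bit  [10]: width (0 = wide)
}

inline Kind DecodeKind(uint32_t data) { return static_cast<Kind>(data & 0x3u); }
inline uint32_t DecodeFirstReg(uint32_t data) { return (data >> 2) & 0xFu; }
inline uint32_t DecodeSecondReg(uint32_t data) { return (data >> 6) & 0xFu; }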
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1615,11 +1623,10 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1630,6 +1637,13 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1936,6 +1950,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // Only stype 0 is supported. 
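The GenerateBitstringTypeCheckCompare sequence above boils down to a masked equality test on the class status word. A plain C++ sketch of both the direct form and the shift-based form the generated code uses (helper names are illustrative; mask is 2^mask_bits - 1 with 0 < mask_bits < 32):

#include <cstdint>

inline bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask) {
  return (status & mask) == path_to_root;
}

// XOR zeroes the matching bits and the left shift discards everything above
// the bitstring, so any non-zero result means "not a subtype".
inline bool BitstringMatchesShifted(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
  return ((status ^ path_to_root) << (32u - mask_bits)) == 0u;
}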
} @@ -3287,7 +3329,13 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -3296,7 +3344,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3335,7 +3383,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bne(temp, cls, slow_path->GetEntryLabel()); + __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); break; } @@ -3361,7 +3409,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bne(temp, cls, &loop); + __ Bne(temp, cls.AsRegister<Register>(), &loop); break; } @@ -3376,7 +3424,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop; __ Bind(&loop); - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3399,7 +3447,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3458,7 +3506,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Go to next interface. __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. 
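The interface check walks the object's interface table two heap references per iteration (the loop's closing branch follows right below). A conceptual sketch, under the assumption that each table entry is an (interface class, method array) pair and only the class slot takes part in the comparison:

#include <cstdint>

struct HeapRef { const void* value; };

inline bool TableContainsInterface(const HeapRef* iftable, int32_t pair_count,
                                   const void* interface_class) {
  for (int32_t i = 0; i < pair_count; ++i) {
    if (iftable[2 * i].value == interface_class) {  // even slots hold the classes
      return true;
    }
  }
  return false;
}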
- __ Bne(AT, cls, &loop); + __ Bne(AT, cls.AsRegister<Register>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); break; } } @@ -7401,6 +7463,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7409,7 +7473,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7421,7 +7491,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7453,7 +7523,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<Register>()); __ Sltiu(out, out, 1); break; } @@ -7480,7 +7550,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); - __ Bne(out, cls, &loop); + __ Bne(out, cls.AsRegister<Register>(), &loop); __ LoadConst32(out, 1); break; } @@ -7498,7 +7568,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7525,7 +7595,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. MipsLabel success; - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7557,7 +7627,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bne(out, cls, slow_path->GetEntryLabel()); + __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -7589,6 +7659,20 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -7725,7 +7809,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7748,7 +7832,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7835,6 +7919,15 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -7956,7 +8049,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: if (isR6) { break; @@ -8008,7 +8101,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF // We need an extra register for PC-relative literals on R2. 
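Several hunks in this file switch method, class and string loads to kBootImageRelRo: instead of an intern-table or class-table lookup with hash masking, the code loads a 32-bit entry out of the boot image's .data.bimg.rel.ro section at a linker-patched address, and the boot image being mapped in the low 4 GiB makes a 32-bit entry sufficient. A rough model of that load; the function name and the direct pointer parameter are assumptions made for this sketch only:

#include <cstdint>

inline void* LoadBootImageRelRoEntry(const uint32_t* patched_entry_address) {
  uint32_t entry = *patched_entry_address;                   // the Lw/Ldr in the diff
  return reinterpret_cast<void*>(static_cast<uintptr_t>(entry));
}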
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>(); @@ -8065,22 +8158,17 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF } break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Addiu(out, out, -masked_hash); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -8171,7 +8259,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: if (isR6) { break; @@ -8223,7 +8311,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? 
ZERO : locations->InAt(0).AsRegister<Register>(); @@ -8259,12 +8347,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ } return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8779,6 +8868,501 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + if (isR6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. 
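The SELEQZ/SELNEZ discussion above translates directly into C++: each instruction either forwards its source or produces zero, so OR-ing the two results implements "rd = (c == 0) ? rs : rt" without a branch or a conditional move. A sketch with descriptive names of my own:

#include <cstdint>

inline uint32_t Seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }
inline uint32_t Selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }

inline uint32_t SelectIfZero(uint32_t c, uint32_t rs, uint32_t rt) {
  return Seleqz(rs, c) | Selnez(rt, c);  // one of the two terms is always zero
}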
+ if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. + } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { // !isR6 + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + FRegister out 
= locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (isR6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + + } else { // !isR6 + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == DataType::Type::kFloat64) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == DataType::Type::kFloat64) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == DataType::Type::kFloat64) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == DataType::Type::kFloat64) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == DataType::Type::kFloat64) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == DataType::Type::kFloat64) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? 
a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == DataType::Type::kFloat64) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations, + DataType::Type type, + bool isR2OrNewer, + bool isR6) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + // Note, as a "quality of implementation", rather than pure "spec compliance", we require that + // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN + // (signaling NaN may become quiet though). + // + // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, + // both regular floating point numbers and NAN values are treated alike, only the sign bit is + // affected by this instruction. + // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any + // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be + // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. + if (isR6) { + if (type == DataType::Type::kFloat64) { + __ AbsD(out, in); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ AbsS(out, in); + } + } else { + if (type == DataType::Type::kFloat64) { + if (in != out) { + __ MovD(out, in); + } + __ MoveFromFpuHigh(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ MoveToFpuHigh(TMP, out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ Mfc1(TMP, in); + // ins instruction is not available for R1. 
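The pre-R6 abs path above clears just the sign bit of the high word so that everything else, including a NaN payload, is preserved. The same operation on a whole double, as a standalone sketch:

#include <cstdint>
#include <cstring>

inline double BitwiseAbs(double in) {
  uint64_t bits;
  std::memcpy(&bits, &in, sizeof(bits));
  bits &= ~(UINT64_C(1) << 63);  // same effect as Ins(TMP, ZERO, 31, 1) on the high word
  double out;
  std::memcpy(&out, &bits, sizeof(out));
  return out;
}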
+ if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ Mtc1(TMP, out); + } + } +} + +void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". + // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index c91cb62eda5..2e7c736dbd3 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); @@ -246,6 +247,11 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMax(HBinaryOperation*, bool is_min); + void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -615,6 +621,8 @@ class CodeGeneratorMIPS : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference 
target_method, @@ -689,7 +697,8 @@ class CodeGeneratorMIPS : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -697,7 +706,7 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 985ac2ca554..5b07b55cbbb 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1509,6 +1509,14 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1527,11 +1535,10 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1542,6 +1549,13 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1780,6 +1794,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo __ 
Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + GpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // only stype 0 is supported } @@ -2840,7 +2882,13 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -2849,7 +2897,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2888,7 +2936,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bnec(temp, cls, slow_path->GetEntryLabel()); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); break; } @@ -2914,7 +2962,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bnec(temp, cls, &loop); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); break; } @@ -2929,7 +2977,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. 
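The loop right below this comment chases super_class_ links until it hits the target class or falls off the end of the chain. A conceptual C++ rendering, with a stand-in struct reduced to the one field the walk needs (mirror::Class itself is not modelled here):

struct FakeClass { const FakeClass* super_class; };

inline bool IsSameOrSubclassOf(const FakeClass* klass, const FakeClass* target) {
  for (const FakeClass* k = klass; k != nullptr; k = k->super_class) {
    if (k == target) {
      return true;
    }
  }
  return false;
}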
Mips64Label loop; __ Bind(&loop); - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -2952,7 +3000,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3011,7 +3059,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Daddiu(temp, temp, 2 * kHeapReferenceSize); __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bnec(AT, cls, &loop); + __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnezc(temp, slow_path->GetEntryLabel()); break; } } @@ -5515,6 +5577,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5523,7 +5587,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5535,7 +5605,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); GpuRegister out = out_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5567,7 +5637,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<GpuRegister>()); __ Sltiu(out, out, 1); break; } @@ -5594,7 +5664,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. 
__ Beqzc(out, &done); - __ Bnec(out, cls, &loop); + __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); __ LoadConst32(out, 1); break; } @@ -5612,7 +5682,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -5639,7 +5709,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. Mips64Label success; - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -5671,7 +5741,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls, slow_path->GetEntryLabel()); + __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -5703,6 +5773,20 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { __ Bc(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -5839,7 +5923,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( bool fallback_load = false; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5866,7 +5950,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5926,6 +6010,15 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( kLoadDoubleword, DeduplicateUint64Literal(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. 
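The kBootImageRelRo handling added here (for direct calls, and for HLoadClass/HLoadString further down) materialises the address of a 32-bit entry in the oat file's .data.bimg.rel.ro section with the usual hi/lo patch pair and then reads it with Lwu. A minimal sketch of what the patched sequence amounts to at runtime, with `relro_entry` standing in for the slot the linker patch resolves (an illustrative name, not an ART symbol):

// Zero-extending 32-bit load of the .data.bimg.rel.ro slot; the result is a
// usable pointer value because the boot image is mapped in the low 4GiB.
inline uint64_t LoadBootImageReference(const uint32_t* relro_entry) {
  return static_cast<uint64_t>(*relro_entry);
}

Because the same DataBimgRelRoPatch is emitted whether the referenced entity is a method, class or string, all three kinds share boot_image_method_patches_ (see the updated comments in code_generator_mips64.h below).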
+ __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -6113,20 +6206,15 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S codegen_->DeduplicateBootImageAddressLiteral(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Daddiu(out, out, -masked_hash); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6248,12 +6336,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA codegen_->DeduplicateBootImageAddressLiteral(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); return; @@ -6665,6 +6754,236 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = 
locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); + } + } else { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
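The block comment above spells out the SELEQZ/SELNEZ idiom; as a compact reference, here is a scalar model of the emitted branchless min for the `out == lhs` case (a sketch of the MIPS64r6 semantics, not ART code). Each SEL* yields either its operand or zero, and the final OR merges the two:

int64_t MinR6Model(int64_t lhs, int64_t rhs) {
  int64_t at   = (rhs < lhs) ? 1 : 0;  // Slt    AT,  rhs, lhs
  int64_t keep = (at == 0) ? lhs : 0;  // Seleqz out, lhs, AT
  int64_t take = (at != 0) ? rhs : 0;  // Selnez AT,  rhs, AT
  return keep | take;                  // Or     out, out, AT
}

The max variant swaps the Seleqz/Selnez roles, and the surrounding floating-point path adds the CmpUn/Bc1eqz pre-check because min.fmt/max.fmt prefer the numeric operand while Java requires the NaN to win.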
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + break; + } + case DataType::Type::kFloat32: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsS(out, in); + break; + } + case DataType::Type::kFloat64: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsD(out, in); + break; + } + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index e6b69c469fd..6e69e4611a7 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); + void 
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -242,6 +243,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -586,6 +591,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, @@ -655,7 +662,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -663,7 +671,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative type patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 174efdf1155..6b0ec253e99 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -63,7 +63,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -125,7 +125,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -149,7 +149,7 @@ void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -173,7 +173,7 @@ void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* ins DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -200,7 +200,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -240,7 +240,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -259,7 +259,7 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Scvtf(dst.V4S(), src.V4S()); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -299,7 +299,7 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { __ Fneg(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -338,7 +338,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { __ Fabs(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -366,7 +366,7 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { __ Not(dst.V16B(), src.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -389,7 +389,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -431,7 +431,39 @@ void 
InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -471,7 +503,7 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -513,7 +545,39 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -551,7 +615,7 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -575,7 +639,7 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { __ 
Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -623,7 +687,7 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -671,7 +735,7 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -699,7 +763,7 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -735,7 +799,7 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -762,7 +826,7 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -782,7 +846,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -816,7 +880,7 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { __ Shl(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -850,7 +914,7 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { __ Sshr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -884,7 +948,7 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { __ Ushr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -916,7 +980,7 @@ void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -957,7 +1021,7 @@ void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instructi __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -978,7 +1042,7 @@ static void CreateVecAccumLocations(ArenaAllocator* 
allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1026,7 +1090,7 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1139,7 +1203,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1167,7 +1231,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1188,7 +1252,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins __ Sabal2(acc.V2D(), left.V4S(), right.V4S()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1204,12 +1268,12 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -1237,7 +1301,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1331,7 +1395,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1362,7 +1426,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7c3155ab73b..7b66b179839 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -46,7 +46,7 @@ void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instr locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -71,7 +71,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala __ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -84,7 +84,7 @@ void 
LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instructi locations->SetOut(Location::RequiresRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -98,7 +98,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* i __ Vmov(OutputRegister(instruction), DRegisterLane(src, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -122,7 +122,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -151,7 +151,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -188,7 +188,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { __ Vneg(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -215,7 +215,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { __ Vabs(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { __ Vmvn(I8, dst, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -262,7 +262,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -292,7 +292,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { __ Vadd(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ 
Vqadd(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -332,7 +364,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -362,7 +394,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { __ Vsub(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +456,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { __ Vmul(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -440,7 +504,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { __ Vmin(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -480,7 +544,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { __ Vmax(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -505,7 +569,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { __ Vand(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -537,7 +601,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { __ Vorr(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -561,7 +625,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { __ 
Veor(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -580,7 +644,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -610,7 +674,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { __ Vshl(I32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +704,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { __ Vshr(DataTypeValue::S32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -670,7 +734,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { __ Vshr(DataTypeValue::U32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +754,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -716,7 +780,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruc __ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -737,7 +801,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -780,12 +844,12 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -817,7 +881,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -923,7 +987,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -971,7 +1035,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc 
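The HVecSaturationAdd/HVecSaturationSub lowerings introduced above map one-to-one onto UQADD/SQADD/UQSUB/SQSUB on ARM64 and VQADD/VQSUB on ARM (VIXL); per lane they clamp to the element type's range instead of wrapping. A scalar model of one unsigned 8-bit lane, for illustration only:

uint8_t SaturatingAddU8(uint8_t a, uint8_t b) {
  unsigned sum = static_cast<unsigned>(a) + b;
  return static_cast<uint8_t>(sum > 0xFFu ? 0xFFu : sum);  // clamp at 255
}

uint8_t SaturatingSubU8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>(a > b ? a - b : 0);  // clamp at 0
}

The MIPS and MIPS64 vector back ends, in the hunks that follow, only stub these visitors out with LOG(FATAL).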
index ed9de964965..df0e1485d69 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -42,7 +42,7 @@ void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruct locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -89,7 +89,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* /* is_double */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -113,7 +113,7 @@ void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* inst DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -170,7 +170,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -225,7 +225,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -244,7 +244,7 @@ void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -290,7 +290,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -337,7 +337,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +369,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +392,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -434,11 +434,19 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << 
"Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -474,7 +482,7 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio : __ Ave_sH(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -516,11 +524,19 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -558,7 +574,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -582,7 +598,7 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +656,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -698,7 +714,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -727,7 +743,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -764,7 +780,7 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -793,7 +809,7 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -813,7 +829,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, 
HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +863,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -881,7 +897,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -915,7 +931,7 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -947,7 +963,7 @@ void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1005,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instructio __ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1010,7 +1026,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1060,7 +1076,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1162,7 +1178,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1201,7 +1217,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1231,7 +1247,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1247,13 +1263,13 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1282,7 +1298,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << 
"Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1357,7 +1373,7 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1395,7 +1411,7 @@ void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 9ea55ec8d79..de354b63a15 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -47,7 +47,7 @@ void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instru locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -88,7 +88,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar /* is_double */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -112,7 +112,7 @@ void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -136,7 +136,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -168,7 +168,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -223,7 +223,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -289,7 +289,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -336,7 +336,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ 
-368,7 +368,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -391,7 +391,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -433,11 +433,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -473,7 +481,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct : __ Ave_sH(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -515,11 +523,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -557,7 +573,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -581,7 +597,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -639,7 +655,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -697,7 +713,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -726,7 +742,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); 
// lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -763,7 +779,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -792,7 +808,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +828,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -846,7 +862,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -880,7 +896,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -914,7 +930,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -946,7 +962,7 @@ void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -987,7 +1003,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruct __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1008,7 +1024,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1058,7 +1074,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1160,7 +1176,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1199,7 +1215,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << 
instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1229,7 +1245,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1245,13 +1261,13 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1280,7 +1296,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1355,7 +1371,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1393,7 +1409,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index f2ffccc8879..086ae07a064 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -54,7 +54,7 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -111,7 +111,7 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -152,7 +152,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_LE(4u, instruction->GetVectorLength()); @@ -174,7 +174,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -196,7 +196,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -258,12 +258,12 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -282,7 +282,7 @@ void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -328,7 +328,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +369,7 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -418,7 +418,7 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -441,7 +441,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -483,7 +483,39 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddusb(dst, src); + break; 
+ case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -503,14 +535,14 @@ void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -552,7 +584,39 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -585,7 +649,7 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -609,7 +673,7 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -658,7 +722,7 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -707,7 +771,7 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -742,7 +806,7 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported 
SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -777,7 +841,7 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +876,7 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { __ orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +911,7 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -865,7 +929,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -894,7 +958,7 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -919,7 +983,7 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -948,7 +1012,7 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -985,7 +1049,7 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1011,7 +1075,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1035,7 +1099,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1056,7 +1120,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1103,7 +1167,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1184,7 +1248,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1220,7 +1284,7 @@ void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index e2b0485f890..4d31ab68d11 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -49,7 +49,7 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -102,7 +102,7 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -126,7 +126,7 @@ void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -140,7 +140,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -157,7 +157,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -179,7 +179,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -241,12 +241,12 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -265,7 +265,7 @@ void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -311,7 +311,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -352,7 +352,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -401,7 +401,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -424,7 +424,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -466,7 +466,39 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ 
paddusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -486,14 +518,14 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -535,7 +567,39 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -568,7 +632,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -592,7 +656,7 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -641,7 +705,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +754,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -725,7 +789,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, 
src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -760,7 +824,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -795,7 +859,7 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { __ orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -830,7 +894,7 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -848,7 +912,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -877,7 +941,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -902,7 +966,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -931,7 +995,7 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -963,7 +1027,7 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1053,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1008,7 +1072,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1029,7 +1093,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1076,7 +1140,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1157,7 +1221,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1193,7 +1257,7 @@ void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 6bf045885d6..82d1fda8789 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() @@ -3802,6 +3805,301 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. 
+ locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (type == DataType::Type::kInt64) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // The comparison is performed by subtracting the second operand from + // the first operand and then setting the status flags in the same + // manner as the SUB instruction." + __ cmpl(output_lo, op2_lo); + + // Now use a temp and the borrow to finish the subtraction of op2_hi. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. + Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? 
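[Aside: the comment block just above sketches the control flow of the floating-point min/max lowering. As a hedged scalar illustration of what that SSE sequence computes — not ART code; the helper name and standard-library calls are assumptions made here — the semantics are as follows, which is also why the equal-operands path needs the orpd/andpd step to tell -0.0 and +0.0 apart:]

    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Scalar model of the FP min/max lowering (illustrative only).
    static double MinMaxModel(double a, double b, bool is_min) {
      if (std::isnan(a) || std::isnan(b)) {
        // ucomisd/ucomiss set PF on an unordered compare; the code jumps to the
        // NaN label and materializes a canonical quiet NaN.
        return std::numeric_limits<double>::quiet_NaN();
      }
      if (a != b) {
        // The two conditional jumps pick whichever operand wins the strict compare.
        return is_min ? (a < b ? a : b) : (a > b ? a : b);
      }
      // Equal operands: only +0.0 vs -0.0 can still differ. OR-ing the sign bits
      // (orpd/orps) makes min prefer -0.0; AND-ing them (andpd/andps) makes max
      // prefer +0.0.
      uint64_t bits_a;
      uint64_t bits_b;
      std::memcpy(&bits_a, &a, sizeof(bits_a));
      std::memcpy(&bits_b, &b, sizeof(bits_b));
      uint64_t bits = is_min ? (bits_a | bits_b) : (bits_a & bits_b);
      double result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }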
+ + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(out, kDoubleNaN); + } else { + Register constant = locations->GetTemp(0).AsRegister<Register>(); + __ movl(constant, Immediate(kFloatNaN)); + __ movd(out, constant); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RegisterLocation(EDX)); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register out = locations->Out().AsRegister<Register>(); + 
DCHECK_EQ(out, EAX); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(temp, EDX); + // Sign extend EAX into EDX. + __ cdq(); + // XOR EAX with sign. + __ xorl(EAX, EDX); + // Subtract out sign to correct. + __ subl(EAX, EDX); + // The result is in EAX. + break; + } + case DataType::Type::kInt64: { + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + // Compute the sign into the temporary. + __ movl(temp, input_hi); + __ sarl(temp, Immediate(31)); + // Store the sign into the output. + __ movl(output_lo, temp); + __ movl(output_hi, temp); + // XOR the input to the output. + __ xorl(output_lo, input_lo); + __ xorl(output_hi, input_hi); + // Subtract the sign. + __ subl(output_lo, temp); + __ sbbl(output_hi, temp); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Register constant = locations->GetTemp(1).AsRegister<Register>(); + __ movl(constant, Immediate(INT32_C(0x7FFFFFFF))); + __ movd(temp, constant); + __ andps(out, temp); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF)); + __ andpd(out, temp); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { @@ -4534,6 +4832,15 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); + RecordBootImageRelRoPatch( + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), + GetBootImageOffset(invoke)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); @@ -4595,6 +4902,13 @@ void CodeGeneratorX86::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back( + method_address, /* target_dex_file */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); HX86ComputeBaseMethodAddress* method_address = @@ -4664,6 +4978,14 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( } } 
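[Aside: the integer HAbs lowering above is the classic sign-mask identity — broadcast the sign bit (cdq, or sarl by 31 for the register-pair case), XOR it in, then subtract it. A minimal sketch of the identity, assuming plain C++ rather than ART's assembler macros:]

    #include <cstdint>

    // abs(x) == (x ^ sign) - sign, where sign is x shifted right arithmetically
    // by 31: all zeros for non-negative x, all ones for negative x.
    int32_t BranchlessAbs(int32_t x) {
      int32_t sign = x >> 31;    // cdq: replicate the sign bit into a helper register
      return (x ^ sign) - sign;  // xorl then subl (wraps for INT32_MIN, like the hardware)
    }

[The 64-bit path does the same thing on a register pair, propagating the borrow of the final subtract with sbbl.]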
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4682,11 +5004,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -6055,7 +6376,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6093,7 +6414,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || + load_kind == HLoadClass::LoadKind::kBootImageRelRo || load_kind == HLoadClass::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6169,17 +6490,12 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE __ movl(out, Immediate(address)); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6255,11 +6571,31 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. 
} +void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6278,7 +6614,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || + load_kind == HLoadString::LoadKind::kBootImageRelRo || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6332,11 +6668,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S __ movl(out, Immediate(address)); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageStringPatch(load); + codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { @@ -6418,8 +6755,8 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { return 0; } -// Interface case has 3 temps, one for holding the number of interfaces, one for the current -// interface pointer, one for loading the current interface. +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { @@ -6447,6 +6784,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -6455,7 +6794,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. @@ -6676,6 +7021,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + break; + } } if (zero.IsLinked()) { @@ -6702,12 +7062,14 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - // Note that TypeCheckSlowPathX86 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary register for some cases. + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -6921,6 +7283,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 51e5bca00b6..6c76e27d35f 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -211,6 +211,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // the suspend call. 
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); @@ -225,6 +226,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, @@ -414,6 +418,8 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); @@ -631,17 +637,18 @@ class CodeGeneratorX86 : public CodeGenerator { X86Assembler assembler_; const X86InstructionSetFeatures& isa_features_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 7be360536b2..322b0cfc4c1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -998,6 +998,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. 
+ __ movl(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + RecordBootImageRelRoPatch(GetBootImageOffset(invoke)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); @@ -1059,6 +1066,11 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back(/* target_dex_file */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { boot_image_method_patches_.emplace_back( invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); @@ -1110,6 +1122,14 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1128,11 +1148,10 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -3821,6 +3840,241 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). 
+ locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (type == DataType::Type::kInt64) { + __ cmpq(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ cmpl(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); + } else { + __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. 
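For readers unfamiliar with the ucomiss/orps/andps sequence above, here is a minimal standalone sketch (not ART code) of the float Math.min semantics it implements; the double case is identical with 64-bit patterns, and max uses AND of the bit patterns instead of OR.

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // Sketch only: NaN in either input produces the canonical NaN, and when the
    // inputs compare equal the only interesting case is +/-0.0, where OR-ing the
    // bit patterns keeps the sign bit set (min(-0.0f, +0.0f) is -0.0f).
    inline uint32_t Bits(float x) { uint32_t b; std::memcpy(&b, &x, sizeof(b)); return b; }
    inline float FromBits(uint32_t b) { float x; std::memcpy(&x, &b, sizeof(x)); return x; }

    inline float MinFloat(float a, float b) {
      if (std::isnan(a) || std::isnan(b)) {
        return FromBits(0x7FC00000u);  // canonical NaN, as loaded from the literal pool
      }
      if (a < b) return a;
      if (b < a) return b;
      return FromBits(Bits(a) | Bits(b));  // equal operands: handles -0.0 vs +0.0
    }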
+ __ Bind(&done); +} + +void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86_64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86_64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. + __ addl(out, mask); + __ xorl(out, mask); + break; + } + case DataType::Type::kInt64: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. + __ addq(out, mask); + __ xorq(out, mask); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); + __ andps(out, mask); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ andpd(out, mask); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::Any()); @@ -5462,6 +5716,26 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. 
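The HAbs lowering above uses the classic sign-mask trick; a standalone sketch of the same computation (illustrative only, not ART code):

    #include <cstdint>

    // Sketch only: mask is 0 for x >= 0 and all ones for x < 0, so (x + mask) ^ mask
    // yields x unchanged for non-negative inputs and -x otherwise, with no branch.
    // As with Math.abs, INT32_MIN maps to itself because its negation overflows.
    inline int32_t AbsInt32(int32_t x) {
      uint32_t mask = static_cast<uint32_t>(x >> 31);  // arithmetic shift: 0 or 0xFFFFFFFF
      return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);
    }
    // The floating-point cases above need no arithmetic at all: AND-ing with
    // 0x7FFFFFFF (float) or 0x7FFFFFFFFFFFFFFF (double) simply clears the sign bit.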
} +void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + CpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { switch (desired_class_load_kind) { @@ -5471,7 +5745,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5579,16 +5853,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -5653,7 +5921,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5719,10 +5987,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 
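A standalone sketch (not ART code) of the predicate GenerateBitstringTypeCheckCompare encodes; the layout assumptions are spelled out in the comments.

    #include <cstdint>

    // Sketch only. Assumes the low mask_bits bits of the 32-bit class status word
    // hold the class's path-to-root bitstring, mask == (1u << mask_bits) - 1, and
    // 0 < mask_bits < 32 (the codegen special-cases mask_bits == 16 with cmpw).
    // The object is an instance of the target iff those low bits equal the target's
    // path_to_root. The SUB + SHL form computes the same predicate in the flags:
    // the low mask_bits bits of (status - path_to_root) are zero exactly when they
    // match, and shifting left by (32 - mask_bits) discards the unrelated high bits.
    inline bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
      uint32_t mask = (1u << mask_bits) - 1u;
      return (status & mask) == path_to_root;
      // Equivalently: ((status - path_to_root) << (32u - mask_bits)) == 0u;
    }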
return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageStringPatch(load); + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { @@ -5795,24 +6063,26 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } -static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck) { - // We need a temporary for holding the iftable length. - return true; - } - return kEmitCompilerReadBarrier && +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; } -static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - return kEmitCompilerReadBarrier && - !kUseBakerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. +// The other checks have one temp for loading the object's class. +static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 2; + } + return 1 + NumberOfInstanceOfTemps(type_check_kind); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5834,6 +6104,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5842,14 +6114,16 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); - // When read barriers are enabled, we need a temporary register for - // some cases. 
- if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5860,9 +6134,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(0) : - Location::NoLocation(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6071,6 +6345,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + if (zero.IsLinked()) { + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + } else { + __ setcc(kEqual, out); + // setcc only sets the low byte. + __ andl(out, Immediate(1)); + } + break; + } } if (zero.IsLinked()) { @@ -6097,17 +6392,15 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - - // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary - // register for some cases. - if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { @@ -6118,9 +6411,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(1) : - Location::NoLocation(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_GE(num_temps, 1u); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2u) ? 
locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6283,7 +6577,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { break; } - case TypeCheckKind::kInterfaceCheck: + case TypeCheckKind::kInterfaceCheck: { // Fast path for the interface check. Try to avoid read barriers to improve the fast path. // We can not get false positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ @@ -6319,6 +6613,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If `cls` was poisoned above, unpoison it. __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsLinked()) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1079e94dfc2..9a4c53b5240 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -208,6 +208,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); @@ -222,6 +223,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -410,6 +415,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void RecordBootImageRelRoPatch(uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); @@ -604,17 +610,18 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Used for fixups to the constant area. int constant_area_start_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. 
ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; // Patches for string literals in JIT compiled code. diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index c41c290c8b4..792cfb539a6 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -195,7 +195,9 @@ class InternalCodeAllocator : public CodeAllocator { } size_t GetSize() const { return size_; } - uint8_t* GetMemory() const { return memory_.get(); } + ArrayRef<const uint8_t> GetMemory() const OVERRIDE { + return ArrayRef<const uint8_t>(memory_.get(), size_); + } private: size_t size_; @@ -269,8 +271,8 @@ static void Run(const InternalCodeAllocator& allocator, InstructionSet target_isa = codegen.GetInstructionSet(); typedef Expected (*fptr)(); - CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); - fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); + CommonCompilerTest::MakeExecutable(allocator.GetMemory().data(), allocator.GetMemory().size()); + fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(allocator.GetMemory().data())); if (target_isa == InstructionSet::kThumb2) { // For thumb we need the bottom bit set. 
f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 4a6c91459fc..be26e67af37 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -210,6 +210,12 @@ class DataType { static bool IsTypeConversionImplicit(Type input_type, Type result_type); static bool IsTypeConversionImplicit(int64_t value, Type result_type); + static bool IsZeroExtension(Type input_type, Type result_type) { + return IsIntOrLongType(result_type) && + IsUnsignedType(input_type) && + Size(result_type) > Size(input_type); + } + static const char* PrettyDescriptor(Type type); private: diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c88baa8610f..fbcbe3608e6 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,6 +25,11 @@ #include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "handle.h" +#include "mirror/class.h" +#include "obj_ptr-inl.h" +#include "scoped_thread_state_change-inl.h" +#include "subtype_check.h" namespace art { @@ -548,30 +553,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) { } } -void GraphChecker::VisitCheckCast(HCheckCast* check) { - VisitInstruction(check); - HInstruction* input = check->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", +void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name) { + if (!check->InputAt(input_pos)->IsIntConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.", check->DebugName(), check->GetId(), - input->DebugName(), - input->GetId())); + input_pos, + name, + check->InputAt(2)->DebugName(), + check->InputAt(2)->GetId())); + } else if (check_value) { + uint32_t actual_value = + static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); + if (actual_value != expected_value) { + AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", + check->DebugName(), + check->GetId(), + name, + actual_value, + expected_value)); + } } } -void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { - VisitInstruction(instruction); - HInstruction* input = instruction->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", - instruction->DebugName(), - instruction->GetId(), - input->DebugName(), - input->GetId())); +void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + VisitInstruction(check); + HInstruction* input = check->InputAt(1); + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + if (!input->IsNullConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } + bool check_values = false; + BitString::StorageType expected_path_to_root = 0u; + BitString::StorageType expected_mask = 0u; + { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = check->GetClass().Get(); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + if (state == 
SubtypeCheckInfo::kAssigned) { + expected_path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass); + expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass); + check_values = true; + } else { + AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.", + check->DebugName(), + check->GetId())); + } + } + CheckTypeCheckBitstringInput( + check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root"); + CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask"); + } else { + if (!input->IsLoadClass()) { + AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } } } +void GraphChecker::VisitCheckCast(HCheckCast* check) { + HandleTypeCheckInstruction(check); +} + +void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { + HandleTypeCheckInstruction(instruction); +} + void GraphChecker::HandleLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); HLoopInformation* loop_information = loop_header->GetLoopInformation(); diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 0f0b49d240a..dbedc405185 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -71,6 +71,12 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name); + void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction); void HandleLoop(HBasicBlock* loop_header); void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5ff31cead58..54d46445804 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -390,16 +390,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("load_kind") << load_string->GetLoadKind(); } - void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { - StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind(); + void HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + StartAttributeStream("check_kind") << check->GetTypeCheckKind(); StartAttributeStream("must_do_null_check") << std::boolalpha - << check_cast->MustDoNullCheck() << std::noboolalpha; + << check->MustDoNullCheck() << std::noboolalpha; + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + StartAttributeStream("path_to_root") << std::hex + << "0x" << check->GetBitstringPathToRoot() << std::dec; + StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec; + } + } + + void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { + HandleTypeCheckInstruction(check_cast); } void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE { - StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind(); - StartAttributeStream("must_do_null_check") << std::boolalpha - << instance_of->MustDoNullCheck() << std::noboolalpha; + HandleTypeCheckInstruction(instance_of); } void VisitArrayLength(HArrayLength* array_length) OVERRIDE { @@ -576,6 +583,11 @@ class HGraphVisualizerPrinter : 
public HGraphDelegateVisitor { } StartAttributeStream() << input_list; } + if (instruction->GetDexPc() != kNoDexPc) { + StartAttributeStream("dex_pc") << instruction->GetDexPc(); + } else { + StartAttributeStream("dex_pc") << "n/a"; + } instruction->Accept(this); if (instruction->HasEnvironment()) { StringList envs; @@ -641,20 +653,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha; } + // For the builder and the inliner, we want to add extra information on HInstructions + // that have reference types, and also HInstanceOf/HCheckcast. if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName)) - && (instruction->GetType() == DataType::Type::kReference)) { - ReferenceTypeInfo info = instruction->IsLoadClass() - ? instruction->AsLoadClass()->GetLoadedClassRTI() - : instruction->GetReferenceTypeInfo(); + && (instruction->GetType() == DataType::Type::kReference || + instruction->IsInstanceOf() || + instruction->IsCheckCast())) { + ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference) + ? instruction->IsLoadClass() + ? instruction->AsLoadClass()->GetLoadedClassRTI() + : instruction->GetReferenceTypeInfo() + : instruction->IsInstanceOf() + ? instruction->AsInstanceOf()->GetTargetClassRTI() + : instruction->AsCheckCast()->GetTargetClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (info.IsValid()) { StartAttributeStream("klass") << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); - StartAttributeStream("can_be_null") - << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + if (instruction->GetType() == DataType::Type::kReference) { + StartAttributeStream("can_be_null") + << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + } StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; - } else if (instruction->IsLoadClass()) { + } else if (instruction->IsLoadClass() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()) { StartAttributeStream("klass") << "unresolved"; } else { // The NullConstant may be added to the graph during other passes that happen between diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 99dec112400..55eca2316a1 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) { DCHECK(instruction != nullptr); if (instruction->IsArrayLength()) { return true; - } else if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // Instruction MIN(>=0, >=0) is >= 0. - return IsGEZero(instruction->InputAt(0)) && - IsGEZero(instruction->InputAt(1)); - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - // Instruction ABS(>=0) is >= 0. - // NOTE: ABS(minint) = minint prevents assuming - // >= 0 without looking at the argument. - return IsGEZero(instruction->InputAt(0)); - default: - break; - } + } else if (instruction->IsMin()) { + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); + } else if (instruction->IsAbs()) { + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. 
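The NOTE above is worth making concrete; a standalone sketch (not ART code) showing that ABS cannot be assumed non-negative without inspecting its argument:

    #include <cstdint>
    #include <limits>

    // Sketch only: two's-complement negation of the minimum value overflows, so
    // ABS(INT32_MIN) is still INT32_MIN, i.e. negative. Hence IsGEZero(ABS(x))
    // can only be concluded when IsGEZero(x) already holds.
    inline bool AbsOfMinIntIsStillNegative() {
      int32_t min = std::numeric_limits<int32_t>::min();
      int32_t abs_min = static_cast<int32_t>(0u - static_cast<uint32_t>(min));  // avoids signed overflow
      return abs_min < 0;  // true
    }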
+ return IsGEZero(instruction->InputAt(0)); } int64_t value = -1; return IsInt64AndGet(instruction, &value) && value >= 0; @@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) { /** Hunts "under the hood" for a suitable instruction at the hint. */ static bool IsMaxAtHint( HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { - if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // For MIN(x, y), return most suitable x or y as maximum. - return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || - IsMaxAtHint(instruction->InputAt(1), hint, suitable); - default: - break; - } + if (instruction->IsMin()) { + // For MIN(x, y), return most suitable x or y as maximum. + return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); } else { *suitable = instruction; return HuntForDeclaration(instruction) == hint; } - return false; } /** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ @@ -365,14 +351,16 @@ void InductionVarRange::Replace(HInstruction* instruction, } } -bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const { - HInductionVarAnalysis::InductionInfo *trip = - induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); - if (trip != nullptr && !IsUnsafeTripCount(trip)) { - IsConstant(trip->op_a, kExact, tc); - return true; - } - return false; +bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { + bool is_constant_unused = false; + return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count); +} + +bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop, + /*out*/ int64_t* trip_count) const { + bool is_constant = false; + CheckForFiniteAndConstantProps(loop, &is_constant, trip_count); + return is_constant; } bool InductionVarRange::IsUnitStride(HInstruction* context, @@ -431,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop, // Private class methods. // +bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const { + HInductionVarAnalysis::InductionInfo *trip = + induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); + if (trip != nullptr && !IsUnsafeTripCount(trip)) { + *is_constant = IsConstant(trip->op_a, kExact, trip_count); + return true; + } + return false; +} + bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, ConstantRequest request, /*out*/ int64_t* value) const { diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 0b980f596a3..906dc6bb7b9 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -161,9 +161,15 @@ class InductionVarRange { } /** - * Checks if header logic of a loop terminates. Sets trip-count tc if known. + * Checks if header logic of a loop terminates. If trip count is known sets 'trip_count' to its + * value. */ - bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; + bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; + + /** + * Checks if a trip count is known for the loop and sets 'trip_count' to its value in this case. 
+ */ + bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; /** * Checks if the given instruction is a unit stride induction inside the closest enveloping @@ -194,6 +200,14 @@ class InductionVarRange { }; /** + * Checks if header logic of a loop terminates. If trip count is known (constant) sets + * 'is_constant' to true and 'trip_count' to the trip count value. + */ + bool CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const; + + /** * Returns true if exact or upper/lower bound on the given induction * information is known as a 64-bit constant, which is returned in value. */ diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 4fc7262265e..8b10a78212e 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -147,10 +147,11 @@ void HInliner::Run() { // that this method is actually inlined; // - if a method's name contains the substring "$noinline$", do not // inline that method. - // We limit this to AOT compilation, as the JIT may or may not inline + // We limit the latter to AOT compilation, as the JIT may or may not inline // depending on the state of classes at runtime. - const bool honor_inlining_directives = - IsCompilingWithCoreImage() && Runtime::Current()->IsAotCompiler(); + const bool honor_noinline_directives = IsCompilingWithCoreImage(); + const bool honor_inline_directives = + honor_noinline_directives && Runtime::Current()->IsAotCompiler(); // Keep a copy of all blocks when starting the visit. ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); @@ -164,18 +165,19 @@ void HInliner::Run() { HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { - if (honor_inlining_directives) { + if (honor_noinline_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod( call->GetDexMethodIndex(), /* with_signature */ false); // Tests prevent inlining by having $noinline$ in their method names. if (callee_name.find("$noinline$") == std::string::npos) { - if (!TryInline(call)) { + if (!TryInline(call) && honor_inline_directives) { bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos); CHECK(!should_have_inlined) << "Could not inline " << callee_name; } } } else { + DCHECK(!honor_inline_directives); // Normal case: try to inline. 
TryInline(call); } diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index c7aef3779d1..9647dd5d41c 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -1815,29 +1815,6 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, } } -static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (cls == nullptr) { - return TypeCheckKind::kUnresolvedCheck; - } else if (cls->IsInterface()) { - return TypeCheckKind::kInterfaceCheck; - } else if (cls->IsArrayClass()) { - if (cls->GetComponentType()->IsObjectClass()) { - return TypeCheckKind::kArrayObjectCheck; - } else if (cls->CannotBeAssignedFromOtherTypes()) { - return TypeCheckKind::kExactCheck; - } else { - return TypeCheckKind::kArrayCheck; - } - } else if (cls->IsFinal()) { - return TypeCheckKind::kExactCheck; - } else if (cls->IsAbstract()) { - return TypeCheckKind::kAbstractClassCheck; - } else { - return TypeCheckKind::kClassHierarchyCheck; - } -} - void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) { HLoadString* load_string = new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); @@ -1852,22 +1829,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( - soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); - - bool needs_access_check = true; - if (klass != nullptr) { - if (klass->IsPublic()) { - needs_access_check = false; - } else { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); - if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { - needs_access_check = false; - } - } - } - + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); } @@ -1912,25 +1875,83 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, return load_class; } +Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa, + dex::TypeIndex type_index) { + Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); + ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass( + soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_); + // TODO: Avoid creating excessive handles if the method references the same class repeatedly. + // (Use a map on the local_allocator_.) 
+ return handles_->NewHandle(klass); +} + +bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) { + if (klass == nullptr) { + return true; + } else if (klass->IsPublic()) { + return false; + } else { + ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); + return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get()); + } +} + void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, dex::TypeIndex type_index, uint32_t dex_pc) { HInstruction* object = LoadLocal(reference, DataType::Type::kReference); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); ScopedObjectAccess soa(Thread::Current()); - TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); + TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( + klass.Get(), code_generator_, compiler_driver_, needs_access_check); + + HInstruction* class_or_null = nullptr; + HIntConstant* bitstring_path_to_root = nullptr; + HIntConstant* bitstring_mask = nullptr; + if (check_kind == TypeCheckKind::kBitstringCheck) { + // TODO: Allow using the bitstring check also if we need an access check. + DCHECK(!needs_access_check); + class_or_null = graph_->GetNullConstant(dex_pc); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + uint32_t path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get()); + uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get()); + bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc); + bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc); + } else { + class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); + } + DCHECK(class_or_null != nullptr); + if (instruction.Opcode() == Instruction::INSTANCE_OF) { - AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); + AppendInstruction(new (allocator_) HInstanceOf(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); UpdateLocal(destination, current_block_->GetLastInstruction()); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); // We emit a CheckCast followed by a BoundType. CheckCast is a statement // which may throw. If it succeeds BoundType sets the new type of `object` // for all subsequent uses. 
- AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); + AppendInstruction( + new (allocator_) HCheckCast(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); AppendInstruction(new (allocator_) HBoundType(object, dex_pc)); UpdateLocal(reference, current_block_->GetLastInstruction()); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 4428c532779..f78829232d4 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -39,6 +39,7 @@ class DexCompilationUnit; class HBasicBlockBuilder; class Instruction; class OptimizingCompilerStats; +class ScopedObjectAccess; class SsaBuilder; class VariableSizedHandleScope; @@ -232,6 +233,12 @@ class HInstructionBuilder : public ValueObject { bool needs_access_check) REQUIRES_SHARED(Locks::mutator_lock_); + Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index) + REQUIRES_SHARED(Locks::mutator_lock_); + + bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) + REQUIRES_SHARED(Locks::mutator_lock_); + // Returns the outer-most compiling method's class. ObjPtr<mirror::Class> GetOutermostCompilingClass() const; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index a42a85dc1d2..d3cf9568c2d 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -67,7 +67,6 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); - void VisitEqual(HEqual* equal) OVERRIDE; void VisitNotEqual(HNotEqual* equal) OVERRIDE; void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE; @@ -78,6 +77,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitNullCheck(HNullCheck* instruction) OVERRIDE; void VisitArrayLength(HArrayLength* instruction) OVERRIDE; void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + void VisitAbs(HAbs* instruction) OVERRIDE; void VisitAdd(HAdd* instruction) OVERRIDE; void VisitAnd(HAnd* instruction) OVERRIDE; void VisitCondition(HCondition* instruction) OVERRIDE; @@ -120,6 +120,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); + void SimplifyMin(HInvoke* invoke, DataType::Type type); + void SimplifyMax(HInvoke* invoke, DataType::Type type); + void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; CompilerDriver* compiler_driver_; @@ -576,7 +579,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst // Returns whether doing a type test between the class of `object` against `klass` has // a statically known outcome. The result of the test is stored in `outcome`. 
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { +static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, + HInstruction* object, + /*out*/bool* outcome) { DCHECK(!object->IsNullConstant()) << "Null constants should be special cased"; ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); @@ -586,7 +591,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo return false; } - ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); if (!class_rti.IsValid()) { // Happens when the loaded class is unresolved. return false; @@ -611,8 +615,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + check_cast->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -630,15 +634,18 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); - if (!load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the checkcast was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = check_cast->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the checkcast was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } else { // Don't do anything for exceptional cases for now. Ideally we should remove @@ -649,8 +656,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); - HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + instruction->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -673,7 +680,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . 
bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); if (outcome && can_be_null) { // Type test will succeed, we just need a null test. @@ -686,11 +693,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); - if (outcome && !load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the instanceof check was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = instruction->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the instanceof check was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } } @@ -849,35 +859,29 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { static HInstruction* NewIntegralAbs(ArenaAllocator* allocator, HInstruction* x, HInstruction* cursor) { - DataType::Type type = x->GetType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - // Construct a fake intrinsic with as much context as is needed to allocate one. - // The intrinsic will always be lowered into code later anyway. - // TODO: b/65164101 : moving towards a real HAbs node makes more sense. - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; - HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect( - allocator, - 1, - type, - x->GetDexPc(), - /*method_idx*/ -1, - /*resolved_method*/ nullptr, - dispatch_info, - kStatic, - MethodReference(nullptr, dex::kDexNoIndex), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - invoke->SetArgumentAt(0, x); - invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt - : Intrinsics::kMathAbsLong, - kNoEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - cursor->GetBlock()->InsertInstructionBefore(invoke, cursor); - return invoke; + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HAbs* abs = new (allocator) HAbs(type, x, cursor->GetDexPc()); + cursor->GetBlock()->InsertInstructionBefore(abs, cursor); + return abs; +} + +// Constructs a new MIN/MAX(x, y) node in the HIR. 
+static HInstruction* NewIntegralMinMax(ArenaAllocator* allocator, + HInstruction* x, + HInstruction* y, + HInstruction* cursor, + bool is_min) { + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HBinaryOperation* minmax = nullptr; + if (is_min) { + minmax = new (allocator) HMin(type, x, y, cursor->GetDexPc()); + } else { + minmax = new (allocator) HMax(type, x, y, cursor->GetDexPc()); + } + cursor->GetBlock()->InsertInstructionBefore(minmax, cursor); + return minmax; } // Returns true if operands a and b consists of widening type conversions @@ -899,6 +903,30 @@ static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInst to_type == DataType::Type::kInt64); } +// Returns an acceptable substitution for "a" on the select +// construct "a <cmp> b ? c : .." during MIN/MAX recognition. +static HInstruction* AllowInMinMax(IfCondition cmp, + HInstruction* a, + HInstruction* b, + HInstruction* c) { + int64_t value = 0; + if (IsInt64AndGet(b, /*out*/ &value) && + (((cmp == kCondLT || cmp == kCondLE) && c->IsMax()) || + ((cmp == kCondGT || cmp == kCondGE) && c->IsMin()))) { + HConstant* other = c->AsBinaryOperation()->GetConstantRight(); + if (other != nullptr && a == c->AsBinaryOperation()->GetLeastConstantLeft()) { + int64_t other_value = Int64FromConstant(other); + bool is_max = (cmp == kCondLT || cmp == kCondLE); + // Allow the max for a < 100 ? max(a, -100) : .. + // or the min for a > -100 ? min(a, 100) : .. + if (is_max ? (value >= other_value) : (value <= other_value)) { + return c; + } + } + } + return nullptr; +} + void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* replace_with = nullptr; HInstruction* condition = select->GetCondition(); @@ -942,23 +970,35 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are same-typed int or long. - if (t_type == f_type && - (t_type == DataType::Type::kInt32 || t_type == DataType::Type::kInt64)) { - // Try to replace typical integral ABS constructs. - if (true_value->IsNeg()) { - HInstruction* negated = true_value->InputAt(0); - if ((cmp == kCondLT || cmp == kCondLE) && - (a == negated && a == false_value && IsInt64Value(b, 0))) { - // Found a < 0 ? -a : a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select); - } - } else if (false_value->IsNeg()) { - HInstruction* negated = false_value->InputAt(0); - if ((cmp == kCondGT || cmp == kCondGE) && - (a == true_value && a == negated && IsInt64Value(b, 0))) { - // Found a > 0 ? a : -a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. + if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; + } + // Try to replace typical integral MIN/MAX/ABS constructs. 
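To make the AllowInMinMax case above concrete, here is a standalone sketch (illustrative constants, not ART code) of the clamp shape it recognizes and the nested MIN/MAX it is equivalent to:

    #include <algorithm>
    #include <cstdint>

    // Sketch only. For constants hi >= lo, "a < hi ? max(a, lo) : hi" is exactly
    // clamp(a, lo, hi) == min(max(a, lo), hi). Treating the inner max(a, lo) as the
    // select's "a" lets the outer select be recognized as a MIN, so the whole
    // expression collapses into nested MIN/MAX nodes.
    inline int32_t ClampViaSelect(int32_t a) {
      return a < 100 ? std::max(a, -100) : 100;
    }
    inline int32_t ClampViaMinMax(int32_t a) {
      return std::min(std::max(a, -100), 100);  // same result for every a
    }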
+ if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && + ((a == true_value && b == false_value) || + (b == true_value && a == false_value))) { + // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) + // or a > b ? a : b (MAX) or a > b ? b : a (MIN). + bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); + replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); } } else if (true_value->IsSub() && false_value->IsSub()) { HInstruction* true_sub1 = true_value->InputAt(0); @@ -970,8 +1010,8 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { ((cmp == kCondLT || cmp == kCondLE) && (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a or - // a < b ? b - a : a - b + // Found a > b ? a - b : b - a + // or a < b ? b - a : a - b // which can be replaced by ABS(a - b) for lower precision operands a, b. replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); } @@ -1230,6 +1270,17 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } +void InstructionSimplifierVisitor::VisitAbs(HAbs* instruction) { + HInstruction* input = instruction->GetInput(); + if (DataType::IsZeroExtension(input->GetType(), instruction->GetResultType())) { + // Zero extension from narrow to wide can never set sign bit in the wider + // operand, making the subsequent Abs redundant (e.g., abs(b & 0xff) for byte b). 
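The VisitAbs rule above rests on a simple fact: a zero-extended narrow value can never have the sign bit of the wider type set, so taking its absolute value is the identity. A small self-contained check of that claim (illustrative, not ART code; the Java-level shape is abs(b & 0xff) for a byte b):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Illustrative only: zero extension yields a non-negative wide value,
    // so abs() on it is redundant.
    int32_t AbsOfZeroExtendedByte(int8_t b) {
      int32_t widened = static_cast<int32_t>(static_cast<uint8_t>(b));  // b & 0xff
      return std::abs(widened);  // always equal to 'widened' itself
    }

    int main() {
      for (int v = -128; v <= 127; ++v) {
        int8_t b = static_cast<int8_t>(v);
        int32_t widened = static_cast<int32_t>(static_cast<uint8_t>(b));
        assert(AbsOfZeroExtendedByte(b) == widened);  // Abs adds nothing here
      }
      return 0;
    }
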
+ instruction->ReplaceWith(input); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + } +} + void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); @@ -2430,6 +2481,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } +void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMin* min = new (GetGraph()->GetAllocator()) + HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min); +} + +void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMax* max = new (GetGraph()->GetAllocator()) + HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max); +} + +void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HAbs* abs = new (GetGraph()->GetAllocator()) + HAbs(type, invoke->InputAt(0), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { switch (instruction->GetIntrinsic()) { case Intrinsics::kStringEquals: @@ -2513,6 +2585,42 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleStoreStoreFence: SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); break; + case Intrinsics::kMathMinIntInt: + SimplifyMin(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMinLongLong: + SimplifyMin(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMinFloatFloat: + SimplifyMin(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMinDoubleDouble: + SimplifyMin(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathMaxIntInt: + SimplifyMax(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMaxLongLong: + SimplifyMax(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMaxFloatFloat: + SimplifyMax(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMaxDoubleDouble: + SimplifyMax(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathAbsInt: + SimplifyAbs(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathAbsLong: + SimplifyAbs(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathAbsFloat: + SimplifyAbs(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathAbsDouble: + SimplifyAbs(instruction, DataType::Type::kFloat64); + break; default: break; } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 62991435c7a..1035cbc2c46 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -266,6 +266,18 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ << " should have been converted to HIR"; \ } #define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, 
MathMinDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \ UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 0e6485be9f7..c3d643a7d18 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { - Location in = locations->InAt(0); - Location out = locations->Out(); - - FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in); - FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out); - - __ Fabs(out_reg, in_reg); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - MacroAssembler* masm) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in); - Register out_reg = is64bit ? 
XRegisterFrom(output) : WRegisterFrom(output); - - __ Cmp(in_reg, Operand(0)); - __ Cneg(out_reg, in_reg, lt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); - FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); - FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out); - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - bool is_long, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? 
lt : gt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); -} - void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 97a145664c3..29aecbc0975 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -432,341 +432,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } -static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { - __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - ArmVIXLAssembler* assembler) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); - - if (is64bit) { - vixl32::Register in_reg_lo = LowRegisterFrom(in); - vixl32::Register in_reg_hi = HighRegisterFrom(in); - vixl32::Register out_reg_lo = LowRegisterFrom(output); - vixl32::Register out_reg_hi = HighRegisterFrom(output); - - DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; - - __ Asr(mask, in_reg_hi, 31); - __ Adds(out_reg_lo, in_reg_lo, mask); - __ Adc(out_reg_hi, in_reg_hi, mask); - __ Eor(out_reg_lo, mask, out_reg_lo); - __ Eor(out_reg_hi, mask, out_reg_hi); - } else { - 
vixl32::Register in_reg = RegisterFrom(in); - vixl32::Register out_reg = RegisterFrom(output); - - __ Asr(mask, in_reg, 31); - __ Add(out_reg, in_reg, mask); - __ Eor(out_reg, mask, out_reg); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::SRegister op1 = SRegisterFrom(op1_loc); - vixl32::SRegister op2 = SRegisterFrom(op2_loc); - vixl32::SRegister out = OutputSRegister(invoke); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp1 = temps.Acquire(); - vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); - vixl32::Label nan, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F32, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). - __ Vmov(temp1, op1); - __ Vmov(temp2, op2); - if (is_min) { - __ Orr(temp1, temp1, temp2); - } else { - __ And(temp1, temp1, temp2); - } - __ Vmov(out, temp1); - __ B(final_label); - - // handle NaN input. - __ Bind(&nan); - __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
- __ Vmov(out, temp1); - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. - return; - } - - vixl32::DRegister op1 = DRegisterFrom(op1_loc); - vixl32::DRegister op2 = DRegisterFrom(op2_loc); - vixl32::DRegister out = OutputDRegister(invoke); - vixl32::Label handle_nan_eq, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F64, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0). - if (!is_min) { - __ Vand(F64, out, op1, op2); - __ B(final_label); - } - - // handle op1 == op2, min(+0.0,-0.0), NaN input. - __ Bind(&handle_nan_eq); - __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 
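The removed ARM helpers above hand-code Java's Math.min/max semantics for floats and doubles: a NaN operand wins over any number, and when the operands compare equal the raw bit patterns are OR-ed (min) or AND-ed (max) so that -0.0 and +0.0 are ordered correctly. A compact C++ model of the float min case, offered only as a sketch (the function name is illustrative):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Illustrative only: Java-style Math.min(float, float). NaN beats any number,
    // and when the operands compare equal (+0.0 vs -0.0) the sign bits are OR-ed
    // so -0.0 prevails for min.
    float JavaStyleMinFloat(float a, float b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::numeric_limits<float>::quiet_NaN();  // 0x7FC00000-style pattern
      }
      if (a != b) {
        return a < b ? a : b;
      }
      // a == b: possibly +0.0 vs -0.0; OR the bit patterns so -0.0 wins.
      uint32_t ia, ib;
      std::memcpy(&ia, &a, sizeof(ia));
      std::memcpy(&ib, &b, sizeof(ib));
      uint32_t result_bits = ia | ib;
      float result;
      std::memcpy(&result, &result_bits, sizeof(result));
      return result;
    }

    int main() {
      assert(std::signbit(JavaStyleMinFloat(+0.0f, -0.0f)));  // min(+0.0, -0.0) == -0.0
      assert(std::isnan(JavaStyleMinFloat(1.0f, NAN)));       // NaN is preferred
      assert(JavaStyleMinFloat(2.0f, 3.0f) == 2.0f);
      return 0;
    }
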
- - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::Register op1_lo = LowRegisterFrom(op1_loc); - vixl32::Register op1_hi = HighRegisterFrom(op1_loc); - vixl32::Register op2_lo = LowRegisterFrom(op2_loc); - vixl32::Register op2_hi = HighRegisterFrom(op2_loc); - vixl32::Register out_lo = LowRegisterFrom(out_loc); - vixl32::Register out_hi = HighRegisterFrom(out_loc); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp = temps.Acquire(); - - DCHECK(op1_lo.Is(out_lo)); - DCHECK(op1_hi.Is(out_hi)); - - // Compare op1 >= op2, or op1 < op2. - __ Cmp(out_lo, op2_lo); - __ Sbcs(temp, out_hi, op2_hi); - - // Now GE/LT condition code is correct for the long comparison. - { - vixl32::ConditionType cond = is_min ? ge : lt; - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ itt(cond); - __ mov(cond, out_lo, op2_lo); - __ mov(cond, out_hi, op2_hi); - } -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); -} - -static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - vixl32::Register op1 = InputRegisterAt(invoke, 0); - vixl32::Register op2 = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); - - __ Cmp(op1, op2); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ ite(is_min ? lt : gt); - __ mov(is_min ? lt : gt, out, op1); - __ mov(is_min ? 
ge : le, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ false, GetAssembler()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index b7936b9c8ec..ae248a3e5c7 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); } +inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, DataType::Type type, bool isR6, + bool hasMsa, MipsAssembler* assembler) { Register out = locations->Out().AsRegister<Register>(); @@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations, // instructions compared to a loop-based algorithm which required 47 // instructions. 
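The non-MSA path below emits, instruction by instruction, the classic branch-free population count described in the comment above (the MSA path replaces all of it with a single PCNT instruction). The same reduction written directly in C++, as an illustrative sketch checked against a reference loop:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: the branch-free popcount the MIPS code emits by hand.
    uint32_t PopCount32(uint32_t x) {
      x = x - ((x >> 1) & 0x55555555u);                  // 2-bit subtotals
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit subtotals
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit subtotals
      return (x * 0x01010101u) >> 24;                    // sum the four bytes
    }

    int main() {
      for (uint32_t v : {0u, 1u, 0xFFu, 0x80000000u, 0xFFFFFFFFu, 0x12345678u}) {
        uint32_t expected = 0;
        for (uint32_t t = v; t != 0; t >>= 1) expected += t & 1u;  // reference loop
        assert(PopCount32(v) == expected);
      }
      return 0;
    }
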
- if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - if (isR6) { - __ MulR6(out, out, TMP); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } else { - __ MulR2(out, out, TMP); + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + __ Mtc1(in_lo, FTMP); + __ Mthc1(in_hi, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } - __ Srl(out, out, 24); } else { - DCHECK_EQ(type, DataType::Type::kInt64); - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); - Register out_hi = locations->GetTemp(1).AsRegister<Register>(); - Register tmp_lo = TMP; - Register out_lo = out; + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + if (isR6) { + __ MulR6(out, out, TMP); + } else { + __ MulR2(out, out, TMP); + } + __ Srl(out, out, 24); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); + Register out_hi = locations->GetTemp(1).AsRegister<Register>(); + Register tmp_lo = TMP; + Register out_lo = out; - __ Srl(tmp_lo, in_lo, 1); - __ Srl(tmp_hi, in_hi, 1); + __ Srl(tmp_lo, in_lo, 1); + __ Srl(tmp_hi, in_hi, 1); - __ LoadConst32(AT, 0x55555555); + __ LoadConst32(AT, 0x55555555); - __ And(tmp_lo, tmp_lo, AT); - __ Subu(tmp_lo, in_lo, tmp_lo); + __ And(tmp_lo, tmp_lo, AT); + __ Subu(tmp_lo, in_lo, tmp_lo); - __ And(tmp_hi, tmp_hi, AT); - __ Subu(tmp_hi, in_hi, tmp_hi); + __ And(tmp_hi, tmp_hi, AT); + __ Subu(tmp_hi, in_hi, tmp_hi); - __ LoadConst32(AT, 0x33333333); + __ LoadConst32(AT, 0x33333333); - __ And(out_lo, tmp_lo, AT); - __ Srl(tmp_lo, tmp_lo, 2); - __ And(tmp_lo, tmp_lo, AT); - __ Addu(tmp_lo, out_lo, tmp_lo); + __ And(out_lo, tmp_lo, AT); + __ Srl(tmp_lo, tmp_lo, 2); + __ And(tmp_lo, tmp_lo, AT); + __ Addu(tmp_lo, out_lo, tmp_lo); - __ And(out_hi, tmp_hi, AT); - __ Srl(tmp_hi, tmp_hi, 2); - __ And(tmp_hi, tmp_hi, AT); - __ Addu(tmp_hi, out_hi, tmp_hi); + __ And(out_hi, tmp_hi, AT); + __ Srl(tmp_hi, tmp_hi, 2); + __ And(tmp_hi, tmp_hi, AT); + __ Addu(tmp_hi, out_hi, tmp_hi); - // Here we deviate from the original algorithm a bit. 
We've reached - // the stage where the bitfields holding the subtotals are large - // enough to hold the combined subtotals for both the low word, and - // the high word. This means that we can add the subtotals for the - // the high, and low words into a single word, and compute the final - // result for both the high, and low words using fewer instructions. - __ LoadConst32(AT, 0x0F0F0F0F); + // Here we deviate from the original algorithm a bit. We've reached + // the stage where the bitfields holding the subtotals are large + // enough to hold the combined subtotals for both the low word, and + // the high word. This means that we can add the subtotals for the + // the high, and low words into a single word, and compute the final + // result for both the high, and low words using fewer instructions. + __ LoadConst32(AT, 0x0F0F0F0F); - __ Addu(TMP, tmp_hi, tmp_lo); + __ Addu(TMP, tmp_hi, tmp_lo); - __ Srl(out, TMP, 4); - __ And(out, out, AT); - __ And(TMP, TMP, AT); - __ Addu(out, out, TMP); + __ Srl(out, TMP, 4); + __ And(out, out, AT); + __ And(TMP, TMP, AT); + __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x01010101); + __ LoadConst32(AT, 0x01010101); - if (isR6) { - __ MulR6(out, out, AT); - } else { - __ MulR2(out, out, AT); - } + if (isR6) { + __ MulR6(out, out, AT); + } else { + __ MulR2(out, out, AT); + } - __ Srl(out, out, 24); + __ Srl(out, out, 24); + } } } @@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(int) @@ -739,575 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - bool isR2OrNewer, - bool isR6, - MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - // Note, as a "quality of implementation", rather than pure "spec compliance", we require that - // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN - // (signaling NaN may become quiet though). - // - // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, - // both regular floating point numbers and NAN values are treated alike, only the sign bit is - // affected by this instruction. - // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any - // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be - // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. - if (isR6) { - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } - } else { - if (is64bit) { - if (in != out) { - __ MovD(out, in); - } - __ MoveFromFpuHigh(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ MoveToFpuHigh(TMP, out); - } else { - __ Mfc1(TMP, in); - // ins instruction is not available for R1. 
- if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ Mtc1(TMP, out); - } - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - // The comments in this section show the analogous operations which would - // be performed if we had 64-bit registers "in", and "out". - // __ Dsra32(AT, in, 31); - __ Sra(AT, in_hi, 31); - // __ Xor(out, in, AT); - __ Xor(TMP, in_lo, AT); - __ Xor(out_hi, in_hi, AT); - // __ Dsubu(out, out, AT); - __ Subu(out_lo, TMP, AT); - __ Sltu(TMP, out_lo, TMP); - __ Addu(out_hi, out_hi, TMP); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (is_R6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
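The removed GenAbsInteger helpers above (ARM's Asr/Add/Eor sequence and the MIPS Sra/Xor/Subu sequence) both use the same branch-free two's-complement identity: with mask = x >> 31, abs(x) equals (x + mask) ^ mask, or equivalently (x ^ mask) - mask. A standalone sketch, assuming arithmetic right shift and two's-complement wrap-around, as the hardware instructions provide:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: branch-free integer abs. mask is 0 for x >= 0 and
    // all-ones for x < 0, so the result is x, or ~x + 1 == -x, respectively.
    int32_t BranchlessAbs(int32_t x) {
      uint32_t ux = static_cast<uint32_t>(x);
      uint32_t mask = static_cast<uint32_t>(x >> 31);  // arithmetic shift: 0 or 0xFFFFFFFF
      return static_cast<int32_t>((ux + mask) ^ mask);
    }

    int main() {
      assert(BranchlessAbs(5) == 5);
      assert(BranchlessAbs(-5) == 5);
      assert(BranchlessAbs(0) == 0);
      assert(BranchlessAbs(INT32_MIN) == INT32_MIN);  // wraps, like Math.abs(Integer.MIN_VALUE)
      return 0;
    }
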
- __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - } else { - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. - if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? 
a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - if (is_R6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. 
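The SELEQZ/SELNEZ description above can be modeled directly in C++; a sketch only, with seleqz/selnez standing in for the instructions (exactly one of the two intermediate results is guaranteed to be zero, so an OR merges them):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: seleqz(rs, c) == (c == 0 ? rs : 0),
    //                    selnez(rs, c) == (c != 0 ? rs : 0).
    uint32_t Seleqz(uint32_t rs, uint32_t c) { return c == 0 ? rs : 0; }
    uint32_t Selnez(uint32_t rs, uint32_t c) { return c != 0 ? rs : 0; }

    // rd = (c == 0) ? rs : rt, built from the pair plus an OR.
    uint32_t SelectEqz(uint32_t c, uint32_t rs, uint32_t rt) {
      return Seleqz(rs, c) | Selnez(rt, c);
    }

    int main() {
      assert(SelectEqz(0, 7, 9) == 7);
      assert(SelectEqz(1, 7, 9) == 9);
      // Min via an slt-style condition: c = (b < a), min = (c == 0) ? a : b.
      uint32_t a = 5, b = 3;
      uint32_t c = (b < a) ? 1u : 0u;
      assert(SelectEqz(c, a, b) == 3u);
      return 0;
    }
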
- // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. - } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = 
locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 13397f11d4c..1c1ba401325 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { bool IsR2OrNewer() const; bool IsR6() const; bool Is32BitFPU() const; + bool HasMsa() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 4668c561ed3..9a9ae714bc6 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { return codegen_->GetGraph()->GetAllocator(); } +inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, const DataType::Type type, + const bool hasMsa, Mips64Assembler* assembler) { GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); @@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations, // bits are set but the algorithm here attempts to minimize the total // number of instructions executed even when a large number of 
bits // are set. - - if (type == DataType::Type::kInt32) { - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - __ MulR6(out, out, TMP); - __ Srl(out, out, 24); - } else if (type == DataType::Type::kInt64) { - __ Dsrl(TMP, in, 1); - __ LoadConst64(AT, 0x5555555555555555L); - __ And(TMP, TMP, AT); - __ Dsubu(TMP, in, TMP); - __ LoadConst64(AT, 0x3333333333333333L); - __ And(out, TMP, AT); - __ Dsrl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Daddu(TMP, out, TMP); - __ Dsrl(out, TMP, 4); - __ Daddu(out, out, TMP); - __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); - __ And(out, out, AT); - __ LoadConst64(TMP, 0x0101010101010101L); - __ Dmul(out, out, TMP); - __ Dsrl32(out, out, 24); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); + } else { + __ Dmtc1(in, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Dmfc1(out, FTMP); + } + } else { + if (type == DataType::Type::kInt32) { + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + __ MulR6(out, out, TMP); + __ Srl(out, out, 24); + } else { + __ Dsrl(TMP, in, 1); + __ LoadConst64(AT, 0x5555555555555555L); + __ And(TMP, TMP, AT); + __ Dsubu(TMP, in, TMP); + __ LoadConst64(AT, 0x3333333333333333L); + __ And(out, TMP, AT); + __ Dsrl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Daddu(TMP, out, TMP); + __ Dsrl(out, TMP, 4); + __ Daddu(out, out, TMP); + __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); + __ And(out, out, AT); + __ LoadConst64(TMP, 0x0101010101010101L); + __ Dmul(out, out, TMP); + __ Dsrl32(out, out, 24); + } } } @@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(long) @@ -467,291 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, 
GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dsra32(AT, in, 31); - __ Xor(out, in, AT); - __ Dsubu(out, out, AT); - } else { - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - Mips64Assembler* assembler) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - Mips64Assembler* assembler) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 6f40d90ddbf..748b0b02b2e 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -68,6 +68,8 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + bool HasMsa() const; + private: Mips64Assembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 0763ef23529..f84a33bb8e3 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -40,11 +40,6 @@ namespace art { namespace x86 { -static constexpr int kDoubleNaNHigh = 0x7FF80000; -static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); -static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); - IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* 
codegen) : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { @@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - // We need addressibility for the constant area. - locations->SetInAt(1, Location::RequiresRegister()); - // We need a temporary to hold the constant. - locations->AddTemp(Location::RequiresFpuRegister()); - } -} - -static void MathAbsFP(HInvoke* invoke, - bool is64bit, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(1).IsRegister()); - // We also have a constant area pointer. - Register constant_area = locations->InAt(1).AsRegister<Register>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - if (is64bit) { - __ movsd(temp, codegen->LiteralInt64Address( - INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area)); - __ andpd(output.AsFpuRegister<XmmRegister>(), temp); - } else { - __ movss(temp, codegen->LiteralInt32Address( - INT32_C(0x7FFFFFFF), method_address, constant_area)); - __ andps(output.AsFpuRegister<XmmRegister>(), temp); - } - } else { - // Create the right constant on an aligned stack. 
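// A short aside on both MathAbsFP paths, with purely worked example values:
// float/double abs is implemented by clearing the IEEE-754 sign bit, hence the
// masks 0x7FFFFFFF and 0x7FFFFFFFFFFFFFFF, e.g. -2.0f = 0xC0000000 and
// 0xC0000000 & 0x7FFFFFFF = 0x40000000 = 2.0f. In the stack fallback below,
// pushing the high word 0x7FFFFFFF and then the low word 0xFFFFFFFF leaves the
// little-endian 64-bit mask 0x7FFFFFFFFFFFFFFF at (ESP) for the andpd.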
- if (is64bit) { - __ subl(ESP, Immediate(8)); - __ pushl(Immediate(0x7FFFFFFF)); - __ pushl(Immediate(0xFFFFFFFF)); - __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } else { - __ subl(ESP, Immediate(12)); - __ pushl(Immediate(0x7FFFFFFF)); - __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } - __ addl(ESP, Immediate(16)); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RegisterLocation(EDX)); -} - -static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { - Location output = locations->Out(); - Register out = output.AsRegister<Register>(); - DCHECK_EQ(out, EAX); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(temp, EDX); - - // Sign extend EAX into EDX. - __ cdq(); - - // XOR EAX with sign. - __ xorl(EAX, EDX); - - // Subtract out sign to correct. - __ subl(EAX, EDX); - - // The result is in EAX. -} - -static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { - Location input = locations->InAt(0); - Register input_lo = input.AsRegisterPairLow<Register>(); - Register input_hi = input.AsRegisterPairHigh<Register>(); - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Compute the sign into the temporary. - __ movl(temp, input_hi); - __ sarl(temp, Immediate(31)); - - // Store the sign into the output. - __ movl(output_lo, temp); - __ movl(output_hi, temp); - - // XOR the input to the output. - __ xorl(output_lo, input_lo); - __ xorl(output_hi, input_hi); - - // Subtract the sign. 
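// Worked example of the branch-free abs being emitted here: with
// s = x >> 63 (arithmetic shift, so s is 0 or all ones), |x| = (x ^ s) - s.
// For x = -5: s is all ones, x ^ s = 4, and 4 - (-1) = 5. Since x86-32 only
// has 32-bit GPRs, the final subtraction is split into subl on the low word
// followed by sbbl on the high word so the borrow carries across the 64 bits.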
- __ subl(output_lo, temp); - __ sbbl(output_hi, temp); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { - CreateAbsIntLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { - CreateAbsLongLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { - GenAbsLong(invoke->GetLocations(), GetAssembler()); -} - -static void GenMinMaxFP(HInvoke* invoke, - bool is_min, - bool is_double, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - // Do we have a constant area pointer? - if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(2).IsRegister()); - Register constant_area = locations->InAt(2).AsRegister<Register>(); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); - } else { - __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); - } - } else { - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); - } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
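// Two details of the FP min/max sequence above, spelled out for reference:
// ucomiss/ucomisd set PF only for an unordered compare (a NaN input), which is
// what j(kParityEven, &nan) keys on; and when the ordered inputs compare
// equal, orps/orpd for min keeps a set sign bit so min(+0.0, -0.0) == -0.0,
// while andps/andpd for max clears it so max(+0.0, -0.0) == +0.0, matching the
// Java definition without an extra branch.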
- locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - locations->SetInAt(2, Location::RequiresRegister()); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - if (is_long) { - // Need to perform a subtract to get the sign right. - // op1 is already in the same location as the output. - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - - Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); - Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); - - // Spare register to compute the subtraction to set condition code. - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Subtract off op2_low. - __ movl(temp, output_lo); - __ subl(temp, op2_lo); - - // Now use the same tempo and the borrow to finish the subtraction of op2_hi. - __ movl(temp, output_hi); - __ sbbl(temp, op2_hi); - - // Now the condition code is correct. - Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; - __ cmovl(cond, output_lo, op2_lo); - __ cmovl(cond, output_hi, op2_hi); - } else { - Register out = locations->Out().AsRegister<Register>(); - Register op2 = op2_loc.AsRegister<Register>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - __ cmpl(out, op2); - Condition cond = is_min ? 
Condition::kGreater : Condition::kLess; - __ cmovl(cond, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - // Register to use to perform a long subtract to set cc. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 91a505ede1a..7627dc9490a 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. 
-} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - - // TODO: Can mask directly with constant area using pand if we can guarantee - // that the literal is aligned on a 16 byte boundary. This will avoid a - // temporary. - if (is64bit) { - __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } else { - __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); - __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { - Location output = locations->Out(); - CpuRegister out = output.AsRegister<CpuRegister>(); - CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); - - if (is64bit) { - // Create mask. - __ movq(mask, out); - __ sarq(mask, Immediate(63)); - // Add mask. - __ addq(out, mask); - __ xorq(out, mask); - } else { - // Create mask. - __ movl(mask, out); - __ sarl(mask, Immediate(31)); - // Add mask. - __ addl(out, mask); - __ xorl(out, mask); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? 
op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); - } else { - __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86_64Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. 
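// For reference: the literals loaded in the NaN branch above,
// 0x7FF8000000000000 and 0x7FC00000, are the canonical quiet-NaN encodings
// (all-ones exponent, top fraction bit set) and correspond to Double.NaN and
// Float.NaN, i.e. the value Java requires min/max to return once either input
// is a NaN.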
- DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - if (is_long) { - __ cmpq(out, op2); - } else { - __ cmpl(out, op2); - } - - __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc new file mode 100644 index 00000000000..a0760eff691 --- /dev/null +++ b/compiler/optimizing/loop_analysis.cc @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "loop_analysis.h" + +#include "base/bit_vector-inl.h" + +namespace art { + +void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results) { + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); + + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + analysis_results->exits_num_++; + } + } + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) { + analysis_results->has_instructions_preventing_scalar_peeling_ = true; + analysis_results->has_instructions_preventing_scalar_unrolling_ = true; + } + analysis_results->instr_num_++; + } + analysis_results->bb_num_++; + } +} + +bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) { + HGraph* graph = loop_info->GetHeader()->GetGraph(); + for (uint32_t block_id : loop_info->GetBlocks().Indexes()) { + HBasicBlock* block = graph->GetBlocks()[block_id]; + DCHECK(block != nullptr); + if (block->EndsWithIf()) { + HIf* hif = block->GetLastInstruction()->AsIf(); + HInstruction* input = hif->InputAt(0); + if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) { + return true; + } + } + } + return false; +} + +class Arm64LoopHelper : public ArchDefaultLoopHelper { + public: + // Scalar loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2; + // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40; + // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8; + + // SIMD loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8; + // Loop's maximum instruction count. Loops with higher count will not be unrolled. + static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50; + + bool IsLoopTooBigForScalarPeelingUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE { + size_t instr_num = loop_analysis_info->GetNumberOfInstructions(); + size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks(); + return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr || + bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks); + } + + uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, + uint64_t trip_count) const OVERRIDE { + uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor; + if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) { + return kNoUnrollingFactor; + } + + return desired_unrolling_factor; + } + + bool IsLoopPeelingEnabled() const OVERRIDE { return true; } + + uint32_t GetSIMDUnrollingFactor(HBasicBlock* block, + int64_t trip_count, + uint32_t max_peel, + uint32_t vector_length) const OVERRIDE { + // Don't unroll with insufficient iterations. + // TODO: Unroll loops with unknown trip count. + DCHECK_NE(vector_length, 0u); + if (trip_count < (2 * vector_length + max_peel)) { + return kNoUnrollingFactor; + } + // Don't unroll for large loop body size. 
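// Worked numbers for the two heuristics (purely illustrative inputs): the
// scalar factor above unrolls a trip_count of 100 by 2 but returns
// kNoUnrollingFactor for 7, since 7 is not a multiple of 2. For the SIMD
// factor computed just below, instruction_count = 10, trip_count = 100,
// max_peel = 4 and vector_length = 4 give uf1 = 50 / 10 = 5,
// uf2 = (100 - 4) / 4 = 24, min(5, 24, 8) = 5, truncated to the power of
// two 4, so the vector body is unrolled four times.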
+ uint32_t instruction_count = block->GetInstructions().CountSize(); + if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) { + return kNoUnrollingFactor; + } + // Find a beneficial unroll factor with the following restrictions: + // - At least one iteration of the transformed loop should be executed. + // - The loop body shouldn't be "too big" (heuristic). + + uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count; + uint32_t uf2 = (trip_count - max_peel) / vector_length; + uint32_t unroll_factor = + TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor})); + DCHECK_GE(unroll_factor, 1u); + return unroll_factor; + } +}; + +ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa, + ArenaAllocator* allocator) { + switch (isa) { + case InstructionSet::kArm64: { + return new (allocator) Arm64LoopHelper; + } + default: { + return new (allocator) ArchDefaultLoopHelper; + } + } +} + +} // namespace art diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h new file mode 100644 index 00000000000..ece98581367 --- /dev/null +++ b/compiler/optimizing/loop_analysis.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ + +#include "nodes.h" + +namespace art { + +class LoopAnalysis; + +// No loop unrolling factor (just one copy of the loop-body). +static constexpr uint32_t kNoUnrollingFactor = 1; + +// Class to hold cached information on properties of the loop. +class LoopAnalysisInfo : public ValueObject { + public: + explicit LoopAnalysisInfo(HLoopInformation* loop_info) + : bb_num_(0), + instr_num_(0), + exits_num_(0), + has_instructions_preventing_scalar_peeling_(false), + has_instructions_preventing_scalar_unrolling_(false), + loop_info_(loop_info) {} + + size_t GetNumberOfBasicBlocks() const { return bb_num_; } + size_t GetNumberOfInstructions() const { return instr_num_; } + size_t GetNumberOfExits() const { return exits_num_; } + + bool HasInstructionsPreventingScalarPeeling() const { + return has_instructions_preventing_scalar_peeling_; + } + + bool HasInstructionsPreventingScalarUnrolling() const { + return has_instructions_preventing_scalar_unrolling_; + } + + const HLoopInformation* GetLoopInfo() const { return loop_info_; } + + private: + // Number of basic blocks in the loop body. + size_t bb_num_; + // Number of instructions in the loop body. + size_t instr_num_; + // Number of loop's exits. + size_t exits_num_; + // Whether the loop has instructions which make scalar loop peeling non-beneficial. + bool has_instructions_preventing_scalar_peeling_; + // Whether the loop has instructions which make scalar loop unrolling non-beneficial. + bool has_instructions_preventing_scalar_unrolling_; + + // Corresponding HLoopInformation. 
+ const HLoopInformation* loop_info_; + + friend class LoopAnalysis; +}; + +// Placeholder class for methods and routines used to analyse loops, calculate loop properties +// and characteristics. +class LoopAnalysis : public ValueObject { + public: + // Calculates loops basic properties like body size, exits number, etc. and fills + // 'analysis_results' with this information. + static void CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results); + + // Returns whether the loop has at least one loop invariant exit. + static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info); + + // Returns whether HIf's true or false successor is outside the specified loop. + // + // Prerequisite: HIf must be in the specified loop. + static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) { + DCHECK(loop_info->Contains(*hif->GetBlock())); + HBasicBlock* true_succ = hif->IfTrueSuccessor(); + HBasicBlock* false_succ = hif->IfFalseSuccessor(); + return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ)); + } + + private: + // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial. + // + // If in the loop body we have a dex/runtime call then its contribution to the whole + // loop performance will probably prevail. So peeling/unrolling optimization will not bring + // any noticeable performance improvement. It will increase the code size. + static bool MakesScalarPeelingUnrollingNonBeneficial(HInstruction* instruction) { + return (instruction->IsNewArray() || + instruction->IsNewInstance() || + instruction->IsUnresolvedInstanceFieldGet() || + instruction->IsUnresolvedInstanceFieldSet() || + instruction->IsUnresolvedStaticFieldGet() || + instruction->IsUnresolvedStaticFieldSet() || + // TODO: Support loops with intrinsified invokes. + instruction->IsInvoke() || + // TODO: Support loops with ClinitChecks. + instruction->IsClinitCheck()); + } +}; + +// +// Helper class which holds target-dependent methods and constants needed for loop optimizations. +// +// To support peeling/unrolling for a new architecture one needs to create new helper class, +// inherit it from this and add implementation for the following methods. +// +class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> { + public: + virtual ~ArchDefaultLoopHelper() {} + + // Creates an instance of specialised helper for the target or default helper if the target + // doesn't support loop peeling and unrolling. + static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator); + + // Returns whether the loop is too big for loop peeling/unrolling by checking its total number of + // basic blocks and instructions. + // + // If the loop body has too many instructions then peeling/unrolling optimization will not bring + // any noticeable performance improvement however will increase the code size. + // + // Returns 'true' by default, should be overridden by particular target loop helper. + virtual bool IsLoopTooBigForScalarPeelingUnrolling( + LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + + // Returns optimal scalar unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. 
+ virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, + uint64_t trip_count ATTRIBUTE_UNUSED) const { + return kNoUnrollingFactor; + } + + // Returns whether scalar loop peeling is enabled, + // + // Returns 'false' by default, should be overridden by particular target loop helper. + virtual bool IsLoopPeelingEnabled() const { return false; } + + // Returns optimal SIMD unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. + virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, + int64_t trip_count ATTRIBUTE_UNUSED, + uint32_t max_peel ATTRIBUTE_UNUSED, + uint32_t vector_length ATTRIBUTE_UNUSED) const { + return kNoUnrollingFactor; + } +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 899496328eb..1462404932e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -33,8 +33,8 @@ namespace art { // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; -// No loop unrolling factor (just one copy of the loop-body). -static constexpr uint32_t kNoUnrollingFactor = 1; +// Enables scalar loop unrolling in the loop optimizer. +static constexpr bool kEnableScalarPeelingUnrolling = false; // // Static helpers. @@ -153,6 +153,18 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, return false; } } + // A MIN-MAX on narrower operands qualifies as well + // (returning the operator itself). + if (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + if (IsSignExtensionAndGet(min_max->InputAt(0), type, operand) && + IsSignExtensionAndGet(min_max->InputAt(1), type, operand)) { + *operand = min_max; + return true; + } + } return false; } @@ -216,6 +228,18 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, return false; } } + // A MIN-MAX on narrower operands qualifies as well + // (returning the operator itself). + if (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + if (IsZeroExtensionAndGet(min_max->InputAt(0), type, operand) && + IsZeroExtensionAndGet(min_max->InputAt(1), type, operand)) { + *operand = min_max; + return true; + } + } return false; } @@ -227,6 +251,7 @@ static bool IsNarrowerOperands(HInstruction* a, /*out*/ HInstruction** r, /*out*/ HInstruction** s, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr && b != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) { @@ -247,6 +272,7 @@ static bool IsNarrowerOperand(HInstruction* a, DataType::Type type, /*out*/ HInstruction** r, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr); // Look for a matching sign extension. 
DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r)) { @@ -270,20 +296,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type)); } -// Detect up to two instructions a and b, and an acccumulated constant c. -static bool IsAddConstHelper(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c, - int32_t depth) { - static constexpr int32_t kMaxDepth = 8; // don't search too deep +// Detect up to two added operands a and b and an acccumulated constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c, + int32_t depth = 8) { // don't search too deep int64_t value = 0; + // Enter add/sub while still within reasonable depth. + if (depth > 0) { + if (instruction->IsAdd()) { + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) && + IsAddConst(instruction->InputAt(1), a, b, c, depth - 1); + } else if (instruction->IsSub() && + IsInt64AndGet(instruction->InputAt(1), &value)) { + *c -= value; + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1); + } + } + // Otherwise, deal with leaf nodes. if (IsInt64AndGet(instruction, &value)) { *c += value; return true; - } else if (instruction->IsAdd() && depth <= kMaxDepth) { - return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1); } else if (*a == nullptr) { *a = instruction; return true; @@ -291,72 +325,170 @@ static bool IsAddConstHelper(HInstruction* instruction, *b = instruction; return true; } - return false; // too many non-const operands + return false; // too many operands } -// Detect a + b + c for an optional constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - // Try to find a + b and accumulated c. - if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && - *b != nullptr) { - return true; +// Detect a + b + c with optional constant c. +static bool IsAddConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c) { + if (IsAddConst(instruction, a, b, c) && *a != nullptr) { + if (*b == nullptr) { + // Constant is usually already present, unless accumulated. + *b = graph->GetConstant(instruction->GetType(), (*c)); + *c = 0; } - // Found a + b. + return true; + } + return false; +} + +// Detect a direct a - b or a hidden a - (-c). +static bool IsSubConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b) { + int64_t c = 0; + if (instruction->IsSub()) { *a = instruction->InputAt(0); *b = instruction->InputAt(1); - *c = 0; + return true; + } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) { + // Constant for the hidden subtraction. + *b = graph->GetConstant(instruction->GetType(), -c); return true; } return false; } -// Detect a + c for constant c. 
-static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - if (IsInt64AndGet(instruction->InputAt(0), c)) { - *a = instruction->InputAt(1); - return true; - } else if (IsInt64AndGet(instruction->InputAt(1), c)) { - *a = instruction->InputAt(0); - return true; +// Detect clipped [lo, hi] range for nested MIN-MAX operations on a clippee, +// such as MIN(hi, MAX(lo, clippee)) for an arbitrary clippee expression. +// Example: MIN(10, MIN(20, MAX(0, x))) yields [0, 10] with clippee x. +static HInstruction* FindClippee(HInstruction* instruction, + /*out*/ int64_t* lo, + /*out*/ int64_t* hi) { + // Iterate into MIN(.., c)-MAX(.., c) expressions and 'tighten' the range [lo, hi]. + while (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + // Process the constant. + HConstant* right = min_max->GetConstantRight(); + if (right == nullptr) { + break; + } else if (instruction->IsMin()) { + *hi = std::min(*hi, Int64FromConstant(right)); + } else { + *lo = std::max(*lo, Int64FromConstant(right)); } + instruction = min_max->GetLeastConstantLeft(); + } + // Iteration ends in any other expression (possibly MIN/MAX without constant). + // This leaf expression is the clippee with range [lo, hi]. + return instruction; +} + +// Set value range for type (or fail). +static bool CanSetRange(DataType::Type type, + /*out*/ int64_t* uhi, + /*out*/ int64_t* slo, + /*out*/ int64_t* shi) { + if (DataType::Size(type) == 1) { + *uhi = std::numeric_limits<uint8_t>::max(); + *slo = std::numeric_limits<int8_t>::min(); + *shi = std::numeric_limits<int8_t>::max(); + return true; + } else if (DataType::Size(type) == 2) { + *uhi = std::numeric_limits<uint16_t>::max(); + *slo = std::numeric_limits<int16_t>::min(); + *shi = std::numeric_limits<int16_t>::max(); + return true; } return false; } +// Accept various saturated addition forms. +static bool IsSaturatedAdd(HInstruction* a, + HInstruction* b, + DataType::Type type, + int64_t lo, + int64_t hi, + bool is_unsigned) { + int64_t ulo = 0, uhi = 0, slo = 0, shi = 0; + if (!CanSetRange(type, &uhi, &slo, &shi)) { + return false; + } + // Tighten the range for signed single clipping on constant. + if (!is_unsigned) { + int64_t c = 0; + if (IsInt64AndGet(a, &c) || IsInt64AndGet(b, &c)) { + // For c in proper range and narrower operand r: + // MIN(r + c, 127) c > 0 + // or MAX(r + c, -128) c < 0 (and possibly redundant bound). + if (0 < c && c <= shi && hi == shi) { + if (lo <= (slo + c)) { + return true; + } + } else if (slo <= c && c < 0 && lo == slo) { + if (hi >= (shi + c)) { + return true; + } + } + } + } + // Detect for narrower operands r and s: + // MIN(r + s, 255) => SAT_ADD_unsigned + // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed. + return is_unsigned ? (lo <= ulo && hi == uhi) : (lo == slo && hi == shi); +} + +// Accept various saturated subtraction forms. +static bool IsSaturatedSub(HInstruction* a, + DataType::Type type, + int64_t lo, + int64_t hi, + bool is_unsigned) { + int64_t ulo = 0, uhi = 0, slo = 0, shi = 0; + if (!CanSetRange(type, &uhi, &slo, &shi)) { + return false; + } + // Tighten the range for signed single clipping on constant. 
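// Putting the pieces above together on an example: a signed byte saturating
// add written as MIN(MAX(r + s, -128), 127) leaves FindClippee with clippee
// r + s and range [-128, 127], which IsSaturatedAdd accepts as the signed
// form, while the unsigned byte shape MIN(r + s, 255) is accepted because the
// upper clip is exactly 255 and no lower clip raises the bound above zero.
// Ranges matching neither pattern (nor the single-clip constant cases) fall
// back to an ordinary vector MIN/MAX.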
+ if (!is_unsigned) { + int64_t c = 0; + if (IsInt64AndGet(a, /*out*/ &c)) { + // For c in proper range and narrower operand r: + // MIN(c - r, 127) c > 0 + // or MAX(c - r, -128) c < 0 (and possibly redundant bound). + if (0 < c && c <= shi && hi == shi) { + if (lo <= (c - shi)) { + return true; + } + } else if (slo <= c && c < 0 && lo == slo) { + if (hi >= (c - slo)) { + return true; + } + } + } + } + // Detect for narrower operands r and s: + // MAX(r - s, 0) => SAT_SUB_unsigned + // MIN(MAX(r - s, -128), 127) => SAT_ADD_signed. + return is_unsigned ? (lo == ulo && hi >= uhi) : (lo == slo && hi == shi); +} + // Detect reductions of the following forms, // x = x_phi + .. // x = x_phi - .. -// x = max(x_phi, ..) // x = min(x_phi, ..) +// x = max(x_phi, ..) static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { - if (reduction->IsAdd()) { + if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); } else if (reduction->IsSub()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); - } else if (reduction->IsInvokeStaticOrDirect()) { - switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: - return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || - (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); - default: - return false; - } } return false; } @@ -401,6 +533,43 @@ static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) { return true; } +// Tries to statically evaluate condition of the specified "HIf" for other condition checks. +static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { + HInstruction* cond = instruction->InputAt(0); + + // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the + // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in + // FALSE_SUCC). Using that we can replace another evaluation (use) EVAL of the same 'cond' + // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the + // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC). + // if (cond) { if(cond) { + // if (cond) {} if (1) {} + // } else { =======> } else { + // if (cond) {} if (0) {} + // } } + if (!cond->IsConstant()) { + HBasicBlock* true_succ = instruction->IfTrueSuccessor(); + HBasicBlock* false_succ = instruction->IfFalseSuccessor(); + + DCHECK_EQ(true_succ->GetPredecessors().size(), 1u); + DCHECK_EQ(false_succ->GetPredecessors().size(), 1u); + + const HUseList<HInstruction*>& uses = cond->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + HBasicBlock* user_block = user->GetBlock(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (true_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(1), index); + } else if (false_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(0), index); + } + } + } +} + // // Public methods. 
// @@ -432,7 +601,11 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_preheader_(nullptr), vector_header_(nullptr), vector_body_(nullptr), - vector_index_(nullptr) { + vector_index_(nullptr), + arch_loop_helper_(ArchDefaultLoopHelper::Create(compiler_driver_ != nullptr + ? compiler_driver_->GetInstructionSet() + : InstructionSet::kNone, + global_allocator_)) { } void HLoopOptimization::Run() { @@ -643,7 +816,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. @@ -713,6 +886,103 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { return false; } +bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { + return TryOptimizeInnerLoopFinite(node) || + TryPeelingForLoopInvariantExitsElimination(node) || + TryUnrollingForBranchPenaltyReduction(node); +} + + + +// +// Loop unrolling: generic part methods. +// + +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) { + // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) + // as InstructionSet is needed. + if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) { + return false; + } + + HLoopInformation* loop_info = node->loop_info; + int64_t trip_count = 0; + // Only unroll loops with a known tripcount. + if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) { + return false; + } + + uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count); + if (unrolling_factor == kNoUnrollingFactor) { + return false; + } + + LoopAnalysisInfo loop_analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + + // Check "IsLoopClonable" last as it can be time-consuming. + if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) || + (loop_analysis_info.GetNumberOfExits() > 1) || + loop_analysis_info.HasInstructionsPreventingScalarUnrolling() || + !PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; + } + + // TODO: support other unrolling factors. + DCHECK_EQ(unrolling_factor, 2u); + + // Perform unrolling. + PeelUnrollSimpleHelper helper(loop_info); + helper.DoUnrolling(); + + // Remove the redundant loop check after unrolling. + HIf* copy_hif = + helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; + copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + + return true; +} + +bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) { + // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) + // as InstructionSet is needed. + if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) { + return false; + } + + HLoopInformation* loop_info = node->loop_info; + // Check 'IsLoopClonable' the last as it might be time-consuming. + if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + return false; + } + + LoopAnalysisInfo loop_analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + + // Check "IsLoopClonable" last as it can be time-consuming. 
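// The payoff being checked for here, on an illustrative example: in a loop
// such as
//   for (int i = 0; i < n; ++i) { if (flag) break; a[i] = 0; }
// with loop-invariant 'flag', peeling evaluates the exit test once up front,
// and the TryToEvaluateIfCondition pass applied to the cloned HIfs afterwards
// rewrites the duplicated test inside the remaining loop to a constant, so the
// per-iteration branch folds away.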
+ if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) || + loop_analysis_info.HasInstructionsPreventingScalarPeeling() || + !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) || + !PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; + } + + // Perform peeling. + PeelUnrollSimpleHelper helper(loop_info); + helper.DoPeeling(); + + const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); + for (auto entry : *hir_map) { + HInstruction* copy = entry.second; + if (copy->IsIf()) { + TryToEvaluateIfCondition(copy->AsIf(), graph_); + } + } + + return true; +} + // // Loop vectorization. The implementation is based on the book by Aart J.C. Bik: // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance." @@ -843,7 +1113,8 @@ void HLoopOptimization::Vectorize(LoopNode* node, HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Pick a loop unrolling factor for the vector loop. - uint32_t unroll = GetUnrollingFactor(block, trip_count); + uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor( + block, trip_count, MaxNumberPeeled(), vector_length_); uint32_t chunk = vector_length_ * unroll; DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk)); @@ -1082,6 +1353,11 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, HInstruction* index = instruction->InputAt(1); HInstruction* value = instruction->InputAt(2); HInstruction* offset = nullptr; + // For narrow types, explicit type conversion may have been + // optimized way, so set the no hi bits restriction here. + if (DataType::Size(type) <= 2) { + restrictions |= kNoHiBits; + } if (TrySetVectorType(type, &restrictions) && node->loop_info->IsDefinedOutOfTheLoop(base) && induction_range_.IsUnitStride(instruction, index, graph_, &offset) && @@ -1124,7 +1400,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); } -// TODO: saturation arithmetic. bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -1297,80 +1572,62 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } - } else if (instruction->IsInvokeStaticOrDirect()) { - // Accept particular intrinsics. - HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* r = opa; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoAbs)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { - return false; // reject, unless operand is sign-extension narrower - } - // Accept ABS(x) for vectorizable operand. - DCHECK(r != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - } - if (VectorizeUse(node, r, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp(instruction, - vector_map_->Get(r), - nullptr, - HVecOperation::ToProperType(type, is_unsigned)); - } - return true; - } - return false; + } else if (instruction->IsAbs()) { + // Deal with vector restrictions. 
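// Context for the restriction checks that follow: kNoHiBits records that for
// sub-word types the bits above the element width are not guaranteed to be a
// valid sign/zero extension (the explicit conversion may have been optimized
// away), so ABS here and MIN/MAX below only accept sub-word operands once
// IsNarrowerOperand/IsNarrowerOperands prove they are explicit extensions of
// the packed element type.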
+ HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower + } + // Accept ABS(x) for vectorizable operand. + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - HInstruction* r = opa; - HInstruction* s = opb; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoMinMax)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { - return false; // reject, unless all operands are same-extension narrower - } - // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr); - DCHECK(s != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - s = opb; - } - if (VectorizeUse(node, r, generate_code, type, restrictions) && - VectorizeUse(node, s, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp( - instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); - } - return true; - } - return false; + return true; + } + } else if (instruction->IsMin() || instruction->IsMax()) { + // Recognize saturation arithmetic. + if (VectorizeSaturationIdiom(node, instruction, generate_code, type, restrictions)) { + return true; + } + // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + HInstruction* s = opb; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoMinMax)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { + return false; // reject, unless all operands are same-extension narrower + } + // Accept MIN/MAX(x, y) for vectorizable operands. 
+ DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + s = opb; + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp( + instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); } - default: - return false; - } // switch + return true; + } } return false; } @@ -1475,11 +1732,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSaturation; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1504,11 +1761,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSaturation; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1811,83 +2068,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); - case HInstruction::kInvokeStaticOrDirect: { - HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); - if (vector_mode_ == kVector) { - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: - DCHECK(opb == nullptr); - vector = new (global_allocator_) - HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); - break; - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: { - vector = new (global_allocator_) - HVecMin(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - vector = new (global_allocator_) - HVecMax(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); - UNREACHABLE(); - } // switch invoke - } else { - // In scalar code, simply clone the method invoke, and replace its operands with the - // corresponding new scalar instructions in the loop. The instruction will get an - // environment while being inserted from the instruction map in original program order. 
- DCHECK(vector_mode_ == kSequential); - size_t num_args = invoke->GetNumberOfArguments(); - HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( - global_allocator_, - num_args, - invoke->GetType(), - invoke->GetDexPc(), - invoke->GetDexMethodIndex(), - invoke->GetResolvedMethod(), - invoke->GetDispatchInfo(), - invoke->GetInvokeType(), - invoke->GetTargetMethod(), - invoke->GetClinitCheckRequirement()); - HInputsRef inputs = invoke->GetInputs(); - size_t num_inputs = inputs.size(); - DCHECK_LE(num_args, num_inputs); - DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree - for (size_t index = 0; index < num_inputs; ++index) { - HInstruction* new_input = index < num_args - ? vector_map_->Get(inputs[index]) - : inputs[index]; // beyond arguments: just pass through - new_invoke->SetArgumentAt(index, new_input); - } - new_invoke->SetIntrinsic(invoke->GetIntrinsic(), - kNeedsEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - vector = new_invoke; - } - break; - } + case HInstruction::kMin: + GENERATE_VEC( + new (global_allocator_) HVecMin(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMin(org_type, opa, opb, dex_pc)); + case HInstruction::kMax: + GENERATE_VEC( + new (global_allocator_) HVecMax(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMax(org_type, opa, opb, dex_pc)); + case HInstruction::kAbs: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HAbs(org_type, opa, dex_pc)); default: break; } // switch @@ -1901,6 +2104,79 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // Vectorization idioms. // +// Method recognizes single and double clipping saturation arithmetic. +bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions) { + // Deal with vector restrictions. + if (HasVectorRestrictions(restrictions, kNoSaturation)) { + return false; + } + // Restrict type (generalize if one day we generalize allowed MIN/MAX integral types). + if (instruction->GetType() != DataType::Type::kInt32 && + instruction->GetType() != DataType::Type::kInt64) { + return false; + } + // Clipped addition or subtraction on narrower operands? We will try both + // formats since, e.g., x+c can be interpreted as x+c and x-(-c), depending + // on what clipping values are used, to get most benefits. + int64_t lo = std::numeric_limits<int64_t>::min(); + int64_t hi = std::numeric_limits<int64_t>::max(); + HInstruction* clippee = FindClippee(instruction, &lo, &hi); + HInstruction* a = nullptr; + HInstruction* b = nullptr; + HInstruction* r = nullptr; + HInstruction* s = nullptr; + bool is_unsigned = false; + bool is_add = true; + int64_t c = 0; + // First try for saturated addition. + if (IsAddConst2(graph_, clippee, /*out*/ &a, /*out*/ &b, /*out*/ &c) && c == 0 && + IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && + IsSaturatedAdd(r, s, type, lo, hi, is_unsigned)) { + is_add = true; + } else { + // Then try again for saturated subtraction. 
+ a = b = r = s = nullptr; + if (IsSubConst2(graph_, clippee, /*out*/ &a, /*out*/ &b) && + IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && + IsSaturatedSub(r, type, lo, hi, is_unsigned)) { + is_add = false; + } else { + return false; + } + } + // Accept saturation idiom for vectorizable operands. + DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = instruction->InputAt(0); + s = instruction->InputAt(1); + restrictions &= ~(kNoHiBits | kNoMinMax); // allow narrow MIN/MAX in seq + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + DataType::Type vtype = HVecOperation::ToProperType(type, is_unsigned); + HInstruction* op1 = vector_map_->Get(r); + HInstruction* op2 = vector_map_->Get(s); + vector_map_->Put(instruction, is_add + ? reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationAdd( + global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc)) + : reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationSub( + global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc))); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); + } + } + return true; + } + return false; +} + // Method recognizes the following idioms: // rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b // truncated halving add (a + b) >> 1 for unsigned/signed operands a, b @@ -1924,8 +2200,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* a = nullptr; HInstruction* b = nullptr; int64_t c = 0; - if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { - DCHECK(a != nullptr && b != nullptr); + if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { // Accept c == 1 (rounded) or c == 0 (not rounded). bool is_rounded = false; if (c == 1) { @@ -1947,8 +2222,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, } // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -1998,21 +2272,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* v = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->IsInvokeStaticOrDirect() && - (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || - v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { - HInstruction* x = v->InputAt(0); - if (x->GetType() == reduction_type) { - int64_t c = 0; - if (x->IsSub()) { - a = x->InputAt(0); - b = x->InputAt(1); - } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) { - b = graph_->GetConstant(reduction_type, -c); // hidden SUB! - } - } - } - if (a == nullptr || b == nullptr) { + if (v->IsAbs() && + v->GetType() == reduction_type && + IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) { + DCHECK(a != nullptr && b != nullptr); + } else { return false; } // Accept same-type or consistent sign extension for narrower-type on operands a and b. 
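For readers unfamiliar with the idiom VectorizeSaturationIdiom matches, the scalar shape is an add (or sub) of two narrower operands wrapped in MIN/MAX clipping against the narrow type's bounds. The reference function below is only a sketch of those semantics for signed 16-bit lanes (SaturatedAddI16 is a made-up name, not ART code); the recognizer maps this shape to HVecSaturationAdd, and the unsigned and subtract variants to the corresponding nodes.

#include <algorithm>
#include <cstdint>

// Scalar reference for one lane: clip the widened sum back into the int16 range.
int16_t SaturatedAddI16(int16_t a, int16_t b) {
  int32_t wide = static_cast<int32_t>(a) + static_cast<int32_t>(b);  // cannot overflow int32
  wide = std::min<int32_t>(std::max<int32_t>(wide, -32768), 32767);  // the MIN/MAX clipping
  return static_cast<int16_t>(wide);
}

Lanes and types for which no saturating vector instructions are available opt out of this path through the new kNoSaturation restriction bit.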
@@ -2045,8 +2309,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, } // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand // idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = s = v->InputAt(0); } @@ -2054,14 +2317,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { - reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned); if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, vector_map_->Get(q), vector_map_->Get(r), vector_map_->Get(s), - reduction_type, + HVecOperation::ToProperType(reduction_type, is_unsigned), GetOtherVL(reduction_type, sub_type, vector_length_), kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); @@ -2134,41 +2396,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { return true; } -static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8; -static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; - -uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { - uint32_t max_peel = MaxNumberPeeled(); - switch (compiler_driver_->GetInstructionSet()) { - case InstructionSet::kArm64: { - // Don't unroll with insufficient iterations. - // TODO: Unroll loops with unknown trip count. - DCHECK_NE(vector_length_, 0u); - if (trip_count < (2 * vector_length_ + max_peel)) { - return kNoUnrollingFactor; - } - // Don't unroll for large loop body size. - uint32_t instruction_count = block->GetInstructions().CountSize(); - if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { - return kNoUnrollingFactor; - } - // Find a beneficial unroll factor with the following restrictions: - // - At least one iteration of the transformed loop should be executed. - // - The loop body shouldn't be "too big" (heuristic). - uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count; - uint32_t uf2 = (trip_count - max_peel) / vector_length_; - uint32_t unroll_factor = - TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); - DCHECK_GE(unroll_factor, 1u); - return unroll_factor; - } - case InstructionSet::kX86: - case InstructionSet::kX86_64: - default: - return kNoUnrollingFactor; - } -} - // // Helpers. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index a707ad13580..f9a31a34d40 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -20,12 +20,15 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "induction_var_range.h" +#include "loop_analysis.h" #include "nodes.h" #include "optimization.h" +#include "superblock_cloner.h" namespace art { class CompilerDriver; +class ArchDefaultLoopHelper; /** * Loop optimizations. 
Builds a loop hierarchy and applies optimizations to @@ -80,6 +83,7 @@ class HLoopOptimization : public HOptimization { kNoReduction = 1 << 10, // no reduction kNoSAD = 1 << 11, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening + kNoSaturation = 1 << 13, // no saturation arithmetic }; /* @@ -134,10 +138,21 @@ class HLoopOptimization : public HOptimization { void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - // Performs optimizations specific to inner loop (empty loop removal, + // Performs optimizations specific to inner loop with finite header logic (empty loop removal, // unrolling, vectorization). Returns true if anything changed. + bool TryOptimizeInnerLoopFinite(LoopNode* node); + + // Performs optimizations specific to inner loop. Returns true if anything changed. bool OptimizeInnerLoop(LoopNode* node); + // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling + // opportunities. Returns whether transformation happened. + bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node); + + // Tries to apply loop peeling for loop invariant exits elimination. Returns whether + // transformation happened. + bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node); + // // Vectorization analysis and synthesis. // @@ -177,6 +192,11 @@ class HLoopOptimization : public HOptimization { bool is_unsigned = false); // Vectorization idioms. + bool VectorizeSaturationIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); bool VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -197,7 +217,6 @@ class HLoopOptimization : public HOptimization { const ArrayReference* peeling_candidate); uint32_t MaxNumberPeeled(); bool IsVectorizationProfitable(int64_t trip_count); - uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count); // // Helpers. @@ -291,6 +310,9 @@ class HLoopOptimization : public HOptimization { HBasicBlock* vector_body_; // body of the new loop HInstruction* vector_index_; // normalized index of the new loop + // Helper for target-specific behaviour for loop optimizations. + ArchDefaultLoopHelper* arch_loop_helper_; + friend class LoopOptimizationTest; DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index db8368986c9..c21bd65d97e 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -227,11 +227,14 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { graph_->ClearDominanceInformation(); graph_->BuildDominatorTree(); + // BuildDominatorTree inserts a block beetween loop header and entry block. + EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_); + // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs // are still mapped correctly to the block predecessors. 
for (size_t i = 0, e = phi->InputCount(); i < e; i++) { HInstruction* input = phi->InputAt(i); - ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); + EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); } } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f6ba19f22a8..f784f8f7f35 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2891,6 +2891,8 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "BootImageLinkTimePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: return os << "DirectAddress"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: return os << "BssEntry"; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: @@ -2925,7 +2927,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: - case LoadKind::kBootImageClassTable: + case LoadKind::kBootImageRelRo: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetClass().Get() == other_load_class->GetClass().Get(); @@ -2944,8 +2946,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadClass::LoadKind::kBootImageAddress: return os << "BootImageAddress"; - case HLoadClass::LoadKind::kBootImageClassTable: - return os << "BootImageClassTable"; + case HLoadClass::LoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HLoadClass::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadClass::LoadKind::kJitTableAddress: @@ -2968,7 +2970,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: - case LoadKind::kBootImageInternTable: + case LoadKind::kBootImageRelRo: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetString().Get() == other_load_string->GetString().Get(); @@ -2984,8 +2986,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadString::LoadKind::kBootImageAddress: return os << "BootImageAddress"; - case HLoadString::LoadKind::kBootImageInternTable: - return os << "BootImageInternTable"; + case HLoadString::LoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HLoadString::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: @@ -3101,6 +3103,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { return os << "array_object_check"; case TypeCheckKind::kArrayCheck: return os << "array_check"; + case TypeCheckKind::kBitstringCheck: + return os << "bitstring_check"; default: LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fe992a7f399..79d733060b3 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1338,6 +1338,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Above, Condition) \ M(AboveOrEqual, Condition) \ + M(Abs, UnaryOperation) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -1383,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LoadException, 
Instruction) \ M(LoadString, Instruction) \ M(LongConstant, Constant) \ + M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(NativeDebugInfo, Instruction) \ @@ -1437,6 +1440,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecAndNot, VecBinaryOperation) \ M(VecOr, VecBinaryOperation) \ M(VecXor, VecBinaryOperation) \ + M(VecSaturationAdd, VecBinaryOperation) \ + M(VecSaturationSub, VecBinaryOperation) \ M(VecShl, VecBinaryOperation) \ M(VecShr, VecBinaryOperation) \ M(VecUShr, VecBinaryOperation) \ @@ -4428,6 +4433,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. kDirectAddress, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for app->boot calls with relocatable image. + kBootImageRelRo, + // Load from an entry in the .bss section using a PC-relative load. // Used for classes outside boot image when .bss is accessible with a PC-relative load. kBssEntry, @@ -4560,6 +4569,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } bool HasPcRelativeMethodLoadKind() const { return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo || GetMethodLoadKind() == MethodLoadKind::kBssEntry; } bool HasCurrentMethodInput() const { @@ -5016,6 +5026,117 @@ class HRem FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Rem); }; +class HMin FINAL : public HBinaryOperation { + public: + HMin(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x <= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Min); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Min); +}; + +class HMax FINAL : public HBinaryOperation { + public: + HMax(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x >= y) ? 
x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Max); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Max); +}; + +class HAbs FINAL : public HUnaryOperation { + public: + HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kAbs, result_type, input, dex_pc) {} + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x) { + return x < 0 ? -x : x; + } + + // Evaluation for floating-point values. + // Note, as a "quality of implementation", rather than pure "spec compliance", + // we require that Math.abs() clears the sign bit (but changes nothing else) + // for all floating-point numbers, including NaN (signaling NaN may become quiet though). + // http://b/30758343 + template <typename T, typename S> static T ComputeFP(T x) { + S bits = bit_cast<S, T>(x); + return bit_cast<T, S>(bits & std::numeric_limits<S>::max()); + } + + HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetFloatConstant( + ComputeFP<float, int32_t>(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetDoubleConstant( + ComputeFP<double, int64_t>(x->GetValue()), GetDexPc()); + } + + DECLARE_INSTRUCTION(Abs); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Abs); +}; + class HDivZeroCheck FINAL : public HExpression<1> { public: // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` @@ -6025,12 +6146,12 @@ class HLoadClass FINAL : public HInstruction { kBootImageLinkTimePcRelative, // Use a known boot image Class* address, embedded in the code by the codegen. - // Used for boot image classes referenced by apps in AOT- and JIT-compiled code. + // Used for boot image classes referenced by apps in JIT- and AOT-compiled code (non-PIC). kBootImageAddress, - // Use a PC-relative load from a boot image ClassTable mmapped into the .bss - // of the oat file. - kBootImageClassTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image classes referenced by apps in AOT-compiled code (PIC). + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. // Used for classes outside boot image when .bss is accessible with a PC-relative load. 
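The floating-point path of HAbs::Evaluate above folds constants with ComputeFP, which clears only the sign bit. A standalone illustration of the same trick (using memcpy instead of ART's bit_cast; AbsBits is an invented name): for IEEE-754 binary32 and binary64, max() of the same-width signed integer type is all ones except the sign bit, so ANDing with it strips the sign while leaving NaN payloads intact, matching the b/30758343 note.

#include <cstdint>
#include <cstring>
#include <limits>

// Clear the sign bit of a float without touching any other bits (NaNs stay NaNs).
float AbsBits(float x) {
  static_assert(sizeof(uint32_t) == sizeof(float), "expects 32-bit float");
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= static_cast<uint32_t>(std::numeric_limits<int32_t>::max());  // 0x7FFFFFFF
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}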
@@ -6057,8 +6178,7 @@ class HLoadClass FINAL : public HInstruction { special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), - klass_(klass), - loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { + klass_(klass) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. DCHECK(!is_referrers_class || !needs_access_check); @@ -6068,6 +6188,7 @@ class HLoadClass FINAL : public HInstruction { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); + SetPackedFlag<kFlagValidLoadedClassRTI>(false); } bool IsClonable() const OVERRIDE { return true; } @@ -6078,6 +6199,12 @@ class HLoadClass FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } + bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const; @@ -6116,13 +6243,18 @@ class HLoadClass FINAL : public HInstruction { } ReferenceTypeInfo GetLoadedClassRTI() { - return loaded_class_rti_; + if (GetPackedFlag<kFlagValidLoadedClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - void SetLoadedClassRTI(ReferenceTypeInfo rti) { - // Make sure we only set exact types (the loaded class should never be merged). - DCHECK(rti.IsExact()); - loaded_class_rti_ = rti; + // Loaded class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidLoadedClassRTI>(true); } dex::TypeIndex GetTypeIndex() const { return type_index_; } @@ -6175,14 +6307,14 @@ class HLoadClass FINAL : public HInstruction { static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); - static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1; static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields."); using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; static bool HasTypeReference(LoadKind load_kind) { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBootImageClassTable || load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kRuntimeCall; } @@ -6203,8 +6335,6 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file_; Handle<mirror::Class> klass_; - - ReferenceTypeInfo loaded_class_rti_; }; std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); @@ -6228,7 +6358,7 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { // including literal pool loads, which are PC-relative too. 
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageClassTable || + GetLoadKind() == LoadKind::kBootImageRelRo || GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); @@ -6244,12 +6374,12 @@ class HLoadString FINAL : public HInstruction { kBootImageLinkTimePcRelative, // Use a known boot image String* address, embedded in the code by the codegen. - // Used for boot image strings referenced by apps in AOT- and JIT-compiled code. + // Used for boot image strings referenced by apps in JIT- and AOT-compiled code (non-PIC). kBootImageAddress, - // Use a PC-relative load from a boot image InternTable mmapped into the .bss - // of the oat file. - kBootImageInternTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image strings referenced by apps in AOT-compiled code (PIC). + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. // Used for strings outside boot image when .bss is accessible with a PC-relative load. @@ -6284,6 +6414,12 @@ class HLoadString FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } + const DexFile& GetDexFile() const { return dex_file_; } @@ -6312,7 +6448,7 @@ class HLoadString FINAL : public HInstruction { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBootImageAddress || - load_kind == LoadKind::kBootImageInternTable || + load_kind == LoadKind::kBootImageRelRo || load_kind == LoadKind::kJitTableAddress) { return false; } @@ -6390,7 +6526,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageInternTable || + GetLoadKind() == LoadKind::kBootImageRelRo || GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. @@ -6750,72 +6886,159 @@ enum class TypeCheckKind { kInterfaceCheck, // No optimization yet when checking against an interface. kArrayObjectCheck, // Can just check if the array is not primitive. kArrayCheck, // No optimization yet when checking against a generic array. + kBitstringCheck, // Compare the type check bitstring. kLast = kArrayCheck }; std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); -class HInstanceOf FINAL : public HExpression<2> { +// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an +// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.) 
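Before the class definition that follows, a note on what the two extra inputs of a kBitstringCheck carry: conceptually, the check passes when the masked bits of the receiver class's type-check bitstring equal the target's path-to-root. The sketch below shows only that conceptual test (FakeClass, type_check_bitstring and BitstringInstanceOf are invented; the real code generators read the value from the object's mirror::Class, with per-backend details).

#include <cstdint>

// Hypothetical stand-in for the per-class bitstring storage.
struct FakeClass {
  uint32_t type_check_bitstring;
};

// Inputs 2 and 3 of the instruction correspond to path_to_root and mask below.
bool BitstringInstanceOf(const FakeClass* klass, uint32_t path_to_root, uint32_t mask) {
  return (klass->type_check_bitstring & mask) == path_to_root;
}

This is why a bitstring check carries a null constant in input 1 rather than an HLoadClass: the comparison needs only the two integer constants plus the receiver's class.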
+class HTypeCheckInstruction : public HVariableInputSizeInstruction { public: - HInstanceOf(HInstruction* object, - HLoadClass* target_class, - TypeCheckKind check_kind, - uint32_t dex_pc) - : HExpression(kInstanceOf, - DataType::Type::kBool, - SideEffectsForArchRuntimeCalls(check_kind), - dex_pc) { + HTypeCheckInstruction(InstructionKind kind, + HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask, + SideEffects side_effects) + : HVariableInputSizeInstruction( + kind, + side_effects, + dex_pc, + allocator, + /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u, + kArenaAllocTypeCheckInputs), + klass_(klass) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); + SetPackedFlag<kFlagValidTargetClassRTI>(false); SetRawInputAt(0, object); - SetRawInputAt(1, target_class); + SetRawInputAt(1, target_class_or_null); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr); + if (check_kind == TypeCheckKind::kBitstringCheck) { + DCHECK(target_class_or_null->IsNullConstant()); + SetRawInputAt(2, bitstring_path_to_root); + SetRawInputAt(3, bitstring_mask); + } else { + DCHECK(target_class_or_null->IsLoadClass()); + } } HLoadClass* GetTargetClass() const { + DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); HInstruction* load_class = InputAt(1); DCHECK(load_class->IsLoadClass()); return load_class->AsLoadClass(); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } + uint32_t GetBitstringPathToRoot() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* path_to_root = InputAt(2); + DCHECK(path_to_root->IsIntConstant()); + return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue()); + } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + uint32_t GetBitstringMask() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* mask = InputAt(3); + DCHECK(mask->IsIntConstant()); + return static_cast<uint32_t>(mask->AsIntConstant()->GetValue()); } - bool NeedsEnvironment() const OVERRIDE { - return CanCallRuntime(GetTypeCheckKind()); + bool IsClonable() const OVERRIDE { return true; } + bool CanBeMoved() const OVERRIDE { return true; } + + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName(); + return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields(); } - // Used only in code generation. bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - static bool CanCallRuntime(TypeCheckKind check_kind) { - // Mips currently does runtime calls for any other checks. 
- return check_kind != TypeCheckKind::kExactCheck; + ReferenceTypeInfo GetTargetClassRTI() { + if (GetPackedFlag<kFlagValidTargetClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { - return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None(); + // Target class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidTargetClassRTI>(true); } - DECLARE_INSTRUCTION(InstanceOf); + Handle<mirror::Class> GetClass() const { + return klass_; + } protected: - DEFAULT_COPY_CONSTRUCTOR(InstanceOf); + DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction); private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeCheckKindSize = MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; + static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1; + static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1; static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; + + Handle<mirror::Class> klass_; +}; + +class HInstanceOf FINAL : public HTypeCheckInstruction { + public: + HInstanceOf(HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kInstanceOf, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffectsForArchRuntimeCalls(check_kind)) {} + + DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; } + + bool NeedsEnvironment() const OVERRIDE { + return CanCallRuntime(GetTypeCheckKind()); + } + + static bool CanCallRuntime(TypeCheckKind check_kind) { + // Mips currently does runtime calls for any other checks. + return check_kind != TypeCheckKind::kExactCheck; + } + + static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { + return CanCallRuntime(check_kind) ? 
SideEffects::CanTriggerGC() : SideEffects::None(); + } + + DECLARE_INSTRUCTION(InstanceOf); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceOf); }; class HBoundType FINAL : public HExpression<1> { @@ -6865,31 +7088,26 @@ class HBoundType FINAL : public HExpression<1> { ReferenceTypeInfo upper_bound_; }; -class HCheckCast FINAL : public HTemplateInstruction<2> { +class HCheckCast FINAL : public HTypeCheckInstruction { public: HCheckCast(HInstruction* object, - HLoadClass* target_class, + HInstruction* target_class_or_null, TypeCheckKind check_kind, - uint32_t dex_pc) - : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { - SetPackedField<TypeCheckKindField>(check_kind); - SetPackedFlag<kFlagMustDoNullCheck>(true); - SetRawInputAt(0, object); - SetRawInputAt(1, target_class); - } - - HLoadClass* GetTargetClass() const { - HInstruction* load_class = InputAt(1); - DCHECK(load_class->IsLoadClass()); - return load_class->AsLoadClass(); - } - - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kCheckCast, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffects::CanTriggerGC()) {} bool NeedsEnvironment() const OVERRIDE { // Instruction may throw a CheckCastError. @@ -6898,24 +7116,10 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { bool CanThrow() const OVERRIDE { return true; } - bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } - void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } - TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } - bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - DECLARE_INSTRUCTION(CheckCast); protected: DEFAULT_COPY_CONSTRUCTOR(CheckCast); - - private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeCheckKindSize = - MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); - static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; - static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; }; /** diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index d1eaf5c3666..1a484e1944f 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -328,7 +328,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { uint32_t dex_pc) : HVecUnaryOperation( kVecReplicateScalar, allocator, scalar, packed_type, vector_length, dex_pc) { - DCHECK(!scalar->IsVecOperation()); + DCHECK(!ReturnsSIMDValue(scalar)); } // A replicate needs to stay in place, since SIMD registers are not @@ -533,6 +533,31 @@ class HVecAdd FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecAdd); }; +// Adds every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 +_sat y1, .. 
, xn +_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationAdd FINAL : public HVecBinaryOperation { + public: + HVecSaturationAdd(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationAdd, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(VecSaturationAdd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationAdd); +}; + // Performs halving add on every component in the two vectors, viz. // rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] // truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] @@ -598,6 +623,31 @@ class HVecSub FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecSub); }; +// Subtracts every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 -_sat y1, .. , xn -_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationSub FINAL : public HVecBinaryOperation { + public: + HVecSaturationSub(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationSub, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(VecSaturationSub); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationSub); +}; + // Multiplies every component in the two vectors, // viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ]. class HVecMul FINAL : public HVecBinaryOperation { diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index d20b681b49a..2e189fdd141 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -105,15 +105,15 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { const std::vector<uint8_t>& expected_asm, const std::vector<uint8_t>& expected_cfi) { // Get the outputs. 
- const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory(); + ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory(); Assembler* opt_asm = code_gen_->GetAssembler(); - const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data())); if (kGenerateExpected) { GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); } else { - EXPECT_EQ(expected_asm, actual_asm); - EXPECT_EQ(expected_cfi, actual_cfi); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_asm), actual_asm); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_cfi), actual_cfi); } } @@ -140,7 +140,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { return memory_.data(); } - const std::vector<uint8_t>& GetMemory() { return memory_; } + ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); } private: std::vector<uint8_t> memory_; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index e42dfc10ba5..cadefc3b015 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -75,22 +75,18 @@ static constexpr const char* kPassNameSeparator = "$"; class CodeVectorAllocator FINAL : public CodeAllocator { public: explicit CodeVectorAllocator(ArenaAllocator* allocator) - : memory_(allocator->Adapter(kArenaAllocCodeBuffer)), - size_(0) {} + : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {} virtual uint8_t* Allocate(size_t size) { - size_ = size; memory_.resize(size); return &memory_[0]; } - size_t GetSize() const { return size_; } - const ArenaVector<uint8_t>& GetMemory() const { return memory_; } + ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); } uint8_t* GetData() { return memory_.data(); } private: ArenaVector<uint8_t> memory_; - size_t size_; DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator); }; @@ -647,15 +643,13 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, MaybeRunInliner(graph, codegen, dex_compilation_unit, pass_observer, handles); OptimizationDef optimizations2[] = { - // SelectGenerator depends on the InstructionSimplifier removing - // redundant suspend checks to recognize empty blocks. + OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"), + OptDef(OptimizationPass::kGlobalValueNumbering), OptDef(OptimizationPass::kSelectGenerator), - // TODO: if we don't inline we can also skip fold2. 
OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"), OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_inlining"), OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$after_inlining"), - OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"), - OptDef(OptimizationPass::kGlobalValueNumbering), + OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_licm"), OptDef(OptimizationPass::kInvariantCodeMotion), OptDef(OptimizationPass::kInductionVarAnalysis), OptDef(OptimizationPass::kBoundsCheckElimination), @@ -719,7 +713,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( GetCompilerDriver(), codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(code_allocator->GetMemory()), + code_allocator->GetMemory(), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). @@ -731,6 +725,16 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const linker::LinkerPatch>(linker_patches)); + CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage(); + for (const linker::LinkerPatch& patch : linker_patches) { + if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) { + ArenaVector<uint8_t> code(allocator->Adapter()); + std::string debug_name; + codegen->EmitThunkCode(patch, &code, &debug_name); + storage->SetThunkCode(patch, ArrayRef<const uint8_t>(code), debug_name); + } + } + return compiled_method; } @@ -1339,7 +1343,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), - code_allocator.GetSize(), + code_allocator.GetMemory().size(), data_size, osr, roots, @@ -1369,7 +1373,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_optimized = true; info.is_code_address_text_relative = false; info.code_address = code_address; - info.code_size = code_allocator.GetSize(); + info.code_size = code_allocator.GetMemory().size(); info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); @@ -1378,7 +1382,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { - jit_logger->WriteLog(code, code_allocator.GetSize(), method); + jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method); } if (kArenaAllocatorCountAllocations) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 00194ff1fe0..9a26f2f6c40 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -99,6 +99,7 @@ enum class MethodCompilationStat { kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kBitstringTypeCheck, kJitOutOfMemoryForCommit, kLastStat }; @@ -124,11 +125,6 @@ class OptimizingCompilerStats { } void Log() const { - if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) { - // Log only in debug builds or if the compiler is verbose. 
- return; - } - uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic); uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub); uint32_t bytecode_attempts = diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6dcbadba6ed..a9bc5664c09 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -20,6 +20,7 @@ #include <memory> #include <vector> +#include "base/malloc_arena_pool.h" #include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" @@ -97,7 +98,7 @@ class ArenaPoolAndAllocator { ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; } private: - ArenaPool pool_; + MallocArenaPool pool_; ArenaAllocator allocator_; ArenaStack arena_stack_; ScopedArenaAllocator scoped_allocator_; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index cb87cabe1cd..be352011668 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -180,7 +181,7 @@ TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); TYPED_TEST(ParallelMoveTest, Dependency) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -207,7 +208,7 @@ TYPED_TEST(ParallelMoveTest, Dependency) { } TYPED_TEST(ParallelMoveTest, Cycle) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -257,7 +258,7 @@ TYPED_TEST(ParallelMoveTest, Cycle) { } TYPED_TEST(ParallelMoveTest, ConstantLast) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); @@ -276,7 +277,7 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) { } TYPED_TEST(ParallelMoveTest, Pairs) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -453,7 +454,7 @@ TYPED_TEST(ParallelMoveTest, Pairs) { } TYPED_TEST(ParallelMoveTest, MultiCycles) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -551,7 +552,7 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { // Test that we do 64bits moves before 32bits moves. TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -610,7 +611,7 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { } TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 9d5358514ee..01022542062 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -75,7 +75,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { switch (load_kind) { case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. 
InitializePCRelativeBasePointer(); @@ -91,7 +91,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. InitializePCRelativeBasePointer(); diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index f92f4b274ae..647336b6b93 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -81,20 +81,14 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitLoadClass(HLoadClass* load_class) OVERRIDE { - HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || - load_kind == HLoadClass::LoadKind::kBssEntry) { + if (load_class->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class); load_class->AddSpecialInput(method_address); } } void VisitLoadString(HLoadString* load_string) OVERRIDE { - HLoadString::LoadKind load_kind = load_string->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || - load_kind == HLoadString::LoadKind::kBssEntry) { + if (load_string->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string); load_string->AddSpecialInput(method_address); } @@ -238,6 +232,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMinDoubleDouble: case Intrinsics::kMathMinFloatFloat: + LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered " + "to IR nodes by instruction simplifier"; + UNREACHABLE(); case Intrinsics::kMathRoundFloat: if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f843c008d8b..59733397bfe 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -34,6 +34,20 @@ void PrepareForRegisterAllocation::Run() { } } +void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) { + // Record only those bitstring type checks that make it to the codegen stage. + if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + +void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) { + // Record only those bitstring type checks that make it to the codegen stage. 
+ if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) { check->ReplaceWith(check->InputAt(0)); } diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 2c64f016c17..f6e4d3ef99b 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { "prepare_for_register_allocation"; private: + void VisitCheckCast(HCheckCast* check_cast) OVERRIDE; + void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE; void VisitNullCheck(HNullCheck* check) OVERRIDE; void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 67a61fc01de..4030883a57e 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -87,6 +87,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE; void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; void VisitLoadString(HLoadString* instr) OVERRIDE; void VisitLoadException(HLoadException* instr) OVERRIDE; @@ -171,6 +172,12 @@ void ReferenceTypePropagation::ValidateTypes() { << "NullCheck " << instr->GetReferenceTypeInfo() << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); } + } else if (instr->IsInstanceOf()) { + HInstanceOf* iof = instr->AsInstanceOf(); + DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact()); + } else if (instr->IsCheckCast()) { + HCheckCast* check = instr->AsCheckCast(); + DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact()); } } } @@ -499,8 +506,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* return; } - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI(); if (!class_rti.IsValid()) { // He have loaded an unresolved class. Don't bother bounding the type. 
return; @@ -643,15 +649,20 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet( void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::Class> resolved_class = instr->GetClass(); - if (IsAdmissible(resolved_class.Get())) { - instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( - resolved_class, /* is_exact */ true)); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidLoadedClassRTI(); } instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true)); } +void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) { + ScopedObjectAccess soa(Thread::Current()); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidTargetClassRTI(); + } +} + void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) { instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo()); } @@ -719,8 +730,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be @@ -729,12 +738,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast } DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); - if (class_rti.IsValid()) { + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> klass = check_cast->GetClass(); + if (IsAdmissible(klass.Get())) { DCHECK(is_first_run_); - ScopedObjectAccess soa(Thread::Current()); + check_cast->SetValidTargetClassRTI(); // This is the first run of RTP and class is resolved. - bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); - bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), + bool is_exact = klass->CannotBeAssignedFromOtherTypes(); + bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact), /* CheckCast succeeds for nulls. */ true); } else { // This is the first run of RTP and class is unresolved. Remove the binding. diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index bb28d50b569..bca538fb170 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -667,7 +667,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HUnaryOperation (or HBinaryOperation), check in debug mode that we have // the exhaustive lists here. 
if (instruction->IsUnaryOperation()) { - DCHECK(instruction->IsBooleanNot() || + DCHECK(instruction->IsAbs() || + instruction->IsBooleanNot() || instruction->IsNot() || instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName(); return true; @@ -678,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCompare() || instruction->IsCondition() || instruction->IsDiv() || + instruction->IsMin() || + instruction->IsMax() || instruction->IsMul() || instruction->IsOr() || instruction->IsRem() || diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 66e51421ca3..f9acf5aa9a9 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -16,6 +16,7 @@ #include "select_generator.h" +#include "base/scoped_arena_containers.h" #include "reference_type_propagation.h" namespace art { @@ -43,12 +44,16 @@ static bool IsSimpleBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { - if (num_instructions > kMaxInstructionsInBranch) { - return false; - } return instruction->IsGoto() || instruction->IsReturn(); } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { - num_instructions++; + if (instruction->IsSelect() && + instruction->AsSelect()->GetCondition()->GetBlock() == block) { + // Count one HCondition and HSelect in the same block as a single instruction. + // This enables finding nested selects. + continue; + } else if (++num_instructions > kMaxInstructionsInBranch) { + return false; // bail as soon as we exceed number of allowed instructions + } } else { return false; } @@ -86,9 +91,13 @@ static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index } void HSelectGenerator::Run() { + // Select cache with local allocator. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + ScopedArenaSafeMap<HInstruction*, HSelect*> cache( + std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator)); + // Iterate in post order in the unlikely case that removing one occurrence of // the selection pattern empties a branch block of another occurrence. - // Otherwise the order does not matter. for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->EndsWithIf()) continue; @@ -97,6 +106,7 @@ void HSelectGenerator::Run() { HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); DCHECK_NE(true_block, false_block); + if (!IsSimpleBlock(true_block) || !IsSimpleBlock(false_block) || !BlocksMergeTogether(true_block, false_block)) { @@ -107,10 +117,10 @@ void HSelectGenerator::Run() { // If the branches are not empty, move instructions in front of the If. // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. 
- if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { + while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { true_block->GetFirstInstruction()->MoveBefore(if_instruction); } - if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { + while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { false_block->GetFirstInstruction()->MoveBefore(if_instruction); } DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); @@ -138,7 +148,8 @@ void HSelectGenerator::Run() { DCHECK(both_successors_return || phi != nullptr); // Create the Select instruction and insert it in front of the If. - HSelect* select = new (graph_->GetAllocator()) HSelect(if_instruction->InputAt(0), + HInstruction* condition = if_instruction->InputAt(0); + HSelect* select = new (graph_->GetAllocator()) HSelect(condition, true_value, false_value, if_instruction->GetDexPc()); @@ -175,6 +186,26 @@ void HSelectGenerator::Run() { MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); + // Very simple way of finding common subexpressions in the generated HSelect statements + // (since this runs after GVN). Lookup by condition, and reuse latest one if possible + // (due to post order, latest select is most likely replacement). If needed, we could + // improve this by e.g. using the operands in the map as well. + auto it = cache.find(condition); + if (it == cache.end()) { + cache.Put(condition, select); + } else { + // Found cached value. See if latest can replace cached in the HIR. + HSelect* cached = it->second; + DCHECK_EQ(cached->GetCondition(), select->GetCondition()); + if (cached->GetTrueValue() == select->GetTrueValue() && + cached->GetFalseValue() == select->GetFalseValue() && + select->StrictlyDominates(cached)) { + cached->ReplaceWith(select); + cached->GetBlock()->RemoveInstruction(cached); + } + it->second = select; // always cache latest + } + // No need to update dominance information, as we are simplifying // a simple diamond shape, where the join block is merged with the // entry block. Any following blocks would have had the join block diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 1e49411c72d..70b45763af7 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -125,8 +125,12 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, BootImageAOTCanEmbedMethod(callee, compiler_driver)) { method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (IsInBootImage(callee)) { + // Use PC-relative access to the .data.bimg.rel.ro methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - // Use PC-relative access to the .bss methods arrays. + // Use PC-relative access to the .bss methods array. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } @@ -207,7 +211,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( } else if (is_in_boot_image) { // AOT app compilation, boot image class. 
if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable; + desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo; } else { desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; } @@ -236,6 +240,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( return load_kind; } +static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(!klass->IsProxyClass()); + DCHECK(!klass->IsArrayClass()); + + if (Runtime::Current()->UseJitCompilation()) { + // If we're JITting, try to assign a type check bitstring (fall through). + } else if (codegen->GetCompilerOptions().IsBootImage()) { + const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); + if (!compiler_driver->IsImageClass(descriptor)) { + return false; + } + // If the target is a boot image class, try to assign a type check bitstring (fall through). + // (If --force-determinism, this was already done; repeating is OK and yields the same result.) + } else { + // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring + // already assigned in the boot image. + return false; + } + + // Try to assign a type check bitstring. + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + if ((false) && // FIXME: Inliner does not respect compiler_driver->IsClassToCompile() + // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569 + kIsDebugBuild && + codegen->GetCompilerOptions().IsBootImage() && + codegen->GetCompilerOptions().IsForceDeterminism()) { + SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed) + << klass->PrettyDescriptor() << "/" << old_state + << " in " << codegen->GetGraph()->PrettyMethod(); + } + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + return state == SubtypeCheckInfo::kAssigned; +} + +TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + bool needs_access_check) { + if (klass == nullptr) { + return TypeCheckKind::kUnresolvedCheck; + } else if (klass->IsInterface()) { + return TypeCheckKind::kInterfaceCheck; + } else if (klass->IsArrayClass()) { + if (klass->GetComponentType()->IsObjectClass()) { + return TypeCheckKind::kArrayObjectCheck; + } else if (klass->CannotBeAssignedFromOtherTypes()) { + return TypeCheckKind::kExactCheck; + } else { + return TypeCheckKind::kArrayCheck; + } + } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes. + return TypeCheckKind::kExactCheck; + } else if (kBitstringSubtypeCheckEnabled && + !needs_access_check && + CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) { + // TODO: We should not need the `!needs_access_check` check but getting rid of that + // requires rewriting some optimizations in instruction simplifier. 
+ return TypeCheckKind::kBitstringCheck; + } else if (klass->IsAbstract()) { + return TypeCheckKind::kAbstractClassCheck; + } else { + return TypeCheckKind::kClassHierarchyCheck; + } +} + void HSharpening::ProcessLoadString( HLoadString* load_string, CodeGenerator* codegen, @@ -288,7 +361,7 @@ void HSharpening::ProcessLoadString( string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadString::LoadKind::kBootImageInternTable; + desired_load_kind = HLoadString::LoadKind::kBootImageRelRo; } else { desired_load_kind = HLoadString::LoadKind::kBootImageAddress; } diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 6df7d6d91ed..fa3e948eeb5 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -44,12 +44,10 @@ class HSharpening : public HOptimization { static constexpr const char* kSharpeningPassName = "sharpening"; - // Used by the builder. - static void ProcessLoadString(HLoadString* load_string, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles); + // Used by Sharpening and InstructionSimplifier. + static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, + CodeGenerator* codegen, + CompilerDriver* compiler_driver); // Used by the builder and the inliner. static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, @@ -58,10 +56,19 @@ class HSharpening : public HOptimization { const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); - // Used by Sharpening and InstructionSimplifier. - static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver); + // Used by the builder. + static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + bool needs_access_check) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Used by the builder. 
+ static void ProcessLoadString(HLoadString* load_string, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); private: CodeGenerator* codegen_; diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 579aabdb5f5..268e9bd6e0c 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -19,9 +19,9 @@ #include "base/bit_vector-inl.h" #include "base/hash_map.h" +#include "base/memory_region.h" #include "base/scoped_arena_containers.h" #include "base/value_object.h" -#include "memory_region.h" #include "method_info.h" #include "nodes.h" #include "stack_map.h" diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 7e517f34850..e36c5926622 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -18,6 +18,7 @@ #include "art_method.h" #include "base/arena_bit_vector.h" +#include "base/malloc_arena_pool.h" #include "stack_map_stream.h" #include "gtest/gtest.h" @@ -46,7 +47,7 @@ static bool CheckStackMask( using Kind = DexRegisterLocation::Kind; TEST(StackMapTest, Test1) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -128,7 +129,7 @@ TEST(StackMapTest, Test1) { } TEST(StackMapTest, Test2) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -413,7 +414,7 @@ TEST(StackMapTest, Test2) { } TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -508,7 +509,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { } TEST(StackMapTest, TestNonLiveDexRegisters) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -588,7 +589,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { // StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do // not treat it as kNoDexRegisterMap. 
TEST(StackMapTest, DexRegisterMapOffsetOverflow) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -652,7 +653,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { } TEST(StackMapTest, TestShareDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -711,7 +712,7 @@ TEST(StackMapTest, TestShareDexRegisterMap) { } TEST(StackMapTest, TestNoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -761,7 +762,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { } TEST(StackMapTest, InlineTest) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -949,7 +950,7 @@ TEST(StackMapTest, CodeOffsetTest) { } TEST(StackMapTest, TestDeduplicateStackMask) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -978,7 +979,7 @@ TEST(StackMapTest, TestDeduplicateStackMask) { } TEST(StackMapTest, TestInvokeInfo) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index a7c23bef7e3..fad7729956b 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -70,20 +70,18 @@ static bool ArePhiInputsTheSame(const HPhi* phi) { return true; } -// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole -// graph. -static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { - if (loop1 != nullptr || loop2 != nullptr) { - return nullptr; +// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method). +static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) { + if (set1->Size() != set2->Size()) { + return false; } - if (loop1->IsIn(*loop2)) { - return loop2; - } else if (loop2->IsIn(*loop1)) { - return loop1; + for (auto e : *set1) { + if (set2->Find(e) == set2->end()) { + return false; + } } - HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader()); - return block->GetLoopInformation(); + return true; } // Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph. @@ -95,6 +93,21 @@ static void OrderLoopsHeadersPredecessors(HGraph* graph) { } } +// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while +// traversing the function removes basic blocks from the bb_set (instead of traditional DFS +// 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start +// block. 
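// The comment above describes a DFS that clears bits instead of setting marks: every block
// reachable from the start block is removed from the set, so whatever remains set afterwards
// is unreachable. A minimal standalone sketch of the same idea on plain STL containers is
// shown below; the function and parameter names are invented for illustration and are not
// ART APIs.

#include <vector>

// Clears from `in_set` every node reachable from `start`; bits still set on return belong to
// nodes that `start` cannot reach within the subgraph.
static void ClearReachable(size_t start,
                           const std::vector<std::vector<size_t>>& successors,
                           std::vector<bool>* in_set) {
  if (!(*in_set)[start]) {
    return;  // Not part of the subgraph, or already visited.
  }
  (*in_set)[start] = false;  // "Visit" by clearing the bit.
  for (size_t succ : successors[start]) {
    if ((*in_set)[succ]) {
      ClearReachable(succ, successors, in_set);
    }
  }
}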
+static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) { + DCHECK(bb_set->IsBitSet(block->GetBlockId())); + bb_set->ClearBit(block->GetBlockId()); + + for (HBasicBlock* succ : block->GetSuccessors()) { + if (bb_set->IsBitSet(succ->GetBlockId())) { + TraverseSubgraphForConnectivity(succ, bb_set); + } + } +} + // // Helpers for CloneBasicBlock. // @@ -268,7 +281,6 @@ void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVect } void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { - // TODO: DCHECK that after the transformation the graph is connected. HBasicBlock* block_entry = nullptr; if (outer_loop_ == nullptr) { @@ -424,6 +436,11 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { outer_loop_ = nullptr; break; } + if (outer_loop_ == nullptr) { + // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer + // common loop. + outer_loop_ = loop_exit_loop_info; + } outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); } @@ -507,6 +524,34 @@ void SuperblockCloner::ResolveDataFlow() { // Debug and logging methods. // +// Debug function to dump graph' BasicBlocks info. +void DumpBB(HGraph* graph) { + for (HBasicBlock* bb : graph->GetBlocks()) { + if (bb == nullptr) { + continue; + } + std::cout << bb->GetBlockId(); + std::cout << " <- "; + for (HBasicBlock* pred : bb->GetPredecessors()) { + std::cout << pred->GetBlockId() << " "; + } + std::cout << " -> "; + for (HBasicBlock* succ : bb->GetSuccessors()) { + std::cout << succ->GetBlockId() << " "; + } + + if (bb->GetDominator()) { + std::cout << " dom " << bb->GetDominator()->GetBlockId(); + } + + if (bb->GetLoopInformation()) { + std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId(); + } + + std::cout << std::endl; + } +} + void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { DCHECK(!orig_instr->IsPhi()); HInstruction* copy_instr = GetInstrCopy(orig_instr); @@ -542,6 +587,82 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) } } +bool SuperblockCloner::CheckRemappingInfoIsValid() { + for (HEdge edge : *remap_orig_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_copy_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_incoming_) { + if (!IsEdgeValid(edge, graph_) || + IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + return true; +} + +void SuperblockCloner::VerifyGraph() { + for (auto it : *hir_map_) { + HInstruction* orig_instr = it.first; + HInstruction* copy_instr = it.second; + if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) { + DCHECK(it.first->GetBlock() != nullptr); + } + if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) { + DCHECK(it.second->GetBlock() != nullptr); + } + } + + GraphChecker checker(graph_); + checker.Run(); + if (!checker.IsValid()) { + for (const std::string& error : checker.GetErrors()) { + std::cout << error << std::endl; + } + LOG(FATAL) << "GraphChecker failed: superblock cloner\n"; + } +} + +void DumpBBSet(const ArenaBitVector* set) { + for (uint32_t idx : set->Indexes()) { + std::cout << idx << "\n"; + } +} + +void SuperblockCloner::DumpInputSets() { + std::cout << graph_->PrettyMethod() << 
"\n"; + std::cout << "orig_bb_set:\n"; + for (uint32_t idx : orig_bb_set_.Indexes()) { + std::cout << idx << "\n"; + } + std::cout << "remap_orig_internal:\n"; + for (HEdge e : *remap_orig_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_copy_internal:\n"; + for (auto e : *remap_copy_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_incoming:\n"; + for (auto e : *remap_incoming_) { + std::cout << e << "\n"; + } +} + // // Public methods. // @@ -569,6 +690,7 @@ void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_inte remap_orig_internal_ = remap_orig_internal; remap_copy_internal_ = remap_copy_internal; remap_incoming_ = remap_incoming; + DCHECK(CheckRemappingInfoIsValid()); } bool SuperblockCloner::IsSubgraphClonable() const { @@ -602,6 +724,63 @@ bool SuperblockCloner::IsSubgraphClonable() const { return true; } +bool SuperblockCloner::IsFastCase() const { + // Check that loop unrolling/loop peeling is being conducted. + // Check that all the basic blocks belong to the same loop. + bool flag = false; + HLoopInformation* common_loop_info = nullptr; + for (uint32_t idx : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* block_loop_info = block->GetLoopInformation(); + if (!flag) { + common_loop_info = block_loop_info; + } else { + if (block_loop_info != common_loop_info) { + return false; + } + } + } + + // Check that orig_bb_set_ corresponds to loop peeling/unrolling. + if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) { + return false; + } + + bool peeling_or_unrolling = false; + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + + // Check whether remapping info corresponds to loop unrolling. + CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + remap_orig_internal.Clear(); + remap_copy_internal.Clear(); + remap_incoming.Clear(); + + // Check whether remapping info corresponds to loop peeling. + CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + return peeling_or_unrolling; +} + void SuperblockCloner::Run() { DCHECK(bb_map_ != nullptr); DCHECK(hir_map_ != nullptr); @@ -609,6 +788,11 @@ void SuperblockCloner::Run() { remap_copy_internal_ != nullptr && remap_incoming_ != nullptr); DCHECK(IsSubgraphClonable()); + DCHECK(IsFastCase()); + + if (kSuperblockClonerLogging) { + DumpInputSets(); + } // Find an area in the graph for which control flow information should be adjusted. FindAndSetLocalAreaForAdjustments(); @@ -618,6 +802,19 @@ void SuperblockCloner::Run() { // Connect the blocks together/remap successors and fix phis which are directly affected my the // remapping. 
RemapEdgesSuccessors(); + + // Check that the subgraph is connected. + if (kIsDebugBuild) { + HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner); + + // Add original and copy blocks of the subgraph to the work set. + for (auto iter : *bb_map_) { + work_set.SetBit(iter.first->GetBlockId()); // Original block. + work_set.SetBit(iter.second->GetBlockId()); // Copy block. + } + CHECK(IsSubgraphConnected(&work_set, graph_)); + } + // Recalculate dominance and backedge information which is required by the next stage. AdjustControlFlowInfo(); // Fix data flow of the graph. @@ -650,6 +847,10 @@ void SuperblockCloner::CleanUp() { } } } + + if (kIsDebugBuild) { + VerifyGraph(); + } } HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { @@ -701,4 +902,127 @@ void SuperblockCloner::CloneBasicBlocks() { } } +// +// Stand-alone methods. +// + +void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + HEdgeSet* remap_orig_internal, + HEdgeSet* remap_copy_internal, + HEdgeSet* remap_incoming) { + DCHECK(loop_info != nullptr); + HBasicBlock* loop_header = loop_info->GetHeader(); + // Set up remap_orig_internal edges set - set is empty. + // Set up remap_copy_internal edges set. + for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) { + HEdge e = HEdge(back_edge_block, loop_header); + if (to_unroll) { + remap_orig_internal->Insert(e); + remap_copy_internal->Insert(e); + } else { + remap_copy_internal->Insert(e); + } + } + + // Set up remap_incoming edges set. + if (!to_unroll) { + remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header)); + } +} + +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) { + ArenaVector<HBasicBlock*> entry_blocks( + graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + // Find subgraph entry blocks. + for (uint32_t orig_block_id : work_set->Indexes()) { + HBasicBlock* block = graph->GetBlocks()[orig_block_id]; + for (HBasicBlock* pred : block->GetPredecessors()) { + if (!work_set->IsBitSet(pred->GetBlockId())) { + entry_blocks.push_back(block); + break; + } + } + } + + for (HBasicBlock* entry_block : entry_blocks) { + if (work_set->IsBitSet(entry_block->GetBlockId())) { + TraverseSubgraphForConnectivity(entry_block, work_set); + } + } + + // Return whether there are unvisited - unreachable - blocks. + return work_set->NumSetBits() == 0; +} + +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { + if (loop1 == nullptr || loop2 == nullptr) { + return nullptr; + } + + if (loop1->IsIn(*loop2)) { + return loop2; + } + + HLoopInformation* current = loop1; + while (current != nullptr && !loop2->IsIn(*current)) { + current = current->GetPreHeader()->GetLoopInformation(); + } + + return current; +} + +bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) { + PeelUnrollHelper helper(loop_info, nullptr, nullptr); + return helper.IsLoopClonable(); +} + +HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) { + // For now do peeling only for natural loops. + DCHECK(!loop_info_->IsIrreducible()); + + HBasicBlock* loop_header = loop_info_->GetHeader(); + // Check that loop info is up-to-date. 
+ DCHECK(loop_info_ == loop_header->GetLoopInformation()); + + HGraph* graph = loop_header->GetGraph(); + ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool()); + + HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(to_unroll, + loop_info_, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + cloner_.Run(); + cloner_.CleanUp(); + + // Check that loop info is preserved. + DCHECK(loop_info_ == loop_header->GetLoopInformation()); + + return loop_header; +} + +PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info) + : bb_map_(std::less<HBasicBlock*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + hir_map_(std::less<HInstruction*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + helper_(info, &bb_map_, &hir_map_) {} + } // namespace art + +namespace std { + +ostream& operator<<(ostream& os, const art::HEdge& e) { + e.Dump(os); + return os; +} + +} // namespace std diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index 23de6926735..e0931674cb3 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -25,7 +25,6 @@ namespace art { static const bool kSuperblockClonerLogging = false; -static const bool kSuperblockClonerVerify = false; // Represents an edge between two HBasicBlocks. // @@ -152,6 +151,15 @@ class SuperblockCloner : public ValueObject { // TODO: Start from small range of graph patterns then extend it. bool IsSubgraphClonable() const; + // Returns whether selected subgraph satisfies the criteria for fast data flow resolution + // when iterative DF algorithm is not required and dominators/instructions inputs can be + // trivially adjusted. + // + // TODO: formally describe the criteria. + // + // Loop peeling and unrolling satisfy the criteria. + bool IsFastCase() const; + // Runs the copy algorithm according to the description. void Run(); @@ -202,11 +210,17 @@ class SuperblockCloner : public ValueObject { return IsInOrigBBSet(block->GetBlockId()); } + // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, + // dominators) needs to be adjusted. + HLoopInformation* GetRegionToBeAdjusted() const { + return outer_loop_; + } + private: // Fills the 'exits' vector with the subgraph exits. void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); - // Finds and records information about the area in the graph for which control-flow (back edges, + // Finds and records information about the area in the graph for which control flow (back edges, // loops, dominators) needs to be adjusted. void FindAndSetLocalAreaForAdjustments(); @@ -217,7 +231,7 @@ class SuperblockCloner : public ValueObject { // phis' nor instructions' inputs values are resolved. void RemapEdgesSuccessors(); - // Adjusts control-flow (back edges, loops, dominators) for the local area defined by + // Adjusts control flow (back edges, loops, dominators) for the local area defined by // FindAndSetLocalAreaForAdjustments. 
void AdjustControlFlowInfo(); @@ -272,6 +286,9 @@ class SuperblockCloner : public ValueObject { // Debug and logging methods. // void CheckInstructionInputsRemapping(HInstruction* orig_instr); + bool CheckRemappingInfoIsValid(); + void VerifyGraph(); + void DumpInputSets(); HBasicBlock* GetBlockById(uint32_t block_id) const { DCHECK(block_id < graph_->GetBlocks().size()); @@ -295,15 +312,97 @@ class SuperblockCloner : public ValueObject { HBasicBlockMap* bb_map_; // Correspondence map for instructions: (original HInstruction, copy HInstruction). HInstructionMap* hir_map_; - // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. + // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. HLoopInformation* outer_loop_; HBasicBlockSet outer_loop_bb_set_; ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); + ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); }; +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when correspondence map between original and copied +// basic blocks/instructions are demanded. +class PeelUnrollHelper : public ValueObject { + public: + explicit PeelUnrollHelper(HLoopInformation* info, + SuperblockCloner::HBasicBlockMap* bb_map, + SuperblockCloner::HInstructionMap* hir_map) : + loop_info_(info), + cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map) { + // For now do peeling/unrolling only for natural loops. + DCHECK(!info->IsIrreducible()); + } + + // Returns whether the loop can be peeled/unrolled (static function). + static bool IsLoopClonable(HLoopInformation* loop_info); + + // Returns whether the loop can be peeled/unrolled. + bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } + + HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll */ false); } + HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll */ true); } + HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } + + protected: + // Applies loop peeling/unrolling for the loop specified by 'loop_info'. + // + // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it. + HBasicBlock* DoPeelUnrollImpl(bool to_unroll); + + private: + HLoopInformation* loop_info_; + SuperblockCloner cloner_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper); +}; + +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when there is no need to get correspondence information between +// original and copied basic blocks/instructions. +class PeelUnrollSimpleHelper : public ValueObject { + public: + explicit PeelUnrollSimpleHelper(HLoopInformation* info); + bool IsLoopClonable() const { return helper_.IsLoopClonable(); } + HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } + HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } + HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } + + const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } + const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } + + private: + SuperblockCloner::HBasicBlockMap bb_map_; + SuperblockCloner::HInstructionMap hir_map_; + PeelUnrollHelper helper_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper); +}; + +// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. 
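// Worked example, using hypothetical block names (P, H, B): for a natural loop with
// preheader P, header H and a single back-edge block B (back edge B->H), the definition in
// superblock_cloner.cc above produces
//   unrolling (to_unroll == true):  remap_orig_internal = { B->H }, remap_copy_internal = { B->H },
//                                   remap_incoming = { }; the entry edge P->H is left untouched,
//                                   so the original header stays the loop header and the copied
//                                   blocks form the second half of the unrolled iteration;
//   peeling (to_unroll == false):   remap_orig_internal = { }, remap_copy_internal = { B->H },
//                                   remap_incoming = { P->H }; the entry edge is redirected to
//                                   the copy, which becomes the peeled first iteration in front
//                                   of the original loop (see the LoopPeeling and LoopUnrolling
//                                   tests below).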
+void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + SuperblockCloner::HEdgeSet* remap_orig_internal, + SuperblockCloner::HEdgeSet* remap_copy_internal, + SuperblockCloner::HEdgeSet* remap_incoming); + +// Returns whether blocks from 'work_set' are reachable from the rest of the graph. +// +// Returns whether such a set 'outer_entries' of basic blocks exists that: +// - each block from 'outer_entries' is not from 'work_set'. +// - each block from 'work_set' is reachable from at least one block from 'outer_entries'. +// +// After the function returns work_set contains only blocks from the original 'work_set' +// which are unreachable from the rest of the graph. +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); + +// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole +// graph. +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); } // namespace art namespace std { @@ -312,11 +411,12 @@ template <> struct hash<art::HEdge> { size_t operator()(art::HEdge const& x) const noexcept { // Use Cantor pairing function as the hash function. - uint32_t a = x.GetFrom(); - uint32_t b = x.GetTo(); + size_t a = x.GetFrom(); + size_t b = x.GetTo(); return (a + b) * (a + b + 1) / 2 + b; } }; +ostream& operator<<(ostream& os, const art::HEdge& e); } // namespace std diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index f1b7bffdf5f..df2e517afff 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -25,52 +25,65 @@ namespace art { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; +using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; +using HEdgeSet = SuperblockCloner::HEdgeSet; // This class provides methods and helpers for testing various cloning and copying routines: // individual instruction cloning and cloning of the more coarse-grain structures. 
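// Note on the std::hash<art::HEdge> specialization above: it uses the Cantor pairing function
// (a + b) * (a + b + 1) / 2 + b, which maps distinct ordered (from, to) pairs to distinct
// values. A quick standalone sanity sketch (CantorPair is an invented name, not an ART API):
//
//   #include <cstddef>
//   constexpr size_t CantorPair(size_t a, size_t b) { return (a + b) * (a + b + 1) / 2 + b; }
//   static_assert(CantorPair(2, 3) == 18, "edge 2->3");
//   static_assert(CantorPair(3, 2) == 17, "edge 3->2; order matters");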
class SuperblockClonerTest : public OptimizingUnitTest { public: - SuperblockClonerTest() - : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {} + SuperblockClonerTest() : graph_(CreateGraph()), + entry_block_(nullptr), + return_block_(nullptr), + exit_block_(nullptr), + parameter_(nullptr) {} - void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p, - /* out */ HBasicBlock** body_p) { + void InitGraph() { entry_block_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_block_); graph_->SetEntryBlock(entry_block_); + return_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(return_block_); + + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(exit_block_); + graph_->SetExitBlock(exit_block_); + + entry_block_->AddSuccessor(return_block_); + return_block_->AddSuccessor(exit_block_); + + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); + entry_block_->AddInstruction(parameter_); + return_block_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); + } + + void CreateBasicLoopControlFlow(HBasicBlock* position, + HBasicBlock* successor, + /* out */ HBasicBlock** header_p, + /* out */ HBasicBlock** body_p) { HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_); - HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_preheader); graph_->AddBlock(loop_header); graph_->AddBlock(loop_body); - graph_->AddBlock(loop_exit); - exit_block_ = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(exit_block_); - graph_->SetExitBlock(exit_block_); + position->ReplaceSuccessor(successor, loop_preheader); - entry_block_->AddSuccessor(loop_preheader); loop_preheader->AddSuccessor(loop_header); // Loop exit first to have a proper exit condition/target for HIf. - loop_header->AddSuccessor(loop_exit); + loop_header->AddSuccessor(successor); loop_header->AddSuccessor(loop_body); loop_body->AddSuccessor(loop_header); - loop_exit->AddSuccessor(exit_block_); *header_p = loop_header; *body_p = loop_body; - - parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - DataType::Type::kInt32); - entry_block_->AddInstruction(parameter_); - loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid()); - exit_block_->AddInstruction(new (GetAllocator()) HExit()); } void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) { @@ -84,11 +97,12 @@ class SuperblockClonerTest : public OptimizingUnitTest { // Header block. HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck(); + HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128); loop_header->AddPhi(phi); loop_header->AddInstruction(suspend_check); - loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128)); - loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_)); + loop_header->AddInstruction(loop_check); + loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check)); // Loop body block. 
HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc); @@ -97,8 +111,8 @@ class SuperblockClonerTest : public OptimizingUnitTest { HInstruction* array_get = new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc); HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1); - HInstruction* array_set = - new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); + HInstruction* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1); loop_body->AddInstruction(null_check); @@ -153,6 +167,7 @@ class SuperblockClonerTest : public OptimizingUnitTest { HGraph* graph_; HBasicBlock* entry_block_; + HBasicBlock* return_block_; HBasicBlock* exit_block_; HInstruction* parameter_; @@ -162,10 +177,11 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) { HBasicBlock* header = nullptr; HBasicBlock* loop_body = nullptr; - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); - ASSERT_TRUE(CheckGraph()); + EXPECT_TRUE(CheckGraph()); HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck(); CloneAndReplaceInstructionVisitor visitor(graph_); @@ -193,7 +209,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) { HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -272,7 +289,8 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -303,4 +321,487 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { EXPECT_TRUE(loop_info->IsBackEdge(*loop_body)); } +// Tests IsSubgraphConnected function for negative case. +TEST_F(SuperblockClonerTest, IsGraphConnected) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(unreachable_block); + + HBasicBlockSet bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + bb_set.SetBit(header->GetBlockId()); + bb_set.SetBit(loop_body->GetBlockId()); + bb_set.SetBit(unreachable_block->GetBlockId()); + + EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_)); + EXPECT_EQ(bb_set.NumSetBits(), 1u); + EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId())); +} + +// Tests SuperblockCloner for loop peeling case. +// +// Control Flow of the example (ignoring critical edges splitting). 
+// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A| +// / \ / / \ +// v v/ / v +// |4| |3| / |3A| (7) +// | / / +// v | v +// |E| \ |2|<-\ +// \ / \ / +// v v / +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopPeeling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoPeeling(); + HLoopInformation* new_loop_info = new_header->GetLoopInformation(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), header); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(new_loop_info->GetHeader(), header); + EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body); +} + +// Tests SuperblockCloner for loop unrolling case. +// +// Control Flow of the example (ignoring critical edges splitting). +// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A|<-\ +// / \ / / \ \ +// v v/ / v \ +// |4| |3| /(7)|3A| | +// | / / / +// v | v / +// |E| \ |2| / +// \ / \ / +// v v/ +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopUnrolling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoUnrolling(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header)); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(loop_info, new_header->GetLoopInformation()); + EXPECT_EQ(loop_info->GetHeader(), new_header); + EXPECT_EQ(loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body)); +} + +// Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after +// the transformation the loop has a single preheader. 
+TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + // Transform a basic loop to have multiple back edges. + HBasicBlock* latch = header->GetSuccessors()[1]; + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(if_block); + graph_->AddBlock(temp1); + header->ReplaceSuccessor(latch, if_block); + if_block->AddSuccessor(latch); + if_block->AddSuccessor(temp1); + temp1->AddSuccessor(header); + + if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + HInstructionIterator it(header->GetPhis()); + DCHECK(!it.Done()); + HPhi* loop_phi = it.Current()->AsPhi(); + HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32, + loop_phi, + graph_->GetIntConstant(2)); + temp1->AddInstruction(temp_add); + temp1->AddInstruction(new (GetAllocator()) HGoto()); + loop_phi->AddInput(temp_add); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollSimpleHelper helper(loop_info); + HBasicBlock* new_header = helper.DoPeeling(); + EXPECT_EQ(header, new_header); + + EXPECT_TRUE(CheckGraph()); + EXPECT_EQ(header->GetPredecessors().size(), 3u); +} + +static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header, + HBasicBlock* loop2_header, + HBasicBlock* loop3_header) { + EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header); + EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(), + loop2_header); +} + +TEST_F(SuperblockClonerTest, LoopPeelingNested) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 + // [ ], [ [ ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation(); + HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation(); + + // Check nested loops structure. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation()); + helper.DoPeeling(); + // Check that nested loops structure has not changed after the transformation. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + + // Test that the loop info is preserved. 
+ EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation()); + EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation()); + + EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before); + EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks that the loop population is correctly propagated after an inner loop is peeled. +TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 + // [ [ [ ] ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation()); + helper.DoPeeling(); + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->Contains(*loop2_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + // Check that loop4 info has not been touched after local run of AnalyzeLoops. + EXPECT_EQ(loop4, loop4_header->GetLoopInformation()); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(!loop4->IsIn(*loop1)); + + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks the case when inner loop have an exit not to its immediate outer_loop but some other loop +// in the hierarchy. Loop population information must be valid after loop peeling. +TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops then peel loop3. + // Headers: 1 2 3 + // [ [ [ ] ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + HBasicBlock* loop_body1 = loop_body; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + HBasicBlock* loop_body3 = loop_body; + + // Change the loop3 - insert an exit which leads to loop1. 
+ HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(loop3_extra_if_block); + loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block); + loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit. + loop3_extra_if_block->AddSuccessor(loop_body3); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit)); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation()); + helper.DoPeeling(); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + // Check that after the transformation the local area for CF adjustments has been chosen + // correctly and loop population has been updated. + loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1->Contains(*loop3_long_exit)); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1); + + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + EXPECT_TRUE(CheckGraph()); +} + +TEST_F(SuperblockClonerTest, FastCaseCheck) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + + HLoopInformation* loop_info = header->GetLoopInformation(); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + orig_bb_set.Union(&loop_info->GetBlocks()); + + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(true, + loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + // Insert some extra nodes and edges. + HBasicBlock* preheader = loop_info->GetPreHeader(); + orig_bb_set.SetBit(preheader->GetBlockId()); + + // Adjust incoming edges. + remap_incoming.Clear(); + remap_incoming.Insert(HEdge(preheader->GetSinglePredecessor(), preheader)); + + HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + &bb_map, + &hir_map); + cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + + EXPECT_FALSE(cloner.IsFastCase()); +} + +// Helper for FindCommonLoop which also check that FindCommonLoop is symmetric. +static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) { + HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2); + HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1); + EXPECT_EQ(common_loop21, common_loop12); + return common_loop12; +} + +// Tests FindCommonLoop function on a loop nest. 
+TEST_F(SuperblockClonerTest, FindCommonLoop) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 5 + // [ [ [ ] ], [ ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop5_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + HLoopInformation* loop5 = loop5_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(loop4->IsIn(*loop1)); + + EXPECT_FALSE(loop5->IsIn(*loop1)); + EXPECT_FALSE(loop4->IsIn(*loop2)); + EXPECT_FALSE(loop4->IsIn(*loop3)); + + EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1); + + EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr); + EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5); +} + } // namespace art diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index 921d4018492..57360e74a33 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -17,6 +17,7 @@ #include "trampoline_compiler.h" #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "jni_env_ext.h" #ifdef ART_ENABLE_CODEGEN_arm @@ -243,7 +244,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset64 offset) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); switch (isa) { #ifdef ART_ENABLE_CODEGEN_arm64 @@ -269,7 +270,7 @@ 
std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet is std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset32 offset) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); switch (isa) { #ifdef ART_ENABLE_CODEGEN_arm diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index 944c64b5918..421c1b60895 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -20,8 +20,8 @@ #include <vector> #include "base/casts.h" +#include "base/memory_region.h" #include "globals.h" -#include "memory_region.h" namespace art { diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 5b0cd6baa8d..379a6396eb6 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -29,10 +29,10 @@ #include "base/array_ref.h" #include "base/enums.h" #include "base/macros.h" +#include "base/memory_region.h" #include "debug/dwarf/debug_frame_opcode_writer.h" #include "label.h" #include "managed_register.h" -#include "memory_region.h" #include "mips/constants_mips.h" #include "offsets.h" #include "x86/constants_x86.h" diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 0cb8bbb2d54..7c800b355fe 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -26,6 +26,7 @@ #include <fstream> #include <iterator> +#include "base/malloc_arena_pool.h" #include "assembler_test_base.h" #include "common_runtime_test.h" // For ScratchFile @@ -1606,7 +1607,7 @@ class AssemblerTest : public testing::Test { static constexpr size_t kWarnManyCombinationsThreshold = 500; - ArenaPool pool_; + MallocArenaPool pool_; std::unique_ptr<ArenaAllocator> allocator_; std::unique_ptr<Ass> assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 655d17d4fbf..053e202523a 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -27,6 +27,7 @@ #include "utils/arm/jni_macro_assembler_arm_vixl.h" #include "base/hex_dump.h" +#include "base/malloc_arena_pool.h" #include "common_runtime_test.h" namespace art { @@ -169,7 +170,7 @@ class ArmVIXLAssemblerTest : public ::testing::Test { public: ArmVIXLAssemblerTest() : pool(), allocator(&pool), assembler(&allocator) { } - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator; ArmVIXLJNIMacroAssembler assembler; }; diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h index ce3302bb628..9915498acca 100644 --- a/compiler/utils/atomic_dex_ref_map-inl.h +++ b/compiler/utils/atomic_dex_ref_map-inl.h @@ -70,7 +70,7 @@ inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Get(const DexFileRefer if (array == nullptr) { return false; } - *out = (*array)[ref.index].LoadRelaxed(); + *out = (*array)[ref.index].load(std::memory_order_relaxed); return true; } @@ -81,7 +81,7 @@ inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Remove(const DexFileRe if (array == nullptr) { return false; } - *out = (*array)[ref.index].ExchangeSequentiallyConsistent(nullptr); + *out = (*array)[ref.index].exchange(nullptr, std::memory_order_seq_cst); return true; } @@ -120,7 +120,7 @@ inline void AtomicDexRefMap<DexFileReferenceType, Value>::Visit(const Visitor& v const DexFile* dex_file = pair.first; const ElementArray& elements = pair.second; for (size_t i = 0; i < elements.size(); 
++i) { - visitor(DexFileReference(dex_file, i), elements[i].LoadRelaxed()); + visitor(DexFileReference(dex_file, i), elements[i].load(std::memory_order_relaxed)); } } } @@ -129,7 +129,7 @@ template <typename DexFileReferenceType, typename Value> inline void AtomicDexRefMap<DexFileReferenceType, Value>::ClearEntries() { for (auto& it : arrays_) { for (auto& element : it.second) { - element.StoreRelaxed(nullptr); + element.store(nullptr, std::memory_order_relaxed); } } } diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 3f7691b6a86..0c34aa4f1dc 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -38,8 +38,8 @@ #include "x86_64/jni_macro_assembler_x86_64.h" #endif #include "base/casts.h" +#include "base/memory_region.h" #include "globals.h" -#include "memory_region.h" namespace art { diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h index 1aefc84c78c..b70c18b3e2f 100644 --- a/compiler/utils/jni_macro_assembler_test.h +++ b/compiler/utils/jni_macro_assembler_test.h @@ -20,6 +20,7 @@ #include "jni_macro_assembler.h" #include "assembler_test_base.h" +#include "base/malloc_arena_pool.h" #include "common_runtime_test.h" // For ScratchFile #include <sys/stat.h> @@ -139,7 +140,7 @@ class JNIMacroAssemblerTest : public testing::Test { test_helper_->Driver(*data, assembly_text, test_name); } - ArenaPool pool_; + MallocArenaPool pool_; std::unique_ptr<ArenaAllocator> allocator_; std::unique_ptr<Ass> assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 2218ef9af29..dce5b95fec3 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -18,9 +18,9 @@ #include "base/bit_utils.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -2793,6 +2793,26 @@ void MipsAssembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt); } +void MipsAssembler::PcntB(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntH(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + void MipsAssembler::ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 7de8e2e3665..c6ce62b4f4a 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -756,6 +756,11 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void PcntB(VectorRegister wd, VectorRegister 
ws); + void PcntH(VectorRegister wd, VectorRegister ws); + void PcntW(VectorRegister wd, VectorRegister ws); + void PcntD(VectorRegister wd, VectorRegister ws); + // Helper for replicating floating point value in all destination elements. void ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double); diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index 937ee25bcb1..691c33f3e7a 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -2277,6 +2277,22 @@ TEST_F(AssemblerMIPS32r6Test, FillW) { DriverStr(RepeatVR(&mips::MipsAssembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); } +TEST_F(AssemblerMIPS32r6Test, PcntB) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntH) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntW) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntD) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d"); +} + TEST_F(AssemblerMIPS32r6Test, LdiB) { DriverStr(RepeatVIb(&mips::MipsAssembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index e1b0e75108b..bb1bb82fa5d 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -18,9 +18,9 @@ #include "base/bit_utils.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -2279,6 +2279,26 @@ void Mips64Assembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegist EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15); } +void Mips64Assembler::PcntB(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntH(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e); +} + void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double) { diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 7a61f39e64a..542dbafc87d 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -863,6 +863,11 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void PcntB(VectorRegister wd, VectorRegister ws); + void PcntH(VectorRegister wd, VectorRegister ws); + void PcntW(VectorRegister wd, VectorRegister ws); + void PcntD(VectorRegister wd, VectorRegister ws); + // Helper for replicating floating point value in all destination elements. 
void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index b0e1d91c3f8..fb5f12be936 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -3529,6 +3529,22 @@ TEST_F(AssemblerMIPS64Test, FillD) { DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); } +TEST_F(AssemblerMIPS64Test, PcntB) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b"); +} + +TEST_F(AssemblerMIPS64Test, PcntH) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h"); +} + +TEST_F(AssemblerMIPS64Test, PcntW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w"); +} + +TEST_F(AssemblerMIPS64Test, PcntD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d"); +} + TEST_F(AssemblerMIPS64Test, LdiB) { DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index ea160c8993c..86f9010ea32 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -17,8 +17,8 @@ #include "assembler_x86.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -913,6 +913,78 @@ void X86Assembler::psubq(XmmRegister dst, XmmRegister src) { } +void X86Assembler::paddusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDD); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xED); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD9); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE9); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); diff --git 
a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index a0856770836..22eaedce612 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -449,6 +449,15 @@ class X86Assembler FINAL : public Assembler { void paddq(XmmRegister dst, XmmRegister src); void psubq(XmmRegister dst, XmmRegister src); + void paddusb(XmmRegister dst, XmmRegister src); + void paddsb(XmmRegister dst, XmmRegister src); + void paddusw(XmmRegister dst, XmmRegister src); + void paddsw(XmmRegister dst, XmmRegister src); + void psubusb(XmmRegister dst, XmmRegister src); + void psubsb(XmmRegister dst, XmmRegister src); + void psubusw(XmmRegister dst, XmmRegister src); + void psubsw(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, Register src); void cvtsi2sd(XmmRegister dst, Register src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 2fd1b271828..cd007b32d41 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -17,13 +17,14 @@ #include "assembler_x86.h" #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "utils/assembler_test.h" namespace art { TEST(AssemblerX86, CreateBuffer) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); AssemblerBuffer buffer(&allocator); AssemblerBuffer::EnsureCapacity ensured(&buffer); @@ -600,6 +601,38 @@ TEST_F(AssemblerX86Test, PSubQ) { DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); } +TEST_F(AssemblerX86Test, PAddUSB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb"); +} + +TEST_F(AssemblerX86Test, PAddSB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb"); +} + +TEST_F(AssemblerX86Test, PAddUSW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw"); +} + +TEST_F(AssemblerX86Test, PAddSW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddsw, "paddsw %{reg2}, %{reg1}"), "paddsw"); +} + +TEST_F(AssemblerX86Test, PSubUSB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb"); +} + +TEST_F(AssemblerX86Test, PSubSB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb"); +} + +TEST_F(AssemblerX86Test, PSubUSW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw"); +} + +TEST_F(AssemblerX86Test, PSubSW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw"); +} + TEST_F(AssemblerX86Test, XorPD) { DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index ff5a357c5e3..bd31561937d 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -17,8 +17,8 @@ #include "assembler_x86_64.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -1011,6 +1011,86 @@ void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) { } +void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDC); +
EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEC); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDD); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xED); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) { cvtsi2ss(dst, src, false); } diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 7a5fdb502f6..ab761fb1fc1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -485,6 +485,15 @@ class X86_64Assembler FINAL : public Assembler { void paddq(XmmRegister dst, XmmRegister src); void psubq(XmmRegister dst, XmmRegister src); + void paddusb(XmmRegister dst, XmmRegister src); + void paddsb(XmmRegister dst, XmmRegister src); + void paddusw(XmmRegister dst, XmmRegister src); + void paddsw(XmmRegister dst, XmmRegister src); + void psubusb(XmmRegister dst, XmmRegister src); + void psubsb(XmmRegister dst, XmmRegister src); + void psubusw(XmmRegister dst, XmmRegister src); + void psubsw(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version. 
void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit); void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 6b1e53c35ab..0589df55d23 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -21,6 +21,7 @@ #include <random> #include "base/bit_utils.h" +#include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "jni_macro_assembler_x86_64.h" #include "utils/assembler_test.h" @@ -29,7 +30,7 @@ namespace art { TEST(AssemblerX86_64, CreateBuffer) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); AssemblerBuffer buffer(&allocator); AssemblerBuffer::EnsureCapacity ensured(&buffer); @@ -1282,6 +1283,38 @@ TEST_F(AssemblerX86_64Test, Psubq) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); } +TEST_F(AssemblerX86_64Test, Paddusb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb"); +} + +TEST_F(AssemblerX86_64Test, Paddsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb"); +} + +TEST_F(AssemblerX86_64Test, Paddusw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw"); +} + +TEST_F(AssemblerX86_64Test, Paddsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsw, "paddsw %{reg2}, %{reg1}"), "paddsw"); +} + +TEST_F(AssemblerX86_64Test, Psubusb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb"); +} + +TEST_F(AssemblerX86_64Test, Psubsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb"); +} + +TEST_F(AssemblerX86_64Test, Psubusw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw"); +} + +TEST_F(AssemblerX86_64Test, Psubsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw"); +} + TEST_F(AssemblerX86_64Test, Cvtsi2ss) { DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss"); } diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 5766f9d44b9..9486cb44c5b 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -17,8 +17,8 @@ #include "jni_macro_assembler_x86_64.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 76448d819c2..553d131e2f5 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -18,6 +18,7 @@ #include "verifier/verifier_deps.h" #include "art_method-inl.h" +#include "base/indenter.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiler_callbacks.h" @@ -28,7 +29,6 @@ #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "handle_scope-inl.h" -#include "indenter.h" #include "mirror/class_loader.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" |
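Editor's note on the new MIPS MSA population-count instructions (PcntB/PcntH/PcntW/PcntD) added above: both the 32-bit and 64-bit assemblers emit them through EmitMsa2R(0xc1, df, ws, wd, 0x1e). The following minimal, self-contained sketch shows how a 2R-format MSA word is typically packed from those operands. The field layout (major opcode in bits 31:26, 8-bit operation in 25:18, data format in 17:16, ws in 15:11, wd in 10:6, minor opcode in 5:0) and the EncodeMsa2R helper are assumptions for illustration only, not the ART assemblers' own EmitMsa2R.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for EmitMsa2R: packs a 2R-format MSA instruction word under the
// assumed field layout described above.
static uint32_t EncodeMsa2R(uint32_t operation, uint32_t df, uint32_t ws, uint32_t wd,
                            uint32_t minor_opcode) {
  const uint32_t kMsaMajorOpcode = 0x1e;  // assumed major opcode, bits 31:26
  return (kMsaMajorOpcode << 26) | (operation << 18) | (df << 16) |
         (ws << 11) | (wd << 6) | minor_opcode;
}

int main() {
  // pcnt.b $w2, $w1 under these assumptions: operation 0xc1, df = 0 (byte elements),
  // minor opcode 0x1e, matching the operands passed to EmitMsa2R in the patch.
  std::printf("pcnt.b $w2, $w1 -> 0x%08x\n", EncodeMsa2R(0xc1, 0x0, 1, 2, 0x1e));
  return 0;
}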

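For readers unfamiliar with the x86/x86-64 instructions covered by the new paddusb/paddsb/paddusw/paddsw and psubusb/psubsb/psubusw/psubsw assembler tests: these are the SSE2 saturating packed adds and subtracts. The sketch below is a scalar reference model for one byte lane of the add forms only, illustrating the saturation behaviour (unsigned results clamp to [0, 255], signed results to [-128, 127]; the subtract forms clamp the same way). It illustrates the instruction semantics and is not code from this patch.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One-lane reference model of the SSE2 saturating byte adds (paddusb/paddsb semantics).
static uint8_t AddUnsignedSaturated(uint8_t a, uint8_t b) {
  int sum = int{a} + int{b};
  return static_cast<uint8_t>(std::min(sum, 255));  // unsigned saturation: clamp to [0, 255]
}

static int8_t AddSignedSaturated(int8_t a, int8_t b) {
  int sum = int{a} + int{b};
  return static_cast<int8_t>(std::clamp(sum, -128, 127));  // signed saturation: clamp to [-128, 127]
}

int main() {
  std::printf("paddusb lane: 200 + 100 -> %d\n", AddUnsignedSaturated(200, 100));  // prints 255
  std::printf("paddsb  lane: 100 + 100 -> %d\n", AddSignedSaturated(100, 100));    // prints 127
  return 0;
}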