path: root/compiler/optimizing/code_generator_arm64.cc
Diffstat (limited to 'compiler/optimizing/code_generator_arm64.cc')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  1088
1 files changed, 819 insertions, 269 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7d1b0ea6dd..e1a4718140 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -16,11 +16,15 @@
#include "code_generator_arm64.h"
+#include "aarch64/assembler-aarch64.h"
+#include "aarch64/registers-aarch64.h"
#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/arm64/jni_frame_arm64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
+#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
@@ -29,13 +33,16 @@
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
+#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
+#include "mirror/var_handle.h"
#include "offsets.h"
+#include "optimizing/common_arm64.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
@@ -75,7 +82,6 @@ using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
-using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
@@ -163,53 +169,6 @@ static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
-// Calculate memory accessing operand for save/restore live registers.
-static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
- LocationSummary* locations,
- int64_t spill_offset,
- bool is_save) {
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
- const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
- DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
- codegen->GetNumberOfCoreRegisters(),
- fp_spills,
- codegen->GetNumberOfFloatingPointRegisters()));
-
- CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
- unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
- CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
-
- MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
- UseScratchRegisterScope temps(masm);
-
- Register base = masm->StackPointer();
- int64_t core_spill_size = core_list.GetTotalSizeInBytes();
- int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
- int64_t reg_size = kXRegSizeInBytes;
- int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
- uint32_t ls_access_size = WhichPowerOf2(reg_size);
- if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
- !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
- // If the offset does not fit in the instruction's immediate field, use an alternate register
- // to compute the base address(float point registers spill base address).
- Register new_base = temps.AcquireSameSizeAs(base);
- __ Add(new_base, base, Operand(spill_offset + core_spill_size));
- base = new_base;
- spill_offset = -core_spill_size;
- int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
- DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
- DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
- }
-
- if (is_save) {
- __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
- __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
- } else {
- __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
- __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
- }
-}
-
void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
@@ -224,7 +183,7 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar
stack_offset += kXRegSizeInBytes;
}
- const size_t fp_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSizeInBytes : kDRegSizeInBytes;
+ const size_t fp_reg_size = codegen->GetSlowPathFPWidth();
const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
for (uint32_t i : LowToHighBits(fp_spills)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
@@ -233,15 +192,15 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar
stack_offset += fp_reg_size;
}
- SaveRestoreLiveRegistersHelper(codegen,
- locations,
- codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true);
+ InstructionCodeGeneratorARM64* visitor =
+ down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
+ visitor->SaveLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
}
void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- SaveRestoreLiveRegistersHelper(codegen,
- locations,
- codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false);
+ InstructionCodeGeneratorARM64* visitor =
+ down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
+ visitor->RestoreLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
}
class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
@@ -325,8 +284,13 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
- arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
- CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ if (cls_->NeedsAccessCheck()) {
+ CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
+ arm64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
+ } else {
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ }
// If we also must_do_clinit, the resolved type is now in the correct register.
} else {
DCHECK(must_do_clinit);
@@ -426,10 +390,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD.
+ SaveLiveRegisters(codegen, locations); // Only saves live vector regs for SIMD.
arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD.
+ RestoreLiveRegisters(codegen, locations); // Only restores live vector regs for SIMD.
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -636,11 +600,12 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsPredicatedInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -706,14 +671,24 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
__ Add(index_reg, index_reg, Operand(offset_));
} else {
- // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+ // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile/VarHandleGet
// intrinsics, `index_` is not shifted by a scale factor of 2
// (as in the case of ArrayGet), as it is actually an offset
// to an object field within an object.
DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
- DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ Intrinsics intrinsic = instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK(intrinsic == Intrinsics::kUnsafeGetObject ||
+ intrinsic == Intrinsics::kUnsafeGetObjectVolatile ||
+ intrinsic == Intrinsics::kUnsafeCASObject ||
+ mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
+ mirror::VarHandle::AccessModeTemplate::kGet ||
+ mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
+ mirror::VarHandle::AccessModeTemplate::kCompareAndSet ||
+ mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
+ mirror::VarHandle::AccessModeTemplate::kCompareAndExchange ||
+ mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
+ mirror::VarHandle::AccessModeTemplate::kGetAndUpdate)
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0u);
DCHECK(index_.IsRegister());
@@ -800,7 +775,9 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
DataType::Type type = DataType::Type::kReference;
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ DCHECK(instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for GC root slow path: "
<< instruction_->DebugName();
@@ -870,6 +847,49 @@ Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
return LocationFrom(kArtMethodRegister);
}
+Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFPRegistersLength) {
+ location = LocationFrom(kParameterFPRegisters[fpr_index_]);
+ ++fpr_index_;
+ }
+ } else {
+ // Native ABI uses the same registers as managed, except that the method register x0
+ // is a normal argument.
+ if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
+ location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
+ // Pass the method in the hidden argument x15.
+ return Location::RegisterLocation(x15.GetCode());
+}
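
A standalone sketch, not part of the patch, of how the visitor above hands out @CriticalNative argument locations: x0 is reused as an ordinary argument register (there is no ArtMethod* parameter), floating-point arguments take d0-d7, and anything left over gets an 8-byte stack slot. The register names and counts assume the usual ARM64 parameter sets (x1-x7 core, d0-d7 FP); the signature is invented for illustration.

#include <cstdio>

int main() {
  // Hypothetical signature: ten integral and two FP arguments, so the last
  // two integral arguments spill to the stack.
  const bool is_fp[] = {false, false, true, true, false, false,
                        false, false, false, false, false, false};
  const char* gprs[] = {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7"};
  const char* fprs[] = {"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"};
  int gpr = 0, fpr = 0, stack_offset = 0;
  for (bool fp : is_fp) {
    if (fp && fpr < 8) {
      std::printf("%s\n", fprs[fpr++]);
    } else if (!fp && gpr < 8) {
      std::printf("%s\n", gprs[gpr++]);
    } else {
      std::printf("[sp, #%d]\n", stack_offset);  // One slot per stack argument.
      stack_offset += 8;                         // kFramePointerSize
    }
  }
}
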
+
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -883,8 +903,10 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
stats),
block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- location_builder_(graph, this),
- instruction_visitor_(graph, this),
+ location_builder_neon_(graph, this),
+ instruction_visitor_neon_(graph, this),
+ location_builder_sve_(graph, this),
+ instruction_visitor_sve_(graph, this),
move_resolver_(graph->GetAllocator(), this),
assembler_(graph->GetAllocator(),
compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
@@ -892,8 +914,11 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -909,6 +934,25 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
// Save the link register (containing the return address) to mimic Quick.
AddAllocatedRegister(LocationFrom(lr));
+
+ bool use_sve = ShouldUseSVE();
+ if (use_sve) {
+ location_builder_ = &location_builder_sve_;
+ instruction_visitor_ = &instruction_visitor_sve_;
+ } else {
+ location_builder_ = &location_builder_neon_;
+ instruction_visitor_ = &instruction_visitor_neon_;
+ }
+}
+
+bool CodeGeneratorARM64::ShouldUseSVE() const {
+ return GetInstructionSetFeatures().HasSVE();
+}
+
+size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
+ return SupportsPredicatedSIMD()
+ ? GetInstructionSetFeatures().GetSVEVectorLength() / kBitsPerByte
+ : vixl::aarch64::kQRegSizeInBytes;
}
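
A rough illustration of the width computation above, assuming (for the example only) an SVE implementation with 256-bit vectors; the real value comes from the instruction set features.

#include <cstddef>
#include <cstdio>

constexpr size_t kBitsPerByte = 8;
constexpr size_t kQRegSizeInBytes = 16;

size_t SimdRegisterWidth(bool predicated_simd, size_t sve_vector_length_bits) {
  return predicated_simd ? sve_vector_length_bits / kBitsPerByte : kQRegSizeInBytes;
}

int main() {
  std::printf("%zu\n", SimdRegisterWidth(true, 256));   // 32-byte Z-register spills
  std::printf("%zu\n", SimdRegisterWidth(false, 256));  // 16-byte Q-register spills
}
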
#define __ GetVIXLAssembler()->
@@ -923,7 +967,7 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
EmitJumpTables();
// Emit JIT baker read barrier slow paths.
- DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
+ DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
for (auto& entry : jit_baker_read_barrier_slow_paths_) {
uint32_t encoded_data = entry.first;
vixl::aarch64::Label* slow_path_entry = &entry.second.label;
@@ -989,7 +1033,7 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
uint32_t prev_insn = GetInsn(literal_offset - 4u);
const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
// Usually LDR (immediate) with correct root_reg but
- // we may have a "MOV marked, old_value" for UnsafeCASObject.
+ // we may have a "MOV marked, old_value" for intrinsic CAS.
if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) { // MOV?
CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); // LDR?
}
@@ -1038,9 +1082,9 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
scratch = LocationFrom(vixl_temps_.AcquireX());
} else {
DCHECK_EQ(kind, Location::kFpuRegister);
- scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
- ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
- : vixl_temps_.AcquireD());
+ scratch = codegen_->GetGraph()->HasSIMD()
+ ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
+ : LocationFrom(vixl_temps_.AcquireD());
}
AddScratchLocation(scratch);
return scratch;
@@ -1051,7 +1095,11 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
vixl_temps_.Release(XRegisterFrom(loc));
} else {
DCHECK(loc.IsFpuRegister());
- vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
+ if (codegen_->GetGraph()->HasSIMD()) {
+ codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
+ } else {
+ vixl_temps_.Release(DRegisterFrom(loc));
+ }
}
RemoveScratchLocation(loc);
}
@@ -1078,8 +1126,9 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- ScopedObjectAccess soa(Thread::Current());
- ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ ScopedProfilingInfoUse spiu(
+ Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
+ ProfilingInfo* info = spiu.GetProfilingInfo();
if (info != nullptr) {
uint64_t address = reinterpret_cast64<uint64_t>(info);
vixl::aarch64::Label done;
@@ -1089,14 +1138,14 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
__ Mov(temp, address);
__ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
__ Add(counter, counter, 1);
+ __ And(counter, counter, interpreter::kTieredHotnessMask);
__ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
- __ Tst(counter, 0xffff);
- __ B(ne, &done);
+ __ Cbnz(counter, &done);
if (is_frame_entry) {
if (HasEmptyFrame()) {
- // The entyrpoint expects the method at the bottom of the stack. We
+ // The entrypoint expects the method at the bottom of the stack. We
// claim stack space necessary for alignment.
- __ Claim(kStackAlignment);
+ IncreaseFrame(kStackAlignment);
__ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
} else if (!RequiresCurrentMethod()) {
__ Str(kArtMethodRegister, MemOperand(sp, 0));
@@ -1113,7 +1162,7 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
if (HasEmptyFrame()) {
CHECK(is_frame_entry);
__ Ldr(lr, MemOperand(sp, 8));
- __ Drop(kStackAlignment);
+ DecreaseFrame(kStackAlignment);
}
__ Bind(&done);
}
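
A standalone sketch of the counter update emitted above: the counter is incremented, masked, stored back, and the method is treated as hot only when the masked value wraps to zero (the cbnz skips to 'done' otherwise). The mask value below is invented for the demo; the real one is interpreter::kTieredHotnessMask.

#include <cstdint>
#include <cstdio>

constexpr uint16_t kDemoHotnessMask = 0x7ff;  // Assumed value, for illustration only.

bool IncrementHotness(uint16_t* counter) {
  *counter = static_cast<uint16_t>((*counter + 1u) & kDemoHotnessMask);
  return *counter == 0;  // Fall through to the hot path only on wrap-around.
}

int main() {
  uint16_t counter = kDemoHotnessMask;              // One increment away from wrapping.
  std::printf("%d\n", IncrementHotness(&counter));  // 1: hot, take the slow path.
  std::printf("%d\n", IncrementHotness(&counter));  // 0: branch to 'done'.
}
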
@@ -1434,7 +1483,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
__ Ldr(dst, StackOperandFrom(source));
} else if (source.IsSIMDStackSlot()) {
- __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+ GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
} else if (source.IsConstant()) {
DCHECK(CoherentConstantAndType(source, dst_type));
MoveConstant(dst, source.GetConstant());
@@ -1458,30 +1507,14 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
} else {
DCHECK(destination.IsFpuRegister());
if (GetGraph()->HasSIMD()) {
- __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+ GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
} else {
__ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
}
}
}
} else if (destination.IsSIMDStackSlot()) {
- if (source.IsFpuRegister()) {
- __ Str(QRegisterFrom(source), StackOperandFrom(destination));
- } else {
- DCHECK(source.IsSIMDStackSlot());
- UseScratchRegisterScope temps(GetVIXLAssembler());
- if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
- Register temp = temps.AcquireX();
- __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
- __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
- __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
- __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
- } else {
- VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
- __ Ldr(temp, StackOperandFrom(source));
- __ Str(temp, StackOperandFrom(destination));
- }
- }
+ GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
} else { // The destination is not a register. It must be a stack slot.
DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
if (source.IsRegister() || source.IsFpuRegister()) {
@@ -1582,13 +1615,13 @@ void CodeGeneratorARM64::Load(DataType::Type type,
}
void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
+ DataType::Type type,
CPURegister dst,
const MemOperand& src,
bool needs_null_check) {
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
Register temp_base = temps.AcquireX();
- DataType::Type type = instruction->GetType();
DCHECK(!src.IsPreIndex());
DCHECK(!src.IsPostIndex());
@@ -1779,7 +1812,7 @@ void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
// Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
// entire oat file. This adds an extra branch and we do not want to slow down the main path.
// For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
- if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) {
+ if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
__ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
// Ensure the pc position is recorded immediately after the `blr` instruction.
ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
@@ -1931,7 +1964,11 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsPredicatedInstanceFieldGet());
+
+ bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
bool object_field_get_with_read_barrier =
kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
@@ -1950,29 +1987,45 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
locations->AddTemp(FixedTempLocation());
}
}
- locations->SetInAt(0, Location::RequiresRegister());
+ // Input for object receiver.
+ locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
if (DataType::IsFloatingPointType(instruction->GetType())) {
- locations->SetOut(Location::RequiresFpuRegister());
+ if (is_predicated) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ } else {
+ locations->SetOut(Location::RequiresFpuRegister());
+ }
} else {
- // The output overlaps for an object field get when read barriers
- // are enabled: we do not want the load to overwrite the object's
- // location, as we need it to emit the read barrier.
- locations->SetOut(
- Location::RequiresRegister(),
- object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ if (is_predicated) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ } else {
+ // The output overlaps for an object field get when read barriers
+ // are enabled: we do not want the load to overwrite the object's
+ // location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_field_get_with_read_barrier ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
}
}
void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsPredicatedInstanceFieldGet());
+ bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
LocationSummary* locations = instruction->GetLocations();
- Location base_loc = locations->InAt(0);
+ uint32_t receiver_input = is_predicated ? 1 : 0;
+ Location base_loc = locations->InAt(receiver_input);
Location out = locations->Out();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
DataType::Type load_type = instruction->GetType();
- MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
+ MemOperand field =
+ HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
load_type == DataType::Type::kReference) {
@@ -1997,8 +2050,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
// Note that a potential implicit null check is handled in this
// CodeGeneratorARM64::LoadAcquire call.
// NB: LoadAcquire will record the pc info if needed.
- codegen_->LoadAcquire(
- instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true);
+ codegen_->LoadAcquire(instruction,
+ load_type,
+ OutputCPURegister(instruction),
+ field,
+ /* needs_null_check= */ true);
} else {
// Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
@@ -2031,12 +2087,19 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+ bool is_predicated =
+ instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
Register obj = InputRegisterAt(instruction, 0);
CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
CPURegister source = value;
Offset offset = field_info.GetFieldOffset();
DataType::Type field_type = field_info.GetFieldType();
+ std::optional<vixl::aarch64::Label> pred_is_null;
+ if (is_predicated) {
+ pred_is_null.emplace();
+ __ Cbz(obj, &*pred_is_null);
+ }
{
// We use a block to end the scratch scope before the write barrier, thus
@@ -2065,6 +2128,10 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
}
+
+ if (is_predicated) {
+ __ Bind(&*pred_is_null);
+ }
}
void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
@@ -3013,27 +3080,98 @@ void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instructi
Register out = OutputRegister(instruction);
Register dividend = InputRegisterAt(instruction, 0);
- if (abs_imm == 2) {
- int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
- __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
+ Register final_dividend;
+ if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
+ // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
+ // NOTE: The generated code for HDiv correctly works for the INT32_MIN/INT64_MIN dividends:
+ // imm == 2
+ // add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
+ // asr out, out(0x80000001), #1 => out = 0xc0000000
+ // This is the same as 'asr out, 0x80000000, #1'
+ //
+ // imm > 2
+ // add temp, dividend(0x80000000), imm - 1 => temp = 0b10..01..1, where the number
+ // of the rightmost 1s is ctz_imm.
+ // cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
+ // csel out, temp(0b10..01..1), dividend(0x80000000), lt => out = 0b10..01..1
+ // asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
+ // leftmost 1s is ctz_imm + 1.
+ // This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
+ //
+ // imm == INT32_MIN
+ // add tmp, dividend(0x80000000), #0x7fffffff => tmp = -1
+ // cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
+ // csel out, temp(-1), dividend(0x80000000), lt => out = -1
+ // neg out, out(-1), asr #31 => out = 1
+ // This is the same as 'neg out, dividend(0x80000000), asr #31'.
+ final_dividend = dividend;
} else {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = temps.AcquireSameSizeAs(out);
- __ Add(temp, dividend, abs_imm - 1);
- __ Cmp(dividend, 0);
- __ Csel(out, temp, dividend, lt);
+ if (abs_imm == 2) {
+ int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
+ __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
+ } else {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
+ __ Add(temp, dividend, abs_imm - 1);
+ __ Cmp(dividend, 0);
+ __ Csel(out, temp, dividend, lt);
+ }
+ final_dividend = out;
}
int ctz_imm = CTZ(abs_imm);
if (imm > 0) {
- __ Asr(out, out, ctz_imm);
+ __ Asr(out, final_dividend, ctz_imm);
} else {
- __ Neg(out, Operand(out, ASR, ctz_imm));
+ __ Neg(out, Operand(final_dividend, ASR, ctz_imm));
}
}
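
A standalone sketch, in plain C++, of the rounding-toward-zero corrections that the power-of-two path above encodes with ADD/CSEL/ASR (positive divisor magnitude only; the NEG for a negative immediate is left out). __builtin_ctz is a GCC/Clang builtin, and the sketch assumes arithmetic right shift of negative values, which holds on the targets of interest.

#include <cstdint>
#include <cstdio>

int32_t DivByPowerOfTwo(int32_t dividend, int32_t abs_imm /* power of two, >= 2 */) {
  const int ctz = __builtin_ctz(static_cast<uint32_t>(abs_imm));
  int32_t biased;
  if (abs_imm == 2) {
    // add out, dividend, dividend, lsr #31
    biased = dividend + static_cast<int32_t>(static_cast<uint32_t>(dividend) >> 31);
  } else {
    // add temp, dividend, #(abs_imm - 1); cmp dividend, #0; csel out, temp, dividend, lt
    biased = (dividend < 0) ? dividend + (abs_imm - 1) : dividend;
  }
  return biased >> ctz;  // asr out, ..., #ctz
}

int main() {
  std::printf("%d\n", DivByPowerOfTwo(-7, 4));         // -1, truncated toward zero
  std::printf("%d\n", DivByPowerOfTwo(INT32_MIN, 2));  // -1073741824
}
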
-void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+// Return true if the magic number was modified by subtracting 2^32 (Int32 div) or 2^64 (Int64 div),
+// in which case the dividend needs to be added.
+static inline bool NeedToAddDividend(int64_t magic_number, int64_t divisor) {
+ return divisor > 0 && magic_number < 0;
+}
+
+// Return true if the magic number was modified by adding 2^32 (Int32 div) or 2^64 (Int64 div),
+// in which case the dividend needs to be subtracted.
+static inline bool NeedToSubDividend(int64_t magic_number, int64_t divisor) {
+ return divisor < 0 && magic_number > 0;
+}
+
+// Generate code which increments the value in register 'in' by 1 if the value is negative.
+// It is done with 'add out, in, in, lsr #31 or #63'.
+// If the value is a result of an operation setting the N flag, CINC MI can be used
+// instead of ADD. 'use_cond_inc' controls this.
+void InstructionCodeGeneratorARM64::GenerateIncrementNegativeByOne(
+ Register out,
+ Register in,
+ bool use_cond_inc) {
+ if (use_cond_inc) {
+ __ Cinc(out, in, mi);
+ } else {
+ __ Add(out, in, Operand(in, LSR, in.GetSizeInBits() - 1));
+ }
+}
+
+// Helper to generate code producing the result of HRem with a constant divisor.
+void InstructionCodeGeneratorARM64::GenerateResultRemWithAnyConstant(
+ Register out,
+ Register dividend,
+ Register quotient,
+ int64_t divisor,
+ UseScratchRegisterScope* temps_scope) {
+ Register temp_imm = temps_scope->AcquireSameSizeAs(out);
+ __ Mov(temp_imm, divisor);
+ __ Msub(out, quotient, temp_imm, dividend);
+}
+
+// Helper to generate code for HDiv/HRem instructions when a dividend is non-negative and
+// a divisor is a positive constant, not power of 2.
+void InstructionCodeGeneratorARM64::GenerateInt64UnsignedDivRemWithAnyPositiveConstant(
+ HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
@@ -3042,45 +3180,175 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati
Register out = OutputRegister(instruction);
Register dividend = InputRegisterAt(instruction, 0);
int64_t imm = Int64FromConstant(second.GetConstant());
-
- DataType::Type type = instruction->GetResultType();
- DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+ DCHECK_GT(imm, 0);
int64_t magic;
int shift;
- CalculateMagicAndShiftForDivRem(
- imm, /* is_long= */ type == DataType::Type::kInt64, &magic, &shift);
+ CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireSameSizeAs(out);
- // temp = get_high(dividend * magic)
- __ Mov(temp, magic);
- if (type == DataType::Type::kInt64) {
- __ Smulh(temp, dividend, temp);
+ auto generate_unsigned_div_code = [this, magic, shift](Register out,
+ Register dividend,
+ Register temp) {
+ // temp = get_high(dividend * magic)
+ __ Mov(temp, magic);
+ if (magic > 0 && shift == 0) {
+ __ Smulh(out, dividend, temp);
+ } else {
+ __ Smulh(temp, dividend, temp);
+ if (magic < 0) {
+ // The negative magic means that the multiplier m is greater than INT64_MAX.
+ // In such a case shift is never 0. See the proof in
+ // InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant.
+ __ Add(temp, temp, dividend);
+ }
+ DCHECK_NE(shift, 0);
+ __ Lsr(out, temp, shift);
+ }
+ };
+
+ if (instruction->IsDiv()) {
+ generate_unsigned_div_code(out, dividend, temp);
} else {
- __ Smull(temp.X(), dividend, temp);
- __ Lsr(temp.X(), temp.X(), 32);
+ generate_unsigned_div_code(temp, dividend, temp);
+ GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
}
+}
+
+// Helper to generate code for HDiv/HRem instructions for any dividend and a constant divisor
+// (not power of 2).
+void InstructionCodeGeneratorARM64::GenerateInt64DivRemWithAnyConstant(
+ HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = OutputRegister(instruction);
+ Register dividend = InputRegisterAt(instruction, 0);
+ int64_t imm = Int64FromConstant(second.GetConstant());
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
- if (imm > 0 && magic < 0) {
- __ Add(temp, temp, dividend);
- } else if (imm < 0 && magic > 0) {
- __ Sub(temp, temp, dividend);
+ // temp = get_high(dividend * magic)
+ __ Mov(temp, magic);
+ __ Smulh(temp, dividend, temp);
+
+ // The multiplication result might need some corrections to be finalized.
+ // The last correction is to increment by 1, if the result is negative.
+ // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
+ // Such ADD usually has latency 2, e.g. on Cortex-A55.
+ // However if one of the corrections is ADD or SUB, the sign can be detected
+ // with ADDS/SUBS. They set the N flag if the result is negative.
+ // This allows using CINC MI, which has latency 1.
+ bool use_cond_inc = false;
+
+ // Some combinations of magic_number and the divisor require to correct the result.
+ // Check whether the correction is needed.
+ if (NeedToAddDividend(magic, imm)) {
+ __ Adds(temp, temp, dividend);
+ use_cond_inc = true;
+ } else if (NeedToSubDividend(magic, imm)) {
+ __ Subs(temp, temp, dividend);
+ use_cond_inc = true;
}
if (shift != 0) {
__ Asr(temp, temp, shift);
}
- if (instruction->IsDiv()) {
- __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
+ if (instruction->IsRem()) {
+ GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
+ GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
} else {
- __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
- // TODO: Strength reduction for msub.
- Register temp_imm = temps.AcquireSameSizeAs(out);
- __ Mov(temp_imm, imm);
- __ Msub(out, temp, temp_imm, dividend);
+ GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateInt32DivRemWithAnyConstant(
+ HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ Register out = OutputRegister(instruction);
+ Register dividend = InputRegisterAt(instruction, 0);
+ int64_t imm = Int64FromConstant(second.GetConstant());
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
+
+ // temp = get_high(dividend * magic)
+ __ Mov(temp, magic);
+ __ Smull(temp.X(), dividend, temp);
+
+ // The multiplication result might need some corrections to be finalized.
+ // The last correction is to increment by 1, if the result is negative.
+ // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
+ // Such ADD usually has latency 2, e.g. on Cortex-A55.
+ // However if one of the corrections is ADD or SUB, the sign can be detected
+ // with ADDS/SUBS. They set the N flag if the result is negative.
+ // This allows using CINC MI, which has latency 1.
+ bool use_cond_inc = false;
+
+ // The ADD/SUB correction is performed in the high 32 bits; the low 32 bits
+ // of the 64-bit product are ignored because the type is kInt32.
+ if (NeedToAddDividend(magic, imm)) {
+ __ Adds(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
+ use_cond_inc = true;
+ } else if (NeedToSubDividend(magic, imm)) {
+ __ Subs(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
+ use_cond_inc = true;
+ }
+
+ // Extract the result from the high 32 bits and apply the final right shift.
+ DCHECK_LT(shift, 32);
+ if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
+ // No need to adjust the result for a non-negative dividend and a positive divisor.
+ if (instruction->IsDiv()) {
+ __ Lsr(out.X(), temp.X(), 32 + shift);
+ } else {
+ __ Lsr(temp.X(), temp.X(), 32 + shift);
+ GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
+ }
+ } else {
+ __ Asr(temp.X(), temp.X(), 32 + shift);
+
+ if (instruction->IsRem()) {
+ GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
+ GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
+ } else {
+ GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
+ }
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction,
+ int64_t divisor) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ if (instruction->GetResultType() == DataType::Type::kInt64) {
+ if (divisor > 0 && HasNonNegativeInputAt(instruction, 0)) {
+ GenerateInt64UnsignedDivRemWithAnyPositiveConstant(instruction);
+ } else {
+ GenerateInt64DivRemWithAnyConstant(instruction);
+ }
+ } else {
+ GenerateInt32DivRemWithAnyConstant(instruction);
}
}
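
A standalone sketch of the magic-number sequence that the helpers above generate, worked for a 32-bit division by 7. The constants (magic 0x92492493, shift 2) are the usual ones produced for this divisor; in the compiler they come from CalculateMagicAndShiftForDivRem. Since the magic value is negative and the divisor positive, the dividend is added back (NeedToAddDividend), and the final step adds 1 for negative intermediate results, which the generated code does with ADDS + CINC MI.

#include <cstdint>
#include <cstdio>

int32_t DivBy7(int32_t dividend) {
  const int32_t magic = static_cast<int32_t>(0x92492493);  // Negative as a signed value.
  const int shift = 2;
  // smull + lsr #32: keep the high 32 bits of the 64-bit product.
  int32_t high = static_cast<int32_t>((static_cast<int64_t>(dividend) * magic) >> 32);
  high += dividend;   // Correction because the magic number is negative.
  high >>= shift;     // asr #shift (arithmetic shift of negatives assumed).
  // Increment by 1 if negative, so the quotient truncates toward zero.
  return high + static_cast<int32_t>(static_cast<uint32_t>(high) >> 31);
}

int main() {
  std::printf("%d %d\n", DivBy7(100), DivBy7(-100));  // 14 -14
}
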
@@ -3097,7 +3365,7 @@ void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instructio
} else {
// Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
DCHECK(imm < -2 || imm > 2) << imm;
- GenerateDivRemWithAnyConstant(instruction);
+ GenerateDivRemWithAnyConstant(instruction, imm);
}
}
@@ -3505,14 +3773,37 @@ void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
// MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
}
+void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
+ __ Claim(adjustment);
+ GetAssembler()->cfi().AdjustCFAOffset(adjustment);
+}
+
+void CodeGeneratorARM64::DecreaseFrame(size_t adjustment) {
+ __ Drop(adjustment);
+ GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
+}
+
void CodeGeneratorARM64::GenerateNop() {
__ Nop();
}
+void LocationsBuilderARM64::VisitPredicatedInstanceFieldGet(
+ HPredicatedInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
+void InstructionCodeGeneratorARM64::VisitPredicatedInstanceFieldGet(
+ HPredicatedInstanceFieldGet* instruction) {
+ vixl::aarch64::Label finish;
+ __ Cbz(InputRegisterAt(instruction, 1), &finish);
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+ __ Bind(&finish);
+}
+
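
A standalone sketch of the control flow emitted above for a predicated instance-field get: input 0 holds the default value and doubles as the output (SameAsFirstInput), input 1 is the receiver, and the CBZ skips the load when the receiver is null so the default value survives.

#include <cstdint>
#include <cstdio>

struct Obj { int32_t field; };  // Stand-in for a managed object with one field.

int32_t PredicatedFieldGet(const Obj* receiver, int32_t default_value) {
  int32_t out = default_value;  // out is SameAsFirstInput.
  if (receiver != nullptr) {    // cbz receiver, finish
    out = receiver->field;      // HandleFieldGet body
  }
  return out;                   // bind finish
}

int main() {
  Obj o{42};
  std::printf("%d %d\n", PredicatedFieldGet(&o, -1), PredicatedFieldGet(nullptr, -1));  // 42 -1
}
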
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
@@ -4078,6 +4369,10 @@ void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
HandleInvoke(invoke);
+ if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
+ // We cannot request ip1 as it's blocked by the register allocator.
+ invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
+ }
}
void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
@@ -4089,8 +4384,9 @@ void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction
GetGraph()->IsCompilingBaseline() &&
!Runtime::Current()->IsAotCompiler()) {
DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
- ScopedObjectAccess soa(Thread::Current());
- ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ ScopedProfilingInfoUse spiu(
+ Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
+ ProfilingInfo* info = spiu.GetProfilingInfo();
if (info != nullptr) {
InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
uint64_t address = reinterpret_cast64<uint64_t>(cache);
@@ -4147,7 +4443,21 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope scratch_scope(masm);
scratch_scope.Exclude(ip1);
- __ Mov(ip1, invoke->GetDexMethodIndex());
+ if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
+ Location interface_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
+ if (interface_method.IsStackSlot()) {
+ __ Ldr(ip1, StackOperandFrom(interface_method));
+ } else {
+ __ Mov(ip1, XRegisterFrom(interface_method));
+ }
+ // If the load kind is through a runtime call, we will pass the method we
+ // fetch from the IMT, which will either be a no-op if we don't hit the conflict
+ // stub, or will make us always go through the trampoline when there is a
+ // conflict.
+ } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
+ codegen_->LoadMethod(
+ invoke->GetHiddenArgumentLoadKind(), Location::RegisterLocation(ip1.GetCode()), invoke);
+ }
__ Ldr(temp,
MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
@@ -4155,6 +4465,11 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
invoke->GetImtIndex(), kArm64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ Ldr(temp, MemOperand(temp, method_offset));
+ if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
+ // We pass the method from the IMT in case of a conflict. This will ensure
+ // we go into the runtime to resolve the actual method.
+ __ Mov(ip1, temp);
+ }
// lr = temp->GetEntryPoint();
__ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
@@ -4190,7 +4505,13 @@ void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* inv
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
@@ -4209,33 +4530,21 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStatic
return desired_dispatch_info;
}
-void CodeGeneratorARM64::GenerateStaticOrDirectCall(
- HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
- // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
- Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
- switch (invoke->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
- uint32_t offset =
- GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
- // temp = thread->string_init_entrypoint
- __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
- break;
- }
- case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
- break;
- case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
+ switch (load_kind) {
+ case MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
// Add ADRP with its PC-relative method patch.
- vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod());
+ vixl::aarch64::Label* adrp_label =
+ NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
// Add ADD with its PC-relative method patch.
vixl::aarch64::Label* add_label =
- NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label);
+ NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), adrp_label);
EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ case MethodLoadKind::kBootImageRelRo: {
// Add ADRP with its PC-relative .data.bimg.rel.ro patch.
uint32_t boot_image_offset = GetBootImageOffset(invoke);
vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
@@ -4246,30 +4555,86 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
+ case MethodLoadKind::kBssEntry: {
// Add ADRP with its PC-relative .bss entry patch.
- MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
- vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
+ vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(invoke->GetMethodReference());
EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
// Add LDR with its PC-relative .bss entry patch.
vixl::aarch64::Label* ldr_label =
- NewMethodBssEntryPatch(target_method, adrp_label);
+ NewMethodBssEntryPatch(invoke->GetMethodReference(), adrp_label);
// All aligned loads are implicitly atomic consume operations on ARM64.
EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ case MethodLoadKind::kJitDirectAddress: {
// Load method address from literal pool.
- __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
+ __ Ldr(XRegisterFrom(temp),
+ DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
+ break;
+ }
+ case MethodLoadKind::kRuntimeCall: {
+ // Test situation, don't do anything.
break;
- case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+ }
+ default: {
+ LOG(FATAL) << "Load kind should have already been handled " << load_kind;
+ UNREACHABLE();
+ }
+ }
+}
+
+void CodeGeneratorARM64::GenerateStaticOrDirectCall(
+ HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
+ // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
+ Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
+ switch (invoke->GetMethodLoadKind()) {
+ case MethodLoadKind::kStringInit: {
+ uint32_t offset =
+ GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
+ // temp = thread->string_init_entrypoint
+ __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
+ break;
+ }
+ case MethodLoadKind::kRecursive: {
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
+ break;
+ }
+ case MethodLoadKind::kRuntimeCall: {
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
}
+ case MethodLoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+ if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ // Do not materialize the method pointer, load directly the entrypoint.
+ // Add ADRP with its PC-relative JNI entrypoint patch.
+ vixl::aarch64::Label* adrp_label =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
+ EmitAdrpPlaceholder(adrp_label, lr);
+ // Add the LDR with its PC-relative method patch.
+ vixl::aarch64::Label* add_label =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
+ EmitLdrOffsetPlaceholder(add_label, lr, lr);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ default: {
+ LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
+ break;
+ }
}
+ auto call_lr = [&]() {
+ // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ // lr()
+ __ blr(lr);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ };
switch (invoke->GetCodePtrLocation()) {
- case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+ case CodePtrLocation::kCallSelf:
{
// Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
ExactAssemblyScope eas(GetVIXLAssembler(),
@@ -4279,21 +4644,57 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
break;
- case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
- // LR = callee_method->entry_point_from_quick_compiled_code_;
- __ Ldr(lr, MemOperand(
- XRegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
- {
- // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
- ExactAssemblyScope eas(GetVIXLAssembler(),
- kInstructionSize,
- CodeBufferCheckScope::kExactSize);
+ case CodePtrLocation::kCallCriticalNative: {
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
+ kAapcs64StackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke);
+ if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
+ call_lr();
+ } else {
+ // LR = callee_method->ptr_sized_fields_.data_; // EntryPointFromJni
+ MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
+ __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
// lr()
- __ blr(lr);
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ call_lr();
}
+ // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ Ubfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kInt8:
+ __ Sbfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kUint16:
+ __ Ubfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt16:
+ __ Sbfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ DecreaseFrame(out_frame_size);
+ }
+ break;
+ }
+ case CodePtrLocation::kCallArtMethod: {
+ // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
+ MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
+ __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+ // lr()
+ call_lr();
break;
+ }
}
DCHECK(!IsLeafMethod());
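
A standalone sketch of why the Ubfx/Sbfx in the kCallCriticalNative case above are emitted: the native callee may leave the upper bits of w0 unspecified for return types narrower than 32 bits, while managed code expects them zero-/sign-extended, so the masks below mirror the emitted bitfield extracts. The garbage upper bits in the example are invented.

#include <cstdint>
#include <cstdio>

int32_t ExtendBool(uint32_t w0)   { return w0 & 0xffu; }                          // Ubfx w0, w0, #0, #8
int32_t ExtendInt8(uint32_t w0)   { return static_cast<int8_t>(w0 & 0xffu); }     // Sbfx w0, w0, #0, #8
int32_t ExtendUint16(uint32_t w0) { return w0 & 0xffffu; }                        // Ubfx w0, w0, #0, #16
int32_t ExtendInt16(uint32_t w0)  { return static_cast<int16_t>(w0 & 0xffffu); }  // Sbfx w0, w0, #0, #16

int main() {
  // Upper bits set to model an unextended native return value.
  std::printf("%d %d\n", ExtendInt8(0xdeadbeffu), ExtendInt16(0xdead8001u));  // -1 -32767
}
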
@@ -4346,11 +4747,38 @@ void CodeGeneratorARM64::GenerateVirtualCall(
}
}
+void CodeGeneratorARM64::MoveFromReturnRegister(Location trg, DataType::Type type) {
+ if (!trg.IsValid()) {
+ DCHECK(type == DataType::Type::kVoid);
+ return;
+ }
+
+ DCHECK_NE(type, DataType::Type::kVoid);
+
+ if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
+ Register trg_reg = RegisterFrom(trg, type);
+ Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
+ __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
+ } else {
+ VRegister trg_reg = FPRegisterFrom(trg, type);
+ VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
+ __ Fmov(trg_reg, res_reg);
+ }
+}
+
void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
+ IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
HandleInvoke(invoke);
}
void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
+ return;
+ }
codegen_->GenerateInvokePolymorphicCall(invoke);
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
@@ -4400,10 +4828,26 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
}
vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
- const DexFile& dex_file,
- dex::TypeIndex type_index,
+ HLoadClass* load_class,
vixl::aarch64::Label* adrp_label) {
- return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
+ const DexFile& dex_file = load_class->GetDexFile();
+ dex::TypeIndex type_index = load_class->GetTypeIndex();
+ ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
+ switch (load_class->GetLoadKind()) {
+ case HLoadClass::LoadKind::kBssEntry:
+ patches = &type_bss_entry_patches_;
+ break;
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ patches = &public_type_bss_entry_patches_;
+ break;
+ case HLoadClass::LoadKind::kBssEntryPackage:
+ patches = &package_type_bss_entry_patches_;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
+ UNREACHABLE();
+ }
+ return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, patches);
}
vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
@@ -4421,9 +4865,16 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
+ MethodReference target_method,
+ vixl::aarch64::Label* adrp_label) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
+}
+
void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
- DCHECK(!Runtime::Current()->UseJitCompilation());
+ DCHECK(!GetCompilerOptions().IsJitCompiler());
call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
__ bind(bl_label);
@@ -4432,7 +4883,7 @@ void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offse
void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
- if (Runtime::Current()->UseJitCompilation()) {
+ if (GetCompilerOptions().IsJitCompiler()) {
auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
vixl::aarch64::Label* slow_path_entry = &it->second.label;
__ cbnz(mr, slow_path_entry);
@@ -4523,7 +4974,7 @@ void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label);
EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
} else {
- DCHECK(Runtime::Current()->UseJitCompilation());
+ DCHECK(GetCompilerOptions().IsJitCompiler());
gc::Heap* heap = Runtime::Current()->GetHeap();
DCHECK(!heap->GetBootImageSpaces().empty());
const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
@@ -4531,28 +4982,43 @@ void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
}
}
-void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
- uint32_t boot_image_offset) {
- DCHECK(invoke->IsStatic());
- InvokeRuntimeCallingConvention calling_convention;
- Register argument = calling_convention.GetRegisterAt(0);
+void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,
+ TypeReference target_type) {
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ DCHECK(GetCompilerOptions().IsBootImage());
+ // Add ADRP with its PC-relative type patch.
+ vixl::aarch64::Label* adrp_label =
+ NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
+ EmitAdrpPlaceholder(adrp_label, reg.X());
+ // Add ADD with its PC-relative type patch.
+ vixl::aarch64::Label* add_label =
+ NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), adrp_label);
+ EmitAddPlaceholder(add_label, reg.X(), reg.X());
+}
+
+void CodeGeneratorARM64::LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg, HInvoke* invoke) {
+ DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
if (GetCompilerOptions().IsBootImage()) {
- DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
- // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
- MethodReference target_method = invoke->GetTargetMethod();
+ MethodReference target_method = invoke->GetResolvedMethodReference();
dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
- // Add ADRP with its PC-relative type patch.
- vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx);
- EmitAdrpPlaceholder(adrp_label, argument.X());
- // Add ADD with its PC-relative type patch.
- vixl::aarch64::Label* add_label =
- NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label);
- EmitAddPlaceholder(add_label, argument.X(), argument.X());
+ LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
} else {
- LoadBootImageAddress(argument, boot_image_offset);
+ uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
+ LoadBootImageAddress(reg, boot_image_offset);
+ }
+}
+
+void CodeGeneratorARM64::LoadClassRootForIntrinsic(vixl::aarch64::Register reg,
+ ClassRoot class_root) {
+ if (GetCompilerOptions().IsBootImage()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
+ TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
+ LoadTypeForBootImageIntrinsic(reg, target_type);
+ } else {
+ uint32_t boot_image_offset = GetBootImageOffset(class_root);
+ LoadBootImageAddress(reg, boot_image_offset);
}
- InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
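The ADRP/ADD pair emitted by LoadTypeForBootImageIntrinsic above materializes a PC-relative address in two steps: the ADRP produces the 4 KiB page of the target relative to the page containing the ADRP itself, and the ADD supplies the low 12 bits. A minimal standalone sketch of the arithmetic the two patches encode, using made-up addresses chosen only for illustration:

#include <cassert>
#include <cstdint>

int main() {
  // Made-up addresses, for illustration only.
  const uint64_t pc = 0x0000007012345678;      // Address of the ADRP instruction.
  const uint64_t target = 0x00000070fedcba98;  // Address the two patches resolve to.

  // ADRP: the 4 KiB page of the target, computed relative to the page containing the PC.
  const uint64_t page = (pc & ~UINT64_C(0xfff)) + (((target >> 12) - (pc >> 12)) << 12);
  // ADD: the low 12 bits of the target.
  const uint64_t address = page + (target & UINT64_C(0xfff));

  assert(address == target);
  return 0;
}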
@@ -4583,8 +5049,11 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
method_bss_entry_patches_.size() +
boot_image_type_patches_.size() +
type_bss_entry_patches_.size() +
+ public_type_bss_entry_patches_.size() +
+ package_type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
string_bss_entry_patches_.size() +
+ boot_image_jni_entrypoint_patches_.size() +
boot_image_other_patches_.size() +
call_entrypoint_patches_.size() +
baker_read_barrier_patches_.size();
@@ -4612,8 +5081,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
method_bss_entry_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
+ public_type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
+ package_type_bss_entry_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
string_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
+ boot_image_jni_entrypoint_patches_, linker_patches);
for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
DCHECK(info.target_dex_file == nullptr);
linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
@@ -4643,7 +5118,7 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kArm64PointerSize).Int32Value());
assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
- if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
*debug_name = "MethodCallThunk";
}
break;
@@ -4651,7 +5126,7 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
case linker::LinkerPatch::Type::kCallEntrypoint: {
Offset offset(patch.EntrypointOffset());
assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
- if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
*debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
}
break;
@@ -4695,14 +5170,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
return;
}
- {
- // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- LocationSummary* locations = invoke->GetLocations();
- codegen_->GenerateStaticOrDirectCall(
- invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
- }
+ LocationSummary* locations = invoke->GetLocations();
+ codegen_->GenerateStaticOrDirectCall(
+ invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
@@ -4735,11 +5205,13 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
- DCHECK(!Runtime::Current()->UseJitCompilation());
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ case HLoadClass::LoadKind::kBssEntryPackage:
+ DCHECK(!GetCompilerOptions().IsJitCompiler());
break;
case HLoadClass::LoadKind::kJitBootImageAddress:
case HLoadClass::LoadKind::kJitTableAddress:
- DCHECK(Runtime::Current()->UseJitCompilation());
+ DCHECK(GetCompilerOptions().IsJitCompiler());
break;
case HLoadClass::LoadKind::kRuntimeCall:
break;
@@ -4758,7 +5230,9 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
return;
}
- DCHECK(!cls->NeedsAccessCheck());
+ DCHECK_EQ(cls->NeedsAccessCheck(),
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage);
const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
@@ -4792,7 +5266,9 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
return;
}
- DCHECK(!cls->NeedsAccessCheck());
+ DCHECK_EQ(cls->NeedsAccessCheck(),
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage);
Location out_loc = cls->GetLocations()->Out();
Register out = OutputRegister(cls);
@@ -4832,7 +5308,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
}
case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
+ uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
// Add ADRP with its PC-relative .data.bimg.rel.ro patch.
vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
@@ -4842,16 +5318,15 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
break;
}
- case HLoadClass::LoadKind::kBssEntry: {
+ case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ case HLoadClass::LoadKind::kBssEntryPackage: {
// Add ADRP with its PC-relative Class .bss entry patch.
- const DexFile& dex_file = cls->GetDexFile();
- dex::TypeIndex type_index = cls->GetTypeIndex();
vixl::aarch64::Register temp = XRegisterFrom(out_loc);
- vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
+ vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(cls);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
// Add LDR with its PC-relative Class .bss entry patch.
- vixl::aarch64::Label* ldr_label =
- codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
+ vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(cls, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
// All aligned loads are implicitly atomic consume operations on ARM64.
codegen_->GenerateGcRootFieldLoad(cls,
@@ -4954,11 +5429,11 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
- DCHECK(!Runtime::Current()->UseJitCompilation());
+ DCHECK(!GetCompilerOptions().IsJitCompiler());
break;
case HLoadString::LoadKind::kJitBootImageAddress:
case HLoadString::LoadKind::kJitTableAddress:
- DCHECK(Runtime::Current()->UseJitCompilation());
+ DCHECK(GetCompilerOptions().IsJitCompiler());
break;
case HLoadString::LoadKind::kRuntimeCall:
break;
@@ -5009,7 +5484,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
// Add ADRP with its PC-relative .data.bimg.rel.ro patch.
- uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
+ uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
// Add LDR with its PC-relative .data.bimg.rel.ro patch.
@@ -5372,18 +5847,27 @@ void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instructi
Register out = OutputRegister(instruction);
Register dividend = InputRegisterAt(instruction, 0);
- if (abs_imm == 2) {
- __ Cmp(dividend, 0);
- __ And(out, dividend, 1);
- __ Csneg(out, out, out, ge);
+ if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
+    // No need to adjust the result for a non-negative dividend or for the INT32_MIN/INT64_MIN
+    // dividend. The generated code is also correct for INT32_MIN/INT64_MIN: INT*_MIN % imm is 0
+    // for any power-of-2 imm, and the 'and' below only looks at bits 0..30 (Int32 case) /
+    // 0..62 (Int64 case) of the dividend, which are all zero for INT32_MIN/INT64_MIN, so the
+    // 'and' produces zero as expected.
+ __ And(out, dividend, abs_imm - 1);
} else {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = temps.AcquireSameSizeAs(out);
+ if (abs_imm == 2) {
+ __ Cmp(dividend, 0);
+ __ And(out, dividend, 1);
+ __ Csneg(out, out, out, ge);
+ } else {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
- __ Negs(temp, dividend);
- __ And(out, dividend, abs_imm - 1);
- __ And(temp, temp, abs_imm - 1);
- __ Csneg(out, out, temp, mi);
+ __ Negs(temp, dividend);
+ __ And(out, dividend, abs_imm - 1);
+ __ And(temp, temp, abs_imm - 1);
+ __ Csneg(out, out, temp, mi);
+ }
}
}
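The fast path above relies on the identity that, for a power-of-2 divisor, the low bits of a non-negative dividend are exactly the remainder, and that those low bits are all zero for INT32_MIN/INT64_MIN. A minimal sketch checking that identity in plain C++ (kImm is an arbitrary power-of-2 divisor picked for illustration):

#include <cassert>
#include <climits>
#include <cstdint>
#include <initializer_list>

int main() {
  constexpr int32_t kImm = 8;  // Arbitrary power-of-2 divisor, for illustration.
  // Non-negative dividends: masking with (kImm - 1) yields the remainder directly.
  for (int32_t x : {0, 1, 7, 8, 9, 12345}) {
    assert((x & (kImm - 1)) == x % kImm);
  }
  // INT32_MIN: bits 0..30 are zero, so the mask yields 0, matching INT32_MIN % kImm.
  assert((INT32_MIN & (kImm - 1)) == 0 && INT32_MIN % kImm == 0);
  return 0;
}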
@@ -5405,7 +5889,7 @@ void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instructio
GenerateIntRemForPower2Denom(instruction);
} else {
DCHECK(imm < -2 || imm > 2) << imm;
- GenerateDivRemWithAnyConstant(instruction);
+ GenerateDivRemWithAnyConstant(instruction, imm);
}
}
@@ -6050,21 +6534,21 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
-void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier(
- vixl::aarch64::Register marked,
+void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier(
+ vixl::aarch64::Register marked_old_value,
vixl::aarch64::Register old_value) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
- uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode());
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode());
ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
vixl::aarch64::Label return_address;
__ adr(lr, &return_address);
static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
"GC root LDR must be 2 instructions (8B) before the return address label.");
- __ mov(marked, old_value);
+ __ mov(marked_old_value, old_value);
EmitBakerReadBarrierCbnz(custom_data);
__ bind(&return_address);
}
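The -8 checked by the static_assert above follows directly from the layout inside the ExactAssemblyScope: only the MOV (which takes the place of the GC root LDR) and the CBNZ, two 4-byte A64 instructions, sit between the marked move and the bound return-address label. A small sketch of that arithmetic, with locally defined constants standing in for the real ones in the ART/VIXL headers:

#include <cstdint>

// Locally defined stand-ins, for illustration only.
constexpr int64_t kA64InstructionSizeBytes = 4;
constexpr int64_t kInstructionsBeforeReturnAddress = 2;  // The MOV and the CBNZ.
constexpr int64_t kGcRootLdrOffsetBytes =
    -kInstructionsBeforeReturnAddress * kA64InstructionSizeBytes;
static_assert(kGcRootLdrOffsetBytes == -8,
              "The marked MOV must be 8 bytes before the return address label.");

int main() { return 0; }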
@@ -6260,6 +6744,18 @@ void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location te
}
}
+SlowPathCodeARM64* CodeGeneratorARM64::AddReadBarrierSlowPath(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
+ ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+ return slow_path;
+}
+
void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
Location out,
Location ref,
@@ -6279,9 +6775,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
// not used by the artReadBarrierSlow entry point.
//
// TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
- SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
- ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
- AddSlowPath(slow_path);
+ SlowPathCodeARM64* slow_path = AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
__ B(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -6372,6 +6866,64 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_
}
}
+MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
+ HVecMemoryOperation* instruction,
+ UseScratchRegisterScope* temps_scope,
+ size_t size,
+ bool is_string_char_at,
+ /*out*/ Register* scratch) {
+ LocationSummary* locations = instruction->GetLocations();
+ Register base = InputRegisterAt(instruction, 0);
+
+ if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+ DCHECK(!is_string_char_at);
+ return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+ }
+
+ Location index = locations->InAt(1);
+ uint32_t offset = is_string_char_at
+ ? mirror::String::ValueOffset().Uint32Value()
+ : mirror::Array::DataOffset(size).Uint32Value();
+ size_t shift = ComponentSizeShiftWidth(size);
+
+ // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
+ DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
+
+ if (index.IsConstant()) {
+ offset += Int64FromLocation(index) << shift;
+ return HeapOperand(base, offset);
+ } else {
+ *scratch = temps_scope->AcquireSameSizeAs(base);
+ __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
+ return HeapOperand(*scratch, offset);
+ }
+}
+
+SVEMemOperand InstructionCodeGeneratorARM64::VecSVEAddress(
+ HVecMemoryOperation* instruction,
+ UseScratchRegisterScope* temps_scope,
+ size_t size,
+ bool is_string_char_at,
+ /*out*/ Register* scratch) {
+ LocationSummary* locations = instruction->GetLocations();
+ Register base = InputRegisterAt(instruction, 0);
+ Location index = locations->InAt(1);
+
+ // TODO: Support intermediate address sharing for SVE accesses.
+ DCHECK(!instruction->InputAt(1)->IsIntermediateAddressIndex());
+ DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
+ DCHECK(!index.IsConstant());
+
+ uint32_t offset = is_string_char_at
+ ? mirror::String::ValueOffset().Uint32Value()
+ : mirror::Array::DataOffset(size).Uint32Value();
+ size_t shift = ComponentSizeShiftWidth(size);
+
+ *scratch = temps_scope->AcquireSameSizeAs(base);
+ __ Add(*scratch, base, offset);
+ return SVEMemOperand(scratch->X(), XRegisterFrom(index), LSL, shift);
+}
+
#undef __
#undef QUICK_ENTRY_POINT
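Both VecNEONAddress and VecSVEAddress above reduce to the same effective-address formula: base + data_offset + (index << log2(element_size)), where data_offset is the array data offset (or the String value offset for is_string_char_at). A minimal sketch of that computation, using a hypothetical kDataOffset in place of mirror::Array::DataOffset(size):

#include <cassert>
#include <cstdint>

// Hypothetical header size standing in for mirror::Array::DataOffset(size).
constexpr uint64_t kDataOffset = 16;

// Log2 of the element size, mirroring ComponentSizeShiftWidth(size).
constexpr uint64_t ComponentShift(uint64_t element_size) {
  uint64_t shift = 0;
  while ((UINT64_C(1) << shift) < element_size) ++shift;
  return shift;
}

// Effective address of element `index` in an array of `element_size`-byte elements at `base`.
constexpr uint64_t ElementAddress(uint64_t base, uint64_t index, uint64_t element_size) {
  return base + kDataOffset + (index << ComponentShift(element_size));
}

int main() {
  // 4-byte elements: element 3 lives 16 + 3 * 4 = 28 bytes past the array base.
  assert(ElementAddress(/*base=*/0x1000, /*index=*/3, /*element_size=*/4) == 0x1000 + 28);
  return 0;
}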
@@ -6425,11 +6977,11 @@ void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
switch (kind) {
case BakerReadBarrierKind::kField:
case BakerReadBarrierKind::kAcquire: {
- auto base_reg =
- Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ Register base_reg =
+ vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(base_reg.GetCode());
- auto holder_reg =
- Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ Register holder_reg =
+ vixl::aarch64::XRegister(BakerReadBarrierSecondRegField::Decode(encoded_data));
CheckValidReg(holder_reg.GetCode());
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip0, ip1);
@@ -6475,8 +7027,8 @@ void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
break;
}
case BakerReadBarrierKind::kArray: {
- auto base_reg =
- Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ Register base_reg =
+ vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(base_reg.GetCode());
DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
BakerReadBarrierSecondRegField::Decode(encoded_data));
@@ -6504,8 +7056,8 @@ void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
// and it does not have a forwarding address), call the correct introspection entrypoint;
// otherwise return the reference (or the extracted forwarding address).
// There is no gray bit check for GC roots.
- auto root_reg =
- Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ Register root_reg =
+ vixl::aarch64::WRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(root_reg.GetCode());
DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
BakerReadBarrierSecondRegField::Decode(encoded_data));
@@ -6538,10 +7090,8 @@ void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
}
// For JIT, the slow path is considered part of the compiled method,
- // so JIT should pass null as `debug_name`. Tests may not have a runtime.
- DCHECK(Runtime::Current() == nullptr ||
- !Runtime::Current()->UseJitCompilation() ||
- debug_name == nullptr);
+ // so JIT should pass null as `debug_name`.
+ DCHECK(!GetCompilerOptions().IsJitCompiler() || debug_name == nullptr);
if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
std::ostringstream oss;
oss << "BakerReadBarrierThunk";