summaryrefslogtreecommitdiff
path: root/compiler/optimizing/code_generator_arm.cc
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing/code_generator_arm.cc')
-rw-r--r--compiler/optimizing/code_generator_arm.cc475
1 files changed, 335 insertions, 140 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 710ca7ad45..2560c9fedd 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -636,10 +636,75 @@ class ArraySetSlowPathARM : public SlowPathCodeARM {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
+// Abstract base class for read barrier slow paths marking a reference
+// `ref`.
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM {
+ protected:
+ ReadBarrierMarkSlowPathBaseARM(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM"; }
+
+ // Generate assembly code calling the read barrier marking runtime
+ // entry point (ReadBarrierMarkRegX).
+ void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
+ Register ref_reg = ref_.AsRegister<Register>();
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ __ blx(entrypoint_.AsRegister<Register>());
+ } else {
+ // Entrypoint is not already loaded, load from the thread.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ }
+ }
+
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if it is already loaded.
+ const Location entrypoint_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM);
+};
+
// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking (see
-// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
@@ -651,12 +716,12 @@ class ArraySetSlowPathARM : public SlowPathCodeARM {
// If `entrypoint` is a valid location it is assumed to already be
// holding the entrypoint. The case where the entrypoint is passed in
// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
+class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
public:
ReadBarrierMarkSlowPathARM(HInstruction* instruction,
Location ref,
Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -664,15 +729,77 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ if (kIsDebugBuild) {
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ }
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+ __ b(GetExitLabel());
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). The field `obj.field` in the object `obj` holding
+// this reference does not get updated by this slow path after marking
+// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+// below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowPathBaseARM {
+ public:
+ LoadReferenceWithBakerReadBarrierSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ Register temp,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
+ obj_(obj),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierSlowPathARM";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK_NE(ref_reg, temp_);
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
- instruction_->IsLoadClass() ||
- instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
(instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
@@ -686,158 +813,202 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(ref_reg, SP);
- DCHECK_NE(ref_reg, LR);
- DCHECK_NE(ref_reg, PC);
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_reg, IP);
- DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
+
+ // When using MaybeGenerateReadBarrierSlow, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
//
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
//
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
+ __ add(obj_, obj_, ShifterOperand(temp_, LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
//
- // rX <- ReadBarrierMarkRegX(rX)
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
//
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ blx(entrypoint_.AsRegister<Register>());
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
+ __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
+ GenerateReadBarrierMarkRuntimeCall(codegen);
+
__ b(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+ // The register containing the object holding the marked object reference field.
+ Register obj_;
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`.
+ Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM);
};
-// Slow path marking an object reference `ref` during a read barrier,
-// and if needed, atomically updating the field `obj.field` in the
-// object `obj` holding this reference after marking (contrary to
-// ReadBarrierMarkSlowPathARM above, which never tries to update
-// `obj.field`).
+// Slow path loading `obj`'s lock word, loading a reference from
+// object `*(obj + offset + (index << scale_factor))` into `ref`, and
+// marking `ref` if `obj` is gray according to the lock word (Baker
+// read barrier). If needed, this slow path also atomically updates
+// the field `obj.field` in the object `obj` holding this reference
+// after marking (contrary to
+// LoadReferenceWithBakerReadBarrierSlowPathARM above, which never
+// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
-class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked.
+class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
+ : public ReadBarrierMarkSlowPathBaseARM {
public:
- ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
- Location ref,
- Register obj,
- Location field_offset,
- Register temp1,
- Register temp2,
- Location entrypoint = Location::NoLocation())
- : SlowPathCodeARM(instruction),
- ref_(ref),
+ LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ bool needs_null_check,
+ Register temp1,
+ Register temp2,
+ Location entrypoint)
+ : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint),
obj_(obj),
- field_offset_(field_offset),
+ offset_(offset),
+ index_(index),
+ scale_factor_(scale_factor),
+ needs_null_check_(needs_null_check),
temp1_(temp1),
- temp2_(temp2),
- entrypoint_(entrypoint) {
+ temp2_(temp2) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+ const char* GetDescription() const OVERRIDE {
+ return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM";
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
- // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK_NE(ref_reg, temp1_);
+
+ // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+ DCHECK_EQ(offset_, 0u);
+ DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset = index_;
+ DCHECK(field_offset.IsRegisterPair()) << field_offset;
__ Bind(GetEntryLabel());
- // Save the old reference.
+ // /* int32_t */ monitor = obj->monitor_
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+ __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
+ if (needs_null_check_) {
+ codegen->MaybeRecordImplicitNullCheck(instruction_);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp1`.
+ __ add(obj_, obj_, ShifterOperand(temp1_, LSR, 32));
+
+ // The actual reference load.
+ // A possible implicit null check has already been handled above.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->GenerateRawReferenceLoad(
+ instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
+
+ // Mark the object `ref` when `obj` is gray.
+ //
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ //
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
+ __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS.
+
+ // Save the old value of the reference before marking it.
// Note that we cannot use IP to save the old reference, as IP is
// used internally by the ReadBarrierMarkRegX entry point, and we
// need the old reference after the call to that entry point.
DCHECK_NE(temp1_, IP);
__ Mov(temp1_, ref_reg);
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(ref_reg, SP);
- DCHECK_NE(ref_reg, LR);
- DCHECK_NE(ref_reg, PC);
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_reg, IP);
- DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ blx(entrypoint_.AsRegister<Register>());
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
+ GenerateReadBarrierMarkRuntimeCall(codegen);
// If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset_)`).
+ // update the field in the holder (`*(obj_ + field_offset)`).
//
// Note that this field could also hold a different object, if
// another thread had concurrently changed it. In that case, the
// LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
// (CAS) operation below would abort the CAS, leaving the field
// as-is.
- Label done;
__ cmp(temp1_, ShifterOperand(ref_reg));
- __ b(&done, EQ);
+ __ b(GetExitLabel(), EQ);
// Update the the holder's field atomically. This may fail if
// mutator updates before us, but it's OK. This is achieved
@@ -850,7 +1021,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
// The UnsafeCASObject intrinsic uses a register pair as field
// offset ("long offset"), of which only the low part contains
// data.
- Register offset = field_offset_.AsRegisterPairLow<Register>();
+ Register offset = field_offset.AsRegisterPairLow<Register>();
Register expected = temp1_;
Register value = ref_reg;
Register tmp_ptr = IP; // Pointer to actual memory.
@@ -900,25 +1071,27 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
}
}
- __ Bind(&done);
__ b(GetExitLabel());
}
private:
- // The location (register) of the marked object reference.
- const Location ref_;
// The register containing the object holding the marked object reference field.
const Register obj_;
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset_;
-
+ // The offset, index and scale factor to access the reference in `obj_`.
+ uint32_t offset_;
+ Location index_;
+ ScaleFactor scale_factor_;
+ // Is a null check required?
+ bool needs_null_check_;
+ // A temporary register used to hold the lock word of `obj_`; and
+ // also to hold the original reference value, when the reference is
+ // marked.
const Register temp1_;
+ // A temporary register used in the implementation of the CAS, to
+ // update the object's reference field.
const Register temp2_;
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
+ DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM);
};
// Slow path generating a read barrier for a heap reference.
@@ -7310,13 +7483,11 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- // After loading the reference from `obj.field` into `ref`, query
- // `art::Thread::Current()->GetIsGcMarking()` to decide whether we
- // need to enter the slow path to mark the reference. This
- // optimistic strategy (we expect the GC to not be marking most of
- // the time) does not check `obj`'s lock word (to see if it is a
- // gray object or not), so may sometimes mark an already marked
- // object.
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to mark the reference.
+ // Then, in the slow path, check the gray bit in the lock word of
+ // the reference's holder (`obj`) to decide whether to mark `ref` or
+ // not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
@@ -7324,14 +7495,19 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// that `GetIsGcMarking()` is false, and vice versa.
//
// temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
// // Slow path.
- // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
- // TODO: This temp register is only necessary when
- // `always_update_field` is true; make it optional (like `temp2`).
Register temp_reg = temp.AsRegister<Register>();
// Slow path marking the object `ref` when the GC is marking. The
@@ -7340,18 +7516,37 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
SlowPathCodeARM* slow_path;
if (always_update_field) {
DCHECK(temp2 != nullptr);
- // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
- // of the form `obj + field_offset`, where `obj` is a register and
- // `field_offset` is a register pair (of which only the lower half
- // is used). Thus `offset` and `scale_factor` above are expected
- // to be null in this code path.
+ // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
+ // supports address of the form `obj + field_offset`, where `obj`
+ // is a register and `field_offset` is a register pair (of which
+ // only the lower half is used). Thus `offset` and `scale_factor`
+ // above are expected to be null in this code path.
DCHECK_EQ(offset, 0u);
DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
- instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3);
+ Location field_offset = index;
+ slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ /* index */ field_offset,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ *temp2,
+ /* entrypoint */ temp3);
} else {
- slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
- instruction, ref, /* entrypoint */ temp3);
+ slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp3);
}
AddSlowPath(slow_path);
@@ -7361,11 +7556,11 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
- // The reference load.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
+ // Fast path: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
__ Bind(slow_path->GetExitLabel());
}