diff options
author | Roland Levillain <rpl@google.com> | 2017-03-09 13:02:12 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2017-03-09 13:02:12 +0000 |
commit | 5ed51e3176f3dc4ff2e50ba4bf52743d404b5b4f (patch) | |
tree | 1638115757601e4d41d1dc3f3cb9045f5d3d6dd9 /compiler/optimizing/code_generator_arm.cc | |
parent | 079f5fd58799a23aa5d60a5f85008a4663a33f2a (diff) | |
parent | 54f869ed3c7910e6eb7bade924d41570e9a4cb14 (diff) |
Merge changes Ia26b07f0,Id3d2758c
* changes:
Revert "Revert "Use the holder's gray bit in Baker read barrier slow paths (ARM, ARM64).""
Revert "Revert "Use the "GC is marking" information in compiler read barriers (ARM, ARM64).""
Diffstat (limited to 'compiler/optimizing/code_generator_arm.cc')
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 590 |
1 files changed, 405 insertions, 185 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 511bd9b7ef..2b0ab3e20e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -636,10 +636,75 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Abstract base class for read barrier slow paths marking a reference +// `ref`. +// +// Argument `entrypoint` must be a register location holding the read +// barrier marking runtime entry point to be invoked. +class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM { + protected: + ReadBarrierMarkSlowPathBaseARM(HInstruction* instruction, Location ref, Location entrypoint) + : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM"; } + + // Generate assembly code calling the read barrier marking runtime + // entry point (ReadBarrierMarkRegX). + void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { + Register ref_reg = ref_.AsRegister<Register>(); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ blx(entrypoint_.AsRegister<Register>()); + } else { + // Entrypoint is not already loaded, load from the thread. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } + } + + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the entrypoint if it is already loaded. + const Location entrypoint_; + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM); +}; + // Slow path marking an object reference `ref` during a read // barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking (see -// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that). +// reference does not get updated by this slow path after marking. // // This means that after the execution of this slow path, `ref` will // always be up-to-date, but `obj.field` may not; i.e., after the @@ -650,13 +715,13 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { // // If `entrypoint` is a valid location it is assumed to already be // holding the entrypoint. The case where the entrypoint is passed in -// is for the GcRoot read barrier. -class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { +// is when the decision to mark is based on whether the GC is marking. +class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM { public: ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref, Location entrypoint = Location::NoLocation()) - : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) { + : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint) { DCHECK(kEmitCompilerReadBarrier); } @@ -664,15 +729,77 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + if (kIsDebugBuild) { + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + } + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + GenerateReadBarrierMarkRuntimeCall(codegen); + __ b(GetExitLabel()); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); +}; + +// Slow path loading `obj`'s lock word, loading a reference from +// object `*(obj + offset + (index << scale_factor))` into `ref`, and +// marking `ref` if `obj` is gray according to the lock word (Baker +// read barrier). The field `obj.field` in the object `obj` holding +// this reference does not get updated by this slow path after marking +// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM +// below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). +// +// Argument `entrypoint` must be a register location holding the read +// barrier marking runtime entry point to be invoked. +class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowPathBaseARM { + public: + LoadReferenceWithBakerReadBarrierSlowPathARM(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + bool needs_null_check, + Register temp, + Location entrypoint) + : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint), + obj_(obj), + offset_(offset), + index_(index), + scale_factor_(scale_factor), + needs_null_check_(needs_null_check), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "LoadReferenceWithBakerReadBarrierSlowPathARM"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK_NE(ref_reg, temp_); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsArraySet() || - instruction_->IsLoadClass() || - instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || @@ -686,145 +813,202 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - DCHECK_NE(ref_reg, SP); - DCHECK_NE(ref_reg, LR); - DCHECK_NE(ref_reg, PC); - // IP is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(ref_reg, IP); - DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; - // "Compact" slow path, saving two moves. + + // When using MaybeGenerateReadBarrierSlow, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: // - // Instead of using the standard runtime calling convention (input - // and output in R0): + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } // - // R0 <- ref - // R0 <- ReadBarrierMark(R0) - // ref <- R0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. + + // /* int32_t */ monitor = obj->monitor_ + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + __ LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset); + if (needs_null_check_) { + codegen->MaybeRecordImplicitNullCheck(instruction_); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `obj` is unchanged by this operation, but its value now depends + // on `temp`. + __ add(obj_, obj_, ShifterOperand(temp_, LSR, 32)); + + // The actual reference load. + // A possible implicit null check has already been handled above. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->GenerateRawReferenceLoad( + instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); + + // Mark the object `ref` when `obj` is gray. // - // rX <- ReadBarrierMarkRegX(rX) + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); // - if (entrypoint_.IsValid()) { - arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ blx(entrypoint_.AsRegister<Register>()); - } else { - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1); + __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS. + GenerateReadBarrierMarkRuntimeCall(codegen); + __ b(GetExitLabel()); } private: - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if already loaded. - const Location entrypoint_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); + // The register containing the object holding the marked object reference field. + Register obj_; + // The offset, index and scale factor to access the reference in `obj_`. + uint32_t offset_; + Location index_; + ScaleFactor scale_factor_; + // Is a null check required? + bool needs_null_check_; + // A temporary register used to hold the lock word of `obj_`. + Register temp_; + + DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM); }; -// Slow path marking an object reference `ref` during a read barrier, -// and if needed, atomically updating the field `obj.field` in the -// object `obj` holding this reference after marking (contrary to -// ReadBarrierMarkSlowPathARM above, which never tries to update -// `obj.field`). +// Slow path loading `obj`'s lock word, loading a reference from +// object `*(obj + offset + (index << scale_factor))` into `ref`, and +// marking `ref` if `obj` is gray according to the lock word (Baker +// read barrier). If needed, this slow path also atomically updates +// the field `obj.field` in the object `obj` holding this reference +// after marking (contrary to +// LoadReferenceWithBakerReadBarrierSlowPathARM above, which never +// tries to update `obj.field`). // // This means that after the execution of this slow path, both `ref` // and `obj.field` will be up-to-date; i.e., after the flip, both will // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). -class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { +// +// Argument `entrypoint` must be a register location holding the read +// barrier marking runtime entry point to be invoked. +class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM + : public ReadBarrierMarkSlowPathBaseARM { public: - ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction, - Location ref, - Register obj, - Location field_offset, - Register temp1, - Register temp2) - : SlowPathCodeARM(instruction), - ref_(ref), + LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + bool needs_null_check, + Register temp1, + Register temp2, + Location entrypoint) + : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint), obj_(obj), - field_offset_(field_offset), + offset_(offset), + index_(index), + scale_factor_(scale_factor), + needs_null_check_(needs_null_check), temp1_(temp1), temp2_(temp2) { DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; } + const char* GetDescription() const OVERRIDE { + return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM"; + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK_NE(ref_reg, temp1_); + + // This slow path is only used by the UnsafeCASObject intrinsic at the moment. DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking and field updating slow path: " << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK(field_offset_.IsRegisterPair()) << field_offset_; + DCHECK_EQ(offset_, 0u); + DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1); + // The location of the offset of the marked reference field within `obj_`. + Location field_offset = index_; + DCHECK(field_offset.IsRegisterPair()) << field_offset; __ Bind(GetEntryLabel()); - // Save the old reference. + // /* int32_t */ monitor = obj->monitor_ + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); + if (needs_null_check_) { + codegen->MaybeRecordImplicitNullCheck(instruction_); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `obj` is unchanged by this operation, but its value now depends + // on `temp1`. + __ add(obj_, obj_, ShifterOperand(temp1_, LSR, 32)); + + // The actual reference load. + // A possible implicit null check has already been handled above. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->GenerateRawReferenceLoad( + instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); + + // Mark the object `ref` when `obj` is gray. + // + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); + // + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1); + __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS. + + // Save the old value of the reference before marking it. // Note that we cannot use IP to save the old reference, as IP is // used internally by the ReadBarrierMarkRegX entry point, and we // need the old reference after the call to that entry point. DCHECK_NE(temp1_, IP); __ Mov(temp1_, ref_reg); - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - DCHECK_NE(ref_reg, SP); - DCHECK_NE(ref_reg, LR); - DCHECK_NE(ref_reg, PC); - // IP is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(ref_reg, IP); - DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in R0): - // - // R0 <- ref - // R0 <- ReadBarrierMark(R0) - // ref <- R0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + GenerateReadBarrierMarkRuntimeCall(codegen); // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset_)`). + // update the field in the holder (`*(obj_ + field_offset)`). // // Note that this field could also hold a different object, if // another thread had concurrently changed it. In that case, the // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set // (CAS) operation below would abort the CAS, leaving the field // as-is. - Label done; __ cmp(temp1_, ShifterOperand(ref_reg)); - __ b(&done, EQ); + __ b(GetExitLabel(), EQ); // Update the the holder's field atomically. This may fail if // mutator updates before us, but it's OK. This is achieved @@ -837,7 +1021,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { // The UnsafeCASObject intrinsic uses a register pair as field // offset ("long offset"), of which only the low part contains // data. - Register offset = field_offset_.AsRegisterPairLow<Register>(); + Register offset = field_offset.AsRegisterPairLow<Register>(); Register expected = temp1_; Register value = ref_reg; Register tmp_ptr = IP; // Pointer to actual memory. @@ -887,22 +1071,27 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { } } - __ Bind(&done); __ b(GetExitLabel()); } private: - // The location (register) of the marked object reference. - const Location ref_; // The register containing the object holding the marked object reference field. const Register obj_; - // The location of the offset of the marked reference field within `obj_`. - Location field_offset_; - + // The offset, index and scale factor to access the reference in `obj_`. + uint32_t offset_; + Location index_; + ScaleFactor scale_factor_; + // Is a null check required? + bool needs_null_check_; + // A temporary register used to hold the lock word of `obj_`; and + // also to hold the original reference value, when the reference is + // marked. const Register temp1_; + // A temporary register used in the implementation of the CAS, to + // update the object's reference field. const Register temp2_; - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM); + DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM); }; // Slow path generating a read barrier for a heap reference. @@ -7183,14 +7372,35 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when - // Baker's read barrier are used: + // Baker's read barrier are used. + // + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. // - // root = obj.field; // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. // } + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. + Location temp = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); + + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadWord, root_reg, obj, offset); static_assert( @@ -7201,21 +7411,6 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); - - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); @@ -7286,51 +7481,101 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // In slow path based read barriers, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to mark the reference. + // Then, in the slow path, check the gray bit in the lock word of + // the reference's holder (`obj`) to decide whether to mark `ref` or + // not. // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. - // } + // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp3` the read barrier mark entry point + // corresponding to register `ref`. If `temp3` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. + // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // } - Register ref_reg = ref.AsRegister<Register>(); Register temp_reg = temp.AsRegister<Register>(); - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // /* int32_t */ monitor = obj->monitor_ - __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp3`. + Location temp3 = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path; + if (always_update_field) { + DCHECK(temp2 != nullptr); + // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only + // supports address of the form `obj + field_offset`, where `obj` + // is a register and `field_offset` is a register pair (of which + // only the lower half is used). Thus `offset` and `scale_factor` + // above are expected to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + Location field_offset = index; + slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( + instruction, + ref, + obj, + offset, + /* index */ field_offset, + scale_factor, + needs_null_check, + temp_reg, + *temp2, + /* entrypoint */ temp3); + } else { + slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp3); } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); + AddSlowPath(slow_path); - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp_reg`. - __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32)); + // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel()); + // Fast path: just load the reference. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::GenerateRawReferenceLoad(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + bool needs_null_check) { + Register ref_reg = ref.AsRegister<Register>(); - // The actual reference load. if (index.IsValid()) { // Load types involving an "index": ArrayGet, // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject // intrinsics. - // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) + // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; @@ -7347,41 +7592,16 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ LoadFromOffset(kLoadWord, ref_reg, IP, offset); } } else { - // /* HeapReference<Object> */ ref = *(obj + offset) + // /* HeapReference<mirror::Object> */ ref = *(obj + offset) __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); } - // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(ref_reg); - - // Slow path marking the object `ref` when it is gray. - SlowPathCodeARM* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address - // of the form `obj + field_offset`, where `obj` is a register and - // `field_offset` is a register pair (of which only the lower half - // is used). Thus `offset` and `scale_factor` above are expected - // to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( - instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2); - } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); } - AddSlowPath(slow_path); - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1); - __ b(slow_path->GetEntryLabel(), CS); // Carry flag is the last bit shifted out by LSRS. - __ Bind(slow_path->GetExitLabel()); + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); } void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction, |