diff options
author | Roland Levillain <rpl@google.com> | 2016-08-25 17:27:56 +0100 |
---|---|---|
committer | Roland Levillain <rpl@google.com> | 2016-08-25 17:27:56 +0100 |
commit | 16d9f949698faed28435af7aa9c9ebacbfd5d1a8 (patch) | |
tree | 870fbd499c10f70cecc5f62246b26e1332b600da /compiler/optimizing/code_generator_arm.cc | |
parent | 7c95b4e22897a6f14ef79ec6e547e2eed686814a (diff) |
Re-enable the ArraySet fast path with Baker read barriers.
Benchmarks (ARM64) score variations on Nexus 5X with CPU
cores clamped at 960000 kHz, i.e. 960 MHz (aosp_bullhead-userdebug build):
- Ritzperf - average (lower is better): -0.95% (virtually unchanged)
- CaffeineMark - average (higher is better): +2.50% (slightly better)
- DeltaBlue (lower is better): -0.55% (virtually unchanged)
- Richards - average (lower is better): +0.67% (virtually unchanged)
- SciMark2 - average (higher is better): -0.10% (virtually unchanged)
Details about Ritzperf benchmarks with meaningful variations
(lower is better):
- GenericCalcActions.MemAllocTest: -5.05% (better)
Details about CaffeineMark benchmarks with meaningful variations
(higher is better):
- Method: +16.88% (better)
Details about Richards benchmarks with meaningful variations
(lower is better):
- deutsch_acc_interface: +9.86% (worse)
Boot image code size variation on Nexus 5X
(aosp_bullhead-userdebug build):
- total ARM64 framework Oat files size change:
105933472 bytes -> 106027680 bytes (+0.09%)
- total ARM framework Oat files size change:
89157936 bytes -> 89239856 bytes (+0.09%)
Test: ART host and target (ARM, ARM64) tests.
Bug: 29516974
Bug: 29506760
Bug: 12687968
Change-Id: Ib9e9709712295e17804b8888ac10e3d518ff2e70
Diffstat (limited to 'compiler/optimizing/code_generator_arm.cc')
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 124 |
1 files changed, 96 insertions, 28 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 6d9c55cd75..9d5aabc25d 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -425,6 +425,7 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || + instruction_->IsArraySet() || instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || @@ -4660,6 +4661,7 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { } if (needs_write_barrier) { // Temporary registers for the write barrier. + // These registers may be used for Baker read barriers too. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. locations->AddTemp(Location::RequiresRegister()); } @@ -4744,8 +4746,10 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { } DCHECK(needs_write_barrier); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4776,33 +4780,97 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { } if (kEmitCompilerReadBarrier) { - // When read barriers are enabled, the type checking - // instrumentation requires two read barriers: - // - // __ Mov(temp2, temp1); - // // /* HeapReference<Class> */ temp1 = temp1->component_type_ - // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // 
codegen_->GenerateReadBarrierSlow( - // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); - // - // // /* HeapReference<Class> */ temp2 = value->klass_ - // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // codegen_->GenerateReadBarrierSlow( - // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); - // - // __ cmp(temp1, ShifterOperand(temp2)); - // - // However, the second read barrier may trash `temp`, as it - // is a temporary register, and as such would not be saved - // along with live registers before calling the runtime (nor - // restored afterwards). So in this case, we bail out and - // delegate the work to the array set slow path. - // - // TODO: Extend the register allocator to support a new - // "(locally) live temp" location so as to avoid always - // going into the slow path when read barriers are enabled. - __ b(slow_path->GetEntryLabel()); + if (!kUseBakerReadBarrier) { + // When (non-Baker) read barriers are enabled, the type + // checking instrumentation requires two read barriers + // generated by CodeGeneratorARM::GenerateReadBarrierSlow: + // + // __ Mov(temp2, temp1); + // // /* HeapReference<Class> */ temp1 = temp1->component_type_ + // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // codegen_->GenerateReadBarrierSlow( + // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // codegen_->GenerateReadBarrierSlow( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); + // + // __ cmp(temp1, ShifterOperand(temp2)); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. 
+ // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled? + // + // There is no such problem with Baker read barriers (see below). + __ b(slow_path->GetEntryLabel()); + } else { + Register temp3 = IP; + Location temp3_loc = Location::RegisterLocation(temp3); + + // Note: `temp3` (scratch register IP) cannot be used as + // `ref` argument of GenerateFieldLoadWithBakerReadBarrier + // calls below (see ReadBarrierMarkSlowPathARM for more + // details). + + // /* HeapReference<Class> */ temp1 = array->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + temp1_loc, + array, + class_offset, + temp3_loc, + /* needs_null_check */ true); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + temp1_loc, + temp1, + component_offset, + temp3_loc, + /* needs_null_check */ false); + // Register `temp1` is not trashed by the read barrier + // emitted by GenerateFieldLoadWithBakerReadBarrier below, + // as that method produces a call to a ReadBarrierMarkRegX + // entry point, which saves all potentially live registers, + // including temporaries such as `temp1`. + // /* HeapReference<Class> */ temp2 = value->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + temp2_loc, + value, + class_offset, + temp3_loc, + /* needs_null_check */ false); + // If heap poisoning is enabled, `temp1` and `temp2` have + // been unpoisoned by the previous calls to + // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. + __ cmp(temp1, ShifterOperand(temp2)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Label do_put; + __ b(&do_put, EQ); + // We do not need to emit a read barrier for the + // following heap reference load, as `temp1` is only used + // in a comparison with null below, and this reference + // is not kept afterwards. 
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. + __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ b(slow_path->GetEntryLabel(), NE); + } + } } else { + // Non read barrier code. + // /* HeapReference<Class> */ temp1 = array->klass_ __ LoadFromOffset(kLoadWord, temp1, array, class_offset); codegen_->MaybeRecordImplicitNullCheck(instruction); |