author    | Vladimir Marko <vmarko@google.com> | 2016-08-09 11:04:26 +0100
committer | Vladimir Marko <vmarko@google.com> | 2016-09-05 17:27:41 +0100
commit    | 70e97462116a47ef2e582ea29a037847debcc029 (patch)
tree      | ee587e35b9b9483c35875ccc8ddea139978ca823 /compiler/optimizing/code_generator.cc
parent    | 521691ae4dfad47cf6b46858347fa5fa32fd7bcc (diff)
Avoid excessive spill slots for slow paths.
Reducing the frame size makes stack maps smaller: we need
fewer bits for stack masks, and some dex register locations
may use the short location kind rather than the long one
(see the sketch after the numbers below). On Nexus 9,
AOSP ToT, the boot.oat size reductions are:
prebuilt multi-part boot image:
- 32-bit boot.oat: -416KiB (-0.6%)
- 64-bit boot.oat: -635KiB (-0.9%)
prebuilt multi-part boot image with read barrier:
- 32-bit boot.oat: -483KiB (-0.7%)
- 64-bit boot.oat: -703KiB (-0.9%)
on-device built single boot image:
- 32-bit boot.oat: -380KiB (-0.6%)
- 64-bit boot.oat: -632KiB (-0.9%)
on-device built single boot image with read barrier:
- 32-bit boot.oat: -448KiB (-0.6%)
- 64-bit boot.oat: -692KiB (-0.9%)
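A rough sketch of the stack-mask effect mentioned above (illustrative
only, not ART's actual StackMap encoding; the only detail taken from
the code is that the stack mask carries one bit per kVRegSize-sized
frame slot, as in the SetStackBit call in the diff below):

    #include <cstddef>

    constexpr size_t kVRegSize = 4;  // Bytes per virtual-register slot.

    // Each stack map needs one stack-mask bit per vreg-sized slot that
    // may hold a GC reference, so the bit count scales with frame size.
    constexpr size_t StackMaskBits(size_t frame_size_in_bytes) {
      return frame_size_in_bytes / kVRegSize;
    }

    // Shrinking a frame from 112 to 64 bytes saves 12 bits in every
    // stack map emitted for the method.
    static_assert(StackMaskBits(112) == 28, "");
    static_assert(StackMaskBits(64) == 16, "");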
The other benefit is that at runtime, threads may need fewer
pages for their stacks, reducing overall memory usage.
We defer the calculation of the maximum spill size from
the main register allocator (linear scan or graph coloring)
to the RegisterAllocationResolver, which computes it from
the registers live at slow-path safepoints. The old notion
of an artificial slow path safepoint interval is removed as
it is no longer needed.
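A minimal sketch of that deferred computation (SafepointInfo, its
fields, and ComputeMaximumSafepointSpillSize are all invented for
illustration; the real logic lives in RegisterAllocationResolver and
works on the allocator's live-register sets):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Hypothetical per-safepoint summary.
    struct SafepointInfo {
      bool only_calls_on_slow_path;
      size_t live_caller_save_core_registers;
      size_t live_caller_save_fp_registers;
    };

    // Take the maximum over the registers actually live at each
    // slow-path safepoint, instead of reserving worst-case space for
    // every caller-save register in the frame.
    size_t ComputeMaximumSafepointSpillSize(
        const std::vector<SafepointInfo>& safepoints,
        size_t word_size,
        size_t fp_spill_slot_size) {
      size_t max_spill_size = 0u;
      for (const SafepointInfo& sp : safepoints) {
        if (!sp.only_calls_on_slow_path) continue;  // No slow-path spills needed here.
        max_spill_size = std::max(
            max_spill_size,
            sp.live_caller_save_core_registers * word_size +
                sp.live_caller_save_fp_registers * fp_spill_slot_size);
      }
      return max_spill_size;
    }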
Test: Run ART test suite on host and Nexus 9.
Bug: 30212852
Change-Id: I40b3d114e278e2c5807982904fa49bf6642c6275
Diffstat (limited to 'compiler/optimizing/code_generator.cc')
-rw-r--r-- | compiler/optimizing/code_generator.cc | 96
1 file changed, 37 insertions, 59 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index c532e72465..6a4ad5c92a 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -283,8 +283,7 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A
 }
 
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
-                                             size_t maximum_number_of_live_core_registers,
-                                             size_t maximum_number_of_live_fpu_registers,
+                                             size_t maximum_safepoint_spill_size,
                                              size_t number_of_out_slots,
                                              const ArenaVector<HBasicBlock*>& block_order) {
   block_order_ = &block_order;
@@ -298,14 +297,12 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
       && !HasAllocatedCalleeSaveRegisters()
       && IsLeafMethod()
       && !RequiresCurrentMethod()) {
-    DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
-    DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u);
+    DCHECK_EQ(maximum_safepoint_spill_size, 0u);
     SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
   } else {
     SetFrameSize(RoundUp(
         first_register_slot_in_slow_path_
-        + maximum_number_of_live_core_registers * GetWordSize()
-        + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
+        + maximum_safepoint_spill_size
         + FrameEntrySpillSize(),
         kStackAlignment));
   }
@@ -765,21 +762,16 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
   LocationSummary* locations = instruction->GetLocations();
 
   uint32_t register_mask = locations->GetRegisterMask();
-  if (instruction->IsSuspendCheck()) {
-    // Suspend check has special ABI that saves the caller-save registers in callee,
-    // so we want to emit stack maps containing the registers.
-    // TODO: Register allocator still reserves space for the caller-save registers.
-    // We should add slow-path-specific caller-save information into LocationSummary
-    // and refactor the code here as well as in the register allocator to use it.
+  DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u);
+  if (locations->OnlyCallsOnSlowPath()) {
+    // In case of slow path, we currently set the location of caller-save registers
+    // to register (instead of their stack location when pushed before the slow-path
+    // call). Therefore register_mask contains both callee-save and caller-save
+    // registers that hold objects. We must remove the spilled caller-save from the
+    // mask, since they will be overwritten by the callee.
+    uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true);
+    register_mask &= ~spills;
   } else {
-    if (locations->OnlyCallsOnSlowPath()) {
-      // In case of slow path, we currently set the location of caller-save registers
-      // to register (instead of their stack location when pushed before the slow-path
-      // call). Therefore register_mask contains both callee-save and caller-save
-      // registers that hold objects. We must remove the caller-save from the mask, since
-      // they will be overwritten by the callee.
-      register_mask &= core_callee_save_mask_;
-    }
     // The register mask must be a subset of callee-save registers.
     DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   }
@@ -1235,58 +1227,44 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in
 }
 
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
-  for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
-    if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (live_registers->ContainsCoreRegister(i)) {
-        // If the register holds an object, update the stack mask.
-        if (locations->RegisterContainsObject(i)) {
-          locations->SetStackBit(stack_offset / kVRegSize);
-        }
-        DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
-        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
-        saved_core_stack_offsets_[i] = stack_offset;
-        stack_offset += codegen->SaveCoreRegister(stack_offset, i);
-      }
+  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+  for (uint32_t i : LowToHighBits(core_spills)) {
+    // If the register holds an object, update the stack mask.
+    if (locations->RegisterContainsObject(i)) {
+      locations->SetStackBit(stack_offset / kVRegSize);
     }
+    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+    saved_core_stack_offsets_[i] = stack_offset;
+    stack_offset += codegen->SaveCoreRegister(stack_offset, i);
   }
 
-  for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (live_registers->ContainsFloatingPointRegister(i)) {
-        DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
-        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
-        saved_fpu_stack_offsets_[i] = stack_offset;
-        stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i);
-      }
-    }
+  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+  for (size_t i : LowToHighBits(fp_spills)) {
+    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+    saved_fpu_stack_offsets_[i] = stack_offset;
+    stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i);
   }
 }
 
 void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
-  for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
-    if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (live_registers->ContainsCoreRegister(i)) {
-        DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
-        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
-        stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
-      }
-    }
+  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+  for (uint32_t i : LowToHighBits(core_spills)) {
+    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+    stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
   }
 
-  for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (live_registers->ContainsFloatingPointRegister(i)) {
-        DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
-        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
-        stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
-      }
-    }
+  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+  for (size_t i : LowToHighBits(fp_spills)) {
+    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+    stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
   }
 }
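For readers unfamiliar with the helpers used above: LowToHighBits is
ART's iterator over the set bits of a mask from least to most
significant, and GetSlowPathSpills yields the caller-save registers
live at the safepoint. A stand-alone sketch of both ideas (the names
ForEachBitLowToHigh and SlowPathSpills, and the exact mask expression,
are assumptions for illustration, not ART's implementation):

    #include <cstdint>

    // Visit the set bits of a mask from lowest to highest, so the loops
    // above touch only registers that actually need spilling.
    template <typename Visitor>
    void ForEachBitLowToHigh(uint32_t mask, Visitor&& visit) {
      while (mask != 0u) {
        uint32_t bit = static_cast<uint32_t>(__builtin_ctz(mask));  // Lowest set bit.
        visit(bit);
        mask &= mask - 1u;  // Clear the lowest set bit.
      }
    }

    // Plausible reduction of GetSlowPathSpills, inferred from the
    // surrounding code: registers live at the safepoint that the callee
    // does not preserve. An assumption, not the exact ART source.
    uint32_t SlowPathSpills(uint32_t live_registers, uint32_t callee_save_mask) {
      return live_registers & ~callee_save_mask;
    }

    // Example: ForEachBitLowToHigh(0b1010u, f) calls f(1), then f(3).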