author      Goran Jakovljevic <goran.jakovljevic@mips.com>    2017-12-14 10:25:20 +0100
committer   Goran Jakovljevic <goran.jakovljevic@mips.com>    2017-12-15 17:33:12 +0100
commit      e7de5ec3e4cd1d607b647d98ea64df105479b867
tree        d692c4d1dee08eea4beffd71bd8cdf1d106c059e    /compiler/optimizing/code_generator_mips.cc
parent      bee510c94560703102ca553a08ec47119959c204
MIPS: Support swaps between 128-bit locations
Add support for swaps between two SIMDStackSlots, between two
VectorRegisters (extended FpuRegisters), and between a
SIMDStackSlot and a VectorRegister.
This fixes test 623-checker-loop-regressions for
MIPS64R6 and MIPS32R6.
Test: ./testrunner.py --optimizing --target in QEMU (MIPS64R6)
Test: ./testrunner.py --optimizing --target in QEMU (MIPS32R6)
Change-Id: I36aa209f79790fb6c08b9a171f810769a6b40afc
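
In outline, every case this change adds is a temp-mediated swap: the register/register and register/slot shapes need one 128-bit scratch (FTMP), while the slot/slot shape holds both values in flight at once and therefore needs a second one (FTMP2). The self-contained C++ sketch below models the three shapes; SwapModel, Quad, and the container sizes are illustrative stand-ins, not ART types.

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

// Illustrative stand-ins only -- none of these names are ART types.
using Quad = std::array<uint64_t, 2>;  // one 128-bit SIMD value

struct SwapModel {
  std::array<Quad, 32> vreg{};   // vector registers (FPU registers widened to 128 bits)
  std::array<Quad, 64> slot{};   // SIMD stack slots
  Quad ftmp{}, ftmp2{};          // the two scratch registers

  // VectorRegister <-> VectorRegister: three moves through one scratch,
  // mirroring the MoveV sequence added to EmitSwap().
  void SwapRegReg(int r1, int r2) {
    ftmp = vreg[r1];
    vreg[r1] = vreg[r2];
    vreg[r2] = ftmp;
  }

  // SIMDStackSlot <-> SIMDStackSlot: both values are in flight before either
  // store lands, hence the second scratch -- mirroring ExchangeQuadSlots().
  void SwapSlotSlot(int s1, int s2) {
    ftmp = slot[s1];    // LoadQFromOffset(FTMP,  SP, index1)
    ftmp2 = slot[s2];   // LoadQFromOffset(FTMP2, SP, index2)
    slot[s2] = ftmp;    // StoreQToOffset(FTMP,  SP, index2)
    slot[s1] = ftmp2;   // StoreQToOffset(FTMP2, SP, index1)
  }

  // VectorRegister <-> SIMDStackSlot: one scratch suffices, because the
  // register itself can receive the slot while FTMP parks its old value.
  void SwapRegSlot(int r, int s) {
    ftmp = vreg[r];     // MoveV(FTMP, reg)
    vreg[r] = slot[s];  // LoadQFromOffset(reg, SP, offset)
    slot[s] = ftmp;     // StoreQToOffset(FTMP, SP, offset)
  }
};

int main() {
  SwapModel m;
  m.vreg[0] = {1, 2};
  m.slot[3] = {3, 4};
  m.SwapRegSlot(0, 3);
  // Expected: vreg[0] == {3, 4}, slot[3] == {1, 2}.
  std::printf("vreg0.lo=%llu slot3.lo=%llu\n",
              static_cast<unsigned long long>(m.vreg[0][0]),
              static_cast<unsigned long long>(m.slot[3][0]));
  return 0;
}
```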
Diffstat (limited to 'compiler/optimizing/code_generator_mips.cc')
-rw-r--r--    compiler/optimizing/code_generator_mips.cc    47
1 file changed, 37 insertions, 10 deletions
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 1f6b214f11..9f4c2349e7 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1095,17 +1095,23 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
     __ Move(r2, r1);
     __ Move(r1, TMP);
   } else if (loc1.IsFpuRegister() && loc2.IsFpuRegister()) {
-    FRegister f1 = loc1.AsFpuRegister<FRegister>();
-    FRegister f2 = loc2.AsFpuRegister<FRegister>();
-    if (type == DataType::Type::kFloat32) {
-      __ MovS(FTMP, f2);
-      __ MovS(f2, f1);
-      __ MovS(f1, FTMP);
+    if (codegen_->GetGraph()->HasSIMD()) {
+      __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(loc1));
+      __ MoveV(VectorRegisterFrom(loc1), VectorRegisterFrom(loc2));
+      __ MoveV(VectorRegisterFrom(loc2), static_cast<VectorRegister>(FTMP));
     } else {
-      DCHECK_EQ(type, DataType::Type::kFloat64);
-      __ MovD(FTMP, f2);
-      __ MovD(f2, f1);
-      __ MovD(f1, FTMP);
+      FRegister f1 = loc1.AsFpuRegister<FRegister>();
+      FRegister f2 = loc2.AsFpuRegister<FRegister>();
+      if (type == DataType::Type::kFloat32) {
+        __ MovS(FTMP, f2);
+        __ MovS(f2, f1);
+        __ MovS(f1, FTMP);
+      } else {
+        DCHECK_EQ(type, DataType::Type::kFloat64);
+        __ MovD(FTMP, f2);
+        __ MovD(f2, f1);
+        __ MovD(f1, FTMP);
+      }
     }
   } else if ((loc1.IsRegister() && loc2.IsFpuRegister()) ||
              (loc1.IsFpuRegister() && loc2.IsRegister())) {
@@ -1152,6 +1158,8 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false);
   } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) {
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
+  } else if (loc1.IsSIMDStackSlot() && loc2.IsSIMDStackSlot()) {
+    ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex());
   } else if ((loc1.IsRegister() && loc2.IsStackSlot()) ||
              (loc1.IsStackSlot() && loc2.IsRegister())) {
     Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>();
@@ -1174,6 +1182,13 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
     __ Move(TMP, reg_h);
     __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h);
     __ StoreToOffset(kStoreWord, TMP, SP, offset_h);
+  } else if ((loc1.IsFpuRegister() && loc2.IsSIMDStackSlot()) ||
+             (loc1.IsSIMDStackSlot() && loc2.IsFpuRegister())) {
+    Location fp_loc = loc1.IsFpuRegister() ? loc1 : loc2;
+    intptr_t offset = loc1.IsFpuRegister() ? loc2.GetStackIndex() : loc1.GetStackIndex();
+    __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(fp_loc));
+    __ LoadQFromOffset(fp_loc.AsFpuRegister<FRegister>(), SP, offset);
+    __ StoreQToOffset(FTMP, SP, offset);
   } else if (loc1.IsFpuRegister() || loc2.IsFpuRegister()) {
     FRegister reg = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
                                          : loc2.AsFpuRegister<FRegister>();
@@ -1225,6 +1240,13 @@ void ParallelMoveResolverMIPS::Exchange(int index1, int index2, bool double_slot
   }
 }
 
+void ParallelMoveResolverMIPS::ExchangeQuadSlots(int index1, int index2) {
+  __ LoadQFromOffset(FTMP, SP, index1);
+  __ LoadQFromOffset(FTMP2, SP, index2);
+  __ StoreQToOffset(FTMP, SP, index2);
+  __ StoreQToOffset(FTMP2, SP, index1);
+}
+
 void CodeGeneratorMIPS::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
@@ -1790,6 +1812,11 @@ void CodeGeneratorMIPS::SetupBlockedRegisters() const {
   blocked_core_registers_[TMP] = true;
   blocked_fpu_registers_[FTMP] = true;
 
+  if (GetInstructionSetFeatures().HasMsa()) {
+    // To be used just for MSA instructions.
+    blocked_fpu_registers_[FTMP2] = true;
+  }
+
   // Reserve suspend and thread registers.
   blocked_core_registers_[S0] = true;
   blocked_core_registers_[TR] = true;
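
A note on the scratch-register accounting above: ExchangeQuadSlots() is the only path that needs a second FPU scratch, since both quad slots are loaded (into FTMP and FTMP2) before either store lands; the register/register and register/slot cases get by with FTMP alone. That is also why SetupBlockedRegisters() reserves FTMP2 only when GetInstructionSetFeatures().HasMsa() is true: without MSA no 128-bit locations can occur, and the register otherwise stays available to the allocator.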