diff options
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics.cc | 2 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 7 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.h | 1 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86_test.cc | 6 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 7 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 1 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 6 | ||||
-rw-r--r-- | runtime/Android.mk | 11 | ||||
-rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 33 |
10 files changed, 61 insertions, 19 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e15eff9056..676b8421cd 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -4535,7 +4535,11 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary. + DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>()); + __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>()); + __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 8ef13e125e..bc7da802b1 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -359,7 +359,7 @@ void IntrinsicsRecognizer::Run() { std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { switch (intrinsic) { case Intrinsics::kNone: - os << "No intrinsic."; + os << "None"; break; #define OPTIMIZING_INTRINSICS(Name, IsStatic) \ case Intrinsics::k ## Name: \ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 44efc65e3f..a614193c96 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1523,6 +1523,13 @@ void X86Assembler::repe_cmpsw() { } +void X86Assembler::repe_cmpsl() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0xA7); +} + + X86Assembler* X86Assembler::lock() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF0); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index e2abcde624..ae8d7a1a1d 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -466,6 +466,7 @@ class X86Assembler FINAL : public Assembler { void repne_scasw(); void repe_cmpsw(); + void repe_cmpsl(); X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 0e8c4aee0c..7663580793 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -202,4 +202,10 @@ TEST_F(AssemblerX86Test, Repecmpsw) { DriverStr(expected, "Repecmpsw"); } +TEST_F(AssemblerX86Test, Repecmpsl) { + GetAssembler()->repe_cmpsl(); + const char* expected = "repe cmpsl\n"; + DriverStr(expected, "Repecmpsl"); +} + } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 93c90db5d3..1dd4a2e3e9 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -2081,6 +2081,13 @@ void X86_64Assembler::repe_cmpsw() { } +void X86_64Assembler::repe_cmpsl() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0xA7); +} + + void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { // TODO: Need to have a code constants table. int64_t constant = bit_cast<int64_t, double>(value); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 0cd31971b4..89a5606399 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -604,6 +604,7 @@ class X86_64Assembler FINAL : public Assembler { void repne_scasw(); void repe_cmpsw(); + void repe_cmpsl(); // // Macros for High-level operations. diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 2c1a6a1f3c..e1e4c32d8f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1269,4 +1269,10 @@ TEST_F(AssemblerX86_64Test, Repecmpsw) { DriverStr(expected, "Repecmpsw"); } +TEST_F(AssemblerX86_64Test, Repecmpsl) { + GetAssembler()->repe_cmpsl(); + const char* expected = "repe cmpsl\n"; + DriverStr(expected, "Repecmpsl"); +} + } // namespace art diff --git a/runtime/Android.mk b/runtime/Android.mk index 4a944963a2..ce3e6d1f37 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -341,10 +341,13 @@ LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \ LIBART_CFLAGS := -DBUILDING_LIBART=1 +LIBART_TARGET_CFLAGS := +LIBART_HOST_CFLAGS := + ifeq ($(MALLOC_IMPL),dlmalloc) - LIBART_CFLAGS += -DUSE_DLMALLOC + LIBART_TARGET_CFLAGS += -DUSE_DLMALLOC else - LIBART_CFLAGS += -DUSE_JEMALLOC + LIBART_TARGET_CFLAGS += -DUSE_JEMALLOC endif # Default dex2oat instruction set features. @@ -440,8 +443,10 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT LOCAL_CFLAGS := $$(LIBART_CFLAGS) LOCAL_LDFLAGS := $$(LIBART_LDFLAGS) ifeq ($$(art_target_or_host),target) + LOCAL_CFLAGS += $$(LIBART_TARGET_CFLAGS) LOCAL_LDFLAGS += $$(LIBART_TARGET_LDFLAGS) else #host + LOCAL_CFLAGS += $$(LIBART_HOST_CFLAGS) LOCAL_LDFLAGS += $$(LIBART_HOST_LDFLAGS) ifeq ($$(art_static_or_shared),static) LOCAL_LDFLAGS += -static @@ -581,4 +586,6 @@ LIBART_HOST_SRC_FILES_32 := LIBART_HOST_SRC_FILES_64 := LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := LIBART_CFLAGS := +LIBART_TARGET_CFLAGS := +LIBART_HOST_CFLAGS := build-libart := diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 548ab47f82..8ba3d4392d 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -537,18 +537,18 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 // W10 - temporary add x9, sp, #8 // Destination address is bottom of stack + null. - // Use \@ to differentiate between macro invocations. -.LcopyParams\@: + // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler + // does not have unique-id variables. +1: cmp w2, #0 - beq .LendCopyParams\@ + beq 2f sub w2, w2, #4 // Need 65536 bytes of range. ldr w10, [x1, x2] str w10, [x9, x2] - b .LcopyParams\@ - -.LendCopyParams\@: + b 1b +2: // Store null into ArtMethod* at bottom of frame. str xzr, [sp] .endm @@ -587,26 +587,29 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+8 // Store result (w0/x0/s0/d0) appropriately, depending on resultType. ldrb w10, [x5] + // Check the return type and store the correct register into the jvalue in memory. + // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables. + // Don't set anything for a void type. cmp w10, #'V' - beq .Lexit_art_quick_invoke_stub\@ + beq 3f + // Is it a double? cmp w10, #'D' - bne .Lreturn_is_float\@ + bne 1f str d0, [x4] - b .Lexit_art_quick_invoke_stub\@ + b 3f -.Lreturn_is_float\@: +1: // Is it a float? cmp w10, #'F' - bne .Lreturn_is_int\@ + bne 2f str s0, [x4] - b .Lexit_art_quick_invoke_stub\@ + b 3f - // Just store x0. Doesn't matter if it is 64 or 32 bits. -.Lreturn_is_int\@: +2: // Just store x0. Doesn't matter if it is 64 or 32 bits. str x0, [x4] -.Lexit_art_quick_invoke_stub\@: +3: // Finish up. ldp x2, x19, [xFP, #32] // Restore stack pointer and x19. .cfi_restore x19 mov sp, x2 |