summary | refs | log | tree | commit | diff
path: root/simd
diff options
context:
space:
mode:
author: Haibo Huang <hhb@google.com> 2019-03-15 09:25:39 -0700
committer: Haibo Huang <hhb@google.com> 2019-03-18 10:38:39 -0700
commit: 090811cdb9b8ec7b7c7379264fb8f88fdb3f3aef (patch)
tree: 5db8efbe4eafcf1aba6bf9d6822c7b2bb459dd29 /simd
parent: 5cb261de3c18e97c1930493d8824c4044bc0acdb (diff)
parent: a4aa30d9a080bbc50421285049e4379dcaf8a669 (diff)
Upgrade libjpeg to 2.0.2
Test: build
Change-Id: If51cbf56f0b48fde26fac4f36d4f0219068659ac
Diffstat (limited to 'simd')
-rwxr-xr-x simd/CMakeLists.txt 4
-rw-r--r-- simd/arm64/jsimd_neon.S 2
-rw-r--r-- simd/i386/jsimdcpu.asm 5
-rw-r--r-- simd/loongson/jccolext-mmi.c 32
-rw-r--r-- simd/loongson/loongson-mmintrin.h 21
-rw-r--r-- simd/mips/jsimd.c 8
-rw-r--r-- simd/mips/jsimd_dspr2.S 8
-rw-r--r-- simd/x86_64/jsimdcpu.asm 5
8 files changed, 67 insertions, 18 deletions
diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt
index 346994c..8dbd7f1 100755
--- a/simd/CMakeLists.txt
+++ b/simd/CMakeLists.txt
@@ -262,7 +262,7 @@ endif()
# MIPS (GAS)
###############################################################################
-elseif(CPU_TYPE STREQUAL "mips")
+elseif(CPU_TYPE STREQUAL "mips" OR CPU_TYPE STREQUAL "mipsel")
enable_language(ASM)
@@ -293,7 +293,7 @@ if(NOT HAVE_DSPR2)
return()
endif()
-add_library(simd OBJECT ${CPU_TYPE}/jsimd_dspr2.S ${CPU_TYPE}/jsimd.c)
+add_library(simd OBJECT mips/jsimd_dspr2.S mips/jsimd.c)
if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)
set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)
diff --git a/simd/arm64/jsimd_neon.S b/simd/arm64/jsimd_neon.S
index 447dedf..d30715a 100644
--- a/simd/arm64/jsimd_neon.S
+++ b/simd/arm64/jsimd_neon.S
@@ -309,7 +309,7 @@ _\fname:
trn2 \x1\literal, \xi\literal, \x1\literal
.endm
-/* Transpose elements of 2 differnet registers */
+/* Transpose elements of 2 different registers */
.macro transpose x0, x1, xi, xilen, literal
mov \xi\xilen, \x0\xilen
trn1 \x0\literal, \x0\literal, \x1\literal
diff --git a/simd/i386/jsimdcpu.asm b/simd/i386/jsimdcpu.asm
index 50a0d51..faddd38 100644
--- a/simd/i386/jsimdcpu.asm
+++ b/simd/i386/jsimdcpu.asm
@@ -94,9 +94,10 @@ EXTN(jpeg_simd_cpu_support):
xor ecx, ecx
xgetbv
- test eax, 6 ; O/S does not manage XMM/YMM state
+ and eax, 6
+ cmp eax, 6 ; O/S does not manage XMM/YMM state
; using XSAVE
- jz short .no_avx2
+ jnz short .no_avx2
or edi, JSIMD_AVX2
.no_avx2:
diff --git a/simd/loongson/jccolext-mmi.c b/simd/loongson/jccolext-mmi.c
index e1c4e69..6cdeb5e 100644
--- a/simd/loongson/jccolext-mmi.c
+++ b/simd/loongson/jccolext-mmi.c
@@ -2,12 +2,13 @@
* Loongson MMI optimizations for libjpeg-turbo
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved.
- * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
+ * Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
* Authors: ZhuChen <zhuchen@loongson.cn>
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
* CaiWanwei <caiwanwei@loongson.cn>
+ * ZhangLixia <zhanglixia-hf@loongson.cn>
*
* Based on the x86 SIMD extension for IJG JPEG library
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
"$14", "memory"
);
} else {
- mmA = _mm_load_si64((__m64 *)&inptr[0]);
- mmG = _mm_load_si64((__m64 *)&inptr[8]);
- mmF = _mm_load_si64((__m64 *)&inptr[16]);
+ if (!(((long)inptr) & 7)) {
+ mmA = _mm_load_si64((__m64 *)&inptr[0]);
+ mmG = _mm_load_si64((__m64 *)&inptr[8]);
+ mmF = _mm_load_si64((__m64 *)&inptr[16]);
+ } else {
+ mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
+ mmG = _mm_loadu_si64((__m64 *)&inptr[8]);
+ mmF = _mm_loadu_si64((__m64 *)&inptr[16]);
+ }
inptr += RGB_PIXELSIZE * 8;
}
mmD = mmA;
@@ -268,10 +275,17 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
: "$f0", "$f2", "$8", "$9", "$10", "$11", "$13", "memory"
);
} else {
- mmA = _mm_load_si64((__m64 *)&inptr[0]);
- mmF = _mm_load_si64((__m64 *)&inptr[8]);
- mmD = _mm_load_si64((__m64 *)&inptr[16]);
- mmC = _mm_load_si64((__m64 *)&inptr[24]);
+ if (!(((long)inptr) & 7)) {
+ mmA = _mm_load_si64((__m64 *)&inptr[0]);
+ mmF = _mm_load_si64((__m64 *)&inptr[8]);
+ mmD = _mm_load_si64((__m64 *)&inptr[16]);
+ mmC = _mm_load_si64((__m64 *)&inptr[24]);
+ } else {
+ mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
+ mmF = _mm_loadu_si64((__m64 *)&inptr[8]);
+ mmD = _mm_loadu_si64((__m64 *)&inptr[16]);
+ mmC = _mm_loadu_si64((__m64 *)&inptr[24]);
+ }
inptr += RGB_PIXELSIZE * 8;
}
mmB = mmA;
diff --git a/simd/loongson/loongson-mmintrin.h b/simd/loongson/loongson-mmintrin.h
index 4aea763..50d166b 100644
--- a/simd/loongson/loongson-mmintrin.h
+++ b/simd/loongson/loongson-mmintrin.h
@@ -1,8 +1,9 @@
/*
* Loongson MMI optimizations for libjpeg-turbo
*
- * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
+ * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
+ * Copyright (C) 2019, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -41,7 +42,7 @@ typedef float __m32;
/********** Set Operations **********/
-extern __inline __m64
+extern __inline __m64 FUNCTION_ATTRIBS
_mm_setzero_si64(void)
{
return 0.0;
@@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src)
asm("ldc1 %0, %1\n\t"
: "=f" (ret)
: "m" (*src)
+ : "memory"
+ );
+
+ return ret;
+}
+
+extern __inline __m64 FUNCTION_ATTRIBS
+_mm_loadu_si64(const __m64 *src)
+{
+ __m64 ret;
+
+ asm("gsldlc1 %0, 7(%1)\n\t"
+ "gsldrc1 %0, 0(%1)\n\t"
+ : "=f" (ret)
+ : "r" (src)
+ : "memory"
);
return ret;
diff --git a/simd/mips/jsimd.c b/simd/mips/jsimd.c
index af886f6..454cc99 100644
--- a/simd/mips/jsimd.c
+++ b/simd/mips/jsimd.c
@@ -692,8 +692,10 @@ jsimd_can_convsamp_float(void)
if (sizeof(ISLOW_MULT_TYPE) != 2)
return 0;
+#ifndef __mips_soft_float
if (simd_support & JSIMD_DSPR2)
return 1;
+#endif
return 0;
}
@@ -709,7 +711,9 @@ GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
FAST_FLOAT *workspace)
{
+#ifndef __mips_soft_float
jsimd_convsamp_float_dspr2(sample_data, start_col, workspace);
+#endif
}
GLOBAL(int)
@@ -805,8 +809,10 @@ jsimd_can_quantize_float(void)
if (sizeof(ISLOW_MULT_TYPE) != 2)
return 0;
+#ifndef __mips_soft_float
if (simd_support & JSIMD_DSPR2)
return 1;
+#endif
return 0;
}
@@ -821,7 +827,9 @@ GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
FAST_FLOAT *workspace)
{
+#ifndef __mips_soft_float
jsimd_quantize_float_dspr2(coef_block, divisors, workspace);
+#endif
}
GLOBAL(int)
diff --git a/simd/mips/jsimd_dspr2.S b/simd/mips/jsimd_dspr2.S
index 2ec543e..a28c116 100644
--- a/simd/mips/jsimd_dspr2.S
+++ b/simd/mips/jsimd_dspr2.S
@@ -2810,6 +2810,8 @@ LEAF_DSPR2(jsimd_quantize_dspr2)
END(jsimd_quantize_dspr2)
+#ifndef __mips_soft_float
+
/*****************************************************************************/
LEAF_DSPR2(jsimd_quantize_float_dspr2)
/*
@@ -2890,6 +2892,8 @@ LEAF_DSPR2(jsimd_quantize_float_dspr2)
END(jsimd_quantize_float_dspr2)
+#endif
+
/*****************************************************************************/
LEAF_DSPR2(jsimd_idct_2x2_dspr2)
@@ -4110,6 +4114,8 @@ LEAF_DSPR2(jsimd_convsamp_dspr2)
END(jsimd_convsamp_dspr2)
+#ifndef __mips_soft_float
+
/*****************************************************************************/
LEAF_DSPR2(jsimd_convsamp_float_dspr2)
/*
@@ -4468,4 +4474,6 @@ LEAF_DSPR2(jsimd_convsamp_float_dspr2)
END(jsimd_convsamp_float_dspr2)
+#endif
+
/*****************************************************************************/
diff --git a/simd/x86_64/jsimdcpu.asm b/simd/x86_64/jsimdcpu.asm
index 42979be..38e1a7b 100644
--- a/simd/x86_64/jsimdcpu.asm
+++ b/simd/x86_64/jsimdcpu.asm
@@ -60,9 +60,10 @@ EXTN(jpeg_simd_cpu_support):
xor rcx, rcx
xgetbv
- test rax, 6 ; O/S does not manage XMM/YMM state
+ and rax, 6
+ cmp rax, 6 ; O/S does not manage XMM/YMM state
; using XSAVE
- jz short .return
+ jnz short .return
or rdi, JSIMD_AVX2