diff options
author | Haibo Huang <hhb@google.com> | 2019-03-15 09:25:39 -0700 |
---|---|---|
committer | Haibo Huang <hhb@google.com> | 2019-03-18 10:38:39 -0700 |
commit | 090811cdb9b8ec7b7c7379264fb8f88fdb3f3aef (patch) | |
tree | 5db8efbe4eafcf1aba6bf9d6822c7b2bb459dd29 /simd | |
parent | 5cb261de3c18e97c1930493d8824c4044bc0acdb (diff) | |
parent | a4aa30d9a080bbc50421285049e4379dcaf8a669 (diff) |
Upgrade libjpeg to 2.0.2
Test: build
Change-Id: If51cbf56f0b48fde26fac4f36d4f0219068659ac
Diffstat (limited to 'simd')
-rwxr-xr-x | simd/CMakeLists.txt | 4 |
-rw-r--r-- | simd/arm64/jsimd_neon.S | 2 |
-rw-r--r-- | simd/i386/jsimdcpu.asm | 5 |
-rw-r--r-- | simd/loongson/jccolext-mmi.c | 32 |
-rw-r--r-- | simd/loongson/loongson-mmintrin.h | 21 |
-rw-r--r-- | simd/mips/jsimd.c | 8 |
-rw-r--r-- | simd/mips/jsimd_dspr2.S | 8 |
-rw-r--r-- | simd/x86_64/jsimdcpu.asm | 5 |
8 files changed, 67 insertions, 18 deletions
diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt index 346994c..8dbd7f1 100755 --- a/simd/CMakeLists.txt +++ b/simd/CMakeLists.txt @@ -262,7 +262,7 @@ endif() # MIPS (GAS) ############################################################################### -elseif(CPU_TYPE STREQUAL "mips") +elseif(CPU_TYPE STREQUAL "mips" OR CPU_TYPE STREQUAL "mipsel") enable_language(ASM) @@ -293,7 +293,7 @@ if(NOT HAVE_DSPR2) return() endif() -add_library(simd OBJECT ${CPU_TYPE}/jsimd_dspr2.S ${CPU_TYPE}/jsimd.c) +add_library(simd OBJECT mips/jsimd_dspr2.S mips/jsimd.c) if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED) set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) diff --git a/simd/arm64/jsimd_neon.S b/simd/arm64/jsimd_neon.S index 447dedf..d30715a 100644 --- a/simd/arm64/jsimd_neon.S +++ b/simd/arm64/jsimd_neon.S @@ -309,7 +309,7 @@ _\fname: trn2 \x1\literal, \xi\literal, \x1\literal .endm -/* Transpose elements of 2 differnet registers */ +/* Transpose elements of 2 different registers */ .macro transpose x0, x1, xi, xilen, literal mov \xi\xilen, \x0\xilen trn1 \x0\literal, \x0\literal, \x1\literal diff --git a/simd/i386/jsimdcpu.asm b/simd/i386/jsimdcpu.asm index 50a0d51..faddd38 100644 --- a/simd/i386/jsimdcpu.asm +++ b/simd/i386/jsimdcpu.asm @@ -94,9 +94,10 @@ EXTN(jpeg_simd_cpu_support): xor ecx, ecx xgetbv - test eax, 6 ; O/S does not manage XMM/YMM state + and eax, 6 + cmp eax, 6 ; O/S does not manage XMM/YMM state ; using XSAVE - jz short .no_avx2 + jnz short .no_avx2 or edi, JSIMD_AVX2 .no_avx2: diff --git a/simd/loongson/jccolext-mmi.c b/simd/loongson/jccolext-mmi.c index e1c4e69..6cdeb5e 100644 --- a/simd/loongson/jccolext-mmi.c +++ b/simd/loongson/jccolext-mmi.c @@ -2,12 +2,13 @@ * Loongson MMI optimizations for libjpeg-turbo * * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB - * Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved. - * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. 
+ * Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. * All Rights Reserved. * Authors: ZhuChen <zhuchen@loongson.cn> * SunZhangzhi <sunzhangzhi-cq@loongson.cn> * CaiWanwei <caiwanwei@loongson.cn> + * ZhangLixia <zhanglixia-hf@loongson.cn> * * Based on the x86 SIMD extension for IJG JPEG library * Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, "$14", "memory" ); } else { - mmA = _mm_load_si64((__m64 *)&inptr[0]); - mmG = _mm_load_si64((__m64 *)&inptr[8]); - mmF = _mm_load_si64((__m64 *)&inptr[16]); + if (!(((long)inptr) & 7)) { + mmA = _mm_load_si64((__m64 *)&inptr[0]); + mmG = _mm_load_si64((__m64 *)&inptr[8]); + mmF = _mm_load_si64((__m64 *)&inptr[16]); + } else { + mmA = _mm_loadu_si64((__m64 *)&inptr[0]); + mmG = _mm_loadu_si64((__m64 *)&inptr[8]); + mmF = _mm_loadu_si64((__m64 *)&inptr[16]); + } inptr += RGB_PIXELSIZE * 8; } mmD = mmA; @@ -268,10 +275,17 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, : "$f0", "$f2", "$8", "$9", "$10", "$11", "$13", "memory" ); } else { - mmA = _mm_load_si64((__m64 *)&inptr[0]); - mmF = _mm_load_si64((__m64 *)&inptr[8]); - mmD = _mm_load_si64((__m64 *)&inptr[16]); - mmC = _mm_load_si64((__m64 *)&inptr[24]); + if (!(((long)inptr) & 7)) { + mmA = _mm_load_si64((__m64 *)&inptr[0]); + mmF = _mm_load_si64((__m64 *)&inptr[8]); + mmD = _mm_load_si64((__m64 *)&inptr[16]); + mmC = _mm_load_si64((__m64 *)&inptr[24]); + } else { + mmA = _mm_loadu_si64((__m64 *)&inptr[0]); + mmF = _mm_loadu_si64((__m64 *)&inptr[8]); + mmD = _mm_loadu_si64((__m64 *)&inptr[16]); + mmC = _mm_loadu_si64((__m64 *)&inptr[24]); + } inptr += RGB_PIXELSIZE * 8; } mmB = mmA; diff --git a/simd/loongson/loongson-mmintrin.h b/simd/loongson/loongson-mmintrin.h index 4aea763..50d166b 100644 --- a/simd/loongson/loongson-mmintrin.h +++ 
b/simd/loongson/loongson-mmintrin.h @@ -1,8 +1,9 @@ /* * Loongson MMI optimizations for libjpeg-turbo * - * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. * All Rights Reserved. + * Copyright (C) 2019, D. R. Commander. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -41,7 +42,7 @@ typedef float __m32; /********** Set Operations **********/ -extern __inline __m64 +extern __inline __m64 FUNCTION_ATTRIBS _mm_setzero_si64(void) { return 0.0; @@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src) asm("ldc1 %0, %1\n\t" : "=f" (ret) : "m" (*src) + : "memory" + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_loadu_si64(const __m64 *src) +{ + __m64 ret; + + asm("gsldlc1 %0, 7(%1)\n\t" + "gsldrc1 %0, 0(%1)\n\t" + : "=f" (ret) + : "r" (src) + : "memory" ); return ret; diff --git a/simd/mips/jsimd.c b/simd/mips/jsimd.c index af886f6..454cc99 100644 --- a/simd/mips/jsimd.c +++ b/simd/mips/jsimd.c @@ -692,8 +692,10 @@ jsimd_can_convsamp_float(void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; +#ifndef __mips_soft_float if (simd_support & JSIMD_DSPR2) return 1; +#endif return 0; } @@ -709,7 +711,9 @@ GLOBAL(void) jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace) { +#ifndef __mips_soft_float jsimd_convsamp_float_dspr2(sample_data, start_col, workspace); +#endif } GLOBAL(int) @@ -805,8 +809,10 @@ jsimd_can_quantize_float(void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; +#ifndef __mips_soft_float if (simd_support & JSIMD_DSPR2) return 1; +#endif return 0; } @@ -821,7 +827,9 @@ GLOBAL(void) jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace) { +#ifndef __mips_soft_float jsimd_quantize_float_dspr2(coef_block, divisors, workspace); +#endif } GLOBAL(int) diff --git 
a/simd/mips/jsimd_dspr2.S b/simd/mips/jsimd_dspr2.S index 2ec543e..a28c116 100644 --- a/simd/mips/jsimd_dspr2.S +++ b/simd/mips/jsimd_dspr2.S @@ -2810,6 +2810,8 @@ LEAF_DSPR2(jsimd_quantize_dspr2) END(jsimd_quantize_dspr2) +#ifndef __mips_soft_float + /*****************************************************************************/ LEAF_DSPR2(jsimd_quantize_float_dspr2) /* @@ -2890,6 +2892,8 @@ LEAF_DSPR2(jsimd_quantize_float_dspr2) END(jsimd_quantize_float_dspr2) +#endif + /*****************************************************************************/ LEAF_DSPR2(jsimd_idct_2x2_dspr2) @@ -4110,6 +4114,8 @@ LEAF_DSPR2(jsimd_convsamp_dspr2) END(jsimd_convsamp_dspr2) +#ifndef __mips_soft_float + /*****************************************************************************/ LEAF_DSPR2(jsimd_convsamp_float_dspr2) /* @@ -4468,4 +4474,6 @@ LEAF_DSPR2(jsimd_convsamp_float_dspr2) END(jsimd_convsamp_float_dspr2) +#endif + /*****************************************************************************/ diff --git a/simd/x86_64/jsimdcpu.asm b/simd/x86_64/jsimdcpu.asm index 42979be..38e1a7b 100644 --- a/simd/x86_64/jsimdcpu.asm +++ b/simd/x86_64/jsimdcpu.asm @@ -60,9 +60,10 @@ EXTN(jpeg_simd_cpu_support): xor rcx, rcx xgetbv - test rax, 6 ; O/S does not manage XMM/YMM state + and rax, 6 + cmp rax, 6 ; O/S does not manage XMM/YMM state ; using XSAVE - jz short .return + jnz short .return or rdi, JSIMD_AVX2 |