diff options
author | DRC <information@virtualgl.org> | 2019-01-30 12:43:45 -0600 |
---|---|---|
committer | DRC <information@virtualgl.org> | 2019-01-31 15:30:05 -0600 |
commit | 1c2d3cfaaf7324d9091ba3cc4e900f60a16fe1aa (patch) | |
tree | 06499d6245c9b197edc55ce48f3aa72e2697a777 /simd | |
parent | 2d0b675adff076723ae5f79dd505beeea0034af1 (diff) |
MMI: Fix compression performance issue with unaligned image rows
Using ldc1 with a non-64-bit-aligned memory location causes as much as a
10x slow-down in overall compression performance.
Diffstat (limited to 'simd')
-rw-r--r-- | simd/loongson/jccolext-mmi.c | 17 | ||||
-rw-r--r-- | simd/loongson/loongson-mmintrin.h | 21 |
2 files changed, 31 insertions, 7 deletions
diff --git a/simd/loongson/jccolext-mmi.c b/simd/loongson/jccolext-mmi.c index e1c4e69..a94b53b 100644 --- a/simd/loongson/jccolext-mmi.c +++ b/simd/loongson/jccolext-mmi.c @@ -2,12 +2,13 @@ * Loongson MMI optimizations for libjpeg-turbo * * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB - * Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved. - * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. * All Rights Reserved. * Authors: ZhuChen <zhuchen@loongson.cn> * SunZhangzhi <sunzhangzhi-cq@loongson.cn> * CaiWanwei <caiwanwei@loongson.cn> + * ZhangLixia <zhanglixia-hf@loongson.cn> * * Based on the x86 SIMD extension for IJG JPEG library * Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, "$14", "memory" ); } else { - mmA = _mm_load_si64((__m64 *)&inptr[0]); - mmG = _mm_load_si64((__m64 *)&inptr[8]); - mmF = _mm_load_si64((__m64 *)&inptr[16]); + if (!(((long)inptr) & 7)) { + mmA = _mm_load_si64((__m64 *)&inptr[0]); + mmG = _mm_load_si64((__m64 *)&inptr[8]); + mmF = _mm_load_si64((__m64 *)&inptr[16]); + } else { + mmA = _mm_loadu_si64((__m64 *)&inptr[0]); + mmG = _mm_loadu_si64((__m64 *)&inptr[8]); + mmF = _mm_loadu_si64((__m64 *)&inptr[16]); + } inptr += RGB_PIXELSIZE * 8; } mmD = mmA; diff --git a/simd/loongson/loongson-mmintrin.h b/simd/loongson/loongson-mmintrin.h index 4aea763..50d166b 100644 --- a/simd/loongson/loongson-mmintrin.h +++ b/simd/loongson/loongson-mmintrin.h @@ -1,8 +1,9 @@ /* * Loongson MMI optimizations for libjpeg-turbo * - * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. * All Rights Reserved. + * Copyright (C) 2019, D. R. Commander. All Rights Reserved. 
* * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -41,7 +42,7 @@ typedef float __m32; /********** Set Operations **********/ -extern __inline __m64 +extern __inline __m64 FUNCTION_ATTRIBS _mm_setzero_si64(void) { return 0.0; @@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src) asm("ldc1 %0, %1\n\t" : "=f" (ret) : "m" (*src) + : "memory" + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_loadu_si64(const __m64 *src) +{ + __m64 ret; + + asm("gsldlc1 %0, 7(%1)\n\t" + "gsldrc1 %0, 0(%1)\n\t" + : "=f" (ret) + : "r" (src) + : "memory" ); return ret; |