summaryrefslogtreecommitdiff
path: root/simd
diff options
context:
space:
mode:
authorDRC <information@virtualgl.org>2019-01-30 12:43:45 -0600
committerDRC <information@virtualgl.org>2019-01-31 15:30:05 -0600
commit1c2d3cfaaf7324d9091ba3cc4e900f60a16fe1aa (patch)
tree06499d6245c9b197edc55ce48f3aa72e2697a777 /simd
parent2d0b675adff076723ae5f79dd505beeea0034af1 (diff)
MMI: Fix comp. perf. issue w/ unaligned image rows
Using ldc1 with a non-64-bit-aligned memory location causes as much as a 10x slow-down in overall compression performance.
Diffstat (limited to 'simd')
-rw-r--r--simd/loongson/jccolext-mmi.c17
-rw-r--r--simd/loongson/loongson-mmintrin.h21
2 files changed, 31 insertions, 7 deletions
diff --git a/simd/loongson/jccolext-mmi.c b/simd/loongson/jccolext-mmi.c
index e1c4e69..a94b53b 100644
--- a/simd/loongson/jccolext-mmi.c
+++ b/simd/loongson/jccolext-mmi.c
@@ -2,12 +2,13 @@
* Loongson MMI optimizations for libjpeg-turbo
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved.
- * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
+ * Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
* Authors: ZhuChen <zhuchen@loongson.cn>
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
* CaiWanwei <caiwanwei@loongson.cn>
+ * ZhangLixia <zhanglixia-hf@loongson.cn>
*
* Based on the x86 SIMD extension for IJG JPEG library
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
"$14", "memory"
);
} else {
- mmA = _mm_load_si64((__m64 *)&inptr[0]);
- mmG = _mm_load_si64((__m64 *)&inptr[8]);
- mmF = _mm_load_si64((__m64 *)&inptr[16]);
+ if (!(((long)inptr) & 7)) {
+ mmA = _mm_load_si64((__m64 *)&inptr[0]);
+ mmG = _mm_load_si64((__m64 *)&inptr[8]);
+ mmF = _mm_load_si64((__m64 *)&inptr[16]);
+ } else {
+ mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
+ mmG = _mm_loadu_si64((__m64 *)&inptr[8]);
+ mmF = _mm_loadu_si64((__m64 *)&inptr[16]);
+ }
inptr += RGB_PIXELSIZE * 8;
}
mmD = mmA;
diff --git a/simd/loongson/loongson-mmintrin.h b/simd/loongson/loongson-mmintrin.h
index 4aea763..50d166b 100644
--- a/simd/loongson/loongson-mmintrin.h
+++ b/simd/loongson/loongson-mmintrin.h
@@ -1,8 +1,9 @@
/*
* Loongson MMI optimizations for libjpeg-turbo
*
- * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
+ * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
+ * Copyright (C) 2019, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -41,7 +42,7 @@ typedef float __m32;
/********** Set Operations **********/
-extern __inline __m64
+extern __inline __m64 FUNCTION_ATTRIBS
_mm_setzero_si64(void)
{
return 0.0;
@@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src)
asm("ldc1 %0, %1\n\t"
: "=f" (ret)
: "m" (*src)
+ : "memory"
+ );
+
+ return ret;
+}
+
+extern __inline __m64 FUNCTION_ATTRIBS
+_mm_loadu_si64(const __m64 *src)
+{
+ __m64 ret;
+
+ asm("gsldlc1 %0, 7(%1)\n\t"
+ "gsldrc1 %0, 0(%1)\n\t"
+ : "=f" (ret)
+ : "r" (src)
+ : "memory"
);
return ret;