diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2020-08-25 12:26:52 +0100 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2020-08-25 12:33:13 +0100 |
commit | a2a9d5e790805e88e560e7c84732b4e393597c36 (patch) | |
tree | 314ee8df4004c63a577f2639b7f74aca1bd7e4c5 | |
parent | 09caedd340984b44a4c97337d172039b40920a01 (diff) |
Add compiler-independent alignment macro

Some variables and structures need memory alignment, and the syntax
for declaring it differs when using MSVC compared to GCC and Clang.
This commit adds a compiler-independent alignment macro and uses it
to apply 16-byte alignment for constant pools and temporary buffers
used in Arm NEON SIMD code.

Bug: 922430
Change-Id: Ic2beb7bae88504ba7f3e86e33ef31bf388774403
-rw-r--r-- | README.chromium | 1 | ||||
-rw-r--r-- | jconfigint.h | 9 | ||||
-rw-r--r-- | simd/arm/arm/jccolext-neon.c | 2 | ||||
-rw-r--r-- | simd/arm/arm64/jccolext-neon.c | 4 | ||||
-rw-r--r-- | simd/arm/common/jccolor-neon.c | 12 | ||||
-rw-r--r-- | simd/arm/common/jcgray-neon.c | 1 | ||||
-rw-r--r-- | simd/arm/common/jcgryext-neon.c | 2 | ||||
-rw-r--r-- | simd/arm/common/jcsample-neon.c | 3 | ||||
-rw-r--r-- | simd/arm/common/jidctint-neon.c | 3 | ||||
-rw-r--r-- | simd/arm/common/jidctred-neon.c | 3 |
10 files changed, 28 insertions, 12 deletions
diff --git a/README.chromium b/README.chromium index 5e1b166..1ae3e1a 100644 --- a/README.chromium +++ b/README.chromium @@ -75,6 +75,7 @@ following changes which are not merged to upstream: - Add Arm NEON implementation of h2v2_downsample - Implement RGB->YCbCr using Arm NEON intrinsics - Add Arm NEON implementation of RGB->Grayscale + - Add compiler-independent alignment macro * Patches to enable running the upstream unit tests through gtest. The upstream unit tests are defined here under the section 'TESTS': https://github.com/libjpeg-turbo/libjpeg-turbo/blob/master/CMakeLists.txt diff --git a/jconfigint.h b/jconfigint.h index 10bcac7..974890d 100644 --- a/jconfigint.h +++ b/jconfigint.h @@ -52,3 +52,12 @@ #define HAVEBITSCANFORWARD #endif #endif + +/* How to obtain memory alignment for structures and variables. */ +#if defined(_MSC_VER) +#define ALIGN(ALIGNMENT) __declspec(align((ALIGNMENT))) +#elif defined(__clang__) || defined(__GNUC__) +#define ALIGN(ALIGNMENT) __attribute__((aligned(ALIGNMENT))) +#else +#error "Unknown compiler" +#endif diff --git a/simd/arm/arm/jccolext-neon.c b/simd/arm/arm/jccolext-neon.c index 1e631b7..4f22e1f 100644 --- a/simd/arm/arm/jccolext-neon.c +++ b/simd/arm/arm/jccolext-neon.c @@ -78,7 +78,7 @@ void jsimd_rgb_ycc_convert_neon(JDIMENSION image_width, /* last (image_width % 8) columns of data are first memcopied to a */ /* temporary buffer large enough to accommodate the vector load. 
*/ if (cols_remaining < 8) { - uint8_t __attribute__((aligned(8))) tmp_buf[8 * RGB_PIXELSIZE]; + ALIGN(16) uint8_t tmp_buf[8 * RGB_PIXELSIZE]; memcpy(tmp_buf, inptr, cols_remaining * RGB_PIXELSIZE); inptr = tmp_buf; } diff --git a/simd/arm/arm64/jccolext-neon.c b/simd/arm/arm64/jccolext-neon.c index 5c642fb..89f520a 100644 --- a/simd/arm/arm64/jccolext-neon.c +++ b/simd/arm/arm64/jccolext-neon.c @@ -161,7 +161,7 @@ void jsimd_rgb_ycc_convert_neon(JDIMENSION image_width, /* To prevent buffer overread by the vector load instructions, the */ /* last (image_width % 16) columns of data are first memcopied to a */ /* temporary buffer large enough to accommodate the vector load. */ - uint8_t __attribute__((aligned(16))) tmp_buf[16 * RGB_PIXELSIZE]; + ALIGN(16) uint8_t tmp_buf[16 * RGB_PIXELSIZE]; memcpy(tmp_buf, inptr, cols_remaining * RGB_PIXELSIZE); inptr = tmp_buf; @@ -252,7 +252,7 @@ void jsimd_rgb_ycc_convert_neon(JDIMENSION image_width, /* To prevent buffer overread by the vector load instructions, the */ /* last (image_width % 8) columns of data are first memcopied to a */ /* temporary buffer large enough to accommodate the vector load. 
*/ - uint8_t __attribute__((aligned(8))) tmp_buf[8 * RGB_PIXELSIZE]; + ALIGN(16) uint8_t tmp_buf[8 * RGB_PIXELSIZE]; memcpy(tmp_buf, inptr, cols_remaining * RGB_PIXELSIZE); inptr = tmp_buf; diff --git a/simd/arm/common/jccolor-neon.c b/simd/arm/common/jccolor-neon.c index 2ec1636..f87c8d9 100644 --- a/simd/arm/common/jccolor-neon.c +++ b/simd/arm/common/jccolor-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "../../../jconfigint.h" #include "../../../jinclude.h" #include "../../../jpeglib.h" #include "../../../jsimd.h" @@ -41,11 +42,12 @@ #define F_0_418 27439 #define F_0_081 5329 -const static uint16_t jsimd_rgb_ycc_neon_consts[] = { F_0_298, F_0_587, - F_0_113, F_0_168, - F_0_331, F_0_500, - F_0_418, F_0_081 - }; +ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { + F_0_298, F_0_587, + F_0_113, F_0_168, + F_0_331, F_0_500, + F_0_418, F_0_081 + }; /* Include inline routines for colorspace extensions. */ diff --git a/simd/arm/common/jcgray-neon.c b/simd/arm/common/jcgray-neon.c index 4cba0ef..39d903f 100644 --- a/simd/arm/common/jcgray-neon.c +++ b/simd/arm/common/jcgray-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "../../../jconfigint.h" #include "../../../jinclude.h" #include "../../../jpeglib.h" #include "../../../jsimd.h" diff --git a/simd/arm/common/jcgryext-neon.c b/simd/arm/common/jcgryext-neon.c index 35ba0ac..69ea67f 100644 --- a/simd/arm/common/jcgryext-neon.c +++ b/simd/arm/common/jcgryext-neon.c @@ -58,7 +58,7 @@ void jsimd_rgb_gray_convert_neon(JDIMENSION image_width, /* last (image_width % 16) columns of data are first memcopied to a */ /* temporary buffer large enough to accommodate the vector load. 
*/ if (cols_remaining < 16) { - uint8_t __attribute__((aligned(8))) tmp_buf[16 * RGB_PIXELSIZE]; + ALIGN(16) uint8_t tmp_buf[16 * RGB_PIXELSIZE]; memcpy(tmp_buf, inptr, cols_remaining * RGB_PIXELSIZE); inptr = tmp_buf; } diff --git a/simd/arm/common/jcsample-neon.c b/simd/arm/common/jcsample-neon.c index ff989dc..a5ddf16 100644 --- a/simd/arm/common/jcsample-neon.c +++ b/simd/arm/common/jcsample-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "../../../jconfigint.h" #include "../../../jinclude.h" #include "../../../jpeglib.h" #include "../../../jsimd.h" @@ -31,7 +32,7 @@ #include <arm_neon.h> -static const uint8_t jsimd_h2_downsample_consts[] = { +ALIGN(16) static const uint8_t jsimd_h2_downsample_consts[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* Pad 0 */ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* Pad 1 */ diff --git a/simd/arm/common/jidctint-neon.c b/simd/arm/common/jidctint-neon.c index 11076a0..4ee9eb0 100644 --- a/simd/arm/common/jidctint-neon.c +++ b/simd/arm/common/jidctint-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "../../../jconfigint.h" #include "../../../jinclude.h" #include "../../../jpeglib.h" #include "../../../jsimd.h" @@ -75,7 +76,7 @@ #define F_2_053_MINUS_2_562 (F_2_053 - F_2_562) #define F_0_541_PLUS_0_765 (F_0_541 + F_0_765) -__attribute__ ((aligned(8))) static int16_t jsimd_idct_islow_neon_consts[] = { +ALIGN(16) static const int16_t jsimd_idct_islow_neon_consts[] = { F_0_899, F_0_541, F_2_562, F_0_298_MINUS_0_899, F_1_501_MINUS_0_899, F_2_053_MINUS_2_562, diff --git a/simd/arm/common/jidctred-neon.c b/simd/arm/common/jidctred-neon.c index 7e95bf3..3c9393b 100644 --- a/simd/arm/common/jidctred-neon.c +++ b/simd/arm/common/jidctred-neon.c @@ -21,6 +21,7 @@ */ #define JPEG_INTERNALS +#include "../../../jconfigint.h" #include "../../../jinclude.h" #include "../../../jpeglib.h" #include "../../../jsimd.h" @@ -183,7 +184,7 @@ void 
jsimd_idct_2x2_neon(void *dct_table, * exact compatibility with jpeg-6b. */ -__attribute__ ((aligned(8))) static int16_t jsimd_idct_4x4_neon_consts[] = { +ALIGN(16) static const int16_t jsimd_idct_4x4_neon_consts[] = { F_1_847, -F_0_765, -F_0_211, F_1_451, -F_2_172, F_1_061, -F_0_509, -F_0_601, F_0_899, F_2_562, 0, 0 |