diff options
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/armfeature.c | 3 | ||||
-rw-r--r-- | arch/arm/chunkset_neon.c | 11 | ||||
-rw-r--r-- | arch/arm/crc32_acle.c | 34 |
3 files changed, 16 insertions, 32 deletions
diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c index bef9b29..978c987 100644 --- a/arch/arm/armfeature.c +++ b/arch/arm/armfeature.c @@ -11,6 +11,9 @@ # define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32 # endif #elif defined(__APPLE__) +# if !defined(_DARWIN_C_SOURCE) +# define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */ +# endif # include <sys/sysctl.h> #elif defined(_WIN32) # include <winapifamily.h> diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c index e0ad3e0..51dcf09 100644 --- a/arch/arm/chunkset_neon.c +++ b/arch/arm/chunkset_neon.c @@ -15,30 +15,25 @@ typedef uint8x16_t chunk_t; #define CHUNK_SIZE 16 -#define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 -static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { - *chunk = vld1q_dup_u8(from); -} - static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { uint16_t tmp; - memcpy(&tmp, from, 2); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp)); } static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { uint32_t tmp; - memcpy(&tmp, from, 4); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp)); } static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { uint64_t tmp; - memcpy(&tmp, from, 8); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp)); } diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c index 99013e1..0bcd3cf 100644 --- a/arch/arm/crc32_acle.c +++ b/arch/arm/crc32_acle.c @@ -62,7 +62,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { len--; } - if ((len > sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { + if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { buf2 = (const uint16_t *) buf; c = __crc32h(c, *buf2++); len -= sizeof(uint16_t); @@ -72,22 +72,17 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { } #if defined(__aarch64__) - if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { + if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { c = __crc32w(c, *buf4++); len -= sizeof(uint32_t); } - const uint64_t *buf8 = (const uint64_t *) buf4; - -#ifdef UNROLL_MORE - while (len >= 4 * sizeof(uint64_t)) { - c = __crc32d(c, *buf8++); - c = __crc32d(c, *buf8++); - c = __crc32d(c, *buf8++); - c = __crc32d(c, *buf8++); - len -= 4 * sizeof(uint64_t); + if (len == 0) { + c = ~c; + return c; } -#endif + + const uint64_t *buf8 = (const uint64_t *) buf4; while (len >= sizeof(uint64_t)) { c = __crc32d(c, *buf8++); @@ -111,19 +106,10 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { buf = (const unsigned char *) buf2; #else /* __aarch64__ */ -# ifdef UNROLL_MORE - while (len >= 8 * sizeof(uint32_t)) { - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - len -= 8 * sizeof(uint32_t); + if (len == 0) { + c = ~c; + return c; } -# endif while (len >= sizeof(uint32_t)) { c = __crc32w(c, *buf4++); |