diff options
author | Nathan Moinvaziri <nathan@nathanm.com> | 2022-06-29 08:57:11 -0700 |
---|---|---|
committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2023-03-17 21:27:56 +0100 |
commit | b4866c447e776ecd8306284e69f1ccc737314b46 (patch) | |
tree | ca580d50ad93da8d41182d8c96fd4123ae08be2e | |
parent | d7eb21fc172d4600fc419d37ebc01c75e41f5005 (diff) |
Use memcpy instead of dereferencing cast pointers for potentially unaligned accesses, because GCC 11 assumes such pointers are aligned in certain instances.
Backport note: much of the original commit was dropped because of merge conflicts.
-rw-r--r-- | arch/arm/chunkset_neon.c | 6 | ||||
-rw-r--r-- | chunkset.c | 36 | ||||
-rw-r--r-- | deflate.h | 62 |
3 files changed, 30 insertions, 74 deletions
diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c index d5deed6..51dcf09 100644 --- a/arch/arm/chunkset_neon.c +++ b/arch/arm/chunkset_neon.c @@ -21,19 +21,19 @@ typedef uint8x16_t chunk_t; static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { uint16_t tmp; - memcpy(&tmp, from, 2); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp)); } static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { uint32_t tmp; - memcpy(&tmp, from, 4); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp)); } static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { uint64_t tmp; - memcpy(&tmp, from, 8); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp)); } @@ -20,45 +20,21 @@ typedef struct chunk_t { uint8_t u8[8]; } chunk_t; #define HAVE_CHUNKMEMSET_8 static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { -#if defined(UNALIGNED64_OK) - uint32_t half_chunk; - half_chunk = *(uint32_t *)from; - *chunk = 0x0000000100000001 * (uint64_t)half_chunk; -#elif defined(UNALIGNED_OK) - chunk->u32[0] = *(uint32_t *)from; - chunk->u32[1] = chunk->u32[0]; -#else - uint8_t *chunkptr = (uint8_t *)chunk; - memcpy(chunkptr, from, 4); - memcpy(chunkptr+4, from, 4); -#endif + uint8_t *dest = (uint8_t *)chunk; + memcpy(dest, from, sizeof(uint32_t)); + memcpy(dest+4, from, sizeof(uint32_t)); } static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { -#if defined(UNALIGNED64_OK) - *chunk = *(uint64_t *)from; -#elif defined(UNALIGNED_OK) - uint32_t* p = (uint32_t *)from; - chunk->u32[0] = p[0]; - chunk->u32[1] = p[1]; -#else - memcpy(chunk, from, sizeof(chunk_t)); -#endif + memcpy(chunk, from, sizeof(uint64_t)); } static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { - chunkmemset_8((uint8_t *)s, chunk); + memcpy(chunk, (uint8_t *)s, sizeof(uint64_t)); } static inline void storechunk(uint8_t *out, chunk_t *chunk) { -#if defined(UNALIGNED64_OK) - 
*(uint64_t *)out = *chunk; -#elif defined(UNALIGNED_OK) - ((uint32_t *)out)[0] = chunk->u32[0]; - ((uint32_t *)out)[1] = chunk->u32[1]; -#else - memcpy(out, chunk, sizeof(chunk_t)); -#endif + memcpy(out, chunk, sizeof(uint64_t)); } #define CHUNKSIZE chunksize_c @@ -290,13 +290,11 @@ typedef enum { * IN assertion: there is enough room in pending_buf. */ static inline void put_short(deflate_state *s, uint16_t w) { -#if defined(UNALIGNED_OK) - *(uint16_t *)(&s->pending_buf[s->pending]) = w; - s->pending += 2; -#else - put_byte(s, (w & 0xff)); - put_byte(s, ((w >> 8) & 0xff)); +#if BYTE_ORDER == BIG_ENDIAN + w = ZSWAP16(w); #endif + memcpy(&s->pending_buf[s->pending], &w, sizeof(w)); + s->pending += 2; } /* =========================================================================== @@ -304,8 +302,11 @@ static inline void put_short(deflate_state *s, uint16_t w) { * IN assertion: there is enough room in pending_buf. */ static inline void put_short_msb(deflate_state *s, uint16_t w) { - put_byte(s, ((w >> 8) & 0xff)); - put_byte(s, (w & 0xff)); +#if BYTE_ORDER == LITTLE_ENDIAN + w = ZSWAP16(w); +#endif + memcpy(&s->pending_buf[s->pending], &w, sizeof(w)); + s->pending += 2; } /* =========================================================================== @@ -313,15 +314,11 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) { * IN assertion: there is enough room in pending_buf. 
*/ static inline void put_uint32(deflate_state *s, uint32_t dw) { -#if defined(UNALIGNED_OK) - *(uint32_t *)(&s->pending_buf[s->pending]) = dw; - s->pending += 4; -#else - put_byte(s, (dw & 0xff)); - put_byte(s, ((dw >> 8) & 0xff)); - put_byte(s, ((dw >> 16) & 0xff)); - put_byte(s, ((dw >> 24) & 0xff)); +#if BYTE_ORDER == BIG_ENDIAN + dw = ZSWAP32(dw); #endif + memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw)); + s->pending += 4; } /* =========================================================================== @@ -329,15 +326,11 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) { * IN assertion: there is enough room in pending_buf. */ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) { -#if defined(UNALIGNED_OK) - *(uint32_t *)(&s->pending_buf[s->pending]) = ZSWAP32(dw); - s->pending += 4; -#else - put_byte(s, ((dw >> 24) & 0xff)); - put_byte(s, ((dw >> 16) & 0xff)); - put_byte(s, ((dw >> 8) & 0xff)); - put_byte(s, (dw & 0xff)); +#if BYTE_ORDER == LITTLE_ENDIAN + dw = ZSWAP32(dw); #endif + memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw)); + s->pending += 4; } /* =========================================================================== @@ -345,24 +338,11 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) { * IN assertion: there is enough room in pending_buf. 
*/ static inline void put_uint64(deflate_state *s, uint64_t lld) { -#if defined(UNALIGNED64_OK) - *(uint64_t *)(&s->pending_buf[s->pending]) = lld; - s->pending += 8; -#elif defined(UNALIGNED_OK) - *(uint32_t *)(&s->pending_buf[s->pending]) = lld & 0xffffffff; - s->pending += 4; - *(uint32_t *)(&s->pending_buf[s->pending]) = (lld >> 32) & 0xffffffff; - s->pending += 4; -#else - put_byte(s, (lld & 0xff)); - put_byte(s, ((lld >> 8) & 0xff)); - put_byte(s, ((lld >> 16) & 0xff)); - put_byte(s, ((lld >> 24) & 0xff)); - put_byte(s, ((lld >> 32) & 0xff)); - put_byte(s, ((lld >> 40) & 0xff)); - put_byte(s, ((lld >> 48) & 0xff)); - put_byte(s, ((lld >> 56) & 0xff)); +#if BYTE_ORDER == BIG_ENDIAN + lld = ZSWAP64(lld); #endif + memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld)); + s->pending += 8; } #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) |