summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nathan Moinvaziri <nathan@nathanm.com> 2022-06-29 08:57:11 -0700
committer Hans Kristian Rosbach <hk-github@circlestorm.org> 2023-03-17 21:27:56 +0100
commit b4866c447e776ecd8306284e69f1ccc737314b46 (patch)
tree ca580d50ad93da8d41182d8c96fd4123ae08be2e
parent d7eb21fc172d4600fc419d37ebc01c75e41f5005 (diff)
Don't use unaligned access for memcpy instructions due to GCC 11 assuming it is aligned in certain instances.
Backport note: Removed a lot of the original commit due to merge conflicts
-rw-r--r-- arch/arm/chunkset_neon.c 6
-rw-r--r-- chunkset.c 36
-rw-r--r-- deflate.h 62
3 files changed, 30 insertions, 74 deletions
diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c
index d5deed6..51dcf09 100644
--- a/arch/arm/chunkset_neon.c
+++ b/arch/arm/chunkset_neon.c
@@ -21,19 +21,19 @@ typedef uint8x16_t chunk_t;
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
- memcpy(&tmp, from, 2);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
- memcpy(&tmp, from, 4);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
- memcpy(&tmp, from, 8);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
}
diff --git a/chunkset.c b/chunkset.c
index 765082c..59e30fc 100644
--- a/chunkset.c
+++ b/chunkset.c
@@ -20,45 +20,21 @@ typedef struct chunk_t { uint8_t u8[8]; } chunk_t;
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-#if defined(UNALIGNED64_OK)
- uint32_t half_chunk;
- half_chunk = *(uint32_t *)from;
- *chunk = 0x0000000100000001 * (uint64_t)half_chunk;
-#elif defined(UNALIGNED_OK)
- chunk->u32[0] = *(uint32_t *)from;
- chunk->u32[1] = chunk->u32[0];
-#else
- uint8_t *chunkptr = (uint8_t *)chunk;
- memcpy(chunkptr, from, 4);
- memcpy(chunkptr+4, from, 4);
-#endif
+ uint8_t *dest = (uint8_t *)chunk;
+ memcpy(dest, from, sizeof(uint32_t));
+ memcpy(dest+4, from, sizeof(uint32_t));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-#if defined(UNALIGNED64_OK)
- *chunk = *(uint64_t *)from;
-#elif defined(UNALIGNED_OK)
- uint32_t* p = (uint32_t *)from;
- chunk->u32[0] = p[0];
- chunk->u32[1] = p[1];
-#else
- memcpy(chunk, from, sizeof(chunk_t));
-#endif
+ memcpy(chunk, from, sizeof(uint64_t));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
- chunkmemset_8((uint8_t *)s, chunk);
+ memcpy(chunk, (uint8_t *)s, sizeof(uint64_t));
}
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
-#if defined(UNALIGNED64_OK)
- *(uint64_t *)out = *chunk;
-#elif defined(UNALIGNED_OK)
- ((uint32_t *)out)[0] = chunk->u32[0];
- ((uint32_t *)out)[1] = chunk->u32[1];
-#else
- memcpy(out, chunk, sizeof(chunk_t));
-#endif
+ memcpy(out, chunk, sizeof(uint64_t));
}
#define CHUNKSIZE chunksize_c
diff --git a/deflate.h b/deflate.h
index 525b3cd..3ae6c1b 100644
--- a/deflate.h
+++ b/deflate.h
@@ -290,13 +290,11 @@ typedef enum {
* IN assertion: there is enough room in pending_buf.
*/
static inline void put_short(deflate_state *s, uint16_t w) {
-#if defined(UNALIGNED_OK)
- *(uint16_t *)(&s->pending_buf[s->pending]) = w;
- s->pending += 2;
-#else
- put_byte(s, (w & 0xff));
- put_byte(s, ((w >> 8) & 0xff));
+#if BYTE_ORDER == BIG_ENDIAN
+ w = ZSWAP16(w);
#endif
+ memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+ s->pending += 2;
}
/* ===========================================================================
@@ -304,8 +302,11 @@ static inline void put_short(deflate_state *s, uint16_t w) {
* IN assertion: there is enough room in pending_buf.
*/
static inline void put_short_msb(deflate_state *s, uint16_t w) {
- put_byte(s, ((w >> 8) & 0xff));
- put_byte(s, (w & 0xff));
+#if BYTE_ORDER == LITTLE_ENDIAN
+ w = ZSWAP16(w);
+#endif
+ memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
+ s->pending += 2;
}
/* ===========================================================================
@@ -313,15 +314,11 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
* IN assertion: there is enough room in pending_buf.
*/
static inline void put_uint32(deflate_state *s, uint32_t dw) {
-#if defined(UNALIGNED_OK)
- *(uint32_t *)(&s->pending_buf[s->pending]) = dw;
- s->pending += 4;
-#else
- put_byte(s, (dw & 0xff));
- put_byte(s, ((dw >> 8) & 0xff));
- put_byte(s, ((dw >> 16) & 0xff));
- put_byte(s, ((dw >> 24) & 0xff));
+#if BYTE_ORDER == BIG_ENDIAN
+ dw = ZSWAP32(dw);
#endif
+ memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+ s->pending += 4;
}
/* ===========================================================================
@@ -329,15 +326,11 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
* IN assertion: there is enough room in pending_buf.
*/
static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
-#if defined(UNALIGNED_OK)
- *(uint32_t *)(&s->pending_buf[s->pending]) = ZSWAP32(dw);
- s->pending += 4;
-#else
- put_byte(s, ((dw >> 24) & 0xff));
- put_byte(s, ((dw >> 16) & 0xff));
- put_byte(s, ((dw >> 8) & 0xff));
- put_byte(s, (dw & 0xff));
+#if BYTE_ORDER == LITTLE_ENDIAN
+ dw = ZSWAP32(dw);
#endif
+ memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
+ s->pending += 4;
}
/* ===========================================================================
@@ -345,24 +338,11 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
* IN assertion: there is enough room in pending_buf.
*/
static inline void put_uint64(deflate_state *s, uint64_t lld) {
-#if defined(UNALIGNED64_OK)
- *(uint64_t *)(&s->pending_buf[s->pending]) = lld;
- s->pending += 8;
-#elif defined(UNALIGNED_OK)
- *(uint32_t *)(&s->pending_buf[s->pending]) = lld & 0xffffffff;
- s->pending += 4;
- *(uint32_t *)(&s->pending_buf[s->pending]) = (lld >> 32) & 0xffffffff;
- s->pending += 4;
-#else
- put_byte(s, (lld & 0xff));
- put_byte(s, ((lld >> 8) & 0xff));
- put_byte(s, ((lld >> 16) & 0xff));
- put_byte(s, ((lld >> 24) & 0xff));
- put_byte(s, ((lld >> 32) & 0xff));
- put_byte(s, ((lld >> 40) & 0xff));
- put_byte(s, ((lld >> 48) & 0xff));
- put_byte(s, ((lld >> 56) & 0xff));
+#if BYTE_ORDER == BIG_ENDIAN
+ lld = ZSWAP64(lld);
#endif
+ memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
+ s->pending += 8;
}
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)