summaryrefslogtreecommitdiff
path: root/deflate.h
diff options
context:
space:
mode:
authorSebastian Pop <s.pop@samsung.com>2018-12-07 11:58:18 -0600
committerHans Kristian Rosbach <hk-github@circlestorm.org>2018-12-18 13:41:39 +0100
commit7a4df36aad308cc50b60bf64d0a8acc036ed8b08 (patch)
tree8bf4da6948c5b6d7d72097a92dbb604089b285a0 /deflate.h
parentdd70d6c467f26d727ea730ba577809fa0668940d (diff)
remove `unaligned store` UBsan warnings
This patch addresses several warnings from `make test` when zlib-ng was configured -with-fuzzers -with-sanitizers: zlib-ng/trees.c:798:5: runtime error: store to misaligned address 0x63100125c801 for type 'uint16_t' (aka 'unsigned short'), which requires 2 byte alignment 0x63100125c801: note: pointer points here 00 80 76 01 8b 08 00 00 00 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ^ SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior zlib-ng/trees.c:798:5 in zlib-ng/trees.c:799:5: runtime error: store to misaligned address 0x63100125c803 for type 'uint16_t' (aka 'unsigned short'), which requires 2 byte alignment 0x63100125c803: note: pointer points here 76 01 f5 08 00 00 00 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ^ Instead of using `*(uint16_t*) foo = bar` to write a uint16_t, call __builtin_memcpy which will be safe in case of memory page boundaries. Without the patch: Performance counter stats for './minigzip -9 llvm.tar': 13173.840115 task-clock (msec) # 1.000 CPUs utilized 27 context-switches # 0.002 K/sec 0 cpu-migrations # 0.000 K/sec 129 page-faults # 0.010 K/sec 57,801,072,298 cycles # 4.388 GHz <not supported> stalled-cycles-frontend <not supported> stalled-cycles-backend 75,270,723,557 instructions # 1.30 insns per cycle 17,797,368,302 branches # 1350.963 M/sec 196,795,107 branch-misses # 1.11% of all branches 13.177897531 seconds time elapsed 45408 -rw-rw-r-- 1 spop spop 46493896 Dec 11 14:45 llvm.tar.gz With remove-unaligned-stores patch: 13184.736536 task-clock (msec) # 1.000 CPUs utilized 44 context-switches # 0.003 K/sec 1 cpu-migrations # 0.000 K/sec 129 page-faults # 0.010 K/sec 57,882,724,316 cycles # 4.390 GHz <not supported> stalled-cycles-frontend <not supported> stalled-cycles-backend 75,235,920,853 instructions # 1.30 insns per cycle 17,826,873,999 branches # 1352.084 M/sec 196,050,096 branch-misses # 1.10% of all branches 13.185868238 seconds time elapsed 45408 -rw-rw-r-- 1 spop spop 46493896 Dec 11 14:46 llvm.tar.gz
Diffstat (limited to 'deflate.h')
-rw-r--r--deflate.h26
1 files changed, 7 insertions, 19 deletions
diff --git a/deflate.h b/deflate.h
index 02d8f5a..6cba8dd 100644
--- a/deflate.h
+++ b/deflate.h
@@ -13,6 +13,7 @@
/* @(#) $Id$ */
#include "zutil.h"
+#include "gzendian.h"
/* define NO_GZIP when compiling if you want to disable gzip header and
trailer creation by deflate(). NO_GZIP would be used to avoid linking in
@@ -302,26 +303,13 @@ typedef enum {
* Output a short LSB first on the stream.
* IN assertion: there is enough room in pendingBuf.
*/
-#ifdef UNALIGNED_OK
-/* Compared to the else-clause's implementation, there are few advantages:
- * - s->pending is loaded only once (else-clause's implementation needs to
- * load s->pending twice due to the alias between s->pending and
- * s->pending_buf[].
- * - no instructions for extracting bytes from short.
- * - needs less registers
- * - stores to adjacent bytes are merged into a single store, albeit at the
- * cost of penalty of potentially unaligned access.
- */
-#define put_short(s, w) { \
- *(uint16_t*)(&s->pending_buf[s->pending]) = (w) ; \
- s->pending += 2; \
-}
-#else
-#define put_short(s, w) { \
- put_byte(s, (unsigned char)((w) & 0xff)); \
- put_byte(s, (unsigned char)((uint16_t)(w) >> 8)); \
-}
+static inline void put_short(deflate_state *s, uint16_t w) {
+#if BYTE_ORDER == BIG_ENDIAN
+ w = ZSWAP16(w);
#endif
+ MEMCPY(&(s->pending_buf[s->pending]), &w, sizeof(uint16_t));
+ s->pending += 2;
+}
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/* Minimum amount of lookahead, except at the end of the input file.