summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/armfeature.c3
-rw-r--r--arch/arm/chunkset_neon.c11
-rw-r--r--arch/arm/crc32_acle.c34
-rw-r--r--arch/s390/dfltcc_deflate.c10
-rw-r--r--arch/s390/dfltcc_detail.h176
-rw-r--r--arch/s390/dfltcc_inflate.c8
-rw-r--r--arch/s390/self-hosted-builder/actions-runner.Dockerfile2
-rw-r--r--arch/x86/chunkset_avx.c17
-rw-r--r--arch/x86/chunkset_sse.c17
9 files changed, 152 insertions, 126 deletions
diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c
index bef9b29..978c987 100644
--- a/arch/arm/armfeature.c
+++ b/arch/arm/armfeature.c
@@ -11,6 +11,9 @@
# define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32
# endif
#elif defined(__APPLE__)
+# if !defined(_DARWIN_C_SOURCE)
+# define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */
+# endif
# include <sys/sysctl.h>
#elif defined(_WIN32)
# include <winapifamily.h>
diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c
index e0ad3e0..51dcf09 100644
--- a/arch/arm/chunkset_neon.c
+++ b/arch/arm/chunkset_neon.c
@@ -15,30 +15,25 @@ typedef uint8x16_t chunk_t;
#define CHUNK_SIZE 16
-#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
-static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
- *chunk = vld1q_dup_u8(from);
-}
-
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
- memcpy(&tmp, from, 2);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
- memcpy(&tmp, from, 4);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
- memcpy(&tmp, from, 8);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
}
diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c
index 99013e1..0bcd3cf 100644
--- a/arch/arm/crc32_acle.c
+++ b/arch/arm/crc32_acle.c
@@ -62,7 +62,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
len--;
}
- if ((len > sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) {
+ if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) {
buf2 = (const uint16_t *) buf;
c = __crc32h(c, *buf2++);
len -= sizeof(uint16_t);
@@ -72,22 +72,17 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
}
#if defined(__aarch64__)
- if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) {
+ if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) {
c = __crc32w(c, *buf4++);
len -= sizeof(uint32_t);
}
- const uint64_t *buf8 = (const uint64_t *) buf4;
-
-#ifdef UNROLL_MORE
- while (len >= 4 * sizeof(uint64_t)) {
- c = __crc32d(c, *buf8++);
- c = __crc32d(c, *buf8++);
- c = __crc32d(c, *buf8++);
- c = __crc32d(c, *buf8++);
- len -= 4 * sizeof(uint64_t);
+ if (len == 0) {
+ c = ~c;
+ return c;
}
-#endif
+
+ const uint64_t *buf8 = (const uint64_t *) buf4;
while (len >= sizeof(uint64_t)) {
c = __crc32d(c, *buf8++);
@@ -111,19 +106,10 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
buf = (const unsigned char *) buf2;
#else /* __aarch64__ */
-# ifdef UNROLL_MORE
- while (len >= 8 * sizeof(uint32_t)) {
- c = __crc32w(c, *buf4++);
- c = __crc32w(c, *buf4++);
- c = __crc32w(c, *buf4++);
- c = __crc32w(c, *buf4++);
- c = __crc32w(c, *buf4++);
- c = __crc32w(c, *buf4++);
- c = __crc32w(c, *buf4++);
- c = __crc32w(c, *buf4++);
- len -= 8 * sizeof(uint32_t);
+ if (len == 0) {
+ c = ~c;
+ return c;
}
-# endif
while (len >= sizeof(uint32_t)) {
c = __crc32w(c, *buf4++);
diff --git a/arch/s390/dfltcc_deflate.c b/arch/s390/dfltcc_deflate.c
index e3b53ee..9ecc6ba 100644
--- a/arch/s390/dfltcc_deflate.c
+++ b/arch/s390/dfltcc_deflate.c
@@ -210,7 +210,10 @@ again:
*strm->next_out = (unsigned char)state->bi_buf;
/* Honor history and check value */
param->nt = 0;
- param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler;
+ if (state->wrap == 1)
+ param->cv = strm->adler;
+ else if (state->wrap == 2)
+ param->cv = ZSWAP32(strm->adler);
/* When opening a block, choose a Huffman-Table Type */
if (!param->bcf) {
@@ -241,7 +244,10 @@ again:
state->bi_buf = 0; /* Avoid accessing next_out */
else
state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
- strm->adler = state->wrap == 2 ? ZSWAP32(param->cv) : param->cv;
+ if (state->wrap == 1)
+ strm->adler = param->cv;
+ else if (state->wrap == 2)
+ strm->adler = ZSWAP32(param->cv);
/* Unmask the input data */
strm->avail_in += masked_avail_in;
diff --git a/arch/s390/dfltcc_detail.h b/arch/s390/dfltcc_detail.h
index 4ec03f8..411e9f6 100644
--- a/arch/s390/dfltcc_detail.h
+++ b/arch/s390/dfltcc_detail.h
@@ -26,74 +26,6 @@
#endif
/*
- C wrapper for the DEFLATE CONVERSION CALL instruction.
- */
-typedef enum {
- DFLTCC_CC_OK = 0,
- DFLTCC_CC_OP1_TOO_SHORT = 1,
- DFLTCC_CC_OP2_TOO_SHORT = 2,
- DFLTCC_CC_OP2_CORRUPT = 2,
- DFLTCC_CC_AGAIN = 3,
-} dfltcc_cc;
-
-#define DFLTCC_QAF 0
-#define DFLTCC_GDHT 1
-#define DFLTCC_CMPR 2
-#define DFLTCC_XPND 4
-#define HBT_CIRCULAR (1 << 7)
-#define HB_BITS 15
-#define HB_SIZE (1 << HB_BITS)
-#define DFLTCC_FACILITY 151
-
-static inline dfltcc_cc dfltcc(int fn, void *param,
- unsigned char **op1, size_t *len1, z_const unsigned char **op2, size_t *len2, void *hist) {
- unsigned char *t2 = op1 ? *op1 : NULL;
- size_t t3 = len1 ? *len1 : 0;
- z_const unsigned char *t4 = op2 ? *op2 : NULL;
- size_t t5 = len2 ? *len2 : 0;
- Z_REGISTER int r0 __asm__("r0") = fn;
- Z_REGISTER void *r1 __asm__("r1") = param;
- Z_REGISTER unsigned char *r2 __asm__("r2") = t2;
- Z_REGISTER size_t r3 __asm__("r3") = t3;
- Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4;
- Z_REGISTER size_t r5 __asm__("r5") = t5;
- int cc;
-
- __asm__ volatile(
-#ifdef HAVE_SYS_SDT_H
- STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5))
-#endif
- ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
-#ifdef HAVE_SYS_SDT_H
- STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5))
-#endif
- "ipm %[cc]\n"
- : [r2] "+r" (r2)
- , [r3] "+r" (r3)
- , [r4] "+r" (r4)
- , [r5] "+r" (r5)
- , [cc] "=r" (cc)
- : [r0] "r" (r0)
- , [r1] "r" (r1)
- , [hist] "r" (hist)
-#ifdef HAVE_SYS_SDT_H
- , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist)
-#endif
- : "cc", "memory");
- t2 = r2; t3 = r3; t4 = r4; t5 = r5;
-
- if (op1)
- *op1 = t2;
- if (len1)
- *len1 = t3;
- if (op2)
- *op2 = t4;
- if (len2)
- *len2 = t5;
- return (cc >> 28) & 3;
-}
-
-/*
Parameter Block for Query Available Functions.
*/
#define static_assert(c, msg) __attribute__((unused)) static char static_assert_failed_ ## msg[c ? 1 : -1]
@@ -105,7 +37,8 @@ struct dfltcc_qaf_param {
char reserved2[6];
};
-static_assert(sizeof(struct dfltcc_qaf_param) == 32, sizeof_struct_dfltcc_qaf_param_is_32);
+#define DFLTCC_SIZEOF_QAF 32
+static_assert(sizeof(struct dfltcc_qaf_param) == DFLTCC_SIZEOF_QAF, qaf);
static inline int is_bit_set(const char *bits, int n) {
return bits[n / 8] & (1 << (7 - (n % 8)));
@@ -115,6 +48,8 @@ static inline void clear_bit(char *bits, int n) {
bits[n / 8] &= ~(1 << (7 - (n % 8)));
}
+#define DFLTCC_FACILITY 151
+
#define DFLTCC_FMT0 0
/*
@@ -165,12 +100,16 @@ struct dfltcc_param_v0 {
uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table
Length */
uint8_t reserved464[6];
- uint8_t cdht[288];
- uint8_t reserved[32];
- uint8_t csb[1152];
+ uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */
+ uint8_t reserved[24];
+ uint8_t ribm2[8]; /* Reserved for IBM use */
+ uint8_t csb[1152]; /* Continuation-State Buffer */
};
-static_assert(sizeof(struct dfltcc_param_v0) == 1536, sizeof_struct_dfltcc_param_v0_is_1536);
+#define DFLTCC_SIZEOF_GDHT_V0 384
+#define DFLTCC_SIZEOF_CMPR_XPND_V0 1536
+static_assert(offsetof(struct dfltcc_param_v0, csb) == DFLTCC_SIZEOF_GDHT_V0, gdht_v0);
+static_assert(sizeof(struct dfltcc_param_v0) == DFLTCC_SIZEOF_CMPR_XPND_V0, cmpr_xpnd_v0);
static inline z_const char *oesc_msg(char *buf, int oesc) {
if (oesc == 0x00)
@@ -182,6 +121,97 @@ static inline z_const char *oesc_msg(char *buf, int oesc) {
}
/*
+ C wrapper for the DEFLATE CONVERSION CALL instruction.
+ */
+typedef enum {
+ DFLTCC_CC_OK = 0,
+ DFLTCC_CC_OP1_TOO_SHORT = 1,
+ DFLTCC_CC_OP2_TOO_SHORT = 2,
+ DFLTCC_CC_OP2_CORRUPT = 2,
+ DFLTCC_CC_AGAIN = 3,
+} dfltcc_cc;
+
+#define DFLTCC_QAF 0
+#define DFLTCC_GDHT 1
+#define DFLTCC_CMPR 2
+#define DFLTCC_XPND 4
+#define HBT_CIRCULAR (1 << 7)
+#define DFLTCC_FN_MASK ((1 << 7) - 1)
+#define HB_BITS 15
+#define HB_SIZE (1 << HB_BITS)
+
+static inline dfltcc_cc dfltcc(int fn, void *param,
+ unsigned char **op1, size_t *len1,
+ z_const unsigned char **op2, size_t *len2, void *hist) {
+ unsigned char *t2 = op1 ? *op1 : NULL;
+#ifdef Z_MEMORY_SANITIZER
+ unsigned char *orig_t2 = t2;
+#endif
+ size_t t3 = len1 ? *len1 : 0;
+ z_const unsigned char *t4 = op2 ? *op2 : NULL;
+ size_t t5 = len2 ? *len2 : 0;
+ Z_REGISTER int r0 __asm__("r0") = fn;
+ Z_REGISTER void *r1 __asm__("r1") = param;
+ Z_REGISTER unsigned char *r2 __asm__("r2") = t2;
+ Z_REGISTER size_t r3 __asm__("r3") = t3;
+ Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4;
+ Z_REGISTER size_t r5 __asm__("r5") = t5;
+ int cc;
+
+ __asm__ volatile(
+#ifdef HAVE_SYS_SDT_H
+ STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5))
+#endif
+ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
+#ifdef HAVE_SYS_SDT_H
+ STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5))
+#endif
+ "ipm %[cc]\n"
+ : [r2] "+r" (r2)
+ , [r3] "+r" (r3)
+ , [r4] "+r" (r4)
+ , [r5] "+r" (r5)
+ , [cc] "=r" (cc)
+ : [r0] "r" (r0)
+ , [r1] "r" (r1)
+ , [hist] "r" (hist)
+#ifdef HAVE_SYS_SDT_H
+ , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist)
+#endif
+ : "cc", "memory");
+ t2 = r2; t3 = r3; t4 = r4; t5 = r5;
+
+#ifdef Z_MEMORY_SANITIZER
+ switch (fn & DFLTCC_FN_MASK) {
+ case DFLTCC_QAF:
+ __msan_unpoison(param, DFLTCC_SIZEOF_QAF);
+ break;
+ case DFLTCC_GDHT:
+ __msan_unpoison(param, DFLTCC_SIZEOF_GDHT_V0);
+ break;
+ case DFLTCC_CMPR:
+ __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0);
+ __msan_unpoison(orig_t2, t2 - orig_t2 + (((struct dfltcc_param_v0 *)param)->sbb == 0 ? 0 : 1));
+ break;
+ case DFLTCC_XPND:
+ __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0);
+ __msan_unpoison(orig_t2, t2 - orig_t2);
+ break;
+ }
+#endif
+
+ if (op1)
+ *op1 = t2;
+ if (len1)
+ *len1 = t3;
+ if (op2)
+ *op2 = t4;
+ if (len2)
+ *len2 = t5;
+ return (cc >> 28) & 3;
+}
+
+/*
Extension of inflate_state and deflate_state. Must be doubleword-aligned.
*/
struct dfltcc_state {
diff --git a/arch/s390/dfltcc_inflate.c b/arch/s390/dfltcc_inflate.c
index 2535064..801e547 100644
--- a/arch/s390/dfltcc_inflate.c
+++ b/arch/s390/dfltcc_inflate.c
@@ -81,13 +81,14 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush
}
/* Translate stream to parameter block */
- param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32;
+ param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32;
param->sbb = state->bits;
param->hl = state->whave; /* Software and hardware history formats match */
param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1);
if (param->hl)
param->nt = 0; /* Honor history for the first block */
- param->cv = state->flags ? ZSWAP32(state->check) : state->check;
+ if (state->wrap & 4)
+ param->cv = state->flags ? ZSWAP32(state->check) : state->check;
/* Inflate */
do {
@@ -100,7 +101,8 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush
state->bits = param->sbb;
state->whave = param->hl;
state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1);
- state->check = state->flags ? ZSWAP32(param->cv) : param->cv;
+ if (state->wrap & 4)
+ strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv;
if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
/* Report an error if stream is corrupted */
state->mode = BAD;
diff --git a/arch/s390/self-hosted-builder/actions-runner.Dockerfile b/arch/s390/self-hosted-builder/actions-runner.Dockerfile
index a4bb774..a55a74d 100644
--- a/arch/s390/self-hosted-builder/actions-runner.Dockerfile
+++ b/arch/s390/self-hosted-builder/actions-runner.Dockerfile
@@ -11,11 +11,13 @@ FROM s390x/ubuntu:20.04
# Packages for zlib-ng testing.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install \
+ clang-11 \
cmake \
curl \
gcc \
git \
jq \
+ llvm-11-tools \
ninja-build \
python-is-python3 \
python3 \
diff --git a/arch/x86/chunkset_avx.c b/arch/x86/chunkset_avx.c
index 7a9a56a..398d192 100644
--- a/arch/x86/chunkset_avx.c
+++ b/arch/x86/chunkset_avx.c
@@ -11,25 +11,26 @@ typedef __m256i chunk_t;
#define CHUNK_SIZE 32
-#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
-static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm256_set1_epi8(*(int8_t *)from);
-}
-
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm256_set1_epi16(*(int16_t *)from);
+ int16_t tmp;
+ memcpy(&tmp, from, sizeof(tmp));
+ *chunk = _mm256_set1_epi16(tmp);
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm256_set1_epi32(*(int32_t *)from);
+ int32_t tmp;
+ memcpy(&tmp, from, sizeof(tmp));
+ *chunk = _mm256_set1_epi32(tmp);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm256_set1_epi64x(*(int64_t *)from);
+ int64_t tmp;
+ memcpy(&tmp, from, sizeof(tmp));
+ *chunk = _mm256_set1_epi64x(tmp);
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
diff --git a/arch/x86/chunkset_sse.c b/arch/x86/chunkset_sse.c
index d38e99d..6b43d4a 100644
--- a/arch/x86/chunkset_sse.c
+++ b/arch/x86/chunkset_sse.c
@@ -12,25 +12,26 @@ typedef __m128i chunk_t;
#define CHUNK_SIZE 16
-#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
-static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm_set1_epi8(*(int8_t *)from);
-}
-
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm_set1_epi16(*(int16_t *)from);
+ int16_t tmp;
+ memcpy(&tmp, from, sizeof(tmp));
+ *chunk = _mm_set1_epi16(tmp);
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm_set1_epi32(*(int32_t *)from);
+ int32_t tmp;
+ memcpy(&tmp, from, sizeof(tmp));
+ *chunk = _mm_set1_epi32(tmp);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- *chunk = _mm_set1_epi64x(*(int64_t *)from);
+ int64_t tmp;
+ memcpy(&tmp, from, sizeof(tmp));
+ *chunk = _mm_set1_epi64x(tmp);
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {