diff options
author | alk3pInjection <webmaster@raspii.tech> | 2023-04-20 00:08:54 +0800 |
---|---|---|
committer | alk3pInjection <webmaster@raspii.tech> | 2023-04-20 00:08:54 +0800 |
commit | 004b98220a30de0d1956a8149d8bc6ec356667da (patch) | |
tree | 1eaee2603984d7ab4524be68b57ce0a2b2b72118 /arch | |
parent | 2ca0d0b38b60e8d6d49a8959bf674a79e7d16f41 (diff) | |
parent | a583e215afa2356e23b418efa871a1cc4348702a (diff) |
Merge tag '2.0.7' into tachibanatachibana-mr1tachibana
Change-Id: I7b03d60d67d184c21ff7437a35062077666951e9
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/armfeature.c | 3 | ||||
-rw-r--r-- | arch/arm/chunkset_neon.c | 11 | ||||
-rw-r--r-- | arch/arm/crc32_acle.c | 34 | ||||
-rw-r--r-- | arch/s390/dfltcc_deflate.c | 10 | ||||
-rw-r--r-- | arch/s390/dfltcc_detail.h | 176 | ||||
-rw-r--r-- | arch/s390/dfltcc_inflate.c | 8 | ||||
-rw-r--r-- | arch/s390/self-hosted-builder/actions-runner.Dockerfile | 2 | ||||
-rw-r--r-- | arch/x86/chunkset_avx.c | 17 | ||||
-rw-r--r-- | arch/x86/chunkset_sse.c | 17 |
9 files changed, 152 insertions, 126 deletions
diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c index bef9b29..978c987 100644 --- a/arch/arm/armfeature.c +++ b/arch/arm/armfeature.c @@ -11,6 +11,9 @@ # define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32 # endif #elif defined(__APPLE__) +# if !defined(_DARWIN_C_SOURCE) +# define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */ +# endif # include <sys/sysctl.h> #elif defined(_WIN32) # include <winapifamily.h> diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c index e0ad3e0..51dcf09 100644 --- a/arch/arm/chunkset_neon.c +++ b/arch/arm/chunkset_neon.c @@ -15,30 +15,25 @@ typedef uint8x16_t chunk_t; #define CHUNK_SIZE 16 -#define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 -static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { - *chunk = vld1q_dup_u8(from); -} - static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { uint16_t tmp; - memcpy(&tmp, from, 2); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp)); } static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { uint32_t tmp; - memcpy(&tmp, from, 4); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp)); } static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { uint64_t tmp; - memcpy(&tmp, from, 8); + memcpy(&tmp, from, sizeof(tmp)); *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp)); } diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c index 99013e1..0bcd3cf 100644 --- a/arch/arm/crc32_acle.c +++ b/arch/arm/crc32_acle.c @@ -62,7 +62,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { len--; } - if ((len > sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { + if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { buf2 = (const uint16_t *) buf; c = __crc32h(c, *buf2++); len -= sizeof(uint16_t); @@ -72,22 +72,17 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { } #if defined(__aarch64__) - if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { + if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { c = __crc32w(c, *buf4++); len -= sizeof(uint32_t); } - const uint64_t *buf8 = (const uint64_t *) buf4; - -#ifdef UNROLL_MORE - while (len >= 4 * sizeof(uint64_t)) { - c = __crc32d(c, *buf8++); - c = __crc32d(c, *buf8++); - c = __crc32d(c, *buf8++); - c = __crc32d(c, *buf8++); - len -= 4 * sizeof(uint64_t); + if (len == 0) { + c = ~c; + return c; } -#endif + + const uint64_t *buf8 = (const uint64_t *) buf4; while (len >= sizeof(uint64_t)) { c = __crc32d(c, *buf8++); @@ -111,19 +106,10 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { buf = (const unsigned char *) buf2; #else /* __aarch64__ */ -# ifdef UNROLL_MORE - while (len >= 8 * sizeof(uint32_t)) { - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - c = __crc32w(c, *buf4++); - len -= 8 * sizeof(uint32_t); + if (len == 0) { + c = ~c; + return c; } -# endif while (len >= sizeof(uint32_t)) { c = __crc32w(c, *buf4++); diff --git a/arch/s390/dfltcc_deflate.c b/arch/s390/dfltcc_deflate.c index e3b53ee..9ecc6ba 100644 --- a/arch/s390/dfltcc_deflate.c +++ b/arch/s390/dfltcc_deflate.c @@ -210,7 +210,10 @@ again: *strm->next_out = (unsigned char)state->bi_buf; /* Honor history and check value */ param->nt = 0; - param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler; + if (state->wrap == 1) + param->cv = strm->adler; + else if (state->wrap == 2) + param->cv = ZSWAP32(strm->adler); /* When opening a block, choose a Huffman-Table Type */ if (!param->bcf) { @@ -241,7 +244,10 @@ again: state->bi_buf = 0; /* Avoid accessing next_out */ else state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); - strm->adler = state->wrap == 2 ? ZSWAP32(param->cv) : param->cv; + if (state->wrap == 1) + strm->adler = param->cv; + else if (state->wrap == 2) + strm->adler = ZSWAP32(param->cv); /* Unmask the input data */ strm->avail_in += masked_avail_in; diff --git a/arch/s390/dfltcc_detail.h b/arch/s390/dfltcc_detail.h index 4ec03f8..411e9f6 100644 --- a/arch/s390/dfltcc_detail.h +++ b/arch/s390/dfltcc_detail.h @@ -26,74 +26,6 @@ #endif /* - C wrapper for the DEFLATE CONVERSION CALL instruction. - */ -typedef enum { - DFLTCC_CC_OK = 0, - DFLTCC_CC_OP1_TOO_SHORT = 1, - DFLTCC_CC_OP2_TOO_SHORT = 2, - DFLTCC_CC_OP2_CORRUPT = 2, - DFLTCC_CC_AGAIN = 3, -} dfltcc_cc; - -#define DFLTCC_QAF 0 -#define DFLTCC_GDHT 1 -#define DFLTCC_CMPR 2 -#define DFLTCC_XPND 4 -#define HBT_CIRCULAR (1 << 7) -#define HB_BITS 15 -#define HB_SIZE (1 << HB_BITS) -#define DFLTCC_FACILITY 151 - -static inline dfltcc_cc dfltcc(int fn, void *param, - unsigned char **op1, size_t *len1, z_const unsigned char **op2, size_t *len2, void *hist) { - unsigned char *t2 = op1 ? *op1 : NULL; - size_t t3 = len1 ? *len1 : 0; - z_const unsigned char *t4 = op2 ? *op2 : NULL; - size_t t5 = len2 ? *len2 : 0; - Z_REGISTER int r0 __asm__("r0") = fn; - Z_REGISTER void *r1 __asm__("r1") = param; - Z_REGISTER unsigned char *r2 __asm__("r2") = t2; - Z_REGISTER size_t r3 __asm__("r3") = t3; - Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4; - Z_REGISTER size_t r5 __asm__("r5") = t5; - int cc; - - __asm__ volatile( -#ifdef HAVE_SYS_SDT_H - STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) -#endif - ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" -#ifdef HAVE_SYS_SDT_H - STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5)) -#endif - "ipm %[cc]\n" - : [r2] "+r" (r2) - , [r3] "+r" (r3) - , [r4] "+r" (r4) - , [r5] "+r" (r5) - , [cc] "=r" (cc) - : [r0] "r" (r0) - , [r1] "r" (r1) - , [hist] "r" (hist) -#ifdef HAVE_SYS_SDT_H - , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) -#endif - : "cc", "memory"); - t2 = r2; t3 = r3; t4 = r4; t5 = r5; - - if (op1) - *op1 = t2; - if (len1) - *len1 = t3; - if (op2) - *op2 = t4; - if (len2) - *len2 = t5; - return (cc >> 28) & 3; -} - -/* Parameter Block for Query Available Functions. */ #define static_assert(c, msg) __attribute__((unused)) static char static_assert_failed_ ## msg[c ? 1 : -1] @@ -105,7 +37,8 @@ struct dfltcc_qaf_param { char reserved2[6]; }; -static_assert(sizeof(struct dfltcc_qaf_param) == 32, sizeof_struct_dfltcc_qaf_param_is_32); +#define DFLTCC_SIZEOF_QAF 32 +static_assert(sizeof(struct dfltcc_qaf_param) == DFLTCC_SIZEOF_QAF, qaf); static inline int is_bit_set(const char *bits, int n) { return bits[n / 8] & (1 << (7 - (n % 8))); @@ -115,6 +48,8 @@ static inline void clear_bit(char *bits, int n) { bits[n / 8] &= ~(1 << (7 - (n % 8))); } +#define DFLTCC_FACILITY 151 + #define DFLTCC_FMT0 0 /* @@ -165,12 +100,16 @@ struct dfltcc_param_v0 { uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table Length */ uint8_t reserved464[6]; - uint8_t cdht[288]; - uint8_t reserved[32]; - uint8_t csb[1152]; + uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */ + uint8_t reserved[24]; + uint8_t ribm2[8]; /* Reserved for IBM use */ + uint8_t csb[1152]; /* Continuation-State Buffer */ }; -static_assert(sizeof(struct dfltcc_param_v0) == 1536, sizeof_struct_dfltcc_param_v0_is_1536); +#define DFLTCC_SIZEOF_GDHT_V0 384 +#define DFLTCC_SIZEOF_CMPR_XPND_V0 1536 +static_assert(offsetof(struct dfltcc_param_v0, csb) == DFLTCC_SIZEOF_GDHT_V0, gdht_v0); +static_assert(sizeof(struct dfltcc_param_v0) == DFLTCC_SIZEOF_CMPR_XPND_V0, cmpr_xpnd_v0); static inline z_const char *oesc_msg(char *buf, int oesc) { if (oesc == 0x00) @@ -182,6 +121,97 @@ static inline z_const char *oesc_msg(char *buf, int oesc) { } /* + C wrapper for the DEFLATE CONVERSION CALL instruction. + */ +typedef enum { + DFLTCC_CC_OK = 0, + DFLTCC_CC_OP1_TOO_SHORT = 1, + DFLTCC_CC_OP2_TOO_SHORT = 2, + DFLTCC_CC_OP2_CORRUPT = 2, + DFLTCC_CC_AGAIN = 3, +} dfltcc_cc; + +#define DFLTCC_QAF 0 +#define DFLTCC_GDHT 1 +#define DFLTCC_CMPR 2 +#define DFLTCC_XPND 4 +#define HBT_CIRCULAR (1 << 7) +#define DFLTCC_FN_MASK ((1 << 7) - 1) +#define HB_BITS 15 +#define HB_SIZE (1 << HB_BITS) + +static inline dfltcc_cc dfltcc(int fn, void *param, + unsigned char **op1, size_t *len1, + z_const unsigned char **op2, size_t *len2, void *hist) { + unsigned char *t2 = op1 ? *op1 : NULL; +#ifdef Z_MEMORY_SANITIZER + unsigned char *orig_t2 = t2; +#endif + size_t t3 = len1 ? *len1 : 0; + z_const unsigned char *t4 = op2 ? *op2 : NULL; + size_t t5 = len2 ? *len2 : 0; + Z_REGISTER int r0 __asm__("r0") = fn; + Z_REGISTER void *r1 __asm__("r1") = param; + Z_REGISTER unsigned char *r2 __asm__("r2") = t2; + Z_REGISTER size_t r3 __asm__("r3") = t3; + Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4; + Z_REGISTER size_t r5 __asm__("r5") = t5; + int cc; + + __asm__ volatile( +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) +#endif + ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5)) +#endif + "ipm %[cc]\n" + : [r2] "+r" (r2) + , [r3] "+r" (r3) + , [r4] "+r" (r4) + , [r5] "+r" (r5) + , [cc] "=r" (cc) + : [r0] "r" (r0) + , [r1] "r" (r1) + , [hist] "r" (hist) +#ifdef HAVE_SYS_SDT_H + , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) +#endif + : "cc", "memory"); + t2 = r2; t3 = r3; t4 = r4; t5 = r5; + +#ifdef Z_MEMORY_SANITIZER + switch (fn & DFLTCC_FN_MASK) { + case DFLTCC_QAF: + __msan_unpoison(param, DFLTCC_SIZEOF_QAF); + break; + case DFLTCC_GDHT: + __msan_unpoison(param, DFLTCC_SIZEOF_GDHT_V0); + break; + case DFLTCC_CMPR: + __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); + __msan_unpoison(orig_t2, t2 - orig_t2 + (((struct dfltcc_param_v0 *)param)->sbb == 0 ? 0 : 1)); + break; + case DFLTCC_XPND: + __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); + __msan_unpoison(orig_t2, t2 - orig_t2); + break; + } +#endif + + if (op1) + *op1 = t2; + if (len1) + *len1 = t3; + if (op2) + *op2 = t4; + if (len2) + *len2 = t5; + return (cc >> 28) & 3; +} + +/* Extension of inflate_state and deflate_state. Must be doubleword-aligned. */ struct dfltcc_state { diff --git a/arch/s390/dfltcc_inflate.c b/arch/s390/dfltcc_inflate.c index 2535064..801e547 100644 --- a/arch/s390/dfltcc_inflate.c +++ b/arch/s390/dfltcc_inflate.c @@ -81,13 +81,14 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush } /* Translate stream to parameter block */ - param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32; + param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32; param->sbb = state->bits; param->hl = state->whave; /* Software and hardware history formats match */ param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1); if (param->hl) param->nt = 0; /* Honor history for the first block */ - param->cv = state->flags ? ZSWAP32(state->check) : state->check; + if (state->wrap & 4) + param->cv = state->flags ? ZSWAP32(state->check) : state->check; /* Inflate */ do { @@ -100,7 +101,8 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush state->bits = param->sbb; state->whave = param->hl; state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); - state->check = state->flags ? ZSWAP32(param->cv) : param->cv; + if (state->wrap & 4) + strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv; if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { /* Report an error if stream is corrupted */ state->mode = BAD; diff --git a/arch/s390/self-hosted-builder/actions-runner.Dockerfile b/arch/s390/self-hosted-builder/actions-runner.Dockerfile index a4bb774..a55a74d 100644 --- a/arch/s390/self-hosted-builder/actions-runner.Dockerfile +++ b/arch/s390/self-hosted-builder/actions-runner.Dockerfile @@ -11,11 +11,13 @@ FROM s390x/ubuntu:20.04 # Packages for zlib-ng testing. ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get -y install \ + clang-11 \ cmake \ curl \ gcc \ git \ jq \ + llvm-11-tools \ ninja-build \ python-is-python3 \ python3 \ diff --git a/arch/x86/chunkset_avx.c b/arch/x86/chunkset_avx.c index 7a9a56a..398d192 100644 --- a/arch/x86/chunkset_avx.c +++ b/arch/x86/chunkset_avx.c @@ -11,25 +11,26 @@ typedef __m256i chunk_t; #define CHUNK_SIZE 32 -#define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 -static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { - *chunk = _mm256_set1_epi8(*(int8_t *)from); -} - static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { - *chunk = _mm256_set1_epi16(*(int16_t *)from); + int16_t tmp; + memcpy(&tmp, from, sizeof(tmp)); + *chunk = _mm256_set1_epi16(tmp); } static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { - *chunk = _mm256_set1_epi32(*(int32_t *)from); + int32_t tmp; + memcpy(&tmp, from, sizeof(tmp)); + *chunk = _mm256_set1_epi32(tmp); } static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { - *chunk = _mm256_set1_epi64x(*(int64_t *)from); + int64_t tmp; + memcpy(&tmp, from, sizeof(tmp)); + *chunk = _mm256_set1_epi64x(tmp); } static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { diff --git a/arch/x86/chunkset_sse.c b/arch/x86/chunkset_sse.c index d38e99d..6b43d4a 100644 --- a/arch/x86/chunkset_sse.c +++ b/arch/x86/chunkset_sse.c @@ -12,25 +12,26 @@ typedef __m128i chunk_t; #define CHUNK_SIZE 16 -#define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 -static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { - *chunk = _mm_set1_epi8(*(int8_t *)from); -} - static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { - *chunk = _mm_set1_epi16(*(int16_t *)from); + int16_t tmp; + memcpy(&tmp, from, sizeof(tmp)); + *chunk = _mm_set1_epi16(tmp); } static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { - *chunk = _mm_set1_epi32(*(int32_t *)from); + int32_t tmp; + memcpy(&tmp, from, sizeof(tmp)); + *chunk = _mm_set1_epi32(tmp); } static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { - *chunk = _mm_set1_epi64x(*(int64_t *)from); + int64_t tmp; + memcpy(&tmp, from, sizeof(tmp)); + *chunk = _mm_set1_epi64x(tmp); } static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { |