diff options
author | Nathan Moinvaziri <nathan@nathanm.com> | 2021-06-13 15:57:28 -0700 |
---|---|---|
committer | Hans Kristian Rosbach <hk-git@circlestorm.org> | 2021-06-21 11:34:01 +0200 |
commit | 3fdfedd7e53e0deb1d71b8b00e1944918af54b2f (patch) | |
tree | 312136b5486642d69eae764a45202eff9ba44339 | |
parent | 77966c02cda6f509594791879573582908a3a718 (diff) |
Reduce number of branches in partial chunk copy based on chunk size.
-rw-r--r-- | arch/arm/chunkset_neon.c | 2 | ||||
-rw-r--r-- | arch/x86/chunkset_avx.c | 2 | ||||
-rw-r--r-- | arch/x86/chunkset_sse.c | 2 | ||||
-rw-r--r-- | chunkset.c | 2 | ||||
-rw-r--r-- | chunkset_tpl.h | 7 |
5 files changed, 13 insertions, 2 deletions
diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c
index b153298..22c3785 100644
--- a/arch/arm/chunkset_neon.c
+++ b/arch/arm/chunkset_neon.c
@@ -13,6 +13,8 @@
 
 typedef uint8x16_t chunk_t;
 
+#define CHUNK_SIZE 16
+
 #define HAVE_CHUNKMEMSET_1
 #define HAVE_CHUNKMEMSET_2
 #define HAVE_CHUNKMEMSET_4
diff --git a/arch/x86/chunkset_avx.c b/arch/x86/chunkset_avx.c
index eb76c0d..7a9a56a 100644
--- a/arch/x86/chunkset_avx.c
+++ b/arch/x86/chunkset_avx.c
@@ -9,6 +9,8 @@
 
 typedef __m256i chunk_t;
 
+#define CHUNK_SIZE 32
+
 #define HAVE_CHUNKMEMSET_1
 #define HAVE_CHUNKMEMSET_2
 #define HAVE_CHUNKMEMSET_4
diff --git a/arch/x86/chunkset_sse.c b/arch/x86/chunkset_sse.c
index 1d5a0fa..d38e99d 100644
--- a/arch/x86/chunkset_sse.c
+++ b/arch/x86/chunkset_sse.c
@@ -10,6 +10,8 @@
 
 typedef __m128i chunk_t;
 
+#define CHUNK_SIZE 16
+
 #define HAVE_CHUNKMEMSET_1
 #define HAVE_CHUNKMEMSET_2
 #define HAVE_CHUNKMEMSET_4
diff --git a/chunkset.c b/chunkset.c
--- a/chunkset.c
+++ b/chunkset.c
@@ -14,6 +14,8 @@ typedef struct chunk_t { uint32_t u32[2]; } chunk_t;
 typedef struct chunk_t { uint8_t u8[8]; } chunk_t;
 #endif
 
+#define CHUNK_SIZE 8
+
 #define HAVE_CHUNKMEMSET_1
 #define HAVE_CHUNKMEMSET_4
 #define HAVE_CHUNKMEMSET_8
diff --git a/chunkset_tpl.h b/chunkset_tpl.h
index 62cd4aa..2026ff3 100644
--- a/chunkset_tpl.h
+++ b/chunkset_tpl.h
@@ -40,17 +40,20 @@ Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
 Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
     len = MIN(len, safe - out + 1);
     if (len < sizeof(chunk_t)) {
-        int32_t use_chunk16 = sizeof(chunk_t) > 16 && (len & 16);
-        if (use_chunk16) {
+#if CHUNK_SIZE > 16
+        if (len & 16) {
             memcpy(out, from, 16);
             out += 16;
             from += 16;
         }
+#endif
+#if CHUNK_SIZE > 8
         if (len & 8) {
             memcpy(out, from, 8);
             out += 8;
             from += 8;
         }
+#endif
         if (len & 4) {
             memcpy(out, from, 4);
             out += 4;