summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nathan Moinvaziri <nathan@nathanm.com> 2021-06-13 15:57:28 -0700
committer: Hans Kristian Rosbach <hk-git@circlestorm.org> 2021-06-21 11:34:01 +0200
commit: 3fdfedd7e53e0deb1d71b8b00e1944918af54b2f (patch)
tree: 312136b5486642d69eae764a45202eff9ba44339
parent: 77966c02cda6f509594791879573582908a3a718 (diff)
Reduce number of branches in partial chunk copy based on chunk size.
-rw-r--r--  arch/arm/chunkset_neon.c  2
-rw-r--r--  arch/x86/chunkset_avx.c  2
-rw-r--r--  arch/x86/chunkset_sse.c  2
-rw-r--r--  chunkset.c  2
-rw-r--r--  chunkset_tpl.h  7
5 files changed, 13 insertions, 2 deletions
diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c
index b153298..22c3785 100644
--- a/arch/arm/chunkset_neon.c
+++ b/arch/arm/chunkset_neon.c
@@ -13,6 +13,8 @@
typedef uint8x16_t chunk_t;
+#define CHUNK_SIZE 16
+
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
diff --git a/arch/x86/chunkset_avx.c b/arch/x86/chunkset_avx.c
index eb76c0d..7a9a56a 100644
--- a/arch/x86/chunkset_avx.c
+++ b/arch/x86/chunkset_avx.c
@@ -9,6 +9,8 @@
typedef __m256i chunk_t;
+#define CHUNK_SIZE 32
+
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
diff --git a/arch/x86/chunkset_sse.c b/arch/x86/chunkset_sse.c
index 1d5a0fa..d38e99d 100644
--- a/arch/x86/chunkset_sse.c
+++ b/arch/x86/chunkset_sse.c
@@ -10,6 +10,8 @@
typedef __m128i chunk_t;
+#define CHUNK_SIZE 16
+
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
diff --git a/chunkset.c b/chunkset.c
index 2aa8d4e..b07e6f4 100644
--- a/chunkset.c
+++ b/chunkset.c
@@ -14,6 +14,8 @@ typedef struct chunk_t { uint32_t u32[2]; } chunk_t;
typedef struct chunk_t { uint8_t u8[8]; } chunk_t;
#endif
+#define CHUNK_SIZE 8
+
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
diff --git a/chunkset_tpl.h b/chunkset_tpl.h
index 62cd4aa..2026ff3 100644
--- a/chunkset_tpl.h
+++ b/chunkset_tpl.h
@@ -40,17 +40,20 @@ Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
len = MIN(len, safe - out + 1);
if (len < sizeof(chunk_t)) {
- int32_t use_chunk16 = sizeof(chunk_t) > 16 && (len & 16);
- if (use_chunk16) {
+#if CHUNK_SIZE > 16
+ if (len & 16) {
memcpy(out, from, 16);
out += 16;
from += 16;
}
+#endif
+#if CHUNK_SIZE > 8
if (len & 8) {
memcpy(out, from, 8);
out += 8;
from += 8;
}
+#endif
if (len & 4) {
memcpy(out, from, 4);
out += 4;