summary | refs | log | tree | commit | diff
path: root/functable.c
diff options
context:
space:
mode:
author: Nathan Moinvaziri <nathan@solidstatenetworks.com> 2020-06-28 20:00:01 -0700
committer: Hans Kristian Rosbach <hk-github@circlestorm.org> 2020-09-11 13:01:28 +0200
commit: 4bc5bd65e52ecbc5f751fba11461bc803bd92428 (patch)
tree: 1c52051bd729923ed2e36dc493d73573ee0658f9 /functable.c
parent: b30cbcc0c1db1ad20b9af2c847663ec1e68da37d (diff)
Added AVX support to chunkset functions.
Diffstat (limited to 'functable.c')
-rw-r--r-- functable.c 32
1 files changed, 32 insertions, 0 deletions
diff --git a/functable.c b/functable.c
index 6f4f815..782e7fd 100644
--- a/functable.c
+++ b/functable.c
@@ -72,6 +72,14 @@ extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
+#ifdef X86_AVX_CHUNKSET
+extern uint32_t chunksize_avx(void);
+extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len);
+extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
+extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len);
+extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len);
+extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+#endif
#ifdef ARM_NEON_CHUNKSET
extern uint32_t chunksize_neon(void);
extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len);
@@ -240,6 +248,10 @@ Z_INTERNAL uint32_t chunksize_stub(void) {
# endif
functable.chunksize = &chunksize_sse2;
#endif
+#ifdef X86_AVX_CHUNKSET
+ if (x86_cpu_has_avx2)
+ functable.chunksize = &chunksize_avx;
+#endif
#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunksize = &chunksize_neon;
@@ -258,6 +270,10 @@ Z_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigned l
# endif
functable.chunkcopy = &chunkcopy_sse2;
#endif
+#ifdef X86_AVX_CHUNKSET
+ if (x86_cpu_has_avx2)
+ functable.chunkcopy = &chunkcopy_avx;
+#endif
#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkcopy = &chunkcopy_neon;
@@ -276,6 +292,10 @@ Z_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, unsig
# endif
functable.chunkcopy_safe = &chunkcopy_safe_sse2;
#endif
+#ifdef X86_AVX_CHUNKSET
+ if (x86_cpu_has_avx2)
+ functable.chunkcopy_safe = &chunkcopy_safe_avx;
+#endif
#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkcopy_safe = &chunkcopy_safe_neon;
@@ -294,6 +314,10 @@ Z_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned *len
# endif
functable.chunkunroll = &chunkunroll_sse2;
#endif
+#ifdef X86_AVX_CHUNKSET
+ if (x86_cpu_has_avx2)
+ functable.chunkunroll = &chunkunroll_avx;
+#endif
#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkunroll = &chunkunroll_neon;
@@ -312,6 +336,10 @@ Z_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned len)
# endif
functable.chunkmemset = &chunkmemset_sse2;
#endif
+#ifdef X86_AVX_CHUNKSET
+ if (x86_cpu_has_avx2)
+ functable.chunkmemset = &chunkmemset_avx;
+#endif
#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkmemset = &chunkmemset_neon;
@@ -330,6 +358,10 @@ Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsigned
# endif
functable.chunkmemset_safe = &chunkmemset_safe_sse2;
#endif
+#ifdef X86_AVX_CHUNKSET
+ if (x86_cpu_has_avx2)
+ functable.chunkmemset_safe = &chunkmemset_safe_avx;
+#endif
#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkmemset_safe = &chunkmemset_safe_neon;