diff options
author | Nathan Moinvaziri <nathan@solidstatenetworks.com> | 2020-06-28 20:00:01 -0700 |
---|---|---|
committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2020-09-11 13:01:28 +0200 |
commit | 4bc5bd65e52ecbc5f751fba11461bc803bd92428 (patch) | |
tree | 1c52051bd729923ed2e36dc493d73573ee0658f9 /functable.c | |
parent | b30cbcc0c1db1ad20b9af2c847663ec1e68da37d (diff) |
Added AVX support to chunkset functions.
Diffstat (limited to 'functable.c')
-rw-r--r-- | functable.c | 32 |
1 file changed, 32 insertions, 0 deletions
```diff
diff --git a/functable.c b/functable.c
index 6f4f815..782e7fd 100644
--- a/functable.c
+++ b/functable.c
@@ -72,6 +72,14 @@ extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
 extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
 extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
+#ifdef X86_AVX_CHUNKSET
+extern uint32_t chunksize_avx(void);
+extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len);
+extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
+extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len);
+extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len);
+extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+#endif
 #ifdef ARM_NEON_CHUNKSET
 extern uint32_t chunksize_neon(void);
 extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len);
@@ -240,6 +248,10 @@ Z_INTERNAL uint32_t chunksize_stub(void) {
 # endif
         functable.chunksize = &chunksize_sse2;
 #endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunksize = &chunksize_avx;
+#endif
 #ifdef ARM_NEON_CHUNKSET
     if (arm_cpu_has_neon)
         functable.chunksize = &chunksize_neon;
@@ -258,6 +270,10 @@ Z_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigned l
 # endif
         functable.chunkcopy = &chunkcopy_sse2;
 #endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkcopy = &chunkcopy_avx;
+#endif
 #ifdef ARM_NEON_CHUNKSET
     if (arm_cpu_has_neon)
         functable.chunkcopy = &chunkcopy_neon;
@@ -276,6 +292,10 @@ Z_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, unsig
 # endif
         functable.chunkcopy_safe = &chunkcopy_safe_sse2;
 #endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkcopy_safe = &chunkcopy_safe_avx;
+#endif
 #ifdef ARM_NEON_CHUNKSET
     if (arm_cpu_has_neon)
         functable.chunkcopy_safe = &chunkcopy_safe_neon;
@@ -294,6 +314,10 @@ Z_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned *len
 # endif
         functable.chunkunroll = &chunkunroll_sse2;
 #endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkunroll = &chunkunroll_avx;
+#endif
 #ifdef ARM_NEON_CHUNKSET
     if (arm_cpu_has_neon)
         functable.chunkunroll = &chunkunroll_neon;
@@ -312,6 +336,10 @@ Z_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned len)
 # endif
         functable.chunkmemset = &chunkmemset_sse2;
 #endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkmemset = &chunkmemset_avx;
+#endif
 #ifdef ARM_NEON_CHUNKSET
     if (arm_cpu_has_neon)
         functable.chunkmemset = &chunkmemset_neon;
@@ -330,6 +358,10 @@ Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsigned
 # endif
         functable.chunkmemset_safe = &chunkmemset_safe_sse2;
 #endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkmemset_safe = &chunkmemset_safe_avx;
+#endif
 #ifdef ARM_NEON_CHUNKSET
     if (arm_cpu_has_neon)
         functable.chunkmemset_safe = &chunkmemset_safe_neon;
```