diff options
author | Jim Kukunas <james.t.kukunas@linux.intel.com> | 2018-06-21 20:47:32 +0000 |
---|---|---|
committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2019-09-04 08:53:36 +0200 |
commit | 11f2e8f33788f7e3619e6cee6f7634295f102da9 (patch) | |
tree | d14c0b67d4d8639220fc1fa6a905b37fe2d57bb5 | |
parent | ce0076688a7eff8c7991234f5024386cf9ac944c (diff) |
Adds SSE2 optimized slide_hash.
Edit: Removed glue code in deflate.c, since we want
to implement this differently in zlib-ng.
-rw-r--r-- | arch/x86/Makefile.in | 8 | ||||
-rw-r--r-- | arch/x86/slide_sse.c | 52 | ||||
-rw-r--r-- | win32/Makefile.msc | 3 |
3 files changed, 61 insertions, 2 deletions
```diff
diff --git a/arch/x86/Makefile.in b/arch/x86/Makefile.in
index 2e473c6..95ad368 100644
--- a/arch/x86/Makefile.in
+++ b/arch/x86/Makefile.in
@@ -16,7 +16,7 @@ SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)
 
-all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
+all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo slide_sse.o
 
 x86.o:
 	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
@@ -48,6 +48,12 @@ crc_folding.o:
 crc_folding.lo:
 	$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
 
+slide_sse.o:
+	$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
+slide_sse.lo:
+	$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
 mostlyclean: clean
 clean:
 	rm -f *.o *.lo *~
diff --git a/arch/x86/slide_sse.c b/arch/x86/slide_sse.c
new file mode 100644
index 0000000..342fd56
--- /dev/null
+++ b/arch/x86/slide_sse.c
@@ -0,0 +1,52 @@
+/*
+ * SSE optimized hash slide
+ *
+ * Copyright (C) 2017 Intel Corporation
+ * Authors:
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "deflate.h"
+
+#ifdef USE_SSE_SLIDE
+#include <immintrin.h>
+
+void slide_hash_sse(deflate_state *s)
+{
+    unsigned n;
+    Posf *p;
+    uInt wsize = s->w_size;
+    z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);
+
+    n = s->hash_size;
+    p = &s->head[n] - 8;
+    do {
+        __m128i value, result;
+
+        value = _mm_loadu_si128((__m128i *)p);
+        result= _mm_subs_epu16(value, xmm_wsize);
+        _mm_storeu_si128((__m128i *)p, result);
+        p -= 8;
+        n -= 8;
+    } while (n > 0);
+
+#ifndef FASTEST
+    n = wsize;
+    p = &s->prev[n] - 8;
+    do {
+        __m128i value, result;
+
+        value = _mm_loadu_si128((__m128i *)p);
+        result= _mm_subs_epu16(value, xmm_wsize);
+        _mm_storeu_si128((__m128i *)p, result);
+
+        p -= 8;
+        n -= 8;
+    } while (n > 0);
+#endif
+}
+
+#endif
+
diff --git a/win32/Makefile.msc b/win32/Makefile.msc
index 9baf1e4..4ad62eb 100644
--- a/win32/Makefile.msc
+++ b/win32/Makefile.msc
@@ -36,7 +36,7 @@ SUFFIX =
 
 OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
        deflate_medium.obj \
-       functable.obj infback.obj inflate.obj inftrees.obj inffast.obj trees.obj uncompr.obj zutil.obj \
+       functable.obj infback.obj inflate.obj inftrees.obj inffast.obj slide_sse.obj trees.obj uncompr.obj zutil.obj \
        x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj
 !if "$(ZLIB_COMPAT)" != ""
 WITH_GZFILEOP = yes
@@ -126,6 +126,7 @@ infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/
 inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h
 inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h
 inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
+slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h
 trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
 zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h
 
```