summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Kukunas <james.t.kukunas@linux.intel.com>2018-06-21 20:47:32 +0000
committerHans Kristian Rosbach <hk-github@circlestorm.org>2019-09-04 08:53:36 +0200
commit11f2e8f33788f7e3619e6cee6f7634295f102da9 (patch)
treed14c0b67d4d8639220fc1fa6a905b37fe2d57bb5
parentce0076688a7eff8c7991234f5024386cf9ac944c (diff)
Adds SSE2 optimized slide_hash.
Edit: Removed glue code in deflate.c, since we want to implement this differently in zlib-ng.
-rw-r--r--arch/x86/Makefile.in8
-rw-r--r--arch/x86/slide_sse.c52
-rw-r--r--win32/Makefile.msc3
3 files changed, 61 insertions, 2 deletions
diff --git a/arch/x86/Makefile.in b/arch/x86/Makefile.in
index 2e473c6..95ad368 100644
--- a/arch/x86/Makefile.in
+++ b/arch/x86/Makefile.in
@@ -16,7 +16,7 @@ SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)
-all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
+all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo slide_sse.o slide_sse.lo
x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
@@ -48,6 +48,12 @@ crc_folding.o:
crc_folding.lo:
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
+slide_sse.o:
+ $(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
+slide_sse.lo:
+ $(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
mostlyclean: clean
clean:
rm -f *.o *.lo *~
diff --git a/arch/x86/slide_sse.c b/arch/x86/slide_sse.c
new file mode 100644
index 0000000..342fd56
--- /dev/null
+++ b/arch/x86/slide_sse.c
@@ -0,0 +1,52 @@
+/*
+ * SSE optimized hash slide
+ *
+ * Copyright (C) 2017 Intel Corporation
+ * Authors:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "deflate.h"
+
+#ifdef USE_SSE_SLIDE
+#include <immintrin.h>
+/* Slide the hash tables down by w_size; entries below w_size saturate to 0. */
+void slide_hash_sse(deflate_state *s)
+{
+    unsigned n;
+    Posf *p;
+    uInt wsize = s->w_size;
+    /* Explicit cast: _mm_set1_epi16 takes a short; keeps wsize used under FASTEST. */
+    z_const __m128i xmm_wsize = _mm_set1_epi16((short)wsize);
+    /* Walk head[] backwards, 8 entries per 128-bit op; n must be a multiple of 8. */
+    n = s->hash_size;
+    p = &s->head[n] - 8;
+    do {
+        __m128i value, result;
+        value = _mm_loadu_si128((__m128i *)p);
+        result = _mm_subs_epu16(value, xmm_wsize);
+        _mm_storeu_si128((__m128i *)p, result);
+        p -= 8;
+        n -= 8;
+    } while (n > 0);
+
+#ifndef FASTEST
+    /* Same saturating update for prev[], which holds wsize entries. */
+    n = wsize;
+    p = &s->prev[n] - 8;
+    do {
+        __m128i value, result;
+        value = _mm_loadu_si128((__m128i *)p);
+        result = _mm_subs_epu16(value, xmm_wsize);
+        _mm_storeu_si128((__m128i *)p, result);
+
+        p -= 8;
+        n -= 8;
+    } while (n > 0);
+#endif
+}
+
+#endif
+
diff --git a/win32/Makefile.msc b/win32/Makefile.msc
index 9baf1e4..4ad62eb 100644
--- a/win32/Makefile.msc
+++ b/win32/Makefile.msc
@@ -36,7 +36,7 @@ SUFFIX =
OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
deflate_medium.obj \
- functable.obj infback.obj inflate.obj inftrees.obj inffast.obj trees.obj uncompr.obj zutil.obj \
+ functable.obj infback.obj inflate.obj inftrees.obj inffast.obj slide_sse.obj trees.obj uncompr.obj zutil.obj \
x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj
!if "$(ZLIB_COMPAT)" != ""
WITH_GZFILEOP = yes
@@ -126,6 +126,7 @@ infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/
inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
+slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h