git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Adds SSE2 optimized slide_hash.
author Jim Kukunas <james.t.kukunas@linux.intel.com>
Thu, 21 Jun 2018 20:47:32 +0000 (20:47 +0000)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Wed, 4 Sep 2019 06:53:36 +0000 (08:53 +0200)
Edit: Removed glue code in deflate.c, since we want
to implement this differently in zlib-ng.

arch/x86/Makefile.in
arch/x86/slide_sse.c [new file with mode: 0644]
win32/Makefile.msc

index 2e473c6268aa29e5304fee81e89832a056887843..95ad3682f9bfc4a6616ca2f8f0df5927025dfb48 100644 (file)
@@ -16,7 +16,7 @@ SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)
 
-all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
+all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo slide_sse.o slide_sse.lo
 
 x86.o:
        $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
@@ -48,6 +48,12 @@ crc_folding.o:
 crc_folding.lo:
        $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
 
+slide_sse.o:
+       $(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
+slide_sse.lo:
+       $(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
 mostlyclean: clean
 clean:
        rm -f *.o *.lo *~
diff --git a/arch/x86/slide_sse.c b/arch/x86/slide_sse.c
new file mode 100644 (file)
index 0000000..342fd56
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * SSE optimized hash slide
+ *
+ * Copyright (C) 2017 Intel Corporation
+ * Authors:
+ *   Arjan van de Ven  <arjan@linux.intel.com>
+ *   Jim Kukunas       <james.t.kukunas@linux.intel.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "deflate.h"
+
+#ifdef USE_SSE_SLIDE
+#include <immintrin.h>
+
+void slide_hash_sse(deflate_state *s)
+{
+    unsigned n;
+    Posf *p;
+    uInt wsize = s->w_size;
+    z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);
+
+    n = s->hash_size;
+    p = &s->head[n] - 8;
+    do {
+        __m128i value, result;
+
+       value = _mm_loadu_si128((__m128i *)p);
+       result= _mm_subs_epu16(value, xmm_wsize);
+       _mm_storeu_si128((__m128i *)p, result);
+       p -= 8;
+       n -= 8;
+    } while (n > 0);
+
+#ifndef FASTEST
+    n = wsize;
+    p = &s->prev[n] - 8;
+    do {
+        __m128i value, result;
+
+       value = _mm_loadu_si128((__m128i *)p);
+       result= _mm_subs_epu16(value, xmm_wsize);
+       _mm_storeu_si128((__m128i *)p, result);
+
+       p -= 8;
+       n -= 8;
+    } while (n > 0);
+#endif
+}
+
+#endif
+
index 9baf1e439016fd1495141079c2c36f1999ada328..4ad62ebf3c10d8b8bbd7b793ed34299b60e3edd3 100644 (file)
@@ -36,7 +36,7 @@ SUFFIX =
 
 OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
        deflate_medium.obj \
-       functable.obj infback.obj inflate.obj inftrees.obj inffast.obj trees.obj uncompr.obj zutil.obj \
+       functable.obj infback.obj inflate.obj inftrees.obj inffast.obj slide_sse.obj trees.obj uncompr.obj zutil.obj \
        x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj
 !if "$(ZLIB_COMPAT)" != ""
 WITH_GZFILEOP = yes
@@ -126,6 +126,7 @@ infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/
 inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h
 inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h
 inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
+slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h
 trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
 zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h