SRCTOP=../..
TOPDIR=$(SRCTOP)
-all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
+# Build every x86 object in both its .o and .lo flavour.
+all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo slide_sse.o slide_sse.lo
x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
crc_folding.lo:
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
+# slide_sse.o: compile slide_sse.c with $(CFLAGS) plus $(SSE2FLAG).
+# No prerequisites are listed, so make only runs this recipe when the
+# object file does not exist yet.
+slide_sse.o:
+ $(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
+# slide_sse.lo: same source as slide_sse.o, but compiled with $(SFLAGS)
+# and -DPIC (presumably the object used for the shared library -- confirm
+# against the top-level Makefile).
+slide_sse.lo:
+ $(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
mostlyclean: clean
clean:
rm -f *.o *.lo *~
--- /dev/null
+/*
+ * SSE optimized hash slide
+ *
+ * Copyright (C) 2017 Intel Corporation
+ * Authors:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "deflate.h"
+
+#ifdef USE_SSE_SLIDE
+#include <immintrin.h>
+
+/*
+ * Subtract the window size from every entry of one position table,
+ * eight 16-bit entries per iteration, walking from the end of the table
+ * towards its start.
+ *
+ * _mm_subs_epu16 is an unsigned saturating subtract, so any entry smaller
+ * than the window size becomes 0 instead of wrapping around.
+ *
+ * table     - first entry of the table (entries are handled as 16-bit lanes)
+ * entries   - number of entries; must be a non-zero multiple of 8, because
+ *             the loop is a do/while that counts an unsigned value down in
+ *             steps of 8 (NOTE(review): zlib window/hash sizes are expected
+ *             to be powers of two >= 8 -- confirm against deflateInit2)
+ * xmm_wsize - the window size broadcast into all eight 16-bit lanes
+ */
+static void slide_hash_sse_chain(Posf *table, unsigned entries, __m128i xmm_wsize)
+{
+    Posf *p = &table[entries] - 8;
+
+    do {
+        __m128i value, result;
+
+        /* Unaligned load/store: no alignment requirement is placed on table. */
+        value = _mm_loadu_si128((__m128i *)p);
+        result = _mm_subs_epu16(value, xmm_wsize);
+        _mm_storeu_si128((__m128i *)p, result);
+
+        p -= 8;
+        entries -= 8;
+    } while (entries > 0);
+}
+
+/*
+ * SSE2 hash slide: lower every position stored in s->head (and, unless
+ * FASTEST is defined, in s->prev) by s->w_size, clamping at 0.
+ */
+void slide_hash_sse(deflate_state *s)
+{
+    uInt wsize = s->w_size;
+    /* Explicit narrowing to the 16-bit lane type; only the low 16 bits of
+     * the window size are broadcast, exactly as with the implicit conversion. */
+    z_const __m128i xmm_wsize = _mm_set1_epi16((short)wsize);
+
+    slide_hash_sse_chain(s->head, s->hash_size, xmm_wsize);
+#ifndef FASTEST
+    slide_hash_sse_chain(s->prev, wsize, xmm_wsize);
+#endif
+}
+
+#endif
+
OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
deflate_medium.obj \
- functable.obj infback.obj inflate.obj inftrees.obj inffast.obj trees.obj uncompr.obj zutil.obj \
+ functable.obj infback.obj inflate.obj inftrees.obj inffast.obj slide_sse.obj trees.obj uncompr.obj zutil.obj \
x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj
!if "$(ZLIB_COMPAT)" != ""
WITH_GZFILEOP = yes
inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
+slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h