SRCDIR=
SRCTOP=
-all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo crc_folding.o crc_folding.lo
+all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
deflate_quick.lo:
$(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
+# insert_string variant built with -msse4: static (.o) and PIC shared (.lo)
+# objects. The source file is listed as a prerequisite so that editing it
+# makes the objects out of date; without it, an existing object is never rebuilt.
+insert_string_sse.o: $(SRCDIR)/insert_string_sse.c
+	$(CC) $(CFLAGS) -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
+
+insert_string_sse.lo: $(SRCDIR)/insert_string_sse.c
+	$(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
+
crc_folding.o:
$(CC) $(CFLAGS) -mpclmul -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
--- /dev/null
+/* insert_string_sse -- insert_string variant using SSE4.2's CRC instructions
+ *
+ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ */
+
+#include "deflate.h"
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * IN assertion: all calls to INSERT_STRING are made with consecutive
+ * input characters and the first MIN_MATCH bytes of str are valid
+ * (except for the last MIN_MATCH-1 bytes of the input file).
+ */
+#ifdef X86_SSE4_2_CRC_HASH
+/* Inserts `count` consecutive positions, starting at `str`, into the hash
+ * chains, hashing each position with the SSE4.2 crc32 instruction.
+ *
+ * Returns the head that was previously stored for the last inserted
+ * position, or 0 when count is 0.
+ *
+ * Defined as an ordinary external function (no `inline`): other translation
+ * units reach it through a plain `extern` prototype, and under GNU89 inline
+ * semantics an `extern inline` definition is used for inlining only and
+ * emits no standalone symbol, which would leave those callers unresolved at
+ * link time.
+ */
+Pos insert_string_sse(deflate_state *const s, const Pos str, uInt count) {
+    Pos ret = 0;
+    uInt idx;
+
+    for (idx = 0; idx < count; idx++) {
+        /* NOTE(review): reads an unsigned directly from the window at an
+         * arbitrary offset, i.e. a possibly unaligned, type-punned load --
+         * confirm this is acceptable for every compiler that builds this file. */
+        unsigned val = *(unsigned *)&s->window[str + idx];
+        unsigned h = 0;    /* the CRC is restarted from 0 for every position */
+
+        /* At level 6 and above only the low 24 bits of the loaded word are hashed. */
+        if (s->level >= 6)
+            val &= 0xFFFFFF;
+
+#ifdef _MSC_VER
+        h = _mm_crc32_u32(h, val);
+#else
+        __asm__ __volatile__ (
+            "crc32 %1,%0\n\t"
+            : "+r" (h)
+            : "r" (val)
+        );
+#endif
+
+        /* Link this position to the previous head of its bucket, then make
+         * this position the new head. */
+        ret = s->prev[(str + idx) & s->w_mask] = s->head[h & s->hash_mask];
+        s->head[h & s->hash_mask] = str + idx;
+    }
+    return ret;
+}
+#endif
CFLAGS="${CFLAGS} -DX86_SSE4_2_CRC_HASH"
SFLAGS="${SFLAGS} -DX86_SSE4_2_CRC_HASH"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} insert_string_sse.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo"
if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
-#ifdef X86_SSE4_2_CRC_HASH
-local inline Pos insert_string_sse(deflate_state *const s, const Pos str, uInt count) {
- Pos ret = 0;
- uInt idx;
- unsigned *ip, val, h = 0;
-
- for (idx = 0; idx < count; idx++) {
- ip = (unsigned *)&s->window[str+idx];
- val = *ip;
- h = 0;
-
- if (s->level >= 6)
- val &= 0xFFFFFF;
-
-#ifdef _MSC_VER
- h = _mm_crc32_u32(h, val);
-#else
- __asm__ __volatile__ (
- "crc32 %1,%0\n\t"
- : "+r" (h)
- : "r" (val)
- );
-#endif
- ret = s->prev[(str+idx) & s->w_mask] = s->head[h & s->hash_mask];
- s->head[h & s->hash_mask] = str+idx;
- }
- return ret;
-}
+#ifdef X86_SSE4_2_CRC_HASH
+extern Pos insert_string_sse(deflate_state *const s, const Pos str, uInt count);
#endif
local inline Pos insert_string_c(deflate_state *const s, const Pos str, uInt count) {
WITH_GZFILEOP =
OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
- infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj crc_folding.obj
+ infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj
!if "$(WITH_GZFILEOP)" != ""
WFLAGS = $(WFLAGS) -DWITH_GZFILEOP
OBJS = $(OBJS) gzclose.obj gzlib.obj gzread.obj gzwrite.obj