From: Hans Kristian Rosbach Date: Wed, 4 Nov 2015 19:58:56 +0000 (+0100) Subject: Split insert_string_sse into separate file in arch folder. X-Git-Tag: 1.9.9-b1~793^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2c4ec8a5a3eba31aae6488c0981dc23b36b24d26;p=thirdparty%2Fzlib-ng.git Split insert_string_sse into separate file in arch folder. --- diff --git a/arch/x86/Makefile.in b/arch/x86/Makefile.in index d91411ed7..3604ba852 100644 --- a/arch/x86/Makefile.in +++ b/arch/x86/Makefile.in @@ -10,7 +10,7 @@ INCLUDES= SRCDIR= SRCTOP= -all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo crc_folding.o crc_folding.lo +all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo x86.o: $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c @@ -30,6 +30,12 @@ deflate_quick.o: deflate_quick.lo: $(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c +insert_string_sse.o: + $(CC) $(CFLAGS) -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c + +insert_string_sse.lo: + $(CC) $(SFLAGS) -msse4 -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c + crc_folding.o: $(CC) $(CFLAGS) -mpclmul -msse4 $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c diff --git a/arch/x86/insert_string_sse.c b/arch/x86/insert_string_sse.c new file mode 100644 index 000000000..4bbda5060 --- /dev/null +++ b/arch/x86/insert_string_sse.c @@ -0,0 +1,47 @@ +/* insert_string_sse -- insert_string variant using SSE4.2's CRC instructions + * + * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + */ + +#include "deflate.h" + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). 
Return + * the previous length of the hash chain. + * IN assertion: all calls to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef X86_SSE4_2_CRC_HASH +extern inline Pos insert_string_sse(deflate_state *const s, const Pos str, uInt count) { + Pos ret = 0; + uInt idx; + unsigned *ip, val, h = 0; + + for (idx = 0; idx < count; idx++) { + ip = (unsigned *)&s->window[str+idx]; + val = *ip; + h = 0; + + if (s->level >= 6) + val &= 0xFFFFFF; + +#ifdef _MSC_VER + h = _mm_crc32_u32(h, val); +#else + __asm__ __volatile__ ( + "crc32 %1,%0\n\t" + : "+r" (h) + : "r" (val) + ); +#endif + + ret = s->prev[(str+idx) & s->w_mask] = s->head[h & s->hash_mask]; + s->head[h & s->hash_mask] = str+idx; + } + return ret; +} +#endif diff --git a/configure b/configure index 5b48d9144..00526ff3a 100755 --- a/configure +++ b/configure @@ -703,6 +703,8 @@ case "${ARCH}" in CFLAGS="${CFLAGS} -DX86_SSE4_2_CRC_HASH" SFLAGS="${SFLAGS} -DX86_SSE4_2_CRC_HASH" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} insert_string_sse.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo" if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC" diff --git a/deflate_p.h b/deflate_p.h index 25923a722..7fee6f02b 100644 --- a/deflate_p.h +++ b/deflate_p.h @@ -31,35 +31,9 @@ void flush_pending(z_stream *strm); * input characters and the first MIN_MATCH bytes of str are valid * (except for the last MIN_MATCH-1 bytes of the input file). 
*/ -#ifdef X86_SSE4_2_CRC_HASH -local inline Pos insert_string_sse(deflate_state *const s, const Pos str, uInt count) { - Pos ret = 0; - uInt idx; - unsigned *ip, val, h = 0; - - for (idx = 0; idx < count; idx++) { - ip = (unsigned *)&s->window[str+idx]; - val = *ip; - h = 0; - - if (s->level >= 6) - val &= 0xFFFFFF; - -#ifdef _MSC_VER - h = _mm_crc32_u32(h, val); -#else - __asm__ __volatile__ ( - "crc32 %1,%0\n\t" - : "+r" (h) - : "r" (val) - ); -#endif - ret = s->prev[(str+idx) & s->w_mask] = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str+idx; - } - return ret; -} +#ifdef X86_SSE4_2_CRC_HASH +extern Pos insert_string_sse(deflate_state *const s, const Pos str, uInt count); #endif local inline Pos insert_string_c(deflate_state *const s, const Pos str, uInt count) { diff --git a/win32/Makefile.msc b/win32/Makefile.msc index e79ce5267..b6375ccb4 100644 --- a/win32/Makefile.msc +++ b/win32/Makefile.msc @@ -31,7 +31,7 @@ DEFFILE = zlib.def WITH_GZFILEOP = OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \ - infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj crc_folding.obj + infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj x86.obj fill_window_sse.obj insert_string_sse.obj crc_folding.obj !if "$(WITH_GZFILEOP)" != "" WFLAGS = $(WFLAGS) -DWITH_GZFILEOP OBJS = $(OBJS) gzclose.obj gzlib.obj gzread.obj gzwrite.obj