From: hansr Date: Wed, 15 Oct 2014 11:11:17 +0000 (+0200) Subject: Reorganize optimization defines X-Git-Tag: 1.9.9-b1~910 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8795d01ea7d6033a248f6118d450237eec9c2cc0;p=thirdparty%2Fzlib-ng.git Reorganize optimization defines Enable x86_64 to skip checking for SSE2 Enable deflate_medium on all archs --- diff --git a/arch/x86/crc_folding.c b/arch/x86/crc_folding.c index 87f8c9dcd..9f4a2baf2 100644 --- a/arch/x86/crc_folding.c +++ b/arch/x86/crc_folding.c @@ -16,7 +16,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifdef HAVE_PCLMULQDQ +#ifdef X86_PCLMULQDQ_CRC #include "deflate.h" diff --git a/arch/x86/fill_window_sse.c b/arch/x86/fill_window_sse.c index bb4f521b2..ef666e2e2 100644 --- a/arch/x86/fill_window_sse.c +++ b/arch/x86/fill_window_sse.c @@ -8,7 +8,7 @@ * * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifdef HAVE_SSE2 +#ifdef X86_SSE2_FILL_WINDOW #include <immintrin.h> #include "deflate.h" diff --git a/configure b/configure index 28a165778..718676e3d 100755 --- a/configure +++ b/configure @@ -634,6 +634,12 @@ else HAVE_PCLMULQDQ_INTRIN=0 fi +# Enable deflate_medium at level 4-6 +if test $without_new_strategies -eq 0; then + CFLAGS="${CFLAGS} -DMEDIUM_STRATEGY" + SFLAGS="${SFLAGS} -DMEDIUM_STRATEGY" +fi + ARCHDIR="" ARCH_STATIC_OBJS="" ARCH_SHARED_OBJS="" @@ -645,8 +651,8 @@ case "${ARCH}" in case "${ARCH}" in x86_64) - CFLAGS="${CFLAGS} -DX86_64" - SFLAGS="${SFLAGS} -DX86_64" + CFLAGS="${CFLAGS} -DX86_64 -DX86_NOCHECK_SSE2" + SFLAGS="${SFLAGS} -DX86_64 -DX86_NOCHECK_SSE2" ;; i386 | i486 | i586 | i686) CFLAGS="${CFLAGS} -DX86" @@ -654,8 +660,8 @@ case "${ARCH}" in ;; esac - CFLAGS="${CFLAGS} -DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS" - SFLAGS="${SFLAGS} -DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS" + CFLAGS="${CFLAGS} -DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS -DX86_CPUID" + SFLAGS="${SFLAGS} 
-DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS -DX86_CPUID" # Enable arch-specific optimizations? if test $without_optimizations -eq 0; then @@ -663,34 +669,36 @@ case "${ARCH}" in ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo" if test ${HAVE_SSE2_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -UCHECK_SSE2 -DHAVE_SSE2" - SFLAGS="${SFLAGS} -UCHECK_SSE2 -DHAVE_SSE2" + CFLAGS="${CFLAGS} -DX86_SSE2_FILL_WINDOW" + SFLAGS="${SFLAGS} -DX86_SSE2_FILL_WINDOW" ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_sse.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_sse.lo" fi - CFLAGS="${CFLAGS} -DUSE_SSE4_2_CRC_HASH" - SFLAGS="${SFLAGS} -DUSE_SSE4_2_CRC_HASH" + CFLAGS="${CFLAGS} -DX86_SSE4_2_CRC_HASH" + SFLAGS="${SFLAGS} -DX86_SSE4_2_CRC_HASH" if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DHAVE_PCLMULQDQ" - SFLAGS="${SFLAGS} -DHAVE_PCLMULQDQ" + CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC" + SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC" ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo" - fi - fi + fi - # Enable deflate_quick at level 1? - if test $without_new_strategies -eq 0; then - CFLAGS="${CFLAGS} -DUSE_QUICK -DUSE_MEDIUM" - SFLAGS="${SFLAGS} -DUSE_QUICK -DUSE_MEDIUM" + # Enable deflate_quick at level 1? 
+ if test $without_new_strategies -eq 0; then + CFLAGS="${CFLAGS} -DX86_QUICK_STRATEGY" + SFLAGS="${SFLAGS} -DX86_QUICK_STRATEGY" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} deflate_quick.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} deflate_quick.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} deflate_quick.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} deflate_quick.lo" + fi fi ;; esac + + # show the results in the log echo >> configure.log echo ALL = $ALL >> configure.log @@ -722,12 +730,9 @@ echo mandir = $mandir >> configure.log echo prefix = $prefix >> configure.log echo sharedlibdir = $sharedlibdir >> configure.log echo uname = $uname >> configure.log -echo FILL_WINDOW_SSE_o = ${FILL_WINDOW_SSE_o} >> configure.log -echo FILL_WINDOW_SSE_lo= ${FILL_WINDOW_SSE_lo} >> configure.log -echo CRC_FOLDING_o = ${CRC_FOLDING_o} >> configure.log -echo CRC_FOLDING_lo= ${CRC_FOLDING_lo} >> configure.log -echo DEFLATE_QUICK_o=${DEFLATE_QUICK_o} >> configure.log -echo DEFLATE_QUICK_lo=${DEFLATE_QUICK_lo} >> configure.log +echo ARCHDIR = ${ARCHDIR} >> configure.log +echo ARCH_STATIC_OBJS = ${ARCH_STATIC_OBJS} >> configure.log +echo ARCH_SHARED_OBJS = ${ARCH_SHARED_OBJS} >> configure.log # udpate Makefile with the configure results sed < Makefile.in " diff --git a/crc32.c b/crc32.c index 92499d57c..4b2a38694 100644 --- a/crc32.c +++ b/crc32.c @@ -437,7 +437,7 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) #include "deflate.h" -#ifdef HAVE_PCLMULQDQ +#ifdef X86_PCLMULQDQ_CRC #include "arch/x86/x86.h" extern void ZLIB_INTERNAL crc_fold_init(deflate_state *z_const s); extern void ZLIB_INTERNAL crc_fold_copy(deflate_state *z_const s, @@ -447,7 +447,7 @@ extern unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *z_const s); ZLIB_INTERNAL void crc_reset(deflate_state *const s) { -#ifdef HAVE_PCLMULQDQ +#ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) { crc_fold_init(s); return; @@ -458,7 +458,7 @@ ZLIB_INTERNAL void crc_reset(deflate_state *const s) ZLIB_INTERNAL void 
crc_finalize(deflate_state *const s) { -#ifdef HAVE_PCLMULQDQ +#ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) s->strm->adler = crc_fold_512to32(s); #endif @@ -466,7 +466,7 @@ ZLIB_INTERNAL void crc_finalize(deflate_state *const s) ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) { -#ifdef HAVE_PCLMULQDQ +#ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) { crc_fold_copy(strm->state, dst, strm->next_in, size); return; diff --git a/deflate.c b/deflate.c index 002fca4aa..5bfc7d253 100644 --- a/deflate.c +++ b/deflate.c @@ -51,7 +51,7 @@ #include "deflate.h" -#if defined(CHECK_SSE2) || defined(USE_SSE4_2_CRC_HASH) || defined(USE_QUICK) +#if defined(X86_CPUID) #include "arch/x86/x86.h" #endif @@ -126,17 +126,18 @@ typedef struct config_s { local const config configuration_table[10] = { /* good lazy nice chain */ /* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ -#ifdef USE_QUICK + +#ifdef X86_QUICK_STRATEGY /* 1 */ {4, 4, 8, 4, deflate_quick}, /* 2 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ -/* 3 */ {4, 6, 32, 32, deflate_fast}, #else /* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ /* 2 */ {4, 5, 16, 8, deflate_fast}, -/* 3 */ {4, 6, 32, 32, deflate_fast}, #endif -#ifdef USE_MEDIUM +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +#ifdef MEDIUM_STRATEGY /* 4 */ {4, 4, 16, 16, deflate_medium}, /* lazy matches */ /* 5 */ {8, 16, 32, 32, deflate_medium}, /* 6 */ {8, 16, 128, 128, deflate_medium}, @@ -174,7 +175,7 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ * input characters and the first MIN_MATCH bytes of str are valid * (except for the last MIN_MATCH-1 bytes of the input file). 
*/ -#ifdef USE_SSE4_2_CRC_HASH +#ifdef X86_SSE4_2_CRC_HASH local inline Pos insert_string_sse(deflate_state *z_const s, z_const Pos str) { Pos ret; @@ -212,7 +213,7 @@ local inline Pos insert_string_c(deflate_state *z_const s, z_const Pos str) local inline Pos insert_string(deflate_state *z_const s, z_const Pos str) { -#ifdef USE_SSE4_2_CRC_HASH +#ifdef X86_SSE4_2_CRC_HASH if (x86_cpu_has_sse42) return insert_string_sse(s, str); #endif @@ -272,7 +273,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, * output size for (length,distance) codes is <= 24 bits. */ -#if defined(CHECK_SSE2) || defined(USE_SSE4_2_CRC_HASH) +#if defined(X86_SSE2_FILL_WINDOW) || defined(X86_SSE4_2_CRC_HASH) x86_check_features(); #endif @@ -317,7 +318,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, } if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ -#ifdef USE_QUICK +#ifdef X86_QUICK_STRATEGY if (level == 1) windowBits = 13; #endif @@ -333,7 +334,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, s->w_size = 1 << s->w_bits; s->w_mask = s->w_size - 1; -#ifdef USE_SSE4_2_CRC_HASH +#ifdef X86_SSE4_2_CRC_HASH if (x86_cpu_has_sse42) s->hash_bits = 15; else @@ -344,7 +345,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, s->hash_mask = s->hash_size - 1; s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); -#ifdef HAVE_PCLMULQDQ +#ifdef X86_PCLMULQDQ_CRC window_padding = 8; #endif @@ -952,7 +953,7 @@ int ZEXPORT deflate (strm, flush) (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { block_state bstate; -#ifdef USE_QUICK +#ifdef X86_QUICK_STRATEGY if (s->level == 1 && !x86_cpu_has_sse42) bstate = s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : (s->strategy == Z_RLE ? 
deflate_rle(s, flush) : @@ -1224,21 +1225,21 @@ local void check_match(s, start, match, length) * performed for at least two bytes (required for the zip translate_eol * option -- not supported here). */ -#ifdef HAVE_SSE2 +#ifdef X86_SSE2_FILL_WINDOW extern void fill_window_sse(deflate_state *s); #endif local void fill_window_c(deflate_state *s); local void fill_window(deflate_state *s) { -#ifdef HAVE_SSE2 -#ifdef CHECK_SSE2 +#ifdef X86_SSE2_FILL_WINDOW +#ifndef X86_NOCHECK_SSE2 if (x86_cpu_has_sse2) { #endif fill_window_sse(s); return; -#ifdef CHECK_SSE2 - } +#ifndef X86_NOCHECK_SSE2 + } #endif #endif @@ -1609,7 +1610,7 @@ local block_state deflate_fast(s, flush) } -#ifdef USE_MEDIUM +#ifdef MEDIUM_STRATEGY #include "deflate_medium.c" #endif diff --git a/deflate.h b/deflate.h index 8ba824f64..e1f44b403 100644 --- a/deflate.h +++ b/deflate.h @@ -110,7 +110,7 @@ typedef struct internal_state { Byte method; /* can only be DEFLATED */ int last_flush; /* value of flush param for previous deflate call */ -#ifdef HAVE_PCLMULQDQ +#ifdef X86_PCLMULQDQ_CRC unsigned __attribute__((aligned(16))) crc0[4 * 5]; #endif @@ -386,7 +386,7 @@ void ZLIB_INTERNAL bi_windup (deflate_state *s); * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */ -#ifdef USE_SSE4_2_CRC_HASH +#ifdef X86_SSE4_2_CRC_HASH #define UPDATE_HASH(s,h,i) (\ {\ if (s->level < 6) \