* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef HAVE_PCLMULQDQ
+#ifdef X86_PCLMULQDQ_CRC
#include "deflate.h"
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef HAVE_SSE2
+#ifdef X86_SSE2_FILL_WINDOW
#include <immintrin.h>
#include "deflate.h"
HAVE_PCLMULQDQ_INTRIN=0
fi
+# Enable deflate_medium at level 4-6
+if test $without_new_strategies -eq 0; then
+ CFLAGS="${CFLAGS} -DMEDIUM_STRATEGY"
+ SFLAGS="${SFLAGS} -DMEDIUM_STRATEGY"
+fi
+
ARCHDIR=""
ARCH_STATIC_OBJS=""
ARCH_SHARED_OBJS=""
case "${ARCH}" in
x86_64)
- CFLAGS="${CFLAGS} -DX86_64"
- SFLAGS="${SFLAGS} -DX86_64"
+ CFLAGS="${CFLAGS} -DX86_64 -DX86_NOCHECK_SSE2"
+ SFLAGS="${SFLAGS} -DX86_64 -DX86_NOCHECK_SSE2"
;;
i386 | i486 | i586 | i686)
CFLAGS="${CFLAGS} -DX86"
;;
esac
- CFLAGS="${CFLAGS} -DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS"
- SFLAGS="${SFLAGS} -DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS"
+ CFLAGS="${CFLAGS} -DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS -DX86_CPUID"
+ SFLAGS="${SFLAGS} -DUNALIGNED_OK -DADLER32_UNROLL_LESS -DCRC32_UNROLL_LESS -DX86_CPUID"
# Enable arch-specific optimizations?
if test $without_optimizations -eq 0; then
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo"
if test ${HAVE_SSE2_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -UCHECK_SSE2 -DHAVE_SSE2"
- SFLAGS="${SFLAGS} -UCHECK_SSE2 -DHAVE_SSE2"
+ CFLAGS="${CFLAGS} -DX86_SSE2_FILL_WINDOW"
+ SFLAGS="${SFLAGS} -DX86_SSE2_FILL_WINDOW"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_sse.lo"
fi
- CFLAGS="${CFLAGS} -DUSE_SSE4_2_CRC_HASH"
- SFLAGS="${SFLAGS} -DUSE_SSE4_2_CRC_HASH"
+ CFLAGS="${CFLAGS} -DX86_SSE4_2_CRC_HASH"
+ SFLAGS="${SFLAGS} -DX86_SSE4_2_CRC_HASH"
if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DHAVE_PCLMULQDQ"
- SFLAGS="${SFLAGS} -DHAVE_PCLMULQDQ"
+ CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
+ SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo"
- fi
- fi
+ fi
- # Enable deflate_quick at level 1?
- if test $without_new_strategies -eq 0; then
- CFLAGS="${CFLAGS} -DUSE_QUICK -DUSE_MEDIUM"
- SFLAGS="${SFLAGS} -DUSE_QUICK -DUSE_MEDIUM"
+ # Enable deflate_quick at level 1?
+ if test $without_new_strategies -eq 0; then
+ CFLAGS="${CFLAGS} -DX86_QUICK_STRATEGY"
+ SFLAGS="${SFLAGS} -DX86_QUICK_STRATEGY"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} deflate_quick.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} deflate_quick.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} deflate_quick.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} deflate_quick.lo"
+ fi
fi
;;
esac
+
+
# show the results in the log
echo >> configure.log
echo ALL = $ALL >> configure.log
echo prefix = $prefix >> configure.log
echo sharedlibdir = $sharedlibdir >> configure.log
echo uname = $uname >> configure.log
-echo FILL_WINDOW_SSE_o = ${FILL_WINDOW_SSE_o} >> configure.log
-echo FILL_WINDOW_SSE_lo= ${FILL_WINDOW_SSE_lo} >> configure.log
-echo CRC_FOLDING_o = ${CRC_FOLDING_o} >> configure.log
-echo CRC_FOLDING_lo= ${CRC_FOLDING_lo} >> configure.log
-echo DEFLATE_QUICK_o=${DEFLATE_QUICK_o} >> configure.log
-echo DEFLATE_QUICK_lo=${DEFLATE_QUICK_lo} >> configure.log
+echo ARCHDIR = ${ARCHDIR} >> configure.log
+echo ARCH_STATIC_OBJS = ${ARCH_STATIC_OBJS} >> configure.log
+echo ARCH_SHARED_OBJS = ${ARCH_SHARED_OBJS} >> configure.log
# update Makefile with the configure results
sed < Makefile.in "
#include "deflate.h"
-#ifdef HAVE_PCLMULQDQ
+#ifdef X86_PCLMULQDQ_CRC
#include "arch/x86/x86.h"
extern void ZLIB_INTERNAL crc_fold_init(deflate_state *z_const s);
extern void ZLIB_INTERNAL crc_fold_copy(deflate_state *z_const s,
ZLIB_INTERNAL void crc_reset(deflate_state *const s)
{
-#ifdef HAVE_PCLMULQDQ
+#ifdef X86_PCLMULQDQ_CRC
if (x86_cpu_has_pclmulqdq) {
crc_fold_init(s);
return;
ZLIB_INTERNAL void crc_finalize(deflate_state *const s)
{
-#ifdef HAVE_PCLMULQDQ
+#ifdef X86_PCLMULQDQ_CRC
if (x86_cpu_has_pclmulqdq)
s->strm->adler = crc_fold_512to32(s);
#endif
ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size)
{
-#ifdef HAVE_PCLMULQDQ
+#ifdef X86_PCLMULQDQ_CRC
if (x86_cpu_has_pclmulqdq) {
crc_fold_copy(strm->state, dst, strm->next_in, size);
return;
#include "deflate.h"
-#if defined(CHECK_SSE2) || defined(USE_SSE4_2_CRC_HASH) || defined(USE_QUICK)
+#if defined(X86_CPUID)
#include "arch/x86/x86.h"
#endif
local const config configuration_table[10] = {
/* good lazy nice chain */
/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
-#ifdef USE_QUICK
+
+#ifdef X86_QUICK_STRATEGY
/* 1 */ {4, 4, 8, 4, deflate_quick},
/* 2 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */
-/* 3 */ {4, 6, 32, 32, deflate_fast},
#else
/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */
/* 2 */ {4, 5, 16, 8, deflate_fast},
-/* 3 */ {4, 6, 32, 32, deflate_fast},
#endif
-#ifdef USE_MEDIUM
+/* 3 */ {4, 6, 32, 32, deflate_fast},
+
+#ifdef MEDIUM_STRATEGY
/* 4 */ {4, 4, 16, 16, deflate_medium}, /* lazy matches */
/* 5 */ {8, 16, 32, 32, deflate_medium},
/* 6 */ {8, 16, 128, 128, deflate_medium},
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
-#ifdef USE_SSE4_2_CRC_HASH
+#ifdef X86_SSE4_2_CRC_HASH
local inline Pos insert_string_sse(deflate_state *z_const s, z_const Pos str)
{
Pos ret;
local inline Pos insert_string(deflate_state *z_const s, z_const Pos str)
{
-#ifdef USE_SSE4_2_CRC_HASH
+#ifdef X86_SSE4_2_CRC_HASH
if (x86_cpu_has_sse42)
return insert_string_sse(s, str);
#endif
* output size for (length,distance) codes is <= 24 bits.
*/
-#if defined(CHECK_SSE2) || defined(USE_SSE4_2_CRC_HASH)
+#if defined(X86_SSE2_FILL_WINDOW) || defined(X86_SSE4_2_CRC_HASH)
x86_check_features();
#endif
}
if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */
-#ifdef USE_QUICK
+#ifdef X86_QUICK_STRATEGY
if (level == 1)
windowBits = 13;
#endif
s->w_size = 1 << s->w_bits;
s->w_mask = s->w_size - 1;
-#ifdef USE_SSE4_2_CRC_HASH
+#ifdef X86_SSE4_2_CRC_HASH
if (x86_cpu_has_sse42)
s->hash_bits = 15;
else
s->hash_mask = s->hash_size - 1;
s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
-#ifdef HAVE_PCLMULQDQ
+#ifdef X86_PCLMULQDQ_CRC
window_padding = 8;
#endif
(flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
block_state bstate;
-#ifdef USE_QUICK
+#ifdef X86_QUICK_STRATEGY
if (s->level == 1 && !x86_cpu_has_sse42)
bstate = s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
(s->strategy == Z_RLE ? deflate_rle(s, flush) :
* performed for at least two bytes (required for the zip translate_eol
* option -- not supported here).
*/
-#ifdef HAVE_SSE2
+#ifdef X86_SSE2_FILL_WINDOW
extern void fill_window_sse(deflate_state *s);
#endif
local void fill_window_c(deflate_state *s);
local void fill_window(deflate_state *s)
{
-#ifdef HAVE_SSE2
-#ifdef CHECK_SSE2
+#ifdef X86_SSE2_FILL_WINDOW
+#ifndef X86_NOCHECK_SSE2
if (x86_cpu_has_sse2) {
#endif
fill_window_sse(s);
return;
-#ifdef CHECK_SSE2
- }
+#ifndef X86_NOCHECK_SSE2
+ }
#endif
#endif
}
-#ifdef USE_MEDIUM
+#ifdef MEDIUM_STRATEGY
#include "deflate_medium.c"
#endif
Byte method; /* can only be DEFLATED */
int last_flush; /* value of flush param for previous deflate call */
-#ifdef HAVE_PCLMULQDQ
+#ifdef X86_PCLMULQDQ_CRC
unsigned __attribute__((aligned(16))) crc0[4 * 5];
#endif
* input characters, so that a running hash key can be computed from the
* previous key instead of complete recalculation each time.
*/
-#ifdef USE_SSE4_2_CRC_HASH
+#ifdef X86_SSE4_2_CRC_HASH
#define UPDATE_HASH(s,h,i) (\
{\
if (s->level < 6) \