add_definitions(-DHAVE_VISIBILITY_INTERNAL)
endif()
+#
+# check for __builtin_ctz() support in the compiler
+#
+# (assumes CheckCSourceCompiles is already included for the existing
+#  __builtin_ctzl() check below — verify)
+check_c_source_compiles(
+ "int main(void)
+ {
+ unsigned int zero = 0;
+ long test = __builtin_ctz(zero);
+ (void)test;
+ return 0;
+ }"
+ HAVE_BUILTIN_CTZ
+)
+if(HAVE_BUILTIN_CTZ)
+ add_definitions(-DHAVE_BUILTIN_CTZ)
+endif()
#
# check for __builtin_ctzl() support in the compiler
#
if(HAVE_BUILTIN_CTZL)
add_definitions(-DHAVE_BUILTIN_CTZL)
endif()
+#
+# check for __builtin_ctzll() support in the compiler
+#
+# Use an unsigned long long operand so the check actually exercises the
+# 64-bit builtin, matching the equivalent configure-script test.
+check_c_source_compiles(
+ "int main(void)
+ {
+ unsigned long long zero = 0;
+ long test = __builtin_ctzll(zero);
+ (void)test;
+ return 0;
+ }"
+ HAVE_BUILTIN_CTZLL
+)
+if(HAVE_BUILTIN_CTZLL)
+ add_definitions(-DHAVE_BUILTIN_CTZLL)
+endif()
#
# check for ptrdiff_t support
}"
HAVE_SSE42CRC_INTRIN
)
+ # Check whether compiler supports SSE4.2 compare string intrinsics
+ check_c_source_compile_or_run(
+ "#include <immintrin.h>
+ int main(void)
+ {
+ unsigned char a[64] = { 0 };
+ unsigned char b[64] = { 0 };
+ __m128i xmm_src0, xmm_src1;
+ xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
+ xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
+ return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
+ }"
+ HAVE_SSE42CMPSTR_INTRIN
+ )
set(CMAKE_REQUIRED_FLAGS)
# Check whether compiler supports PCLMULQDQ intrinics
if("${ARCH}" MATCHES "arm" OR NOT WITH_NEON)
add_intrinsics_option("${ACLEFLAG}")
endif()
- add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"")
+ add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
endif()
elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
if(WITH_AVX2 AND HAVE_AVX2_INTRIN)
add_definitions(-DX86_AVX2)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/slide_avx.c)
- add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2-optimized slide_hash, using \"${AVX2FLAG}\"")
+ add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
add_intrinsics_option("${AVX2FLAG}")
endif()
if(WITH_SSE4 AND (HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN))
add_definitions(-DX86_SSE42_CRC_HASH)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/insert_string_sse.c)
- add_feature_info(SSE42_CRC 1 "Support CRC hash generation using the SSE4.2 instruction set, using \"${SSE4FLAG}\"")
+ add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE4FLAG}\"")
add_intrinsics_option("${SSE4FLAG}")
if(HAVE_SSE42CRC_INTRIN)
add_definitions(-DX86_SSE42_CRC_INTRIN)
if(WITH_NEW_STRATEGIES)
add_definitions(-DX86_QUICK_STRATEGY)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/deflate_quick.c)
- add_feature_info(SSE42_DEFLATE_QUICK 1 "Support SSE4.2-accelerated quick compression")
+ add_feature_info(SSE42_DEFLATE_QUICK 1 "Support SSE4.2 accelerated quick compression")
endif()
endif()
+ if(HAVE_SSE42CMPSTR_INTRIN)
+ add_definitions(-DX86_SSE42_CMP_STR)
+ list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/compare258_sse.c)
+ add_feature_info(SSE42_COMPARE258 1 "Support SSE4.2 optimized compare258, using \"${SSE4FLAG}\"")
+ endif()
if(WITH_SSE2 AND HAVE_SSE2_INTRIN)
add_definitions(-DX86_SSE2)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/slide_sse.c)
)
set(ZLIB_SRCS
adler32.c
+ compare258.c
compress.c
crc32.c
deflate.c
man3dir = ${mandir}/man3
pkgconfigdir = ${libdir}/pkgconfig
-OBJZ = adler32.o compress.o crc32.o deflate.o deflate_fast.o deflate_medium.o deflate_slow.o functable.o infback.o inffast.o inflate.o inftrees.o insert_string.o trees.o uncompr.o zutil.o $(ARCH_STATIC_OBJS)
+OBJZ = adler32.o compare258.o compress.o crc32.o deflate.o deflate_fast.o deflate_medium.o deflate_slow.o functable.o infback.o inffast.o inflate.o inftrees.o insert_string.o trees.o uncompr.o zutil.o $(ARCH_STATIC_OBJS)
OBJG = gzclose.o gzlib.o gzread.o gzwrite.o
OBJC = $(OBJZ) $(OBJG)
-PIC_OBJZ = adler32.lo compress.lo crc32.lo deflate.lo deflate_fast.lo deflate_medium.lo deflate_slow.lo functable.lo infback.lo inffast.lo inflate.lo inftrees.lo insert_string.lo trees.lo uncompr.lo zutil.lo $(ARCH_SHARED_OBJS)
+PIC_OBJZ = adler32.lo compare258.lo compress.lo crc32.lo deflate.lo deflate_fast.lo deflate_medium.lo deflate_slow.lo functable.lo infback.lo inffast.lo inflate.lo inftrees.lo insert_string.lo trees.lo uncompr.lo zutil.lo $(ARCH_SHARED_OBJS)
PIC_OBJG = gzclose.lo gzlib.lo gzread.lo gzwrite.lo
PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)
SRCTOP=../..
TOPDIR=$(SRCTOP)
-all: x86.o x86.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo slide_avx.o slide_avx.lo slide_sse.o slide_sse.lo
+all: x86.o x86.lo compare258_sse.o compare258_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo slide_avx.o slide_avx.lo slide_sse.o slide_sse.lo
x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
x86.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
+# SSE4.2 compare258: compiled with $(SSE4FLAG); the .lo rule builds the
+# position-independent object (-DPIC) for the shared library.
+compare258_sse.o:
+ $(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c
+
+compare258_sse.lo:
+ $(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c
+
deflate_quick.o:
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
--- /dev/null
+/* compare258_sse.c -- SSE4.2 version of compare258
+ *
+ * Copyright (C) 2013 Intel Corporation. All rights reserved.
+ * Authors:
+ * Wajdi Feghali <wajdi.k.feghali@intel.com>
+ * Jim Guilford <james.guilford@intel.com>
+ * Vinodh Gopal <vinodh.gopal@intel.com>
+ * Erdinc Ozturk <erdinc.ozturk@intel.com>
+ * Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * Portions are Copyright (C) 2016 12Sided Technology, LLC.
+ * Author:
+ * Phil Vachon <pvachon@12sidedtech.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "../../zbuild.h"
+#include "../../zutil.h"
+
+#ifdef X86_SSE42_CMP_STR
+
+#include <immintrin.h>
+#ifdef _MSC_VER
+# include <nmmintrin.h>
+#endif
+
+/* UNALIGNED_OK, SSE4.2 intrinsic comparison */
+int32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
+#ifdef _MSC_VER
+ /* Intrinsics path (MSVC): pcmpestri compares 16 bytes per call, two calls per
+ loop iteration, over the first 256 bytes; the trailing 2 bytes of the
+ 258-byte window are handled separately after the loop. */
+ const unsigned char *src0start = src0;
+ const unsigned char *src0end = src0 + 256;
+
+ do {
+ #define mode _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY
+ __m128i xmm_src0, xmm_src1;
+ int ret;
+
+ xmm_src0 = _mm_loadu_si128((__m128i *)src0);
+ xmm_src1 = _mm_loadu_si128((__m128i *)src1);
+ /* cmpestrc sets the carry result when a mismatch exists; cmpestri gives its index */
+ ret = _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode);
+ if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) {
+ return (int32_t)(src0 - src0start + ret);
+ }
+ src0 += 16, src1 += 16;
+
+ xmm_src0 = _mm_loadu_si128((__m128i *)src0);
+ xmm_src1 = _mm_loadu_si128((__m128i *)src1);
+ ret = _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode);
+ if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) {
+ return (int32_t)(src0 - src0start + ret);
+ }
+ src0 += 16, src1 += 16;
+ } while (src0 < src0end);
+
+ /* Trailing 2 bytes of the window (unaligned 16-bit load; UNALIGNED_OK build). */
+ if (*(uint16_t *)src0 == *(uint16_t *)src1)
+ src0 += 2, src1 += 2;
+ else if (*src0 == *src1)
+ src0 += 1, src1 += 1;
+
+ return (int32_t)(src0 - src0start);
+#else
+ /* GCC/Clang path: inline asm keeps a byte offset (biased by 16) in ax so the
+ loads can use -16(base, ax) addressing; the $0x18 immediate is the same
+ comparison mode as the MSVC `mode` constant above.
+ NOTE(review): the asm reads *src0/*src1 but declares no "memory" clobber —
+ confirm the compiler cannot reorder stores to those buffers past it. */
+ uintptr_t ax, dx, cx;
+ __m128i xmm_src0;
+
+ ax = 16;
+ dx = 16;
+ /* Set cx to something, otherwise gcc thinks it's used
+ uninitialised */
+ cx = 0;
+
+ __asm__ __volatile__ (
+ "1:"
+ "movdqu -16(%[src0], %[ax]), %[xmm_src0]\n\t"
+ "pcmpestri $0x18, -16(%[src1], %[ax]), %[xmm_src0]\n\t"
+ "jc 2f\n\t"
+ "add $16, %[ax]\n\t"
+
+ "movdqu -16(%[src0], %[ax]), %[xmm_src0]\n\t"
+ "pcmpestri $0x18, -16(%[src1], %[ax]), %[xmm_src0]\n\t"
+ "jc 2f\n\t"
+ "add $16, %[ax]\n\t"
+
+ "cmp $256 + 16, %[ax]\n\t"
+ "jb 1b\n\t"
+
+# if !defined(__x86_64__)
+ "movzwl -16(%[src0], %[ax]), %[dx]\n\t"
+# else
+ "movzwq -16(%[src0], %[ax]), %[dx]\n\t"
+# endif
+ "xorw -16(%[src1], %[ax]), %%dx\n\t"
+ "jnz 3f\n\t"
+
+ "add $2, %[ax]\n\t"
+ "jmp 4f\n\t"
+ "3:\n\t"
+ "rep; bsf %[dx], %[cx]\n\t"
+ "shr $3, %[cx]\n\t"
+ "2:"
+ "add %[cx], %[ax]\n\t"
+ "4:"
+ : [ax] "+a" (ax),
+ [cx] "+c" (cx),
+ [dx] "+d" (dx),
+ [xmm_src0] "=x" (xmm_src0)
+ : [src0] "r" (src0),
+ [src1] "r" (src1)
+ : "cc"
+ );
+ return (int32_t)(ax - 16);
+#endif
+}
+
+#endif
--- /dev/null
+/* compare258.c -- aligned and unaligned versions of compare258
+ * Copyright (C) 2020 Nathan Moinvaziri
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+#include "zutil.h"
+
+#include "fallback_builtins.h"
+
+/* ALIGNED, byte comparison */
+int32_t compare258_c(const unsigned char *src0, const unsigned char *src1) {
+ const unsigned char *src0start = src0;
+ /* 258-byte window; the 6-way unrolling divides it exactly (6 * 43 = 258),
+ so the loop never reads past src0end when everything matches. */
+ const unsigned char *src0end = src0 + 258;
+
+ do {
+ if (*src0 != *src1)
+ break;
+ src0 += 1, src1 += 1;
+ if (*src0 != *src1)
+ break;
+ src0 += 1, src1 += 1;
+ if (*src0 != *src1)
+ break;
+ src0 += 1, src1 += 1;
+ if (*src0 != *src1)
+ break;
+ src0 += 1, src1 += 1;
+ if (*src0 != *src1)
+ break;
+ src0 += 1, src1 += 1;
+ if (*src0 != *src1)
+ break;
+ src0 += 1, src1 += 1;
+ } while (src0 < src0end);
+
+ /* Number of leading bytes that matched. */
+ return (int32_t)(src0 - src0start);
+}
+
+#ifdef UNALIGNED_OK
+/* UNALIGNED_OK, 16-bit integer comparison */
+int32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) {
+ const unsigned char *src0start = src0;
+ const unsigned char *src0end = src0 + 258;
+
+ do {
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ break;
+ src0 += 2, src1 += 2;
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ break;
+ src0 += 2, src1 += 2;
+ if (*(uint16_t *)src0 != *(uint16_t *)src1)
+ break;
+ src0 += 2, src1 += 2;
+ } while (src0 < src0end);
+
+ /* A 16-bit word mismatched; credit the first byte if it still matched.
+ NOTE(review): if all 258 bytes are equal, the loop exits with src0 at
+ offset 258 and this reads one byte past the window (possible return of
+ 259) — confirm callers guarantee a mismatch within the window. */
+ if (*src0 == *src1)
+ src0 += 1;
+
+ return (int32_t)(src0 - src0start);
+}
+
+#ifdef HAVE_BUILTIN_CTZ
+/* UNALIGNED_OK, 32-bit integer comparison */
+int32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) {
+ const unsigned char *src0start = src0;
+ /* Compare 256 bytes 4 at a time; the remaining 2 bytes are handled below. */
+ const unsigned char *src0end = src0 + 256;
+
+ do {
+ uint32_t sv = *(uint32_t *)src0;
+ uint32_t mv = *(uint32_t *)src1;
+ /* NOTE(review): `xor` is an operator keyword in C++ — rename (e.g. `diff`)
+ if this file could ever be compiled as C++. */
+ uint32_t xor = sv ^ mv;
+
+ if (xor) {
+ /* ctz(xor)/8 = index of first differing byte; presumes little-endian
+ byte order — TODO confirm for all UNALIGNED_OK targets. */
+ uint32_t match_byte = __builtin_ctz(xor) / 8;
+ return (int32_t)(src0 - src0start + match_byte);
+ }
+
+ src0 += 4, src1 += 4;
+ } while (src0 < src0end);
+
+ /* Trailing 2 bytes of the 258-byte window. */
+ if (*(uint16_t *)src0 == *(uint16_t *)src1)
+ src0 += 2, src1 += 2;
+ else if (*src0 == *src1)
+ src0 += 1, src1 += 1;
+
+ return (int32_t)(src0 - src0start);
+}
+
+#endif
+
+#ifdef HAVE_BUILTIN_CTZLL
+/* UNALIGNED_OK, 64-bit integer comparison */
+int32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) {
+ const unsigned char *src0start = src0;
+ /* Compare 256 bytes 8 at a time; the remaining 2 bytes are handled below. */
+ const unsigned char *src0end = src0 + 256;
+
+ do {
+ uint64_t sv = *(uint64_t *)src0;
+ uint64_t mv = *(uint64_t *)src1;
+ /* NOTE(review): `xor` is an operator keyword in C++ — rename (e.g. `diff`)
+ if this file could ever be compiled as C++. */
+ uint64_t xor = sv ^ mv;
+
+ if (xor) {
+ /* ctzll(xor)/8 = index of first differing byte; presumes little-endian
+ byte order — TODO confirm for all UNALIGNED_OK targets. */
+ uint64_t match_byte = __builtin_ctzll(xor) / 8;
+ return (int32_t)(src0 - src0start + match_byte);
+ }
+
+ src0 += 8, src1 += 8;
+ } while (src0 < src0end);
+
+ /* Trailing 2 bytes of the 258-byte window. */
+ if (*(uint16_t *)src0 == *(uint16_t *)src1)
+ src0 += 2, src1 += 2;
+ else if (*src0 == *src1)
+ src0 += 1, src1 += 1;
+
+ return (int32_t)(src0 - src0start);
+}
+
+#endif
+
+#endif
fi
fi
+# Check for __builtin_ctz() support in compiler
+# (compile-and-link test; on success the define is added to both the static
+#  CFLAGS and shared-library SFLAGS)
+cat > $test.c << EOF
+int main(void) {
+ unsigned int zero = 0;
+ long test = __builtin_ctz(zero);
+ (void)test;
+ return 0;
+}
+EOF
+if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then
+ echo "Checking for __builtin_ctz ... Yes." | tee -a configure.log
+ CFLAGS="$CFLAGS -DHAVE_BUILTIN_CTZ"
+ SFLAGS="$SFLAGS -DHAVE_BUILTIN_CTZ"
+else
+ echo "Checking for __builtin_ctz ... No." | tee -a configure.log
+fi
+
+
# Check for __builtin_ctzl() support in compiler
cat > $test.c << EOF
int main(void) {
echo "Checking for __builtin_ctzl ... No." | tee -a configure.log
fi
+# Check for __builtin_ctzll() support in compiler
+# (compile-and-link test; on success the define is added to both the static
+#  CFLAGS and shared-library SFLAGS)
+cat > $test.c << EOF
+int main(void) {
+ unsigned long long zero = 0;
+ long test = __builtin_ctzll(zero);
+ (void)test;
+ return 0;
+}
+EOF
+if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then
+ echo "Checking for __builtin_ctzll ... Yes." | tee -a configure.log
+ CFLAGS="$CFLAGS -DHAVE_BUILTIN_CTZLL"
+ SFLAGS="$SFLAGS -DHAVE_BUILTIN_CTZLL"
+else
+ echo "Checking for __builtin_ctzll ... No." | tee -a configure.log
+fi
+
+
# Check for SSE2 intrinsics
case "${ARCH}" in
i386 | i486 | i586 | i686)
;;
esac
+# Check for SSE4.2 compare string intrinsics
+case "${ARCH}" in
+ i386 | i486 | i586 | i686 | x86_64)
+ cat > $test.c << EOF
+#include <immintrin.h>
+int main(void)
+{
+ unsigned char a[64] = { 0 };
+ unsigned char b[64] = { 0 };
+ __m128i xmm_src0, xmm_src1;
+ xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
+ xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
+ return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
+}
+EOF
+ # NOTE(review): compile-only (no $LDSHAREDLIBC), unlike the __builtin_ctz
+ # checks above — confirm that is intentional.
+ if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
+ echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log
+ HAVE_SSE42CMPSTR_INTRIN=1
+ else
+ echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log
+ HAVE_SSE42CMPSTR_INTRIN=0
+ fi
+ ;;
+esac
+
+
# Check for PCLMULQDQ intrinsics
case "${ARCH}" in
i386 | i486 | i586 | i686 | x86_64)
SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN"
fi
+ if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then
+ CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
+ SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
+
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} compare258_sse.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} compare258_sse.lo"
+ fi
+
if test ${HAVE_AVX2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_AVX2"
SFLAGS="${SFLAGS} -DX86_AVX2"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_avx.lo"
fi
+ if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then
+ CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
+ SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
+
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} compare258_sse.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} compare258_sse.lo"
+ fi
+
# Enable deflate_quick at level 1?
if test $without_new_strategies -eq 0; then
CFLAGS="${CFLAGS} -DX86_QUICK_STRATEGY"
-#ifndef X86_CTZL_H
-#define X86_CTZL_H
+#ifndef X86_BUILTIN_CTZ_H
+#define X86_BUILTIN_CTZ_H
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64)
#include <intrin.h>
#ifdef X86_CPUID
# include "arch/x86/x86.h"
#endif
-#if defined(_MSC_VER) && !defined(__clang__)
+/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0
+ * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
+ */
+static __forceinline unsigned long __builtin_ctz(uint32_t value) {
+#ifdef X86_CPUID
+ /* Use the hardware TZCNT instruction when the CPU reports support. */
+ if (x86_cpu_has_tzcnt)
+ return _tzcnt_u32(value);
+#endif
+ /* NOTE(review): declaration after statement — confirm the minimum supported
+ MSVC toolset accepts this in C mode. */
+ unsigned long trailing_zero;
+ _BitScanForward(&trailing_zero, value);
+ return trailing_zero;
+}
+#define HAVE_BUILTIN_CTZ
+
/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0
* Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
*/
_BitScanForward(&trailing_zero, value);
return trailing_zero;
}
+
+#ifdef _M_AMD64
+/* This is not a general purpose replacement for __builtin_ctzll. The function expects that value is != 0
+ * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward64 is not checked
+ */
+static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
+#ifdef X86_CPUID
+ /* Use the hardware TZCNT instruction when the CPU reports support. */
+ if (x86_cpu_has_tzcnt)
+ return _tzcnt_u64(value);
+#endif
+ /* NOTE(review): _BitScanForward64 is documented for ARM64 MSVC as well;
+ the _M_AMD64 guard excludes it — widen if ARM64 builds need this. */
+ unsigned long trailing_zero;
+ _BitScanForward64(&trailing_zero, value);
+ return trailing_zero;
+}
+#define HAVE_BUILTIN_CTZLL
#endif
#endif
+#endif
+#endif
#include "deflate_p.h"
#include "functable.h"
+
+#ifdef X86_CPUID
+# include "fallback_builtins.h"
+#endif
+
/* insert_string */
extern Pos insert_string_c(deflate_state *const s, const Pos str, unsigned int count);
#ifdef X86_SSE42_CRC_HASH
extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t);
#endif
+/* compare258 */
+extern int32_t compare258_c(const unsigned char *src0, const unsigned char *src1);
+#ifdef UNALIGNED_OK
+extern int32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1);
+extern int32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1);
+extern int32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1);
+#ifdef X86_SSE42_CMP_STR
+extern int32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1);
+#endif
+#endif
/* stub definitions */
ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count);
ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len);
ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len);
ZLIB_INTERNAL void slide_hash_stub(deflate_state *s);
+ZLIB_INTERNAL int32_t compare258_stub(const unsigned char *src0, const unsigned char *src1);
/* functable init */
ZLIB_INTERNAL __thread struct functable_s functable = {
quick_insert_string_stub,
adler32_stub,
crc32_stub,
- slide_hash_stub
+ slide_hash_stub,
+ compare258_stub
};
ZLIB_INTERNAL void cpu_check_features(void)
return functable.crc32(crc, buf, len);
}
+
+ZLIB_INTERNAL int32_t compare258_stub(const unsigned char *src0, const unsigned char *src1) {
+
+ /* First call resolves the best available compare258 implementation,
+ installs it in the function table, then dispatches through it; later
+ calls go straight to the resolved function. */
+ functable.compare258 = &compare258_c;
+
+#ifdef UNALIGNED_OK
+# ifdef HAVE_BUILTIN_CTZLL
+ functable.compare258 = &compare258_unaligned_64;
+# elif defined(HAVE_BUILTIN_CTZ)
+ functable.compare258 = &compare258_unaligned_32;
+# else
+ functable.compare258 = &compare258_unaligned_16;
+# endif
+# ifdef X86_SSE42_CMP_STR
+ /* NOTE(review): relies on x86_cpu_has_sse42 being populated (e.g. by
+ cpu_check_features) before the first compare258 call — confirm the
+ initialization order matches the other stubs. */
+ if (x86_cpu_has_sse42)
+ functable.compare258 = &compare258_unaligned_sse4;
+# endif
+#endif
+
+ return functable.compare258(src0, src1);
+}
+
uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, size_t len);
uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len);
void (* slide_hash) (deflate_state *s);
+ int32_t (* compare258) (const unsigned char *src0, const unsigned char *src1);
};
ZLIB_INTERNAL extern __thread struct functable_s functable;
ZLIB_COMPAT =
SUFFIX =
-OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj \
- deflate_medium.obj \
+OBJS = adler32.obj compare258.obj compare258_sse.obj compress.obj crc32.obj \
+ deflate.obj deflate_fast.obj deflate_quick.obj deflate_slow.obj deflate_medium.obj \
functable.obj infback.obj inflate.obj inftrees.obj inffast.obj insert_string.obj \
slide_avx.obj slide_sse.obj trees.obj uncompr.obj zutil.obj \
x86.obj insert_string_sse.obj crc_folding.obj