From: Mika Lindqvist
Date: Tue, 3 May 2016 18:03:54 +0000 (+0300)
Subject: MSVC: Use _tzcnt_u32() if available.
X-Git-Tag: 1.9.9-b1~694^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5bbf5a6eeefa1c693f96f51878c1ea5394c0756d;p=thirdparty%2Fzlib-ng.git

MSVC: Use _tzcnt_u32() if available.
---

diff --git a/arch/x86/x86.c b/arch/x86/x86.c
index c7302412d..cd933745f 100644
--- a/arch/x86/x86.c
+++ b/arch/x86/x86.c
@@ -20,6 +20,7 @@
 ZLIB_INTERNAL int x86_cpu_has_sse2;
 ZLIB_INTERNAL int x86_cpu_has_sse42;
 ZLIB_INTERNAL int x86_cpu_has_pclmulqdq;
+ZLIB_INTERNAL int x86_cpu_has_tzcnt;
 
 static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
 #ifdef _MSC_VER
@@ -50,4 +51,8 @@ void ZLIB_INTERNAL x86_check_features(void) {
     x86_cpu_has_sse2 = edx & 0x4000000;
     x86_cpu_has_sse42 = ecx & 0x100000;
     x86_cpu_has_pclmulqdq = ecx & 0x2;
+
+    cpuid(7, &eax, &ebx, &ecx, &edx);
+    /* TZCNT is part of BMI1, which CPUID leaf 7 reports in EBX bit 3 (not ECX). */
+    x86_cpu_has_tzcnt = ebx & 0x8;
 }
diff --git a/arch/x86/x86.h b/arch/x86/x86.h
index 78be0a661..9b06cc665 100644
--- a/arch/x86/x86.h
+++ b/arch/x86/x86.h
@@ -17,6 +17,7 @@
 extern int x86_cpu_has_sse2;
 extern int x86_cpu_has_sse42;
 extern int x86_cpu_has_pclmulqdq;
+extern int x86_cpu_has_tzcnt;
 
 void ZLIB_INTERNAL x86_check_features(void);
 
diff --git a/configure b/configure
index 8d8b5bdad..a4acdd3c6 100755
--- a/configure
+++ b/configure
@@ -733,11 +733,14 @@ case "${ARCH}" in
             ;;
         esac
 
-        CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNROLL_LESS -DX86_CPUID"
-        SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNROLL_LESS -DX86_CPUID"
+        CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNROLL_LESS"
+        SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNROLL_LESS"
 
         # Enable arch-specific optimizations?
if test $without_optimizations -eq 0; then + CFLAGS="${CFLAGS} -DX86_CPUID" + SFLAGS="${SFLAGS} -DX86_CPUID" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo" diff --git a/deflate.c b/deflate.c index 8ca21c523..a11b5ebda 100644 --- a/deflate.c +++ b/deflate.c @@ -236,7 +236,7 @@ int ZEXPORT deflateInit2_(z_stream *strm, int level, int method, int windowBits, * output size for (length,distance) codes is <= 24 bits. */ -#if defined(X86_SSE2_FILL_WINDOW) || defined(X86_SSE4_2_CRC_HASH) +#ifdef X86_CPUID x86_check_features(); #endif diff --git a/match.c b/match.c index 486acceb4..ce93132be 100644 --- a/match.c +++ b/match.c @@ -30,15 +30,9 @@ #if defined(_MSC_VER) && !defined(__clang__) #include -/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0 - * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked - */ -static __forceinline unsigned long __builtin_ctzl(unsigned long value) -{ - unsigned long trailing_zero; - _BitScanForward(&trailing_zero, value); - return trailing_zero; -} +# ifdef X86_CPUID +# include "arch/x86/x86.h" +# endif #endif @@ -283,6 +277,23 @@ ZLIB_INTERNAL unsigned longest_match(deflate_state *const s, IPos cur_match) { #endif #ifdef std3_longest_match + +#if defined(_MSC_VER) && !defined(__clang__) +/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0 + * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked + */ +static __forceinline unsigned long __builtin_ctzl(unsigned long value) +{ +#ifdef X86_CPUID + if (x86_cpu_has_tzcnt) + return _tzcnt_u32(value); +#endif + unsigned long trailing_zero; + _BitScanForward(&trailing_zero, value); + return trailing_zero; +} +#endif + /* longest_match() with minor change to improve performance (in terms of * execution time). *