git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
MSVC: Use _tzcnt_u32() if available.
author: Mika Lindqvist <postmaster@raasu.org>
Tue, 3 May 2016 18:03:54 +0000 (21:03 +0300)
committer: Mika Lindqvist <postmaster@raasu.org>
Mon, 13 Feb 2017 11:18:47 +0000 (13:18 +0200)
arch/x86/x86.c
arch/x86/x86.h
configure
deflate.c
match.c

index c7302412ddfa8af67da8d75db54285b836dce3a5..cd933745f1edb9ab33cfb75d2b6fd964316f91dc 100644 (file)
@@ -20,6 +20,7 @@
 ZLIB_INTERNAL int x86_cpu_has_sse2;
 ZLIB_INTERNAL int x86_cpu_has_sse42;
 ZLIB_INTERNAL int x86_cpu_has_pclmulqdq;
+ZLIB_INTERNAL int x86_cpu_has_tzcnt;
 
 static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
 #ifdef _MSC_VER
@@ -50,4 +51,8 @@ void ZLIB_INTERNAL x86_check_features(void) {
        x86_cpu_has_sse2 = edx & 0x4000000;
        x86_cpu_has_sse42 = ecx & 0x100000;
        x86_cpu_has_pclmulqdq = ecx & 0x2;
+
+       cpuid(7, &eax, &ebx, &ecx, &edx);
+
+       x86_cpu_has_tzcnt = ecx & 0x8;
 }
index 78be0a661a13838958c266788c985f3767ca06d8..9b06cc6659804bfe90bc91b99d6c3bf46b26fcc9 100644 (file)
@@ -17,6 +17,7 @@
 extern int x86_cpu_has_sse2;
 extern int x86_cpu_has_sse42;
 extern int x86_cpu_has_pclmulqdq;
+extern int x86_cpu_has_tzcnt;
 
 void ZLIB_INTERNAL x86_check_features(void);
 
index 8d8b5bdaded6650064bf8a9dfc872c83b28b4118..a4acdd3c602f39457986f05f90acee6f00eab71a 100755 (executable)
--- a/configure
+++ b/configure
@@ -733,11 +733,14 @@ case "${ARCH}" in
             ;;
         esac
 
-        CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNROLL_LESS -DX86_CPUID"
-        SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNROLL_LESS -DX86_CPUID"
+        CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNROLL_LESS"
+        SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNROLL_LESS"
 
        # Enable arch-specific optimizations?
        if test $without_optimizations -eq 0; then
+            CFLAGS="${CFLAGS} -DX86_CPUID"
+            SFLAGS="${SFLAGS} -DX86_CPUID"
+
             ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o"
             ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo"
 
index 8ca21c523449e94b59ae39fcb16f7af3b996fde8..a11b5ebda96d94f7131af1ffd144f86aedc21897 100644 (file)
--- a/deflate.c
+++ b/deflate.c
@@ -236,7 +236,7 @@ int ZEXPORT deflateInit2_(z_stream *strm, int level, int method, int windowBits,
      * output size for (length,distance) codes is <= 24 bits.
      */
 
-#if defined(X86_SSE2_FILL_WINDOW) || defined(X86_SSE4_2_CRC_HASH)
+#ifdef X86_CPUID
     x86_check_features();
 #endif
 
diff --git a/match.c b/match.c
index 486acceb4a86164b2470a766f8bdef45f41cfac8..ce93132be5121d418b50916db263999aec9837e5 100644 (file)
--- a/match.c
+++ b/match.c
 
 #if defined(_MSC_VER) && !defined(__clang__)
 #include <intrin.h>
-/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0
- * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
- */
-static __forceinline unsigned long __builtin_ctzl(unsigned long value)
-{
-       unsigned long trailing_zero;
-       _BitScanForward(&trailing_zero, value);
-       return trailing_zero;
-}
+# ifdef X86_CPUID
+#  include "arch/x86/x86.h"
+# endif
 #endif
 
 
@@ -283,6 +277,23 @@ ZLIB_INTERNAL unsigned longest_match(deflate_state *const s, IPos cur_match) {
 #endif
 
 #ifdef std3_longest_match
+
+#if defined(_MSC_VER) && !defined(__clang__)
+/* MSVC stand-in for __builtin_ctzl; NOT a general purpose replacement: value must be != 0.
+ * Because of that assumption trailing_zero is not initialized and the return value of
+ * _BitScanForward is not checked. Locals are declared first so C89-mode MSVC accepts this. */
+static __forceinline unsigned long __builtin_ctzl(unsigned long value)
+{
+       unsigned long trailing_zero;
+#ifdef X86_CPUID
+       if (x86_cpu_has_tzcnt)
+               return _tzcnt_u32(value);
+#endif
+       _BitScanForward(&trailing_zero, value);
+       return trailing_zero;
+}
+#endif
+
 /* longest_match() with minor change to improve performance (in terms of
  * execution time).
  *