git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Allow bypassing runtime feature check of TZCNT instructions.
author Mika Lindqvist <postmaster@raasu.org>
Sun, 13 Mar 2022 15:12:42 +0000 (17:12 +0200)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 17 Mar 2023 20:27:56 +0000 (21:27 +0100)
* This avoids a conditional branch when it is known at build time that TZCNT instructions are always supported.

CMakeLists.txt
README.md
configure
fallback_builtins.h

index 8e5646d78ce1c20adf8440600d58b5dd8c86fc21..978ae2dbf51395fdc0bcd3db999794627febb42c 100644 (file)
@@ -102,6 +102,7 @@ elseif(BASEARCH_S360_FOUND)
     add_option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF)
     add_option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF)
 elseif(BASEARCH_X86_FOUND)
+    option(FORCE_TZCNT "Always assume CPU is TZCNT capable" OFF)
     add_option(WITH_AVX2 "Build with AVX2" ON)
     add_option(WITH_SSE2 "Build with SSE2" ON)
     add_option(WITH_SSSE3 "Build with SSSE3" ON)
@@ -821,13 +822,10 @@ if(WITH_OPTIM)
                 endif()
             endif()
         endif()
-        if(WITH_SSSE3 AND HAVE_SSSE3_INTRIN)
-            add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
-            set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
-            add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
-            list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
-            set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
+        if(FORCE_TZCNT)
+            add_definitions(-DX86_NOCHECK_TZCNT)
         endif()
+        add_feature_info(FORCE_TZCNT FORCE_TZCNT "Assume CPU is TZCNT capable")
         if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4)
             add_definitions(-DX86_PCLMULQDQ_CRC)
             set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c)
index 8528f2848f1c90887e7f8fe4462a9eea2a51730e..a89c8b70836620e0016cad6c9fa5b50bd995b10b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -195,6 +195,7 @@ Advanced Build Options
 | ZLIB_DUAL_LINK                  |                       | Dual link tests with system zlib                                    | OFF                    |
 | UNALIGNED_OK                    |                       | Allow unaligned reads                                               | ON (x86, arm)          |
 |                                 | --force-sse2          | Skip runtime check for SSE2 instructions (Always on for x86_64)     | OFF (x86)              |
+| FORCE_TZCNT                     | --force-tzcnt         | Skip runtime check for TZCNT instructions                           | OFF                    |
 | WITH_AVX2                       |                       | Build with AVX2 intrinsics                                          | ON                     |
 | WITH_SSE2                       |                       | Build with SSE2 intrinsics                                          | ON                     |
 | WITH_SSE4                       |                       | Build with SSE4 intrinsics                                          | ON                     |
index afc07f94b3fd254c35a3940a59487b6b0bc7d1ca..3712476f45e652b88312170fa47cf43765014276 100755 (executable)
--- a/configure
+++ b/configure
@@ -99,6 +99,7 @@ with_fuzzers=0
 floatabi=
 native=0
 forcesse2=0
+forcetzcnt=0
 avx2flag="-mavx2"
 sse2flag="-msse2"
 ssse3flag="-mssse3"
@@ -155,6 +156,7 @@ case "$1" in
       echo '    [--with-dfltcc-deflate]     Use DEFLATE CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log
       echo '    [--with-dfltcc-inflate]     Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log
       echo '    [--force-sse2]              Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log
+      echo '    [--force-tzcnt]             Assume TZCNT instructions are always available (disabled by default)' | tee -a configure.log
       echo '    [--with-sanitizer]          Build with sanitizer (memory, address, undefined)' | tee -a configure.log
       echo '    [--with-fuzzers]            Build test/fuzz (disabled by default)' | tee -a configure.log
       echo '    [--native]                  Compiles with full instruction set supported on this host' | tee -a configure.log
@@ -181,6 +183,7 @@ case "$1" in
     --with-dfltcc-deflate) builddfltccdeflate=1; shift ;;
     --with-dfltcc-inflate) builddfltccinflate=1; shift ;;
     --force-sse2) forcesse2=1; shift ;;
+    --force-tzcnt) forcetzcnt=1; shift ;;
     -n | --native) native=1; shift ;;
     -a*=* | --archs=*) ARCHS=`echo $1 | sed 's/.*=//'`; shift ;;
     --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;;
@@ -1282,6 +1285,11 @@ case "${ARCH}" in
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo"
             fi
+
+            if test $forcetzcnt -eq 1; then
+                CFLAGS="${CFLAGS} -DX86_NOCHECK_TZCNT"
+                SFLAGS="${SFLAGS} -DX86_NOCHECK_TZCNT"
+            fi
         fi
     ;;
 
index 314ad3267f2bb2abcf753f58ed169a7a7976c690..afa5870abfee21630c0c94e32dab7f90101a9720 100644 (file)
@@ -14,7 +14,9 @@
  */
 static __forceinline unsigned long __builtin_ctz(uint32_t value) {
 #ifdef X86_FEATURES
+#  ifndef X86_NOCHECK_TZCNT
     if (x86_cpu_has_tzcnt)
+#  endif
         return _tzcnt_u32(value);
 #endif
     unsigned long trailing_zero;
@@ -29,7 +31,9 @@ static __forceinline unsigned long __builtin_ctz(uint32_t value) {
  */
 static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
 #ifdef X86_FEATURES
+#  ifndef X86_NOCHECK_TZCNT
     if (x86_cpu_has_tzcnt)
+#  endif
         return _tzcnt_u64(value);
 #endif
     unsigned long trailing_zero;