git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Allow bypassing runtime feature check of TZCNT instructions.
author: Mika Lindqvist <postmaster@raasu.org>
Sun, 13 Mar 2022 15:12:42 +0000 (17:12 +0200)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Wed, 16 Mar 2022 10:43:09 +0000 (11:43 +0100)
* This avoids a conditional branch when it is known at build time that TZCNT instructions are always supported.

CMakeLists.txt
README.md
configure
fallback_builtins.h

index 440a7169682c4966e4dc56cfb72ea8830e43664f..60ccbe25bd09bf7d2a000bc64b173b29bce21355 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,6 +108,7 @@ elseif(BASEARCH_S360_FOUND)
     option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF)
     option(WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z" ON)
 elseif(BASEARCH_X86_FOUND)
+    option(FORCE_TZCNT "Always assume CPU is TZCNT capable" OFF)
     option(WITH_AVX2 "Build with AVX2" ON)
     option(WITH_AVX512 "Build with AVX512" ON)
     option(WITH_AVX512VNNI "Build with AVX512 VNNI extensions" ON)
@@ -792,6 +793,10 @@ if(WITH_OPTIM)
                 set(WITH_SSE4 OFF)
             endif()
         endif()
+        if(FORCE_TZCNT)
+            add_definitions(-DX86_NOCHECK_TZCNT)
+        endif()
+        add_feature_info(FORCE_TZCNT FORCE_TZCNT "Assume CPU is TZCNT capable")
         if(WITH_SSE2)
             check_sse2_intrinsics()
             if(HAVE_SSE2_INTRIN)
index 3c4fd00b6459bf750323fc74f84a09dbc892ffef..668c29cbb0d181ff27308c9f4072756ef83b4d29 100644 (file)
--- a/README.md
+++ b/README.md
@@ -196,6 +196,7 @@ Advanced Build Options
 | ZLIB_DUAL_LINK                  |                       | Dual link tests with system zlib                                    | OFF                    |
 | UNALIGNED_OK                    |                       | Allow unaligned reads                                               | ON (x86, arm)          |
 |                                 | --force-sse2          | Skip runtime check for SSE2 instructions (Always on for x86_64)     | OFF (x86)              |
+| FORCE_TZCNT                     | --force-tzcnt         | Skip runtime check for TZCNT instructions                           | OFF                    |
 | WITH_AVX2                       |                       | Build with AVX2 intrinsics                                          | ON                     |
 | WITH_AVX512                     |                       | Build with AVX512 intrinsics                                        | ON                     |
 | WITH_AVX512VNNI                 |                       | Build with AVX512VNNI intrinsics                                    | ON                     |
index 149ae732a3f6281dfdf522e5b417b8e72d36e6ae..bd237236d632d54f6e7a779304474613fb30f16a 100755 (executable)
--- a/configure
+++ b/configure
@@ -103,6 +103,7 @@ with_fuzzers=0
 floatabi=
 native=0
 forcesse2=0
+forcetzcnt=0
 # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
 # instruction scheduling unless you specify a reasonable -mtune= target
 avx512flag="-mavx512f -mavx512dq -mavx512bw -mavx512vl -mtune=cascadelake"
@@ -173,6 +174,7 @@ case "$1" in
       echo '    [--without-crc32-vx]        Build without vectorized CRC32 on IBM Z' | tee -a configure.log
       echo '    [--with-reduced-mem]        Reduced memory usage for special cases (reduces performance)' | tee -a configure.log
       echo '    [--force-sse2]              Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log
+      echo '    [--force-tzcnt]             Assume TZCNT instructions are always available (disabled by default)' | tee -a configure.log
       echo '    [--with-sanitizer]          Build with sanitizer (memory, address, undefined)' | tee -a configure.log
       echo '    [--with-fuzzers]            Build test/fuzz (disabled by default)' | tee -a configure.log
       echo '    [--native]                  Compiles with full instruction set supported on this host' | tee -a configure.log
@@ -206,6 +208,7 @@ case "$1" in
     --without-crc32-vx) buildcrc32vx=0; shift ;;
     --with-reduced-mem) reducedmem=1; shift ;;
     --force-sse2) forcesse2=1; shift ;;
+    --force-tzcnt) forcetzcnt=1; shift ;;
     -n | --native) native=1; shift ;;
     -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;
     --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;;
@@ -1589,6 +1592,11 @@ case "${ARCH}" in
                     fi
                 fi
             fi
+
+            if test $forcetzcnt -eq 1; then
+                CFLAGS="${CFLAGS} -DX86_NOCHECK_TZCNT"
+                SFLAGS="${SFLAGS} -DX86_NOCHECK_TZCNT"
+            fi
         fi
     ;;
 
index 8abec2fad72cd74d6e34c9f25cb936ade02064ae..c9fcceac3bf2b55ec1f06b8899b5951bfcebbfac 100644 (file)
--- a/fallback_builtins.h
+++ b/fallback_builtins.h
@@ -14,7 +14,9 @@
  */
 static __forceinline unsigned long __builtin_ctz(uint32_t value) {
 #ifdef X86_FEATURES
+#  ifndef X86_NOCHECK_TZCNT
     if (x86_cpu_has_tzcnt)
+#  endif
         return _tzcnt_u32(value);
 #endif
     unsigned long trailing_zero;
@@ -29,7 +31,9 @@ static __forceinline unsigned long __builtin_ctz(uint32_t value) {
  */
 static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
 #ifdef X86_FEATURES
+#  ifndef X86_NOCHECK_TZCNT
     if (x86_cpu_has_tzcnt)
+#  endif
         return _tzcnt_u64(value);
 #endif
     unsigned long trailing_zero;