From: Mika Lindqvist Date: Sun, 13 Mar 2022 15:12:42 +0000 (+0200) Subject: Allow bypassing runtime feature check of TZCNT instructions. X-Git-Tag: 2.0.7~79 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=10627e69df3360103bf132a97f7708e73d01122c;p=thirdparty%2Fzlib-ng.git Allow bypassing runtime feature check of TZCNT instructions. * This avoids a conditional branch when it's known at build time that TZCNT instructions are always supported --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e5646d7..978ae2db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,6 +102,7 @@ elseif(BASEARCH_S360_FOUND) add_option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF) add_option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF) elseif(BASEARCH_X86_FOUND) + option(FORCE_TZCNT "Always assume CPU is TZCNT capable" OFF) add_option(WITH_AVX2 "Build with AVX2" ON) add_option(WITH_SSE2 "Build with SSE2" ON) add_option(WITH_SSSE3 "Build with SSSE3" ON) @@ -821,13 +822,10 @@ if(WITH_OPTIM) endif() endif() endif() - if(WITH_SSSE3 AND HAVE_SSSE3_INTRIN) - add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32) - set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c) - add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"") - list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS}) - set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}") + if(FORCE_TZCNT) + add_definitions(-DX86_NOCHECK_TZCNT) endif() + add_feature_info(FORCE_TZCNT FORCE_TZCNT "Assume CPU is TZCNT capable") if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4) add_definitions(-DX86_PCLMULQDQ_CRC) set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c) diff --git a/README.md b/README.md index 8528f284..a89c8b70 100644 --- a/README.md +++ b/README.md @@ -195,6 +195,7 @@ Advanced Build Options | ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF | | UNALIGNED_OK | | Allow unaligned reads | ON 
(x86, arm) | | | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) | +| FORCE_TZCNT | --force-tzcnt | Skip runtime check for TZCNT instructions | OFF | | WITH_AVX2 | | Build with AVX2 intrinsics | ON | | WITH_SSE2 | | Build with SSE2 intrinsics | ON | | WITH_SSE4 | | Build with SSE4 intrinsics | ON | diff --git a/configure b/configure index afc07f94..3712476f 100755 --- a/configure +++ b/configure @@ -99,6 +99,7 @@ with_fuzzers=0 floatabi= native=0 forcesse2=0 +forcetzcnt=0 avx2flag="-mavx2" sse2flag="-msse2" ssse3flag="-mssse3" @@ -155,6 +156,7 @@ case "$1" in echo ' [--with-dfltcc-deflate] Use DEFLATE CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log echo ' [--with-dfltcc-inflate] Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log echo ' [--force-sse2] Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log + echo ' [--force-tzcnt] Assume TZCNT instructions are always available (disabled by default)' | tee -a configure.log echo ' [--with-sanitizer] Build with sanitizer (memory, address, undefined)' | tee -a configure.log echo ' [--with-fuzzers] Build test/fuzz (disabled by default)' | tee -a configure.log echo ' [--native] Compiles with full instruction set supported on this host' | tee -a configure.log @@ -181,6 +183,7 @@ case "$1" in --with-dfltcc-deflate) builddfltccdeflate=1; shift ;; --with-dfltcc-inflate) builddfltccinflate=1; shift ;; --force-sse2) forcesse2=1; shift ;; + --force-tzcnt) forcetzcnt=1; shift ;; -n | --native) native=1; shift ;; -a*=* | --archs=*) ARCHS=`echo $1 | sed 's/.*=//'`; shift ;; --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;; @@ -1282,6 +1285,11 @@ case "${ARCH}" in ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo" fi + + if test $forcetzcnt -eq 1; then + CFLAGS="${CFLAGS} 
-DX86_NOCHECK_TZCNT" + SFLAGS="${SFLAGS} -DX86_NOCHECK_TZCNT" + fi fi ;; diff --git a/fallback_builtins.h b/fallback_builtins.h index 314ad326..afa5870a 100644 --- a/fallback_builtins.h +++ b/fallback_builtins.h @@ -14,7 +14,9 @@ */ static __forceinline unsigned long __builtin_ctz(uint32_t value) { #ifdef X86_FEATURES +# ifndef X86_NOCHECK_TZCNT if (x86_cpu_has_tzcnt) +# endif return _tzcnt_u32(value); #endif unsigned long trailing_zero; @@ -29,7 +31,9 @@ static __forceinline unsigned long __builtin_ctz(uint32_t value) { */ static __forceinline unsigned long long __builtin_ctzll(uint64_t value) { #ifdef X86_FEATURES +# ifndef X86_NOCHECK_TZCNT if (x86_cpu_has_tzcnt) +# endif return _tzcnt_u64(value); #endif unsigned long trailing_zero;