git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Remove force-sse2 config option from x86 builds.
author Hans Kristian Rosbach <hk-git@circlestorm.org>
Fri, 10 Oct 2025 11:26:12 +0000 (13:26 +0200)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 11 Oct 2025 16:27:07 +0000 (18:27 +0200)
Due to major refactoring done long ago, this option no longer avoids a branch
in a hot path; it currently only removes a single if check during init.

CMakeLists.txt
README.md
arch/x86/x86_functions.h
configure
functable.c

index 2b1d52e537a7452496d99ff8ddfee551137cc1e9..7a30ed2fea70d60fbaf389fad71953a539fb8fa4 100644 (file)
@@ -1042,20 +1042,12 @@ if(WITH_OPTIM)
         endif()
         if(WITH_SSE2)
             check_sse2_intrinsics()
-            # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true
-            if("${ARCH}" MATCHES "i[3-6]86")
-                cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF)
-            endif()
             if(HAVE_SSE2_INTRIN)
                 add_definitions(-DX86_SSE2)
                 set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/chorba_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
                 list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
                 if(NOT ${ARCH} MATCHES "x86_64")
                     set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}")
-                    add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable")
-                    if(FORCE_SSE2)
-                        add_definitions(-DX86_NOCHECK_SSE2)
-                    endif()
                 endif()
             else()
                 set(WITH_SSE2 OFF)
index 183935e3e8ecd197337533b78c725c925c39adf3..c0ddc62268608dd60ec5956ed83bc37cc887db19 100644 (file)
--- a/README.md
+++ b/README.md
@@ -195,7 +195,6 @@ Advanced Build Options
 
 | CMake                           | configure             | Description                                                         | Default                |
 |:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------|
-| FORCE_SSE2                      | --force-sse2          | Skip runtime check for SSE2 instructions (Always on for x86_64)     | OFF (x86)              |
 | WITH_AVX2                       |                       | Build with AVX2 intrinsics                                          | ON                     |
 | WITH_AVX512                     |                       | Build with AVX512 intrinsics                                        | ON                     |
 | WITH_AVX512VNNI                 |                       | Build with AVX512VNNI intrinsics                                    | ON                     |
index ddb61b74e73c15733f4bd99cfe9c78e9894671e6..918b7e0f670f354321021d8e7478427aa16186e7 100644 (file)
@@ -91,7 +91,7 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // X86 - SSE2
-#  if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
+#  if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64)
 #    undef native_chunkmemset_safe
 #    define native_chunkmemset_safe chunkmemset_safe_sse2
 #    undef native_inflate_fast
index 0a7cbfcfa51baca44a60338acf66d02442dc7f75..fcfc795d0b91a964350baa6d027dbf829f847437 100755 (executable)
--- a/configure
+++ b/configure
@@ -106,7 +106,6 @@ builddfltccinflate=0
 buildcrc32vx=1
 buildcrc32la=1
 floatabi=
-forcesse2=0
 # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
 # instruction scheduling unless you specify a reasonable -mtune= target
 avx512flag="-mavx512f -mavx512dq -mavx512bw -mavx512vl -mbmi2"
@@ -190,7 +189,6 @@ case "$1" in
       echo '    [--with-dfltcc-inflate]     Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log
       echo '    [--without-crc32-vx]        Build without vectorized CRC32 on IBM Z' | tee -a configure.log
       echo '    [--with-reduced-mem]        Reduced memory usage for special cases (reduces performance)' | tee -a configure.log
-      echo '    [--force-sse2]              Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log
         exit 0 ;;
     -p*=* | --prefix=*) prefix=$(echo $1 | sed 's/.*=//'); shift ;;
     -e*=* | --eprefix=*) exec_prefix=$(echo $1 | sed 's/.*=//'); shift ;;
@@ -229,7 +227,6 @@ case "$1" in
     --without-crc32-vx) buildcrc32vx=0; shift ;;
     --without-crc32-la) buildcrc32la=0; shift ;;
     --with-reduced-mem) reducedmem=1; shift ;;
-    --force-sse2) forcesse2=1; shift ;;
     -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;
     --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;;
     --localstatedir=*) echo "ignored option: --localstatedir" | tee -a configure.log; shift ;;
@@ -1811,11 +1808,6 @@ case "${ARCH}" in
                 SFLAGS="${SFLAGS} -DX86_SSE2"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o chorba_sse2.o compare256_sse2.o slide_hash_sse2.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo chorba_sse2.lo compare256_sse2.lo slide_hash_sse2.lo"
-
-                if test $forcesse2 -eq 1; then
-                    CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2"
-                    SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2"
-                fi
             fi
 
             check_ssse3_intrinsics
index 831a8a27bda9ea343883719e6364bb458d08fb5f..1f8f52fd7cec66a07b9ca8bb90c5c4fc0063df21 100644 (file)
@@ -68,7 +68,7 @@ static void init_functable(void) {
 
     // X86 - SSE2
 #ifdef X86_SSE2
-#  if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+#  if !defined(__x86_64__) && !defined(_M_X64)
     if (cf.x86.has_sse2)
 #  endif
     {