Don't build C-fallback functions that never get used on x86_64
author    Hans Kristian Rosbach <hk-git@circlestorm.org>
          Fri, 10 Oct 2025 12:52:21 +0000 (14:52 +0200)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
          Mon, 13 Oct 2025 20:02:01 +0000 (22:02 +0200)
.github/workflows/pkgcheck.yml
CMakeLists.txt
README.md
configure
functable.c
test/benchmarks/benchmark_slidehash.cc

index 58af3a51cb0a772710b11133dc64e6257998c9b2..3ec5461f84c51e024b0f76dcbd862207ecece0da 100644
--- a/.github/workflows/pkgcheck.yml
+++ b/.github/workflows/pkgcheck.yml
@@ -137,7 +137,7 @@ jobs:
         CFLAGS: ${{ matrix.cflags }}
         CXXFLAGS: ${{ matrix.cxxflags }}
         CHOST: ${{ matrix.chost }}
-        CMAKE_ARGS: ${{ matrix.cmake-args }}
+        CMAKE_ARGS: ${{ matrix.cmake-args }} -DWITH_ALL_FALLBACKS=ON
         CONFIGURE_ARGS: ${{ matrix.configure-args }}
         LDFLAGS: ${{ matrix.ldflags }}
 
@@ -147,7 +147,7 @@ jobs:
         CC: ${{ matrix.compiler }}
         CFLAGS: ${{ matrix.cflags }}
         CHOST: ${{ matrix.chost }}
-        CMAKE_ARGS: ${{ matrix.cmake-args }}
+        CMAKE_ARGS: ${{ matrix.cmake-args }} -DWITH_ALL_FALLBACKS=ON
         CONFIGURE_ARGS: ${{ matrix.configure-args }}
         LDFLAGS: ${{ matrix.ldflags }}
 
index 7a30ed2fea70d60fbaf389fad71953a539fb8fa4..31e8c691526cbdd15f717e2441b3515ace0b430b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -90,6 +90,7 @@ endif()
 option(WITH_GZFILEOP "Compile with support for gzFile related functions" ON)
 option(ZLIB_COMPAT "Compile with zlib compatible API" OFF)
 option(WITH_OPTIM "Build with optimisation" ON)
+option(WITH_ALL_FALLBACKS "Build all generic fallback functions (Useful for Gbench)" OFF)
 option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces performance)" OFF)
 option(WITH_NEW_STRATEGIES "Use new strategies" ON)
 option(WITH_CRC32_CHORBA "Enable optimized CRC32 algorithm Chorba" ON)
@@ -151,6 +152,7 @@ mark_as_advanced(FORCE
     ZLIB_SYMBOL_PREFIX
     WITH_REDUCED_MEM
     WITH_CRC32_CHORBA
+    WITH_ALL_FALLBACKS
     WITH_ARMV8 WITH_NEON
     WITH_ARMV6
     WITH_DFLTCC_DEFLATE
@@ -713,6 +715,7 @@ else()
 endif()
 
 if(WITH_OPTIM)
+    add_definitions(-DWITH_OPTIM)
     if(BASEARCH_ARM_FOUND)
         add_definitions(-DARM_FEATURES)
         if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
@@ -1160,6 +1163,9 @@ if(WITH_OPTIM)
             endif()
         endif()
     endif()
+else()
+    # If WITH_OPTIM is disabled, we need all the fallbacks.
+    set(WITH_ALL_FALLBACKS ON)
 endif()
 
 message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}")
@@ -1267,14 +1273,6 @@ set(ZLIB_PRIVATE_HDRS
     zutil_p.h
 )
 set(ZLIB_SRCS
-    arch/generic/adler32_c.c
-    arch/generic/adler32_fold_c.c
-    arch/generic/chunkset_c.c
-    arch/generic/compare256_c.c
-    arch/generic/crc32_braid_c.c
-    arch/generic/crc32_c.c
-    arch/generic/crc32_fold_c.c
-    arch/generic/slide_hash_c.c
     adler32.c
     compress.c
     crc32.c
@@ -1298,6 +1296,39 @@ set(ZLIB_SRCS
     zutil.c
 )
 
+set(ZLIB_ALL_FALLBACK_SRCS
+    arch/generic/adler32_c.c
+    arch/generic/adler32_fold_c.c
+    arch/generic/chunkset_c.c
+    arch/generic/compare256_c.c
+    arch/generic/crc32_braid_c.c
+    arch/generic/crc32_c.c
+    arch/generic/crc32_fold_c.c
+    arch/generic/slide_hash_c.c
+)
+
+if(WITH_ALL_FALLBACKS)
+    list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
+    add_definitions(-DWITH_ALL_FALLBACKS)
+elseif(${ARCH} STREQUAL "x86_64" AND WITH_SSE2)
+    # x86_64 always has SSE2, so let the SSE2 functions act as fallbacks.
+    list(APPEND ZLIB_GENERIC_SRCS
+        arch/generic/adler32_c.c
+        arch/generic/adler32_fold_c.c
+        arch/generic/crc32_braid_c.c
+        arch/generic/crc32_c.c
+        arch/generic/crc32_fold_c.c
+    )
+
+    # x86_64 does not need compare256 fallback if we have BUILTIN_CTZ
+    if(NOT HAVE_BUILTIN_CTZ)
+        list(APPEND ZLIB_GENERIC_SRCS arch/generic/compare256_c.c)
+    endif()
+else()
+    list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
+    add_definitions(-DWITH_ALL_FALLBACKS)
+endif()
+
 if(WITH_CRC32_CHORBA)
     list(APPEND ZLIB_SRCS arch/generic/crc32_chorba_c.c)
 endif()
@@ -1316,7 +1347,7 @@ set(ZLIB_GZFILE_SRCS
     gzwrite.c
 )
 
-set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
+set(ZLIB_ALL_SRCS ${ZLIB_GENERIC_SRCS} ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
 if(WITH_GZFILEOP)
     list(APPEND ZLIB_ALL_SRCS ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS})
 endif()
@@ -1542,6 +1573,7 @@ add_feature_info(WITH_GTEST WITH_GTEST "Build gtest_zlib")
 add_feature_info(WITH_FUZZERS WITH_FUZZERS "Build test/fuzz")
 add_feature_info(WITH_BENCHMARKS WITH_BENCHMARKS "Build test/benchmarks")
 add_feature_info(WITH_BENCHMARK_APPS WITH_BENCHMARK_APPS "Build application benchmarks")
+add_feature_info(WITH_ALL_FALLBACKS WITH_ALL_FALLBACKS "Build all generic fallback functions")
 add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation")
 add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
 add_feature_info(WITH_CRC32_CHORBA WITH_CRC32_CHORBA "Use optimized CRC32 algorithm Chorba")
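
Why the pruning above is safe: on x86_64, SSE2 is part of the base instruction set, so the functable never needs the generic chunkset/slide_hash implementations there. Below is a minimal sketch of that invariant, with hypothetical, simplified signatures (zlib-ng's real slide_hash operates on a deflate_state, not a bare array):

```c
#include <stdint.h>

typedef void (*slide_hash_fn)(uint16_t *head, unsigned entries, uint16_t wsize);

/* SIMD variant: always compiled on x86_64, where SSE2 is baseline. */
void slide_hash_sse2_sketch(uint16_t *head, unsigned entries, uint16_t wsize) {
    (void)head; (void)entries; (void)wsize;  /* vectorized body elided */
}

/* Generic variant: only compiled when the build keeps the C fallbacks,
 * mirroring the CMake branches above. */
#if defined(WITH_ALL_FALLBACKS) || !(defined(__x86_64__) || defined(_M_X64))
void slide_hash_generic_sketch(uint16_t *head, unsigned entries, uint16_t wsize) {
    while (entries--) {
        uint16_t v = head[entries];
        head[entries] = (uint16_t)(v >= wsize ? v - wsize : 0);
    }
}
#endif

slide_hash_fn select_slide_hash(void) {
#if defined(__x86_64__) || defined(_M_X64)
    return slide_hash_sse2_sketch;     /* generic pointer never consulted */
#else
    return slide_hash_generic_sketch;  /* C fallback guaranteed to exist here */
#endif
}
```

On any target where the SSE2 assumption cannot hold, the else branch in the CMake diff flips WITH_ALL_FALLBACKS back on, so the generic symbol always exists wherever select_slide_hash could return it.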
index c0ddc62268608dd60ec5956ed83bc37cc887db19..81d967fb039ecd0f3dcf0e96e50ec9e43c635625 100644
--- a/README.md
+++ b/README.md
@@ -203,7 +203,7 @@ Advanced Build Options
 | WITH_SSE42                      |                       | Build with SSE42 intrinsics                                         | ON                     |
 | WITH_PCLMULQDQ                  |                       | Build with PCLMULQDQ intrinsics                                     | ON                     |
 | WITH_VPCLMULQDQ                 | --without-vpclmulqdq  | Build with VPCLMULQDQ intrinsics                                    | ON                     |
-| WITH_ARMV8                      | --without-armv8       | Build with ARMv8 intrinsics                                          | ON                     |
+| WITH_ARMV8                      | --without-armv8       | Build with ARMv8 intrinsics                                         | ON                     |
 | WITH_NEON                       | --without-neon        | Build with NEON intrinsics                                          | ON                     |
 | WITH_ARMV6                      | --without-armv6       | Build with ARMv6 intrinsics                                         | ON                     |
 | WITH_ALTIVEC                    | --without-altivec     | Build with AltiVec (VMX) intrinsics                                 | ON                     |
@@ -216,6 +216,7 @@ Advanced Build Options
 | WITH_INFLATE_ALLOW_INVALID_DIST |                       | Build with zero fill for inflate invalid distances                  | OFF                    |
 | INSTALL_UTILS                   |                       | Copy minigzip and minideflate during install                        | OFF                    |
 | ZLIBNG_ENABLE_TESTS             |                       | Test zlib-ng specific API                                           | ON                     |
+| WITH_ALL_FALLBACKS              |                       | Build with all c-fallbacks (useful for Gbench comparisons)          | OFF                    |
 
 
 Related Projects
index fcfc795d0b91a964350baa6d027dbf829f847437..9cec2d03742fecd22df11a4b526735b790501f9a 100755
--- a/configure
+++ b/configure
@@ -1774,6 +1774,16 @@ if test $without_new_strategies -eq 1; then
     SFLAGS="${SFLAGS} -DNO_QUICK_STRATEGY -DNO_MEDIUM_STRATEGY"
 fi
 
+# CMake can exclude building some of the generic fallback functions,
+# configure does not have the detection code to do so.
+CFLAGS="${CFLAGS} -DWITH_ALL_FALLBACKS"
+SFLAGS="${SFLAGS} -DWITH_ALL_FALLBACKS"
+
+if test $without_optimizations -eq 0; then
+    CFLAGS="${CFLAGS} -DWITH_OPTIM"
+    SFLAGS="${SFLAGS} -DWITH_OPTIM"
+fi
+
 ARCHDIR='arch/generic'
 ARCH_STATIC_OBJS=''
 ARCH_SHARED_OBJS=''
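
configure takes the conservative route: it always defines WITH_ALL_FALLBACKS and only makes WITH_OPTIM conditional. A small probe program, assuming nothing beyond the two macros introduced in this commit, makes the resulting build flavors visible:

```c
#include <stdio.h>

/* Build this file with the CFLAGS produced by configure (or CMake) to see
 * which combination of the two macros from this commit is active. */
int main(void) {
#ifdef WITH_ALL_FALLBACKS
    puts("fallbacks: every arch/generic/*.c object is built");
#else
    puts("fallbacks: pruned; SSE2 baseline assumed (CMake x86_64 build)");
#endif
#ifdef WITH_OPTIM
    puts("dispatch:  functable may select arch-optimized functions");
#else
    puts("dispatch:  disabled; generic C implementations only");
#endif
    return 0;
}
```

Under configure the first branch is always taken; only a CMake build on x86_64 with WITH_SSE2 reaches the pruned case.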
index 1f8f52fd7cec66a07b9ca8bb90c5c4fc0063df21..4481fdb9df12ba8970c69f2703129a4a1ca7cb7f 100644
--- a/functable.c
+++ b/functable.c
@@ -47,9 +47,26 @@ static void init_functable(void) {
     struct cpu_features cf;
 
     cpu_check_features(&cf);
-
-    // Generic code
     ft.force_init = &force_init_empty;
+
+    // Set up generic C code fallbacks
+#ifndef WITH_ALL_FALLBACKS
+#  if (defined(__x86_64__) || defined(_M_X64)) && defined(X86_SSE2)
+    // x86_64 always has SSE2, so we can use SSE2 functions as fallbacks where available.
+    ft.adler32 = &adler32_c;
+    ft.adler32_fold_copy = &adler32_fold_copy_c;
+    ft.crc32 = &crc32_c;
+    ft.crc32_fold = &crc32_fold_c;
+    ft.crc32_fold_copy = &crc32_fold_copy_c;
+    ft.crc32_fold_final = &crc32_fold_final_c;
+    ft.crc32_fold_reset = &crc32_fold_reset_c;
+#    ifndef HAVE_BUILTIN_CTZ
+    ft.longest_match = &longest_match_c;
+    ft.longest_match_slow = &longest_match_slow_c;
+    ft.compare256 = &compare256_c;
+#    endif
+#  endif
+#else // WITH_ALL_FALLBACKS
     ft.adler32 = &adler32_c;
     ft.adler32_fold_copy = &adler32_fold_copy_c;
     ft.chunkmemset_safe = &chunkmemset_safe_c;
@@ -63,8 +80,10 @@ static void init_functable(void) {
     ft.longest_match = &longest_match_c;
     ft.longest_match_slow = &longest_match_slow_c;
     ft.compare256 = &compare256_c;
+#endif
 
     // Select arch-optimized functions
+#ifdef WITH_OPTIM
 
     // X86 - SSE2
 #ifdef X86_SSE2
@@ -73,9 +92,9 @@ static void init_functable(void) {
 #  endif
     {
         ft.chunkmemset_safe = &chunkmemset_safe_sse2;
-#if !defined(WITHOUT_CHORBA) && !defined(NO_CHORBA_SSE)
+#  if !defined(WITHOUT_CHORBA) && !defined(NO_CHORBA_SSE)
         ft.crc32 = &crc32_chorba_sse2;
-#endif
+#  endif
         ft.inflate_fast = &inflate_fast_sse2;
         ft.slide_hash = &slide_hash_sse2;
 #  ifdef HAVE_BUILTIN_CTZ
@@ -301,6 +320,8 @@ static void init_functable(void) {
     }
 #endif
 
+#endif // WITH_OPTIM
+
     // Assign function pointers individually for atomic operation
     FUNCTABLE_ASSIGN(ft, force_init);
     FUNCTABLE_ASSIGN(ft, adler32);
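
The functable change works because of ordering: fallback pointers are seeded first, then the new WITH_OPTIM block may overwrite them with arch-specific variants, so a pointer may stay unseeded only when the build guarantees an override. A condensed, self-contained sketch of that pattern follows (hypothetical names and simplified signatures; adler32_avx2 here is a stand-in that just forwards to the scalar code):

```c
#include <stdint.h>
#include <stddef.h>

typedef uint32_t (*adler32_fn)(uint32_t adler, const uint8_t *buf, size_t len);

/* Scalar Adler-32, simplified (no deferred-modulo batching). */
uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
    uint32_t a = adler & 0xffff, b = (adler >> 16) & 0xffff;
    while (len--) { a = (a + *buf++) % 65521; b = (b + a) % 65521; }
    return (b << 16) | a;
}

/* Stand-in for a SIMD variant; a real one would use AVX2 intrinsics. */
uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len) {
    return adler32_c(adler, buf, len);
}

struct functable_sketch { adler32_fn adler32; };

void init_functable_sketch(struct functable_sketch *ft, int cpu_has_avx2) {
    /* Step 1: seed a baseline. adler32_c stays on the x86_64 build list,
     * so both branches may reference it; the pruned symbols (slide_hash_c,
     * chunkset_c, ...) appear only in the WITH_ALL_FALLBACKS branch. */
#ifndef WITH_ALL_FALLBACKS
#  if (defined(__x86_64__) || defined(_M_X64)) && defined(X86_SSE2)
    ft->adler32 = &adler32_c;
#  endif
#else
    ft->adler32 = &adler32_c;
#endif
    /* Step 2: optionally override with a runtime-detected variant. */
#ifdef WITH_OPTIM
    if (cpu_has_avx2)
        ft->adler32 = &adler32_avx2;
#endif
}
```

The invariant matches the CMake side: if neither branch of step 1 fires, the configuration that produced it is impossible, because CMake forces WITH_ALL_FALLBACKS on whenever the x86_64 SSE2 baseline is absent.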
index 6f3b1221fc6081fa80a8ce0d724ebf0d867a6ebd..4479a935b2437a5a655961757252dd85c32569df 100644
--- a/test/benchmarks/benchmark_slidehash.cc
+++ b/test/benchmarks/benchmark_slidehash.cc
@@ -77,7 +77,9 @@ public:
     } \
     BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
 
+#if defined(WITH_ALL_FALLBACKS) || !defined(__x86_64__)
 BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_SLIDEHASH(native, native_slide_hash, 1);
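
The benchmark guard generalizes: any translation unit that names a generic symbol by hand must gate the reference with the same condition the build system uses, or a pruned x86_64 build fails at link time. A minimal sketch of the pattern (hypothetical registration helper):

```c
/* Mirror the build-system condition when referencing pruned symbols. */
#if defined(WITH_ALL_FALLBACKS) || !defined(__x86_64__)
#  define HAVE_GENERIC_SLIDE_HASH 1
#endif

void register_slide_hash_benchmarks(void) {
#ifdef HAVE_GENERIC_SLIDE_HASH
    /* safe to reference slide_hash_c: its object file was compiled */
#endif
    /* the dispatched (native) benchmark is always available */
}
```

To restore the generic-versus-SIMD comparison on x86_64, build with WITH_ALL_FALLBACKS=ON; the pkgcheck workflow change at the top of this commit does exactly that.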