git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Use fallback defines for Chorba Scalar/SSE
author Nathan Moinvaziri <nathan@nathanm.com>
Wed, 18 Feb 2026 08:29:00 +0000 (00:29 -0800)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Wed, 22 Apr 2026 09:40:20 +0000 (11:40 +0200)
Gate Scalar and SSE chorba uniformly on CRC32_CHORBA_FALLBACK and
CRC32_CHORBA_SSE_FALLBACK across prototypes, dispatch, sources, tests
and benchmarks instead of spot-checking WITHOUT_CHORBA /
WITHOUT_CHORBA_SSE directly at each site.

Also move crc32_chorba_c.c into ZLIB_GENERIC_SRCS and align Makefile.in
to match so the CMake and autotools generic source lists stay consistent.

CMakeLists.txt
Makefile.in
arch/x86/crc32_chorba_sse2.c
arch/x86/crc32_chorba_sse41.c
arch/x86/x86_functions.h
functable.c
test/benchmarks/benchmark_crc32.cc
test/benchmarks/benchmark_crc32_copy.cc
test/test_crc32.cc
test/test_crc32_copy.cc

index c3d1704d23ef937ca049d9f080e54debd1bbb75e..307159db9783d233496cb31b815d6e3a3f717caf 100644 (file)
@@ -1298,6 +1298,7 @@ set(ZLIB_GENERIC_SRCS
     arch/generic/chunkset_c.c
     arch/generic/compare256_c.c
     arch/generic/crc32_braid_c.c
+    arch/generic/crc32_chorba_c.c
     arch/generic/slide_hash_c.c
 )
 
@@ -1305,10 +1306,6 @@ if(WITH_ALL_FALLBACKS)
     add_definitions(-DWITH_ALL_FALLBACKS)
 endif()
 
-if(WITH_CRC32_CHORBA)
-    list(APPEND ZLIB_SRCS arch/generic/crc32_chorba_c.c)
-endif()
-
 if(WITH_RUNTIME_CPU_DETECTION)
     list(APPEND ZLIB_PRIVATE_HDRS cpu_features.h)
     list(APPEND ZLIB_SRCS cpu_features.c)
index 2f2b92b8721ca1fa07ba48b2383df1e7e9fa14d5..2bd9673eb0f4959259738f667268277f66829329 100644 (file)
@@ -79,6 +79,7 @@ OBJZ = \
        arch/generic/chunkset_c.o \
        arch/generic/compare256_c.o \
        arch/generic/crc32_braid_c.o \
+       arch/generic/crc32_chorba_c.o \
        arch/generic/slide_hash_c.o \
        adler32.o \
        compress.o \
@@ -100,7 +101,6 @@ OBJZ = \
        trees.o \
        uncompr.o \
        zutil.o \
-       arch/generic/crc32_chorba_c.o \
        cpu_features.o \
        $(ARCH_STATIC_OBJS)
 
@@ -117,6 +117,7 @@ PIC_OBJZ = \
        arch/generic/chunkset_c.lo \
        arch/generic/compare256_c.lo \
        arch/generic/crc32_braid_c.lo \
+       arch/generic/crc32_chorba_c.lo \
        arch/generic/slide_hash_c.lo \
        adler32.lo \
        compress.lo \
@@ -138,7 +139,6 @@ PIC_OBJZ = \
        trees.lo \
        uncompr.lo \
        zutil.lo \
-       arch/generic/crc32_chorba_c.lo \
        cpu_features.lo \
        $(ARCH_SHARED_OBJS)
 
index 5fbdbc6efb30d7771f1b0f7c4f53a83a77a0d4c4..c30581ea481f3c0e32b7f4de0713934b188345a5 100644 (file)
@@ -1,7 +1,7 @@
 #include "zbuild.h"
 #include "arch_functions.h"
 
-#if defined(X86_SSE2) && !defined(WITHOUT_CHORBA_SSE) && defined(CRC32_CHORBA_FALLBACK)
+#if defined(X86_SSE2) && defined(CRC32_CHORBA_SSE_FALLBACK)
 
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
@@ -328,7 +328,7 @@ Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t l
         len -= align_diff;
         buf += align_diff;
     }
-#if !defined(WITHOUT_CHORBA)
+#ifdef CRC32_CHORBA_FALLBACK
     if (len > CHORBA_LARGE_THRESHOLD)
         return crc32_chorba_118960_nondestructive(crc, buf, len);
 #endif
index d8cadc93a10e47d76f924a5edf437ddb088c1402..32e9e36ef0c146a83a21e52320617d29ad19f27a 100644 (file)
@@ -1,7 +1,7 @@
 #include "zbuild.h"
 #include "arch_functions.h"
 
-#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE) && defined(CRC32_CHORBA_FALLBACK)
+#if defined(X86_SSE41) && defined(CRC32_CHORBA_SSE_FALLBACK)
 
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
@@ -322,7 +322,7 @@ Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t
         len -= align_diff;
         buf += align_diff;
     }
-#if !defined(WITHOUT_CHORBA)
+#ifdef CRC32_CHORBA_FALLBACK
     if (len > CHORBA_LARGE_THRESHOLD)
         return crc32_chorba_118960_nondestructive(crc, buf, len);
 #endif
index 7e266af1172f6a5c6ea9cd3eb18a0c853a960411..cf28c6ac0883c7ce5d3e138e1bc33b1fba024984 100644 (file)
@@ -8,6 +8,13 @@
 
 #include "x86_natives.h"
 
+#if !defined(X86_PCLMULQDQ_NATIVE) && !defined(X86_VPCLMULQDQ_NATIVE)
+#  define CRC32_BRAID_FALLBACK
+#  ifndef WITHOUT_CHORBA_SSE
+#    define CRC32_CHORBA_SSE_FALLBACK
+#  endif
+#endif
+
 #ifdef X86_SSE2
 uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
@@ -16,7 +23,7 @@ uint32_t longest_match_sse2(deflate_state *const s, uint32_t cur_match);
 uint32_t longest_match_roll_sse2(deflate_state *const s, uint32_t cur_match);
 void slide_hash_sse2(deflate_state *s);
 
-#  if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
+#  ifdef CRC32_CHORBA_SSE_FALLBACK
     uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t crc32_copy_chorba_sse2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
     uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint8_t *buf, size_t len);
@@ -41,7 +48,7 @@ void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
 #if defined(X86_SSE41)
-#  if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
+#  ifdef CRC32_CHORBA_SSE_FALLBACK
     uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t crc32_copy_chorba_sse41(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #  endif
@@ -88,10 +95,6 @@ uint32_t crc32_vpclmulqdq_avx512(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
-#if !defined(X86_PCLMULQDQ_NATIVE) && !defined(X86_VPCLMULQDQ_NATIVE)
-#  define CRC32_BRAID_FALLBACK
-#endif
-
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // X86 - SSE2
 #  ifdef X86_SSE2_NATIVE
@@ -105,7 +108,7 @@ uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t
 #    define native_longest_match longest_match_sse2
 #    undef native_longest_match_roll
 #    define native_longest_match_roll longest_match_roll_sse2
-#    if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
+#    ifdef CRC32_CHORBA_SSE_FALLBACK
 #      undef native_crc32
 #      define native_crc32 crc32_chorba_sse2
 #      undef native_crc32_copy
@@ -127,7 +130,7 @@ uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t
 #  endif
 // X86 - SSE4.1
 #  if defined(X86_SSE41_NATIVE)
-#    if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
+#    ifdef CRC32_CHORBA_SSE_FALLBACK
 #      undef native_crc32
 #      define native_crc32 crc32_chorba_sse41
 #      undef native_crc32_copy
index b2319a7aca149f9c9571ef4370cd63c3b30cdddf..449ab446253d74069c34e0ac14e8b27727237e85 100644 (file)
@@ -120,7 +120,7 @@ static int init_functable(void) {
         ft.longest_match_roll = &longest_match_roll_sse2;
         ft.slide_hash = &slide_hash_sse2;
 #  endif
-#  if !defined(WITHOUT_CHORBA_SSE) && !defined(X86_PCLMULQDQ_NATIVE)
+#  if defined(CRC32_CHORBA_SSE_FALLBACK) && !defined(X86_SSE41_NATIVE) && !defined(X86_PCLMULQDQ_NATIVE)
         ft.crc32 = &crc32_chorba_sse2;
         ft.crc32_copy = &crc32_copy_chorba_sse2;
 #  endif
@@ -147,7 +147,7 @@ static int init_functable(void) {
     if (cf.x86.has_sse41)
 #  endif
     {
-#  ifndef WITHOUT_CHORBA_SSE
+#  ifdef CRC32_CHORBA_SSE_FALLBACK
         ft.crc32 = &crc32_chorba_sse41;
         ft.crc32_copy = &crc32_copy_chorba_sse41;
 #  endif
index b95f9520a8b18c20e6754d615551e8ed2f9a2085..bdf26071cbcd77934272f1d2eea1904a15347e61 100644 (file)
@@ -88,7 +88,7 @@ BENCHMARK_CRC32(chorba_c, crc32_chorba, 1);
 BENCHMARK_CRC32(native, native_crc32, 1);
 #else
 
-#if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
+#ifdef CRC32_CHORBA_SSE_FALLBACK
 #   ifdef X86_SSE2
     BENCHMARK_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2);
 #   endif
index 2df1f5710d0d3035323e111acf7d8661aea5e822..b3e3e270647adc250449d4a300de323e5290843b 100644 (file)
@@ -140,7 +140,7 @@ BENCHMARK_CRC32_COPY(chorba, crc32_chorba, crc32_copy_chorba, 1)
     BENCHMARK_CRC32_COPY(native, native_crc32, native_crc32_copy, 1)
 #else
     // Optimized functions
-#  if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
+#  ifdef CRC32_CHORBA_SSE_FALLBACK
 #    ifdef X86_SSE2
     BENCHMARK_CRC32_COPY(chorba_sse2, crc32_chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2);
 #    endif
index 3da7a34a39b048aca3c226e9deee09acb2b9c9aa..58861fed97224b091a68da4b15b9ff06b86df20d 100644 (file)
@@ -131,7 +131,7 @@ TEST_CRC32(vpclmulqdq_avx2, crc32_vpclmulqdq_avx2, (test_cpu_features.x86.has_pc
 #ifdef X86_VPCLMULQDQ_AVX512
 TEST_CRC32(vpclmulqdq_avx512, crc32_vpclmulqdq_avx512, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
 #endif
-#if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
+#ifdef CRC32_CHORBA_SSE_FALLBACK
 #   ifdef X86_SSE2
     TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2)
 #   endif
index 9edc8f601ab105e7e99ef8f87261c7ef7e326cca..054eb4e06b1f5c662084cc5e74ff7a43e9014ecd 100644 (file)
@@ -52,7 +52,7 @@ TEST_CRC32_COPY(chorba, crc32_copy_chorba, 1)
     TEST_CRC32_COPY(native, native_crc32_copy, 1)
 #else
     // Optimized functions
-#  if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
+#  ifdef CRC32_CHORBA_SSE_FALLBACK
 #    ifdef X86_SSE2
     TEST_CRC32_COPY(chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2)
 #    endif