git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add fallback defines to skip generic C code when native intrinsics exist
author Nathan Moinvaziri <nathan@nathanm.com>
Tue, 10 Mar 2026 06:41:45 +0000 (23:41 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Wed, 15 Apr 2026 11:12:04 +0000 (13:12 +0200)
Each arch header now sets *_FALLBACK defines (ADLER32_FALLBACK,
CHUNKSET_FALLBACK, COMPARE256_FALLBACK, CRC32_BRAID_FALLBACK,
SLIDE_HASH_FALLBACK) when no native SIMD implementation exists.
Generic C source files, declarations, functable entries, tests,
and benchmarks are guarded by these defines.

29 files changed:
CMakeLists.txt
arch/arm/arm_functions.h
arch/generic/adler32_c.c
arch/generic/chunkset_c.c
arch/generic/compare256_c.c
arch/generic/crc32_braid_c.c
arch/generic/crc32_chorba_c.c
arch/generic/generic_functions.h
arch/generic/slide_hash_c.c
arch/loongarch/loongarch_functions.h
arch/power/power_functions.h
arch/riscv/riscv_functions.h
arch/s390/s390_functions.h
arch/x86/crc32_chorba_sse2.c
arch/x86/crc32_chorba_sse41.c
arch/x86/x86_functions.h
arch_functions.h
functable.c
test/benchmarks/benchmark_adler32.cc
test/benchmarks/benchmark_adler32_copy.cc
test/benchmarks/benchmark_compare256.cc
test/benchmarks/benchmark_crc32.cc
test/benchmarks/benchmark_crc32_copy.cc
test/benchmarks/benchmark_slidehash.cc
test/test_adler32.cc
test/test_adler32_copy.cc
test/test_compare256.cc
test/test_crc32.cc
test/test_crc32_copy.cc

index 81d7b0729164a78e2a13d389778777f94adab41b..ffd3c5bac46cd819d324b5e5201660f8002b3a54 100644 (file)
@@ -1297,7 +1297,7 @@ set(ZLIB_SRCS
     zutil.c
 )
 
-set(ZLIB_ALL_FALLBACK_SRCS
+set(ZLIB_GENERIC_SRCS
     arch/generic/adler32_c.c
     arch/generic/chunkset_c.c
     arch/generic/compare256_c.c
@@ -1306,16 +1306,6 @@ set(ZLIB_ALL_FALLBACK_SRCS
 )
 
 if(WITH_ALL_FALLBACKS)
-    list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
-    add_definitions(-DWITH_ALL_FALLBACKS)
-elseif(BASEARCH_X86_FOUND AND ARCH_64BIT AND WITH_SSE2)
-    # x86_64 always has SSE2, so let the SSE2 functions act as fallbacks.
-    list(APPEND ZLIB_GENERIC_SRCS
-        arch/generic/adler32_c.c
-        arch/generic/crc32_braid_c.c
-    )
-else()
-    list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
     add_definitions(-DWITH_ALL_FALLBACKS)
 endif()
 
index bc77adb977d69e65d2d298857592cfd199071ec4..d5ebd1997c79f26e3cc3533e3ae6e2c06f8f7408 100644 (file)
@@ -18,15 +18,29 @@ uint32_t longest_match_slow_neon(deflate_state *const s, uint32_t cur_match);
 void slide_hash_neon(deflate_state *s);
 #endif
 
+#ifndef ARM_NEON_NATIVE
+#  define ADLER32_FALLBACK
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  ifndef ARM_SIMD_NATIVE
+#    define SLIDE_HASH_FALLBACK
+#  endif
+#endif
+
 #ifdef ARM_CRC32
 uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
+
 #ifdef ARM_PMULL_EOR3
 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
+#if !defined(ARM_CRC32_NATIVE) && !defined(ARM_PMULL_EOR3_NATIVE)
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef ARM_SIMD
 void slide_hash_armv6(deflate_state *s);
 #endif
index 84c946f452d37ba952f21e92491caa2901ac49a8..8abfcd2e60b86da7104cfbd66ab7465c9a39ff70 100644 (file)
@@ -4,6 +4,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef ADLER32_FALLBACK
+
 #include "functable.h"
 #include "adler32_p.h"
 
@@ -53,3 +57,5 @@ Z_INTERNAL uint32_t adler32_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *
     memcpy(dst, src, len);
     return adler;
 }
+
+#endif /* ADLER32_FALLBACK */
index ff9b1cb5fb4a206f2507270365fae50c463ab60f..5cc66977e2714e0ff44153de134f8ea1541c06d0 100644 (file)
@@ -3,6 +3,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef CHUNKSET_FALLBACK
+
 #include "zmemory.h"
 
 typedef uint64_t chunk_t;
@@ -38,3 +42,5 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
 #define INFLATE_FAST     inflate_fast_c
 
 #include "inffast_tpl.h"
+
+#endif /* CHUNKSET_FALLBACK */
index a2b47751e5404ea8f86a129205d239feba4db616..e0b0165561584a485153ed5cf9e5960baeb3bbd9 100644 (file)
@@ -4,6 +4,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef COMPARE256_FALLBACK
+
 #include "zendian.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
@@ -64,7 +68,6 @@ static inline uint32_t compare256_64_static(const uint8_t *src0, const uint8_t *
 #  define COMPARE256 compare256_64_static
 #endif
 
-#ifdef WITH_ALL_FALLBACKS
 Z_INTERNAL uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1) {
     return compare256_8_static(src0, src1);
 }
@@ -72,7 +75,6 @@ Z_INTERNAL uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1) {
 Z_INTERNAL uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1) {
     return compare256_64_static(src0, src1);
 }
-#endif
 
 Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
     return COMPARE256(src0, src1);
@@ -86,3 +88,5 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
 #define LONGEST_MATCH_SLOW
 #define LONGEST_MATCH       longest_match_slow_c
 #include "match_tpl.h"
+
+#endif /* COMPARE256_FALLBACK */
index bda4a249bb401e4656c89d3bc59d86564ae08698..1e83543d135b2e63b98f5687776f4c599561cfee 100644 (file)
@@ -8,6 +8,11 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+/* Used by chorba fallback and by arch-specific implementations (s390 vx, risc-v zbc). */
+#ifdef CRC32_BRAID_FALLBACK
+
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include "crc32_p.h"
@@ -211,3 +216,5 @@ Z_INTERNAL uint32_t crc32_copy_braid(uint32_t crc, uint8_t *dst, const uint8_t *
     memcpy(dst, src, len);
     return crc;
 }
+
+#endif /* CRC32_BRAID_FALLBACK */
index 9f8427bd1cdad40c5fe9b21e8aa5c5e0cb3fff92..ded968a8aa05cc63d2975ed2c07ff096fc0f9ffa 100644 (file)
@@ -1,5 +1,8 @@
 #include "zbuild.h"
-#include "zendian.h"
+#include "arch_functions.h"
+
+#ifdef CRC32_CHORBA_FALLBACK
+
 #if defined(__EMSCRIPTEN__)
 #  include "zutil_p.h"
 #endif
@@ -7,7 +10,6 @@
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
-#include "generic_functions.h"
 
 /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */
 #define bitbuffer_size_bytes (16 * 1024 * sizeof(chorba_word_t))
@@ -1032,3 +1034,5 @@ uint32_t crc32_copy_chorba(uint32_t crc, uint8_t *dst, const uint8_t *src, size_
     memcpy(dst, src, len);
     return crc;
 }
+
+#endif /* CRC32_CHORBA_FALLBACK */
index c150a2f0106a7a644daa750ddc6add05bedc1104..0fcca560be106afb61f3d1f0a9ab86a171912911 100644 (file)
@@ -5,9 +5,6 @@
 #ifndef GENERIC_FUNCTIONS_H_
 #define GENERIC_FUNCTIONS_H_
 
-#include "zendian.h"
-#include "deflate.h"
-
 typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
 typedef uint32_t (*adler32_copy_func)(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
@@ -15,50 +12,69 @@ typedef uint32_t (*crc32_func)(uint32_t crc, const uint8_t *buf, size_t len);
 typedef uint32_t (*crc32_copy_func)(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 typedef void     (*slide_hash_func)(deflate_state *s);
 
-
+#ifdef ADLER32_FALLBACK
 uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-
+#endif
+#ifdef CHUNKSET_FALLBACK
 uint8_t* chunkmemset_safe_c(uint8_t *out, uint8_t *from, size_t len, size_t left);
-
-#ifdef WITH_ALL_FALLBACKS
+#endif
+#ifdef COMPARE256_FALLBACK
 uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1);
 uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1);
-#endif
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
+#endif
 
+#ifdef CRC32_BRAID_FALLBACK
 uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_braid(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
 
-#ifndef WITHOUT_CHORBA
+/* Chorba is available whenever braid is needed as a fallback and hasn't been disabled. */
+#if defined(CRC32_BRAID_FALLBACK) && !defined(WITHOUT_CHORBA)
+#  define CRC32_CHORBA_FALLBACK
+#endif
+
+#ifdef CRC32_CHORBA_FALLBACK
   uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len);
   uint32_t crc32_copy_chorba(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
-
+#ifdef CHUNKSET_FALLBACK
 void     inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
-
+#endif
+#ifdef COMPARE256_FALLBACK
 uint32_t longest_match_c(deflate_state *const s, uint32_t cur_match);
 uint32_t longest_match_slow_c(deflate_state *const s, uint32_t cur_match);
-
+#endif
+#ifdef SLIDE_HASH_FALLBACK
 void     slide_hash_c(deflate_state *s);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
-// Generic code
-#  define native_adler32 adler32_c
-#  define native_adler32_copy adler32_copy_c
-#  define native_chunkmemset_safe chunkmemset_safe_c
-#ifndef WITHOUT_CHORBA
-#  define native_crc32 crc32_chorba
-#  define native_crc32_copy crc32_copy_chorba
-#else
-#  define native_crc32 crc32_braid
-#  define native_crc32_copy crc32_copy_braid
-#endif
-#  define native_inflate_fast inflate_fast_c
-#  define native_slide_hash slide_hash_c
-#  define native_longest_match longest_match_c
-#  define native_longest_match_slow longest_match_slow_c
-#  define native_compare256 compare256_c
+// Generic fallbacks when no native implementation exists
+#  ifdef ADLER32_FALLBACK
+#    define native_adler32 adler32_c
+#    define native_adler32_copy adler32_copy_c
+#  endif
+#  ifdef CHUNKSET_FALLBACK
+#    define native_chunkmemset_safe chunkmemset_safe_c
+#    define native_inflate_fast inflate_fast_c
+#  endif
+#  ifdef COMPARE256_FALLBACK
+#    define native_compare256 compare256_c
+#    define native_longest_match longest_match_c
+#    define native_longest_match_slow longest_match_slow_c
+#  endif
+#  ifdef CRC32_CHORBA_FALLBACK
+#    define native_crc32 crc32_chorba
+#    define native_crc32_copy crc32_copy_chorba
+#  elif defined(CRC32_BRAID_FALLBACK)
+#    define native_crc32 crc32_braid
+#    define native_crc32_copy crc32_copy_braid
+#  endif
+#  ifdef SLIDE_HASH_FALLBACK
+#    define native_slide_hash slide_hash_c
+#  endif
 #endif
 
 #endif
index 8345b9e36b85233f46fd6d666d7a761f1c496acc..8fdc478cbbfaa1105e2b12f6764d21557f4775f7 100644 (file)
@@ -5,6 +5,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef SLIDE_HASH_FALLBACK
+
 #include "deflate.h"
 
 /* ===========================================================================
@@ -50,3 +54,5 @@ Z_INTERNAL void slide_hash_c(deflate_state *s) {
     slide_hash_c_chain(s->head, HASH_SIZE, wsize);
     slide_hash_c_chain(s->prev, wsize, wsize);
 }
+
+#endif /* SLIDE_HASH_FALLBACK */
index 0ec8bd66d79367ba124b784054579b92c9b0a17b..980ebca02cac2619d3835f2c8a4842d790e4b0ee 100644 (file)
@@ -15,6 +15,10 @@ uint32_t crc32_loongarch64(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_loongarch64(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
+#ifndef LOONGARCH_CRC_NATIVE
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef LOONGARCH_LSX
 uint32_t adler32_lsx(uint32_t adler, const uint8_t *src, size_t len);
 uint32_t adler32_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -26,6 +30,13 @@ uint32_t longest_match_slow_lsx(deflate_state *const s, uint32_t cur_match);
 void slide_hash_lsx(deflate_state *s);
 #endif
 
+#ifndef LOONGARCH_LSX_NATIVE
+#  define ADLER32_FALLBACK
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef LOONGARCH_LASX
 uint32_t adler32_lasx(uint32_t adler, const uint8_t *src, size_t len);
 uint32_t adler32_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
index ccc7754a4c7a8befcebc675c6eb06c569b95c72e..78bae4a4956af17f0d33965519e12b93961978bd 100644 (file)
@@ -25,12 +25,28 @@ void slide_hash_power8(deflate_state *s);
 void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
+#if !defined(PPC_VMX_NATIVE) && !defined(POWER8_VSX_NATIVE)
+#  define ADLER32_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
+#ifndef POWER8_VSX_NATIVE
+#  define CHUNKSET_FALLBACK
+#endif
+#ifndef POWER8_VSX_CRC32_NATIVE
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef POWER9
 uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
 uint32_t longest_match_power9(deflate_state *const s, uint32_t cur_match);
 uint32_t longest_match_slow_power9(deflate_state *const s, uint32_t cur_match);
 #endif
 
+#ifndef POWER9_NATIVE
+#  define COMPARE256_FALLBACK
+#endif
+
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // Power - VMX
 #  ifdef PPC_VMX_NATIVE
index 89120ffabf2420ebbfc49e3785b7a68620f2ec7a..22f783c5ee351323fda32031c528b47b0b10e88b 100644 (file)
@@ -11,6 +11,8 @@
 
 #include "riscv_natives.h"
 
+#define CRC32_BRAID_FALLBACK  /* used by crc32_zbc */
+
 #ifdef RISCV_RVV
 uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -23,6 +25,13 @@ void slide_hash_rvv(deflate_state *s);
 void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
+#ifndef RISCV_RVV_NATIVE
+#  define ADLER32_FALLBACK
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef RISCV_CRC32_ZBC
 uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
index d5308c8d0441314bee75dbd300969673e260c665..c0043a6e85879b7d16851c3aa5496bc66cc497a2 100644 (file)
@@ -7,6 +7,15 @@
 
 #include "s390_natives.h"
 
+#define ADLER32_FALLBACK
+#define CHUNKSET_FALLBACK
+#define COMPARE256_FALLBACK
+#define CRC32_BRAID_FALLBACK  /* used by crc32_s390_vx */
+
+#ifndef S390_VX_NATIVE
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef S390_VX
 uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_s390_vx(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
index 93ec5d535ceffaf7062f8b089e7ca5788bf64a4e..5fbdbc6efb30d7771f1b0f7c4f53a83a77a0d4c4 100644 (file)
@@ -1,12 +1,13 @@
-#if defined(X86_SSE2) && !defined(WITHOUT_CHORBA_SSE)
-
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#if defined(X86_SSE2) && !defined(WITHOUT_CHORBA_SSE) && defined(CRC32_CHORBA_FALLBACK)
+
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include <emmintrin.h>
 #include "arch/x86/x86_intrins.h"
-#include "arch_functions.h"
 
 #define LSHIFT_QWORD(x)     _mm_unpacklo_epi64(_mm_setzero_si128(), (x))
 #define RSHIFT_QWORD(x)     _mm_unpackhi_epi64((x), _mm_setzero_si128())
index a137c7a5698064619a6ba5557bcee326e2da2ec0..d8cadc93a10e47d76f924a5edf437ddb088c1402 100644 (file)
@@ -1,13 +1,14 @@
-#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE)
-
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE) && defined(CRC32_CHORBA_FALLBACK)
+
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include <emmintrin.h>
 #include <smmintrin.h>
 #include "arch/x86/x86_intrins.h"
-#include "arch_functions.h"
 
 #define READ_NEXT(in, off, a, b) \
     do { \
index 881c6efe238b1906da2b11151480fc99e3015b3b..0bcbdaedadb24117acc1e29241dfeb8d205e96f9 100644 (file)
@@ -24,13 +24,19 @@ uint32_t longest_match_sse2(deflate_state *const s, uint32_t cur_match);
 uint32_t longest_match_slow_sse2(deflate_state *const s, uint32_t cur_match);
 void slide_hash_sse2(deflate_state *s);
 
-#  if !defined(WITHOUT_CHORBA_SSE)
+#  if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
     uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t crc32_copy_chorba_sse2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
     uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint8_t *buf, size_t len);
 #  endif
 #endif
 
+#ifndef X86_SSE2_NATIVE
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef X86_SSSE3
 uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_ssse3(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -38,9 +44,15 @@ uint8_t* chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, size_t len, size_t
 void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
-#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE)
+#ifndef X86_SSSE3_NATIVE
+#  define ADLER32_FALLBACK
+#endif
+
+#if defined(X86_SSE41)
+#  if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
     uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t crc32_copy_chorba_sse41(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#  endif
 #endif
 
 #ifdef X86_SSE42
@@ -84,6 +96,10 @@ uint32_t crc32_vpclmulqdq_avx512(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
+#if !defined(X86_PCLMULQDQ_NATIVE) && !defined(X86_VPCLMULQDQ_NATIVE)
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // X86 - SSE2
 #  ifdef X86_SSE2_NATIVE
@@ -97,7 +113,7 @@ uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t
 #    define native_longest_match longest_match_sse2
 #    undef native_longest_match_slow
 #    define native_longest_match_slow longest_match_slow_sse2
-#    if !defined(WITHOUT_CHORBA_SSE)
+#    if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
 #      undef native_crc32
 #      define native_crc32 crc32_chorba_sse2
 #      undef native_crc32_copy
@@ -118,11 +134,13 @@ uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t
 #    define native_inflate_fast inflate_fast_ssse3
 #  endif
 // X86 - SSE4.1
-#  if defined(X86_SSE41_NATIVE) && !defined(WITHOUT_CHORBA_SSE)
-#    undef native_crc32
-#    define native_crc32 crc32_chorba_sse41
-#    undef native_crc32_copy
-#    define native_crc32_copy crc32_copy_chorba_sse41
+#  if defined(X86_SSE41_NATIVE)
+#    if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
+#      undef native_crc32
+#      define native_crc32 crc32_chorba_sse41
+#      undef native_crc32_copy
+#      define native_crc32_copy crc32_copy_chorba_sse41
+#    endif
 #  endif
 // X86 - SSE4.2
 #  ifdef X86_SSE42_NATIVE
index 979c968624483130afde3095b70f31b3734c3731..d5b152e732898fd891ee811bb5541d513e3bb510 100644 (file)
@@ -11,8 +11,6 @@
 #include "deflate.h"
 #include "fallback_builtins.h"
 
-#include "arch/generic/generic_functions.h"
-
 #if defined(X86_FEATURES)
 #  include "arch/x86/x86_functions.h"
 #elif defined(ARM_FEATURES)
 #  include "arch/riscv/riscv_functions.h"
 #elif defined(LOONGARCH_FEATURES)
 #  include "arch/loongarch/loongarch_functions.h"
+#else
+/* No architecture detected - all fallbacks needed */
+#  ifndef WITH_ALL_FALLBACKS
+#    define WITH_ALL_FALLBACKS
+#  endif
+#endif
+
+#ifdef WITH_ALL_FALLBACKS
+#  ifndef ADLER32_FALLBACK
+#    define ADLER32_FALLBACK
+#  endif
+#  ifndef CHUNKSET_FALLBACK
+#    define CHUNKSET_FALLBACK
+#  endif
+#  ifndef COMPARE256_FALLBACK
+#    define COMPARE256_FALLBACK
+#  endif
+#  ifndef CRC32_BRAID_FALLBACK
+#    define CRC32_BRAID_FALLBACK
+#  endif
+#  if !defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA)
+#    define CRC32_CHORBA_FALLBACK
+#  endif
+#  ifndef SLIDE_HASH_FALLBACK
+#    define SLIDE_HASH_FALLBACK
+#  endif
 #endif
 
+#include "arch/generic/generic_functions.h"
+
 #endif
index fad863ecb64a0ccdf55881ecc74c348d1aa9d32f..4064c6942818f22f416f8f0ec36edba1a29ccc56 100644 (file)
@@ -75,60 +75,25 @@ static int init_functable(void) {
     cpu_check_features(&cf);
     ft.force_init = &force_init_empty;
 
-    // Set up generic C code fallbacks
-#ifndef WITH_ALL_FALLBACKS
     // Only use necessary generic functions when no suitable simd versions are available.
-#  ifdef X86_SSE2_NATIVE
-    // x86_64 always has SSE2
-    ft.adler32 = &adler32_c;
-    ft.adler32_copy = &adler32_copy_c;
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#  elif defined(ARM_NEON_NATIVE)
-#    ifndef ARM_CRC32_NATIVE
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(POWER8_VSX_NATIVE)
-#    ifndef POWER9_NATIVE
-    ft.compare256 = &compare256_c;
-    ft.longest_match = &longest_match_c;
-    ft.longest_match_slow = &longest_match_slow_c;
-#    endif
-#    ifndef POWER8_VSX_CRC32_NATIVE
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(LOONGARCH_LSX_NATIVE)
-#    ifndef LOONGARCH_CRC
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(RISCV_RVV_NATIVE)
-#    ifndef RISCV_ZBC_NATIVE
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(S390_VX_NATIVE)
+#ifdef ADLER32_FALLBACK
     ft.adler32 = &adler32_c;
     ft.adler32_copy = &adler32_copy_c;
+#endif
+#ifdef CHUNKSET_FALLBACK
     ft.chunkmemset_safe = &chunkmemset_safe_c;
-    ft.compare256 = &compare256_c;
     ft.inflate_fast = &inflate_fast_c;
+#endif
+#ifdef COMPARE256_FALLBACK
+    ft.compare256 = &compare256_c;
     ft.longest_match = &longest_match_c;
     ft.longest_match_slow = &longest_match_slow_c;
-    ft.slide_hash = &slide_hash_c;
-#  endif
-#else // WITH_ALL_FALLBACKS
-    ft.adler32 = &adler32_c;
-    ft.adler32_copy = &adler32_copy_c;
-    ft.chunkmemset_safe = &chunkmemset_safe_c;
-    ft.compare256 = &compare256_c;
+#endif
+#ifdef CRC32_BRAID_FALLBACK
     ft.crc32 = &crc32_braid;
     ft.crc32_copy = &crc32_copy_braid;
-    ft.inflate_fast = &inflate_fast_c;
-    ft.longest_match = &longest_match_c;
-    ft.longest_match_slow = &longest_match_slow_c;
+#endif
+#ifdef SLIDE_HASH_FALLBACK
     ft.slide_hash = &slide_hash_c;
 #endif
 
@@ -136,7 +101,7 @@ static int init_functable(void) {
 #ifdef WITH_OPTIM
 
     // Chorba generic C fallback
-#ifndef WITHOUT_CHORBA
+#ifdef CRC32_CHORBA_FALLBACK
     ft.crc32 = &crc32_chorba;
     ft.crc32_copy = &crc32_copy_chorba;
 #endif
index 5ee9102e2339c38537d70b7e3d0f31d27c230674..6916af71e505ca2d634eec9ad6d9be35eec2785d 100644 (file)
@@ -77,7 +77,9 @@ public:
     BENCHMARK_ADLER32_MISALIGNED(name, hashfunc, support_flag); \
     BENCHMARK_ADLER32_ALIGNED(name, hashfunc, support_flag);
 
+#ifdef ADLER32_FALLBACK
 BENCHMARK_ADLER32(c, adler32_c, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_ADLER32(native, native_adler32, 1);
index 6d913b1d19431c5d8d20fa9fcaa55317a8a0a19f..ff6a1b0794187a9e4dcc0af34bd86695cf0d3b3e 100644 (file)
@@ -128,7 +128,9 @@ public:
     BENCHMARK_ADLER32_COPY_ONLY(name, copyfunc, support_flag)
 #endif
 
+#ifdef ADLER32_FALLBACK
 BENCHMARK_ADLER32_COPY(c, adler32_c, adler32_copy_c, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_ADLER32_COPY(native, native_adler32, native_adler32_copy, 1);
index 2d8352879d00d0435e03c85cbde735486f0df988..88929bfb16079452c96c5f22cd3e3926c36caa0f 100644 (file)
@@ -73,7 +73,7 @@ public:
 BENCHMARK_COMPARE256(native, native_compare256, 1);
 #else
 
-#ifdef WITH_ALL_FALLBACKS
+#ifdef COMPARE256_FALLBACK
 BENCHMARK_COMPARE256(8, compare256_8, 1);
 BENCHMARK_COMPARE256(64, compare256_64, 1);
 #endif
index 772dbfd72c4fb8430bad8adb0ce07ab0fc1f4640..b95f9520a8b18c20e6754d615551e8ed2f9a2085 100644 (file)
@@ -77,16 +77,18 @@ public:
     BENCHMARK_CRC32_MISALIGNED(name, hashfunc, support_flag); \
     BENCHMARK_CRC32_ALIGNED(name, hashfunc, support_flag);
 
+#ifdef CRC32_BRAID_FALLBACK
 BENCHMARK_CRC32(braid, crc32_braid, 1);
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+BENCHMARK_CRC32(chorba_c, crc32_chorba, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_CRC32(native, native_crc32, 1);
 #else
 
-#ifndef WITHOUT_CHORBA
-BENCHMARK_CRC32(chorba_c, crc32_chorba, 1);
-#endif
-#ifndef WITHOUT_CHORBA_SSE
+#if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #   ifdef X86_SSE2
     BENCHMARK_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2);
 #   endif
index b0f0704e0e604c7f0fac0b867bf93180da8b3209..2df1f5710d0d3035323e111acf7d8661aea5e822 100644 (file)
@@ -128,17 +128,19 @@ public:
 #endif
 
 // Base test
+#ifdef CRC32_BRAID_FALLBACK
 BENCHMARK_CRC32_COPY(braid, crc32_braid, crc32_copy_braid, 1);
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+BENCHMARK_CRC32_COPY(chorba, crc32_chorba, crc32_copy_chorba, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
     // Native
     BENCHMARK_CRC32_COPY(native, native_crc32, native_crc32_copy, 1)
 #else
     // Optimized functions
-#  ifndef WITHOUT_CHORBA
-    BENCHMARK_CRC32_COPY(chorba, crc32_chorba, crc32_copy_chorba, 1)
-#  endif
-#  ifndef WITHOUT_CHORBA_SSE
+#  if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #    ifdef X86_SSE2
     BENCHMARK_CRC32_COPY(chorba_sse2, crc32_chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2);
 #    endif
index 6e8f455cd2a78f44ca79941383fd33067b33f197..34c8fbe3d95004698b74d7d181a055c4340f51be 100644 (file)
@@ -77,7 +77,7 @@ public:
     } \
     BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
 
-#if defined(WITH_ALL_FALLBACKS) || !(defined(__x86_64__) || defined(_M_X64))
+#ifdef SLIDE_HASH_FALLBACK
 BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
 #endif
 
index c461f9393990d3a85d4641ab3e335937b05a6eab..7fe8bd69a6670604f31adceed9e9ef87cbdab299 100644 (file)
@@ -36,7 +36,9 @@ INSTANTIATE_TEST_SUITE_P(adler32, adler32_variant, testing::ValuesIn(hash_tests)
         hash(GetParam(), func); \
     }
 
+#ifdef ADLER32_FALLBACK
 TEST_ADLER32(c, adler32_c, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 TEST_ADLER32(native, native_adler32, 1)
index 725d86aa95433dd10c927b2ba91fa850ecebe6f0..47b63412ab5823158ad958fb626d4e32ad6bbea0 100644 (file)
@@ -40,7 +40,9 @@ INSTANTIATE_TEST_SUITE_P(adler32_copy, adler32_copy_variant, testing::ValuesIn(h
     }
 
 // Base test
+#ifdef ADLER32_FALLBACK
 TEST_ADLER32_COPY(c, adler32_copy_c, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
     // Native test
index b3efe79fb9f29009cfe39c67edcdc034857b50f8..9978f9a736a6da00153e4fbf28b40195c995aa17 100644 (file)
@@ -63,7 +63,7 @@ static inline void compare256_match_check(compare256_func compare256) {
 TEST_COMPARE256(native, native_compare256, 1)
 #else
 
-#ifdef WITH_ALL_FALLBACKS
+#ifdef COMPARE256_FALLBACK
 TEST_COMPARE256(8, compare256_8, 1)
 TEST_COMPARE256(64, compare256_64, 1)
 #endif
index 19eb43932b3356be153e9c65a18b2569c1264981..3da7a34a39b048aca3c226e9deee09acb2b9c9aa 100644 (file)
@@ -77,7 +77,12 @@ INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(hash_tests));
         hash(func); \
     }
 
+#ifdef CRC32_BRAID_FALLBACK
 TEST_CRC32(braid, crc32_braid, 1)
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+TEST_CRC32(chorba_c, crc32_chorba, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 TEST_CRC32(native, native_crc32, 1)
@@ -99,9 +104,6 @@ static const int align_offsets[] = {
     }
 #endif
 
-#ifndef WITHOUT_CHORBA
-TEST_CRC32(chorba_c, crc32_chorba, 1)
-#endif
 #ifdef ARM_CRC32
 INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets));
 TEST_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32)
@@ -129,7 +131,7 @@ TEST_CRC32(vpclmulqdq_avx2, crc32_vpclmulqdq_avx2, (test_cpu_features.x86.has_pc
 #ifdef X86_VPCLMULQDQ_AVX512
 TEST_CRC32(vpclmulqdq_avx512, crc32_vpclmulqdq_avx512, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
 #endif
-#ifndef WITHOUT_CHORBA_SSE
+#if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #   ifdef X86_SSE2
     TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2)
 #   endif
index 12b2be7e8682a763604441e868e8cbc1da5671ff..9edc8f601ab105e7e99ef8f87261c7ef7e326cca 100644 (file)
@@ -40,17 +40,19 @@ INSTANTIATE_TEST_SUITE_P(crc32_copy, crc32_copy_variant, testing::ValuesIn(hash_
     }
 
 // Base test
+#ifdef CRC32_BRAID_FALLBACK
 TEST_CRC32_COPY(braid, crc32_copy_braid, 1)
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+TEST_CRC32_COPY(chorba, crc32_copy_chorba, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
     // Native test
     TEST_CRC32_COPY(native, native_crc32_copy, 1)
 #else
     // Optimized functions
-#  ifndef WITHOUT_CHORBA
-    TEST_CRC32_COPY(chorba, crc32_copy_chorba, 1)
-#  endif
-#  ifndef WITHOUT_CHORBA_SSE
+#  if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #    ifdef X86_SSE2
     TEST_CRC32_COPY(chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2)
 #    endif