git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Allow disabling runtime CPU features detection in tests and benchmarks
author Vladislav Shchapov <vladislav@shchapov.ru>
Wed, 20 Mar 2024 17:52:13 +0000 (22:52 +0500)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Thu, 4 Apr 2024 11:47:02 +0000 (13:47 +0200)
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
14 files changed:
CMakeLists.txt
Makefile.in
cpu_features.h
test/benchmarks/benchmark_adler32.cc
test/benchmarks/benchmark_adler32_copy.cc
test/benchmarks/benchmark_compare256.cc
test/benchmarks/benchmark_crc32.cc
test/benchmarks/benchmark_main.cc
test/benchmarks/benchmark_slidehash.cc
test/test_adler32.cc
test/test_compare256.cc
test/test_cpu_features.h
test/test_crc32.cc
test/test_main.cc

index ef69f86458c3c677bc891f363bc95be06b2a352d..56654bf00d758e06c7d04a78a06fdc9654bb2074 100644 (file)
@@ -679,8 +679,12 @@ if(WITH_OPTIM)
                 endif()
             endif()
         endif()
-        list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h ${ARCHDIR}/arm_functions.h)
-        list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
+        list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_functions.h)
+        if(WITH_RUNTIME_CPU_DETECTION)
+            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h)
+            list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
+        endif()
+
         if(WITH_ACLE)
             check_acle_compiler_flag()
             if(HAVE_ACLE_FLAG)
@@ -751,8 +755,11 @@ if(WITH_OPTIM)
             add_definitions(-DPOWER_FEATURES)
         endif()
         if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN)
-            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h ${ARCHDIR}/power_functions.h)
-            list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
+            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_functions.h)
+            if(WITH_RUNTIME_CPU_DETECTION)
+                list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h)
+                list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
+            endif()
         endif()
         # VMX specific options and files
         if(WITH_ALTIVEC)
@@ -801,11 +808,17 @@ if(WITH_OPTIM)
             if(HAVE_RVV_INTRIN)
                 add_definitions(-DRISCV_FEATURES)
                 add_definitions(-DRISCV_RVV)
-                list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h ${ARCHDIR}/riscv_functions.h)
-                list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
+                list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_functions.h)
+                if(WITH_RUNTIME_CPU_DETECTION)
+                    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h)
+                    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
+                endif()
                 # FIXME: we will not set compile flags for riscv_features.c when
                 # the kernels update hwcap or hwprobe for riscv
-                set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
+                set(RVV_SRCS ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
+                if(WITH_RUNTIME_CPU_DETECTION)
+                    list(APPEND RVV_SRCS ${ARCHDIR}/riscv_features.c)
+                endif()
                 list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS})
                 set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
             else()
@@ -816,8 +829,11 @@ if(WITH_OPTIM)
         check_s390_intrinsics()
         if(HAVE_S390_INTRIN)
             add_definitions(-DS390_FEATURES)
-            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h ${ARCHDIR}/s390_functions.h)
-            list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c)
+            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_functions.h)
+            if(WITH_RUNTIME_CPU_DETECTION)
+                list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h)
+                list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c)
+            endif()
         endif()
         if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
             list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c)
@@ -843,15 +859,20 @@ if(WITH_OPTIM)
         endif()
     elseif(BASEARCH_X86_FOUND)
         add_definitions(-DX86_FEATURES)
-        list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h ${ARCHDIR}/x86_functions.h)
-        list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
+        list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h)
+        if(WITH_RUNTIME_CPU_DETECTION)
+            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
+            list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
+        endif()
         if(MSVC)
             list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
         endif()
         check_xsave_intrinsics()
         if(HAVE_XSAVE_INTRIN)
             add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"")
-            set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
+            if(WITH_RUNTIME_CPU_DETECTION)
+                set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
+            endif()
             if(NOT (CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8.2))
                 add_definitions(-DX86_HAVE_XSAVE_INTRIN)
             endif()
@@ -1038,7 +1059,6 @@ set(ZLIB_PRIVATE_HDRS
     chunkset_tpl.h
     compare256_rle.h
     arch_functions.h
-    cpu_features.h
     crc32_braid_p.h
     crc32_braid_comb_p.h
     crc32_braid_tbl.h
@@ -1069,7 +1089,6 @@ set(ZLIB_SRCS
     arch/generic/slide_hash_c.c
     adler32.c
     compress.c
-    cpu_features.c
     crc32.c
     crc32_braid_comb.c
     deflate.c
@@ -1091,6 +1110,11 @@ set(ZLIB_SRCS
     zutil.c
 )
 
+if(WITH_RUNTIME_CPU_DETECTION)
+    list(APPEND ZLIB_PRIVATE_HDRS cpu_features.h)
+    list(APPEND ZLIB_SRCS cpu_features.c)
+endif()
+
 set(ZLIB_GZFILE_PRIVATE_HDRS
     gzguts.h
 )
index df8105f27c249c3ebdf49266b1e1da9a50c57a62..2bf0dc13bf2843b53b47c3d05612018ec5d35824 100644 (file)
@@ -83,7 +83,6 @@ OBJZ = \
        arch/generic/slide_hash_c.o \
        adler32.o \
        compress.o \
-       cpu_features.o \
        crc32.o \
        crc32_braid_comb.o \
        deflate.o \
@@ -103,6 +102,7 @@ OBJZ = \
        trees.o \
        uncompr.o \
        zutil.o \
+       cpu_features.o \
        $(ARCH_STATIC_OBJS)
 
 OBJG = \
@@ -123,7 +123,6 @@ PIC_OBJZ = \
        arch/generic/slide_hash_c.lo \
        adler32.lo \
        compress.lo \
-       cpu_features.lo \
        crc32.lo \
        crc32_braid_comb.lo \
        deflate.lo \
@@ -143,6 +142,7 @@ PIC_OBJZ = \
        trees.lo \
        uncompr.lo \
        zutil.lo \
+       cpu_features.lo \
        $(ARCH_SHARED_OBJS)
 
 PIC_OBJG = \
index d74b1d6e0373f8e8d2f71d1e79178929cddce5d1..8708724bc0d53ee454fd265eb58aa56aa97daa60 100644 (file)
@@ -6,6 +6,8 @@
 #ifndef CPU_FEATURES_H_
 #define CPU_FEATURES_H_
 
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
+
 #if defined(X86_FEATURES)
 #  include "arch/x86/x86_features.h"
 #elif defined(ARM_FEATURES)
@@ -37,3 +39,5 @@ struct cpu_features {
 void cpu_check_features(struct cpu_features *features);
 
 #endif
+
+#endif
index fd86145782319452730705dc7bd696a42acd6c64..84f0d617c2bf01937a54924f7e17a94f3379970f 100644 (file)
@@ -65,6 +65,10 @@ public:
 
 BENCHMARK_ADLER32(c, adler32_c, 1);
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+BENCHMARK_ADLER32(native, native_adler32, 1);
+#else
+
 #ifdef ARM_NEON
 BENCHMARK_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon);
 #endif
@@ -92,3 +96,5 @@ BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512);
 #ifdef X86_AVX512VNNI
 BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
 #endif
+
+#endif
index 7992bdd5a476dd2648c2122ec9070efbf0b72b7e..e052ee76d0eca0fc536a4c5e4fdcafe8e26826b9 100644 (file)
@@ -86,6 +86,10 @@ public:
 
 BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1);
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+BENCHMARK_ADLER32_BASELINE_COPY(native, native_adler32, 1);
+#else
+
 #ifdef ARM_NEON
 /* If we inline this copy for neon, the function would go here */
 //BENCHMARK_ADLER32_COPY(neon, adler32_neon, test_cpu_features.arm.has_neon);
@@ -122,3 +126,5 @@ BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.h
 BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
 BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
 #endif
+
+#endif
index 188539df51bdb779ce78da6a3c3fa41115d182f1..efdbbacc9fc1f0ab8f65c27af54610d09b8da4e1 100644 (file)
@@ -62,6 +62,10 @@ public:
 
 BENCHMARK_COMPARE256(c, compare256_c, 1);
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+BENCHMARK_COMPARE256(native, native_compare256, 1);
+#else
+
 #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
 BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
 #ifdef HAVE_BUILTIN_CTZ
@@ -86,3 +90,5 @@ BENCHMARK_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch
 #ifdef RISCV_RVV
 BENCHMARK_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv);
 #endif
+
+#endif
index 062d22918410c3b98ef7639bffb49ac6b044f143..8611b280521208dafbc5fe84446ffa2c1a866bd0 100644 (file)
@@ -58,6 +58,10 @@ public:
 
 BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1);
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+BENCHMARK_CRC32(native, native_crc32, 1);
+#else
+
 #ifdef ARM_ACLE
 BENCHMARK_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32);
 #endif
@@ -75,3 +79,5 @@ BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
 /* CRC32 fold does a memory copy while hashing */
 BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq));
 #endif
+
+#endif
index 3ef2c5e87d107c6358bc8c6a3089400312b33ef2..f3c227bdf7c81879ec60eeaf00057b4ace1e9dc1 100644 (file)
@@ -12,13 +12,17 @@ extern "C" {
 #  include "zbuild.h"
 #  include "../test_cpu_features.h"
 
+#  ifndef DISABLE_RUNTIME_CPU_DETECTION
     struct cpu_features test_cpu_features;
+#  endif
 }
 #endif
 
 int main(int argc, char** argv) {
 #ifndef BUILD_ALT
+#  ifndef DISABLE_RUNTIME_CPU_DETECTION
     cpu_check_features(&test_cpu_features);
+#  endif
 #endif
 
     ::benchmark::Initialize(&argc, argv);
index e15caa4f5582b7b512b530e225d455c5f264b4b7..e51aa685a96ad6f4f86b220a6b05ca367bf18d1d 100644 (file)
@@ -69,6 +69,10 @@ public:
 
 BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+BENCHMARK_SLIDEHASH(native, native_slide_hash, 1);
+#else
+
 #ifdef ARM_SIMD
 BENCHMARK_SLIDEHASH(armv6, slide_hash_armv6, test_cpu_features.arm.has_simd);
 #endif
@@ -90,3 +94,5 @@ BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2);
 #ifdef X86_AVX2
 BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, test_cpu_features.x86.has_avx2);
 #endif
+
+#endif
index 2d3f4a98cf185bba70e33ff177aca73feca4a038..85c4c78bbc5d730f31612cd6d0a8e2ef82aed2f8 100644 (file)
@@ -365,6 +365,10 @@ INSTANTIATE_TEST_SUITE_P(adler32, adler32_variant, testing::ValuesIn(tests));
 
 TEST_ADLER32(c, adler32_c, 1)
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+TEST_ADLER32(native, native_adler32, 1)
+#else
+
 #ifdef ARM_NEON
 TEST_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon)
 #elif defined(POWER8_VSX)
@@ -387,3 +391,5 @@ TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512)
 #ifdef X86_AVX512VNNI
 TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni)
 #endif
+
+#endif
index 9aa7c977bc476fee263ae36965c7093d461f67ca..ec2136aebadc67e08d39c8dc166abb6ae5a8777b 100644 (file)
@@ -61,6 +61,10 @@ static inline void compare256_match_check(compare256_func compare256) {
 
 TEST_COMPARE256(c, compare256_c, 1)
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+TEST_COMPARE256(native, native_compare256, 1)
+#else
+
 #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
 TEST_COMPARE256(unaligned_16, compare256_unaligned_16, 1)
 #ifdef HAVE_BUILTIN_CTZ
@@ -85,3 +89,5 @@ TEST_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00
 #ifdef RISCV_RVV
 TEST_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv)
 #endif
+
+#endif
index 1bb4b13a08503c466763e7cdbc888edfb49830b8..70b74283d2ef9bc08b22b6d12ac049e3283042ea 100644 (file)
@@ -1,8 +1,12 @@
 #ifndef TEST_CPU_FEATURES_H
 #define TEST_CPU_FEATURES_H
 
-#include "cpu_features.h"
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
+
+#  include "cpu_features.h"
 
 extern struct cpu_features test_cpu_features;
 
 #endif
+
+#endif
index 83bf31131649f7ff15d2ff47d2ed6ff3b5b05439..f8322085e6fa45634812ecb9ba1aa259294eb7fe 100644 (file)
@@ -208,6 +208,10 @@ INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests));
 
 TEST_CRC32(braid, PREFIX(crc32_braid), 1)
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+TEST_CRC32(native, native_crc32, 1)
+#else
+
 #ifdef ARM_ACLE
 TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32)
 #endif
@@ -223,3 +227,5 @@ TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
 #ifdef X86_VPCLMULQDQ_CRC
 TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq))
 #endif
+
+#endif
index 82b39e48748f03c972b4b74cf102eb62c019c76a..994a3ef389b1d533b58e3814c78b036714f70df6 100644 (file)
@@ -7,13 +7,16 @@
 extern "C" {
 #  include "zbuild.h"
 #  include "test_cpu_features.h"
-
+#  ifndef DISABLE_RUNTIME_CPU_DETECTION
     struct cpu_features test_cpu_features;
+#  endif
 }
 
 GTEST_API_ int main(int argc, char **argv) {
   printf("Running main() from %s\n", __FILE__);
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
   cpu_check_features(&test_cpu_features);
+#endif
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }