]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add option to disable runtime CPU detection
authorVladislav Shchapov <vladislav@shchapov.ru>
Wed, 28 Feb 2024 05:16:12 +0000 (10:16 +0500)
committerHans Kristian Rosbach <hk-github@circlestorm.org>
Wed, 6 Mar 2024 22:32:15 +0000 (23:32 +0100)
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
12 files changed:
CMakeLists.txt
README.md
arch/arm/arm_functions.h
arch/generic/generic_functions.h
arch/power/power_functions.h
arch/riscv/riscv_functions.h
arch/s390/s390_functions.h
arch/x86/x86_functions.h
deflate.c
functable.c
functable.h
inflate.c

index 6b9ca7b41d4b5b00ae5b639f7d81719e10a86fbd..b5abd6f94961d3ca2643ee783960e97021f41a17 100644 (file)
@@ -86,6 +86,7 @@ option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces perform
 option(WITH_NEW_STRATEGIES "Use new strategies" ON)
 option(WITH_NATIVE_INSTRUCTIONS
     "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF)
+option(WITH_RUNTIME_CPU_DETECTION "Build with runtime detection of CPU architecture" ON)
 option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
 option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
 option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
@@ -287,12 +288,21 @@ if(WITH_NATIVE_INSTRUCTIONS)
             separate_arguments(NATIVEOPTIONS UNIX_COMMAND "${NATIVEFLAG}")
         endif()
         add_compile_options(${NATIVEOPTIONS})
+        set(WITH_RUNTIME_CPU_DETECTION OFF)
     else()
         message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
         set(WITH_NATIVE_INSTRUCTIONS OFF)
     endif()
 endif()
 
+# Compile without functable or CPU detection
+if(NOT WITH_RUNTIME_CPU_DETECTION)
+    if(MSVC AND BASEARCH_X86_FOUND)
+        message(STATUS "WARNING: Microsoft Visual Studio does not support compile time detection of CPU features for \"/arch\" before \"AVX\"")
+    endif()
+    add_definitions(-DDISABLE_RUNTIME_CPU_DETECTION)
+endif()
+
 # Force disable LTO if WITH_NATIVE_INSTRUCTIONS is not active
 if(NOT WITH_NATIVE_INSTRUCTIONS)
     set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF)
@@ -1302,6 +1312,7 @@ add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation")
 add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
 add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS
     "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)")
+add_feature_info(WITH_RUNTIME_CPU_DETECTION WITH_RUNTIME_CPU_DETECTION "Build with runtime CPU detection")
 add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings")
 add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
 add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking")
index 123a4bbc39874adf5427558200b8a4ac6accc1bd..411621b52ffbc68deba41977095c6d2df8e3c531 100644 (file)
--- a/README.md
+++ b/README.md
@@ -94,20 +94,21 @@ make test
 Build Options
 -------------
 
-| CMake                    | configure                | Description                                                                           | Default |
-|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------|
-| ZLIB_COMPAT              | --zlib-compat            | Compile with zlib compatible API                                                      | OFF     |
-| ZLIB_ENABLE_TESTS        |                          | Build test binaries                                                                   | ON      |
-| WITH_GZFILEOP            | --without-gzfileops      | Compile with support for gzFile related functions                                     | ON      |
-| WITH_OPTIM               | --without-optimizations  | Build with optimisations                                                              | ON      |
-| WITH_NEW_STRATEGIES      | --without-new-strategies | Use new strategies                                                                    | ON      |
-| WITH_NATIVE_INSTRUCTIONS |                          | Compiles with full instruction set supported on this host (gcc/clang -march=native)   | OFF     |
-| WITH_SANITIZER           |                          | Build with sanitizer (memory, address, undefined)                                     | OFF     |
-| WITH_GTEST               |                          | Build gtest_zlib                                                                      | ON      |
-| WITH_FUZZERS             |                          | Build test/fuzz                                                                       | OFF     |
-| WITH_BENCHMARKS          |                          | Build test/benchmarks                                                                 | OFF     |
-| WITH_MAINTAINER_WARNINGS |                          | Build with project maintainer warnings                                                | OFF     |
-| WITH_CODE_COVERAGE       |                          | Enable code coverage reporting                                                        | OFF     |
+| CMake                      | configure                | Description                                                                         | Default |
+|:---------------------------|:-------------------------|:------------------------------------------------------------------------------------|---------|
+| ZLIB_COMPAT                | --zlib-compat            | Compile with zlib compatible API                                                    | OFF     |
+| ZLIB_ENABLE_TESTS          |                          | Build test binaries                                                                 | ON      |
+| WITH_GZFILEOP              | --without-gzfileops      | Compile with support for gzFile related functions                                   | ON      |
+| WITH_OPTIM                 | --without-optimizations  | Build with optimisations                                                            | ON      |
+| WITH_NEW_STRATEGIES        | --without-new-strategies | Use new strategies                                                                  | ON      |
+| WITH_NATIVE_INSTRUCTIONS   |                          | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF     |
+| WITH_RUNTIME_CPU_DETECTION |                          | Compiles with runtime CPU detection                                                 | ON      |
+| WITH_SANITIZER             |                          | Build with sanitizer (memory, address, undefined)                                   | OFF     |
+| WITH_GTEST                 |                          | Build gtest_zlib                                                                    | ON      |
+| WITH_FUZZERS               |                          | Build test/fuzz                                                                     | OFF     |
+| WITH_BENCHMARKS            |                          | Build test/benchmarks                                                               | OFF     |
+| WITH_MAINTAINER_WARNINGS   |                          | Build with project maintainer warnings                                              | OFF     |
+| WITH_CODE_COVERAGE         |                          | Enable code coverage reporting                                                      | OFF     |
 
 
 Install
index 95a9a7e04eaba0e33cf730c6fe3e311588a89c15..61c682710a5512d7240e11f544b2806f01b26fdb 100644 (file)
@@ -5,7 +5,6 @@
 #ifndef ARM_FUNCTIONS_H_
 #define ARM_FUNCTIONS_H_
 
-
 #ifdef ARM_NEON
 uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t chunksize_neon(void);
@@ -28,4 +27,39 @@ uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
 void slide_hash_armv6(deflate_state *s);
 #endif
 
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// ARM - SIMD
+#  if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD)
+#    undef native_slide_hash
+#    define native_slide_hash slide_hash_armv6
+#  endif
+// ARM - NEON
+#  if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON
+#    undef native_adler32
+#    define native_adler32 adler32_neon
+#    undef native_chunkmemset_safe
+#    define native_chunkmemset_safe chunkmemset_safe_neon
+#    undef native_chunksize
+#    define native_chunksize chunksize_neon
+#    undef native_inflate_fast
+#    define native_inflate_fast inflate_fast_neon
+#    undef native_slide_hash
+#    define native_slide_hash slide_hash_neon
+#    ifdef HAVE_BUILTIN_CTZLL
+#      undef native_compare256
+#      define native_compare256 compare256_neon
+#      undef native_longest_match
+#      define native_longest_match longest_match_neon
+#      undef native_longest_match_slow
+#      define native_longest_match_slow longest_match_slow_neon
+#    endif
+#  endif
+// ARM - ACLE
+#  if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32)
+#    undef native_crc32
+#    define native_crc32 crc32_acle
+#  endif
+#endif
+
 #endif /* ARM_FUNCTIONS_H_ */
index 02b2cdda0821a7937899259838e22262451a4083..997dd4d01eeea09a3305090ebb1578f5c3523c63 100644 (file)
@@ -84,4 +84,23 @@ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
 #  define compare256_generic compare256_c
 #endif
 
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// Generic code
+#  define native_adler32 adler32_c
+#  define native_adler32_fold_copy adler32_fold_copy_c
+#  define native_chunkmemset_safe chunkmemset_safe_c
+#  define native_chunksize chunksize_c
+#  define native_crc32 PREFIX(crc32_braid)
+#  define native_crc32_fold crc32_fold_c
+#  define native_crc32_fold_copy crc32_fold_copy_c
+#  define native_crc32_fold_final crc32_fold_final_c
+#  define native_crc32_fold_reset crc32_fold_reset_c
+#  define native_inflate_fast inflate_fast_c
+#  define native_slide_hash slide_hash_c
+#  define native_longest_match longest_match_generic
+#  define native_longest_match_slow longest_match_slow_generic
+#  define native_compare256 compare256_generic
+#endif
+
 #endif
index c64eafcdbe9cc1b07334abb94929df81e6f0468e..cb6b7650ecafa9e4f23d6929e610ce2fa9fe797d 100644 (file)
@@ -27,4 +27,41 @@ uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
 #endif
 
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// Power - VMX
+#  if defined(PPC_VMX) && defined(__ALTIVEC__)
+#    undef native_adler32
+#    define native_adler32 adler32_vmx
+#    undef native_slide_hash
+#    define native_slide_hash slide_hash_vmx
+#  endif
+// Power8 - VSX
+#  if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__)
+#    undef native_adler32
+#    define native_adler32 adler32_power8
+#    undef native_chunkmemset_safe
+#    define native_chunkmemset_safe chunkmemset_safe_power8
+#    undef native_chunksize
+#    define native_chunksize chunksize_power8
+#    undef native_inflate_fast
+#    define native_inflate_fast inflate_fast_power8
+#    undef native_slide_hash
+#    define native_slide_hash slide_hash_power8
+#  endif
+#  if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__)
+#    undef native_crc32
+#    define native_crc32 crc32_power8
+#  endif
+// Power9
+#  if defined(POWER9) && defined(_ARCH_PWR9)
+#    undef native_compare256
+#    define native_compare256 compare256_power9
+#    undef native_longest_match
+#    define native_longest_match longest_match_power9
+#    undef native_longest_match_slow
+#    define native_longest_match_slow longest_match_slow_power9
+#  endif
+#endif
+
 #endif /* POWER_FUNCTIONS_H_ */
index 90b398fbb055520a3c5cd58bd1c96ffb24717d63..015b2fbd75c4276a9785ceaeee5ba86402625ef4 100644 (file)
@@ -22,4 +22,28 @@ void slide_hash_rvv(deflate_state *s);
 void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// RISCV - RVV
+#  if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
+#    undef native_adler32
+#    define native_adler32 adler32_rvv
+#    undef native_adler32_fold_copy
+#    define native_adler32_fold_copy adler32_fold_copy_rvv
+#    undef native_chunkmemset_safe
+#    define native_chunkmemset_safe chunkmemset_safe_rvv
+#    undef native_chunksize
+#    define native_chunksize chunksize_rvv
+#    undef native_compare256
+#    define native_compare256 compare256_rvv
+#    undef native_inflate_fast
+#    define native_inflate_fast inflate_fast_rvv
+#    undef native_longest_match
+#    define native_longest_match longest_match_rvv
+#    undef native_longest_match_slow
+#    define native_longest_match_slow longest_match_slow_rvv
+#    undef native_slide_hash
+#    define native_slide_hash slide_hash_rvv
+#  endif
+#endif
+
 #endif /* RISCV_FUNCTIONS_H_ */
index e9f3cda47da205b97b8fc2a9545c2ddbcd066d00..e9c67978f0a0372199058f901b5cfb3facdcf29d 100644 (file)
@@ -9,4 +9,12 @@
 uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
 #endif
 
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+#  if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__)
+#    undef native_crc32
+#    define native_crc32 = crc32_s390_vx
+#  endif
+#endif
+
 #endif
index 55ec4acc91da2550f0b4abd034c1c015aa0c5cec..5aa9b31747452fa65acef7ebb8d4dff50a4a82a8 100644 (file)
@@ -67,4 +67,106 @@ uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
 uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 #endif
 
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// X86 - SSE2
+#  if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
+#    undef native_chunkmemset_safe
+#    define native_chunkmemset_safe chunkmemset_safe_sse2
+#    undef native_chunksize
+#    define native_chunksize chunksize_sse2
+#    undef native_inflate_fast
+#    define native_inflate_fast inflate_fast_sse2
+#    undef native_slide_hash
+#    define native_slide_hash slide_hash_sse2
+#    ifdef HAVE_BUILTIN_CTZ
+#      undef native_compare256
+#      define native_compare256 compare256_sse2
+#      undef native_longest_match
+#      define native_longest_match longest_match_sse2
+#      undef native_longest_match_slow
+#      define native_longest_match_slow longest_match_slow_sse2
+#    endif
+#endif
+// X86 - SSSE3
+#  if defined(X86_SSSE3) && defined(__SSSE3__)
+#    undef native_adler32
+#    define native_adler32 adler32_ssse3
+#    undef native_chunkmemset_safe
+#    define native_chunkmemset_safe chunkmemset_safe_ssse3
+#    undef native_inflate_fast
+#    define native_inflate_fast inflate_fast_ssse3
+#  endif
+// X86 - SSE4.2
+#  if defined(X86_SSE42) && defined(__SSE4_2__)
+#    undef native_adler32_fold_copy
+#    define native_adler32_fold_copy adler32_fold_copy_sse42
+#  endif
+
+// X86 - PCLMUL
+#if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
+#  undef native_crc32
+#  define native_crc32 crc32_pclmulqdq
+#  undef native_crc32_fold
+#  define native_crc32_fold crc32_fold_pclmulqdq
+#  undef native_crc32_fold_copy
+#  define native_crc32_fold_copy crc32_fold_pclmulqdq_copy
+#  undef native_crc32_fold_final
+#  define native_crc32_fold_final crc32_fold_pclmulqdq_final
+#  undef native_crc32_fold_reset
+#  define native_crc32_fold_reset crc32_fold_pclmulqdq_reset
+#endif
+// X86 - AVX
+#  if defined(X86_AVX2) && defined(__AVX2__)
+#    undef native_adler32
+#    define native_adler32 adler32_avx2
+#    undef native_adler32_fold_copy
+#    define native_adler32_fold_copy adler32_fold_copy_avx2
+#    undef native_chunkmemset_safe
+#    define native_chunkmemset_safe chunkmemset_safe_avx2
+#    undef native_chunksize
+#    define native_chunksize chunksize_avx2
+#    undef native_inflate_fast
+#    define native_inflate_fast inflate_fast_avx2
+#    undef native_slide_hash
+#    define native_slide_hash slide_hash_avx2
+#    ifdef HAVE_BUILTIN_CTZ
+#      undef native_compare256
+#      define native_compare256 compare256_avx2
+#      undef native_longest_match
+#      define native_longest_match longest_match_avx2
+#      undef native_longest_match_slow
+#      define native_longest_match_slow longest_match_slow_avx2
+#    endif
+#  endif
+
+// X86 - AVX512 (F,DQ,BW,Vl)
+#  if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
+#    undef native_adler32
+#    define native_adler32 adler32_avx512
+#    undef native_adler32_fold_copy
+#    define native_adler32_fold_copy adler32_fold_copy_avx512
+// X86 - AVX512 (VNNI)
+#    if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
+#      undef native_adler32
+#      define native_adler32 adler32_avx512_vnni
+#      undef native_adler32_fold_copy
+#      define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
+#    endif
+// X86 - VPCLMULQDQ
+#    if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
+#      undef native_crc32
+#      define native_crc32 crc32_vpclmulqdq
+#      undef native_crc32_fold
+#      define native_crc32_fold crc32_fold_vpclmulqdq
+#      undef native_crc32_fold_copy
+#      define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy
+#      undef native_crc32_fold_final
+#      define native_crc32_fold_final crc32_fold_vpclmulqdq_final
+#      undef native_crc32_fold_reset
+#      define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset
+#    endif
+#  endif
+#endif
+
 #endif /* X86_FUNCTIONS_H_ */
index b542815bc96d9ac25a596fb962315f8d8b79ff9f..cf77eb94d418f21207154d980d475e60482c9b9a 100644 (file)
--- a/deflate.c
+++ b/deflate.c
@@ -190,8 +190,10 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
     deflate_state *s;
     int wrap = 1;
 
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
     /* Force initialization functable, because deflate captures function pointers from functable. */
     functable.force_init();
+#endif
 
     if (strm == NULL)
         return Z_STREAM_ERROR;
index c9444c0454bb5fe672589e1ad1d2a4c555c57337..8012a40b11b428cc9bb0bdb1f413046f4d626377 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright (C) 2017 Hans Kristian Rosbach
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
 
 #include "zbuild.h"
 #include "functable.h"
@@ -122,6 +123,7 @@ static void init_functable(void) {
 #  endif
     }
 #endif
+    // X86 - AVX512 (F,DQ,BW,Vl)
 #ifdef X86_AVX512
     if (cf.x86.has_avx512) {
         ft.adler32 = &adler32_avx512;
@@ -348,3 +350,5 @@ Z_INTERNAL struct functable_s functable = {
     longest_match_slow_stub,
     slide_hash_stub,
 };
+
+#endif
index b0d64d49c207b7a1c4f620ebc77a33ba03ef1461..81a386cd656d13cb52591bd1890e648ee911acfa 100644 (file)
@@ -9,6 +9,19 @@
 #include "deflate.h"
 #include "crc32.h"
 
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+
+#  include "arch_functions.h"
+
+/* When compiling with native instructions it is not necessary to use functable.
+ * Instead we use native_ macro indicating the best available variant of arch-specific
+ * functions for the current platform.
+ */
+#  define FUNCTABLE_CALL(name) native_ ## name
+#  define FUNCTABLE_FPTR(name) &native_ ## name
+
+#else
+
 struct functable_s {
     void     (* force_init)         (void);
     uint32_t (* adler32)            (uint32_t adler, const uint8_t *buf, size_t len);
@@ -32,8 +45,9 @@ Z_INTERNAL extern struct functable_s functable;
 
 /* Explicitly indicate functions are conditionally dispatched.
  */
-#define FUNCTABLE_CALL(name) functable.name
-#define FUNCTABLE_FPTR(name) functable.name
+#  define FUNCTABLE_CALL(name) functable.name
+#  define FUNCTABLE_FPTR(name) functable.name
 
+#endif
 
 #endif
index 52b0a29e0b0dac8c4b712370d72e33304f095175..9acfbc0591d5f2ab0fdb9e347536186f869d2718 100644 (file)
--- a/inflate.c
+++ b/inflate.c
@@ -139,8 +139,10 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo
     int32_t ret;
     struct inflate_state *state;
 
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
     /* Initialize functable earlier. */
     functable.force_init();
+#endif
 
     if (strm == NULL)
         return Z_STREAM_ERROR;