option(WITH_NEW_STRATEGIES "Use new strategies" ON)
option(WITH_NATIVE_INSTRUCTIONS
"Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF)
+option(WITH_RUNTIME_CPU_DETECTION "Build with runtime detection of CPU architecture" ON)
option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
separate_arguments(NATIVEOPTIONS UNIX_COMMAND "${NATIVEFLAG}")
add_compile_options(${NATIVEOPTIONS})
+ set(WITH_RUNTIME_CPU_DETECTION OFF)
endif()
else()
message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
set(WITH_NATIVE_INSTRUCTIONS OFF)
endif()
endif()
+# Compile without functable or CPU detection
+if(NOT WITH_RUNTIME_CPU_DETECTION)
+ if(MSVC AND BASEARCH_X86_FOUND)
+ message(STATUS "WARNING: Microsoft Visual Studio does not support compile-time detection of CPU features for \"/arch\" levels below \"AVX\"")
+ endif()
+ add_definitions(-DDISABLE_RUNTIME_CPU_DETECTION)
+endif()
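+
+# Usage note (illustrative): runtime dispatch can be turned off explicitly,
+# e.g. `cmake -DWITH_RUNTIME_CPU_DETECTION=OFF ..`, or implicitly by enabling
+# WITH_NATIVE_INSTRUCTIONS, which sets it to OFF above.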
+
# Force disable LTO if WITH_NATIVE_INSTRUCTIONS is not active
if(NOT WITH_NATIVE_INSTRUCTIONS)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF)
add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS
"Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)")
+add_feature_info(WITH_RUNTIME_CPU_DETECTION WITH_RUNTIME_CPU_DETECTION "Build with runtime CPU detection")
add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings")
add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking")
Build Options
-------------
-| CMake | configure | Description | Default |
-|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------|
-| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
-| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
-| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
-| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
-| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
-| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
-| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF |
-| WITH_GTEST | | Build gtest_zlib | ON |
-| WITH_FUZZERS | | Build test/fuzz | OFF |
-| WITH_BENCHMARKS | | Build test/benchmarks | OFF |
-| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
-| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
+| CMake | configure | Description | Default |
+|:---------------------------|:-------------------------|:------------------------------------------------------------------------------------|---------|
+| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
+| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
+| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
+| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
+| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
+| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
+| WITH_RUNTIME_CPU_DETECTION | | Compiles with runtime CPU detection | ON |
+| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF |
+| WITH_GTEST | | Build gtest_zlib | ON |
+| WITH_FUZZERS | | Build test/fuzz | OFF |
+| WITH_BENCHMARKS | | Build test/benchmarks | OFF |
+| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
+| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
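+
+For example, a zlib-compatible build without gzFile support can be configured with `cmake -DZLIB_COMPAT=ON -DWITH_GZFILEOP=OFF .` or, equivalently, `./configure --zlib-compat --without-gzfileops`.
+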
Install
#ifndef ARM_FUNCTIONS_H_
#define ARM_FUNCTIONS_H_
-
#ifdef ARM_NEON
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t chunksize_neon(void);
void slide_hash_armv6(deflate_state *s);
#endif
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// ARM - SIMD
+# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD)
+# undef native_slide_hash
+# define native_slide_hash slide_hash_armv6
+# endif
+// ARM - NEON
+# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || defined(ARM_NOCHECK_NEON)
+# undef native_adler32
+# define native_adler32 adler32_neon
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_neon
+# undef native_chunksize
+# define native_chunksize chunksize_neon
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_neon
+# undef native_slide_hash
+# define native_slide_hash slide_hash_neon
+# ifdef HAVE_BUILTIN_CTZLL
+# undef native_compare256
+# define native_compare256 compare256_neon
+# undef native_longest_match
+# define native_longest_match longest_match_neon
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_neon
+# endif
+# endif
+// ARM - ACLE
+# if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32)
+# undef native_crc32
+# define native_crc32 crc32_acle
+# endif
+#endif
+
#endif /* ARM_FUNCTIONS_H_ */
# define compare256_generic compare256_c
#endif
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// Generic code
+# define native_adler32 adler32_c
+# define native_adler32_fold_copy adler32_fold_copy_c
+# define native_chunkmemset_safe chunkmemset_safe_c
+# define native_chunksize chunksize_c
+# define native_crc32 PREFIX(crc32_braid)
+# define native_crc32_fold crc32_fold_c
+# define native_crc32_fold_copy crc32_fold_copy_c
+# define native_crc32_fold_final crc32_fold_final_c
+# define native_crc32_fold_reset crc32_fold_reset_c
+# define native_inflate_fast inflate_fast_c
+# define native_slide_hash slide_hash_c
+# define native_longest_match longest_match_generic
+# define native_longest_match_slow longest_match_slow_generic
+# define native_compare256 compare256_generic
+#endif
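+
+// Usage sketch (illustrative, not part of the header): the generic bindings
+// above are the compile-time baseline; the arch-specific headers re-bind
+// individual entries via #undef/#define when a faster variant is available
+// for the target ISA. A call such as
+//   uint32_t sum = native_adler32(1, buf, len);
+// therefore resolves to adler32_c here, or to e.g. adler32_neon when the
+// corresponding ARM NEON block is active.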
+
#endif
uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
#endif
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// Power - VMX
+# if defined(PPC_VMX) && defined(__ALTIVEC__)
+# undef native_adler32
+# define native_adler32 adler32_vmx
+# undef native_slide_hash
+# define native_slide_hash slide_hash_vmx
+# endif
+// Power8 - VSX
+# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__)
+# undef native_adler32
+# define native_adler32 adler32_power8
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_power8
+# undef native_chunksize
+# define native_chunksize chunksize_power8
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_power8
+# undef native_slide_hash
+# define native_slide_hash slide_hash_power8
+# endif
+# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__)
+# undef native_crc32
+# define native_crc32 crc32_power8
+# endif
+// Power9
+# if defined(POWER9) && defined(_ARCH_PWR9)
+# undef native_compare256
+# define native_compare256 compare256_power9
+# undef native_longest_match
+# define native_longest_match longest_match_power9
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_power9
+# endif
+#endif
+
#endif /* POWER_FUNCTIONS_H_ */
void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
#endif
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// RISCV - RVV
+# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
+# undef native_adler32
+# define native_adler32 adler32_rvv
+# undef native_adler32_fold_copy
+# define native_adler32_fold_copy adler32_fold_copy_rvv
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_rvv
+# undef native_chunksize
+# define native_chunksize chunksize_rvv
+# undef native_compare256
+# define native_compare256 compare256_rvv
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_rvv
+# undef native_longest_match
+# define native_longest_match longest_match_rvv
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_rvv
+# undef native_slide_hash
+# define native_slide_hash slide_hash_rvv
+# endif
+#endif
+
#endif /* RISCV_FUNCTIONS_H_ */
uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
#endif
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__)
+# undef native_crc32
+# define native_crc32 crc32_s390_vx
+# endif
+#endif
+
#endif
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
#endif
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// X86 - SSE2
+# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_sse2
+# undef native_chunksize
+# define native_chunksize chunksize_sse2
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_sse2
+# undef native_slide_hash
+# define native_slide_hash slide_hash_sse2
+# ifdef HAVE_BUILTIN_CTZ
+# undef native_compare256
+# define native_compare256 compare256_sse2
+# undef native_longest_match
+# define native_longest_match longest_match_sse2
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_sse2
+# endif
+# endif
+// X86 - SSSE3
+# if defined(X86_SSSE3) && defined(__SSSE3__)
+# undef native_adler32
+# define native_adler32 adler32_ssse3
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_ssse3
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_ssse3
+# endif
+// X86 - SSE4.2
+# if defined(X86_SSE42) && defined(__SSE4_2__)
+# undef native_adler32_fold_copy
+# define native_adler32_fold_copy adler32_fold_copy_sse42
+# endif
+// X86 - PCLMUL
+# if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
+# undef native_crc32
+# define native_crc32 crc32_pclmulqdq
+# undef native_crc32_fold
+# define native_crc32_fold crc32_fold_pclmulqdq
+# undef native_crc32_fold_copy
+# define native_crc32_fold_copy crc32_fold_pclmulqdq_copy
+# undef native_crc32_fold_final
+# define native_crc32_fold_final crc32_fold_pclmulqdq_final
+# undef native_crc32_fold_reset
+# define native_crc32_fold_reset crc32_fold_pclmulqdq_reset
+# endif
+// X86 - AVX
+# if defined(X86_AVX2) && defined(__AVX2__)
+# undef native_adler32
+# define native_adler32 adler32_avx2
+# undef native_adler32_fold_copy
+# define native_adler32_fold_copy adler32_fold_copy_avx2
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_avx2
+# undef native_chunksize
+# define native_chunksize chunksize_avx2
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_avx2
+# undef native_slide_hash
+# define native_slide_hash slide_hash_avx2
+# ifdef HAVE_BUILTIN_CTZ
+# undef native_compare256
+# define native_compare256 compare256_avx2
+# undef native_longest_match
+# define native_longest_match longest_match_avx2
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_avx2
+# endif
+# endif
+// X86 - AVX512 (F,DQ,BW,VL)
+# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
+# undef native_adler32
+# define native_adler32 adler32_avx512
+# undef native_adler32_fold_copy
+# define native_adler32_fold_copy adler32_fold_copy_avx512
+// X86 - AVX512 (VNNI)
+# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
+# undef native_adler32
+# define native_adler32 adler32_avx512_vnni
+# undef native_adler32_fold_copy
+# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
+# endif
+// X86 - VPCLMULQDQ
+# if defined(X86_VPCLMULQDQ_CRC) && defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
+# undef native_crc32
+# define native_crc32 crc32_vpclmulqdq
+# undef native_crc32_fold
+# define native_crc32_fold crc32_fold_vpclmulqdq
+# undef native_crc32_fold_copy
+# define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy
+# undef native_crc32_fold_final
+# define native_crc32_fold_final crc32_fold_vpclmulqdq_final
+# undef native_crc32_fold_reset
+# define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset
+# endif
+# endif
+#endif
+
#endif /* X86_FUNCTIONS_H_ */
deflate_state *s;
int wrap = 1;
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
/* Force initialization of the functable, because deflate captures function pointers from the functable. */
functable.force_init();
+#endif
if (strm == NULL)
return Z_STREAM_ERROR;
* Copyright (C) 2017 Hans Kristian Rosbach
* For conditions of distribution and use, see copyright notice in zlib.h
*/
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
#include "zbuild.h"
#include "functable.h"
# endif
}
#endif
+ // X86 - AVX512 (F,DQ,BW,VL)
#ifdef X86_AVX512
if (cf.x86.has_avx512) {
ft.adler32 = &adler32_avx512;
longest_match_slow_stub,
slide_hash_stub,
};
+
+#endif
#include "deflate.h"
#include "crc32.h"
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+
+# include "arch_functions.h"
+
+/* When runtime CPU detection is disabled, the functable is not needed.
+ * Instead, the native_ macros resolve directly to the best arch-specific
+ * variant of each function available for the current platform and compile
+ * options.
+ */
+# define FUNCTABLE_CALL(name) native_ ## name
+# define FUNCTABLE_FPTR(name) &native_ ## name
+
+#else
+
struct functable_s {
void (* force_init) (void);
uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, size_t len);
/* Explicitly indicate functions are conditionally dispatched.
*/
-#define FUNCTABLE_CALL(name) functable.name
-#define FUNCTABLE_FPTR(name) functable.name
+# define FUNCTABLE_CALL(name) functable.name
+# define FUNCTABLE_FPTR(name) functable.name
+#endif
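+
+/* Call-site sketch (illustrative): library code uses these macros the same
+ * way in both modes, e.g.
+ *   strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, buf, len);
+ * which expands to native_adler32(...) when runtime detection is disabled,
+ * and to functable.adler32(...) when it is enabled.
+ */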
#endif
int32_t ret;
struct inflate_state *state;
+#ifndef DISABLE_RUNTIME_CPU_DETECTION
/* Force initialization of the functable, because inflate captures function pointers from the functable. */
functable.force_init();
+#endif
if (strm == NULL)
return Z_STREAM_ERROR;