From: Vladislav Shchapov Date: Sat, 18 Feb 2023 16:25:55 +0000 (+0500) Subject: Replace global CPU feature flag variables with local variable in init_functable X-Git-Tag: 2.1.0-beta1~26 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=20d8fa8af137e5efb4ec79b25caf2de0ed68b5c7;p=thirdparty%2Fzlib-ng.git Replace global CPU feature flag variables with local variable in init_functable Signed-off-by: Vladislav Shchapov --- diff --git a/arch/arm/arm_features.c b/arch/arm/arm_features.c index d41c13acb..7394351fa 100644 --- a/arch/arm/arm_features.c +++ b/arch/arm/arm_features.c @@ -1,4 +1,5 @@ #include "../../zbuild.h" +#include "arm_features.h" #if defined(__linux__) && defined(HAVE_SYS_AUXV_H) # include <sys/auxv.h> @@ -71,14 +72,11 @@ static inline int arm_has_neon() { } #endif -Z_INTERNAL int arm_cpu_has_neon; -Z_INTERNAL int arm_cpu_has_crc32; - -void Z_INTERNAL arm_check_features(void) { +void Z_INTERNAL arm_check_features(struct arm_cpu_features *features) { #if defined(__aarch64__) || defined(_M_ARM64) - arm_cpu_has_neon = 1; /* always available */ + features->has_neon = 1; /* always available */ #else - arm_cpu_has_neon = arm_has_neon(); + features->has_neon = arm_has_neon(); #endif - arm_cpu_has_crc32 = arm_has_crc32(); + features->has_crc32 = arm_has_crc32(); } diff --git a/arch/arm/arm_features.h b/arch/arm/arm_features.h index 7998e7942..6fcd8d3eb 100644 --- a/arch/arm/arm_features.h +++ b/arch/arm/arm_features.h @@ -5,9 +5,11 @@ #ifndef ARM_H_ #define ARM_H_ -extern int arm_cpu_has_neon; -extern int arm_cpu_has_crc32; +struct arm_cpu_features { + int has_neon; + int has_crc32; +}; -void Z_INTERNAL arm_check_features(void); +void Z_INTERNAL arm_check_features(struct arm_cpu_features *features); #endif /* ARM_H_ */ diff --git a/arch/power/power_features.c b/arch/power/power_features.c index 0614ff0f2..003a4c6e3 100644 --- a/arch/power/power_features.c +++ b/arch/power/power_features.c @@ -13,11 +13,7 @@ #include "../../zbuild.h" #include
"power_features.h" -Z_INTERNAL int power_cpu_has_altivec = 0; -Z_INTERNAL int power_cpu_has_arch_2_07 = 0; -Z_INTERNAL int power_cpu_has_arch_3_00 = 0; - -void Z_INTERNAL power_check_features(void) { +void Z_INTERNAL power_check_features(struct power_cpu_features *features) { #ifdef PPC_FEATURES unsigned long hwcap; #ifdef __FreeBSD__ @@ -27,7 +23,7 @@ void Z_INTERNAL power_check_features(void) { #endif if (hwcap & PPC_FEATURE_HAS_ALTIVEC) - power_cpu_has_altivec = 1; + features->has_altivec = 1; #endif #ifdef POWER_FEATURES @@ -39,8 +35,8 @@ void Z_INTERNAL power_check_features(void) { #endif if (hwcap2 & PPC_FEATURE2_ARCH_2_07) - power_cpu_has_arch_2_07 = 1; + features->has_arch_2_07 = 1; if (hwcap2 & PPC_FEATURE2_ARCH_3_00) - power_cpu_has_arch_3_00 = 1; + features->has_arch_3_00 = 1; #endif } diff --git a/arch/power/power_features.h b/arch/power/power_features.h index 8df9f9e95..9252364cc 100644 --- a/arch/power/power_features.h +++ b/arch/power/power_features.h @@ -7,10 +7,12 @@ #ifndef POWER_H_ #define POWER_H_ -extern int power_cpu_has_altivec; -extern int power_cpu_has_arch_2_07; -extern int power_cpu_has_arch_3_00; +struct power_cpu_features { + int has_altivec; + int has_arch_2_07; + int has_arch_3_00; +}; -void Z_INTERNAL power_check_features(void); +void Z_INTERNAL power_check_features(struct power_cpu_features *features); #endif /* POWER_H_ */ diff --git a/arch/s390/crc32-vx.c b/arch/s390/crc32-vx.c index 78c0be5ce..acfa21887 100644 --- a/arch/s390/crc32-vx.c +++ b/arch/s390/crc32-vx.c @@ -198,7 +198,7 @@ static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) { #define VX_ALIGNMENT 16L #define VX_ALIGN_MASK (VX_ALIGNMENT - 1) -uint32_t Z_INTERNAL PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, size_t len) { +uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t len) { size_t prealign, aligned, remaining; if (len < VX_MIN_LEN + VX_ALIGN_MASK) diff --git a/arch/s390/s390_features.c 
b/arch/s390/s390_features.c index 0658e4bbe..711b7dd46 100644 --- a/arch/s390/s390_features.c +++ b/arch/s390/s390_features.c @@ -5,10 +5,6 @@ # include <sys/auxv.h> #endif -Z_INTERNAL int PREFIX(s390_cpu_has_vx) = 0; - -void Z_INTERNAL PREFIX(s390_check_features)(void) { -#ifdef S390_FEATURES - PREFIX(s390_cpu_has_vx) = getauxval(AT_HWCAP) & HWCAP_S390_VX; -#endif +void Z_INTERNAL s390_check_features(struct s390_cpu_features *features) { + features->has_vx = getauxval(AT_HWCAP) & HWCAP_S390_VX; } diff --git a/arch/s390/s390_features.h b/arch/s390/s390_features.h index 9e2608fa9..b8ffef74d 100644 --- a/arch/s390/s390_features.h +++ b/arch/s390/s390_features.h @@ -1,8 +1,10 @@ #ifndef S390_FEATURES_H_ #define S390_FEATURES_H_ -extern int PREFIX(s390_cpu_has_vx); +struct s390_cpu_features { + int has_vx; +}; -void Z_INTERNAL PREFIX(s390_check_features)(void); +void Z_INTERNAL s390_check_features(struct s390_cpu_features *features); #endif diff --git a/arch/x86/x86_features.c b/arch/x86/x86_features.c index 2c5cb54c6..4ff7f63ee 100644 --- a/arch/x86/x86_features.c +++ b/arch/x86/x86_features.c @@ -8,6 +8,7 @@ */ #include "../../zbuild.h" +#include "x86_features.h" #ifdef _WIN32 # include @@ -18,18 +19,6 @@ #include -Z_INTERNAL int x86_cpu_has_avx2; -Z_INTERNAL int x86_cpu_has_avx512; -Z_INTERNAL int x86_cpu_has_avx512vnni; -Z_INTERNAL int x86_cpu_has_sse2; -Z_INTERNAL int x86_cpu_has_ssse3; -Z_INTERNAL int x86_cpu_has_sse41; -Z_INTERNAL int x86_cpu_has_sse42; -Z_INTERNAL int x86_cpu_has_pclmulqdq; -Z_INTERNAL int x86_cpu_has_vpclmulqdq; -Z_INTERNAL int x86_cpu_has_os_save_ymm; -Z_INTERNAL int x86_cpu_has_os_save_zmm; - static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) { #ifdef _WIN32 unsigned int registers[4]; @@ -68,27 +57,27 @@ static inline uint64_t xgetbv(unsigned int xcr) { #endif } -void Z_INTERNAL x86_check_features(void) { +void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) { unsigned eax, ebx, ecx, edx;
unsigned maxbasic; cpuid(0, &maxbasic, &ebx, &ecx, &edx); cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx); - x86_cpu_has_sse2 = edx & 0x4000000; - x86_cpu_has_ssse3 = ecx & 0x200; - x86_cpu_has_sse41 = ecx & 0x80000; - x86_cpu_has_sse42 = ecx & 0x100000; - x86_cpu_has_pclmulqdq = ecx & 0x2; + features->has_sse2 = edx & 0x4000000; + features->has_ssse3 = ecx & 0x200; + features->has_sse41 = ecx & 0x80000; + features->has_sse42 = ecx & 0x100000; + features->has_pclmulqdq = ecx & 0x2; if (ecx & 0x08000000) { uint64_t xfeature = xgetbv(0); - x86_cpu_has_os_save_ymm = ((xfeature & 0x06) == 0x06); - x86_cpu_has_os_save_zmm = ((xfeature & 0xe6) == 0xe6); + features->has_os_save_ymm = ((xfeature & 0x06) == 0x06); + features->has_os_save_zmm = ((xfeature & 0xe6) == 0xe6); } else { - x86_cpu_has_os_save_ymm = 0; - x86_cpu_has_os_save_zmm = 0; + features->has_os_save_ymm = 0; + features->has_os_save_zmm = 0; } if (maxbasic >= 7) { @@ -96,27 +85,27 @@ void Z_INTERNAL x86_check_features(void) { // check BMI1 bit // Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf - x86_cpu_has_vpclmulqdq = ecx & 0x400; + features->has_vpclmulqdq = ecx & 0x400; // check AVX2 bit if the OS supports saving YMM registers - if (x86_cpu_has_os_save_ymm) { - x86_cpu_has_avx2 = ebx & 0x20; + if (features->has_os_save_ymm) { + features->has_avx2 = ebx & 0x20; } else { - x86_cpu_has_avx2 = 0; + features->has_avx2 = 0; } // check AVX512 bits if the OS supports saving ZMM registers - if (x86_cpu_has_os_save_zmm) { - x86_cpu_has_avx512 = ebx & 0x00010000; - x86_cpu_has_avx512vnni = ecx & 0x800; + if (features->has_os_save_zmm) { + features->has_avx512 = ebx & 0x00010000; + features->has_avx512vnni = ecx & 0x800; } else { - x86_cpu_has_avx512 = 0; - x86_cpu_has_avx512vnni = 0; + features->has_avx512 = 0; + features->has_avx512vnni = 0; } } else { - x86_cpu_has_avx2 = 0; - 
x86_cpu_has_avx512 = 0; - x86_cpu_has_avx512vnni = 0; - x86_cpu_has_vpclmulqdq = 0; + features->has_avx2 = 0; + features->has_avx512 = 0; + features->has_avx512vnni = 0; + features->has_vpclmulqdq = 0; } } diff --git a/arch/x86/x86_features.h b/arch/x86/x86_features.h index 06677b2e1..00b510ffc 100644 --- a/arch/x86/x86_features.h +++ b/arch/x86/x86_features.h @@ -6,18 +6,20 @@ #ifndef X86_FEATURES_H_ #define X86_FEATURES_H_ -extern int x86_cpu_has_avx2; -extern int x86_cpu_has_avx512; -extern int x86_cpu_has_avx512vnni; -extern int x86_cpu_has_sse2; -extern int x86_cpu_has_ssse3; -extern int x86_cpu_has_sse41; -extern int x86_cpu_has_sse42; -extern int x86_cpu_has_pclmulqdq; -extern int x86_cpu_has_vpclmulqdq; -extern int x86_cpu_has_os_save_ymm; -extern int x86_cpu_has_os_save_zmm; +struct x86_cpu_features { + int has_avx2; + int has_avx512; + int has_avx512vnni; + int has_sse2; + int has_ssse3; + int has_sse41; + int has_sse42; + int has_pclmulqdq; + int has_vpclmulqdq; + int has_os_save_ymm; + int has_os_save_zmm; +}; -void Z_INTERNAL x86_check_features(void); +void Z_INTERNAL x86_check_features(struct x86_cpu_features *features); #endif /* CPU_H_ */ diff --git a/cpu_features.c b/cpu_features.c index b5e725769..b69a01304 100644 --- a/cpu_features.c +++ b/cpu_features.c @@ -4,21 +4,18 @@ */ #include "zbuild.h" - #include "cpu_features.h" +#include <string.h> -Z_INTERNAL void cpu_check_features(void) { - static int features_checked = 0; - if (features_checked) - return; +Z_INTERNAL void cpu_check_features(struct cpu_features *features) { + memset(features, 0, sizeof(struct cpu_features)); #if defined(X86_FEATURES) - x86_check_features(); + x86_check_features(&features->x86); #elif defined(ARM_FEATURES) - arm_check_features(); + arm_check_features(&features->arm); #elif defined(PPC_FEATURES) || defined(POWER_FEATURES) - power_check_features(); + power_check_features(&features->power); #elif defined(S390_FEATURES) - PREFIX(s390_check_features)(); +
s390_check_features(&features->s390); #endif - features_checked = 1; } diff --git a/cpu_features.h b/cpu_features.h index 22d70da3d..14eb19a75 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -20,7 +20,19 @@ # include "arch/s390/s390_features.h" #endif -extern void cpu_check_features(void); +struct cpu_features { +#if defined(X86_FEATURES) + struct x86_cpu_features x86; +#elif defined(ARM_FEATURES) + struct arm_cpu_features arm; +#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) + struct power_cpu_features power; +#elif defined(S390_FEATURES) + struct s390_cpu_features s390; +#endif +}; + +extern void cpu_check_features(struct cpu_features *features); /* adler32 */ typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); @@ -134,7 +146,7 @@ extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); #elif defined(POWER8_VSX) extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); #elif defined(S390_CRC32_VX) -extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, size_t len); +extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len); #endif /* compare256 */ diff --git a/deflate.c b/deflate.c index 273967b42..3ea92a82d 100644 --- a/deflate.c +++ b/deflate.c @@ -48,7 +48,6 @@ */ #include "zbuild.h" -#include "cpu_features.h" #include "deflate.h" #include "deflate_p.h" #include "functable.h" @@ -195,8 +194,6 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level deflate_state *s; int wrap = 1; - cpu_check_features(); - if (strm == NULL) return Z_STREAM_ERROR; diff --git a/functable.c b/functable.c index da9d10ec5..c7d477c7f 100644 --- a/functable.c +++ b/functable.c @@ -13,8 +13,9 @@ static void init_functable(void) { struct functable_s ft; + struct cpu_features cf; - cpu_check_features(); + cpu_check_features(&cf); // Generic code ft.adler32 = &adler32_c; @@ -58,7 +59,7 @@ static void init_functable(void) { // X86 - SSE2 #ifdef X86_SSE2 
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) - if (x86_cpu_has_sse2) + if (cf.x86.has_sse2) # endif { ft.chunkmemset_safe = &chunkmemset_safe_sse2; @@ -74,18 +75,18 @@ static void init_functable(void) { #endif // X86 - SSSE3 #ifdef X86_SSSE3 - if (x86_cpu_has_ssse3) + if (cf.x86.has_ssse3) ft.adler32 = &adler32_ssse3; #endif // X86 - SSE4 #if defined(X86_SSE41) && defined(X86_SSE2) - if (x86_cpu_has_sse41) { + if (cf.x86.has_sse41) { ft.chunkmemset_safe = &chunkmemset_safe_sse41; ft.inflate_fast = &inflate_fast_sse41; } #endif #ifdef X86_SSE42 - if (x86_cpu_has_sse42) { + if (cf.x86.has_sse42) { ft.adler32_fold_copy = &adler32_fold_copy_sse42; ft.insert_string = &insert_string_sse4; ft.quick_insert_string = &quick_insert_string_sse4; @@ -94,7 +95,7 @@ static void init_functable(void) { #endif // X86 - PCLMUL #ifdef X86_PCLMULQDQ_CRC - if (x86_cpu_has_pclmulqdq) { + if (cf.x86.has_pclmulqdq) { ft.crc32 = &crc32_pclmulqdq; ft.crc32_fold = &crc32_fold_pclmulqdq; ft.crc32_fold_copy = &crc32_fold_pclmulqdq_copy; @@ -104,7 +105,7 @@ static void init_functable(void) { #endif // X86 - AVX #ifdef X86_AVX2 - if (x86_cpu_has_avx2) { + if (cf.x86.has_avx2) { ft.adler32 = &adler32_avx2; ft.adler32_fold_copy = &adler32_fold_copy_avx2; ft.chunkmemset_safe = &chunkmemset_safe_avx; @@ -119,20 +120,20 @@ static void init_functable(void) { } #endif #ifdef X86_AVX512 - if (x86_cpu_has_avx512) { + if (cf.x86.has_avx512) { ft.adler32 = &adler32_avx512; ft.adler32_fold_copy = &adler32_fold_copy_avx512; } #endif #ifdef X86_AVX512VNNI - if (x86_cpu_has_avx512vnni) { + if (cf.x86.has_avx512vnni) { ft.adler32 = &adler32_avx512_vnni; ft.adler32_fold_copy = &adler32_fold_copy_avx512_vnni; } #endif // X86 - VPCLMULQDQ #if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) - if (x86_cpu_has_pclmulqdq && x86_cpu_has_avx512 && x86_cpu_has_vpclmulqdq) { + if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) { ft.crc32 = &crc32_vpclmulqdq; 
ft.crc32_fold = &crc32_fold_vpclmulqdq; ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy; @@ -145,7 +146,7 @@ static void init_functable(void) { // ARM - NEON #ifdef ARM_NEON # ifndef ARM_NOCHECK_NEON - if (arm_cpu_has_neon) + if (cf.arm.has_neon) # endif { ft.adler32 = &adler32_neon; @@ -162,7 +163,7 @@ static void init_functable(void) { #endif // ARM - ACLE #ifdef ARM_ACLE - if (arm_cpu_has_crc32) { + if (cf.arm.has_crc32) { ft.crc32 = &crc32_acle; ft.insert_string = &insert_string_acle; ft.quick_insert_string = &quick_insert_string_acle; @@ -173,14 +174,14 @@ static void init_functable(void) { // Power - VMX #ifdef PPC_VMX - if (power_cpu_has_altivec) { + if (cf.power.has_altivec) { ft.adler32 = &adler32_vmx; ft.slide_hash = &slide_hash_vmx; } #endif // Power8 - VSX #ifdef POWER8_VSX - if (power_cpu_has_arch_2_07) { + if (cf.power.has_arch_2_07) { ft.adler32 = &adler32_power8; ft.chunkmemset_safe = &chunkmemset_safe_power8; ft.chunksize = &chunksize_power8; @@ -189,12 +190,12 @@ static void init_functable(void) { } #endif #ifdef POWER8_VSX_CRC32 - if (power_cpu_has_arch_2_07) + if (cf.power.has_arch_2_07) ft.crc32 = &crc32_power8; #endif // Power9 #ifdef POWER9 - if (power_cpu_has_arch_3_00) { + if (cf.power.has_arch_3_00) { ft.compare256 = &compare256_power9; ft.longest_match = &longest_match_power9; ft.longest_match_slow = &longest_match_slow_power9; @@ -204,8 +205,8 @@ static void init_functable(void) { // S390 #ifdef S390_CRC32_VX - if (PREFIX(s390_cpu_has_vx)) - ft.crc32 = &PREFIX(s390_crc32_vx); + if (cf.s390.has_vx) + ft.crc32 = crc32_s390_vx; #endif // Assign function pointers individually for atomic operation diff --git a/inflate.c b/inflate.c index 506bb2a50..df4c56a16 100644 --- a/inflate.c +++ b/inflate.c @@ -5,7 +5,6 @@ #include "zbuild.h" #include "zutil.h" -#include "cpu_features.h" #include "inftrees.h" #include "inflate.h" #include "inflate_p.h" @@ -140,8 +139,6 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo 
int32_t ret; struct inflate_state *state; - cpu_check_features(); - if (strm == NULL) return Z_STREAM_ERROR; strm->msg = NULL; /* in case we return an error */ diff --git a/test/benchmarks/benchmark_adler32.cc b/test/benchmarks/benchmark_adler32.cc index 19691376f..5b0b65d67 100644 --- a/test/benchmarks/benchmark_adler32.cc +++ b/test/benchmarks/benchmark_adler32.cc @@ -11,7 +11,7 @@ extern "C" { # include "zbuild.h" # include "zutil_p.h" -# include "cpu_features.h" +# include "../test_cpu_features.h" } #define MAX_RANDOM_INTS (1024 * 1024) @@ -65,25 +65,25 @@ public: BENCHMARK_ADLER32(c, adler32_c, 1); #ifdef ARM_NEON -BENCHMARK_ADLER32(neon, adler32_neon, arm_cpu_has_neon); +BENCHMARK_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon); #endif #ifdef PPC_VMX -BENCHMARK_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec); +BENCHMARK_ADLER32(vmx, adler32_vmx, test_cpu_features.power.has_altivec); #endif #ifdef POWER8_VSX -BENCHMARK_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07); +BENCHMARK_ADLER32(power8, adler32_power8, test_cpu_features.power.has_arch_2_07); #endif #ifdef X86_SSSE3 -BENCHMARK_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3); +BENCHMARK_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3); #endif #ifdef X86_AVX2 -BENCHMARK_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2); +BENCHMARK_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2); #endif #ifdef X86_AVX512 -BENCHMARK_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512); +BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512); #endif #ifdef X86_AVX512VNNI -BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni); +BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni); #endif diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc index d508a004a..cbee780b7 100644 --- a/test/benchmarks/benchmark_adler32_copy.cc +++ 
b/test/benchmarks/benchmark_adler32_copy.cc @@ -12,7 +12,7 @@ extern "C" { # include "zbuild.h" # include "zutil_p.h" -# include "cpu_features.h" +# include "../test_cpu_features.h" } #define MAX_RANDOM_INTS (1024 * 1024) @@ -87,32 +87,32 @@ BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1); #ifdef ARM_NEON /* If we inline this copy for neon, the function would go here */ -//BENCHMARK_ADLER32_COPY(neon, adler32_neon, arm_cpu_has_neon); -BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, arm_cpu_has_neon); +//BENCHMARK_ADLER32_COPY(neon, adler32_neon, test_cpu_features.arm.has_neon); +BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, test_cpu_features.arm.has_neon); #endif #ifdef PPC_VMX -//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, power_cpu_has_altivec); -BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, power_cpu_has_altivec); +//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, test_cpu_features.power.has_altivec); +BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, test_cpu_features.power.has_altivec); #endif #ifdef POWER8_VSX -//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, power_cpu_has_arch_2_07); -BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, power_cpu_has_arch_2_07); +//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, test_cpu_features.power.has_arch_2_07); +BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, test_cpu_features.power.has_arch_2_07); #endif #ifdef X86_SSE42 -BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, x86_cpu_has_ssse3); -BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, x86_cpu_has_sse42); +BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, test_cpu_features.x86.has_ssse3); +BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, test_cpu_features.x86.has_sse42); #endif #ifdef X86_AVX2 -BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, 
x86_cpu_has_avx2); -BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, x86_cpu_has_avx2); +BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x86.has_avx2); +BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2); #endif #ifdef X86_AVX512 -BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, x86_cpu_has_avx512); -BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, x86_cpu_has_avx512); +BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512); +BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512); #endif #ifdef X86_AVX512VNNI -BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, x86_cpu_has_avx512vnni); -BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, x86_cpu_has_avx512vnni); +BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni); +BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni); #endif diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc index 54f6b14b8..00c6cc6f6 100644 --- a/test/benchmarks/benchmark_compare256.cc +++ b/test/benchmarks/benchmark_compare256.cc @@ -10,7 +10,7 @@ extern "C" { # include "zbuild.h" # include "zutil_p.h" -# include "cpu_features.h" +# include "../test_cpu_features.h" } #define MAX_COMPARE_SIZE (256) @@ -71,14 +71,14 @@ BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1); #endif #endif #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -BENCHMARK_COMPARE256(sse2, compare256_sse2, x86_cpu_has_sse2); +BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2); #endif #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -BENCHMARK_COMPARE256(avx2, compare256_avx2, x86_cpu_has_avx2); +BENCHMARK_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2); 
#endif #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) -BENCHMARK_COMPARE256(neon, compare256_neon, arm_cpu_has_neon); +BENCHMARK_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon); #endif #ifdef POWER9 -BENCHMARK_COMPARE256(power9, compare256_power9, power_cpu_has_arch_3_00); +BENCHMARK_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00); #endif diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc index b5ecda517..b2b9673d9 100644 --- a/test/benchmarks/benchmark_crc32.cc +++ b/test/benchmarks/benchmark_crc32.cc @@ -11,7 +11,7 @@ extern "C" { # include "zbuild.h" # include "zutil_p.h" -# include "cpu_features.h" +# include "../test_cpu_features.h" } #define MAX_RANDOM_INTS (1024 * 1024) @@ -58,12 +58,12 @@ public: BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1); #ifdef ARM_ACLE -BENCHMARK_CRC32(acle, crc32_acle, arm_cpu_has_crc32); +BENCHMARK_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32); #elif defined(POWER8_VSX) -BENCHMARK_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07); +BENCHMARK_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07); #elif defined(S390_CRC32_VX) -BENCHMARK_CRC32(vx, PREFIX(s390_crc32_vx), PREFIX(s390_cpu_has_vx)); +BENCHMARK_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx); #elif defined(X86_PCLMULQDQ_CRC) /* CRC32 fold does a memory copy while hashing */ -BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, x86_cpu_has_pclmulqdq); +BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq); #endif diff --git a/test/benchmarks/benchmark_main.cc b/test/benchmarks/benchmark_main.cc index ee8b61489..3ef2c5e87 100644 --- a/test/benchmarks/benchmark_main.cc +++ b/test/benchmarks/benchmark_main.cc @@ -10,13 +10,15 @@ #ifndef BUILD_ALT extern "C" { # include "zbuild.h" -# include "cpu_features.h" +# include "../test_cpu_features.h" + + struct cpu_features test_cpu_features; } #endif int main(int argc, char** argv) { #ifndef 
BUILD_ALT - cpu_check_features(); + cpu_check_features(&test_cpu_features); #endif ::benchmark::Initialize(&argc, argv); diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc index 5ffa7039d..238cc1f65 100644 --- a/test/benchmarks/benchmark_slidehash.cc +++ b/test/benchmarks/benchmark_slidehash.cc @@ -11,7 +11,7 @@ extern "C" { # include "zbuild.h" # include "zutil_p.h" # include "deflate.h" -# include "cpu_features.h" +# include "../test_cpu_features.h" } #define MAX_RANDOM_INTS 32768 @@ -69,18 +69,18 @@ public: BENCHMARK_SLIDEHASH(c, slide_hash_c, 1); #ifdef ARM_NEON -BENCHMARK_SLIDEHASH(neon, slide_hash_neon, arm_cpu_has_neon); +BENCHMARK_SLIDEHASH(neon, slide_hash_neon, test_cpu_features.arm.has_neon); #endif #ifdef POWER8_VSX -BENCHMARK_SLIDEHASH(power8, slide_hash_power8, power_cpu_has_arch_2_07); +BENCHMARK_SLIDEHASH(power8, slide_hash_power8, test_cpu_features.power.has_arch_2_07); #endif #ifdef PPC_VMX -BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, power_cpu_has_altivec); +BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, test_cpu_features.power.has_altivec); #endif #ifdef X86_SSE2 -BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, x86_cpu_has_sse2); +BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2); #endif #ifdef X86_AVX2 -BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, x86_cpu_has_avx2); +BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, test_cpu_features.x86.has_avx2); #endif diff --git a/test/test_adler32.cc b/test/test_adler32.cc index 7f88f2556..4dfe63f20 100644 --- a/test/test_adler32.cc +++ b/test/test_adler32.cc @@ -10,7 +10,7 @@ extern "C" { # include "zbuild.h" -# include "cpu_features.h" +# include "test_cpu_features.h" } #include @@ -365,22 +365,22 @@ INSTANTIATE_TEST_SUITE_P(adler32, adler32_variant, testing::ValuesIn(tests)); TEST_ADLER32(c, adler32_c, 1) #ifdef ARM_NEON -TEST_ADLER32(neon, adler32_neon, arm_cpu_has_neon) +TEST_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon) #elif 
defined(POWER8_VSX) -TEST_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07) +TEST_ADLER32(power8, adler32_power8, test_cpu_features.power.has_arch_2_07) #elif defined(PPC_VMX) -TEST_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec) +TEST_ADLER32(vmx, adler32_vmx, test_cpu_features.power.has_altivec) #endif #ifdef X86_SSSE3 -TEST_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3) +TEST_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3) #endif #ifdef X86_AVX2 -TEST_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2) +TEST_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2) #endif #ifdef X86_AVX512 -TEST_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512) +TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512) #endif #ifdef X86_AVX512VNNI -TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni) +TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni) #endif diff --git a/test/test_compare256.cc b/test/test_compare256.cc index 663ad9633..f920d1d20 100644 --- a/test/test_compare256.cc +++ b/test/test_compare256.cc @@ -10,7 +10,7 @@ extern "C" { # include "zbuild.h" # include "zutil_p.h" -# include "cpu_features.h" +# include "test_cpu_features.h" } #include @@ -70,14 +70,14 @@ TEST_COMPARE256(unaligned_64, compare256_unaligned_64, 1) #endif #endif #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -TEST_COMPARE256(sse2, compare256_sse2, x86_cpu_has_sse2) +TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2) #endif #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -TEST_COMPARE256(avx2, compare256_avx2, x86_cpu_has_avx2) +TEST_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2) #endif #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) -TEST_COMPARE256(neon, compare256_neon, arm_cpu_has_neon) +TEST_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon) #endif #ifdef POWER9 -TEST_COMPARE256(power9, compare256_power9, power_cpu_has_arch_3_00) 
+TEST_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00) #endif diff --git a/test/test_cpu_features.h b/test/test_cpu_features.h new file mode 100644 index 000000000..1bb4b13a0 --- /dev/null +++ b/test/test_cpu_features.h @@ -0,0 +1,8 @@ +#ifndef TEST_CPU_FEATURES_H +#define TEST_CPU_FEATURES_H + +#include "cpu_features.h" + +extern struct cpu_features test_cpu_features; + +#endif diff --git a/test/test_crc32.cc b/test/test_crc32.cc index 4d0b5b966..f194b4ccf 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -12,7 +12,7 @@ extern "C" { # include "zbuild.h" # include "zutil_p.h" -# include "cpu_features.h" +# include "test_cpu_features.h" } #include @@ -209,14 +209,14 @@ INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests)); TEST_CRC32(braid, PREFIX(crc32_braid), 1) #ifdef ARM_ACLE -TEST_CRC32(acle, crc32_acle, arm_cpu_has_crc32) +TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32) #elif defined(POWER8_VSX_CRC32) -TEST_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07) +TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07) #elif defined(S390_CRC32_VX) -TEST_CRC32(vx, PREFIX(s390_crc32_vx), PREFIX(s390_cpu_has_vx)) +TEST_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx) #elif defined(X86_PCLMULQDQ_CRC) -TEST_CRC32(pclmulqdq, crc32_pclmulqdq, x86_cpu_has_pclmulqdq) +TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq) # ifdef X86_VPCLMULQDQ_CRC -TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (x86_cpu_has_pclmulqdq && x86_cpu_has_avx512 && x86_cpu_has_vpclmulqdq)) +TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq)) # endif #endif diff --git a/test/test_main.cc b/test/test_main.cc index c129db259..82b39e487 100644 --- a/test/test_main.cc +++ b/test/test_main.cc @@ -6,12 +6,14 @@ extern "C" { # include "zbuild.h" -# include "cpu_features.h" +# include 
"test_cpu_features.h" + + struct cpu_features test_cpu_features; } GTEST_API_ int main(int argc, char **argv) { printf("Running main() from %s\n", __FILE__); - cpu_check_features(); + cpu_check_features(&test_cpu_features); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +}