From: Pavel P Date: Sun, 22 Jan 2023 10:50:04 +0000 (+0300) Subject: Use local functable variable instead of standalone function pointers X-Git-Tag: 2.1.0-beta1~74 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=709a710f6fb98426bb6f7dea6b505f0fcab3eb99;p=thirdparty%2Fzlib-ng.git Use local functable variable instead of standalone function pointers --- diff --git a/functable.c b/functable.c index bbfbd8e7a..28d7d214a 100644 --- a/functable.c +++ b/functable.c @@ -12,396 +12,380 @@ #include "cpu_features.h" static void init_functable(void) { - uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, uint64_t len); - uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len); - uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, uint64_t len); - uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc); - void (* crc32_fold_copy) (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, uint64_t len); - void (* crc32_fold) (struct crc32_fold_s *crc, const uint8_t *src, uint64_t len, uint32_t init_crc); - uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc); - uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1); - uint32_t (* chunksize) (void); - uint8_t* (* chunkcopy) (uint8_t *out, uint8_t const *from, unsigned len); - uint8_t* (* chunkunroll) (uint8_t *out, unsigned *dist, unsigned *len); - uint8_t* (* chunkmemset) (uint8_t *out, unsigned dist, unsigned len); - uint8_t* (* chunkmemset_safe) (uint8_t *out, unsigned dist, unsigned len, unsigned left); - void (* insert_string) (deflate_state *const s, uint32_t str, uint32_t count); - uint32_t (* longest_match) (deflate_state *const s, Pos cur_match); - uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match); - Pos (* quick_insert_string)(deflate_state *const s, uint32_t str); - void (* slide_hash) (deflate_state *s); - uint32_t (* update_hash) (deflate_state *const s, uint32_t h, uint32_t val); + struct functable_s* functable_ptr = &functable; + struct functable_s functable; + Assert(functable_ptr != &functable, + "functable_ptr has to point to global functable"); cpu_check_features(); // update_hash_stub: - update_hash = &update_hash_c; + functable.update_hash = &update_hash_c; #ifdef X86_SSE42_CRC_HASH if (x86_cpu_has_sse42) - update_hash = &update_hash_sse4; + functable.update_hash = &update_hash_sse4; #elif defined(ARM_ACLE_CRC_HASH) if (arm_cpu_has_crc32) - update_hash = &update_hash_acle; + functable.update_hash = &update_hash_acle; #endif // insert_string_stub: - insert_string = &insert_string_c; + functable.insert_string = &insert_string_c; #ifdef X86_SSE42_CRC_HASH if (x86_cpu_has_sse42) - insert_string = &insert_string_sse4; + functable.insert_string = &insert_string_sse4; #elif defined(ARM_ACLE_CRC_HASH) if (arm_cpu_has_crc32) - insert_string = &insert_string_acle; + functable.insert_string = &insert_string_acle; #endif // quick_insert_string_stub: - quick_insert_string = &quick_insert_string_c; + functable.quick_insert_string = &quick_insert_string_c; #ifdef X86_SSE42_CRC_HASH if (x86_cpu_has_sse42) - quick_insert_string = &quick_insert_string_sse4; + functable.quick_insert_string = &quick_insert_string_sse4; #elif defined(ARM_ACLE_CRC_HASH) if (arm_cpu_has_crc32) - quick_insert_string = &quick_insert_string_acle; + functable.quick_insert_string = &quick_insert_string_acle; #endif // slide_hash_stub: - slide_hash = &slide_hash_c; + functable.slide_hash = &slide_hash_c; #ifdef X86_SSE2 # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif - slide_hash = &slide_hash_sse2; + functable.slide_hash = &slide_hash_sse2; #elif defined(ARM_NEON_SLIDEHASH) # ifndef ARM_NOCHECK_NEON if (arm_cpu_has_neon) # endif - slide_hash = &slide_hash_neon; + functable.slide_hash = &slide_hash_neon; #endif #ifdef X86_AVX2 if (x86_cpu_has_avx2) - slide_hash = &slide_hash_avx2; + functable.slide_hash = &slide_hash_avx2; #endif #ifdef PPC_VMX_SLIDEHASH if (power_cpu_has_altivec) - slide_hash = &slide_hash_vmx; + functable.slide_hash = &slide_hash_vmx; #endif #ifdef POWER8_VSX_SLIDEHASH if (power_cpu_has_arch_2_07) - slide_hash = &slide_hash_power8; + functable.slide_hash = &slide_hash_power8; #endif // longest_match_stub: #ifdef UNALIGNED_OK # if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) - longest_match = &longest_match_unaligned_64; + functable.longest_match = &longest_match_unaligned_64; # elif defined(HAVE_BUILTIN_CTZ) - longest_match = &longest_match_unaligned_32; + functable.longest_match = &longest_match_unaligned_32; # else - longest_match = &longest_match_unaligned_16; + functable.longest_match = &longest_match_unaligned_16; # endif #else - longest_match = &longest_match_c; + functable.longest_match = &longest_match_c; #endif #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) if (x86_cpu_has_sse2) - longest_match = &longest_match_sse2; + functable.longest_match = &longest_match_sse2; #endif #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) if (x86_cpu_has_avx2) - longest_match = &longest_match_avx2; + functable.longest_match = &longest_match_avx2; #endif #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) if (arm_cpu_has_neon) - longest_match = &longest_match_neon; + functable.longest_match = &longest_match_neon; #endif #ifdef POWER9 if (power_cpu_has_arch_3_00) - longest_match = &longest_match_power9; + functable.longest_match = &longest_match_power9; #endif // longest_match_slow_stub: #ifdef UNALIGNED_OK # if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) - longest_match_slow = &longest_match_slow_unaligned_64; + functable.longest_match_slow = &longest_match_slow_unaligned_64; # elif defined(HAVE_BUILTIN_CTZ) - longest_match_slow = &longest_match_slow_unaligned_32; + functable.longest_match_slow = &longest_match_slow_unaligned_32; # else - longest_match_slow = &longest_match_slow_unaligned_16; + functable.longest_match_slow = &longest_match_slow_unaligned_16; # endif #else - longest_match_slow = &longest_match_slow_c; + functable.longest_match_slow = &longest_match_slow_c; #endif #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) if (x86_cpu_has_sse2) - longest_match_slow = &longest_match_slow_sse2; + functable.longest_match_slow = &longest_match_slow_sse2; #endif #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) if (x86_cpu_has_avx2) - longest_match_slow = &longest_match_slow_avx2; + functable.longest_match_slow = &longest_match_slow_avx2; #endif #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) if (arm_cpu_has_neon) - longest_match_slow = &longest_match_slow_neon; + functable.longest_match_slow = &longest_match_slow_neon; #endif #ifdef POWER9 if (power_cpu_has_arch_3_00) - longest_match_slow = &longest_match_slow_power9; + functable.longest_match_slow = &longest_match_slow_power9; #endif // adler32_stub: - adler32 = &adler32_c; + functable.adler32 = &adler32_c; #ifdef ARM_NEON_ADLER32 # ifndef ARM_NOCHECK_NEON if (arm_cpu_has_neon) # endif - adler32 = &adler32_neon; + functable.adler32 = &adler32_neon; #endif #ifdef X86_SSSE3_ADLER32 if (x86_cpu_has_ssse3) - adler32 = &adler32_ssse3; + functable.adler32 = &adler32_ssse3; #endif #ifdef X86_AVX2_ADLER32 if (x86_cpu_has_avx2) - adler32 = &adler32_avx2; + functable.adler32 = &adler32_avx2; #endif #ifdef X86_AVX512_ADLER32 if (x86_cpu_has_avx512) - adler32 = &adler32_avx512; + functable.adler32 = &adler32_avx512; #endif #ifdef X86_AVX512VNNI_ADLER32 if (x86_cpu_has_avx512vnni) - adler32 = &adler32_avx512_vnni; + functable.adler32 = &adler32_avx512_vnni; #endif #ifdef PPC_VMX_ADLER32 if (power_cpu_has_altivec) - adler32 = &adler32_vmx; + functable.adler32 = &adler32_vmx; #endif #ifdef POWER8_VSX_ADLER32 if (power_cpu_has_arch_2_07) - adler32 = &adler32_power8; + functable.adler32 = &adler32_power8; #endif // adler32_fold_copy_stub: - adler32_fold_copy = &adler32_fold_copy_c; + functable.adler32_fold_copy = &adler32_fold_copy_c; #ifdef X86_SSE42_ADLER32 if (x86_cpu_has_sse42) - adler32_fold_copy = &adler32_fold_copy_sse42; + functable.adler32_fold_copy = &adler32_fold_copy_sse42; #endif #ifdef X86_AVX2_ADLER32 if (x86_cpu_has_avx2) - adler32_fold_copy = &adler32_fold_copy_avx2; + functable.adler32_fold_copy = &adler32_fold_copy_avx2; #endif #ifdef X86_AVX512_ADLER32 if (x86_cpu_has_avx512) - adler32_fold_copy = &adler32_fold_copy_avx512; + functable.adler32_fold_copy = &adler32_fold_copy_avx512; #endif #ifdef X86_AVX512VNNI_ADLER32 if (x86_cpu_has_avx512vnni) - adler32_fold_copy = &adler32_fold_copy_avx512_vnni; + functable.adler32_fold_copy = &adler32_fold_copy_avx512_vnni; #endif // crc32_fold_reset_stub: - crc32_fold_reset = &crc32_fold_reset_c; + functable.crc32_fold_reset = &crc32_fold_reset_c; #ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) - crc32_fold_reset = &crc32_fold_pclmulqdq_reset; + functable.crc32_fold_reset = &crc32_fold_pclmulqdq_reset; #endif // crc32_fold_copy_stub: - crc32_fold_copy = &crc32_fold_copy_c; + functable.crc32_fold_copy = &crc32_fold_copy_c; #ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) - crc32_fold_copy = &crc32_fold_pclmulqdq_copy; + functable.crc32_fold_copy = &crc32_fold_pclmulqdq_copy; #endif // crc32_fold_stub: - crc32_fold = &crc32_fold_c; + functable.crc32_fold = &crc32_fold_c; #ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) - crc32_fold = &crc32_fold_pclmulqdq; + functable.crc32_fold = &crc32_fold_pclmulqdq; #endif // crc32_fold_final_stub: - crc32_fold_final = &crc32_fold_final_c; + functable.crc32_fold_final = &crc32_fold_final_c; #ifdef X86_PCLMULQDQ_CRC if (x86_cpu_has_pclmulqdq) - crc32_fold_final = &crc32_fold_pclmulqdq_final; + functable.crc32_fold_final = &crc32_fold_pclmulqdq_final; #endif //chunksize_stub: - chunksize = &chunksize_c; + functable.chunksize = &chunksize_c; #ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif - chunksize = &chunksize_sse2; + functable.chunksize = &chunksize_sse2; #endif #ifdef X86_AVX_CHUNKSET if (x86_cpu_has_avx2) - chunksize = &chunksize_avx; + functable.chunksize = &chunksize_avx; #endif #ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) - chunksize = &chunksize_neon; + functable.chunksize = &chunksize_neon; #endif #ifdef POWER8_VSX_CHUNKSET if (power_cpu_has_arch_2_07) - chunksize = &chunksize_power8; + functable.chunksize = &chunksize_power8; #endif // chunkcopy_stub: - chunkcopy = &chunkcopy_c; + functable.chunkcopy = &chunkcopy_c; #ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif - chunkcopy = &chunkcopy_sse2; + functable.chunkcopy = &chunkcopy_sse2; #endif #ifdef X86_AVX_CHUNKSET if (x86_cpu_has_avx2) - chunkcopy = &chunkcopy_avx; + functable.chunkcopy = &chunkcopy_avx; #endif #ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) - chunkcopy = &chunkcopy_neon; + functable.chunkcopy = &chunkcopy_neon; #endif #ifdef POWER8_VSX_CHUNKSET if (power_cpu_has_arch_2_07) - chunkcopy = &chunkcopy_power8; + functable.chunkcopy = &chunkcopy_power8; #endif // chunkunroll_stub: - chunkunroll = &chunkunroll_c; + functable.chunkunroll = &chunkunroll_c; #ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif - chunkunroll = &chunkunroll_sse2; + functable.chunkunroll = &chunkunroll_sse2; #endif #ifdef X86_AVX_CHUNKSET if (x86_cpu_has_avx2) - chunkunroll = &chunkunroll_avx; + functable.chunkunroll = &chunkunroll_avx; #endif #ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) - chunkunroll = &chunkunroll_neon; + functable.chunkunroll = &chunkunroll_neon; #endif #ifdef POWER8_VSX_CHUNKSET if (power_cpu_has_arch_2_07) - chunkunroll = &chunkunroll_power8; + functable.chunkunroll = &chunkunroll_power8; #endif // chunkmemset_stub: - chunkmemset = &chunkmemset_c; + functable.chunkmemset = &chunkmemset_c; #ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif - chunkmemset = &chunkmemset_sse2; + functable.chunkmemset = &chunkmemset_sse2; #endif #if defined(X86_SSE41) && defined(X86_SSE2) if (x86_cpu_has_sse41) - chunkmemset = &chunkmemset_sse41; + functable.chunkmemset = &chunkmemset_sse41; #endif #ifdef X86_AVX_CHUNKSET if (x86_cpu_has_avx2) - chunkmemset = &chunkmemset_avx; + functable.chunkmemset = &chunkmemset_avx; #endif #ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) - chunkmemset = &chunkmemset_neon; + functable.chunkmemset = &chunkmemset_neon; #endif #ifdef POWER8_VSX_CHUNKSET if (power_cpu_has_arch_2_07) - chunkmemset = &chunkmemset_power8; + functable.chunkmemset = &chunkmemset_power8; #endif // chunkmemset_safe_stub: - chunkmemset_safe = &chunkmemset_safe_c; + functable.chunkmemset_safe = &chunkmemset_safe_c; #ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif - chunkmemset_safe = &chunkmemset_safe_sse2; + functable.chunkmemset_safe = &chunkmemset_safe_sse2; #endif #if defined(X86_SSE41) && defined(X86_SSE2) if (x86_cpu_has_sse41) - chunkmemset_safe = &chunkmemset_safe_sse41; + functable.chunkmemset_safe = &chunkmemset_safe_sse41; #endif #ifdef X86_AVX_CHUNKSET if (x86_cpu_has_avx2) - chunkmemset_safe = &chunkmemset_safe_avx; + functable.chunkmemset_safe = &chunkmemset_safe_avx; #endif #ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) - chunkmemset_safe = &chunkmemset_safe_neon; + functable.chunkmemset_safe = &chunkmemset_safe_neon; #endif #ifdef POWER8_VSX_CHUNKSET if (power_cpu_has_arch_2_07) - chunkmemset_safe = &chunkmemset_safe_power8; + functable.chunkmemset_safe = &chunkmemset_safe_power8; #endif // crc32_stub: - crc32 = &PREFIX(crc32_braid); + functable.crc32 = &PREFIX(crc32_braid); #ifdef ARM_ACLE_CRC_HASH if (arm_cpu_has_crc32) - crc32 = &crc32_acle; + functable.crc32 = &crc32_acle; #elif defined(POWER8_VSX_CRC32) if (power_cpu_has_arch_2_07) - crc32 = &crc32_power8; + functable.crc32 = &crc32_power8; #elif defined(S390_CRC32_VX) if (PREFIX(s390_cpu_has_vx)) - crc32 = &PREFIX(s390_crc32_vx); + functable.crc32 = &PREFIX(s390_crc32_vx); #elif defined(X86_PCLMULQDQ_CRC) if (x86_cpu_has_pclmulqdq) - crc32 = &crc32_pclmulqdq; + functable.crc32 = &crc32_pclmulqdq; #endif // compare256_stub: #ifdef UNALIGNED_OK # if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) - compare256 = &compare256_unaligned_64; + functable.compare256 = &compare256_unaligned_64; # elif defined(HAVE_BUILTIN_CTZ) - compare256 = &compare256_unaligned_32; + functable.compare256 = &compare256_unaligned_32; # else - compare256 = &compare256_unaligned_16; + functable.compare256 = &compare256_unaligned_16; # endif #else - compare256 = &compare256_c; + functable.compare256 = &compare256_c; #endif #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) if (x86_cpu_has_sse2) - compare256 = &compare256_sse2; + functable.compare256 = &compare256_sse2; #endif #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) if (x86_cpu_has_avx2) - compare256 = &compare256_avx2; + functable.compare256 = &compare256_avx2; #endif #ifdef POWER9 if (power_cpu_has_arch_3_00) - compare256 = &compare256_power9; -#endif - - - functable.adler32 = adler32; - functable.adler32_fold_copy = adler32_fold_copy; - functable.crc32 = crc32; - functable.crc32_fold_reset = crc32_fold_reset; - functable.crc32_fold_copy = crc32_fold_copy; - functable.crc32_fold = crc32_fold; - functable.crc32_fold_final = crc32_fold_final; - functable.compare256 = compare256; - functable.chunksize = chunksize; - functable.chunkcopy = chunkcopy; - functable.chunkunroll = chunkunroll; - functable.chunkmemset = chunkmemset; - functable.chunkmemset_safe = chunkmemset_safe; - functable.insert_string = insert_string; - functable.longest_match = longest_match; - functable.longest_match_slow = longest_match_slow; - functable.quick_insert_string = quick_insert_string; - functable.slide_hash = slide_hash; - functable.update_hash = update_hash; + functable.compare256 = &compare256_power9; +#endif + + functable_ptr->adler32 = functable.adler32; + functable_ptr->adler32_fold_copy = functable.adler32_fold_copy; + functable_ptr->crc32 = functable.crc32; + functable_ptr->crc32_fold_reset = functable.crc32_fold_reset; + functable_ptr->crc32_fold_copy = functable.crc32_fold_copy; + functable_ptr->crc32_fold = functable.crc32_fold; + functable_ptr->crc32_fold_final = functable.crc32_fold_final; + functable_ptr->compare256 = functable.compare256; + functable_ptr->chunksize = functable.chunksize; + functable_ptr->chunkcopy = functable.chunkcopy; + functable_ptr->chunkunroll = functable.chunkunroll; + functable_ptr->chunkmemset = functable.chunkmemset; + functable_ptr->chunkmemset_safe = functable.chunkmemset_safe; + functable_ptr->insert_string = functable.insert_string; + functable_ptr->longest_match = functable.longest_match; + functable_ptr->longest_match_slow = functable.longest_match_slow; + functable_ptr->quick_insert_string = functable.quick_insert_string; + functable_ptr->slide_hash = functable.slide_hash; + functable_ptr->update_hash = functable.update_hash; } /* stub functions */