From: Nathan Moinvaziri
Date: Sat, 22 Jan 2022 17:37:13 +0000 (-0800)
Subject: Group together functable definitions that use deflate_state.
X-Git-Tag: 2.1.0-beta1~427
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=784c56346522d40725e6043bf6aebdabca47a23e;p=thirdparty%2Fzlib-ng.git

Group together functable definitions that use deflate_state.
---

diff --git a/cpu_features.h b/cpu_features.h
index 7dea3a683..4d09646e7 100644
--- a/cpu_features.h
+++ b/cpu_features.h
@@ -22,46 +22,6 @@ extern void cpu_check_features();
 
-/* update_hash */
-extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
-#ifdef X86_SSE42_CRC_HASH
-extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val);
-#elif defined(ARM_ACLE_CRC_HASH)
-extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
-#endif
-
-/* insert_string */
-extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
-#ifdef X86_SSE42_CRC_HASH
-extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
-#elif defined(ARM_ACLE_CRC_HASH)
-extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
-#endif
-
-/* quick_insert_string */
-extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
-#ifdef X86_SSE42_CRC_HASH
-extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
-#elif defined(ARM_ACLE_CRC_HASH)
-extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
-#endif
-
-/* slide_hash */
-#ifdef X86_SSE2
-void slide_hash_sse2(deflate_state *s);
-#elif defined(ARM_NEON_SLIDEHASH)
-void slide_hash_neon(deflate_state *s);
-#endif
-#if defined(PPC_VMX_SLIDEHASH)
-void slide_hash_vmx(deflate_state *s);
-#endif
-#if defined(POWER8_VSX_SLIDEHASH)
-void slide_hash_power8(deflate_state *s);
-#endif
-#ifdef X86_AVX2
-void slide_hash_avx2(deflate_state *s);
-#endif
-
 /* adler32 */
 extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
 #ifdef ARM_NEON_ADLER32
@@ -162,6 +122,14 @@ extern uint32_t compare256_unaligned_avx2(const uint8_t *src0, const uint8_t *sr
 #endif
 #endif
 
+/* insert_string */
+extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
+#ifdef X86_SSE42_CRC_HASH
+extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
+#elif defined(ARM_ACLE_CRC_HASH)
+extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
+#endif
+
 /* longest_match */
 extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
 #ifdef UNALIGNED_OK
@@ -194,4 +162,36 @@ extern uint32_t longest_match_slow_unaligned_avx2(deflate_state *const s, Pos cu
 #endif
 #endif
 
+/* quick_insert_string */
+extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
+#ifdef X86_SSE42_CRC_HASH
+extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
+#elif defined(ARM_ACLE_CRC_HASH)
+extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
+#endif
+
+/* slide_hash */
+#ifdef X86_SSE2
+void slide_hash_sse2(deflate_state *s);
+#elif defined(ARM_NEON_SLIDEHASH)
+void slide_hash_neon(deflate_state *s);
+#endif
+#if defined(PPC_VMX_SLIDEHASH)
+void slide_hash_vmx(deflate_state *s);
+#endif
+#if defined(POWER8_VSX_SLIDEHASH)
+void slide_hash_power8(deflate_state *s);
+#endif
+#ifdef X86_AVX2
+void slide_hash_avx2(deflate_state *s);
+#endif
+
+/* update_hash */
+extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
+#ifdef X86_SSE42_CRC_HASH
+extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val);
+#elif defined(ARM_ACLE_CRC_HASH)
+extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
+#endif
+
 #endif
diff --git a/functable.c b/functable.c
index edcbba05d..19d7258e1 100644
--- a/functable.c
+++ b/functable.c
@@ -96,6 +96,56 @@ Z_INTERNAL void slide_hash_stub(deflate_state *s) {
     functable.slide_hash(s);
 }
 
+Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
+
+#ifdef UNALIGNED_OK
+#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+    functable.longest_match = &longest_match_unaligned_64;
+#  elif defined(HAVE_BUILTIN_CTZ)
+    functable.longest_match = &longest_match_unaligned_32;
+#  else
+    functable.longest_match = &longest_match_unaligned_16;
+#  endif
+#  ifdef X86_SSE42_CMP_STR
+    if (x86_cpu_has_sse42)
+        functable.longest_match = &longest_match_unaligned_sse4;
+#  endif
+#  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.longest_match = &longest_match_unaligned_avx2;
+#  endif
+#else
+    functable.longest_match = &longest_match_c;
+#endif
+
+    return functable.longest_match(s, cur_match);
+}
+
+Z_INTERNAL uint32_t longest_match_slow_stub(deflate_state *const s, Pos cur_match) {
+
+#ifdef UNALIGNED_OK
+#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+    functable.longest_match_slow = &longest_match_slow_unaligned_64;
+#  elif defined(HAVE_BUILTIN_CTZ)
+    functable.longest_match_slow = &longest_match_slow_unaligned_32;
+#  else
+    functable.longest_match_slow = &longest_match_slow_unaligned_16;
+#  endif
+#  ifdef X86_SSE42_CMP_STR
+    if (x86_cpu_has_sse42)
+        functable.longest_match_slow = &longest_match_slow_unaligned_sse4;
+#  endif
+#  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.longest_match_slow = &longest_match_slow_unaligned_avx2;
+#  endif
+#else
+    functable.longest_match_slow = &longest_match_slow_c;
+#endif
+
+    return functable.longest_match_slow(s, cur_match);
+}
+
 Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
     // Initialize default
     functable.adler32 = &adler32_c;
@@ -373,74 +423,24 @@ Z_INTERNAL uint32_t compare256_stub(const uint8_t *src0, const uint8_t *src1) {
     return functable.compare256(src0, src1);
 }
 
-Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
-
-#ifdef UNALIGNED_OK
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-    functable.longest_match = &longest_match_unaligned_64;
-#  elif defined(HAVE_BUILTIN_CTZ)
-    functable.longest_match = &longest_match_unaligned_32;
-#  else
-    functable.longest_match = &longest_match_unaligned_16;
-#  endif
-#  ifdef X86_SSE42_CMP_STR
-    if (x86_cpu_has_sse42)
-        functable.longest_match = &longest_match_unaligned_sse4;
-#  endif
-#  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_avx2)
-        functable.longest_match = &longest_match_unaligned_avx2;
-#  endif
-#else
-    functable.longest_match = &longest_match_c;
-#endif
-
-    return functable.longest_match(s, cur_match);
-}
-
-Z_INTERNAL uint32_t longest_match_slow_stub(deflate_state *const s, Pos cur_match) {
-
-#ifdef UNALIGNED_OK
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-    functable.longest_match_slow = &longest_match_slow_unaligned_64;
-#  elif defined(HAVE_BUILTIN_CTZ)
-    functable.longest_match_slow = &longest_match_slow_unaligned_32;
-#  else
-    functable.longest_match_slow = &longest_match_slow_unaligned_16;
-#  endif
-#  ifdef X86_SSE42_CMP_STR
-    if (x86_cpu_has_sse42)
-        functable.longest_match_slow = &longest_match_slow_unaligned_sse4;
-#  endif
-#  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-    if (x86_cpu_has_avx2)
-        functable.longest_match_slow = &longest_match_slow_unaligned_avx2;
-#  endif
-#else
-    functable.longest_match_slow = &longest_match_slow_c;
-#endif
-
-    return functable.longest_match_slow(s, cur_match);
-}
-
 /* functable init */
 Z_INTERNAL Z_TLS struct functable_s functable = {
-    update_hash_stub,
-    insert_string_stub,
-    quick_insert_string_stub,
     adler32_stub,
     crc32_stub,
     crc32_fold_reset_stub,
     crc32_fold_copy_stub,
     crc32_fold_final_stub,
-    slide_hash_stub,
     compare256_stub,
-    longest_match_stub,
-    longest_match_slow_stub,
     chunksize_stub,
     chunkcopy_stub,
     chunkcopy_safe_stub,
     chunkunroll_stub,
     chunkmemset_stub,
-    chunkmemset_safe_stub
+    chunkmemset_safe_stub,
+    insert_string_stub,
+    longest_match_stub,
+    longest_match_slow_stub,
+    quick_insert_string_stub,
+    slide_hash_stub,
+    update_hash_stub
 };
diff --git a/functable.h b/functable.h
index 277e34a24..949c5b1be 100644
--- a/functable.h
+++ b/functable.h
@@ -10,24 +10,24 @@
 #include "crc32_fold.h"
 
 struct functable_s {
-    uint32_t (* update_hash)        (deflate_state *const s, uint32_t h, uint32_t val);
-    void     (* insert_string)      (deflate_state *const s, uint32_t str, uint32_t count);
-    Pos      (* quick_insert_string)(deflate_state *const s, uint32_t str);
     uint32_t (* adler32)            (uint32_t adler, const unsigned char *buf, size_t len);
     uint32_t (* crc32)              (uint32_t crc, const unsigned char *buf, uint64_t len);
     uint32_t (* crc32_fold_reset)   (crc32_fold *crc);
     void     (* crc32_fold_copy)    (crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
     uint32_t (* crc32_fold_final)   (crc32_fold *crc);
-    void     (* slide_hash)         (deflate_state *s);
     uint32_t (* compare256)         (const uint8_t *src0, const uint8_t *src1);
-    uint32_t (* longest_match)      (deflate_state *const s, Pos cur_match);
-    uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
     uint32_t (* chunksize)          (void);
     uint8_t* (* chunkcopy)          (uint8_t *out, uint8_t const *from, unsigned len);
     uint8_t* (* chunkcopy_safe)     (uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
     uint8_t* (* chunkunroll)        (uint8_t *out, unsigned *dist, unsigned *len);
     uint8_t* (* chunkmemset)        (uint8_t *out, unsigned dist, unsigned len);
     uint8_t* (* chunkmemset_safe)   (uint8_t *out, unsigned dist, unsigned len, unsigned left);
+    void     (* insert_string)      (deflate_state *const s, uint32_t str, uint32_t count);
+    uint32_t (* longest_match)      (deflate_state *const s, Pos cur_match);
+    uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
+    Pos      (* quick_insert_string)(deflate_state *const s, uint32_t str);
+    void     (* slide_hash)         (deflate_state *s);
+    uint32_t (* update_hash)        (deflate_state *const s, uint32_t h, uint32_t val);
 };
 
 Z_INTERNAL extern Z_TLS struct functable_s functable;
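
For context, each *_stub routine moved in the diff above follows zlib-ng's lazy-dispatch pattern: the first call through the function table picks the best implementation for the current CPU, overwrites the table entry, and then forwards the call, so later calls go straight to the selected routine. The following is a minimal, self-contained C sketch of that pattern; the names used here (example_table_s, sum_stub, sum_scalar, sum_unrolled, cpu_has_fast_path) are hypothetical stand-ins for illustration only, not part of zlib-ng.

#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical feature probe, standing in for flags such as x86_cpu_has_avx2. */
static int cpu_has_fast_path(void) {
    return 1; /* pretend the optimized variant is supported on this CPU */
}

/* Two interchangeable implementations of the same hot routine. */
static uint32_t sum_scalar(const uint8_t *buf, size_t len) {
    uint32_t sum = 0;
    for (size_t i = 0; i < len; i++)
        sum += buf[i];
    return sum;
}

static uint32_t sum_unrolled(const uint8_t *buf, size_t len) {
    uint32_t sum = 0;
    size_t i = 0;
    for (; i + 4 <= len; i += 4)
        sum += buf[i] + buf[i + 1] + buf[i + 2] + buf[i + 3];
    for (; i < len; i++)
        sum += buf[i];
    return sum;
}

/* One function pointer per hot routine, mirroring struct functable_s. */
struct example_table_s {
    uint32_t (* sum)(const uint8_t *buf, size_t len);
};

static uint32_t sum_stub(const uint8_t *buf, size_t len);

/* The table starts out filled with stubs, like the functable initializer above. */
static struct example_table_s example_table = { sum_stub };

/* Stub: runs once, patches the table with the best implementation for this
 * CPU, then forwards the original call through the freshly patched pointer. */
static uint32_t sum_stub(const uint8_t *buf, size_t len) {
    example_table.sum = &sum_scalar;        /* portable default */
    if (cpu_has_fast_path())
        example_table.sum = &sum_unrolled;  /* upgrade when supported */
    return example_table.sum(buf, len);
}

int main(void) {
    const uint8_t data[] = {1, 2, 3, 4, 5, 6, 7};
    /* First call dispatches through sum_stub and rewrites example_table.sum;
     * subsequent calls go directly to the selected implementation. */
    printf("%" PRIu32 "\n", example_table.sum(data, sizeof(data)));
    printf("%" PRIu32 "\n", example_table.sum(data, sizeof(data)));
    return 0;
}

The reordering itself does not change this behavior: it only moves the entries that take a deflate_state argument (insert_string, longest_match, longest_match_slow, quick_insert_string, slide_hash, update_hash) into one contiguous, alphabetized group in cpu_features.h, functable.c, and functable.h.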