From: Cameron Cawley
Date: Thu, 2 Oct 2025 16:14:09 +0000 (+0100)
Subject: Inline the CHUNKSIZE function
X-Git-Tag: 2.3.0-rc1~18
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2801a6f97b9c59aa2df8ff0dce1c3f800538e2ca;p=thirdparty%2Fzlib-ng.git

Inline the CHUNKSIZE function
---

diff --git a/arch/arm/arm_functions.h b/arch/arm/arm_functions.h
index 8a89c7c4f..f313655e7 100644
--- a/arch/arm/arm_functions.h
+++ b/arch/arm/arm_functions.h
@@ -8,7 +8,6 @@
 #ifdef ARM_NEON
 uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_fold_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint32_t chunksize_neon(void);
 uint8_t* chunkmemset_safe_neon(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 
 # ifdef HAVE_BUILTIN_CTZLL
@@ -45,8 +44,6 @@ void slide_hash_armv6(deflate_state *s);
 # define native_adler32_fold_copy adler32_fold_copy_neon
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_neon
-# undef native_chunksize
-# define native_chunksize chunksize_neon
 # undef native_inflate_fast
 # define native_inflate_fast inflate_fast_neon
 # undef native_slide_hash
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index e2073b8f1..21358f069 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -19,7 +19,6 @@ uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 
 uint8_t* chunkmemset_safe_c(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
-uint32_t chunksize_c(void);
 
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
 
@@ -51,7 +50,6 @@ void slide_hash_c(deflate_state *s);
 # define native_adler32 adler32_c
 # define native_adler32_fold_copy adler32_fold_copy_c
 # define native_chunkmemset_safe chunkmemset_safe_c
-# define native_chunksize chunksize_c
 # define native_crc32 crc32_c
 # define native_crc32_fold crc32_fold_c
 # define native_crc32_fold_copy crc32_fold_copy_c
diff --git a/arch/loongarch/loongarch_functions.h b/arch/loongarch/loongarch_functions.h
index 3bc8344da..798c1484c 100644
--- a/arch/loongarch/loongarch_functions.h
+++ b/arch/loongarch/loongarch_functions.h
@@ -23,7 +23,6 @@ void slide_hash_lsx(deflate_state *s);
 uint32_t longest_match_lsx(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_lsx(deflate_state *const s, Pos cur_match);
 # endif
-uint32_t chunksize_lsx(void);
 uint8_t* chunkmemset_safe_lsx(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 void inflate_fast_lsx(PREFIX3(stream) *strm, uint32_t start);
 #endif
@@ -37,7 +36,6 @@ void slide_hash_lasx(deflate_state *s);
 uint32_t longest_match_lasx(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_lasx(deflate_state *const s, Pos cur_match);
 # endif
-uint32_t chunksize_lasx(void);
 uint8_t* chunkmemset_safe_lasx(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start);
 #endif
@@ -59,8 +57,6 @@ void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start);
 # define native_adler32_fold_copy adler32_fold_copy_lsx
 # undef native_slide_hash
 # define native_slide_hash slide_hash_lsx
-# undef native_chunksize
-# define native_chunksize chunksize_lsx
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_lsx
 # undef native_inflate_fast
@@ -81,8 +77,6 @@ void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start);
 # define native_adler32_fold_copy adler32_fold_copy_lasx
 # undef native_slide_hash
 # define native_slide_hash slide_hash_lasx
-# undef native_chunksize
-# define native_chunksize chunksize_lasx
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_lasx
 # undef native_inflate_fast
diff --git a/arch/power/power_functions.h b/arch/power/power_functions.h
index 44d36af83..6508b979d 100644
--- a/arch/power/power_functions.h
+++ b/arch/power/power_functions.h
@@ -14,7 +14,6 @@ void slide_hash_vmx(deflate_state *s);
 
 #ifdef POWER8_VSX
 uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t chunksize_power8(void);
 uint8_t* chunkmemset_safe_power8(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
 void slide_hash_power8(deflate_state *s);
@@ -42,8 +41,6 @@ uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
 # define native_adler32 adler32_power8
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_power8
-# undef native_chunksize
-# define native_chunksize chunksize_power8
 # undef native_inflate_fast
 # define native_inflate_fast inflate_fast_power8
 # undef native_slide_hash
diff --git a/arch/riscv/riscv_functions.h b/arch/riscv/riscv_functions.h
index 86b68a6df..d68dded92 100644
--- a/arch/riscv/riscv_functions.h
+++ b/arch/riscv/riscv_functions.h
@@ -12,7 +12,6 @@
 #ifdef RISCV_RVV
 uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint32_t chunksize_rvv(void);
 uint8_t* chunkmemset_safe_rvv(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 
 uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
@@ -35,8 +34,6 @@ uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len);
 # define native_adler32_fold_copy adler32_fold_copy_rvv
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_rvv
-# undef native_chunksize
-# define native_chunksize chunksize_rvv
 # undef native_compare256
 # define native_compare256 compare256_rvv
 # undef native_inflate_fast
diff --git a/arch/x86/x86_functions.h b/arch/x86/x86_functions.h
index 9cf4b4313..ddb61b74e 100644
--- a/arch/x86/x86_functions.h
+++ b/arch/x86/x86_functions.h
@@ -15,7 +15,6 @@
 #endif
 
 #ifdef X86_SSE2
-uint32_t chunksize_sse2(void);
 uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 
 # ifdef HAVE_BUILTIN_CTZ
@@ -49,7 +48,6 @@ uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *sr
 #ifdef X86_AVX2
 uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint32_t chunksize_avx2(void);
 uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 
 # ifdef HAVE_BUILTIN_CTZ
@@ -63,7 +61,6 @@ uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, unsigned len, unsign
 #ifdef X86_AVX512
 uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint32_t chunksize_avx512(void);
 uint8_t* chunkmemset_safe_avx512(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
 void inflate_fast_avx512(PREFIX3(stream)* strm, uint32_t start);
 # ifdef HAVE_BUILTIN_CTZLL
@@ -97,8 +94,6 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 # if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_sse2
-# undef native_chunksize
-# define native_chunksize chunksize_sse2
 # undef native_inflate_fast
 # define native_inflate_fast inflate_fast_sse2
 # undef native_slide_hash
@@ -156,8 +151,6 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 # define native_adler32_fold_copy adler32_fold_copy_avx2
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_avx2
-# undef native_chunksize
-# define native_chunksize chunksize_avx2
 # undef native_inflate_fast
 # define native_inflate_fast inflate_fast_avx2
 # undef native_slide_hash
@@ -179,8 +172,6 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 # define native_adler32_fold_copy adler32_fold_copy_avx512
 # undef native_chunkmemset_safe
 # define native_chunkmemset_safe chunkmemset_safe_avx512
-# undef native_chunksize
-# define native_chunksize chunksize_avx512
 # undef native_inflate_fast
 # define native_inflate_fast inflate_fast_avx512
 # ifdef HAVE_BUILTIN_CTZLL
diff --git a/chunkset_tpl.h b/chunkset_tpl.h
index 383b4d8f8..2087e66f5 100644
--- a/chunkset_tpl.h
+++ b/chunkset_tpl.h
@@ -6,7 +6,7 @@
 #include
 
 /* Returns the chunk size */
-Z_INTERNAL uint32_t CHUNKSIZE(void) {
+static inline size_t CHUNKSIZE(void) {
     return sizeof(chunk_t);
 }
 
diff --git a/functable.c b/functable.c
index f09544d12..831a8a27b 100644
--- a/functable.c
+++ b/functable.c
@@ -53,7 +53,6 @@ static void init_functable(void) {
     ft.adler32 = &adler32_c;
     ft.adler32_fold_copy = &adler32_fold_copy_c;
     ft.chunkmemset_safe = &chunkmemset_safe_c;
-    ft.chunksize = &chunksize_c;
     ft.crc32 = &crc32_c;
     ft.crc32_fold = &crc32_fold_c;
     ft.crc32_fold_copy = &crc32_fold_copy_c;
@@ -74,7 +73,6 @@ static void init_functable(void) {
 # endif
     {
         ft.chunkmemset_safe = &chunkmemset_safe_sse2;
-        ft.chunksize = &chunksize_sse2;
 #if !defined(WITHOUT_CHORBA) && !defined(NO_CHORBA_SSE)
         ft.crc32 = &crc32_chorba_sse2;
 #endif
@@ -131,7 +129,6 @@ static void init_functable(void) {
         ft.adler32 = &adler32_avx2;
         ft.adler32_fold_copy = &adler32_fold_copy_avx2;
         ft.chunkmemset_safe = &chunkmemset_safe_avx2;
-        ft.chunksize = &chunksize_avx2;
         ft.inflate_fast = &inflate_fast_avx2;
         ft.slide_hash = &slide_hash_avx2;
 # ifdef HAVE_BUILTIN_CTZ
@@ -147,7 +144,6 @@ static void init_functable(void) {
         ft.adler32 = &adler32_avx512;
         ft.adler32_fold_copy = &adler32_fold_copy_avx512;
         ft.chunkmemset_safe = &chunkmemset_safe_avx512;
-        ft.chunksize = &chunksize_avx512;
         ft.inflate_fast = &inflate_fast_avx512;
 # ifdef HAVE_BUILTIN_CTZLL
         ft.compare256 = &compare256_avx512;
@@ -192,7 +188,6 @@ static void init_functable(void) {
         ft.adler32 = &adler32_neon;
         ft.adler32_fold_copy = &adler32_fold_copy_neon;
         ft.chunkmemset_safe = &chunkmemset_safe_neon;
-        ft.chunksize = &chunksize_neon;
         ft.inflate_fast = &inflate_fast_neon;
         ft.slide_hash = &slide_hash_neon;
 # ifdef HAVE_BUILTIN_CTZLL
@@ -224,7 +219,6 @@ static void init_functable(void) {
     if (cf.power.has_arch_2_07) {
         ft.adler32 = &adler32_power8;
         ft.chunkmemset_safe = &chunkmemset_safe_power8;
-        ft.chunksize = &chunksize_power8;
         ft.inflate_fast = &inflate_fast_power8;
         ft.slide_hash = &slide_hash_power8;
     }
@@ -249,7 +243,6 @@ static void init_functable(void) {
         ft.adler32 = &adler32_rvv;
         ft.adler32_fold_copy = &adler32_fold_copy_rvv;
         ft.chunkmemset_safe = &chunkmemset_safe_rvv;
-        ft.chunksize = &chunksize_rvv;
         ft.compare256 = &compare256_rvv;
         ft.inflate_fast = &inflate_fast_rvv;
         ft.longest_match = &longest_match_rvv;
@@ -289,7 +282,6 @@ static void init_functable(void) {
         ft.longest_match = &longest_match_lsx;
         ft.longest_match_slow = &longest_match_slow_lsx;
 # endif
-        ft.chunksize = &chunksize_lsx;
         ft.chunkmemset_safe = &chunkmemset_safe_lsx;
         ft.inflate_fast = &inflate_fast_lsx;
     }
@@ -304,7 +296,6 @@ static void init_functable(void) {
         ft.longest_match = &longest_match_lasx;
         ft.longest_match_slow = &longest_match_slow_lasx;
 # endif
-        ft.chunksize = &chunksize_lasx;
         ft.chunkmemset_safe = &chunkmemset_safe_lasx;
         ft.inflate_fast = &inflate_fast_lasx;
     }
@@ -315,7 +306,6 @@ static void init_functable(void) {
     FUNCTABLE_ASSIGN(ft, adler32);
     FUNCTABLE_ASSIGN(ft, adler32_fold_copy);
     FUNCTABLE_ASSIGN(ft, chunkmemset_safe);
-    FUNCTABLE_ASSIGN(ft, chunksize);
     FUNCTABLE_ASSIGN(ft, compare256);
     FUNCTABLE_ASSIGN(ft, crc32);
     FUNCTABLE_ASSIGN(ft, crc32_fold);
@@ -351,11 +341,6 @@ static uint8_t* chunkmemset_safe_stub(uint8_t* out, uint8_t *from, unsigned len,
     return functable.chunkmemset_safe(out, from, len, left);
 }
-static uint32_t chunksize_stub(void) {
-    init_functable();
-    return functable.chunksize();
-}
-
 static uint32_t compare256_stub(const uint8_t* src0, const uint8_t* src1) {
     init_functable();
     return functable.compare256(src0, src1);
 }
@@ -412,7 +397,6 @@ Z_INTERNAL struct functable_s functable = {
     adler32_stub,
     adler32_fold_copy_stub,
     chunkmemset_safe_stub,
-    chunksize_stub,
     compare256_stub,
     crc32_stub,
     crc32_fold_stub,
diff --git a/functable.h b/functable.h
index 83dda8808..7e3b60368 100644
--- a/functable.h
+++ b/functable.h
@@ -28,7 +28,6 @@ struct functable_s {
     uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, size_t len);
     uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
     uint8_t* (* chunkmemset_safe) (uint8_t *out, uint8_t *from, unsigned len, unsigned left);
-    uint32_t (* chunksize) (void);
     uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1);
     uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, size_t len);
     void (* crc32_fold) (struct crc32_fold_s *crc, const uint8_t *src, size_t len, uint32_t init_crc);
diff --git a/infback.c b/infback.c
index b6d98d4a3..b3494e26a 100644
--- a/infback.c
+++ b/infback.c
@@ -59,7 +59,6 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateBackInit)(PREFIX3(stream) *strm, int32_t wi
     state->window = window;
     state->wnext = 0;
     state->whave = 0;
-    state->chunksize = FUNCTABLE_CALL(chunksize)();
 #ifdef INFLATE_STRICT
     state->dmax = 32768U;
 #endif
diff --git a/inffast_tpl.h b/inffast_tpl.h
index 2ec865dbf..4f0ca9827 100644
--- a/inffast_tpl.h
+++ b/inffast_tpl.h
@@ -272,7 +272,7 @@ void Z_INTERNAL INFLATE_FAST(PREFIX3(stream) *strm, uint32_t start) {
                    so unroll and roundoff operations can write beyond `out+len` so long as they
                    stay within 258 bytes of `out`.
                 */
-                if (dist >= len || dist >= state->chunksize)
+                if (dist >= len || dist >= CHUNKSIZE())
                     out = CHUNKCOPY(out, out - dist, len);
                 else
                     out = CHUNKMEMSET(out, out - dist, len);
diff --git a/inflate.c b/inflate.c
index 8c373da69..8baa725c0 100644
--- a/inflate.c
+++ b/inflate.c
@@ -246,7 +246,6 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo
     strm->state = (struct internal_state *)state;
     state->strm = strm;
     state->mode = HEAD;     /* to pass state test in inflateReset2() */
-    state->chunksize = FUNCTABLE_CALL(chunksize)();
     ret = PREFIX(inflateReset2)(strm, windowBits);
     if (ret != Z_OK) {
         free_inflate(strm);
diff --git a/inflate.h b/inflate.h
index 9d5c48270..1427e9085 100644
--- a/inflate.h
+++ b/inflate.h
@@ -118,7 +118,6 @@ struct ALIGNED_(64) inflate_state {
     uint32_t whave;         /* valid bytes in the window */
     uint32_t wnext;         /* window write index */
    unsigned char *window;   /* allocated sliding window, if needed */
-    uint32_t chunksize;     /* size of memory copying chunk */
 
     /* bit accumulator */
     uint64_t hold;          /* input bit accumulator */
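
The change follows a familiar pattern: a query that only returns sizeof(chunk_t) gains nothing from going through the function table, so making CHUNKSIZE() static inline in chunkset_tpl.h lets each SIMD instantiation of inffast_tpl.h evaluate "dist >= CHUNKSIZE()" against a compile-time constant, and the cached state->chunksize field plus the native_chunksize/functable.chunksize indirection can be dropped. The snippet below is a minimal, self-contained sketch of that pattern only; the names vec_t, chunk_size, and copy_chunks are invented for illustration and are not zlib-ng identifiers.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Stand-in for a per-architecture SIMD chunk type (e.g. a 16-byte vector).
 * In a template header, each backend defines its own type before including
 * the template, so sizeof() differs per instantiation. */
typedef struct { uint8_t bytes[16]; } vec_t;

/* Inlined size query: folds to the constant 16 at compile time, so neither a
 * function-pointer call nor a cached copy of the value is needed. */
static inline size_t chunk_size(void) {
    return sizeof(vec_t);
}

/* Toy copy loop showing how the inlined size feeds the per-chunk stride. */
void copy_chunks(uint8_t *out, const uint8_t *from, size_t len) {
    while (len >= chunk_size()) {          /* comparison against a constant */
        memcpy(out, from, chunk_size());   /* copy one whole chunk */
        out  += chunk_size();
        from += chunk_size();
        len  -= chunk_size();
    }
    if (len > 0)
        memcpy(out, from, len);            /* copy the remaining tail */
}

The return type also widens from uint32_t to size_t in the patch, which matches what sizeof yields and avoids an implicit narrowing conversion.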