From: Hans Kristian Rosbach Date: Sun, 30 Nov 2025 21:31:49 +0000 (+0100) Subject: Inline all uses of quick_insert_string*/quick_insert_value*. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bf95fa0fba9764608aa4164d9ead5839d2edde87;p=thirdparty%2Fzlib-ng.git Inline all uses of quick_insert_string*/quick_insert_value*. Inline all uses of update_hash*. Inline insert_string into deflate_quick, deflate_fast and deflate_medium. Remove insert_string from deflate_state Use local function pointer for insert_string. Fix level check to actually check level and not `s->max_chain_length <= 1024`. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index f59bd9e68..5bb690607 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1329,6 +1329,7 @@ set(ZLIB_PRIVATE_HDRS inflate.h inflate_p.h inftrees.h + insert_string_p.h insert_string_tpl.h match_tpl.h trees.h @@ -1358,7 +1359,6 @@ set(ZLIB_SRCS inflate.c inftrees.c insert_string.c - insert_string_roll.c trees.c uncompr.c zutil.c diff --git a/Makefile.in b/Makefile.in index 1e248a893..19fced000 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,7 +98,6 @@ OBJZ = \ inflate.o \ inftrees.o \ insert_string.o \ - insert_string_roll.o \ trees.o \ uncompr.o \ zutil.o \ @@ -138,7 +137,6 @@ PIC_OBJZ = \ inflate.lo \ inftrees.lo \ insert_string.lo \ - insert_string_roll.lo \ trees.lo \ uncompr.lo \ zutil.lo \ diff --git a/deflate.c b/deflate.c index 79baa181c..19a9ddf5b 100644 --- a/deflate.c +++ b/deflate.c @@ -51,6 +51,7 @@ #include "functable.h" #include "deflate.h" #include "deflate_p.h" +#include "insert_string_p.h" /* Avoid conflicts with zlib.h macros */ #ifdef ZLIB_COMPAT @@ -415,6 +416,7 @@ static int deflateStateCheck(PREFIX3(stream) *strm) { /* ========================================================================= */ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8_t *dictionary, uint32_t dictLength) { deflate_state *s; + insert_string_cb insert_string_func; unsigned int str, n; int wrap; uint32_t avail; @@ -427,6 +429,11 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8 if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) return Z_STREAM_ERROR; + if (s->level >= 9) + insert_string_func = insert_string_roll; + else + insert_string_func = insert_string; + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ if (wrap == 1) strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, dictionary, dictLength); @@ -454,7 +461,7 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8 while (s->lookahead >= STD_MIN_MATCH) { str = s->strstart; n = s->lookahead - (STD_MIN_MATCH - 1); - s->insert_string(s, str, n); + insert_string_func(s, str, n); s->strstart = str + n; s->lookahead = STD_MIN_MATCH - 1; PREFIX(fill_window)(s); @@ -1129,22 +1136,6 @@ static void lm_set_level(deflate_state *s, int level) { s->good_match = configuration_table[level].good_length; s->nice_match = configuration_table[level].nice_length; s->max_chain_length = configuration_table[level].max_chain; - - /* Use rolling hash for deflate_slow algorithm with level 9. It allows us to - * properly lookup different hash chains to speed up longest_match search. Since hashing - * method changes depending on the level we cannot put this into functable. */ - if (s->max_chain_length > 1024) { - s->update_hash = &update_hash_roll; - s->insert_string = &insert_string_roll; - s->quick_insert_string = &quick_insert_string_roll; - s->quick_insert_value = &quick_insert_value_roll; - } else { - s->update_hash = update_hash; - s->insert_string = insert_string; - s->quick_insert_string = quick_insert_string; - s->quick_insert_value = quick_insert_value; - } - s->level = level; } @@ -1182,12 +1173,19 @@ static void lm_init(deflate_state *s) { */ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) { + insert_string_cb insert_string_func; unsigned n; unsigned int more; /* Amount of free space at the end of the window. */ unsigned int wsize = s->w_size; + int level = s->level; Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + if (level >= 9) + insert_string_func = insert_string_roll; + else + insert_string_func = insert_string; + do { more = s->window_size - s->lookahead - s->strstart; @@ -1231,17 +1229,17 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) { /* Initialize the hash value now that we have some input: */ if (s->lookahead + s->insert >= STD_MIN_MATCH) { unsigned int str = s->strstart - s->insert; - if (UNLIKELY(s->max_chain_length > 1024)) { - s->ins_h = s->update_hash(s->window[str], s->window[str+1]); + if (UNLIKELY(level >= 9)) { + s->ins_h = update_hash_roll(s->window[str], s->window[str+1]); } else if (str >= 1) { - s->quick_insert_string(s, str + 2 - STD_MIN_MATCH); + quick_insert_string(s, str + 2 - STD_MIN_MATCH); } unsigned int count = s->insert; if (UNLIKELY(s->lookahead == 1)) { count -= 1; } if (count > 0) { - s->insert_string(s, str, count); + insert_string_func(s, str, count); s->insert -= count; } } diff --git a/deflate.h b/deflate.h index 72bd0198f..d91b35fc2 100644 --- a/deflate.h +++ b/deflate.h @@ -119,20 +119,9 @@ typedef uint16_t Pos; /* Type definitions for hash callbacks */ typedef struct internal_state deflate_state; -typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val); typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count); -typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str); -typedef Pos (* quick_insert_value_cb) (deflate_state *const s, uint32_t str, uint32_t val); - -uint32_t update_hash (uint32_t h, uint32_t val); void insert_string (deflate_state *const s, uint32_t str, uint32_t count); -Pos quick_insert_string (deflate_state *const s, uint32_t str); -Pos quick_insert_value (deflate_state *const s, uint32_t str, uint32_t val); - -uint32_t update_hash_roll (uint32_t h, uint32_t val); void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count); -Pos quick_insert_string_roll(deflate_state *const s, uint32_t str); -Pos quick_insert_value_roll (deflate_state *const s, uint32_t str, uint32_t val); /* Struct for memory allocation handling */ typedef struct deflate_allocs_s { @@ -233,13 +222,6 @@ struct ALIGNED_(64) internal_state { * max_insert_length is used only for compression levels <= 6. */ - update_hash_cb update_hash; - insert_string_cb insert_string; - quick_insert_string_cb quick_insert_string; - quick_insert_value_cb quick_insert_value; - /* Hash function callbacks that can be configured depending on the deflate - * algorithm being used */ - int level; /* compression level (1..9) */ int strategy; /* favor or force Huffman coding*/ diff --git a/deflate_fast.c b/deflate_fast.c index d3bc12ffb..ad650fa1e 100644 --- a/deflate_fast.c +++ b/deflate_fast.c @@ -9,6 +9,7 @@ #include "deflate.h" #include "deflate_p.h" #include "functable.h" +#include "insert_string_p.h" /* =========================================================================== * Compress as much as possible from the input stream, return the current @@ -82,7 +83,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { match_len--; /* string at strstart already in table */ s->strstart++; - insert_string(s, s->strstart, match_len); + insert_string_static(s, s->strstart, match_len); s->strstart += match_len; } else { s->strstart += match_len; diff --git a/deflate_medium.c b/deflate_medium.c index ae7c737ec..40c9b6278 100644 --- a/deflate_medium.c +++ b/deflate_medium.c @@ -11,6 +11,7 @@ #include "deflate.h" #include "deflate_p.h" #include "functable.h" +#include "insert_string_p.h" struct match { uint16_t match_start; diff --git a/deflate_quick.c b/deflate_quick.c index 5f49dfa69..961ec87c9 100644 --- a/deflate_quick.c +++ b/deflate_quick.c @@ -23,6 +23,7 @@ #include "deflate_p.h" #include "functable.h" #include "trees_emit.h" +#include "insert_string_p.h" extern const ct_data static_ltree[L_CODES+2]; extern const ct_data static_dtree[D_CODES]; diff --git a/deflate_slow.c b/deflate_slow.c index e9d7fc76c..f50e71b36 100644 --- a/deflate_slow.c +++ b/deflate_slow.c @@ -8,6 +8,7 @@ #include "deflate.h" #include "deflate_p.h" #include "functable.h" +#include "insert_string_p.h" /* =========================================================================== * Same as deflate_medium, but achieves better compression. We use a lazy @@ -15,13 +16,17 @@ * no better match at the next window position. */ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { - int bflush; /* set if current block must be flushed */ match_func longest_match; + insert_string_cb insert_string_func; + int bflush; /* set if current block must be flushed */ - if (s->max_chain_length <= 1024) - longest_match = FUNCTABLE_FPTR(longest_match); - else + if (s->level >= 9) { longest_match = FUNCTABLE_FPTR(longest_match_slow); + insert_string_func = insert_string_roll; + } else { + longest_match = FUNCTABLE_FPTR(longest_match); + insert_string_func = insert_string; + } /* Process the input block. */ for (;;) { @@ -44,7 +49,10 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { */ Pos hash_head = 0; if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) { - hash_head = s->quick_insert_string(s, s->strstart); + if (s->level >= 9) + hash_head = quick_insert_string_roll(s, s->strstart); + else + hash_head = quick_insert_string(s, s->strstart); } /* Find the longest match, discarding those <= prev_length. @@ -93,7 +101,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { unsigned int insert_cnt = mov_fwd; if (UNLIKELY(insert_cnt > max_insert - s->strstart)) insert_cnt = max_insert - s->strstart; - s->insert_string(s, s->strstart + 1, insert_cnt); + insert_string_func(s, s->strstart + 1, insert_cnt); } s->prev_length = 0; s->match_available = 0; diff --git a/insert_string.c b/insert_string.c index 282692129..dd1960fbe 100644 --- a/insert_string.c +++ b/insert_string.c @@ -1,4 +1,4 @@ -/* insert_string.c -- insert_string integer hash variant +/* insert_string.c -- make insert_string functions from static inlined functions * * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h @@ -7,16 +7,12 @@ #include "zbuild.h" #include "deflate.h" +#include "insert_string_p.h" -#define HASH_SLIDE 16 +void insert_string(deflate_state *const s, uint32_t str, uint32_t count) { + insert_string_static(s, str, count); +} -#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE); -#define HASH_CALC_VAR h -#define HASH_CALC_VAR_INIT uint32_t h - -#define UPDATE_HASH update_hash -#define INSERT_STRING insert_string -#define QUICK_INSERT_STRING quick_insert_string -#define QUICK_INSERT_VALUE quick_insert_value - -#include "insert_string_tpl.h" +void insert_string_roll(deflate_state *const s, uint32_t str, uint32_t count) { + insert_string_roll_static(s, str, count); +} diff --git a/insert_string_p.h b/insert_string_p.h new file mode 100644 index 000000000..cd0223440 --- /dev/null +++ b/insert_string_p.h @@ -0,0 +1,57 @@ +#ifndef INSERT_STRING_P_H_ +#define INSERT_STRING_P_H_ + +/* insert_string_p.h -- insert_string function generator + * + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + */ + +// Normal insert_string, levels 1-8 +#define HASH_SLIDE 16 + +#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE); +#define HASH_CALC_MASK HASH_MASK +#define HASH_CALC_VAR h +#define HASH_CALC_VAR_INIT uint32_t h +#define HASH_CALC_OFFSET 0 + +#define UPDATE_HASH update_hash +#define INSERT_STRING insert_string_static +#define QUICK_INSERT_STRING quick_insert_string +#define QUICK_INSERT_VALUE quick_insert_value + +#include "insert_string_tpl.h" + +// Cleanup +#undef HASH_SLIDE +#undef HASH_CALC +#undef HASH_CALC_READ +#undef HASH_CALC_MASK +#undef HASH_CALC_OFFSET +#undef HASH_CALC_VAR +#undef HASH_CALC_VAR_INIT +#undef UPDATE_HASH +#undef INSERT_STRING +#undef QUICK_INSERT_STRING +#undef QUICK_INSERT_VALUE + +// Rolling insert_string, level 9 +#define HASH_SLIDE 5 + +#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val)) +#define HASH_CALC_VAR s->ins_h +#define HASH_CALC_VAR_INIT +#define HASH_CALC_READ val = strstart[0] +#define HASH_CALC_MASK (32768u - 1u) +#define HASH_CALC_OFFSET (STD_MIN_MATCH-1) + +#define UPDATE_HASH update_hash_roll +#define INSERT_STRING insert_string_roll_static +#define QUICK_INSERT_STRING quick_insert_string_roll +#define QUICK_INSERT_VALUE quick_insert_value_roll + +#include "insert_string_tpl.h" + +#endif diff --git a/insert_string_roll.c b/insert_string_roll.c deleted file mode 100644 index bf12967c7..000000000 --- a/insert_string_roll.c +++ /dev/null @@ -1,25 +0,0 @@ -/* insert_string_roll.c -- insert_string rolling hash variant - * - * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - * - */ - -#include "zbuild.h" -#include "deflate.h" - -#define HASH_SLIDE 5 - -#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val)) -#define HASH_CALC_VAR s->ins_h -#define HASH_CALC_VAR_INIT -#define HASH_CALC_READ val = strstart[0] -#define HASH_CALC_MASK (32768u - 1u) -#define HASH_CALC_OFFSET (STD_MIN_MATCH-1) - -#define UPDATE_HASH update_hash_roll -#define INSERT_STRING insert_string_roll -#define QUICK_INSERT_STRING quick_insert_string_roll -#define QUICK_INSERT_VALUE quick_insert_value_roll - -#include "insert_string_tpl.h" diff --git a/insert_string_tpl.h b/insert_string_tpl.h index 64cc74345..04a9b0a6b 100644 --- a/insert_string_tpl.h +++ b/insert_string_tpl.h @@ -1,6 +1,3 @@ -#ifndef INSERT_STRING_H_ -#define INSERT_STRING_H_ - /* insert_string_tpl.h -- Private insert_string functions shared with more than * one insert string implementation * @@ -22,14 +19,6 @@ * */ -#include "zmemory.h" - -#ifndef HASH_CALC_OFFSET -# define HASH_CALC_OFFSET 0 -#endif -#ifndef HASH_CALC_MASK -# define HASH_CALC_MASK HASH_MASK -#endif #ifndef HASH_CALC_READ # if BYTE_ORDER == LITTLE_ENDIAN # define HASH_CALC_READ \ @@ -46,7 +35,7 @@ * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */ -Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) { +Z_FORCEINLINE static uint32_t UPDATE_HASH(uint32_t h, uint32_t val) { HASH_CALC(h, val); return h & HASH_CALC_MASK; } @@ -56,7 +45,7 @@ Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) { * to the previous head of the hash chain (the most recent string with same hash key). * Return the previous length of the hash chain. */ -Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) { +Z_FORCEINLINE static Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) { uint32_t hm; Pos head; @@ -78,7 +67,7 @@ Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t * of the hash chain (the most recent string with same hash key). Return * the previous length of the hash chain. */ -Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) { +Z_FORCEINLINE static Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) { uint8_t *strstart = s->window + str + HASH_CALC_OFFSET; uint32_t val, hm; Pos head; @@ -105,7 +94,7 @@ Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) { * input characters and the first STD_MIN_MATCH bytes of str are valid * (except for the last STD_MIN_MATCH-1 bytes of the input file). */ -Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t count) { +Z_FORCEINLINE static void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t count) { uint8_t *strstart = s->window + str + HASH_CALC_OFFSET; uint8_t *strend = strstart + count; @@ -130,4 +119,3 @@ Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t cou } } } -#endif diff --git a/match_tpl.h b/match_tpl.h index 47e9aed9f..94de125e4 100644 --- a/match_tpl.h +++ b/match_tpl.h @@ -8,13 +8,10 @@ * https://github.com/gildor2/fast_zlib */ -#ifndef MATCH_TPL_H -#define MATCH_TPL_H +#include "insert_string_p.h" #define EARLY_EXIT_TRIGGER_LEVEL 5 -#endif - /* Set match_start to the longest match starting at the given string and * return its length. Matches shorter or equal to prev_length are discarded, * in which case the result is equal to prev_length and match_start is garbage. @@ -91,12 +88,13 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because * these strings are not yet inserted into the hash table. */ - hash = s->update_hash(0, scan[1]); - hash = s->update_hash(hash, scan[2]); + // use update_hash_roll for deflate_slow + hash = update_hash_roll(0, scan[1]); + hash = update_hash_roll(hash, scan[2]); for (i = 3; i <= best_len; i++) { - hash = s->update_hash(hash, scan[i]); - + // use update_hash_roll for deflate_slow + hash = update_hash_roll(hash, scan[i]); /* If we're starting with best_len >= 3, we can use offset search. */ pos = s->head[hash]; if (pos < cur_match) { @@ -203,9 +201,10 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { */ scan_endstr = scan + len - (STD_MIN_MATCH+1); - hash = s->update_hash(0, scan_endstr[0]); - hash = s->update_hash(hash, scan_endstr[1]); - hash = s->update_hash(hash, scan_endstr[2]); + // use update_hash_roll for deflate_slow + hash = update_hash_roll(0, scan_endstr[0]); + hash = update_hash_roll(hash, scan_endstr[1]); + hash = update_hash_roll(hash, scan_endstr[2]); pos = s->head[hash]; if (pos < cur_match) { diff --git a/test/benchmarks/benchmark_insert_string.cc b/test/benchmarks/benchmark_insert_string.cc index 1ff6e694a..fb0f99e8e 100644 --- a/test/benchmarks/benchmark_insert_string.cc +++ b/test/benchmarks/benchmark_insert_string.cc @@ -14,11 +14,14 @@ extern "C" { # include "deflate.h" # include "arch_functions.h" # include "../test_cpu_features.h" +# include "insert_string_p.h" } #define MAX_WSIZE 32768 #define TEST_WINDOW_SIZE (MAX_WSIZE * 2) +typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str); + // Base class with common setup/teardown for both insert_string benchmarks class insert_string_base: public benchmark::Fixture { protected: