From: Nathan Moinvaziri Date: Sat, 29 Nov 2025 03:31:10 +0000 (-0800) Subject: Add quick_insert_value for optimized hash insertion X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2c8703510b62bc726e1d2ff9e04e10f221b43cb2;p=thirdparty%2Fzlib-ng.git Add quick_insert_value for optimized hash insertion Reduces the number of reads by two Co-authored-by: Brian Pane trifectatechfoundation/zlib-rs#374 trifectatechfoundation/zlib-rs#375 --- diff --git a/deflate.c b/deflate.c index 9372c21f0..79baa181c 100644 --- a/deflate.c +++ b/deflate.c @@ -1137,10 +1137,12 @@ static void lm_set_level(deflate_state *s, int level) { s->update_hash = &update_hash_roll; s->insert_string = &insert_string_roll; s->quick_insert_string = &quick_insert_string_roll; + s->quick_insert_value = &quick_insert_value_roll; } else { s->update_hash = update_hash; s->insert_string = insert_string; s->quick_insert_string = quick_insert_string; + s->quick_insert_value = quick_insert_value; } s->level = level; diff --git a/deflate.h b/deflate.h index 6d5558d66..72bd0198f 100644 --- a/deflate.h +++ b/deflate.h @@ -122,14 +122,17 @@ typedef struct internal_state deflate_state; typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val); typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count); typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str); +typedef Pos (* quick_insert_value_cb) (deflate_state *const s, uint32_t str, uint32_t val); uint32_t update_hash (uint32_t h, uint32_t val); void insert_string (deflate_state *const s, uint32_t str, uint32_t count); Pos quick_insert_string (deflate_state *const s, uint32_t str); +Pos quick_insert_value (deflate_state *const s, uint32_t str, uint32_t val); uint32_t update_hash_roll (uint32_t h, uint32_t val); void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count); Pos quick_insert_string_roll(deflate_state *const s, uint32_t str); +Pos quick_insert_value_roll (deflate_state *const s, uint32_t str, uint32_t val); /* Struct for memory allocation handling */ typedef struct deflate_allocs_s { @@ -233,6 +236,7 @@ struct ALIGNED_(64) internal_state { update_hash_cb update_hash; insert_string_cb insert_string; quick_insert_string_cb quick_insert_string; + quick_insert_value_cb quick_insert_value; /* Hash function callbacks that can be configured depending on the deflate * algorithm being used */ diff --git a/deflate_fast.c b/deflate_fast.c index efb95b856..d3bc12ffb 100644 --- a/deflate_fast.c +++ b/deflate_fast.c @@ -5,6 +5,7 @@ */ #include "zbuild.h" +#include "zmemory.h" #include "deflate.h" #include "deflate_p.h" #include "functable.h" @@ -21,6 +22,8 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { uint32_t match_len = 0; for (;;) { + uint8_t lc; + /* Make sure that we always have enough lookahead, except * at the end of the input file. We need STD_MAX_MATCH bytes * for the next match, plus WANT_MIN_MATCH bytes to insert the @@ -39,8 +42,14 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { * dictionary, and set hash_head to the head of the hash chain: */ if (s->lookahead >= WANT_MIN_MATCH) { - Pos hash_head = quick_insert_string(s, s->strstart); +#if BYTE_ORDER == LITTLE_ENDIAN + uint32_t str_val = zng_memread_4(&s->window[s->strstart]); +#else + uint32_t str_val = ZSWAP32(zng_memread_4(&s->window[s->strstart])); +#endif + Pos hash_head = quick_insert_value(s, s->strstart, str_val); int64_t dist = (int64_t)s->strstart - hash_head; + lc = (uint8_t)str_val; /* Find the longest match, discarding those <= prev_length. * At this point we have always match length < WANT_MIN_MATCH @@ -53,6 +62,8 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { match_len = FUNCTABLE_CALL(longest_match)(s, hash_head); /* longest_match() sets match_start */ } + } else { + lc = s->window[s->strstart]; } if (match_len >= WANT_MIN_MATCH) { @@ -84,7 +95,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { match_len = 0; } else { /* No match, output a literal byte */ - bflush = zng_tr_tally_lit(s, s->window[s->strstart]); + bflush = zng_tr_tally_lit(s, lc); s->lookahead--; s->strstart++; } diff --git a/deflate_quick.c b/deflate_quick.c index f9312a2eb..5f49dfa69 100644 --- a/deflate_quick.c +++ b/deflate_quick.c @@ -58,6 +58,8 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { } for (;;) { + uint8_t lc; + if (UNLIKELY(s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size)) { PREFIX(flush_pending)(s->strm); if (s->strm->avail_out == 0) { @@ -81,14 +83,25 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { } if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) { - Pos hash_head = quick_insert_string(s, s->strstart); +#if BYTE_ORDER == LITTLE_ENDIAN + uint32_t str_val = zng_memread_4(&s->window[s->strstart]); +#else + uint32_t str_val = ZSWAP32(zng_memread_4(&s->window[s->strstart])); +#endif + Pos hash_head = quick_insert_value(s, s->strstart, str_val); int64_t dist = (int64_t)s->strstart - hash_head; + lc = (uint8_t)str_val; if (dist <= MAX_DIST(s) && dist > 0) { - const uint8_t *str_start = s->window + s->strstart; const uint8_t *match_start = s->window + hash_head; - - if (zng_memcmp_2(str_start, match_start) == 0) { +#if BYTE_ORDER == LITTLE_ENDIAN + uint32_t match_val = zng_memread_4(match_start); +#else + uint32_t match_val = ZSWAP32(zng_memread_4(match_start)); +#endif + + if (str_val == match_val) { + const uint8_t *str_start = s->window + s->strstart; uint32_t match_len = FUNCTABLE_CALL(compare256)(str_start+2, match_start+2) + 2; if (match_len >= WANT_MIN_MATCH) { @@ -107,9 +120,10 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { } } } + } else { + lc = s->window[s->strstart]; } - - zng_tr_emit_lit(s, static_ltree, s->window[s->strstart]); + zng_tr_emit_lit(s, static_ltree, lc); s->strstart++; s->lookahead--; } diff --git a/insert_string.c b/insert_string.c index 0c72347fc..282692129 100644 --- a/insert_string.c +++ b/insert_string.c @@ -17,5 +17,6 @@ #define UPDATE_HASH update_hash #define INSERT_STRING insert_string #define QUICK_INSERT_STRING quick_insert_string +#define QUICK_INSERT_VALUE quick_insert_value #include "insert_string_tpl.h" diff --git a/insert_string_roll.c b/insert_string_roll.c index 8693f96f5..bf12967c7 100644 --- a/insert_string_roll.c +++ b/insert_string_roll.c @@ -20,5 +20,6 @@ #define UPDATE_HASH update_hash_roll #define INSERT_STRING insert_string_roll #define QUICK_INSERT_STRING quick_insert_string_roll +#define QUICK_INSERT_VALUE quick_insert_value_roll #include "insert_string_tpl.h" diff --git a/insert_string_tpl.h b/insert_string_tpl.h index eac1bfe04..64cc74345 100644 --- a/insert_string_tpl.h +++ b/insert_string_tpl.h @@ -51,6 +51,28 @@ Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) { return h & HASH_CALC_MASK; } +/* =========================================================================== + * Quick insert string str in the dictionary using a pre-read value and set match_head + * to the previous head of the hash chain (the most recent string with same hash key). + * Return the previous length of the hash chain. + */ +Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) { + uint32_t hm; + Pos head; + + HASH_CALC_VAR_INIT; + HASH_CALC(HASH_CALC_VAR, val); + HASH_CALC_VAR &= HASH_CALC_MASK; + hm = HASH_CALC_VAR; + + head = s->head[hm]; + if (LIKELY(head != str)) { + s->prev[str & s->w_mask] = head; + s->head[hm] = (Pos)str; + } + return head; +} + /* =========================================================================== * Quick insert string str in the dictionary and set match_head to the previous head * of the hash chain (the most recent string with same hash key). Return