]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add quick_insert_value for optimized hash insertion
authorNathan Moinvaziri <nathan@nathanm.com>
Sat, 29 Nov 2025 03:31:10 +0000 (19:31 -0800)
committerHans Kristian Rosbach <hk-github@circlestorm.org>
Mon, 1 Dec 2025 10:21:30 +0000 (11:21 +0100)
Reduces the number of reads by two

Co-authored-by: Brian Pane <brianp@brianp.net>
trifectatechfoundation/zlib-rs#374
trifectatechfoundation/zlib-rs#375

deflate.c
deflate.h
deflate_fast.c
deflate_quick.c
insert_string.c
insert_string_roll.c
insert_string_tpl.h

index 9372c21f0c320e2e7467029ce6c36fd95a785bb5..79baa181cca646b0e9feef4008250f20047ab8c9 100644 (file)
--- a/deflate.c
+++ b/deflate.c
@@ -1137,10 +1137,12 @@ static void lm_set_level(deflate_state *s, int level) {
         s->update_hash = &update_hash_roll;
         s->insert_string = &insert_string_roll;
         s->quick_insert_string = &quick_insert_string_roll;
+        s->quick_insert_value = &quick_insert_value_roll;
     } else {
         s->update_hash = update_hash;
         s->insert_string = insert_string;
         s->quick_insert_string = quick_insert_string;
+        s->quick_insert_value = quick_insert_value;
     }
 
     s->level = level;
index 6d5558d66cb195c7c6a9f91d60c1c9c8c3fb7819..72bd0198f6d31a2e4e84abd725c39f38aa55fe38 100644 (file)
--- a/deflate.h
+++ b/deflate.h
@@ -122,14 +122,17 @@ typedef struct internal_state deflate_state;
 typedef uint32_t (* update_hash_cb)        (uint32_t h, uint32_t val);
 typedef void     (* insert_string_cb)      (deflate_state *const s, uint32_t str, uint32_t count);
 typedef Pos      (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
+typedef Pos      (* quick_insert_value_cb) (deflate_state *const s, uint32_t str, uint32_t val);
 
 uint32_t update_hash             (uint32_t h, uint32_t val);
 void     insert_string           (deflate_state *const s, uint32_t str, uint32_t count);
 Pos      quick_insert_string     (deflate_state *const s, uint32_t str);
+Pos      quick_insert_value      (deflate_state *const s, uint32_t str, uint32_t val);
 
 uint32_t update_hash_roll        (uint32_t h, uint32_t val);
 void     insert_string_roll      (deflate_state *const s, uint32_t str, uint32_t count);
 Pos      quick_insert_string_roll(deflate_state *const s, uint32_t str);
+Pos      quick_insert_value_roll (deflate_state *const s, uint32_t str, uint32_t val);
 
 /* Struct for memory allocation handling */
 typedef struct deflate_allocs_s {
@@ -233,6 +236,7 @@ struct ALIGNED_(64) internal_state {
     update_hash_cb          update_hash;
     insert_string_cb        insert_string;
     quick_insert_string_cb  quick_insert_string;
+    quick_insert_value_cb   quick_insert_value;
     /* Hash function callbacks that can be configured depending on the deflate
      * algorithm being used */
 
index efb95b856ed578da3d24468a5c3f4daa8cb76975..d3bc12ffb25dc9de11a69e5816acd88c678a7a11 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include "zbuild.h"
+#include "zmemory.h"
 #include "deflate.h"
 #include "deflate_p.h"
 #include "functable.h"
@@ -21,6 +22,8 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
     uint32_t match_len = 0;
 
     for (;;) {
+        uint8_t lc;
+
         /* Make sure that we always have enough lookahead, except
          * at the end of the input file. We need STD_MAX_MATCH bytes
          * for the next match, plus WANT_MIN_MATCH bytes to insert the
@@ -39,8 +42,14 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
          * dictionary, and set hash_head to the head of the hash chain:
          */
         if (s->lookahead >= WANT_MIN_MATCH) {
-            Pos hash_head = quick_insert_string(s, s->strstart);
+#if BYTE_ORDER == LITTLE_ENDIAN
+            uint32_t str_val = zng_memread_4(&s->window[s->strstart]);
+#else
+            uint32_t str_val = ZSWAP32(zng_memread_4(&s->window[s->strstart]));
+#endif
+            Pos hash_head = quick_insert_value(s, s->strstart, str_val);
             int64_t dist = (int64_t)s->strstart - hash_head;
+            lc = (uint8_t)str_val;
 
             /* Find the longest match, discarding those <= prev_length.
              * At this point we have always match length < WANT_MIN_MATCH
@@ -53,6 +62,8 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
                 match_len = FUNCTABLE_CALL(longest_match)(s, hash_head);
                 /* longest_match() sets match_start */
             }
+        } else {
+            lc = s->window[s->strstart];
         }
 
         if (match_len >= WANT_MIN_MATCH) {
@@ -84,7 +95,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
             match_len = 0;
         } else {
             /* No match, output a literal byte */
-            bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
+            bflush = zng_tr_tally_lit(s, lc);
             s->lookahead--;
             s->strstart++;
         }
index f9312a2eba9e735dd0189ce9112b4b4879980d9d..5f49dfa69b60a5a13e5b4cf7b51ed6a343d5dfad 100644 (file)
@@ -58,6 +58,8 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
     }
 
     for (;;) {
+        uint8_t lc;
+
         if (UNLIKELY(s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size)) {
             PREFIX(flush_pending)(s->strm);
             if (s->strm->avail_out == 0) {
@@ -81,14 +83,25 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
         }
 
         if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) {
-            Pos hash_head = quick_insert_string(s, s->strstart);
+#if BYTE_ORDER == LITTLE_ENDIAN
+            uint32_t str_val = zng_memread_4(&s->window[s->strstart]);
+#else
+            uint32_t str_val = ZSWAP32(zng_memread_4(&s->window[s->strstart]));
+#endif
+            Pos hash_head = quick_insert_value(s, s->strstart, str_val);
             int64_t dist = (int64_t)s->strstart - hash_head;
+            lc = (uint8_t)str_val;
 
             if (dist <= MAX_DIST(s) && dist > 0) {
-                const uint8_t *str_start = s->window + s->strstart;
                 const uint8_t *match_start = s->window + hash_head;
-
-                if (zng_memcmp_2(str_start, match_start) == 0) {
+#if BYTE_ORDER == LITTLE_ENDIAN
+                uint32_t match_val = zng_memread_4(match_start);
+#else
+                uint32_t match_val = ZSWAP32(zng_memread_4(match_start));
+#endif
+
+                if (str_val == match_val) {
+                    const uint8_t *str_start = s->window + s->strstart;
                     uint32_t match_len = FUNCTABLE_CALL(compare256)(str_start+2, match_start+2) + 2;
 
                     if (match_len >= WANT_MIN_MATCH) {
@@ -107,9 +120,10 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
                     }
                 }
             }
+        } else {
+            lc = s->window[s->strstart];
         }
-
-        zng_tr_emit_lit(s, static_ltree, s->window[s->strstart]);
+        zng_tr_emit_lit(s, static_ltree, lc);
         s->strstart++;
         s->lookahead--;
     }
index 0c72347fc6af8b7c2bd65bab9c8d1d956bebbfff..282692129229659bd68d5fcba718818fcfaff583 100644 (file)
@@ -17,5 +17,6 @@
 #define UPDATE_HASH          update_hash
 #define INSERT_STRING        insert_string
 #define QUICK_INSERT_STRING  quick_insert_string
+#define QUICK_INSERT_VALUE   quick_insert_value
 
 #include "insert_string_tpl.h"
index 8693f96f59021edfcdd4ac67fbb8236d200c5355..bf12967c76e6c2abb70d9de88edb8669a9f89761 100644 (file)
@@ -20,5 +20,6 @@
 #define UPDATE_HASH          update_hash_roll
 #define INSERT_STRING        insert_string_roll
 #define QUICK_INSERT_STRING  quick_insert_string_roll
+#define QUICK_INSERT_VALUE   quick_insert_value_roll
 
 #include "insert_string_tpl.h"
index eac1bfe0457dc83461e03042594326b8a33d2db1..64cc74345819f894083652cdf937e35eec5a62a1 100644 (file)
@@ -51,6 +51,28 @@ Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
     return h & HASH_CALC_MASK;
 }
 
+/* ===========================================================================
+ * Quick insert string str in the dictionary using a pre-read value and set match_head
+ * to the previous head of the hash chain (the most recent string with same hash key).
+ * Return the previous length of the hash chain.
+ */
+Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) {
+    uint32_t hm;
+    Pos head;
+
+    HASH_CALC_VAR_INIT;
+    HASH_CALC(HASH_CALC_VAR, val);
+    HASH_CALC_VAR &= HASH_CALC_MASK;
+    hm = HASH_CALC_VAR;
+
+    head = s->head[hm];
+    if (LIKELY(head != str)) {
+        s->prev[str & s->w_mask] = head;
+        s->head[hm] = (Pos)str;
+    }
+    return head;
+}
+
 /* ===========================================================================
  * Quick insert string str in the dictionary and set match_head to the previous head
  * of the hash chain (the most recent string with same hash key). Return