git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add LIT_MEM define to use more memory for a small deflate speedup.
author: Mark Adler <madler@alumni.caltech.edu>
Tue, 30 Jan 2024 00:38:32 +0000 (16:38 -0800)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Wed, 7 Feb 2024 18:15:56 +0000 (19:15 +0100)
A bug fix in zlib 1.2.12 resulted in a slight slowdown (1-2%) of
deflate. This commit provides the option to #define LIT_MEM, which
uses more memory to reverse most of that slowdown. The memory for
the pending buffer and symbol buffers is increased by 25%, which
increases the total memory usage with the default parameters by
about 6%.

madler/zlib#ac8f12c97d1afd9bafa9c710f827d40a407d3266

deflate.c
deflate.h
deflate_p.h
deflate_slow.c
trees.c

diff --git a/deflate.c b/deflate.c
index 2a0a20e5d29a99e4352a93f05775063762fbac83..c6accfd0eed25408fa6c6a89b8af4dec9507f3b5 100644 (file)
--- a/deflate.c
+++ b/deflate.c
@@ -295,7 +295,11 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
      * symbols from which it is being constructed.
      */
 
+#ifdef LIT_MEM
+    s->pending_buf = (unsigned char *) ZALLOC(strm, s->lit_bufsize, 5);
+#else
     s->pending_buf = (unsigned char *) ZALLOC(strm, s->lit_bufsize, 4);
+#endif
     s->pending_buf_size = s->lit_bufsize * 4;
 
     if (s->window == NULL || s->prev == NULL || s->head == NULL || s->pending_buf == NULL) {
@@ -304,8 +308,14 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
         PREFIX(deflateEnd)(strm);
         return Z_MEM_ERROR;
     }
+#ifdef LIT_MEM
+    s->d_buf = (uint16_t *)(s->pending_buf + (s->lit_bufsize << 1));
+    s->l_buf = s->pending_buf + (s->lit_bufsize << 2);
+    s->sym_end = s->lit_bufsize - 1;
+#else
     s->sym_buf = s->pending_buf + s->lit_bufsize;
     s->sym_end = (s->lit_bufsize - 1) * 3;
+#endif
     /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
      * on 16 bit machines and because stored blocks are restricted to
      * 64K-1 bytes.
@@ -506,9 +516,17 @@ int32_t Z_EXPORT PREFIX(deflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32
     if (deflateStateCheck(strm))
         return Z_STREAM_ERROR;
     s = strm->state;
+
+#ifdef LIT_MEM
+    if (bits < 0 || bits > BIT_BUF_SIZE ||
+        (unsigned char *)s->d_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3))
+        return Z_BUF_ERROR;
+#else
     if (bits < 0 || bits > BIT_BUF_SIZE || bits > (int32_t)(sizeof(value) << 3) ||
         s->sym_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3))
         return Z_BUF_ERROR;
+#endif
+
     do {
         put = BIT_BUF_SIZE - s->bi_valid;
         put = MIN(put, bits);
@@ -1068,7 +1086,12 @@ int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou
     memcpy(ds->pending_buf, ss->pending_buf, ds->pending_buf_size);
 
     ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+#ifdef LIT_MEM
+    ds->d_buf = (uint16_t *)(ds->pending_buf + (ds->lit_bufsize << 1));
+    ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2);
+#else
     ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
+#endif
 
     ds->l_desc.dyn_tree = ds->dyn_ltree;
     ds->d_desc.dyn_tree = ds->dyn_dtree;
diff --git a/deflate.h b/deflate.h
index ec1519daf42e21729f6112171f1858484a5c1709..3db110a47028618c99bedd0caa9cc927f1f2003c 100644 (file)
--- a/deflate.h
+++ b/deflate.h
 #  define GZIP
 #endif
 
+/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
+   the cost of a larger memory footprint */
+/* #define LIT_MEM */
+
 /* ===========================================================================
  * Internal compression state.
  */
@@ -259,8 +263,14 @@ struct internal_state {
      *   - I can't count above 4
      */
 
+#ifdef LIT_MEM
+    uint16_t *d_buf;              /* buffer for distances */
+    unsigned char *l_buf;         /* buffer for literals/lengths */
+#else
     unsigned char *sym_buf;       /* buffer for distances and literals/lengths */
-    unsigned int sym_next;        /* running index in sym_buf */
+#endif
+
+    unsigned int sym_next;        /* running index in symbol buffer */
     unsigned int sym_end;         /* symbol table full when sym_next reaches this */
 
     unsigned long opt_len;        /* bit length of current block with optimal trees */
diff --git a/deflate_p.h b/deflate_p.h
index dd2021a0f59a167994e86a5aed95639755942bd7..2b15b91d8803325f119653467e5df7a800ed4005 100644 (file)
--- a/deflate_p.h
+++ b/deflate_p.h
@@ -60,27 +60,37 @@ extern const unsigned char Z_INTERNAL zng_dist_code[];
 
 static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
     /* c is the unmatched char */
+#ifdef LIT_MEM
+    s->d_buf[s->sym_next] = 0;
+    s->l_buf[s->sym_next++] = c;
+#else
     s->sym_buf[s->sym_next++] = 0;
     s->sym_buf[s->sym_next++] = 0;
     s->sym_buf[s->sym_next++] = c;
+#endif
     s->dyn_ltree[c].Freq++;
     Tracevv((stderr, "%c", c));
     Assert(c <= (STD_MAX_MATCH-STD_MIN_MATCH), "zng_tr_tally: bad literal");
     return (s->sym_next == s->sym_end);
 }
 
-static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) {
+static inline int zng_tr_tally_dist(deflate_state* s, uint32_t dist, uint32_t len) {
     /* dist: distance of matched string */
     /* len: match length-STD_MIN_MATCH */
+#ifdef LIT_MEM
+    s->d_buf[s->sym_next] = dist;
+    s->l_buf[s->sym_next++] = len;
+#else
     s->sym_buf[s->sym_next++] = (uint8_t)(dist);
     s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8);
     s->sym_buf[s->sym_next++] = (uint8_t)len;
+#endif
     s->matches++;
     dist--;
     Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES,
         "zng_tr_tally: bad match");
 
-    s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++;
+    s->dyn_ltree[zng_length_code[len] + LITERALS + 1].Freq++;
     s->dyn_dtree[d_code(dist)].Freq++;
     return (s->sym_next == s->sym_end);
 }
diff --git a/deflate_slow.c b/deflate_slow.c
index cb2f0a02b4e0f5b9e4c194574717ecb4f97659b2..cbafdb9862eeb12d7b2d50d04e91d09d8b3cf2f5 100644 (file)
--- a/deflate_slow.c
+++ b/deflate_slow.c
@@ -129,7 +129,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
     }
     Assert(flush != Z_NO_FLUSH, "no flush?");
     if (UNLIKELY(s->match_available)) {
-        (void) zng_tr_tally_lit(s, s->window[s->strstart-1]);
+        Z_UNUSED(zng_tr_tally_lit(s, s->window[s->strstart-1]));
         s->match_available = 0;
     }
     s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1);
diff --git a/trees.c b/trees.c
index 5bb88389baa331cd50ccec7c725ce6ff0350eac5..d10f4a49f3a8d6d44c76e92bb44fc223a19f7896 100644 (file)
--- a/trees.c
+++ b/trees.c
@@ -718,21 +718,30 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data
     /* dtree: distance tree */
     unsigned dist;      /* distance of matched string */
     int lc;             /* match length or unmatched char (if dist == 0) */
-    unsigned sx = 0;    /* running index in sym_buf */
+    unsigned sx = 0;    /* running index in symbol buffers */
 
     if (s->sym_next != 0) {
         do {
+#ifdef LIT_MEM
+            dist = s->d_buf[sx];
+            lc = s->l_buf[sx++];
+#else
             dist = s->sym_buf[sx++] & 0xff;
             dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
             lc = s->sym_buf[sx++];
+#endif
             if (dist == 0) {
                 zng_emit_lit(s, ltree, lc);
             } else {
                 zng_emit_dist(s, ltree, dtree, lc, dist);
             } /* literal or match pair ? */
 
-            /* Check that the overlay between pending_buf and sym_buf is ok: */
+            /* Check for no overlay of pending_buf on needed symbols */
+#ifdef LIT_MEM
+            Assert(s->pending < (s->lit_bufsize << 1) + sx, "pending_buf overflow");
+#else
             Assert(s->pending < s->lit_bufsize + sx, "pending_buf overflow");
+#endif
         } while (sx < s->sym_next);
     }