From: Mark Adler Date: Tue, 30 Jan 2024 00:38:32 +0000 (-0800) Subject: Add LIT_MEM define to use more memory for a small deflate speedup. X-Git-Tag: 2.2.0~95 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a3fb271c6efa337bda675d09d674179007a1be90;p=thirdparty%2Fzlib-ng.git Add LIT_MEM define to use more memory for a small deflate speedup. A bug fix in zlib 1.2.12 resulted in a slight slowdown (1-2%) of deflate. This commit provides the option to #define LIT_MEM, which uses more memory to reverse most of that slowdown. The memory for the pending buffer and symbol buffers is increased by 25%, which increases the total memory usage with the default parameters by about 6%. madler/zlib#ac8f12c97d1afd9bafa9c710f827d40a407d3266 --- diff --git a/deflate.c b/deflate.c index 2a0a20e5..c6accfd0 100644 --- a/deflate.c +++ b/deflate.c @@ -295,7 +295,11 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level * symbols from which it is being constructed. */ +#ifdef LIT_MEM + s->pending_buf = (unsigned char *) ZALLOC(strm, s->lit_bufsize, 5); +#else s->pending_buf = (unsigned char *) ZALLOC(strm, s->lit_bufsize, 4); +#endif s->pending_buf_size = s->lit_bufsize * 4; if (s->window == NULL || s->prev == NULL || s->head == NULL || s->pending_buf == NULL) { @@ -304,8 +308,14 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level PREFIX(deflateEnd)(strm); return Z_MEM_ERROR; } +#ifdef LIT_MEM + s->d_buf = (uint16_t *)(s->pending_buf + (s->lit_bufsize << 1)); + s->l_buf = s->pending_buf + (s->lit_bufsize << 2); + s->sym_end = s->lit_bufsize - 1; +#else s->sym_buf = s->pending_buf + s->lit_bufsize; s->sym_end = (s->lit_bufsize - 1) * 3; +#endif /* We avoid equality with lit_bufsize*3 because of wraparound at 64K * on 16 bit machines and because stored blocks are restricted to * 64K-1 bytes. @@ -506,9 +516,17 @@ int32_t Z_EXPORT PREFIX(deflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32 if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; + +#ifdef LIT_MEM + if (bits < 0 || bits > BIT_BUF_SIZE || + (unsigned char *)s->d_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3)) + return Z_BUF_ERROR; +#else if (bits < 0 || bits > BIT_BUF_SIZE || bits > (int32_t)(sizeof(value) << 3) || s->sym_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3)) return Z_BUF_ERROR; +#endif + do { put = BIT_BUF_SIZE - s->bi_valid; put = MIN(put, bits); @@ -1068,7 +1086,12 @@ int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou memcpy(ds->pending_buf, ss->pending_buf, ds->pending_buf_size); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); +#ifdef LIT_MEM + ds->d_buf = (uint16_t *)(ds->pending_buf + (ds->lit_bufsize << 1)); + ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); +#else ds->sym_buf = ds->pending_buf + ds->lit_bufsize; +#endif ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; diff --git a/deflate.h b/deflate.h index ec1519da..3db110a4 100644 --- a/deflate.h +++ b/deflate.h @@ -22,6 +22,10 @@ # define GZIP #endif +/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at + the cost of a larger memory footprint */ +/* #define LIT_MEM */ + /* =========================================================================== * Internal compression state. */ @@ -259,8 +263,14 @@ struct internal_state { * - I can't count above 4 */ +#ifdef LIT_MEM + uint16_t *d_buf; /* buffer for distances */ + unsigned char *l_buf; /* buffer for literals/lengths */ +#else unsigned char *sym_buf; /* buffer for distances and literals/lengths */ - unsigned int sym_next; /* running index in sym_buf */ +#endif + + unsigned int sym_next; /* running index in symbol buffer */ unsigned int sym_end; /* symbol table full when sym_next reaches this */ unsigned long opt_len; /* bit length of current block with optimal trees */ diff --git a/deflate_p.h b/deflate_p.h index dd2021a0..2b15b91d 100644 --- a/deflate_p.h +++ b/deflate_p.h @@ -60,27 +60,37 @@ extern const unsigned char Z_INTERNAL zng_dist_code[]; static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) { /* c is the unmatched char */ +#ifdef LIT_MEM + s->d_buf[s->sym_next] = 0; + s->l_buf[s->sym_next++] = c; +#else s->sym_buf[s->sym_next++] = 0; s->sym_buf[s->sym_next++] = 0; s->sym_buf[s->sym_next++] = c; +#endif s->dyn_ltree[c].Freq++; Tracevv((stderr, "%c", c)); Assert(c <= (STD_MAX_MATCH-STD_MIN_MATCH), "zng_tr_tally: bad literal"); return (s->sym_next == s->sym_end); } -static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) { +static inline int zng_tr_tally_dist(deflate_state* s, uint32_t dist, uint32_t len) { /* dist: distance of matched string */ /* len: match length-STD_MIN_MATCH */ +#ifdef LIT_MEM + s->d_buf[s->sym_next] = dist; + s->l_buf[s->sym_next++] = len; +#else s->sym_buf[s->sym_next++] = (uint8_t)(dist); s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8); s->sym_buf[s->sym_next++] = (uint8_t)len; +#endif s->matches++; dist--; Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES, "zng_tr_tally: bad match"); - s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++; + s->dyn_ltree[zng_length_code[len] + LITERALS + 1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; return (s->sym_next == s->sym_end); } diff --git a/deflate_slow.c b/deflate_slow.c index cb2f0a02..cbafdb98 100644 --- a/deflate_slow.c +++ b/deflate_slow.c @@ -129,7 +129,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { } Assert(flush != Z_NO_FLUSH, "no flush?"); if (UNLIKELY(s->match_available)) { - (void) zng_tr_tally_lit(s, s->window[s->strstart-1]); + Z_UNUSED(zng_tr_tally_lit(s, s->window[s->strstart-1])); s->match_available = 0; } s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1); diff --git a/trees.c b/trees.c index 5bb88389..d10f4a49 100644 --- a/trees.c +++ b/trees.c @@ -718,21 +718,30 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data /* dtree: distance tree */ unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ - unsigned sx = 0; /* running index in sym_buf */ + unsigned sx = 0; /* running index in symbol buffers */ if (s->sym_next != 0) { do { +#ifdef LIT_MEM + dist = s->d_buf[sx]; + lc = s->l_buf[sx++]; +#else dist = s->sym_buf[sx++] & 0xff; dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; lc = s->sym_buf[sx++]; +#endif if (dist == 0) { zng_emit_lit(s, ltree, lc); } else { zng_emit_dist(s, ltree, dtree, lc, dist); } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and sym_buf is ok: */ + /* Check for no overlay of pending_buf on needed symbols */ +#ifdef LIT_MEM + Assert(s->pending < (s->lit_bufsize << 1) + sx, "pending_buf overflow"); +#else Assert(s->pending < s->lit_bufsize + sx, "pending_buf overflow"); +#endif } while (sx < s->sym_next); }