From dc3b60841dbfa9cf37be3efb4568f055b4e15580 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Wed, 30 Jun 2021 19:36:08 -0400 Subject: [PATCH] Reorganize inflate window layout This commit significantly improves inflate performance by reorganizing the window buffer into a contiguous window and pending output buffer. The goal of this layout is to reduce branching, improve cache locality, and enable the use of crc folding with gzip input. The window buffer is allocated as a multiple of the user-selected window size. In this commit, a factor of 2 is used. The window buffer is divided into two sections. The first section, window offset [0, wsize), is reserved for history that has already been output. The second section, window offset [wsize, 2 * wsize), is reserved for buffering pending output that hasn't been flushed to the user's output buffer yet. The history section grows downwards, towards the window offset of 0. The pending output section grows upwards, towards the end of the buffer. As a result, all of the possible distance/length data that may need to be copied is contiguous. This removes the need to stitch together output from 2 separate buffers. In the case of gzip input, crc folding is used to copy the pending output to the user's buffers. 
Co-authored-by: Nathan Moinvaziri --- infback.c | 15 +-- inffast.c | 117 +++++---------------- inffast.h | 2 +- inflate.c | 267 +++++++++++++++++++++--------------------------- inflate.h | 5 + inflate_p.h | 100 ++++++++++++++++-- test/infcover.c | 19 +--- 7 files changed, 243 insertions(+), 282 deletions(-) diff --git a/infback.c b/infback.c index cab3f66c..8c43a7ce 100644 --- a/infback.c +++ b/infback.c @@ -40,7 +40,7 @@ int32_t Z_EXPORT PREFIX(inflateBackInit_)(PREFIX3(stream) *strm, int32_t windowB } if (strm->zfree == NULL) strm->zfree = zng_cfree; - state = (struct inflate_state *) ZALLOC(strm, 1, sizeof(struct inflate_state)); + state = (struct inflate_state *)ZALLOC_STATE(strm, 1, sizeof(struct inflate_state)); if (state == NULL) return Z_MEM_ERROR; Tracev((stderr, "inflate: allocated\n")); @@ -338,17 +338,6 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in state->mode = LEN; case LEN: - /* use inflate_fast() if we have enough input and output */ - if (have >= INFLATE_FAST_MIN_HAVE && - left >= INFLATE_FAST_MIN_LEFT) { - RESTORE(); - if (state->whave < state->wsize) - state->whave = state->wsize - left; - zng_inflate_fast(strm, state->wsize); - LOAD(); - break; - } - /* get a literal, length, or end-of-block code */ for (;;) { here = state->lencode[BITS(state->lenbits)]; @@ -490,7 +479,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in int32_t Z_EXPORT PREFIX(inflateBackEnd)(PREFIX3(stream) *strm) { if (strm == NULL || strm->state == NULL || strm->zfree == NULL) return Z_STREAM_ERROR; - ZFREE(strm, strm->state); + ZFREE_STATE(strm, strm->state); strm->state = NULL; Tracev((stderr, "inflate: end\n")); return Z_OK; diff --git a/inffast.c b/inffast.c index 2c3add3a..57031aec 100644 --- a/inffast.c +++ b/inffast.c @@ -62,22 +62,16 @@ static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) { requires strm->avail_out >= 258 for each loop to avoid checking for output space. 
*/ -void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) { - /* start: inflate()'s starting value for strm->avail_out */ +void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm) { struct inflate_state *state; z_const unsigned char *in; /* local strm->next_in */ const unsigned char *last; /* have enough input while in < last */ unsigned char *out; /* local strm->next_out */ - unsigned char *beg; /* inflate()'s initial strm->next_out */ unsigned char *end; /* while out < end, enough space available */ - unsigned char *safe; /* can use chunkcopy provided out < safe */ #ifdef INFLATE_STRICT unsigned dmax; /* maximum distance from zlib header */ #endif unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char *window; /* allocated sliding window, if wsize != 0 */ /* hold is a local copy of strm->hold. By default, hold satisfies the same invariants that strm->hold does, namely that (hold >> bits) == 0. 
This @@ -127,24 +121,17 @@ void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) { /* window position, window bytes to copy */ unsigned len; /* match length, unused bytes */ unsigned dist; /* match distance */ - unsigned char *from; /* where to copy match from */ - unsigned extra_safe; /* copy chunks safely in all cases */ /* copy state to local variables */ state = (struct inflate_state *)strm->state; in = strm->next_in; last = in + (strm->avail_in - (INFLATE_FAST_MIN_HAVE - 1)); - out = strm->next_out; - beg = out - (start - strm->avail_out); - end = out + (strm->avail_out - (INFLATE_FAST_MIN_LEFT - 1)); - safe = out + strm->avail_out; + wsize = state->wsize; + out = state->window + wsize + state->wnext; + end = state->window + (wsize * 2) - (INFLATE_FAST_MIN_LEFT - 1); #ifdef INFLATE_STRICT dmax = state->dmax; #endif - wsize = state->wsize; - whave = state->whave; - wnext = state->wnext; - window = state->window; hold = state->hold; bits = state->bits; lcode = state->lencode; @@ -152,11 +139,6 @@ void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) { lmask = (1U << state->lenbits) - 1; dmask = (1U << state->distbits) - 1; - /* Detect if out and window point to the same memory allocation. In this instance it is - necessary to use safe chunk copy functions to prevent overwriting the window. If the - window is overwritten then future matches with far distances will fail to copy correctly. 
*/ - extra_safe = (wsize != 0 && out >= window && out + INFLATE_FAST_MIN_LEFT <= window + wsize); - /* decode literals and length/distances until end-of-block or not enough input data or output space */ do { @@ -165,6 +147,13 @@ void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) { in += 6; bits += 48; } + if (out >= end) { + state->wnext = (uint32_t)(out - (state->window + wsize)); + window_output_flush(strm); + out = state->window + state->wsize + state->wnext; + if (strm->avail_out == 0) + break; + } here = lcode + (hold & lmask); dolen: DROPBITS(here->bits); @@ -211,76 +200,18 @@ void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) { #endif DROPBITS(op); Tracevv((stderr, "inflate: distance %u\n", dist)); - op = (unsigned)(out - beg); /* max distance in output */ - if (dist > op) { /* see if copy from window */ - op = dist - op; /* distance back in window */ - if (op > whave) { - if (state->sane) { - SET_BAD("invalid distance too far back"); - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - if (len <= op - whave) { - do { - *out++ = 0; - } while (--len); - continue; - } - len -= op - whave; - do { - *out++ = 0; - } while (--op > whave); - if (op == 0) { - from = out - dist; - do { - *out++ = *from++; - } while (--len); - continue; - } -#endif - } - from = window; - if (wnext == 0) { /* very common case */ - from += wsize - op; - } else if (wnext >= op) { /* contiguous in window */ - from += wnext - op; - } else { /* wrap around window */ - op -= wnext; - from += wsize - op; - if (op < len) { /* some from end of window */ - len -= op; - out = functable.chunkcopy_safe(out, from, op, safe); - from = window; /* more from start of window */ - op = wnext; - /* This (rare) case can create a situation where - the first chunkcopy below must be checked. 
- */ - } - } - if (op < len) { /* still need some from output */ - len -= op; - out = functable.chunkcopy_safe(out, from, op, safe); - out = functable.chunkunroll(out, &dist, &len); - out = functable.chunkcopy_safe(out, out - dist, len, safe); - } else { - out = functable.chunkcopy_safe(out, from, len, safe); + + if (out - dist < ((state->window + state->wsize) - state->whave)) { + if (state->sane) { + SET_BAD("invalid distance too far back"); + break; } - } else if (extra_safe) { - /* Whole reference is in range of current output. */ - if (dist >= len || dist >= state->chunksize) - out = functable.chunkcopy_safe(out, out - dist, len, safe); - else - out = functable.chunkmemset_safe(out, dist, len, (unsigned)((safe - out) + 1)); + } + + if (len > dist || dist < state->chunksize) { + out = functable.chunkmemset(out, dist, len); } else { - /* Whole reference is in range of current output. No range checks are - necessary because we start with room for at least 258 bytes of output, - so unroll and roundoff operations can write beyond `out+len` so long - as they stay within 258 bytes of `out`. - */ - if (dist >= len || dist >= state->chunksize) - out = functable.chunkcopy(out, out - dist, len); - else - out = functable.chunkmemset(out, dist, len); + out = functable.chunkcopy(out, out - dist, len); } } else if ((op & 64) == 0) { /* 2nd level distance code */ here = dcode + here->val + BITS(op); @@ -300,7 +231,7 @@ void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) { SET_BAD("invalid literal/length code"); break; } - } while (in < last && out < end); + } while (in < last); /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ len = bits >> 3; @@ -310,12 +241,10 @@ void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start) { /* update state and return */ strm->next_in = in; - strm->next_out = out; strm->avail_in = (unsigned)(in < last ? 
(INFLATE_FAST_MIN_HAVE - 1) + (last - in) : (INFLATE_FAST_MIN_HAVE - 1) - (in - last)); - strm->avail_out = (unsigned)(out < end ? (INFLATE_FAST_MIN_LEFT - 1) + (end - out) - : (INFLATE_FAST_MIN_LEFT - 1) - (out - end)); + state->wnext = (uint32_t)(out - (state->window + state->wsize)); Assert(bits <= 32, "Remaining bits greater than 32"); state->hold = (uint32_t)hold; state->bits = bits; diff --git a/inffast.h b/inffast.h index 179a65da..cf9a3e2d 100644 --- a/inffast.h +++ b/inffast.h @@ -10,7 +10,7 @@ subject to change. Applications should only use zlib.h. */ -void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start); +void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm); #define INFLATE_FAST_MIN_HAVE 8 #define INFLATE_FAST_MIN_LEFT 258 diff --git a/inflate.c b/inflate.c index f87d6c94..cca8df67 100644 --- a/inflate.c +++ b/inflate.c @@ -12,36 +12,8 @@ #include "inffixed_tbl.h" #include "functable.h" -/* Architecture-specific hooks. */ -#ifdef S390_DFLTCC_INFLATE -# include "arch/s390/dfltcc_inflate.h" -#else -/* Memory management for the inflate state. Useful for allocating arch-specific extension blocks. */ -# define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) -# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) -# define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) -/* Memory management for the window. Useful for allocation the aligned window. */ -# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) -# define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) -/* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */ -# define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) -/* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */ -# define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) -/* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code. 
*/ -# define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) -/* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */ -# define INFLATE_NEED_CHECKSUM(strm) 1 -/* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */ -# define INFLATE_NEED_UPDATEWINDOW(strm) 1 -/* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */ -# define INFLATE_MARK_HOOK(strm) do {} while (0) -/* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. */ -#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) -#endif - /* function prototypes */ static int inflateStateCheck(PREFIX3(stream) *strm); -static int updatewindow(PREFIX3(stream) *strm, const unsigned char *end, uint32_t copy); static uint32_t syncsearch(uint32_t *have, const unsigned char *buf, uint32_t len); static int inflateStateCheck(PREFIX3(stream) *strm) { @@ -87,7 +59,6 @@ int32_t Z_EXPORT PREFIX(inflateReset)(PREFIX3(stream) *strm) { if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state *)strm->state; - state->wsize = 0; state->whave = 0; state->wnext = 0; return PREFIX(inflateResetKeep)(strm); @@ -120,6 +91,7 @@ int32_t Z_EXPORT PREFIX(inflateReset2)(PREFIX3(stream) *strm, int32_t windowBits if (state->window != NULL && state->wbits != (unsigned)windowBits) { ZFREE_WINDOW(strm, state->window); state->window = NULL; + state->wsize = 0; } /* update state and reset the rest of it */ @@ -156,12 +128,14 @@ int32_t Z_EXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int32_t windowBits strm->state = (struct internal_state *)state; state->strm = strm; state->window = NULL; + state->wsize = 0; state->mode = HEAD; /* to pass state test in inflateReset2() */ state->chunksize = functable.chunksize(); ret = PREFIX(inflateReset2)(strm, windowBits); if (ret != Z_OK) { ZFREE_STATE(strm, state); strm->state = 
NULL; + return ret; } return ret; } @@ -206,9 +180,9 @@ int Z_INTERNAL inflate_ensure_window(struct inflate_state *state) { /* if it hasn't been done already, allocate space for the window */ if (state->window == NULL) { unsigned wsize = 1U << state->wbits; - state->window = (unsigned char *) ZALLOC_WINDOW(state->strm, wsize + state->chunksize, sizeof(unsigned char)); - if (state->window == Z_NULL) - return 1; + state->window = (unsigned char *)ZALLOC_WINDOW(state->strm, (wsize * 2) + state->chunksize, sizeof(unsigned char)); + if (state->window == NULL) + return Z_MEM_ERROR; memset(state->window + wsize, 0, state->chunksize); } @@ -219,58 +193,9 @@ int Z_INTERNAL inflate_ensure_window(struct inflate_state *state) { state->whave = 0; } - return 0; -} - -/* - Update the window with the last wsize (normally 32K) bytes written before - returning. If window does not exist yet, create it. This is only called - when a window is already in use, or when output has been written during this - inflate call, but the end of the deflate stream has not been reached yet. - It is also called to create a window for dictionary data when a dictionary - is loaded. - - Providing output buffers larger than 32K to inflate() should provide a speed - advantage, since only the last 32K of output is copied to the sliding window - upon return from inflate(), and since all distances after the first 32K of - output will fall in the output data, making match copies simpler and faster. - The advantage may be dependent on the size of the processor's data caches. 
- */ -static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t copy) { - struct inflate_state *state; - uint32_t dist; - - state = (struct inflate_state *)strm->state; - - if (inflate_ensure_window(state)) return 1; - - /* copy state->wsize or less output bytes into the circular window */ - if (copy >= state->wsize) { - memcpy(state->window, end - state->wsize, state->wsize); - state->wnext = 0; - state->whave = state->wsize; - } else { - dist = state->wsize - state->wnext; - if (dist > copy) - dist = copy; - memcpy(state->window + state->wnext, end - copy, dist); - copy -= dist; - if (copy) { - memcpy(state->window, end - copy, copy); - state->wnext = copy; - state->whave = state->wsize; - } else { - state->wnext += dist; - if (state->wnext == state->wsize) - state->wnext = 0; - if (state->whave < state->wsize) - state->whave += dist; - } - } - return 0; + return Z_OK; } - /* Private macros for inflate() Look in inflate_p.h for macros shared with inflateBack() @@ -376,7 +301,6 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { unsigned bits; /* bits in bit buffer */ uint32_t in, out; /* save starting available input and output */ unsigned copy; /* number of stored or match bytes to copy */ - unsigned char *from; /* where to copy match bytes from */ code here; /* current decoding table entry */ code last; /* parent table entry */ unsigned len; /* length to copy for repeats, bits to drop */ @@ -578,7 +502,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { state->head->hcrc = (int)((state->flags >> 9) & 1); state->head->done = 1; } - strm->adler = state->check = CRC32_INITIAL_VALUE; + strm->adler = state->check = functable.crc32_fold_reset(&state->crc_fold); state->mode = TYPE; break; #endif @@ -601,6 +525,17 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { goto inf_leave; case TYPEDO: + /* create window if it doesn't exist */ + if (state->window == NULL) { + 
RESTORE(); + ret = inflate_ensure_window(state); + if (ret != Z_OK) { + ZFREE_STATE(strm, state); + strm->state = NULL; + return ret; + } + LOAD(); + } /* determine and dispatch block type */ INFLATE_TYPEDO_HOOK(strm, flush); /* hook for IBM Z DFLTCC */ if (state->last) { @@ -657,13 +592,24 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { /* copy stored block from input to output */ copy = state->length; if (copy) { + unsigned char *end = state->window + (state->wsize * 2); + int64_t diff = end - put; + copy = MIN(copy, have); - copy = MIN(copy, left); - if (copy == 0) goto inf_leave; + if (copy > diff) { + if (left > 0) { + RESTORE(); + window_output_flush(strm); + LOAD(); + diff = end - put; + } + copy = MIN(copy, (uint32_t)diff); + } + if (copy == 0) + goto inf_leave; memcpy(put, next, copy); have -= copy; next += copy; - left -= copy; put += copy; state->length -= copy; break; @@ -799,7 +745,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { /* use inflate_fast() if we have enough input and output */ if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_LEFT) { RESTORE(); - zng_inflate_fast(strm, out); + zng_inflate_fast(strm); LOAD(); if (state->mode == TYPE) state->back = -1; @@ -914,69 +860,73 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { Tracevv((stderr, "inflate: distance %u\n", state->offset)); state->mode = MATCH; - case MATCH: + case MATCH: { /* copy match from window to output */ - if (left == 0) goto inf_leave; - copy = out - left; - if (state->offset > copy) { /* copy from window */ - copy = state->offset - copy; - if (copy > state->whave) { - if (state->sane) { - SET_BAD("invalid distance too far back"); - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - Trace((stderr, "inflate.c too far\n")); - copy -= state->whave; - copy = MIN(copy, state->length); - copy = MIN(copy, left); - left -= copy; - state->length -= copy; - do { - *put++ = 0; - } 
while (--copy); - if (state->length == 0) - state->mode = LEN; - break; -#endif + if (left == 0) + goto inf_leave; + + unsigned char *end = state->window + (state->wsize * 2); + int64_t buf_left = end - put; + copy = state->length; + RESTORE(); + if (copy > buf_left) { + if (strm->avail_out > 0) { + /* relies on RESTORE() above with no changes to those vars */ + window_output_flush(strm); + LOAD(); + buf_left = end - put; } - if (copy > state->wnext) { - copy -= state->wnext; - from = state->window + (state->wsize - copy); - } else { - from = state->window + (state->wnext - copy); + copy = MIN(copy, (uint32_t)buf_left); + } + if (copy == 0) + goto inf_leave; + if (state->offset > state->wnext + state->whave) { + if (state->sane) { + SET_BAD("invalid distance too far back"); + break; } - copy = MIN(copy, state->length); - copy = MIN(copy, left); - - put = functable.chunkcopy_safe(put, from, copy, put + left); + } + unsigned char *next_out = state->window + state->wsize + state->wnext; + if (copy <= state->offset) { + functable.chunkcopy_safe(next_out, next_out - state->offset, copy, put + buf_left); } else { /* copy from output */ - copy = MIN(state->length, left); - - put = functable.chunkmemset_safe(put, state->offset, copy, left); + functable.chunkmemset_safe(next_out, state->offset, copy, (uint32_t)buf_left); } - left -= copy; + state->wnext += copy; state->length -= copy; + LOAD(); if (state->length == 0) state->mode = LEN; break; - + } case LIT: + if (put >= state->window + (state->wsize * 2)) { + RESTORE(); + window_output_flush(strm); + LOAD(); + } if (left == 0) goto inf_leave; *put++ = (unsigned char)(state->length); - left--; state->mode = LEN; break; case CHECK: + RESTORE(); + window_output_flush(strm); + LOAD(); + if (strm->avail_out == 0 && state->wnext) + goto inf_leave; if (state->wrap) { NEEDBITS(32); out -= left; strm->total_out += out; state->total += out; - if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) - strm->adler = state->check 
= UPDATE(state->check, put - out, out); + + if (INFLATE_NEED_CHECKSUM(strm) && strm->total_out) { + if (state->wrap & 2) + strm->adler = state->check = functable.crc32_fold_final(&state->crc_fold); + } out = left; if ((state->wrap & 4) && ( #ifdef GUNZIP @@ -1026,26 +976,21 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { /* Return from inflate(), updating the total counts and the check value. If there was no progress during the inflate() call, return a buffer - error. Call updatewindow() to create and/or update the window state. + error. Note: a memory error from inflate() is non-recoverable. */ inf_leave: RESTORE(); - if (INFLATE_NEED_UPDATEWINDOW(strm) && - (state->wsize || (out != strm->avail_out && state->mode < BAD && - (state->mode < CHECK || flush != Z_FINISH)))) { - if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { - state->mode = MEM; - return Z_MEM_ERROR; - } - } + + if (strm->avail_out && state->wnext) + window_output_flush(strm); + in -= strm->avail_in; out -= strm->avail_out; strm->total_in += in; strm->total_out += out; state->total += out; - if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) - strm->adler = state->check = UPDATE(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + (state->mode == TYPE ? 128 : 0) + (state->mode == LEN_ || state->mode == COPY_ ? 
256 : 0); if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) @@ -1086,8 +1031,10 @@ int32_t Z_EXPORT PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm, uint8_t *di int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8_t *dictionary, uint32_t dictLength) { struct inflate_state *state; - unsigned long dictid; - int32_t ret; + unsigned long dictid, dict_copy, hist_copy; + const unsigned char *dict_from, *hist_from; + unsigned char *dict_to, *hist_to; + int ret; /* check state */ if (inflateStateCheck(strm)) @@ -1102,14 +1049,33 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 if (dictid != state->check) return Z_DATA_ERROR; } - - /* copy dictionary to window using updatewindow(), which will amend the - existing dictionary if appropriate */ - ret = updatewindow(strm, dictionary + dictLength, dictLength); - if (ret) { - state->mode = MEM; + ret = inflate_ensure_window(state); + if (ret != Z_OK) return Z_MEM_ERROR; + + Tracec(state->wnext != 0, (stderr, "Setting dictionary with unflushed output")); + + /* copy dictionary to window and amend if necessary */ + dict_from = dictionary; + dict_copy = dictLength; + if (dict_copy > state->wsize) { + dict_copy = state->wsize; + dict_from += (dictLength - dict_copy); } + dict_to = state->window + state->wsize - dict_copy; + + hist_from = state->window + state->wsize - state->whave; + hist_copy = state->wsize - dict_copy; + if (hist_copy > state->whave) + hist_copy = state->whave; + hist_to = dict_to - hist_copy; + + if (hist_copy) + memcpy(hist_to, hist_from, hist_copy); + if (dict_copy) + memcpy(dict_to, dict_from, dict_copy); + + state->whave = hist_copy + dict_copy; state->havedict = 1; Tracev((stderr, "inflate: dictionary set\n")); return Z_OK; @@ -1248,7 +1214,8 @@ int32_t Z_EXPORT PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou return Z_MEM_ERROR; window = NULL; if (state->window != NULL) { - window = (unsigned char 
*)ZALLOC_WINDOW(source, 1U << state->wbits, sizeof(unsigned char)); + wsize = 1U << state->wbits; + window = (unsigned char *)ZALLOC_WINDOW(state->strm, (wsize * 2) + state->chunksize, sizeof(unsigned char)); if (window == NULL) { ZFREE_STATE(source, copy); return Z_MEM_ERROR; @@ -1266,7 +1233,7 @@ int32_t Z_EXPORT PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou copy->next = copy->codes + (state->next - state->codes); if (window != NULL) { wsize = 1U << state->wbits; - memcpy(window, state->window, wsize); + memcpy(window, state->window, (wsize * 2) + state->chunksize); } copy->window = window; dest->state = (struct internal_state *)copy; diff --git a/inflate.h b/inflate.h index a4274946..d33ea2a7 100644 --- a/inflate.h +++ b/inflate.h @@ -11,6 +11,8 @@ #ifndef INFLATE_H_ #define INFLATE_H_ +#include "crc32_fold.h" + /* define NO_GZIP when compiling if you want to disable gzip header and trailer decoding by inflate(). NO_GZIP would be used to avoid linking in the crc code when it is not needed. For shared libraries, gzip decoding should be left enabled. */ @@ -100,6 +102,9 @@ struct inflate_state { uint32_t whave; /* valid bytes in the window */ uint32_t wnext; /* window write index */ unsigned char *window; /* allocated sliding window, if needed */ + + crc32_fold ALIGNED_(16) crc_fold; + /* bit accumulator */ uint32_t hold; /* input bit accumulator */ unsigned bits; /* number of bits in "in" */ diff --git a/inflate_p.h b/inflate_p.h index 76fe2dcc..168f270f 100644 --- a/inflate_p.h +++ b/inflate_p.h @@ -5,18 +5,41 @@ #ifndef INFLATE_P_H #define INFLATE_P_H -/* - * Macros shared by inflate() and inflateBack() - */ +#include "zbuild.h" +#include "functable.h" -/* check function to use adler32() for zlib or crc32() for gzip */ -#ifdef GUNZIP -# define UPDATE(check, buf, len) \ - (state->flags ? PREFIX(crc32)(check, buf, len) : functable.adler32(check, buf, len)) +/* Architecture-specific hooks. 
*/ +#ifdef S390_DFLTCC_INFLATE +# include "arch/s390/dfltcc_inflate.h" #else -# define UPDATE(check, buf, len) functable.adler32(check, buf, len) +/* Memory management for the inflate state. Useful for allocating arch-specific extension blocks. */ +# define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) +# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) +# define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) +/* Memory management for the window. Useful for allocation the aligned window. */ +# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) +# define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) +/* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */ +# define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) +/* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */ +# define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) +/* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code. */ +# define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) +/* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */ +# define INFLATE_NEED_CHECKSUM(strm) 1 +/* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */ +# define INFLATE_NEED_UPDATEWINDOW(strm) 1 +/* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */ +# define INFLATE_MARK_HOOK(strm) do {} while (0) +/* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. 
*/ +#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) #endif + +/* + * Macros shared by inflate() and inflateBack() + */ + /* check macros for header crc */ #ifdef GUNZIP # define CRC2(check, word) \ @@ -39,7 +62,7 @@ /* Load registers with state in inflate() for speed */ #define LOAD() \ do { \ - put = strm->next_out; \ + put = state->window + state->wsize + state->wnext; \ left = strm->avail_out; \ next = strm->next_in; \ have = strm->avail_in; \ @@ -50,7 +73,7 @@ /* Restore state from registers in inflate() */ #define RESTORE() \ do { \ - strm->next_out = put; \ + state->wnext = (uint32_t)(put - (state->window + state->wsize)); \ strm->avail_out = left; \ strm->next_in = (z_const unsigned char *)next; \ strm->avail_in = have; \ @@ -99,3 +122,60 @@ state->mode = BAD; \ strm->msg = (char *)errmsg; \ } while (0) + + +static inline void inf_crc_copy(PREFIX3(stream) *strm, unsigned char *const dst, + const unsigned char *const src, size_t len) { + struct inflate_state *const state = (struct inflate_state *const)strm->state; + + if (!INFLATE_NEED_CHECKSUM(strm)) + return; + + /* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP + if (state->flags) + functable.crc32_fold_copy(&state->crc_fold, dst, src, len); + else +#endif + { + memcpy(dst, src, len); + strm->adler = state->check = functable.adler32(state->check, dst, len); + } +} + +static inline void window_output_flush(PREFIX3(stream) *strm) { + struct inflate_state *const state = (struct inflate_state *const)strm->state; + size_t write_offset, read_offset, copy_size; + uint32_t out_bytes; + + if (state->wnext > strm->avail_out) { + out_bytes = strm->avail_out; + copy_size = state->wnext - out_bytes; + } else { + out_bytes = state->wnext; + copy_size = 0; + } + + /* Copy from pending buffer to stream output */ + inf_crc_copy(strm, strm->next_out, state->window + state->wsize, out_bytes); + + strm->avail_out -= out_bytes; + strm->next_out += out_bytes; + + /* Discard bytes in sliding 
window */ + if (state->whave + out_bytes > state->wsize) { + write_offset = 0; + read_offset = out_bytes; + copy_size += state->wsize; + } else { + read_offset = state->wsize - state->whave; + write_offset = read_offset - out_bytes; + copy_size += state->whave + out_bytes; + } + + memmove(state->window + write_offset, state->window + read_offset, copy_size); + + state->wnext -= out_bytes; + state->whave += out_bytes; + state->whave = MIN(state->whave, state->wsize); +} diff --git a/test/infcover.c b/test/infcover.c index 3446289e..72a4c575 100644 --- a/test/infcover.c +++ b/test/infcover.c @@ -15,13 +15,8 @@ /* get definition of internal structure so we can mess with it (see pull()), and so we can call inflate_trees() (see cover5()) */ -#define ZLIB_INTERNAL #include "zbuild.h" -#ifdef ZLIB_COMPAT -# include "zlib.h" -#else -# include "zlib-ng.h" -#endif +#include "zutil.h" #include "inftrees.h" #include "inflate.h" @@ -293,6 +288,10 @@ static void inf(char *hex, char *what, unsigned step, int win, unsigned len, int mem_setup(&strm); strm.avail_in = 0; strm.next_in = NULL; + + mem_limit(&strm, 1); + ret = PREFIX(inflateInit2)(&strm, win); assert(ret == Z_MEM_ERROR); + mem_limit(&strm, 0); ret = PREFIX(inflateInit2)(&strm, win); if (ret != Z_OK) { mem_done(&strm, what); @@ -325,10 +324,6 @@ static void inf(char *hex, char *what, unsigned step, int win, unsigned len, int if (ret == Z_NEED_DICT) { ret = PREFIX(inflateSetDictionary)(&strm, in, 1); assert(ret == Z_DATA_ERROR); - mem_limit(&strm, 1); - ret = PREFIX(inflateSetDictionary)(&strm, out, 0); - assert(ret == Z_MEM_ERROR); - mem_limit(&strm, 0); ((struct inflate_state *)strm.state)->mode = DICT; ret = PREFIX(inflateSetDictionary)(&strm, out, 0); assert(ret == Z_OK); @@ -422,10 +417,6 @@ static void cover_wrap(void) { strm.next_in = (void *)"\x63"; strm.avail_out = 1; strm.next_out = (void *)&ret; - mem_limit(&strm, 1); - ret = PREFIX(inflate)(&strm, Z_NO_FLUSH); assert(ret == Z_MEM_ERROR); - ret = 
PREFIX(inflate)(&strm, Z_NO_FLUSH); assert(ret == Z_MEM_ERROR); - mem_limit(&strm, 0); memset(dict, 0, 257); ret = PREFIX(inflateSetDictionary)(&strm, dict, 257); assert(ret == Z_OK); -- 2.47.3