uint32_t adler32_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_neon(uint8_t *out, uint8_t *from, size_t len, size_t left);
uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
-void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
+void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start, int safe_mode);
uint32_t longest_match_neon(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_roll_neon(deflate_state *const s, uint32_t cur_match);
void slide_hash_neon(deflate_state *s);
uint32_t crc32_copy_chorba(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#ifdef CHUNKSET_FALLBACK
-void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
+void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start, int safe_mode);
#endif
#ifdef COMPARE256_FALLBACK
uint32_t longest_match_c(deflate_state *const s, uint32_t cur_match);
uint32_t adler32_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_lsx(uint8_t *out, uint8_t *from, size_t len, size_t left);
uint32_t compare256_lsx(const uint8_t *src0, const uint8_t *src1);
-void inflate_fast_lsx(PREFIX3(stream) *strm, uint32_t start);
+void inflate_fast_lsx(PREFIX3(stream) *strm, uint32_t start, int safe_mode);
uint32_t longest_match_lsx(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_roll_lsx(deflate_state *const s, uint32_t cur_match);
void slide_hash_lsx(deflate_state *s);
uint32_t adler32_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_lasx(uint8_t *out, uint8_t *from, size_t len, size_t left);
uint32_t compare256_lasx(const uint8_t *src0, const uint8_t *src1);
-void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start);
+void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start, int safe_mode);
uint32_t longest_match_lasx(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_roll_lasx(deflate_state *const s, uint32_t cur_match);
void slide_hash_lasx(deflate_state *s);
uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
uint32_t crc32_copy_power8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
void slide_hash_power8(deflate_state *s);
-void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
+void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start, int safe_mode);
#endif
#if !defined(PPC_VMX_NATIVE) && !defined(POWER8_VSX_NATIVE)
uint32_t longest_match_rvv(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_roll_rvv(deflate_state *const s, uint32_t cur_match);
void slide_hash_rvv(deflate_state *s);
-void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
+void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start, int safe_mode);
#endif
#ifndef RISCV_RVV_NATIVE
#ifdef X86_SSE2
uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, size_t len, size_t left);
uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
-void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
+void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start, int safe_mode);
uint32_t longest_match_sse2(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_roll_sse2(deflate_state *const s, uint32_t cur_match);
void slide_hash_sse2(deflate_state *s);
uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_copy_ssse3(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, size_t len, size_t left);
-void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
+void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start, int safe_mode);
#endif
#ifndef X86_SSSE3_NATIVE
uint32_t adler32_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, size_t len, size_t left);
uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
-void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
+void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start, int safe_mode);
uint32_t longest_match_avx2(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_roll_avx2(deflate_state *const s, uint32_t cur_match);
void slide_hash_avx2(deflate_state *s);
uint32_t adler32_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_avx512(uint8_t *out, uint8_t *from, size_t len, size_t left);
uint32_t compare256_avx512(const uint8_t *src0, const uint8_t *src1);
-void inflate_fast_avx512(PREFIX3(stream)* strm, uint32_t start);
+void inflate_fast_avx512(PREFIX3(stream)* strm, uint32_t start, int safe_mode);
uint32_t longest_match_avx512(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_roll_avx512(deflate_state *const s, uint32_t cur_match);
#endif
return functable.crc32_copy(crc, dst, src, len);
}
-static void inflate_fast_stub(PREFIX3(stream) *strm, uint32_t start) {
+static void inflate_fast_stub(PREFIX3(stream) *strm, uint32_t start, int safe_mode) {
FUNCTABLE_INIT_ABORT;
- functable.inflate_fast(strm, start);
+ functable.inflate_fast(strm, start, safe_mode); /* re-dispatch through functable, passing safe_mode through unchanged */
}
static uint32_t longest_match_stub(deflate_state* const s, uint32_t cur_match) {
uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1);
uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, size_t len);
uint32_t (* crc32_copy) (uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
- void (* inflate_fast) (PREFIX3(stream) *strm, uint32_t start);
+ void (* inflate_fast) (PREFIX3(stream) *strm, uint32_t start, int safe_mode);
uint32_t (* longest_match) (deflate_state *const s, uint32_t cur_match);
uint32_t (* longest_match_roll) (deflate_state *const s, uint32_t cur_match);
void (* slide_hash) (deflate_state *s);
case LEN:
/* use inflate_fast() if we have enough input and output */
- if (have >= INFLATE_FAST_MIN_HAVE &&
- left >= INFLATE_FAST_MIN_LEFT) {
+ if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_SAFE) {
RESTORE();
if (state->whave < state->wsize)
state->whave = state->wsize - left;
- FUNCTABLE_CALL(inflate_fast)(strm, state->wsize);
+ /* inflateBack() writes directly into the window, so out and window
+ always overlap. Pass safe_mode=1 to use safe chunk copy functions
+ that prevent overwriting window data needed by future back-references. */
+ FUNCTABLE_CALL(inflate_fast)(strm, state->wsize, 1);
LOAD();
break;
}
} while (state->length != 0);
break;
+ case MATCH:
+ /* Copy back-reference that inflate_fast() could not complete due to
+ insufficient output space. state->length and state->offset were set
+ by the safe_mode MATCH bailout in inflate_fast(). Each pass copies at
+ most `left` bytes and invokes ROOM() first; the loop repeats until
+ state->length reaches zero, then decoding resumes in LEN.
+ NOTE(review): no range check on state->offset is visible in this case,
+ and the bailout in inflate_fast() is taken before its copy-from-window
+ handling. Confirm the distance is validated against wsize/whave on this
+ path; if offset can exceed wsize, `wsize - offset` and `put - offset`
+ below go out of range. */
+ do {
+ ROOM();
+ copy = state->wsize - state->offset;
+ if (copy < left) { /* source lies ahead of put, before put + left */
+ from = put + copy;
+ copy = left - copy; /* bytes between from and put + left */
+ copy = MIN(copy, state->length);
+ put = chunkcopy_safe(put, from, copy, put + left);
+ } else { /* source is state->offset bytes behind put */
+ copy = MIN(state->length, left);
+ put = FUNCTABLE_CALL(chunkmemset_safe)(put, put - state->offset, copy, left);
+ }
+ state->length -= copy;
+ left -= copy;
+ } while (state->length != 0);
+ state->mode = LEN;
+ break;
+
case DONE:
/* inflate stream terminated properly */
ret = Z_STREAM_END;
requires strm->avail_out >= 258 for each loop to avoid checking for
output space.
*/
-void Z_INTERNAL INFLATE_FAST(PREFIX3(stream) *strm, uint32_t start) {
+void Z_INTERNAL INFLATE_FAST(PREFIX3(stream) *strm, uint32_t start, int safe_mode) {
/* start: inflate()'s starting value for strm->avail_out */
struct inflate_state *state;
z_const unsigned char *in; /* local strm->next_in */
unsigned len; /* match length, unused bytes */
unsigned char *from; /* where to copy match from */
unsigned dist; /* match distance */
- unsigned extra_safe; /* copy chunks safely in all cases */
uint64_t old; /* look-behind buffer for extra bits */
/* copy state to local variables */
last = in + (strm->avail_in - (INFLATE_FAST_MIN_HAVE - 1));
out = strm->next_out;
beg = out - (start - strm->avail_out);
- end = out + (strm->avail_out - (INFLATE_FAST_MIN_LEFT - 1));
safe = out + strm->avail_out;
+ end = safe - (safe_mode ? INFLATE_FAST_MIN_SAFE : INFLATE_FAST_MIN_LEFT) + 1;
wsize = state->wsize;
whave = state->whave;
wnext = state->wnext;
lmask = (1U << state->lenbits) - 1;
dmask = (1U << state->distbits) - 1;
- /* Detect if out and window point to the same memory allocation. In this instance it is
- necessary to use safe chunk copy functions to prevent overwriting the window. If the
- window is overwritten then future matches with far distances will fail to copy correctly. */
- extra_safe = (wsize != 0 && out >= window && out + INFLATE_FAST_MIN_LEFT <= window + state->wbufsize);
-
/* decode literals and length/distances until end-of-block or not enough
input data or output space */
do {
}
#endif
TRACE_DISTANCE(dist);
+
+ /* In safe mode, if there isn't enough output space for the full copy,
+ bail to the slow path's MATCH state which handles partial copies. */
+ if (UNLIKELY(safe_mode && len > (unsigned)(safe - out))) {
+ state->mode = MATCH;
+ state->length = len;
+ state->offset = dist;
+ break;
+ }
+
op = (unsigned)(out - beg); /* max distance in output */
if (UNLIKELY(dist > op)) { /* see if copy from window */
op = dist - op; /* distance back in window */
}
if (UNLIKELY(op < len)) { /* still need some from output */
len -= op;
- if (LIKELY(!extra_safe)) {
+ if (LIKELY(!safe_mode)) {
out = CHUNKCOPY_SAFE(out, from, op, safe);
out = CHUNKUNROLL(out, &dist, &len);
out = CHUNKCOPY_SAFE(out, out - dist, len, safe);
}
} else {
#ifndef HAVE_MASKED_READWRITE
- if (UNLIKELY(extra_safe))
+ if (UNLIKELY(safe_mode))
out = chunkcopy_safe(out, from, len, safe);
else
#endif
out = CHUNKCOPY_SAFE(out, from, len, safe);
}
#ifndef HAVE_MASKED_READWRITE
- } else if (UNLIKELY(extra_safe)) {
+ } else if (UNLIKELY(safe_mode)) {
/* Whole reference is in range of current output. */
out = chunkcopy_safe(out, out - dist, len, safe);
#endif
strm->next_out = out;
strm->avail_in = (unsigned)(in < last ? (INFLATE_FAST_MIN_HAVE - 1) + (last - in)
: (INFLATE_FAST_MIN_HAVE - 1) - (in - last));
- strm->avail_out = (unsigned)(out < end ? (INFLATE_FAST_MIN_LEFT - 1) + (end - out)
- : (INFLATE_FAST_MIN_LEFT - 1) - (out - end));
+ strm->avail_out = (unsigned)(safe - out);
Assert(bits <= 32, "Remaining bits greater than 32");
state->hold = (uint32_t)hold;
case LEN:
/* use inflate_fast() if we have enough input and output */
- if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_LEFT) {
+ if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_SAFE) {
RESTORE();
- FUNCTABLE_CALL(inflate_fast)(strm, out);
+ FUNCTABLE_CALL(inflate_fast)(strm, out, left < INFLATE_FAST_MIN_LEFT);
LOAD();
if (state->mode == TYPE)
state->back = -1;
#define TRACE_END_OF_BLOCK() \
Tracevv((stderr, "inflate: end of block\n"))
-#define INFLATE_FAST_MIN_HAVE 15
-#define INFLATE_FAST_MIN_LEFT 260
+#define INFLATE_FAST_MIN_HAVE 15 /* min input bytes (avail_in) required to enter inflate_fast() */
+#define INFLATE_FAST_MIN_LEFT 260 /* output bytes below which safe_mode is used: max output per token (258) + 2 */
+#define INFLATE_FAST_MIN_SAFE 3 /* min output bytes to enter inflate_fast(): max unchecked literal writes per iteration */
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {