]> git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add back original version of inflate_fast for use with inflateBack.
author Nathan Moinvaziri <nathan@nathanm.com>
Mon, 23 Aug 2021 19:21:40 +0000 (12:21 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Thu, 2 Dec 2021 08:26:32 +0000 (09:26 +0100)
infback.c
inffast.c
inffast.h
inflate_p.h

index 8c43a7cef89111a27303037545649a02bfc5fdec..cf0549ae1e62a545eaedb42e9f604007bb4c12c5 100644 (file)
--- a/infback.c
+++ b/infback.c
@@ -338,6 +338,17 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in
             state->mode = LEN;
 
         case LEN:
+            /* use inflate_fast() if we have enough input and output */
+            if (have >= INFLATE_FAST_MIN_HAVE &&
+                left >= INFLATE_FAST_MIN_LEFT) {
+                RESTORE_BACK();
+                if (state->whave < state->wsize)
+                    state->whave = state->wsize - left;
+                zng_inflate_fast_back(strm, state->wsize);
+                LOAD_BACK();
+                break;
+            }
+
             /* get a literal, length, or end-of-block code */
             for (;;) {
                 here = state->lencode[BITS(state->lenbits)];
index 57031aeca72b23896496e55cc6720a5d103f4805..899534f0dc0f6acd2580b0a39982112611b36d0f 100644 (file)
--- a/inffast.c
+++ b/inffast.c
@@ -250,6 +250,258 @@ void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm) {
     state->bits = bits;
     return;
 }
+void Z_INTERNAL zng_inflate_fast_back(PREFIX3(stream) *strm, unsigned long start) {
+    /* inflateBack() fast path. start: strm->avail_out when output began (used to derive beg); inflateBack() passes state->wsize */
+    struct inflate_state *state;
+    z_const unsigned char *in;  /* local strm->next_in */
+    const unsigned char *last;  /* have enough input while in < last */
+    unsigned char *out;         /* local strm->next_out */
+    unsigned char *beg;         /* inflate()'s initial strm->next_out */
+    unsigned char *end;         /* while out < end, enough space available */
+    unsigned char *safe;        /* can use chunkcopy provided out < safe */
+#ifdef INFLATE_STRICT
+    unsigned dmax;              /* maximum distance from zlib header */
+#endif
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned wnext;             /* window write index */
+    unsigned char *window;      /* allocated sliding window, if wsize != 0 */
+
+    /* hold is a local copy of strm->hold. By default, hold satisfies the same
+       invariants that strm->hold does, namely that (hold >> bits) == 0. This
+       invariant is kept by loading bits into hold one byte at a time, like:
+       hold |= next_byte_of_input << bits; in++; bits += 8;
+       If we need to ensure that bits >= 15 then this code snippet is simply
+       repeated. Over one iteration of the outermost do/while loop, this
+       happens up to six times (48 bits of input), as described in the NOTES
+       above.
+       However, on some little endian architectures, it can be significantly
+       faster to load 64 bits once instead of 8 bits six times:
+       if (bits <= 16) {
+         hold |= next_8_bytes_of_input << bits; in += 6; bits += 48;
+       }
+       Unlike the simpler one byte load, shifting the next_8_bytes_of_input
+       by bits will overflow and lose those high bits, up to 2 bytes' worth.
+       The conservative estimate is therefore that we have read only 6 bytes
+       (48 bits). Again, as per the NOTES above, 48 bits is sufficient for the
+       rest of the iteration, and we will not need to load another 8 bytes.
+       Inside this function, we no longer satisfy (hold >> bits) == 0, but
+       this is not problematic, even if that overflow does not land on an 8 bit
+       byte boundary. Those excess bits will eventually shift down lower as the
+       Huffman decoder consumes input, and when new input bits need to be loaded
+       into the bits variable, the same input bits will be or'ed over those
+       existing bits. A bitwise or is idempotent: (a | b | b) equals (a | b).
+       Note that we therefore write that load operation as "hold |= etc" and not
+       "hold += etc".
+       Outside that loop, at the end of the function, hold is bitwise and'ed
+       with (1<<bits)-1 to drop those excess bits so that, on function exit, we
+       keep the invariant that (state->hold >> state->bits) == 0.
+    */
+    uint64_t hold;              /* local strm->hold */
+    unsigned bits;              /* local strm->bits */
+    code const *lcode;          /* local strm->lencode */
+    code const *dcode;          /* local strm->distcode */
+    unsigned lmask;             /* mask for first level of length codes */
+    unsigned dmask;             /* mask for first level of distance codes */
+    const code *here;           /* retrieved table entry */
+    unsigned op;                /* code bits, operation, extra bits, or */
+                                /*  window position, window bytes to copy */
+    unsigned len;               /* match length, unused bytes */
+    unsigned dist;              /* match distance */
+    unsigned char *from;        /* where to copy match from */
+    unsigned extra_safe;        /* copy chunks safely in all cases */
+
+    /* copy state to local variables */
+    state = (struct inflate_state *)strm->state;
+    in = strm->next_in;
+    last = in + (strm->avail_in - (INFLATE_FAST_MIN_HAVE - 1));
+    out = strm->next_out;
+    beg = out - (start - strm->avail_out);  /* step back over the (start - avail_out) bytes assumed already written */
+    end = out + (strm->avail_out - (INFLATE_FAST_MIN_LEFT - 1));
+    safe = out + strm->avail_out;
+#ifdef INFLATE_STRICT
+    dmax = state->dmax;
+#endif
+    wsize = state->wsize;
+    whave = state->whave;
+    wnext = state->wnext;
+    window = state->window;
+    hold = state->hold;
+    bits = state->bits;
+    lcode = state->lencode;
+    dcode = state->distcode;
+    lmask = (1U << state->lenbits) - 1;
+    dmask = (1U << state->distbits) - 1;
+
+    /* Detect if out and window point to the same memory allocation. In this instance it is
+       necessary to use safe chunk copy functions to prevent overwriting the window. If the
+       window is overwritten then future matches with far distances will fail to copy correctly. */
+    extra_safe = (wsize != 0 && out >= window && out + INFLATE_FAST_MIN_LEFT <= window + wsize);
+
+    /* decode literals and length/distances until end-of-block or not enough
+       input data or output space */
+    do {
+        if (bits < 15) {
+            hold |= load_64_bits(in, bits);
+            in += 6;
+            bits += 48;
+        }
+        here = lcode + (hold & lmask);
+      dolen:
+        DROPBITS(here->bits);
+        op = here->op;
+        if (op == 0) {                          /* literal */
+            Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
+                    "inflate:         literal '%c'\n" :
+                    "inflate:         literal 0x%02x\n", here->val));
+            *out++ = (unsigned char)(here->val);
+        } else if (op & 16) {                     /* length base */
+            len = here->val;
+            op &= 15;                           /* number of extra bits */
+            if (bits < op) {
+                hold |= load_64_bits(in, bits);
+                in += 6;
+                bits += 48;
+            }
+            len += BITS(op);
+            DROPBITS(op);
+            Tracevv((stderr, "inflate:         length %u\n", len));
+            if (bits < 15) {
+                hold |= load_64_bits(in, bits);
+                in += 6;
+                bits += 48;
+            }
+            here = dcode + (hold & dmask);
+          dodist:
+            DROPBITS(here->bits);
+            op = here->op;
+            if (op & 16) {                      /* distance base */
+                dist = here->val;
+                op &= 15;                       /* number of extra bits */
+                if (bits < op) {
+                    hold |= load_64_bits(in, bits);
+                    in += 6;
+                    bits += 48;
+                }
+                dist += BITS(op);
+#ifdef INFLATE_STRICT
+                if (dist > dmax) {
+                    SET_BAD("invalid distance too far back");
+                    break;
+                }
+#endif
+                DROPBITS(op);
+                Tracevv((stderr, "inflate:         distance %u\n", dist));
+                op = (unsigned)(out - beg);     /* max distance in output */
+                if (dist > op) {                /* see if copy from window */
+                    op = dist - op;             /* distance back in window */
+                    if (op > whave) {
+                        if (state->sane) {
+                            SET_BAD("invalid distance too far back");
+                            break;
+                        }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                        if (len <= op - whave) {
+                            do {
+                                *out++ = 0;
+                            } while (--len);
+                            continue;
+                        }
+                        len -= op - whave;
+                        do {
+                            *out++ = 0;
+                        } while (--op > whave);
+                        if (op == 0) {
+                            from = out - dist;
+                            do {
+                                *out++ = *from++;
+                            } while (--len);
+                            continue;
+                        }
+#endif
+                    }
+                    from = window;
+                    if (wnext == 0) {           /* very common case */
+                        from += wsize - op;
+                    } else if (wnext >= op) {   /* contiguous in window */
+                        from += wnext - op;
+                    } else {                    /* wrap around window */
+                        op -= wnext;
+                        from += wsize - op;
+                        if (op < len) {         /* some from end of window */
+                            len -= op;
+                            out = functable.chunkcopy_safe(out, from, op, safe);
+                            from = window;      /* more from start of window */
+                            op = wnext;
+                            /* This (rare) case can create a situation where
+                               the first chunkcopy below must be checked.
+                             */
+                        }
+                    }
+                    if (op < len) {             /* still need some from output */
+                        len -= op;
+                        out = functable.chunkcopy_safe(out, from, op, safe);
+                        out = functable.chunkunroll(out, &dist, &len);
+                        out = functable.chunkcopy_safe(out, out - dist, len, safe);
+                    } else {
+                        out = functable.chunkcopy_safe(out, from, len, safe);
+                    }
+                } else if (extra_safe) {
+                    /* Whole reference is in range of current output. */
+                    if (dist >= len || dist >= state->chunksize)
+                        out = functable.chunkcopy_safe(out, out - dist, len, safe);
+                    else
+                        out = functable.chunkmemset_safe(out, dist, len, (unsigned)((safe - out) + 1));
+                } else {
+                    /* Whole reference is in range of current output.  No range checks are
+                       necessary because we start with room for at least 258 bytes of output,
+                       so unroll and roundoff operations can write beyond `out+len` so long
+                       as they stay within 258 bytes of `out`.
+                    */
+                    if (dist >= len || dist >= state->chunksize)
+                        out = functable.chunkcopy(out, out - dist, len);
+                    else
+                        out = functable.chunkmemset(out, dist, len);
+                }
+            } else if ((op & 64) == 0) {          /* 2nd level distance code */
+                here = dcode + here->val + BITS(op);
+                goto dodist;
+            } else {
+                SET_BAD("invalid distance code");
+                break;
+            }
+        } else if ((op & 64) == 0) {              /* 2nd level length code */
+            here = lcode + here->val + BITS(op);
+            goto dolen;
+        } else if (op & 32) {                     /* end-of-block */
+            Tracevv((stderr, "inflate:         end of block\n"));
+            state->mode = TYPE;
+            break;
+        } else {
+            SET_BAD("invalid literal/length code");
+            break;
+        }
+    } while (in < last && out < end);
+
+    /* return unused whole bytes still counted in bits to the input (on entry, bits < 8, so in won't go too far back) */
+    len = bits >> 3;
+    in -= len;
+    bits -= len << 3;
+    hold &= (UINT64_C(1) << bits) - 1;
+
+    /* update state and return; avail_in/avail_out are recomputed from the last/end offsets set up above */
+    strm->next_in = in;
+    strm->next_out = out;
+    strm->avail_in = (unsigned)(in < last ? (INFLATE_FAST_MIN_HAVE - 1) + (last - in)
+                                          : (INFLATE_FAST_MIN_HAVE - 1) - (in - last));
+    strm->avail_out = (unsigned)(out < end ? (INFLATE_FAST_MIN_LEFT - 1) + (end - out)
+                                           : (INFLATE_FAST_MIN_LEFT - 1) - (out - end));
+
+    Assert(bits <= 32, "Remaining bits greater than 32");
+    state->hold = (uint32_t)hold;  /* narrowing relies on bits <= 32 (asserted above) and the mask applied to hold */
+    state->bits = bits;
+    return;
+}
 
 /*
    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
index cf9a3e2d2bb1efe66975ddf9b59f72bc1abe5908..e8d7e6e01d8a3437b967bd78503f119d69a20144 100644 (file)
--- a/inffast.h
+++ b/inffast.h
@@ -11,6 +11,7 @@
  */
 
 void Z_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm);
+void Z_INTERNAL zng_inflate_fast_back(PREFIX3(stream) *strm, unsigned long start); /* inflate_fast variant for use with inflateBack() */
 
 #define INFLATE_FAST_MIN_HAVE 8
 #define INFLATE_FAST_MIN_LEFT 258
index 168f270fadc9e54c86f0472a3d1788169c3f4eca..b2e8d8de2fc7494d20e049515ddb96e454169f81 100644 (file)
         bits = state->bits; \
     } while (0)
 
+/* Load inflateBack()'s local registers (put, left, next, have, hold, bits) from strm and state, for speed */
+#define LOAD_BACK() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
 /* Restore state from registers in inflate() */
 #define RESTORE() \
     do { \
         state->bits = bits; \
     } while (0)
 
+/* Write inflateBack()'s local registers (put, left, next, have, hold, bits) back into strm and state */
+#define RESTORE_BACK() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = (z_const unsigned char *)next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
 /* Clear the input bit accumulator */
 #define INITBITS() \
     do { \