__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
__m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3, xmm_crc_part;
__m128i xmm_initial = _mm_cvtsi32_si128(init_crc);
- int32_t first = 1;
+ xmm_crc_part = _mm_setzero_si128();
+ int32_t first = init_crc != 0;
/* Technically the CRC functions don't even call this for input < 64, but a bare minimum of 31
 * bytes of input is needed for the aligning load that occurs. If there's an initial CRC, to
 * carry it forward through the folded CRC there must be 16 - src % 16 + 16 bytes available,
 * which by definition can be up to 15 bytes + one full vector load.
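 * Worst case: src % 16 == 1 leaves 15 alignment bytes to the next vector boundary,
 * and carrying the initial CRC forward takes one more 16-byte load: 15 + 16 == 31. */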
- assert(len >= 31);
+ assert(len >= 31 || first == 0);
crc32_fold_load((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
+ if (len < 16) {
+ goto partial_nocpy;
+ }
+
algn_diff = ((uintptr_t)16 - ((uintptr_t)src & 0xF)) & 0xF;
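/* algn_diff is the distance in bytes to the next 16-byte boundary, 0 if src is
 * already aligned; e.g. a src address ending in 0x9 gives (16 - 9) & 0xF == 7. */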
if (algn_diff) {
- if (algn_diff >= 4) {
+ if (algn_diff >= 4 || init_crc == 0) {
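+ /* The 32-bit initial CRC presumably fits into the partial fold only when at
+ * least 4 alignment bytes exist; otherwise the else path folds a full vector first. */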
xmm_crc_part = _mm_loadu_si128((__m128i *)src);
src += algn_diff;
len -= algn_diff;
} else {
/* ... fold one full vector to carry the initial CRC forward ... */
src += (algn_diff + 16);
len -= (algn_diff + 16);
}
- }
- xmm_crc_part = _mm_setzero_si128();
+ xmm_crc_part = _mm_setzero_si128();
+ }
#ifdef X86_VPCLMULQDQ_CRC
if (x86_cpu_has_vpclmulqdq && x86_cpu_has_avx512 && (len >= 256)) {
src += 16;
}
#endif
+partial_nocpy:
if (len) {
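/* Fold the remaining tail (fewer than 16 bytes): copy it into the zeroed
 * partial vector and run one last partial fold. */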
memcpy(&xmm_crc_part, src, len);
partial_fold((size_t)len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
/* function prototypes */
static int inflateStateCheck(PREFIX3(stream) *strm);
-static int updatewindow(PREFIX3(stream) *strm, const unsigned char *end, uint32_t copy);
+static int updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t copy);
static uint32_t syncsearch(uint32_t *have, const unsigned char *buf, uint32_t len);
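+/* Copy src to dst while folding the bytes into the running check value:
+ * crc32 for gzip-wrapped streams (state->flags != 0), adler32 otherwise. */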
+static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst,
+ const uint8_t *src, uint32_t copy) {
+ struct inflate_state *state = (struct inflate_state*)strm->state;
+#ifdef GUNZIP
+ if (state->flags) {
+ functable.crc32_fold_copy(&state->crc_fold, dst, src, copy);
+ } else
+#endif
+ {
+ strm->adler = state->check = functable.adler32(state->check, src, copy);
+ memcpy(dst, src, copy);
+ }
+}
+
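+/* Update the running check value as inf_chksum_cpy does, but without copying. */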
+static inline void inf_chksum(PREFIX3(stream) *strm, const uint8_t *src, uint32_t len) {
+ struct inflate_state *state = (struct inflate_state*)strm->state;
+#ifdef GUNZIP
+ if (state->flags) {
+ functable.crc32_fold(&state->crc_fold, src, len, 0);
+ } else
+#endif
+ {
+ strm->adler = state->check = functable.adler32(state->check, src, len);
+ }
+}
+
static int inflateStateCheck(PREFIX3(stream) *strm) {
struct inflate_state *state;
if (strm == NULL || strm->zalloc == NULL || strm->zfree == NULL)
/* copy state->wsize or less output bytes into the circular window */
if (copy >= state->wsize) {
- memcpy(state->window, end - state->wsize, state->wsize);
+ /* Only do this if the caller specifies to checksum bytes AND the platform requires
+ * it (s/390 being the primary exception to this). Also, for now, do the adler checksums
+ * if not a gzip-based header. The inline adler checksums will come in the near future,
+ * possibly in the next commit. */
+ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4)) {
+ /* We have to split the checksum over non-copied and copied bytes */
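+ /* e.g. with wsize == 32768 and copy == 40000, the first 7232 bytes feed only
+ * the checksum, while the final 32768 are checksummed and kept in the window. */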
+ if (copy > state->wsize)
+ inf_chksum(strm, end - copy, copy - state->wsize);
+ inf_chksum_cpy(strm, state->window, end - state->wsize, state->wsize);
+ } else {
+ memcpy(state->window, end - state->wsize, state->wsize);
+ }
+
state->wnext = 0;
state->whave = state->wsize;
} else {
dist = state->wsize - state->wnext;
- if (dist > copy)
- dist = copy;
- memcpy(state->window + state->wnext, end - copy, dist);
+ /* Only do this if the caller specifies to checksum bytes AND the platform requires it.
+ * We need to maintain the correct order here for the checksum. */
+ dist = MIN(dist, copy);
+ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4)) {
+ inf_chksum_cpy(strm, state->window + state->wnext, end - copy, dist);
+ } else {
+ memcpy(state->window + state->wnext, end - copy, dist);
+ }
copy -= dist;
if (copy) {
- memcpy(state->window, end - copy, copy);
+ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4)) {
+ inf_chksum_cpy(strm, state->window, end - copy, copy);
+ } else {
+ memcpy(state->window, end - copy, copy);
+ }
+
state->wnext = copy;
state->whave = state->wsize;
} else {
len + copy > state->head->extra_max ?
state->head->extra_max - len : copy);
}
- if ((state->flags & 0x0200) && (state->wrap & 4))
+ if ((state->flags & 0x0200) && (state->wrap & 4)) {
state->check = PREFIX(crc32)(state->check, next, copy);
+ }
have -= copy;
next += copy;
state->length -= copy;
state->head->hcrc = (int)((state->flags >> 9) & 1);
state->head->done = 1;
}
- strm->adler = state->check = CRC32_INITIAL_VALUE;
+ /* initialize the crc32 fold state if computing a gzip checksum */
+ if ((state->wrap & 4) && state->flags)
+ strm->adler = state->check = functable.crc32_fold_reset(&state->crc_fold);
state->mode = TYPE;
break;
#endif
out -= left;
strm->total_out += out;
state->total += out;
- if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out)
- strm->adler = state->check = UPDATE(state->check, put - out, out);
+
+ /* compute the running checksum if not in raw mode */
+ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4)) {
+ if (out) {
+ inf_chksum(strm, put - out, out);
+ }
+#ifdef GUNZIP
+ if (state->flags)
+ strm->adler = state->check = functable.crc32_fold_final(&state->crc_fold);
+#endif
+ }
out = left;
if ((state->wrap & 4) && (
#ifdef GUNZIP
strm->total_in += in;
strm->total_out += out;
state->total += out;
- if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out)
- strm->adler = state->check = UPDATE(state->check, strm->next_out - out, out);
+
strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
(state->mode == TYPE ? 128 : 0) + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)