From: Ilya Leoshkevich Date: Mon, 25 Oct 2021 22:50:26 +0000 (-0400) Subject: DFLTCC update for window optimization from Jim & Nathan X-Git-Tag: 2.1.0-beta1~490 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b4ca25afabba7b4bf74d36e26728006d28df891d;p=thirdparty%2Fzlib-ng.git DFLTCC update for window optimization from Jim & Nathan Stop relying on software and hardware inflate window formats being the same and act the way we already do for deflate: provide and implement window-related hooks. Another possibility would be to use an in-line history buffer (by not setting HBT_CIRCULAR), but this would require an extra memmove(). Also fix a couple corner cases in the software implementation of inflateGetDictionary() and inflateSetDictionary(). --- diff --git a/arch/s390/README.md b/arch/s390/README.md index 90066f0f9..b4b3fb8b5 100644 --- a/arch/s390/README.md +++ b/arch/s390/README.md @@ -66,11 +66,10 @@ buffer and a window. `ZALLOC_STATE()`, `ZFREE_STATE()`, `ZCOPY_STATE()`, details for the parameter block (which is allocated alongside zlib-ng state) and the window (which must be page-aligned). -While inflate software and hardware window formats match, this is not -the case for deflate. Therefore, `deflateSetDictionary()` and -`deflateGetDictionary()` need special handling, which is triggered using -`DEFLATE_SET_DICTIONARY_HOOK()` and `DEFLATE_GET_DICTIONARY_HOOK()` -macros. +Software and hardware deflate window formats don't match, therefore, +`deflateSetDictionary()` and `deflateGetDictionary()` need special handling, +which is triggered using `DEFLATE_SET_DICTIONARY_HOOK()` and +`DEFLATE_GET_DICTIONARY_HOOK()` macros. `deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC parameter block using `DEFLATE_RESET_KEEP_HOOK()` and @@ -92,9 +91,14 @@ and `DEFLATE_NEED_CONSERVATIVE_BOUND()` macros make `deflateBound()` return the correct results for the hardware implementation. Actual compression and decompression are handled by `DEFLATE_HOOK()` and -`INFLATE_TYPEDO_HOOK()` macros. Since inflation with DFLTCC manages the -window on its own, calling `updatewindow()` is suppressed using -`INFLATE_NEED_UPDATEWINDOW()` macro. +`INFLATE_TYPEDO_HOOK()` macros. + +Software and hardware inflate window formats don't match, therefore, +`inflateSetDictionary()` and `inflateGetDictionary()` need special handling, +which is triggered using `INFLATE_SET_DICTIONARY_HOOK()` and +`INFLATE_GET_DICTIONARY_HOOK()` macros. Furthermore, calling +`window_output_flush()` is suppressed using +`INFLATE_NEED_WINDOW_OUTPUT_FLUSH()` macro. In addition to compression, DFLTCC computes CRC-32 and Adler-32 checksums, therefore, whenever it's used, software checksumming is diff --git a/arch/s390/dfltcc_deflate.c b/arch/s390/dfltcc_deflate.c index 0a65aa7e8..519f97ea5 100644 --- a/arch/s390/dfltcc_deflate.c +++ b/arch/s390/dfltcc_deflate.c @@ -343,46 +343,13 @@ int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducib return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm); } -/* - Preloading history. -*/ -static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) { - size_t offset; - size_t n; - - /* Do not use more than 32K */ - if (count > HB_SIZE) { - buf += count - HB_SIZE; - count = HB_SIZE; - } - offset = (param->ho + param->hl) % HB_SIZE; - if (offset + count <= HB_SIZE) - /* Circular history buffer does not wrap - copy one chunk */ - memcpy(history + offset, buf, count); - else { - /* Circular history buffer wraps - copy two chunks */ - n = HB_SIZE - offset; - memcpy(history + offset, buf, n); - memcpy(history, buf + n, count - n); - } - n = param->hl + count; - if (n <= HB_SIZE) - /* All history fits into buffer - no need to discard anything */ - param->hl = n; - else { - /* History does not fit into buffer - discard extra bytes */ - param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; - param->hl = HB_SIZE; - } -} - int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm, const unsigned char *dictionary, uInt dict_length) { deflate_state *state = (deflate_state *)strm->state; struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); struct dfltcc_param_v0 *param = &dfltcc_state->param; - append_history(param, state->window, dictionary, dict_length); + dfltcc_append_history(param, state->window, dictionary, dict_length); state->strstart = 1; /* Add FDICT to zlib header */ state->block_start = state->strstart; /* Make deflate_stored happy */ return Z_OK; @@ -393,17 +360,6 @@ int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned cha struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); struct dfltcc_param_v0 *param = &dfltcc_state->param; - if (dictionary) { - if (param->ho + param->hl <= HB_SIZE) - /* Circular history buffer does not wrap - copy one chunk */ - memcpy(dictionary, state->window + param->ho, param->hl); - else { - /* Circular history buffer wraps - copy two chunks */ - memcpy(dictionary, state->window + param->ho, HB_SIZE - param->ho); - memcpy(dictionary + HB_SIZE - param->ho, state->window, param->ho + param->hl - HB_SIZE); - } - } - if (dict_length) - *dict_length = param->hl; + dfltcc_get_history(param, state->window, dictionary, dict_length); return Z_OK; } diff --git a/arch/s390/dfltcc_detail.h b/arch/s390/dfltcc_detail.h index 4ec03f809..5fb8f47c5 100644 --- a/arch/s390/dfltcc_detail.h +++ b/arch/s390/dfltcc_detail.h @@ -197,3 +197,50 @@ struct dfltcc_state { #define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) #define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*state), 8))) + +static inline void dfltcc_get_history(struct dfltcc_param_v0 *param, const unsigned char *history, + unsigned char *buf, uInt *count) { + if (buf) { + if (param->ho + param->hl <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(buf, history + param->ho, param->hl); + else { + /* Circular history buffer wraps - copy two chunks */ + memcpy(buf, history + param->ho, HB_SIZE - param->ho); + memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE); + } + } + if (count) + *count = param->hl; +} + +static inline void dfltcc_append_history(struct dfltcc_param_v0 *param, unsigned char *history, + const unsigned char *buf, uInt count) { + size_t offset; + size_t n; + + /* Do not use more than 32K */ + if (count > HB_SIZE) { + buf += count - HB_SIZE; + count = HB_SIZE; + } + offset = (param->ho + param->hl) % HB_SIZE; + if (offset + count <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(history + offset, buf, count); + else { + /* Circular history buffer wraps - copy two chunks */ + n = HB_SIZE - offset; + memcpy(history + offset, buf, n); + memcpy(history, buf + n, count - n); + } + n = param->hl + count; + if (n <= HB_SIZE) + /* All history fits into buffer - no need to discard anything */ + param->hl = n; + else { + /* History does not fit into buffer - discard extra bytes */ + param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; + param->hl = HB_SIZE; + } +} diff --git a/arch/s390/dfltcc_inflate.c b/arch/s390/dfltcc_inflate.c index 253506466..9b1497d2b 100644 --- a/arch/s390/dfltcc_inflate.c +++ b/arch/s390/dfltcc_inflate.c @@ -83,8 +83,6 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush /* Translate stream to parameter block */ param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32; param->sbb = state->bits; - param->hl = state->whave; /* Software and hardware history formats match */ - param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1); if (param->hl) param->nt = 0; /* Honor history for the first block */ param->cv = state->flags ? ZSWAP32(state->check) : state->check; @@ -98,8 +96,6 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); state->last = cc == DFLTCC_CC_OK; state->bits = param->sbb; - state->whave = param->hl; - state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); state->check = state->flags ? ZSWAP32(param->cv) : param->cv; if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { /* Report an error if stream is corrupted */ @@ -122,6 +118,8 @@ int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm) { int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state *)strm->state; struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + uInt count; if (!dfltcc_can_inflate(strm)) return 0; @@ -133,5 +131,29 @@ int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) { return 1; /* DFLTCC was not used yet - decompress in software */ memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); + /* Convert window from hardware to software format. Use its second part as scratch space. */ + dfltcc_get_history(param, state->window, state->window + state->wsize, &count); + state->whave = count; + state->wnext = 0; + memcpy(state->window + state->wsize - state->whave, state->window + state->wsize, state->whave); return 0; } + +int Z_INTERNAL dfltcc_inflate_set_dictionary(PREFIX3(streamp) strm, const unsigned char *dictionary, uInt dict_length) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + dfltcc_append_history(param, state->window, dictionary, dict_length); + state->havedict = 1; + return Z_OK; +} + +int Z_INTERNAL dfltcc_inflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + dfltcc_get_history(param, state->window, dictionary, dict_length); + return Z_OK; +} diff --git a/arch/s390/dfltcc_inflate.h b/arch/s390/dfltcc_inflate.h index fc8a000f7..4b68b7048 100644 --- a/arch/s390/dfltcc_inflate.h +++ b/arch/s390/dfltcc_inflate.h @@ -12,6 +12,8 @@ typedef enum { dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret); int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm); int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm); +int Z_INTERNAL dfltcc_inflate_set_dictionary(PREFIX3(streamp) strm, const unsigned char *dictionary, uInt dict_length); +int Z_INTERNAL dfltcc_inflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); #define INFLATE_RESET_KEEP_HOOK(strm) \ dfltcc_reset((strm), sizeof(struct inflate_state)) @@ -34,7 +36,7 @@ int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm); #define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) -#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) +#define INFLATE_NEED_WINDOW_OUTPUT_FLUSH(strm) (!dfltcc_can_inflate((strm))) #define INFLATE_MARK_HOOK(strm) \ do { \ @@ -46,4 +48,16 @@ int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm); if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \ } while (0) +#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_inflate((strm))) \ + return dfltcc_inflate_set_dictionary((strm), (dict), (dict_len)); \ + } while (0) + +#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_inflate((strm))) \ + return dfltcc_inflate_get_dictionary((strm), (dict), (dict_len)); \ + } while (0) + #endif diff --git a/deflate.c b/deflate.c index 79fa85dbc..bb20a7d7a 100644 --- a/deflate.c +++ b/deflate.c @@ -82,7 +82,7 @@ const char PREFIX(deflate_copyright)[] = " deflate 1.2.11.f Copyright 1995-2016 /* Invoked at the beginning of deflateParams(). Useful for updating arch-specific compression parameters. */ # define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) do {} while (0) /* Returns whether the last deflate(flush) operation did everything it's supposed to do. */ -# define DEFLATE_DONE(strm, flush) 1 +# define DEFLATE_DONE(strm, flush) 1 /* Adjusts the upper bound on compressed data length based on compression parameters and uncompressed data length. * Useful when arch-specific deflation code behaves differently than regular zlib-ng algorithms. */ # define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) diff --git a/inflate.c b/inflate.c index cca8df67a..1abb0290f 100644 --- a/inflate.c +++ b/inflate.c @@ -912,11 +912,13 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { break; case CHECK: - RESTORE(); - window_output_flush(strm); - LOAD(); - if (strm->avail_out == 0 && state->wnext) - goto inf_leave; + if (INFLATE_NEED_WINDOW_OUTPUT_FLUSH(strm)) { + RESTORE(); + window_output_flush(strm); + LOAD(); + if (strm->avail_out == 0 && state->wnext) + goto inf_leave; + } if (state->wrap) { NEEDBITS(32); out -= left; @@ -982,7 +984,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { inf_leave: RESTORE(); - if (strm->avail_out && state->wnext) + if (INFLATE_NEED_WINDOW_OUTPUT_FLUSH(strm) && strm->avail_out && state->wnext) window_output_flush(strm); in -= strm->avail_in; @@ -1017,13 +1019,12 @@ int32_t Z_EXPORT PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm, uint8_t *di /* check state */ if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ state = (struct inflate_state *)strm->state; /* copy dictionary */ - if (state->whave && dictionary != NULL) { - memcpy(dictionary, state->window + state->wnext, state->whave - state->wnext); - memcpy(dictionary + state->whave - state->wnext, state->window, state->wnext); - } + if (state->whave && dictionary != NULL) + memcpy(dictionary, state->window + state->wsize - state->whave, state->whave); if (dictLength != NULL) *dictLength = state->whave; return Z_OK; @@ -1032,7 +1033,7 @@ int32_t Z_EXPORT PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm, uint8_t *di int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8_t *dictionary, uint32_t dictLength) { struct inflate_state *state; unsigned long dictid, dict_copy, hist_copy; - const unsigned char *dict_from, *hist_from; + const unsigned char *dict_from; unsigned char *dict_to, *hist_to; int ret; @@ -1055,6 +1056,8 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 Tracec(state->wnext != 0, (stderr, "Setting dictionary with unflushed output")); + INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ + /* copy dictionary to window and amend if necessary */ dict_from = dictionary; dict_copy = dictLength; @@ -1064,14 +1067,13 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 } dict_to = state->window + state->wsize - dict_copy; - hist_from = state->window + state->wsize - state->whave; hist_copy = state->wsize - dict_copy; if (hist_copy > state->whave) hist_copy = state->whave; hist_to = dict_to - hist_copy; if (hist_copy) - memcpy(hist_to, hist_from, hist_copy); + memcpy(hist_to, state->window + state->wsize - hist_copy, hist_copy); if (dict_copy) memcpy(dict_to, dict_from, dict_copy); diff --git a/inflate_p.h b/inflate_p.h index b2e8d8de2..464b04fdf 100644 --- a/inflate_p.h +++ b/inflate_p.h @@ -27,12 +27,17 @@ # define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) /* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */ # define INFLATE_NEED_CHECKSUM(strm) 1 -/* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */ -# define INFLATE_NEED_UPDATEWINDOW(strm) 1 +/* Returns whether zlib-ng should flush the window to the output buffer. + Set to 0 if arch-specific inflation code already does that. */ +# define INFLATE_NEED_WINDOW_OUTPUT_FLUSH(strm) 1 /* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */ # define INFLATE_MARK_HOOK(strm) do {} while (0) /* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. */ -#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) +# define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) +/* Invoked at the beginning of inflateSetDictionary(). Useful for checking arch-specific window data. */ +# define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) +/* Invoked at the beginning of inflateGetDictionary(). Useful for adjusting arch-specific window data. */ +# define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) #endif