From: Ilya Leoshkevich Date: Thu, 20 Oct 2022 00:28:47 +0000 (+0200) Subject: IBM zSystems DFLTCC: Support inflate with small window X-Git-Tag: 2.1.0-beta1~123 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3eab3173ac7d1d53457452f3cd1eaeea5b2d43df;p=thirdparty%2Fzlib-ng.git IBM zSystems DFLTCC: Support inflate with small window There is no hardware control for DFLTCC window size, and because of that supporting small windows for deflate is not trivial: one has to make sure that DFLTCC does not emit large distances, which most likely entails somehow trimming the window and/or input in order to make sure that whave + avail_in <= wsize. But inflate is much easier: one only has to allocate enough space. Do that in dfltcc_alloc_window(), and also introduce ZCOPY_WINDOW() in order to copy everything, not just what the software implementation cares about. After this change, software and hardware window formats no longer match: the software will use wbits and wsize, and the hardware will use HB_BITS and HB_SIZE. Unlike deflate, inflate does not switch between software and hardware implementations mid-stream, which leaves only inflateSetDictionary() and inflateGetDictionary() interesting. --- diff --git a/arch/s390/README.md b/arch/s390/README.md index 18a7ca8cd..2c3165412 100644 --- a/arch/s390/README.md +++ b/arch/s390/README.md @@ -63,15 +63,15 @@ integrated with the rest of zlib-ng using hook macros. DFLTCC takes as arguments a parameter block, an input buffer, an output buffer and a window. `ZALLOC_DEFLATE_STATE()`, `ZALLOC_INFLATE_STATE()`, `ZFREE_STATE()`, `ZCOPY_DEFLATE_STATE()`, `ZCOPY_INFLATE_STATE()`, -`ZALLOC_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate allocation -details for the parameter block (which is allocated alongside zlib-ng -state) and the window (which must be page-aligned). - -While inflate software and hardware window formats match, this is not -the case for deflate. 
Therefore, `deflateSetDictionary()` and -`deflateGetDictionary()` need special handling, which is triggered using -`DEFLATE_SET_DICTIONARY_HOOK()` and `DEFLATE_GET_DICTIONARY_HOOK()` -macros. +`ZALLOC_WINDOW()`, `ZCOPY_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate +allocation details for the parameter block (which is allocated alongside +zlib-ng state) and the window (which must be page-aligned and large enough). + +Software and hardware window formats do not match, therefore, +`deflateSetDictionary()`, `deflateGetDictionary()`, `inflateSetDictionary()` +and `inflateGetDictionary()` need special handling, which is triggered using +`DEFLATE_SET_DICTIONARY_HOOK()`, `DEFLATE_GET_DICTIONARY_HOOK()`, +`INFLATE_SET_DICTIONARY_HOOK()` and `INFLATE_GET_DICTIONARY_HOOK()` macros. `deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC parameter block using `DEFLATE_RESET_KEEP_HOOK()` and diff --git a/arch/s390/dfltcc_common.c b/arch/s390/dfltcc_common.c index 2937d8d64..78be71811 100644 --- a/arch/s390/dfltcc_common.c +++ b/arch/s390/dfltcc_common.c @@ -20,9 +20,9 @@ void Z_INTERNAL *PREFIX(dfltcc_alloc_window)(PREFIX3(streamp) strm, uInt items, void *w; /* To simplify freeing, we store the pointer to the allocated buffer right - * before the window. + * before the window. Note that DFLTCC always uses HB_SIZE bytes. 
*/ - p = ZALLOC(strm, sizeof(void *) + items * size + PAGE_ALIGN, sizeof(unsigned char)); + p = ZALLOC(strm, sizeof(void *) + MAX(items * size, HB_SIZE) + PAGE_ALIGN, sizeof(unsigned char)); if (p == NULL) return NULL; w = ALIGN_UP((char *)p + sizeof(void *), PAGE_ALIGN); @@ -30,6 +30,10 @@ void Z_INTERNAL *PREFIX(dfltcc_alloc_window)(PREFIX3(streamp) strm, uInt items, return w; } +void Z_INTERNAL PREFIX(dfltcc_copy_window)(void *dest, const void *src, size_t n) { + memcpy(dest, src, MAX(n, HB_SIZE)); +} + void Z_INTERNAL PREFIX(dfltcc_free_window)(PREFIX3(streamp) strm, void *w) { if (w) ZFREE(strm, *(void **)((unsigned char *)w - sizeof(void *))); diff --git a/arch/s390/dfltcc_common.h b/arch/s390/dfltcc_common.h index 4f48bd9a4..b73437411 100644 --- a/arch/s390/dfltcc_common.h +++ b/arch/s390/dfltcc_common.h @@ -4,12 +4,15 @@ #include "zutil.h" void Z_INTERNAL *PREFIX(dfltcc_alloc_window)(PREFIX3(streamp) strm, uInt items, uInt size); +void Z_INTERNAL PREFIX(dfltcc_copy_window)(void *dest, const void *src, size_t n); void Z_INTERNAL PREFIX(dfltcc_free_window)(PREFIX3(streamp) strm, void *w); #define ZFREE_STATE ZFREE #define ZALLOC_WINDOW PREFIX(dfltcc_alloc_window) +#define ZCOPY_WINDOW PREFIX(dfltcc_copy_window) + #define ZFREE_WINDOW PREFIX(dfltcc_free_window) #define TRY_FREE_WINDOW PREFIX(dfltcc_free_window) diff --git a/arch/s390/dfltcc_deflate.c b/arch/s390/dfltcc_deflate.c index 0210ddc17..ab3343aea 100644 --- a/arch/s390/dfltcc_deflate.c +++ b/arch/s390/dfltcc_deflate.c @@ -376,36 +376,6 @@ int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int re /* Preloading history. 
*/ -static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) { - size_t offset; - size_t n; - - /* Do not use more than 32K */ - if (count > HB_SIZE) { - buf += count - HB_SIZE; - count = HB_SIZE; - } - offset = (param->ho + param->hl) % HB_SIZE; - if (offset + count <= HB_SIZE) - /* Circular history buffer does not wrap - copy one chunk */ - memcpy(history + offset, buf, count); - else { - /* Circular history buffer wraps - copy two chunks */ - n = HB_SIZE - offset; - memcpy(history + offset, buf, n); - memcpy(history, buf + n, count - n); - } - n = param->hl + count; - if (n <= HB_SIZE) - /* All history fits into buffer - no need to discard anything */ - param->hl = n; - else { - /* History does not fit into buffer - discard extra bytes */ - param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; - param->hl = HB_SIZE; - } -} - int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, const unsigned char *dictionary, uInt dict_length) { deflate_state *state = (deflate_state *)strm->state; @@ -423,16 +393,8 @@ int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsi struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); struct dfltcc_param_v0 *param = &dfltcc_state->param; - if (dictionary) { - if (param->ho + param->hl <= HB_SIZE) - /* Circular history buffer does not wrap - copy one chunk */ - memcpy(dictionary, state->window + param->ho, param->hl); - else { - /* Circular history buffer wraps - copy two chunks */ - memcpy(dictionary, state->window + param->ho, HB_SIZE - param->ho); - memcpy(dictionary + HB_SIZE - param->ho, state->window, param->ho + param->hl - HB_SIZE); - } - } + if (dictionary) + get_history(param, state->window, dictionary); if (dict_length) *dict_length = param->hl; return Z_OK; diff --git a/arch/s390/dfltcc_detail.h b/arch/s390/dfltcc_detail.h index e1d625b84..354c2f555 100644 --- a/arch/s390/dfltcc_detail.h +++ 
b/arch/s390/dfltcc_detail.h @@ -267,3 +267,46 @@ static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) { static inline void dfltcc_copy_state(void *dst, const void *src, uInt size, uInt extension_size) { memcpy(dst, src, ALIGN_UP(size, 8) + extension_size); } + +static inline void append_history(struct dfltcc_param_v0 *param, unsigned char *history, + const unsigned char *buf, uInt count) { + size_t offset; + size_t n; + + /* Do not use more than 32K */ + if (count > HB_SIZE) { + buf += count - HB_SIZE; + count = HB_SIZE; + } + offset = (param->ho + param->hl) % HB_SIZE; + if (offset + count <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(history + offset, buf, count); + else { + /* Circular history buffer wraps - copy two chunks */ + n = HB_SIZE - offset; + memcpy(history + offset, buf, n); + memcpy(history, buf + n, count - n); + } + n = param->hl + count; + if (n <= HB_SIZE) + /* All history fits into buffer - no need to discard anything */ + param->hl = n; + else { + /* History does not fit into buffer - discard extra bytes */ + param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; + param->hl = HB_SIZE; + } +} + +static inline void get_history(struct dfltcc_param_v0 *param, const unsigned char *history, + unsigned char *buf) { + if (param->ho + param->hl <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(buf, history + param->ho, param->hl); + else { + /* Circular history buffer wraps - copy two chunks */ + memcpy(buf, history + param->ho, HB_SIZE - param->ho); + memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE); + } +} diff --git a/arch/s390/dfltcc_inflate.c b/arch/s390/dfltcc_inflate.c index b7ecbe275..34fbbb227 100644 --- a/arch/s390/dfltcc_inflate.c +++ b/arch/s390/dfltcc_inflate.c @@ -39,10 +39,6 @@ int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state *)strm->state; struct 
dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - /* Unsupported compression settings */ - if (state->wbits != HB_BITS) - return 0; - /* Unsupported hardware */ return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); } @@ -98,8 +94,6 @@ dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, i /* Translate stream to parameter block */ param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32; param->sbb = state->bits; - param->hl = state->whave; /* Software and hardware history formats match */ - param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1); if (param->hl) param->nt = 0; /* Honor history for the first block */ param->cv = state->flags ? ZSWAP32(state->check) : state->check; @@ -113,8 +107,6 @@ dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, i strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); state->last = cc == DFLTCC_CC_OK; state->bits = param->sbb; - state->whave = param->hl; - state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv; if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { /* Report an error if stream is corrupted */ @@ -134,9 +126,33 @@ int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm) { return !param->nt; } +/* + Rotates a circular buffer. 
+ The implementation is based on https://cplusplus.com/reference/algorithm/rotate/ + */ +static void rotate(unsigned char *start, unsigned char *pivot, unsigned char *end) { + unsigned char *p = pivot; + unsigned char tmp; + + while (p != start) { + tmp = *start; + *start = *p; + *p = tmp; + + start++; + p++; + + if (p == end) + p = pivot; + else if (start == pivot) + pivot = p; + } +} + int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) { struct inflate_state *state = (struct inflate_state *)strm->state; struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; if (!PREFIX(dfltcc_can_inflate)(strm)) return 0; @@ -148,5 +164,40 @@ int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) { return 1; /* DFLTCC was not used yet - decompress in software */ memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); + /* Convert the window from the hardware to the software format */ + rotate(state->window, state->window + param->ho, state->window + HB_SIZE); + state->whave = state->wnext = MIN(param->hl, state->wsize); return 0; } + +/* + Preloading history. 
+*/ +int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (PREFIX(inflate_ensure_window)(state)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + + append_history(param, state->window, dictionary, dict_length); + state->havedict = 1; + return Z_OK; +} + +int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, + unsigned char *dictionary, uInt *dict_length) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (dictionary && state->window) + get_history(param, state->window, dictionary); + if (dict_length) + *dict_length = param->hl; + return Z_OK; +} diff --git a/arch/s390/dfltcc_inflate.h b/arch/s390/dfltcc_inflate.h index 5e86fe87f..632fada62 100644 --- a/arch/s390/dfltcc_inflate.h +++ b/arch/s390/dfltcc_inflate.h @@ -15,6 +15,10 @@ typedef enum { dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret); int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm); int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm); +int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length); +int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, + unsigned char *dictionary, uInt* dict_length); #define ZALLOC_INFLATE_STATE PREFIX(dfltcc_alloc_inflate_state) #define ZCOPY_INFLATE_STATE PREFIX(dfltcc_copy_inflate_state) @@ -51,4 +55,16 @@ int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm); if (PREFIX(dfltcc_was_inflate_used)((strm))) return Z_STREAM_ERROR; \ } while (0) 
+#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (PREFIX(dfltcc_can_inflate)((strm))) \ + return PREFIX(dfltcc_inflate_set_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + +#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (PREFIX(dfltcc_can_inflate)((strm))) \ + return PREFIX(dfltcc_inflate_get_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + #endif diff --git a/inflate.c b/inflate.c index ead160df8..c4de05843 100644 --- a/inflate.c +++ b/inflate.c @@ -1157,6 +1157,8 @@ int32_t Z_EXPORT PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm, uint8_t *di return Z_STREAM_ERROR; state = (struct inflate_state *)strm->state; + INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ + /* copy dictionary */ if (state->whave && dictionary != NULL) { memcpy(dictionary, state->window + state->wnext, state->whave - state->wnext); @@ -1186,6 +1188,8 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 return Z_DATA_ERROR; } + INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ + /* copy dictionary to window using updatewindow(), which will amend the existing dictionary if appropriate */ ret = updatewindow(strm, dictionary + dictLength, dictLength, 0); @@ -1349,8 +1353,7 @@ int32_t Z_EXPORT PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou } copy->next = copy->codes + (state->next - state->codes); if (window != NULL) { - wsize = 1U << state->wbits; - memcpy(window, state->window, wsize); + ZCOPY_WINDOW(window, state->window, 1U << state->wbits); } copy->window = window; dest->state = (struct internal_state *)copy; diff --git a/inflate_p.h b/inflate_p.h index 7122d7ce6..e3daae1e1 100644 --- a/inflate_p.h +++ b/inflate_p.h @@ -17,6 +17,7 @@ # define ZCOPY_INFLATE_STATE(dst, src) memcpy(dst, src, sizeof(struct inflate_state)) /* Memory management for the window. Useful for allocation the aligned window. 
*/ # define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) +# define ZCOPY_WINDOW(dest, src, n) memcpy(dest, src, n) # define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) /* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */ # define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) @@ -32,6 +33,10 @@ # define INFLATE_MARK_HOOK(strm) do {} while (0) /* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. */ # define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) +/* Invoked at the beginning of inflateSetDictionary(). Useful for checking arch-specific window data. */ +# define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) +/* Invoked at the beginning of inflateGetDictionary(). Useful for adjusting arch-specific window data. */ +# define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) #endif /* diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3bf1f7673..a5b4d7590 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -168,6 +168,7 @@ else() test_inflate_adler32.cc test_large_buffers.cc test_small_buffers.cc + test_small_window.cc ) if(WITH_GZFILEOP) diff --git a/test/test_small_window.cc b/test/test_small_window.cc new file mode 100644 index 000000000..e351efac0 --- /dev/null +++ b/test/test_small_window.cc @@ -0,0 +1,67 @@ +/* test_small_window.cc - Test deflate() and inflate() with a small window and a preset dictionary */ + +#include "zbuild.h" +#ifdef ZLIB_COMPAT +# include "zlib.h" +#else +# include "zlib-ng.h" +#endif + +#include <gtest/gtest.h> + +TEST(small_window, basic) { + PREFIX3(stream) stream; + int err; + unsigned char plain[128]; + unsigned char dictionary1[(1 << 9) - sizeof(plain) / 2]; + size_t i; + unsigned char compr[sizeof(plain)]; + unsigned int compr_len; + unsigned char plain_again[sizeof(plain)]; + + memset(&stream, 0, sizeof(stream)); + err = PREFIX(deflateInit2)(&stream, Z_BEST_COMPRESSION, Z_DEFLATED, 
-9, 8, Z_DEFAULT_STRATEGY); + EXPECT_EQ(err, Z_OK); + + /* Use a large dictionary that is loaded in two parts */ + memset(dictionary1, 'a', sizeof(dictionary1)); + err = PREFIX(deflateSetDictionary)(&stream, dictionary1, (unsigned int)sizeof(dictionary1)); + EXPECT_EQ(err, Z_OK); + for (i = 0; i < sizeof(plain); i++) + plain[i] = (unsigned char)i; + err = PREFIX(deflateSetDictionary)(&stream, plain, (unsigned int)sizeof(plain)); + EXPECT_EQ(err, Z_OK); + + stream.next_in = plain; + stream.avail_in = (uint32_t)sizeof(plain); + stream.next_out = compr; + stream.avail_out = (uint32_t)sizeof(compr); + err = PREFIX(deflate)(&stream, Z_FINISH); + EXPECT_EQ(err, Z_STREAM_END); + compr_len = sizeof(compr) - stream.avail_out; + + err = PREFIX(deflateEnd)(&stream); + EXPECT_EQ(err, Z_OK); + + memset(&stream, 0, sizeof(stream)); + err = PREFIX(inflateInit2)(&stream, -9); + EXPECT_EQ(err, Z_OK); + + err = PREFIX(inflateSetDictionary)(&stream, dictionary1, (unsigned int)sizeof(dictionary1)); + EXPECT_EQ(err, Z_OK); + err = PREFIX(inflateSetDictionary)(&stream, plain, (unsigned int)sizeof(plain)); + EXPECT_EQ(err, Z_OK); + + stream.next_in = compr; + stream.avail_in = compr_len; + stream.next_out = plain_again; + stream.avail_out = (unsigned int)sizeof(plain_again); + + err = PREFIX(inflate)(&stream, Z_NO_FLUSH); + EXPECT_EQ(err, Z_STREAM_END); + + err = PREFIX(inflateEnd)(&stream); + EXPECT_EQ(err, Z_OK); + + EXPECT_TRUE(memcmp(plain_again, plain, sizeof(plain)) == 0); +}