From: Ilya Leoshkevich Date: Wed, 10 Apr 2019 13:46:58 +0000 (+0200) Subject: Add support for IBM Z hardware-accelerated deflate X-Git-Tag: 1.9.9-b1~485 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8e30d1173699412c11880db10de2bc10893a2448;p=thirdparty%2Fzlib-ng.git Add support for IBM Z hardware-accelerated deflate Future versions of IBM Z mainframes will provide DFLTCC instruction, which implements deflate algorithm in hardware with estimated compression and decompression performance orders of magnitude faster than the current zlib-ng and ratio comparable with that of level 1. This patch adds DFLTCC support to zlib-ng. In order to enable it, the following build commands should be used: $ ./configure --with-dfltcc-deflate --with-dfltcc-inflate $ make When built like this, zlib-ng would compress in hardware on level 1, and in software on all other levels. Decompression will always happen in hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to make it used by default) one could add -DDFLTCC_LEVEL_MASK=0x7e to CFLAGS when building zlib-ng. Two DFLTCC compression calls produce the same results only when they both are made on machines of the same generation, and when the respective buffers have the same offset relative to the start of the page. Therefore care should be taken when using hardware compression when reproducible results are desired. DFLTCC does not support every single zlib-ng feature, in particular: * inflate(Z_BLOCK) and inflate(Z_TREES) * inflateMark() * inflatePrime() * deflateParams() after the first deflate() call When used, these functions will either switch to software, or, in case this is not possible, gracefully fail. This patch tries to add DFLTCC support in a least intrusive way. All SystemZ-specific code was placed into a separate file, but unfortunately there is still a noticeable amount of changes in the main zlib-ng code. Below is the summary of those changes. 
DFLTCC takes as arguments a parameter block, an input buffer, an output buffer and a window. Since DFLTCC requires parameter block to be doubleword-aligned, and it's reasonable to allocate it alongside deflate and inflate states, ZALLOC_STATE, ZFREE_STATE and ZCOPY_STATE macros were introduced in order to encapsulate the allocation details. The same is true for window, for which ZALLOC_WINDOW and TRY_FREE_WINDOW macros were introduced. While for inflate software and hardware window formats match, this is not the case for deflate. Therefore, deflateSetDictionary and deflateGetDictionary need special handling, which is triggered using the new DEFLATE_SET_DICTIONARY_HOOK and DEFLATE_GET_DICTIONARY_HOOK macros. deflateResetKeep() and inflateResetKeep() now update the DFLTCC parameter block, which is allocated alongside zlib-ng state, using the new DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros. In order to make unsupported deflateParams(), inflatePrime() and inflateMark() calls to fail gracefully, the new DEFLATE_PARAMS_HOOK, INFLATE_PRIME_HOOK and INFLATE_MARK_HOOK macros were introduced. The algorithm implemented in hardware has different compression ratio than the one implemented in software. In order for deflateBound() to return the correct results for the hardware implementation, the new DEFLATE_BOUND_ADJUST_COMPLEN and DEFLATE_NEED_CONSERVATIVE_BOUND macros were introduced. Actual compression and decompression are handled by the new DEFLATE_HOOK and INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the window on its own, calling updatewindow() is suppressed using the new INFLATE_NEED_UPDATEWINDOW() macro. In addition to compression, DFLTCC computes CRC-32 and Adler-32 checksums, therefore, whenever it's used, software checksumming needs to be suppressed using the new DEFLATE_NEED_CHECKSUM and INFLATE_NEED_CHECKSUM macros. 
DFLTCC will refuse to write an End-of-block Symbol if there is no input data, thus in some cases it is necessary to do this manually. In order to achieve this, bi_reverse and flush_pending were promoted from static to ZLIB_INTERNAL and exposed via deflate.h. Since the first call to dfltcc_inflate already needs the window, and it might be not allocated yet, inflate_ensure_window was factored out of updatewindow and made ZLIB_INTERNAL. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 90fc64f8..ba55fb5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,7 @@ endif() check_include_file(sys/types.h HAVE_SYS_TYPES_H) check_include_file(stdint.h HAVE_STDINT_H) check_include_file(stddef.h HAVE_STDDEF_H) +check_include_file(sys/sdt.h HAVE_SYS_SDT_H) # # Options parsing @@ -108,6 +109,11 @@ if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") option(WITH_NEON "Build with NEON intrinsics" ON) endif() +if("${ARCH}" MATCHES "s390x") + option(WITH_DFLTCC_DEFLATE "Use DEFLATE CONVERSION CALL instruction for compression on IBM Z" OFF) + option(WITH_DFLTCC_INFLATE "Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z" OFF) +endif() + if(${CMAKE_C_COMPILER} MATCHES "icc" OR ${CMAKE_C_COMPILER} MATCHES "icpc" OR ${CMAKE_C_COMPILER} MATCHES "icl") if(WITH_NATIVE_INSTRUCTIONS) message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration") @@ -517,6 +523,8 @@ if("${ARCH}" MATCHES "x86_64" OR "${ARCH}" MATCHES "AMD64" OR "${ARCH}" MATCHES elseif("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") set(ARCHDIR "arch/arm") add_definitions(-DUNALIGNED_OK) +elseif("${ARCH}" MATCHES "s390x") + set(ARCHDIR "arch/s390") else() message(STATUS "No optimized architecture: using ${ARCHDIR}") endif() @@ -595,6 +603,18 @@ if(WITH_OPTIM) add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${PCLMULFLAG} ${SSE4FLAG}\"") endif() endif() + elseif("${ARCH}" MATCHES "s390x") + if(WITH_DFLTCC_DEFLATE OR 
WITH_DFLTCC_INFLATE) + set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/dfltcc_common.c) + endif() + if(WITH_DFLTCC_DEFLATE) + add_definitions(-DS390_DFLTCC_DEFLATE) + set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/dfltcc_deflate.c) + endif() + if(WITH_DFLTCC_INFLATE) + add_definitions(-DS390_DFLTCC_INFLATE) + set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/dfltcc_inflate.c) + endif() endif() endif() message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}") diff --git a/arch/s390/Makefile.in b/arch/s390/Makefile.in new file mode 100644 index 00000000..caf7340a --- /dev/null +++ b/arch/s390/Makefile.in @@ -0,0 +1,40 @@ +# Makefile for zlib +# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# For conditions of distribution and use, see copyright notice in zlib.h + +CC= +CFLAGS= +SFLAGS= +INCLUDES= +SUFFIX= + +SRCDIR=. +SRCTOP=../.. +TOPDIR=$(SRCTOP) + +dfltcc_common.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_common.c + +dfltcc_common.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_common.c + +dfltcc_deflate.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c + +dfltcc_deflate.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c + +dfltcc_inflate.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c + +dfltcc_inflate.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c + +mostlyclean: clean +clean: + rm -f *.o *.lo *~ + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +distclean: + rm -f Makefile diff --git a/arch/s390/README.md b/arch/s390/README.md new file mode 100644 index 00000000..81da220a --- /dev/null +++ b/arch/s390/README.md @@ -0,0 +1,64 @@ +This directory contains IBM Z DEFLATE CONVERSION CALL support to +zlib-ng. 
In order to enable it, the following build commands should be +used: + + $ ./configure --with-dfltcc-deflate --with-dfltcc-inflate + $ make + +When built like this, zlib-ng would compress in hardware on level 1, +and in software on all other levels. Decompression will always happen +in hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. +to make it used by default) one could add -DDFLTCC_LEVEL_MASK=0x7e to +CFLAGS when building zlib-ng. + +Two DFLTCC compression calls produce the same results only when they +both are made on machines of the same generation, and when the +respective buffers have the same offset relative to the start of the +page. Therefore care should be taken when using hardware compression +when reproducible results are desired. + +DFLTCC does not support every single zlib-ng feature, in particular: + +* inflate(Z_BLOCK) and inflate(Z_TREES) +* inflateMark() +* inflatePrime() +* deflateParams() after the first deflate() call + +When used, these functions will either switch to software, or, in case +this is not possible, gracefully fail. + +All SystemZ-specific code lives in a separate file and is integrated +with the rest of zlib-ng using hook macros, which are explained below. + +DFLTCC takes as arguments a parameter block, an input buffer, an output +buffer and a window. ZALLOC_STATE, ZFREE_STATE, ZCOPY_STATE, +ZALLOC_WINDOW and TRY_FREE_WINDOW macros encapsulate allocation details +for the parameter block (which is allocated alongside zlib-ng state) +and the window (which must be page-aligned). + +While for inflate software and hardware window formats match, this is +not the case for deflate. Therefore, deflateSetDictionary and +deflateGetDictionary need special handling, which is triggered using +the DEFLATE_SET_DICTIONARY_HOOK and DEFLATE_GET_DICTIONARY_HOOK macros. + +deflateResetKeep() and inflateResetKeep() update the DFLTCC parameter +block using DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros. 
+ +DEFLATE_PARAMS_HOOK, INFLATE_PRIME_HOOK and INFLATE_MARK_HOOK macros +make the unsupported deflateParams(), inflatePrime() and inflateMark() +calls fail gracefully. + +The algorithm implemented in hardware has different compression ratio +than the one implemented in software. DEFLATE_BOUND_ADJUST_COMPLEN and +DEFLATE_NEED_CONSERVATIVE_BOUND macros make deflateBound() return the +correct results for the hardware implementation. + +Actual compression and decompression are handled by the new DEFLATE_HOOK +and INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the +window on its own, calling updatewindow() is suppressed using the new +INFLATE_NEED_UPDATEWINDOW() macro. + +In addition to compression, DFLTCC computes CRC-32 and Adler-32 +checksums, therefore, whenever it's used, software checksumming is +suppressed using DEFLATE_NEED_CHECKSUM and INFLATE_NEED_CHECKSUM +macros. diff --git a/arch/s390/dfltcc_common.c b/arch/s390/dfltcc_common.c new file mode 100644 index 00000000..9eb51d62 --- /dev/null +++ b/arch/s390/dfltcc_common.c @@ -0,0 +1,88 @@ +/* dfltcc_common.c - IBM Z DEFLATE CONVERSION CALL general support. */ + +#include "zbuild.h" +#include "dfltcc_common.h" +#include "dfltcc_detail.h" + +/* + Memory management. + + DFLTCC requires parameter blocks and window to be aligned. zlib allows + users to specify their own allocation functions, so using e.g. + `posix_memalign' is not an option. Thus, we overallocate and take the + aligned portion of the buffer. 
+*/ +static inline int is_dfltcc_enabled(void) +{ + uint64_t facilities[(DFLTCC_FACILITY / 64) + 1]; + register int r0 __asm__("r0"); + + r0 = sizeof(facilities) / sizeof(facilities[0]); + __asm__ volatile("stfle %[facilities]\n" + : [facilities] "=Q" (facilities) + : [r0] "r" (r0) + : "cc", "memory"); + return is_bit_set((const char *)facilities, DFLTCC_FACILITY); +} + +void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size) +{ + struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + size); + struct dfltcc_qaf_param *param = (struct dfltcc_qaf_param *)&dfltcc_state->param; + + /* Initialize available functions */ + if (is_dfltcc_enabled()) { + dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); + memmove(&dfltcc_state->af, param, sizeof(dfltcc_state->af)); + } else + memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); + + /* Initialize parameter block */ + memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); + dfltcc_state->param.nt = 1; + + /* Initialize tuning parameters */ + dfltcc_state->level_mask = DFLTCC_LEVEL_MASK; + dfltcc_state->block_size = DFLTCC_BLOCK_SIZE; + dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE; + dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; + dfltcc_state->param.ribm = DFLTCC_RIBM; +} + +void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size) +{ + Assert((items * size) % 8 == 0, + "The size of zlib state must be a multiple of 8"); + return ZALLOC(strm, items * size + sizeof(struct dfltcc_state), sizeof(unsigned char)); +} + +void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size) +{ + memcpy(dst, src, size + sizeof(struct dfltcc_state)); +} + +static const int PAGE_ALIGN = 0x1000; + +#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) + +void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size) +{ + void *p; + void *w; + + /* To simplify freeing, 
we store the pointer to the allocated buffer right + * before the window. + */ + p = ZALLOC(strm, sizeof(void *) + items * size + PAGE_ALIGN, sizeof(unsigned char)); + if (p == NULL) + return NULL; + w = ALIGN_UP((char *)p + sizeof(void *), PAGE_ALIGN); + *(void **)((char *)w - sizeof(void *)) = p; + return w; +} + +void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w) +{ + if (w) + ZFREE(strm, *(void **)((unsigned char *)w - sizeof(void *))); +} diff --git a/arch/s390/dfltcc_common.h b/arch/s390/dfltcc_common.h new file mode 100644 index 00000000..e206e043 --- /dev/null +++ b/arch/s390/dfltcc_common.h @@ -0,0 +1,29 @@ +#ifndef DFLTCC_COMMON_H +#define DFLTCC_COMMON_H + +#ifdef ZLIB_COMPAT +#include "zlib.h" +#else +#include "zlib-ng.h" +#endif +#include "zutil.h" + +void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size); +void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size); +void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size); +void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size); +void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w); + +#define ZALLOC_STATE dfltcc_alloc_state + +#define ZFREE_STATE ZFREE + +#define ZCOPY_STATE dfltcc_copy_state + +#define ZALLOC_WINDOW dfltcc_alloc_window + +#define ZFREE_WINDOW dfltcc_free_window + +#define TRY_FREE_WINDOW dfltcc_free_window + +#endif diff --git a/arch/s390/dfltcc_deflate.c b/arch/s390/dfltcc_deflate.c new file mode 100644 index 00000000..adab3e00 --- /dev/null +++ b/arch/s390/dfltcc_deflate.c @@ -0,0 +1,365 @@ +/* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL compression support. */ + +/* + Use the following commands to build zlib-ng with DFLTCC compression support: + + $ ./configure --with-dfltcc-deflate + or + + $ cmake -DWITH_DFLTCC_DEFLATE=1 . 
+ + and then + + $ make +*/ + +#include "zbuild.h" +#include "zutil.h" +#include "deflate.h" +#include "dfltcc_deflate.h" +#include "dfltcc_detail.h" + +static inline int dfltcc_are_params_ok(int level, uInt window_bits, int strategy, uint16_t level_mask) +{ + return (level_mask & ((uint16_t)1 << level)) != 0 && + (window_bits == HB_BITS) && + (strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY); +} + + +int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + + /* Unsupported compression settings */ + if (!dfltcc_are_params_ok(state->level, state->w_bits, state->strategy, dfltcc_state->level_mask)) + return 0; + + /* Unsupported hardware */ + if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) || + !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) || + !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0)) + return 0; + + return 1; +} + +static inline void dfltcc_gdht(PREFIX3(streamp) strm) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + size_t avail_in = strm->avail_in; + + dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL); +} + +static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + size_t avail_in = strm->avail_in; + size_t avail_out = strm->avail_out; + dfltcc_cc cc; + + cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR, + param, &strm->next_out, &avail_out, + &strm->next_in, &avail_in, state->window); + strm->total_in += (strm->avail_in - avail_in); + strm->total_out += (strm->avail_out - avail_out); + strm->avail_in = avail_in; + strm->avail_out = avail_out; + return cc; +} + +static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) +{ + deflate_state *state = (deflate_state *)strm->state; + + 
send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl); + flush_pending(strm); + if (state->pending != 0) { + /* The remaining data is located in pending_out[0:pending]. If someone + * calls put_byte() - this might happen in deflate() - the byte will be + * placed into pending_buf[pending], which is incorrect. Move the + * remaining data to the beginning of pending_buf so that put_byte() is + * usable again. + */ + memmove(state->pending_buf, state->pending_out, state->pending); + state->pending_out = state->pending_buf; + } +#ifdef ZLIB_DEBUG + state->compressed_len += param->eobl; +#endif +} + +int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + uInt masked_avail_in; + dfltcc_cc cc; + int need_empty_block; + int soft_bcc; + int no_flush; + + if (!dfltcc_can_deflate(strm)) + return 0; + +again: + masked_avail_in = 0; + soft_bcc = 0; + no_flush = flush == Z_NO_FLUSH; + + /* Trailing empty block. Switch to software, except when Continuation Flag + * is set, which means that DFLTCC has buffered some output in the + * parameter block and needs to be called again in order to flush it. + */ + if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) { + if (param->bcf) { + /* A block is still open, and the hardware does not support closing + * blocks without adding data. Thus, close it manually. + */ + send_eobs(strm, param); + param->bcf = 0; + } + return 0; + } + + if (strm->avail_in == 0 && !param->cf) { + *result = need_more; + return 1; + } + + /* There is an open non-BFINAL block, we are not going to close it just + * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see + * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. 
Open a new block with a new + * DHT in order to adapt to a possibly changed input data distribution. + */ + if (param->bcf && no_flush && + strm->total_in > dfltcc_state->block_threshold && + strm->avail_in >= dfltcc_state->dht_threshold) { + if (param->cf) { + /* We need to flush the DFLTCC buffer before writing the + * End-of-block Symbol. Mask the input data and proceed as usual. + */ + masked_avail_in += strm->avail_in; + strm->avail_in = 0; + no_flush = 0; + } else { + /* DFLTCC buffer is empty, so we can manually write the + * End-of-block Symbol right away. + */ + send_eobs(strm, param); + param->bcf = 0; + dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size; + if (strm->avail_out == 0) { + *result = need_more; + return 1; + } + } + } + + /* The caller gave us too much data. Pass only one block worth of + * uncompressed data to DFLTCC and mask the rest, so that on the next + * iteration we start a new block. + */ + if (no_flush && strm->avail_in > dfltcc_state->block_size) { + masked_avail_in += (strm->avail_in - dfltcc_state->block_size); + strm->avail_in = dfltcc_state->block_size; + } + + /* When we have an open non-BFINAL deflate block and caller indicates that + * the stream is ending, we need to close an open deflate block and open a + * BFINAL one. + */ + need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf; + + /* Translate stream to parameter block */ + param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32; + if (!no_flush) + /* We need to close a block. Always do this in software - when there is + * no input data, the hardware will not honor BCC. */ + soft_bcc = 1; + if (flush == Z_FINISH && !param->bcf) + /* We are about to open a BFINAL block, set Block Header Final bit + * until the stream ends. + */ + param->bhf = 1; + /* DFLTCC-CMPR will write to next_out, so make sure that buffers with + * higher precedence are empty. 
+ */ + Assert(state->pending == 0, "There must be no pending bytes"); + Assert(state->bi_valid < 8, "There must be less than 8 pending bits"); + param->sbb = (unsigned int)state->bi_valid; + if (param->sbb > 0) + *strm->next_out = (unsigned char)state->bi_buf; + if (param->hl) + param->nt = 0; /* Honor history */ + param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler; + + /* When opening a block, choose a Huffman-Table Type */ + if (!param->bcf) { + if (state->strategy == Z_FIXED || (strm->total_in == 0 && dfltcc_state->block_threshold > 0)) + param->htt = HTT_FIXED; + else { + param->htt = HTT_DYNAMIC; + dfltcc_gdht(strm); + } + } + + /* Deflate */ + do { + cc = dfltcc_cmpr(strm); + if (strm->avail_in < 4096 && masked_avail_in > 0) + /* We are about to call DFLTCC with a small input buffer, which is + * inefficient. Since there is masked data, there will be at least + * one more DFLTCC call, so skip the current one and make the next + * one handle more data. + */ + break; + } while (cc == DFLTCC_CC_AGAIN); + + /* Translate parameter block to stream */ + strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + state->bi_valid = param->sbb; + if (state->bi_valid == 0) + state->bi_buf = 0; /* Avoid accessing next_out */ + else + state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); + strm->adler = state->wrap == 2 ? ZSWAP32(param->cv) : param->cv; + + /* Unmask the input data */ + strm->avail_in += masked_avail_in; + masked_avail_in = 0; + + /* If we encounter an error, it means there is a bug in DFLTCC call */ + Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG"); + + /* Update Block-Continuation Flag. It will be used to check whether to call + * GDHT the next time. 
+ */ + if (cc == DFLTCC_CC_OK) { + if (soft_bcc) { + send_eobs(strm, param); + param->bcf = 0; + dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size; + } else + param->bcf = 1; + if (flush == Z_FINISH) { + if (need_empty_block) + /* Make the current deflate() call also close the stream */ + return 0; + else { + bi_windup(state); + *result = finish_done; + } + } else { + if (flush == Z_FULL_FLUSH) + param->hl = 0; /* Clear history */ + *result = flush == Z_NO_FLUSH ? need_more : block_done; + } + } else { + param->bcf = 1; + *result = need_more; + } + if (strm->avail_in != 0 && strm->avail_out != 0) + goto again; /* deflate() must use all input or all output */ + return 1; +} + +/* + Switching between hardware and software compression. + + DFLTCC does not support all zlib settings, e.g. generation of non-compressed + blocks or alternative window sizes. When such settings are applied on the + fly with deflateParams, we need to convert between hardware and software + window formats. +*/ +int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + int could_deflate = dfltcc_can_deflate(strm); + int can_deflate = dfltcc_are_params_ok(level, state->w_bits, strategy, dfltcc_state->level_mask); + + if (can_deflate == could_deflate) + /* We continue to work in the same mode - no changes needed */ + return Z_OK; + + if (strm->total_in == 0 && param->nt == 1 && param->hl == 0) + /* DFLTCC was not used yet - no changes needed */ + return Z_OK; + + /* Switching between hardware and software is not implemented */ + return Z_STREAM_ERROR; +} + +/* + Preloading history. 
+*/ +static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) +{ + size_t offset; + size_t n; + + /* Do not use more than 32K */ + if (count > HB_SIZE) { + buf += count - HB_SIZE; + count = HB_SIZE; + } + offset = (param->ho + param->hl) % HB_SIZE; + if (offset + count <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(history + offset, buf, count); + else { + /* Circular history buffer wraps - copy two chunks */ + n = HB_SIZE - offset; + memcpy(history + offset, buf, n); + memcpy(history, buf + n, count - n); + } + n = param->hl + count; + if (n <= HB_SIZE) + /* All history fits into buffer - no need to discard anything */ + param->hl = n; + else { + /* History does not fit into buffer - discard extra bytes */ + param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; + param->hl = HB_SIZE; + } +} + +int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + append_history(param, state->window, dictionary, dict_length); + state->strstart = 1; /* Add FDICT to zlib header */ + return Z_OK; +} + +int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (dictionary) { + if (param->ho + param->hl <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(dictionary, state->window + param->ho, param->hl); + else { + /* Circular history buffer wraps - copy two chunks */ + memcpy(dictionary, state->window + param->ho, HB_SIZE - param->ho); + memcpy(dictionary + HB_SIZE - 
param->ho, state->window, param->ho + param->hl - HB_SIZE); + } + } + if (dict_length) + *dict_length = param->hl; + return Z_OK; +} diff --git a/arch/s390/dfltcc_deflate.h b/arch/s390/dfltcc_deflate.h new file mode 100644 index 00000000..93945f0a --- /dev/null +++ b/arch/s390/dfltcc_deflate.h @@ -0,0 +1,50 @@ +#ifndef DFLTCC_DEFLATE_H +#define DFLTCC_DEFLATE_H + +#include "dfltcc_common.h" + +int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm); +int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result); +int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy); +int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length); +int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); + +#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_deflate((strm))) \ + return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \ + } while (0) + +#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_deflate((strm))) \ + return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \ + } while (0) + +#define DEFLATE_RESET_KEEP_HOOK(strm) \ + dfltcc_reset((strm), sizeof(deflate_state)) + +#define DEFLATE_PARAMS_HOOK(strm, level, strategy) \ + do { \ + int err; \ +\ + err = dfltcc_deflate_params((strm), (level), (strategy)); \ + if (err == Z_STREAM_ERROR) \ + return err; \ + } while (0) + +#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ + do { \ + if (dfltcc_can_deflate((strm))) \ + (complen) = (3 + 5 + 5 + 4 + 19 * 3 + (286 + 30) * 7 + \ + (source_len) * 16 + 15 + 7) >> 3; \ + } while (0) + +#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) + +#define DEFLATE_HOOK dfltcc_deflate + +#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) + +#endif diff --git 
a/arch/s390/dfltcc_detail.h b/arch/s390/dfltcc_detail.h new file mode 100644 index 00000000..156b2a28 --- /dev/null +++ b/arch/s390/dfltcc_detail.h @@ -0,0 +1,201 @@ +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#ifdef HAVE_SYS_SDT_H +#include <sys/sdt.h> +#endif + +/* + Tuning parameters. + */ +#ifndef DFLTCC_LEVEL_MASK +#define DFLTCC_LEVEL_MASK 0x2 +#endif +#ifndef DFLTCC_BLOCK_SIZE +#define DFLTCC_BLOCK_SIZE 1048576 +#endif +#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE +#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 +#endif +#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE +#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 +#endif +#ifndef DFLTCC_RIBM +#define DFLTCC_RIBM 0 +#endif + +/* + C wrapper for the DEFLATE CONVERSION CALL instruction. + */ +typedef enum { + DFLTCC_CC_OK = 0, + DFLTCC_CC_OP1_TOO_SHORT = 1, + DFLTCC_CC_OP2_TOO_SHORT = 2, + DFLTCC_CC_OP2_CORRUPT = 2, + DFLTCC_CC_AGAIN = 3, +} dfltcc_cc; + +#define DFLTCC_QAF 0 +#define DFLTCC_GDHT 1 +#define DFLTCC_CMPR 2 +#define DFLTCC_XPND 4 +#define HBT_CIRCULAR (1 << 7) +#define HB_BITS 15 +#define HB_SIZE (1 << HB_BITS) +#define DFLTCC_FACILITY 151 + +static inline dfltcc_cc dfltcc(int fn, void *param, + unsigned char **op1, size_t *len1, const unsigned char **op2, size_t *len2, void *hist) +{ + unsigned char *t2 = op1 ? *op1 : NULL; + size_t t3 = len1 ? *len1 : 0; + const unsigned char *t4 = op2 ? *op2 : NULL; + size_t t5 = len2 ? 
*len2 : 0; + register int r0 __asm__("r0") = fn; + register void *r1 __asm__("r1") = param; + register unsigned char *r2 __asm__("r2") = t2; + register size_t r3 __asm__("r3") = t3; + register const unsigned char *r4 __asm__("r4") = t4; + register size_t r5 __asm__("r5") = t5; + int cc; + + __asm__ volatile( +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) +#endif + ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5)) +#endif + "ipm %[cc]\n" + : [r2] "+r" (r2) + , [r3] "+r" (r3) + , [r4] "+r" (r4) + , [r5] "+r" (r5) + , [cc] "=r" (cc) + : [r0] "r" (r0) + , [r1] "r" (r1) + , [hist] "r" (hist) +#ifdef HAVE_SYS_SDT_H + , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) +#endif + : "cc", "memory"); + t2 = r2; t3 = r3; t4 = r4; t5 = r5; + + if (op1) + *op1 = t2; + if (len1) + *len1 = t3; + if (op2) + *op2 = t4; + if (len2) + *len2 = t5; + return (cc >> 28) & 3; +} + +/* + Parameter Block for Query Available Functions. + */ +#define static_assert(c, msg) __attribute__((unused)) static char static_assert_failed_ ## msg[c ? 1 : -1] + +struct dfltcc_qaf_param { + char fns[16]; + char reserved1[8]; + char fmts[2]; + char reserved2[6]; +}; + +static_assert(sizeof(struct dfltcc_qaf_param) == 32, sizeof_struct_dfltcc_qaf_param_is_32); + +static inline int is_bit_set(const char *bits, int n) +{ + return bits[n / 8] & (1 << (7 - (n % 8))); +} + +static inline void clear_bit(char *bits, int n) +{ + bits[n / 8] &= ~(1 << (7 - (n % 8))); +} + +#define DFLTCC_FMT0 0 + +/* + Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. 
+ */ +#define CVT_CRC32 0 +#define CVT_ADLER32 1 +#define HTT_FIXED 0 +#define HTT_DYNAMIC 1 + +struct dfltcc_param_v0 { + uint16_t pbvn; /* Parameter-Block-Version Number */ + uint8_t mvn; /* Model-Version Number */ + uint8_t ribm; /* Reserved for IBM use */ + uint32_t reserved32 : 31; + uint32_t cf : 1; /* Continuation Flag */ + uint8_t reserved64[8]; + uint32_t nt : 1; /* New Task */ + uint32_t reserved129 : 1; + uint32_t cvt : 1; /* Check Value Type */ + uint32_t reserved131 : 1; + uint32_t htt : 1; /* Huffman-Table Type */ + uint32_t bcf : 1; /* Block-Continuation Flag */ + uint32_t bcc : 1; /* Block Closing Control */ + uint32_t bhf : 1; /* Block Header Final */ + uint32_t reserved136 : 1; + uint32_t reserved137 : 1; + uint32_t dhtgc : 1; /* DHT Generation Control */ + uint32_t reserved139 : 5; + uint32_t reserved144 : 5; + uint32_t sbb : 3; /* Sub-Byte Boundary */ + uint8_t oesc; /* Operation-Ending-Supplemental Code */ + uint32_t reserved160 : 12; + uint32_t ifs : 4; /* Incomplete-Function Status */ + uint16_t ifl; /* Incomplete-Function Length */ + uint8_t reserved192[8]; + uint8_t reserved256[8]; + uint8_t reserved320[4]; + uint16_t hl; /* History Length */ + uint32_t reserved368 : 1; + uint16_t ho : 15; /* History Offset */ + uint32_t cv; /* Check Value */ + uint32_t eobs : 15; /* End-of-block Symbol */ + uint32_t reserved431: 1; + uint8_t eobl : 4; /* End-of-block Length */ + uint32_t reserved436 : 12; + uint32_t reserved448 : 4; + uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table + Length */ + uint8_t reserved464[6]; + uint8_t cdht[288]; + uint8_t reserved[32]; + uint8_t csb[1152]; +}; + +static_assert(sizeof(struct dfltcc_param_v0) == 1536, sizeof_struct_dfltcc_param_v0_is_1536); + +static inline const char *oesc_msg(char *buf, int oesc) +{ + if (oesc == 0x00) + return NULL; /* Successful completion */ + else { + sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc); + return buf; + } +} + +/* + Extension of inflate_state and 
deflate_state. Must be doubleword-aligned. +*/ +struct dfltcc_state { + struct dfltcc_param_v0 param; /* Parameter block. */ + struct dfltcc_qaf_param af; /* Available functions. */ + uint16_t level_mask; /* Levels on which to use DFLTCC */ + uint32_t block_size; /* New block each X bytes */ + size_t block_threshold; /* New block after total_in > X */ + uint32_t dht_threshold; /* New block only if avail_in >= X */ + char msg[64]; /* Buffer for strm->msg */ +}; + +#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((state) + 1)) diff --git a/arch/s390/dfltcc_inflate.c b/arch/s390/dfltcc_inflate.c new file mode 100644 index 00000000..cec25f41 --- /dev/null +++ b/arch/s390/dfltcc_inflate.c @@ -0,0 +1,142 @@ +/* dfltcc_inflate.c - IBM Z DEFLATE CONVERSION CALL decompression support. */ + +/* + Use the following commands to build zlib-ng with DFLTCC decompression support: + + $ ./configure --with-dfltcc-inflate + or + + $ cmake -DWITH_DFLTCC_INFLATE=1 . + + and then + + $ make +*/ + +#include "zbuild.h" +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "dfltcc_inflate.h" +#include "dfltcc_detail.h" + +int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + + /* Unsupported compression settings */ + if (state->wbits != HB_BITS) + return 0; + + /* Unsupported hardware */ + return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); +} + +static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + size_t avail_in = strm->avail_in; + size_t avail_out = strm->avail_out; + dfltcc_cc cc; + + cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR, + param, &strm->next_out, &avail_out, + &strm->next_in, &avail_in, state->window); + strm->avail_in 
= avail_in; + strm->avail_out = avail_out; + return cc; +} + +dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + dfltcc_cc cc; + + if (flush == Z_BLOCK || flush == Z_TREES) { + /* DFLTCC does not support stopping on block boundaries */ + if (dfltcc_inflate_disable(strm)) { + *ret = Z_STREAM_ERROR; + return DFLTCC_INFLATE_BREAK; + } else + return DFLTCC_INFLATE_SOFTWARE; + } + + if (state->last) { + if (state->bits != 0) { + strm->next_in++; + strm->avail_in--; + state->bits = 0; + } + state->mode = CHECK; + return DFLTCC_INFLATE_CONTINUE; + } + + if (strm->avail_in == 0 && !param->cf) + return DFLTCC_INFLATE_BREAK; + + if (inflate_ensure_window(state)) { + state->mode = MEM; + return DFLTCC_INFLATE_CONTINUE; + } + + /* Translate stream to parameter block */ + param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32; + param->sbb = state->bits; + param->hl = state->whave; /* Software and hardware history formats match */ + param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1); + if (param->hl) + param->nt = 0; /* Honor history for the first block */ + param->cv = state->flags ? ZSWAP32(state->check) : state->check; + + /* Inflate */ + do { + cc = dfltcc_xpnd(strm); + } while (cc == DFLTCC_CC_AGAIN); + + /* Translate parameter block to stream */ + strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + state->last = cc == DFLTCC_CC_OK; + state->bits = param->sbb; + state->whave = param->hl; + state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); + state->check = state->flags ? 
ZSWAP32(param->cv) : param->cv; + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { + /* Report an error if stream is corrupted */ + state->mode = BAD; + return DFLTCC_INFLATE_CONTINUE; + } + state->mode = TYPEDO; + /* Break if operands are exhausted, otherwise continue looping */ + return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? + DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; +} + +int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + + return !param->nt; +} + +int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + + if (!dfltcc_can_inflate(strm)) + return 0; + if (dfltcc_was_inflate_used(strm)) + /* DFLTCC has already decompressed some data. Since there is not + * enough information to resume decompression in software, the call + * must fail. 
+ */ + return 1; + /* DFLTCC was not used yet - decompress in software */ + memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); + return 0; +} diff --git a/arch/s390/dfltcc_inflate.h b/arch/s390/dfltcc_inflate.h new file mode 100644 index 00000000..aea2bbb7 --- /dev/null +++ b/arch/s390/dfltcc_inflate.h @@ -0,0 +1,44 @@ +#ifndef DFLTCC_INFLATE_H +#define DFLTCC_INFLATE_H + +#include "dfltcc_common.h" + +int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm); +typedef enum { + DFLTCC_INFLATE_CONTINUE, + DFLTCC_INFLATE_BREAK, + DFLTCC_INFLATE_SOFTWARE, +} dfltcc_inflate_action; +dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret); +int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm); +int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm); + +#define INFLATE_RESET_KEEP_HOOK(strm) \ + dfltcc_reset((strm), sizeof(struct inflate_state)) + +#define INFLATE_PRIME_HOOK(strm, bits, value) \ + do { if (dfltcc_inflate_disable((strm))) return Z_STREAM_ERROR; } while (0) + +#define INFLATE_TYPEDO_HOOK(strm, flush) \ + if (dfltcc_can_inflate((strm))) { \ + dfltcc_inflate_action action; \ +\ + RESTORE(); \ + action = dfltcc_inflate((strm), (flush), &ret); \ + LOAD(); \ + if (action == DFLTCC_INFLATE_CONTINUE) \ + break; \ + else if (action == DFLTCC_INFLATE_BREAK) \ + goto inf_leave; \ + } + +#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) + +#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) + +#define INFLATE_MARK_HOOK(strm) \ + do { \ + if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \ + } while (0) + +#endif diff --git a/configure b/configure index f57f8327..6f592c34 100755 --- a/configure +++ b/configure @@ -92,6 +92,8 @@ build32=0 build64=0 buildacle=1 buildneon=1 +builddfltccdeflate=0 +builddfltccinflate=0 with_sanitizers=0 with_msan=0 with_fuzzers=0 @@ -146,6 +148,8 @@ case "$1" in echo ' [--without-new-strategies] Compiles without using new additional 
deflate strategies' | tee -a configure.log echo ' [--without-acle] Compiles without ARM C Language Extensions' | tee -a configure.log echo ' [--without-neon] Compiles without ARM Neon SIMD instruction set' | tee -a configure.log + echo ' [--with-dfltcc-deflate] Use DEFLATE CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log + echo ' [--with-dfltcc-inflate] Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log echo ' [--force-sse2] Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log echo ' [--with-sanitizers] Build with address sanitizer and all supported sanitizers other than memory sanitizer (disabled by default)' | tee -a configure.log echo ' [--with-msan] Build with memory sanitizer (disabled by default)' | tee -a configure.log @@ -170,6 +174,8 @@ case "$1" in -6* | --64) build64=1; shift ;; --without-acle) buildacle=0; shift ;; --without-neon) buildneon=0; shift ;; + --with-dfltcc-deflate) builddfltccdeflate=1; shift ;; + --with-dfltcc-inflate) builddfltccinflate=1; shift ;; --force-sse2) forcesse2=1; shift ;; -n | --native) native=1; shift ;; -a*=* | --archs=*) ARCHS=`echo $1 | sed 's/.*=//'`; shift ;; @@ -182,6 +188,7 @@ case "$1" in --with-sanitizers) with_sanitizers=1; shift ;; --with-msan) with_msan=1; shift ;; --with-fuzzers) with_fuzzers=1; shift ;; + *) echo "unknown option: $1" | tee -a configure.log echo "$0 --help for help" | tee -a configure.log @@ -950,6 +957,19 @@ EOF ;; esac +# Check whether sys/sdt.h is available +cat > $test.c << EOF +#include <sys/sdt.h> +int main() { return 0; } +EOF +if try ${CC} ${CFLAGS} $test.c; then + echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log + CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H" + SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H" +else + echo "Checking for sys/sdt.h ... No."
| tee -a configure.log +fi + ARCHDIR='arch/generic' ARCH_STATIC_OBJS='' ARCH_SHARED_OBJS='' @@ -1193,6 +1213,28 @@ case "${ARCH}" in powerpc64) [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64 ;; + s390x) + [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=s390x + ARCHDIR=arch/s390 + if test $builddfltccdeflate -eq 1 -o $builddfltccinflate -eq 1; then + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} dfltcc_common.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} dfltcc_common.lo" + fi + if test $builddfltccdeflate -eq 1; then + CFLAGS="${CFLAGS} -DS390_DFLTCC_DEFLATE" + SFLAGS="${SFLAGS} -DS390_DFLTCC_DEFLATE" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} dfltcc_deflate.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} dfltcc_deflate.lo" + ARCH="${ARCH}+dfltcc-deflate" + fi + if test $builddfltccinflate -eq 1; then + CFLAGS="${CFLAGS} -DS390_DFLTCC_INFLATE" + SFLAGS="${SFLAGS} -DS390_DFLTCC_INFLATE" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} dfltcc_inflate.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} dfltcc_inflate.lo" + ARCH="${ARCH}+dfltcc-inflate" + fi + ;; *) [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=$ARCH ;; diff --git a/deflate.c b/deflate.c index a77db08f..dd4edff0 100644 --- a/deflate.c +++ b/deflate.c @@ -66,31 +66,35 @@ const char deflate_copyright[] = " deflate 1.2.11.f Copyright 1995-2016 Jean-lou /* =========================================================================== * Architecture-specific hooks. */ +#ifdef S390_DFLTCC_DEFLATE +# include "arch/s390/dfltcc_deflate.h" +#else /* Memory management for the deflate state. Useful for allocating arch-specific extension blocks. */ -#define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) -#define ZFREE_STATE(strm, addr) ZFREE(strm, addr) -#define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) +# define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) +# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) +# define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) /* Memory management for the window. Useful for allocation the aligned window. 
*/ -#define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) -#define TRY_FREE_WINDOW(strm, addr) TRY_FREE(strm, addr) +# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) +# define TRY_FREE_WINDOW(strm, addr) TRY_FREE(strm, addr) /* Invoked at the beginning of deflateSetDictionary(). Useful for checking arch-specific window data. */ -#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) +# define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) /* Invoked at the beginning of deflateGetDictionary(). Useful for adjusting arch-specific window data. */ -#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) +# define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) /* Invoked at the end of deflateResetKeep(). Useful for initializing arch-specific extension blocks. */ -#define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0) +# define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0) /* Invoked at the beginning of deflateParams(). Useful for updating arch-specific compression parameters. */ -#define DEFLATE_PARAMS_HOOK(strm, level, strategy) do {} while (0) +# define DEFLATE_PARAMS_HOOK(strm, level, strategy) do {} while (0) /* Adjusts the upper bound on compressed data length based on compression parameters and uncompressed data length. * Useful when arch-specific deflation code behaves differently than regular zlib-ng algorithms. */ -#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) +# define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) /* Returns whether an optimistic upper bound on compressed data length should *not* be used. * Useful when arch-specific deflation code behaves differently than regular zlib-ng algorithms. */ -#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0 +# define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0 /* Invoked for each deflate() call. Useful for plugging arch-specific deflation code. 
*/ -#define DEFLATE_HOOK(strm, flush, bstate) 0 +# define DEFLATE_HOOK(strm, flush, bstate) 0 /* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific deflation code already does that. */ -#define DEFLATE_NEED_CHECKSUM(strm) 1 +# define DEFLATE_NEED_CHECKSUM(strm) 1 +#endif /* =========================================================================== * Function prototypes. diff --git a/gzguts.h b/gzguts.h index 5c1d7d8e..c2ff7bb9 100644 --- a/gzguts.h +++ b/gzguts.h @@ -98,7 +98,11 @@ /* default i/o buffer size -- double this for output when reading (this and twice this must be able to fit in an unsigned type) */ +#if defined(S390_DFLTCC_DEFLATE) || defined(S390_DFLTCC_INFLATE) +#define GZBUFSIZE 262144 /* DFLTCC works faster with larger buffers */ +#else #define GZBUFSIZE 8192 +#endif /* gzip modes, also provide a little integrity check on the passed structure */ #define GZ_NONE 0 diff --git a/inflate.c b/inflate.c index 4776b20a..8a8d328f 100644 --- a/inflate.c +++ b/inflate.c @@ -89,25 +89,29 @@ #include "functable.h" /* Architecture-specific hooks. */ +#ifdef S390_DFLTCC_INFLATE +# include "arch/s390/dfltcc_inflate.h" +#else /* Memory management for the inflate state. Useful for allocating arch-specific extension blocks. */ -#define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) -#define ZFREE_STATE(strm, addr) ZFREE(strm, addr) -#define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) +# define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) +# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) +# define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) /* Memory management for the window. Useful for allocation the aligned window. */ -#define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) -#define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) +# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) +# define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) /* Invoked at the end of inflateResetKeep(). 
Useful for initializing arch-specific extension blocks. */ -#define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) +# define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) /* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */ -#define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) +# define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) /* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code. */ -#define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) +# define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) /* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */ -#define INFLATE_NEED_CHECKSUM(strm) 1 +# define INFLATE_NEED_CHECKSUM(strm) 1 /* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */ -#define INFLATE_NEED_UPDATEWINDOW(strm) 1 +# define INFLATE_NEED_UPDATEWINDOW(strm) 1 /* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */ -#define INFLATE_MARK_HOOK(strm) do {} while (0) +# define INFLATE_MARK_HOOK(strm) do {} while (0) +#endif #ifdef MAKEFIXED # ifndef BUILDFIXED diff --git a/test/minigzip.c b/test/minigzip.c index e8ae9ef8..67892d40 100644 --- a/test/minigzip.c +++ b/test/minigzip.c @@ -61,7 +61,11 @@ #endif #define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) +#if defined(S390_DFLTCC_DEFLATE) || defined(S390_DFLTCC_INFLATE) +#define BUFLEN 262144 /* DFLTCC works faster with larger buffers */ +#else #define BUFLEN 16384 /* read buffer size */ +#endif #define BUFLENW (BUFLEN * 3) /* write buffer size */ #define MAX_NAME_LEN 1024