From: Nathan Moinvaziri Date: Fri, 26 Jun 2020 18:00:15 +0000 (-0700) Subject: Rename from memchunk to chunkset. X-Git-Tag: 1.9.9-b1~171 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2026830ceab48a364d0bc8d5d36f31fa745f1368;p=thirdparty%2Fzlib-ng.git Rename from memchunk to chunkset. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b0d97743..5385565a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -675,8 +675,8 @@ if(WITH_OPTIM) list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm.h) list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/armfeature.c) if(WITH_NEON) - add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH) - set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/memchunk_neon.c ${ARCHDIR}/slide_neon.c) + add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH) + set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c) list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS}) set_intrinsics_option("${NEONFLAG}" ${NEON_SRCS}) if(MSVC) @@ -753,8 +753,8 @@ if(WITH_OPTIM) set_intrinsics_option("${SSE4FLAG}" ${SSE42_SRCS}) endif() if(WITH_SSE2 AND HAVE_SSE2_INTRIN) - add_definitions(-DX86_SSE2 -DX86_SSE2_MEMCHUNK) - set(SSE2_SRCS ${ARCHDIR}/memchunk_sse.c ${ARCHDIR}/slide_sse.c) + add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH) + set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c) list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS}) if(NOT ${ARCH} MATCHES "x86_64") set_intrinsics_option("${SSE2FLAG}" ${SSE2_SRCS}) @@ -851,6 +851,7 @@ set(ZLIB_PUBLIC_HDRS ) set(ZLIB_PRIVATE_HDRS adler32_p.h + chunkset_tpl.h crc32.h crc32_p.h deflate.h @@ -864,7 +865,6 @@ set(ZLIB_PRIVATE_HDRS inftrees.h insert_string_tpl.h match_tpl.h - memchunk_tpl.h trees.h trees_emit.h trees_p.h @@ -874,6 +874,7 @@ set(ZLIB_PRIVATE_HDRS ) set(ZLIB_SRCS adler32.c + chunkset.c compare258.c compress.c crc32.c @@ -888,7 +889,6 @@ set(ZLIB_SRCS inftrees.c inffast.c insert_string.c - memchunk.c trees.c uncompr.c zutil.c diff --git a/INDEX.md b/INDEX.md index c5816024f..e5dc0eb18 100644 --- a/INDEX.md +++ b/INDEX.md @@ -12,6 +12,7 @@ Contents | CMakeLists.txt | Cmake build script | | configure | Bash configure/build script | | adler32.c | Compute the Adler-32 checksum of a data stream | +| chunkset.* | Inline functions to copy small data chunks | | compress.c | Compress a memory buffer | | deflate.* | Compress data using the deflate algorithm | | deflate_fast.c | Compress data using the deflate algorithm with fast strategy | @@ -28,7 +29,6 @@ Contents | inffast.* | Decompress data with speed optimizations | | inffixed.h | Table for decoding fixed codes | | inftrees.h | Generate Huffman trees for efficient decoding | -| memchunk.* | Inline functions to copy small data chunks | | trees.* | Output deflated data using Huffman coding | | uncompr.c | Decompress a memory buffer | | zconf.h.cmakein | zconf.h template for cmake | diff --git a/Makefile.in b/Makefile.in index 9433bb400..21d4e9b58 100644 --- a/Makefile.in +++ b/Makefile.in @@ -73,6 +73,7 @@ pkgconfigdir = ${libdir}/pkgconfig OBJZ = \ adler32.o \ + chunkset.o \ compare258.o \ compress.o \ crc32.o \ @@ -87,7 +88,6 @@ OBJZ = \ inflate.o \ inftrees.o \ insert_string.o \ - memchunk.o \ trees.o \ uncompr.o \ zutil.o \ @@ -103,6 +103,7 @@ OBJC = $(OBJZ) $(OBJG) PIC_OBJZ = \ adler32.lo \ + chunkset.lo \ compare258.lo \ compress.lo \ crc32.lo \ @@ -117,7 +118,6 @@ PIC_OBJZ = \ inflate.lo \ inftrees.lo \ insert_string.lo \ - memchunk.lo \ trees.lo \ uncompr.lo \ zutil.lo \ diff --git a/arch/arm/Makefile.in b/arch/arm/Makefile.in index db4bab3ff..05e14d671 100644 --- a/arch/arm/Makefile.in +++ b/arch/arm/Makefile.in @@ -15,8 +15,8 @@ TOPDIR=$(SRCTOP) all: \ adler32_neon.o adler32_neon.lo \ armfeature.o armfeature.lo \ + chunkset_neon.o chunkset_neon.lo \ crc32_acle.o crc32_acle.lo \ - memchunk_neon.o memchunk_neon.lo \ slide_neon.o slide_neon.lo \ insert_string_acle.o insert_string_acle.lo @@ -32,6 +32,12 @@ armfeature.o: armfeature.lo: $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c +chunkset_neon.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c + +chunkset_neon.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c + crc32_acle.o: $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c @@ -50,12 +56,6 @@ insert_string_acle.o: insert_string_acle.lo: $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c -memchunk_neon.o: - $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_neon.c - -memchunk_neon.lo: - $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_neon.c - mostlyclean: clean clean: rm -f *.o *.lo *~ diff --git a/arch/arm/memchunk_neon.c b/arch/arm/chunkset_neon.c similarity index 77% rename from arch/arm/memchunk_neon.c rename to arch/arm/chunkset_neon.c index cc165afef..2721596c6 100644 --- a/arch/arm/memchunk_neon.c +++ b/arch/arm/chunkset_neon.c @@ -1,8 +1,8 @@ -/* memchunk_neon.c -- NEON inline functions to copy small data chunks. +/* chunkset_neon.c -- NEON inline functions to copy small data chunks. * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET #ifdef _M_ARM64 # include #else @@ -11,7 +11,7 @@ #include "../../zbuild.h" #include "../../zutil.h" -typedef uint8x16_t memchunk_t; +typedef uint8x16_t chunk_t; #define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 @@ -19,19 +19,19 @@ typedef uint8x16_t memchunk_t; #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 -static inline void chunkmemset_1(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { *chunk = vld1q_dup_u8(from); } -static inline void chunkmemset_2(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { *chunk = vreinterpretq_u8_s16(vdupq_n_s16(*(int16_t *)from)); } -static inline void chunkmemset_4(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { *chunk = vreinterpretq_u8_s32(vdupq_n_s32(*(int32_t *)from)); } -static inline void chunkmemset_8(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { *chunk = vcombine_u8(vld1_u8(from), vld1_u8(from)); } @@ -113,20 +113,15 @@ static inline uint8_t *chunkmemset_6(uint8_t *out, uint8_t *from, unsigned dist, #endif -static inline void loadchunk(uint8_t const *s, memchunk_t *chunk) { - *chunk = *(memchunk_t *)s; +static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { + *chunk = *(chunk_t *)s; } -static inline void storechunk(uint8_t *out, memchunk_t *chunk) { -#ifdef _MSC_VER - /* Cast to memchunk_t pointer to avoid compiler error on MSVC ARM */ - memchunk_t *target = (memchunk_t *)chunk; - memcpy(target, &chunk, sizeof(chunk)); -#else - memcpy(out, chunk, sizeof(memchunk_t)); -#endif +static inline void storechunk(uint8_t *out, chunk_t *chunk) { + /* Cast to chunk_t pointer to avoid compiler error on MSVC ARM */ + memcpy((chunk_t *)out, chunk, sizeof(chunk_t)); } -#include "memchunk_tpl.h" +#include "chunkset_tpl.h" #endif diff --git a/arch/x86/Makefile.in b/arch/x86/Makefile.in index f14f2520d..1e163778d 100644 --- a/arch/x86/Makefile.in +++ b/arch/x86/Makefile.in @@ -22,11 +22,11 @@ all: \ x86.o x86.lo \ adler32_avx.o adler32.lo \ adler32_ssse3.o adler32_ssse3.lo \ + chunkset_sse.o chunkset_sse.lo \ compare258_avx.o compare258_avx.lo \ compare258_sse.o compare258_sse.lo \ insert_string_sse.o insert_string_sse.lo \ crc_folding.o crc_folding.lo \ - memchunk_sse.o memchunk_sse.lo \ slide_avx.o slide_avx.lo \ slide_sse.o slide_sse.lo @@ -36,6 +36,12 @@ x86.o: x86.lo: $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c +chunkset_sse.o: + $(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c + +chunkset_sse.lo: + $(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c + compare258_avx.o: $(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c @@ -60,12 +66,6 @@ crc_folding.o: crc_folding.lo: $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c -memchunk_sse.o: - $(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_sse.c - -memchunk_sse.lo: - $(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_sse.c - slide_avx.o: $(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c diff --git a/arch/x86/memchunk_sse.c b/arch/x86/chunkset_sse.c similarity index 61% rename from arch/x86/memchunk_sse.c rename to arch/x86/chunkset_sse.c index d92198dd3..1d5a0faa9 100644 --- a/arch/x86/memchunk_sse.c +++ b/arch/x86/chunkset_sse.c @@ -1,4 +1,4 @@ -/* memchunk_sse.c -- SSE inline functions to copy small data chunks. +/* chunkset_sse.c -- SSE inline functions to copy small data chunks. * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -8,34 +8,34 @@ #ifdef X86_SSE2 #include -typedef __m128i memchunk_t; +typedef __m128i chunk_t; #define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_2 #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 -static inline void chunkmemset_1(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { *chunk = _mm_set1_epi8(*(int8_t *)from); } -static inline void chunkmemset_2(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { *chunk = _mm_set1_epi16(*(int16_t *)from); } -static inline void chunkmemset_4(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { *chunk = _mm_set1_epi32(*(int32_t *)from); } -static inline void chunkmemset_8(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { *chunk = _mm_set1_epi64x(*(int64_t *)from); } -static inline void loadchunk(uint8_t const *s, memchunk_t *chunk) { +static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { *chunk = _mm_loadu_si128((__m128i *)s); } -static inline void storechunk(uint8_t *out, memchunk_t *chunk) { +static inline void storechunk(uint8_t *out, chunk_t *chunk) { _mm_storeu_si128((__m128i *)out, *chunk); } @@ -46,6 +46,6 @@ static inline void storechunk(uint8_t *out, memchunk_t *chunk) { #define CHUNKMEMSET chunkmemset_sse2 #define CHUNKMEMSET_SAFE chunkmemset_safe_sse2 -#include "memchunk_tpl.h" +#include "chunkset_tpl.h" #endif diff --git a/memchunk.c b/chunkset.c similarity index 55% rename from memchunk.c rename to chunkset.c index 1dbde6736..2e48dda90 100644 --- a/memchunk.c +++ b/chunkset.c @@ -1,4 +1,4 @@ -/* memchunk.c -- inline functions to copy small data chunks. +/* chunkset.c -- inline functions to copy small data chunks. * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -6,24 +6,24 @@ #include "zutil.h" #ifdef UNALIGNED_OK -typedef uint64_t memchunk_t; +typedef uint64_t chunk_t; #else -typedef uint8_t memchunk_t[8]; +typedef uint8_t chunk_t[8]; #endif #define HAVE_CHUNKMEMSET_1 #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 -static inline void chunkmemset_1(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { #ifdef UNALIGNED_OK *chunk = 0x0101010101010101 * (uint8_t)*from; #else - memset(chunk, *from, sizeof(memchunk_t)); + memset(chunk, *from, sizeof(chunk_t)); #endif } -static inline void chunkmemset_4(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { #ifdef UNALIGNED_OK uint32_t half_chunk; half_chunk = *(uint32_t *)from; @@ -35,27 +35,24 @@ static inline void chunkmemset_4(uint8_t *from, memchunk_t *chunk) { #endif } -static inline void chunkmemset_8(uint8_t *from, memchunk_t *chunk) { +static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { #ifdef UNALIGNED_OK *chunk = *(uint64_t *)from; #else - memcpy(chunk, from, sizeof(memchunk_t)); + memcpy(chunk, from, sizeof(chunk_t)); #endif } -static inline void loadchunk(uint8_t const *s, memchunk_t *chunk) { +static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { chunkmemset_8((uint8_t *)s, chunk); } -static inline void storechunk(uint8_t *out, memchunk_t *chunk) { +static inline void storechunk(uint8_t *out, chunk_t *chunk) { #ifdef UNALIGNED_OK *(uint64_t *)out = *chunk; -#elif defined(_MSC_VER) - /* Cast to memchunk_t pointer to avoid compiler error on MSVC ARM */ - memchunk_t *target = (memchunk_t *)chunk; - memcpy(target, &chunk, sizeof(chunk)); #else - memcpy(out, chunk, sizeof(memchunk_t)); + /* Cast to chunk_t pointer to avoid compiler error on MSVC ARM */ + memcpy((chunk_t *)out, chunk, sizeof(chunk_t )); #endif } @@ -66,4 +63,4 @@ static inline void storechunk(uint8_t *out, memchunk_t *chunk) { #define CHUNKMEMSET chunkmemset_c #define CHUNKMEMSET_SAFE chunkmemset_safe_c -#include "memchunk_tpl.h" +#include "chunkset_tpl.h" diff --git a/memchunk_tpl.h b/chunkset_tpl.h similarity index 82% rename from memchunk_tpl.h rename to chunkset_tpl.h index 507e5ed5b..60b2db293 100644 --- a/memchunk_tpl.h +++ b/chunkset_tpl.h @@ -1,35 +1,35 @@ -/* memchunk_tpl.h -- inline functions to copy small data chunks. +/* chunkset_tpl.h -- inline functions to copy small data chunks. * For conditions of distribution and use, see copyright notice in zlib.h */ /* Returns the chunk size */ uint32_t CHUNKSIZE(void) { - return sizeof(memchunk_t); + return sizeof(chunk_t); } /* Behave like memcpy, but assume that it's OK to overwrite at least - memchunk_t bytes of output even if the length is shorter than this, + chunk_t bytes of output even if the length is shorter than this, that the length is non-zero, and that `from` lags `out` by at least - sizeof memchunk_t bytes (or that they don't overlap at all or simply that + sizeof chunk_t bytes (or that they don't overlap at all or simply that the distance is less than the length of the copy). Aside from better memory bus utilisation, this means that short copies - (memchunk_t bytes or fewer) will fall straight through the loop + (chunk_t bytes or fewer) will fall straight through the loop without iteration, which will hopefully make the branch prediction more reliable. */ uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { - memchunk_t chunk; + chunk_t chunk; --len; loadchunk(from, &chunk); storechunk(out, &chunk); - out += (len % sizeof(memchunk_t)) + 1; - from += (len % sizeof(memchunk_t)) + 1; - len /= sizeof(memchunk_t); + out += (len % sizeof(chunk_t)) + 1; + from += (len % sizeof(chunk_t)) + 1; + len /= sizeof(chunk_t); while (len > 0) { loadchunk(from, &chunk); storechunk(out, &chunk); - out += sizeof(memchunk_t); - from += sizeof(memchunk_t); + out += sizeof(chunk_t); + from += sizeof(chunk_t); --len; } return out; @@ -37,7 +37,7 @@ uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { /* Behave like chunkcopy, but avoid writing beyond of legal output. */ uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) { - if ((safe - out) < (ptrdiff_t)sizeof(memchunk_t)) { + if ((safe - out) < (ptrdiff_t)sizeof(chunk_t)) { if (len & 8) { memcpy(out, from, 8); out += 8; @@ -62,17 +62,17 @@ uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t } /* Perform short copies until distance can be rewritten as being at least - sizeof memchunk_t. + sizeof chunk_t. This assumes that it's OK to overwrite at least the first - 2*sizeof(memchunk_t) bytes of output even if the copy is shorter than this. + 2*sizeof(chunk_t) bytes of output even if the copy is shorter than this. This assumption holds because inflate_fast() starts every iteration with at least 258 bytes of output space available (258 being the maximum length output from a single token; see inflate_fast()'s assumptions below). */ uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { unsigned char const *from = out - *dist; - memchunk_t chunk; - while (*dist < *len && *dist < sizeof(memchunk_t)) { + chunk_t chunk; + while (*dist < *len && *dist < sizeof(chunk_t)) { loadchunk(from, &chunk); storechunk(out, &chunk); out += *dist; @@ -90,7 +90,7 @@ uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { Assert(dist > 0, "cannot have a distance 0"); unsigned char *from = out - dist; - memchunk_t chunk; + chunk_t chunk; unsigned sz = sizeof(chunk); if (len < sz) { do { @@ -154,7 +154,7 @@ uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { } uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) { - if (left < (unsigned)(3 * sizeof(memchunk_t))) { + if (left < (unsigned)(3 * sizeof(chunk_t))) { while (len > 0) { *out = *(out - dist); out++; diff --git a/configure b/configure index e2d3209e6..edbc51fcc 100755 --- a/configure +++ b/configure @@ -1111,10 +1111,10 @@ case "${ARCH}" in ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo" if test ${HAVE_SSE2_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_MEMCHUNK" - SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_MEMCHUNK" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_sse.o memchunk_sse.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_sse.lo memchunk_sse.lo" + CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET" + SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse.o slide_sse.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse.lo slide_sse.lo" if test $forcesse2 -eq 1; then CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2" @@ -1173,11 +1173,11 @@ case "${ARCH}" in # Enable arch-specific optimizations? if test $without_optimizations -eq 0; then - CFLAGS="${CFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_MEMCHUNK -DX86_SSE42_CRC_HASH" - SFLAGS="${SFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_MEMCHUNK -DX86_SSE42_CRC_HASH" + CFLAGS="${CFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE42_CRC_HASH" + SFLAGS="${SFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE42_CRC_HASH" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o insert_string_sse.o memchunk_sse.o slide_sse.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo insert_string_sse.lo memchunk_sse.lo slide_sse.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o chunkset_sse.o insert_string_sse.o slide_sse.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo chunkset_sse.lo insert_string_sse.lo slide_sse.lo" if test ${HAVE_SSSE3_INTRIN} -eq 1; then CFLAGS="${CFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32" @@ -1274,11 +1274,11 @@ case "${ARCH}" in fi if test $buildneon -eq 1; then - CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" - SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" + CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" + SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo" fi fi ;; @@ -1297,11 +1297,11 @@ case "${ARCH}" in SFLAGS="${SFLAGS} -mfpu=neon" fi - CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" - SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" + CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" + SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo" fi fi ;; @@ -1321,11 +1321,11 @@ case "${ARCH}" in SFLAGS="${SFLAGS} -mfpu=neon" fi - CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" - SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" + CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" + SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo" fi fi ;; @@ -1363,10 +1363,10 @@ case "${ARCH}" in if test $native -eq 0; then ARCH="${ARCH}+simd" fi - CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" - SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH" - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o" - ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo" + CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" + SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo" fi fi diff --git a/functable.c b/functable.c index 81bd786f9..73e3e3977 100644 --- a/functable.c +++ b/functable.c @@ -64,7 +64,7 @@ extern uint8_t* chunkcopy_safe_c(uint8_t *out, uint8_t const *from, unsigned len extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len); extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len); extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#ifdef X86_SSE2_MEMCHUNK +#ifdef X86_SSE2_CHUNKSET extern uint32_t chunksize_sse2(void); extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len); extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); @@ -72,7 +72,7 @@ extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len); extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len); extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); #endif -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET extern uint32_t chunksize_neon(void); extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len); extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); @@ -264,13 +264,13 @@ ZLIB_INTERNAL uint32_t chunksize_stub(void) { // Initialize default functable.chunksize = &chunksize_c; -#ifdef X86_SSE2_MEMCHUNK +#ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif functable.chunksize = &chunksize_sse2; #endif -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) functable.chunksize = &chunksize_neon; #endif @@ -282,13 +282,13 @@ ZLIB_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigne // Initialize default functable.chunkcopy = &chunkcopy_c; -#ifdef X86_SSE2_MEMCHUNK +#ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif functable.chunkcopy = &chunkcopy_sse2; #endif -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) functable.chunkcopy = &chunkcopy_neon; #endif @@ -300,13 +300,13 @@ ZLIB_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, un // Initialize default functable.chunkcopy_safe = &chunkcopy_safe_c; -#ifdef X86_SSE2_MEMCHUNK +#ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif functable.chunkcopy_safe = &chunkcopy_safe_sse2; #endif -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) functable.chunkcopy_safe = &chunkcopy_safe_neon; #endif @@ -318,13 +318,13 @@ ZLIB_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned * // Initialize default functable.chunkunroll = &chunkunroll_c; -#ifdef X86_SSE2_MEMCHUNK +#ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif functable.chunkunroll = &chunkunroll_sse2; #endif -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) functable.chunkunroll = &chunkunroll_neon; #endif @@ -336,13 +336,13 @@ ZLIB_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned le // Initialize default functable.chunkmemset = &chunkmemset_c; -#ifdef X86_SSE2_MEMCHUNK +#ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif functable.chunkmemset = &chunkmemset_sse2; #endif -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) functable.chunkmemset = &chunkmemset_neon; #endif @@ -354,13 +354,13 @@ ZLIB_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsign // Initialize default functable.chunkmemset_safe = &chunkmemset_safe_c; -#ifdef X86_SSE2_MEMCHUNK +#ifdef X86_SSE2_CHUNKSET # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) if (x86_cpu_has_sse2) # endif functable.chunkmemset_safe = &chunkmemset_safe_sse2; #endif -#ifdef ARM_NEON_MEMCHUNK +#ifdef ARM_NEON_CHUNKSET if (arm_cpu_has_neon) functable.chunkmemset_safe = &chunkmemset_safe_neon; #endif diff --git a/win32/Makefile.a64 b/win32/Makefile.a64 index bd12e0719..763736c65 100644 --- a/win32/Makefile.a64 +++ b/win32/Makefile.a64 @@ -43,6 +43,7 @@ SUFFIX = OBJS = \ adler32.obj \ armfeature.obj \ + chunkset.obj \ compare258.obj \ compress.obj \ crc32.obj \ @@ -57,7 +58,6 @@ OBJS = \ inftrees.obj \ inffast.obj \ insert_string.obj \ - memchunk.obj \ trees.obj \ uncompr.obj \ zutil.obj \ @@ -79,11 +79,11 @@ WFLAGS = $(WFLAGS) \ -DARM_ACLE_CRC_HASH \ -D__ARM_NEON__=1 \ -DARM_NEON_ADLER32 \ - -DARM_NEON_MEMCHUNK \ + -DARM_NEON_CHUNKSET \ -DARM_NEON_SLIDEHASH \ -DARM_NOCHECK_NEON \ # -OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj memchunk_neon.obj slide_neon.obj +OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj slide_neon.obj # targets all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ @@ -149,6 +149,7 @@ $(TOP)/zconf$(SUFFIX).h: zconf SRCDIR = $(TOP) # Keep the dependences in sync with top-level Makefile.in adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86.h gzclose.obj: $(SRCDIR)/gzclose.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h @@ -166,7 +167,6 @@ infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/ inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h -memchunk.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h diff --git a/win32/Makefile.arm b/win32/Makefile.arm index 69f694a82..881b10fef 100644 --- a/win32/Makefile.arm +++ b/win32/Makefile.arm @@ -46,6 +46,7 @@ SUFFIX = OBJS = \ adler32.obj \ armfeature.obj \ + chunkset.obj \ compare258.obj \ compress.obj \ crc32.obj \ @@ -60,7 +61,6 @@ OBJS = \ inftrees.obj \ inffast.obj \ insert_string.obj \ - memchunk.obj \ trees.obj \ uncompr.obj \ zutil.obj \ @@ -90,11 +90,11 @@ CFLAGS = $(CFLAGS) $(NEON_ARCH) WFLAGS = $(WFLAGS) \ -D__ARM_NEON__=1 \ -DARM_NEON_ADLER32 \ - -DARM_NEON_MEMCHUNK \ + -DARM_NEON_CHUNKSET \ -DARM_NEON_SLIDEHASH \ -DARM_NOCHECK_NEON \ # -OBJS = $(OBJS) adler32_neon.obj memchunk_neon.obj slide_neon.obj +OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj slide_neon.obj !endif # targets @@ -168,6 +168,7 @@ gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h +chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32.h deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h @@ -178,7 +179,6 @@ infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/ inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h -memchunk.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h diff --git a/win32/Makefile.msc b/win32/Makefile.msc index f7b0bac78..4e3f09a94 100644 --- a/win32/Makefile.msc +++ b/win32/Makefile.msc @@ -32,7 +32,7 @@ WFLAGS = \ -DX86_SSE42_CRC_INTRIN \ -DX86_SSE42_CRC_HASH \ -DX86_AVX2 \ - -DX86_SSE2_MEMCHUNK \ + -DX86_SSE2_CHUNKSET \ -DUNALIGNED_OK \ -DUNALIGNED64_OK \ # @@ -48,6 +48,8 @@ SUFFIX = OBJS = \ adler32.obj \ + chunkset.obj \ + chunkset_sse.obj \ compare258.obj \ compare258_avx.obj \ compare258_sse.obj \ @@ -66,8 +68,6 @@ OBJS = \ inffast.obj \ insert_string.obj \ insert_string_sse.obj \ - memchunk.obj \ - memchunk_sse.obj \ slide_avx.obj \ slide_sse.obj \ trees.obj \ @@ -165,6 +165,8 @@ gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h +chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h +chunkset_sse.obj: $(SRCDIR)/arch/x86/chunkset_sse.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32.h deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h @@ -175,8 +177,6 @@ infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/ inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h -memchunk.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h -memchunk_sse.obj: $(SRCDIR)/arch/x86/memchunk_sse.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h