list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/armfeature.c)
if(WITH_NEON)
- add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH)
- set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/memchunk_neon.c ${ARCHDIR}/slide_neon.c)
+ add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
+ set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c)
list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
set_intrinsics_option("${NEONFLAG}" ${NEON_SRCS})
if(MSVC)
set_intrinsics_option("${SSE4FLAG}" ${SSE42_SRCS})
endif()
if(WITH_SSE2 AND HAVE_SSE2_INTRIN)
- add_definitions(-DX86_SSE2 -DX86_SSE2_MEMCHUNK)
- set(SSE2_SRCS ${ARCHDIR}/memchunk_sse.c ${ARCHDIR}/slide_sse.c)
+ add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
+ set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c)
list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
if(NOT ${ARCH} MATCHES "x86_64")
set_intrinsics_option("${SSE2FLAG}" ${SSE2_SRCS})
endif()
set(ZLIB_PRIVATE_HDRS
adler32_p.h
+ chunkset_tpl.h
crc32.h
crc32_p.h
deflate.h
inftrees.h
insert_string_tpl.h
match_tpl.h
- memchunk_tpl.h
trees.h
trees_emit.h
trees_p.h
)
set(ZLIB_SRCS
adler32.c
+ chunkset.c
compare258.c
compress.c
crc32.c
inftrees.c
inffast.c
insert_string.c
- memchunk.c
trees.c
uncompr.c
zutil.c
| CMakeLists.txt | CMake build script |
| configure | Bash configure/build script |
| adler32.c | Compute the Adler-32 checksum of a data stream |
+| chunkset.* | Inline functions to copy small data chunks |
| compress.c | Compress a memory buffer |
| deflate.* | Compress data using the deflate algorithm |
| deflate_fast.c | Compress data using the deflate algorithm with fast strategy |
| inffast.* | Decompress data with speed optimizations |
| inffixed.h | Table for decoding fixed codes |
| inftrees.h | Generate Huffman trees for efficient decoding |
-| memchunk.* | Inline functions to copy small data chunks |
| trees.* | Output deflated data using Huffman coding |
| uncompr.c | Decompress a memory buffer |
| zconf.h.cmakein | zconf.h template for CMake |
OBJZ = \
adler32.o \
+ chunkset.o \
compare258.o \
compress.o \
crc32.o \
inflate.o \
inftrees.o \
insert_string.o \
- memchunk.o \
trees.o \
uncompr.o \
zutil.o \
PIC_OBJZ = \
adler32.lo \
+ chunkset.lo \
compare258.lo \
compress.lo \
crc32.lo \
inflate.lo \
inftrees.lo \
insert_string.lo \
- memchunk.lo \
trees.lo \
uncompr.lo \
zutil.lo \
all: \
adler32_neon.o adler32_neon.lo \
armfeature.o armfeature.lo \
+ chunkset_neon.o chunkset_neon.lo \
crc32_acle.o crc32_acle.lo \
- memchunk_neon.o memchunk_neon.lo \
slide_neon.o slide_neon.lo \
insert_string_acle.o insert_string_acle.lo
armfeature.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+chunkset_neon.o:
+ $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
+
+chunkset_neon.lo:
+ $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
+
crc32_acle.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
insert_string_acle.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
-memchunk_neon.o:
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_neon.c
-
-memchunk_neon.lo:
- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_neon.c
-
mostlyclean: clean
clean:
rm -f *.o *.lo *~
-/* memchunk_neon.c -- NEON inline functions to copy small data chunks.
+/* chunkset_neon.c -- NEON inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
#ifdef _M_ARM64
# include <arm64_neon.h>
#else
#include "../../zbuild.h"
#include "../../zutil.h"
-typedef uint8x16_t memchunk_t;
+typedef uint8x16_t chunk_t;
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
-static inline void chunkmemset_1(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
*chunk = vld1q_dup_u8(from);
}
-static inline void chunkmemset_2(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
*chunk = vreinterpretq_u8_s16(vdupq_n_s16(*(int16_t *)from));
}
-static inline void chunkmemset_4(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
*chunk = vreinterpretq_u8_s32(vdupq_n_s32(*(int32_t *)from));
}
-static inline void chunkmemset_8(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
*chunk = vcombine_u8(vld1_u8(from), vld1_u8(from));
}
#endif
-static inline void loadchunk(uint8_t const *s, memchunk_t *chunk) {
- *chunk = *(memchunk_t *)s;
+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
+ *chunk = *(chunk_t *)s;
}
-static inline void storechunk(uint8_t *out, memchunk_t *chunk) {
-#ifdef _MSC_VER
- /* Cast to memchunk_t pointer to avoid compiler error on MSVC ARM */
- memchunk_t *target = (memchunk_t *)chunk;
- memcpy(target, &chunk, sizeof(chunk));
-#else
- memcpy(out, chunk, sizeof(memchunk_t));
-#endif
+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
+ /* Cast to chunk_t pointer to avoid compiler error on MSVC ARM */
+ memcpy((chunk_t *)out, chunk, sizeof(chunk_t));
}
-#include "memchunk_tpl.h"
+#include "chunkset_tpl.h"
#endif
x86.o x86.lo \
adler32_avx.o adler32_avx.lo \
adler32_ssse3.o adler32_ssse3.lo \
+ chunkset_sse.o chunkset_sse.lo \
compare258_avx.o compare258_avx.lo \
compare258_sse.o compare258_sse.lo \
insert_string_sse.o insert_string_sse.lo \
crc_folding.o crc_folding.lo \
- memchunk_sse.o memchunk_sse.lo \
slide_avx.o slide_avx.lo \
slide_sse.o slide_sse.lo
x86.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
+chunkset_sse.o:
+ $(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c
+
+chunkset_sse.lo:
+ $(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c
+
compare258_avx.o:
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c
crc_folding.lo:
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
-memchunk_sse.o:
- $(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_sse.c
-
-memchunk_sse.lo:
- $(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/memchunk_sse.c
-
slide_avx.o:
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
-/* memchunk_sse.c -- SSE inline functions to copy small data chunks.
+/* chunkset_sse.c -- SSE inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef X86_SSE2
#include <immintrin.h>
-typedef __m128i memchunk_t;
+typedef __m128i chunk_t;
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
-static inline void chunkmemset_1(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
*chunk = _mm_set1_epi8(*(int8_t *)from);
}
-static inline void chunkmemset_2(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
*chunk = _mm_set1_epi16(*(int16_t *)from);
}
-static inline void chunkmemset_4(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
*chunk = _mm_set1_epi32(*(int32_t *)from);
}
-static inline void chunkmemset_8(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
*chunk = _mm_set1_epi64x(*(int64_t *)from);
}
-static inline void loadchunk(uint8_t const *s, memchunk_t *chunk) {
+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
*chunk = _mm_loadu_si128((__m128i *)s);
}
-static inline void storechunk(uint8_t *out, memchunk_t *chunk) {
+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
_mm_storeu_si128((__m128i *)out, *chunk);
}
#define CHUNKMEMSET chunkmemset_sse2
#define CHUNKMEMSET_SAFE chunkmemset_safe_sse2
-#include "memchunk_tpl.h"
+#include "chunkset_tpl.h"
#endif
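As with the generic and NEON units, the SSE2 file above is one instantiation of a shared template: typedef chunk_t, provide loadchunk/storechunk and the chunkmemset_N broadcasts, #define the exported names, then include chunkset_tpl.h. A toy reduction of that mechanism, with hypothetical file and symbol names (not the real headers):

/* mini_tpl.h -- generic body, compiled once per instantiation.
   It may refer only to chunk_t and the primitives/macros the
   including file defined beforehand. */
uint32_t CHUNKSIZE_SYM(void) {
    return (uint32_t)sizeof(chunk_t);
}

/* mini_scalar.c -- one concrete instantiation of the template. */
#include <stdint.h>
typedef uint64_t chunk_t;             /* this build's "vector register" */
#define CHUNKSIZE_SYM chunksize_scalar
#include "mini_tpl.h"                 /* emits chunksize_scalar() */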
-/* memchunk.c -- inline functions to copy small data chunks.
+/* chunkset.c -- inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#ifdef UNALIGNED_OK
-typedef uint64_t memchunk_t;
+typedef uint64_t chunk_t;
#else
-typedef uint8_t memchunk_t[8];
+typedef uint8_t chunk_t[8];
#endif
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
-static inline void chunkmemset_1(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
#ifdef UNALIGNED_OK
*chunk = 0x0101010101010101 * (uint8_t)*from;
#else
- memset(chunk, *from, sizeof(memchunk_t));
+ memset(chunk, *from, sizeof(chunk_t));
#endif
}
-static inline void chunkmemset_4(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
#ifdef UNALIGNED_OK
uint32_t half_chunk;
half_chunk = *(uint32_t *)from;
#endif
}
-static inline void chunkmemset_8(uint8_t *from, memchunk_t *chunk) {
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
#ifdef UNALIGNED_OK
*chunk = *(uint64_t *)from;
#else
- memcpy(chunk, from, sizeof(memchunk_t));
+ memcpy(chunk, from, sizeof(chunk_t));
#endif
}
-static inline void loadchunk(uint8_t const *s, memchunk_t *chunk) {
+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
chunkmemset_8((uint8_t *)s, chunk);
}
-static inline void storechunk(uint8_t *out, memchunk_t *chunk) {
+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
#ifdef UNALIGNED_OK
*(uint64_t *)out = *chunk;
-#elif defined(_MSC_VER)
- /* Cast to memchunk_t pointer to avoid compiler error on MSVC ARM */
- memchunk_t *target = (memchunk_t *)chunk;
- memcpy(target, &chunk, sizeof(chunk));
#else
- memcpy(out, chunk, sizeof(memchunk_t));
+ /* Cast to chunk_t pointer to avoid compiler error on MSVC ARM */
+ memcpy((chunk_t *)out, chunk, sizeof(chunk_t));
#endif
}
#define CHUNKMEMSET chunkmemset_c
#define CHUNKMEMSET_SAFE chunkmemset_safe_c
-#include "memchunk_tpl.h"
+#include "chunkset_tpl.h"
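The UNALIGNED_OK path of chunkmemset_1 above leans on the multiply-broadcast identity: 0x0101010101010101 * b places the byte b in every byte lane of a 64-bit word. A minimal standalone check of that identity (illustrative only, not part of the diff):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint8_t b = 0xAB;
    /* Each 0x01 byte of the multiplier contributes b shifted into one
       lane, so the product is b replicated eight times. */
    uint64_t chunk = 0x0101010101010101ULL * b;
    printf("%016llx\n", (unsigned long long)chunk); /* abababababababab */
    return (chunk == 0xABABABABABABABABULL) ? 0 : 1;
}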
-/* memchunk_tpl.h -- inline functions to copy small data chunks.
+/* chunkset_tpl.h -- inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* Returns the chunk size */
uint32_t CHUNKSIZE(void) {
- return sizeof(memchunk_t);
+ return sizeof(chunk_t);
}
/* Behave like memcpy, but assume that it's OK to overwrite at least
- memchunk_t bytes of output even if the length is shorter than this,
+ chunk_t bytes of output even if the length is shorter than this,
that the length is non-zero, and that `from` lags `out` by at least
- sizeof memchunk_t bytes (or that they don't overlap at all or simply that
+ sizeof chunk_t bytes (or that they don't overlap at all or simply that
the distance is less than the length of the copy).
Aside from better memory bus utilisation, this means that short copies
- (memchunk_t bytes or fewer) will fall straight through the loop
+ (chunk_t bytes or fewer) will fall straight through the loop
without iteration, which will hopefully make the branch prediction more
reliable. */
uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
- memchunk_t chunk;
+ chunk_t chunk;
--len;
loadchunk(from, &chunk);
storechunk(out, &chunk);
- out += (len % sizeof(memchunk_t)) + 1;
- from += (len % sizeof(memchunk_t)) + 1;
- len /= sizeof(memchunk_t);
+ out += (len % sizeof(chunk_t)) + 1;
+ from += (len % sizeof(chunk_t)) + 1;
+ len /= sizeof(chunk_t);
while (len > 0) {
loadchunk(from, &chunk);
storechunk(out, &chunk);
- out += sizeof(memchunk_t);
- from += sizeof(memchunk_t);
+ out += sizeof(chunk_t);
+ from += sizeof(chunk_t);
--len;
}
return out;
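/* Worked trace with illustrative values (not from the diff): take a
   16-byte chunk_t and len = 5.  After --len, len is 4; the first
   loadchunk/storechunk writes a full 16 bytes, which the contract above
   explicitly allows; out and from then advance by (4 % 16) + 1 = 5; and
   len /= 16 leaves 0, so the while loop is skipped entirely.  With
   len = 40 the same steps advance the pointers by 8 and leave len = 2,
   giving two more full-chunk iterations: 8 + 2*16 = 40 bytes in total. */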
/* Behave like chunkcopy, but avoid writing beyond the end of legal output. */
uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
- if ((safe - out) < (ptrdiff_t)sizeof(memchunk_t)) {
+ if ((safe - out) < (ptrdiff_t)sizeof(chunk_t)) {
if (len & 8) {
memcpy(out, from, 8);
out += 8;
}
/* Perform short copies until distance can be rewritten as being at least
- sizeof memchunk_t.
+ sizeof chunk_t.
This assumes that it's OK to overwrite at least the first
- 2*sizeof(memchunk_t) bytes of output even if the copy is shorter than this.
+ 2*sizeof(chunk_t) bytes of output even if the copy is shorter than this.
This assumption holds because inflate_fast() starts every iteration with at
least 258 bytes of output space available (258 being the maximum length
output from a single token; see inflate_fast()'s assumptions below). */
uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
unsigned char const *from = out - *dist;
- memchunk_t chunk;
- while (*dist < *len && *dist < sizeof(memchunk_t)) {
+ chunk_t chunk;
+ while (*dist < *len && *dist < sizeof(chunk_t)) {
loadchunk(from, &chunk);
storechunk(out, &chunk);
out += *dist;
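/* Illustrative trace (the rest of the loop body, which subtracts *dist
   from *len and doubles *dist, is elided from this hunk): with a 16-byte
   chunk_t and dist = 3, storechunk writes 16 bytes of the repeating
   pattern while out advances by only 3, so the distance can be rewritten
   3 -> 6 -> 12 -> 24; once it reaches sizeof(chunk_t) or more, ordinary
   chunk-at-a-time copying no longer reads bytes it has not yet written. */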
Assert(dist > 0, "cannot have a distance 0");
unsigned char *from = out - dist;
- memchunk_t chunk;
+ chunk_t chunk;
unsigned sz = sizeof(chunk);
if (len < sz) {
do {
}
uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
- if (left < (unsigned)(3 * sizeof(memchunk_t))) {
+ if (left < (unsigned)(3 * sizeof(chunk_t))) {
while (len > 0) {
*out = *(out - dist);
out++;
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo"
if test ${HAVE_SSE2_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_MEMCHUNK"
- SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_MEMCHUNK"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_sse.o memchunk_sse.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_sse.lo memchunk_sse.lo"
+ CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
+ SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse.o slide_sse.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse.lo slide_sse.lo"
if test $forcesse2 -eq 1; then
CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2"
# Enable arch-specific optimizations?
if test $without_optimizations -eq 0; then
- CFLAGS="${CFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_MEMCHUNK -DX86_SSE42_CRC_HASH"
- SFLAGS="${SFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_MEMCHUNK -DX86_SSE42_CRC_HASH"
+ CFLAGS="${CFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE42_CRC_HASH"
+ SFLAGS="${SFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE42_CRC_HASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o insert_string_sse.o memchunk_sse.o slide_sse.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo insert_string_sse.lo memchunk_sse.lo slide_sse.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o chunkset_sse.o insert_string_sse.o slide_sse.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo chunkset_sse.lo insert_string_sse.lo slide_sse.lo"
if test ${HAVE_SSSE3_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
fi
if test $buildneon -eq 1; then
- CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
+ CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
+ SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
fi
;;
SFLAGS="${SFLAGS} -mfpu=neon"
fi
- CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
+ CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
+ SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
fi
;;
SFLAGS="${SFLAGS} -mfpu=neon"
fi
- CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
+ CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
+ SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
fi
;;
if test $native -eq 0; then
ARCH="${ARCH}+simd"
fi
- CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_MEMCHUNK -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o memchunk_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo memchunk_neon.lo slide_neon.lo"
+ CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
+ SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
fi
extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-#ifdef X86_SSE2_MEMCHUNK
+#ifdef X86_SSE2_CHUNKSET
extern uint32_t chunksize_sse2(void);
extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len);
extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
extern uint32_t chunksize_neon(void);
extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len);
extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
// Initialize default
functable.chunksize = &chunksize_c;
-#ifdef X86_SSE2_MEMCHUNK
+#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
functable.chunksize = &chunksize_sse2;
#endif
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunksize = &chunksize_neon;
#endif
// Initialize default
functable.chunkcopy = &chunkcopy_c;
-#ifdef X86_SSE2_MEMCHUNK
+#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
functable.chunkcopy = &chunkcopy_sse2;
#endif
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkcopy = &chunkcopy_neon;
#endif
// Initialize default
functable.chunkcopy_safe = &chunkcopy_safe_c;
-#ifdef X86_SSE2_MEMCHUNK
+#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
functable.chunkcopy_safe = &chunkcopy_safe_sse2;
#endif
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkcopy_safe = &chunkcopy_safe_neon;
#endif
// Initialize default
functable.chunkunroll = &chunkunroll_c;
-#ifdef X86_SSE2_MEMCHUNK
+#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
functable.chunkunroll = &chunkunroll_sse2;
#endif
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkunroll = &chunkunroll_neon;
#endif
// Initialize default
functable.chunkmemset = &chunkmemset_c;
-#ifdef X86_SSE2_MEMCHUNK
+#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
functable.chunkmemset = &chunkmemset_sse2;
#endif
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkmemset = &chunkmemset_neon;
#endif
// Initialize default
functable.chunkmemset_safe = &chunkmemset_safe_c;
-#ifdef X86_SSE2_MEMCHUNK
+#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
functable.chunkmemset_safe = &chunkmemset_safe_sse2;
#endif
-#ifdef ARM_NEON_MEMCHUNK
+#ifdef ARM_NEON_CHUNKSET
if (arm_cpu_has_neon)
functable.chunkmemset_safe = &chunkmemset_safe_neon;
#endif
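Every functable hunk above repeats one dispatch idiom: assign the portable C routine, then upgrade the pointer once at init time when the CPU feature test passes. A self-contained miniature of that idiom (the stub functions stand in for the real chunksize_c/chunksize_sse2; not zlib-ng code):

#include <stdint.h>
#include <stdio.h>

static uint32_t chunksize_c(void)    { return 8;  }  /* portable fallback */
static uint32_t chunksize_sse2(void) { return 16; }  /* SIMD variant */
static int cpu_has_sse2(void)        { return 1;  }  /* stub feature probe */

static uint32_t (*chunksize)(void);

static void functable_init(void) {
    chunksize = &chunksize_c;            /* 1. always-valid default */
    if (cpu_has_sse2())
        chunksize = &chunksize_sse2;     /* 2. one-time runtime upgrade */
}

int main(void) {
    functable_init();
    printf("chunk size: %u\n", (unsigned)chunksize());
    return 0;
}

Resolving the choice once at startup keeps the per-call overhead to a single indirect call, which is why each functable entry follows the same shape.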
OBJS = \
adler32.obj \
armfeature.obj \
+ chunkset.obj \
compare258.obj \
compress.obj \
crc32.obj \
inftrees.obj \
inffast.obj \
insert_string.obj \
- memchunk.obj \
trees.obj \
uncompr.obj \
zutil.obj \
-DARM_ACLE_CRC_HASH \
-D__ARM_NEON__=1 \
-DARM_NEON_ADLER32 \
- -DARM_NEON_MEMCHUNK \
+ -DARM_NEON_CHUNKSET \
-DARM_NEON_SLIDEHASH \
-DARM_NOCHECK_NEON \
#
-OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj memchunk_neon.obj slide_neon.obj
+OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj slide_neon.obj
# targets
all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \
SRCDIR = $(TOP)
# Keep the dependences in sync with top-level Makefile.in
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
+chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86.h
gzclose.obj: $(SRCDIR)/gzclose.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h
gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h
inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
-memchunk.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h
OBJS = \
adler32.obj \
armfeature.obj \
+ chunkset.obj \
compare258.obj \
compress.obj \
crc32.obj \
inftrees.obj \
inffast.obj \
insert_string.obj \
- memchunk.obj \
trees.obj \
uncompr.obj \
zutil.obj \
WFLAGS = $(WFLAGS) \
-D__ARM_NEON__=1 \
-DARM_NEON_ADLER32 \
- -DARM_NEON_MEMCHUNK \
+ -DARM_NEON_CHUNKSET \
-DARM_NEON_SLIDEHASH \
-DARM_NOCHECK_NEON \
#
-OBJS = $(OBJS) adler32_neon.obj memchunk_neon.obj slide_neon.obj
+OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj slide_neon.obj
!endif
# targets
gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h
compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
+chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32.h
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
-memchunk.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h
-DX86_SSE42_CRC_INTRIN \
-DX86_SSE42_CRC_HASH \
-DX86_AVX2 \
- -DX86_SSE2_MEMCHUNK \
+ -DX86_SSE2_CHUNKSET \
-DUNALIGNED_OK \
-DUNALIGNED64_OK \
#
OBJS = \
adler32.obj \
+ chunkset.obj \
+ chunkset_sse.obj \
compare258.obj \
compare258_avx.obj \
compare258_sse.obj \
inffast.obj \
insert_string.obj \
insert_string_sse.obj \
- memchunk.obj \
- memchunk_sse.obj \
slide_avx.obj \
slide_sse.obj \
trees.obj \
gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h
compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
+chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
+chunkset_sse.obj: $(SRCDIR)/arch/x86/chunkset_sse.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32.h
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
-memchunk.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
-memchunk_sse.obj: $(SRCDIR)/arch/x86/memchunk_sse.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h