add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c)
add_feature_info(AVX_CHUNKSET 1 "Support AVX optimized chunkset, using \"${AVX2FLAG}\"")
- list(APPEND AVX2_SRCS ${ARCHDIR}/compare258_avx2.c)
- add_feature_info(AVX2_COMPARE258 1 "Support AVX2 optimized compare258, using \"${AVX2FLAG}\"")
+ list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c)
+ add_feature_info(AVX2_COMPARE256 1 "Support AVX2 optimized compare256, using \"${AVX2FLAG}\"")
list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c)
add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
endif()
if(HAVE_SSE42CMPSTR_INTRIN)
add_definitions(-DX86_SSE42_CMP_STR)
- set(SSE42_SRCS ${ARCHDIR}/compare258_sse42.c)
- add_feature_info(SSE42_COMPARE258 1 "Support SSE4.2 optimized compare258, using \"${SSE42FLAG}\"")
+ set(SSE42_SRCS ${ARCHDIR}/compare256_sse42.c)
+ add_feature_info(SSE42_COMPARE256 1 "Support SSE4.2 optimized compare256, using \"${SSE42FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
endif()
set(ZLIB_SRCS
adler32.c
chunkset.c
- compare258.c
+ compare256.c
compress.c
crc32.c
crc32_comb.c
OBJZ = \
adler32.o \
chunkset.o \
- compare258.o \
+ compare256.o \
compress.o \
crc32.o \
crc32_comb.o \
PIC_OBJZ = \
adler32.lo \
chunkset.lo \
- compare258.lo \
+ compare256.lo \
compress.lo \
crc32.lo \
crc32_comb.lo \
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ & ACLE
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
* Slide hash implementations using SSE2, AVX2, Neon, VMX & VSX
- * Compare256/258 implementations using SSE4.2 & AVX2
+ * Compare256 implementations using SSE4.2 & AVX2
* Inflate chunk copying using SSE2, AVX2, Neon & VSX
* CRC32 implementation using IBM Z vector instructions
* Support for hardware-accelerated deflate using IBM Z DFLTCC
adler32_ssse3.o adler32_ssse3.lo \
chunkset_avx.o chunkset_avx.lo \
chunkset_sse2.o chunkset_sse2.lo \
- compare258_avx2.o compare258_avx2.lo \
- compare258_sse42.o compare258_sse42.lo \
+ compare256_avx2.o compare256_avx2.lo \
+ compare256_sse42.o compare256_sse42.lo \
insert_string_sse42.o insert_string_sse42.lo \
crc32_fold_pclmulqdq.o crc32_fold_pclmulqdq.lo \
crc32_fold_vpclmulqdq.o crc32_fold_vpclmulqdq.lo \
chunkset_sse2.lo:
$(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse2.c
-compare258_avx2.o:
- $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx2.c
+compare256_avx2.o:
+ $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_avx2.c
-compare258_avx2.lo:
- $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx2.c
+compare256_avx2.lo:
+ $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_avx2.c
-compare258_sse42.o:
- $(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse42.c
+compare256_sse42.o:
+ $(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse42.c
-compare258_sse42.lo:
- $(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse42.c
+compare256_sse42.lo:
+ $(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse42.c
insert_string_sse42.o:
$(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c
-/* compare258_avx2.c -- AVX2 version of compare258
+/* compare256_avx2.c -- AVX2 version of compare256
* Copyright Mika T. Lindqvist <postmaster@raasu.org>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
return 256;
}
-static inline uint32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
- if (*(uint16_t *)src0 != *(uint16_t *)src1)
- return (*src0 == *src1);
-
- return compare256_unaligned_avx2_static(src0+2, src1+2) + 2;
-}
-
-Z_INTERNAL uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) {
- return compare258_unaligned_avx2_static(src0, src1);
+Z_INTERNAL uint32_t compare256_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) {
+ return compare256_unaligned_avx2_static(src0, src1);
}
#define LONGEST_MATCH longest_match_unaligned_avx2
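Note: the 256-byte AVX2 core that the renamed file keeps is unchanged by this patch. For readers unfamiliar with it, a minimal sketch of the 32-bytes-per-iteration idea, assuming little-endian byte order and GCC/Clang's __builtin_ctz (names and layout here are illustrative, not the file's verbatim code):

    #include <immintrin.h>
    #include <stdint.h>

    /* Compare 32 bytes per iteration: byte-wise vector equality produces a
     * 32-bit movemask; the first 0 bit in that mask is the first mismatch. */
    static inline uint32_t compare256_avx2_sketch(const unsigned char *src0, const unsigned char *src1) {
        uint32_t len = 0;
        do {
            __m256i a = _mm256_loadu_si256((const __m256i *)src0);
            __m256i b = _mm256_loadu_si256((const __m256i *)src1);
            unsigned mask = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(a, b));
            if (mask != 0xFFFFFFFFu)
                return len + (uint32_t)__builtin_ctz(~mask);
            src0 += 32, src1 += 32, len += 32;
        } while (len < 256);
        return 256;
    }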
-/* compare258_sse42.c -- SSE4.2 version of compare258
+/* compare256_sse42.c -- SSE4.2 version of compare256
*
* Copyright (C) 2013 Intel Corporation. All rights reserved.
* Authors:
return 256;
}
-static inline uint32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
- if (*(uint16_t *)src0 != *(uint16_t *)src1)
- return (*src0 == *src1);
-
- return compare256_unaligned_sse4_static(src0+2, src1+2) + 2;
-}
-
-Z_INTERNAL uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
- return compare258_unaligned_sse4_static(src0, src1);
+Z_INTERNAL uint32_t compare256_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
+ return compare256_unaligned_sse4_static(src0, src1);
}
#define LONGEST_MATCH longest_match_unaligned_sse4
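The SSE4.2 side keeps the analogous 16-byte loop built on the string-compare instruction. A sketch under the same caveats, using the real PCMPESTRI intrinsics but an illustrative function name (compiled with SSE42FLAG, as the Makefile rules above show):

    #include <nmmintrin.h>
    #include <stdint.h>

    /* EQUAL_EACH + NEGATIVE_POLARITY makes PCMPESTRI report the index of the
     * first *mismatching* byte; _mm_cmpestrc says whether one exists at all. */
    #define CMP_MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY)

    static inline uint32_t compare256_sse4_sketch(const unsigned char *src0, const unsigned char *src1) {
        uint32_t len = 0;
        do {
            __m128i a = _mm_loadu_si128((const __m128i *)src0);
            __m128i b = _mm_loadu_si128((const __m128i *)src1);
            if (_mm_cmpestrc(a, 16, b, 16, CMP_MODE))
                return len + (uint32_t)_mm_cmpestri(a, 16, b, 16, CMP_MODE);
            src0 += 16, src1 += 16, len += 16;
        } while (len < 256);
        return 256;
    }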
-/* compare258.c -- aligned and unaligned versions of compare258
+/* compare256.c -- 256 byte memory comparison with match length return
* Copyright (C) 2020 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/
return 256;
}
-static inline uint32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) {
- if (*src0 != *src1)
- return 0;
- src0 += 1, src1 += 1;
- if (*src0 != *src1)
- return 1;
- src0 += 1, src1 += 1;
-
- return compare256_c_static(src0, src1) + 2;
-}
-
-Z_INTERNAL uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1) {
- return compare258_c_static(src0, src1);
+Z_INTERNAL uint32_t compare256_c(const unsigned char *src0, const unsigned char *src1) {
+ return compare256_c_static(src0, src1);
}
#define LONGEST_MATCH longest_match_c
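The portable compare256_c core that the new file header describes is a plain byte loop (the shipped version unrolls it). A minimal sketch, with an illustrative name:

    #include <stdint.h>

    /* Walk both buffers a byte at a time and return how many of the first
     * 256 bytes match. No alignment or endianness assumptions. */
    static inline uint32_t compare256_c_sketch(const unsigned char *src0, const unsigned char *src1) {
        uint32_t len = 0;
        while (len < 256 && src0[len] == src1[len])
            len++;
        return len;
    }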
return 256;
}
-static inline uint32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
- if (*(uint16_t *)src0 != *(uint16_t *)src1)
- return (*src0 == *src1);
-
- return compare256_unaligned_16_static(src0+2, src1+2) + 2;
-}
-
-Z_INTERNAL uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) {
- return compare258_unaligned_16_static(src0, src1);
+Z_INTERNAL uint32_t compare256_unaligned_16(const unsigned char *src0, const unsigned char *src1) {
+ return compare256_unaligned_16_static(src0, src1);
}
#define LONGEST_MATCH longest_match_unaligned_16
return 256;
}
-static inline uint32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
- if (*(uint16_t *)src0 != *(uint16_t *)src1)
- return (*src0 == *src1);
-
- return compare256_unaligned_32_static(src0+2, src1+2) + 2;
-}
-
-Z_INTERNAL uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) {
- return compare258_unaligned_32_static(src0, src1);
+Z_INTERNAL uint32_t compare256_unaligned_32(const unsigned char *src0, const unsigned char *src1) {
+ return compare256_unaligned_32_static(src0, src1);
}
#define LONGEST_MATCH longest_match_unaligned_32
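The unaligned 32-bit variant relies on word-sized loads plus a count-trailing-zeros trick to locate the first differing byte. A sketch assuming little-endian order and __builtin_ctz; memcpy stands in here for the direct unaligned loads the UNALIGNED_OK build performs (the 64-bit variant below is the same idea with __builtin_ctzll):

    #include <stdint.h>
    #include <string.h>

    /* XOR two 4-byte loads; on a mismatch, the trailing-zero count of the
     * difference, divided by 8, is how many leading bytes still matched. */
    static inline uint32_t compare256_u32_sketch(const unsigned char *src0, const unsigned char *src1) {
        uint32_t len = 0;
        do {
            uint32_t a, b;
            memcpy(&a, src0, sizeof(a));
            memcpy(&b, src1, sizeof(b));
            if (a != b)
                return len + ((uint32_t)__builtin_ctz(a ^ b) >> 3);
            src0 += 4, src1 += 4, len += 4;
        } while (len < 256);
        return 256;
    }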
return 256;
}
-static inline uint32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
- if (*(uint16_t *)src0 != *(uint16_t *)src1)
- return (*src0 == *src1);
-
- return compare256_unaligned_64_static(src0+2, src1+2) + 2;
-}
-
-Z_INTERNAL uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) {
- return compare258_unaligned_64_static(src0, src1);
+Z_INTERNAL uint32_t compare256_unaligned_64(const unsigned char *src0, const unsigned char *src1) {
+ return compare256_unaligned_64_static(src0, src1);
}
#define LONGEST_MATCH longest_match_unaligned_64
if test ${HAVE_AVX2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
SFLAGS="${SFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_avx2.o chunkset_avx.o compare258_avx2.o adler32_avx2.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_avx2.lo chunkset_avx.lo compare258_avx2.lo adler32_avx2.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_avx2.o chunkset_avx.o compare256_avx2.o adler32_avx2.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_avx2.lo chunkset_avx.lo compare256_avx2.lo adler32_avx2.lo"
fi
check_avx512_intrinsics
CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} compare258_sse42.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} compare258_sse42.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} compare256_sse42.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} compare256_sse42.lo"
fi
check_sse2_intrinsics
dist = (int64_t)s->strstart - hash_head;
if (dist <= MAX_DIST(s) && dist > 0) {
- match_len = functable.compare258(s->window + s->strstart, s->window + hash_head);
+ const uint8_t *str_start = s->window + s->strstart;
+ const uint8_t *match_start = s->window + hash_head;
- if (match_len >= WANT_MIN_MATCH) {
- if (UNLIKELY(match_len > s->lookahead))
- match_len = s->lookahead;
+ if (*(uint16_t *)str_start == *(uint16_t *)match_start) {
+ match_len = functable.compare256(str_start+2, match_start+2) + 2;
- check_match(s, s->strstart, hash_head, match_len);
+ if (match_len >= WANT_MIN_MATCH) {
+ if (UNLIKELY(match_len > s->lookahead))
+ match_len = s->lookahead;
- zng_tr_emit_dist(s, static_ltree, static_dtree, match_len - STD_MIN_MATCH, (uint32_t)dist);
- s->lookahead -= match_len;
- s->strstart += match_len;
- continue;
+ check_match(s, s->strstart, hash_head, match_len);
+
+ zng_tr_emit_dist(s, static_ltree, static_dtree, match_len - STD_MIN_MATCH, (uint32_t)dist);
+ s->lookahead -= match_len;
+ s->strstart += match_len;
+ continue;
+ }
}
}
}
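This deflate hunk is where the removed compare258 wrappers went: the caller now performs the 2-byte prefix check itself and adds 2 to compare256's result, so the maximum reported length is still 2 + 256 = 258 bytes, i.e. MAX_MATCH. A hypothetical helper, not part of the patch, makes the equivalence explicit:

    /* Hypothetical helper: the old compare258() is exactly this 2-byte prefix
     * check plus compare256() shifted by two bytes; the result still ranges
     * over 0..258. */
    static uint32_t match_len_258(const uint8_t *str_start, const uint8_t *match_start) {
        if (*(uint16_t *)str_start != *(uint16_t *)match_start)
            return (*str_start == *match_start);  /* 0 or 1 matching bytes */
        return functable.compare256(str_start + 2, match_start + 2) + 2;
    }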
extern uint32_t s390_crc32_vx(uint32_t, const unsigned char *, uint64_t);
#endif
-/* compare258 */
-extern uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1);
+/* compare256 */
+extern uint32_t compare256_c(const unsigned char *src0, const unsigned char *src1);
#ifdef UNALIGNED_OK
-extern uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1);
-extern uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1);
+extern uint32_t compare256_unaligned_16(const unsigned char *src0, const unsigned char *src1);
+extern uint32_t compare256_unaligned_32(const unsigned char *src0, const unsigned char *src1);
#ifdef UNALIGNED64_OK
-extern uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1);
+extern uint32_t compare256_unaligned_64(const unsigned char *src0, const unsigned char *src1);
#endif
#ifdef X86_SSE42_CMP_STR
-extern uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1);
+extern uint32_t compare256_unaligned_sse4(const unsigned char *src0, const unsigned char *src1);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-extern uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1);
+extern uint32_t compare256_unaligned_avx2(const unsigned char *src0, const unsigned char *src1);
#endif
#endif
return functable.crc32(crc, buf, len);
}
-Z_INTERNAL uint32_t compare258_stub(const unsigned char *src0, const unsigned char *src1) {
+Z_INTERNAL uint32_t compare256_stub(const unsigned char *src0, const unsigned char *src1) {
#ifdef UNALIGNED_OK
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
- functable.compare258 = &compare258_unaligned_64;
+ functable.compare256 = &compare256_unaligned_64;
# elif defined(HAVE_BUILTIN_CTZ)
- functable.compare258 = &compare258_unaligned_32;
+ functable.compare256 = &compare256_unaligned_32;
# else
- functable.compare258 = &compare258_unaligned_16;
+ functable.compare256 = &compare256_unaligned_16;
# endif
# ifdef X86_SSE42_CMP_STR
if (x86_cpu_has_sse42)
- functable.compare258 = &compare258_unaligned_sse4;
+ functable.compare256 = &compare256_unaligned_sse4;
# endif
# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
if (x86_cpu_has_avx2)
- functable.compare258 = &compare258_unaligned_avx2;
+ functable.compare256 = &compare256_unaligned_avx2;
# endif
#else
- functable.compare258 = &compare258_c;
+ functable.compare256 = &compare256_c;
#endif
- return functable.compare258(src0, src1);
+ return functable.compare256(src0, src1);
}
Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
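compare256_stub above follows the functable's lazy-dispatch idiom: the table slot initially points at the stub, the first call picks the best implementation for the running CPU, overwrites the slot, and forwards that same call. Later calls go straight to the resolved function. A self-contained sketch of the pattern with made-up names:

    #include <stdint.h>

    typedef uint32_t (*cmp_fn)(const unsigned char *, const unsigned char *);

    static uint32_t cmp_generic(const unsigned char *a, const unsigned char *b);
    static uint32_t cmp_stub(const unsigned char *a, const unsigned char *b);

    /* The slot starts out pointing at the stub. */
    static struct { cmp_fn compare256; } table = { cmp_stub };

    static uint32_t cmp_generic(const unsigned char *a, const unsigned char *b) {
        uint32_t len = 0;
        while (len < 256 && a[len] == b[len])
            len++;
        return len;
    }

    static uint32_t cmp_stub(const unsigned char *a, const unsigned char *b) {
        table.compare256 = cmp_generic;  /* CPU-feature checks would pick a variant here */
        return table.compare256(a, b);   /* forward the call that triggered resolution */
    }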
crc32_fold_copy_stub,
crc32_fold_final_stub,
slide_hash_stub,
- compare258_stub,
+ compare256_stub,
longest_match_stub,
longest_match_slow_stub,
chunksize_stub,
void (* crc32_fold_copy) (crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t (* crc32_fold_final) (crc32_fold *crc);
void (* slide_hash) (deflate_state *s);
- uint32_t (* compare258) (const unsigned char *src0, const unsigned char *src1);
+ uint32_t (* compare256) (const unsigned char *src0, const unsigned char *src1);
uint32_t (* longest_match) (deflate_state *const s, Pos cur_match);
uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
uint32_t (* chunksize) (void);
adler32.obj \
armfeature.obj \
chunkset.obj \
- compare258.obj \
+ compare256.obj \
compress.obj \
crc32.obj \
crc32_comb.obj \
adler32.obj \
armfeature.obj \
chunkset.obj \
- compare258.obj \
+ compare256.obj \
compress.obj \
crc32.obj \
crc32_comb.obj \
chunkset.obj \
chunkset_avx.obj \
chunkset_sse2.obj \
- compare258.obj \
- compare258_avx2.obj \
- compare258_sse42.obj \
+ compare256.obj \
+ compare256_avx2.obj \
+ compare256_sse42.obj \
compress.obj \
crc32.obj \
crc32_comb.obj \