add_definitions(-DX86_AVX2)
set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
- list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c)
- add_feature_info(AVX_CHUNKSET 1 "Support AVX optimized chunkset, using \"${AVX2FLAG}\"")
+ list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c)
+ add_feature_info(AVX2_CHUNKSET 1 "Support AVX2 optimized chunkset, using \"${AVX2FLAG}\"")
list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c)
add_feature_info(AVX2_COMPARE256 1 "Support AVX2 optimized compare256, using \"${AVX2FLAG}\"")
list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c)
adler32_avx512_vnni.o adler32_avx512_vnni.lo \
adler32_sse42.o adler32_sse42.lo \
adler32_ssse3.o adler32_ssse3.lo \
- chunkset_avx.o chunkset_avx.lo \
+ chunkset_avx2.o chunkset_avx2.lo \
chunkset_sse2.o chunkset_sse2.lo \
chunkset_ssse3.o chunkset_ssse3.lo \
compare256_avx2.o compare256_avx2.lo \
x86_features.lo:
$(CC) $(SFLAGS) $(XSAVEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/x86_features.c
-chunkset_avx.o:
- $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c
+chunkset_avx2.o:
+ $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx2.c
-chunkset_avx.lo:
- $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c
+chunkset_avx2.lo:
+ $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx2.c
chunkset_sse2.o:
$(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse2.c
-/* chunkset_avx.c -- AVX inline functions to copy small data chunks.
+/* chunkset_avx2.c -- AVX2 inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
return ret_vec;
}
-#define CHUNKSIZE chunksize_avx
-#define CHUNKCOPY chunkcopy_avx
-#define CHUNKUNROLL chunkunroll_avx
-#define CHUNKMEMSET chunkmemset_avx
-#define CHUNKMEMSET_SAFE chunkmemset_safe_avx
+#define CHUNKSIZE chunksize_avx2
+#define CHUNKCOPY chunkcopy_avx2
+#define CHUNKUNROLL chunkunroll_avx2
+#define CHUNKMEMSET chunkmemset_avx2
+#define CHUNKMEMSET_SAFE chunkmemset_safe_avx2
#include "chunkset_tpl.h"
-#define INFLATE_FAST inflate_fast_avx
+#define INFLATE_FAST inflate_fast_avx2
#include "inffast_tpl.h"
if test ${HAVE_AVX2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_AVX2"
SFLAGS="${SFLAGS} -DX86_AVX2"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_avx2.o chunkset_avx.o compare256_avx2.o adler32_avx2.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_avx2.lo chunkset_avx.lo compare256_avx2.lo adler32_avx2.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_avx2.o chunkset_avx2.o compare256_avx2.o adler32_avx2.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_avx2.lo chunkset_avx2.lo compare256_avx2.lo adler32_avx2.lo"
fi
check_avx512_intrinsics
extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
#ifdef X86_AVX2
-extern uint32_t chunksize_avx(void);
-extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+extern uint32_t chunksize_avx2(void);
+extern uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
#ifdef ARM_NEON
extern uint32_t chunksize_neon(void);
extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
#endif
#ifdef X86_AVX2
-extern void inflate_fast_avx(PREFIX3(stream) *strm, uint32_t start);
+extern void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start);
#endif
#ifdef ARM_NEON
extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
if (cf.x86.has_avx2) {
ft.adler32 = &adler32_avx2;
ft.adler32_fold_copy = &adler32_fold_copy_avx2;
- ft.chunkmemset_safe = &chunkmemset_safe_avx;
- ft.chunksize = &chunksize_avx;
- ft.inflate_fast = &inflate_fast_avx;
+ ft.chunkmemset_safe = &chunkmemset_safe_avx2;
+ ft.chunksize = &chunksize_avx2;
+ ft.inflate_fast = &inflate_fast_avx2;
ft.slide_hash = &slide_hash_avx2;
# ifdef HAVE_BUILTIN_CTZ
ft.compare256 = &compare256_avx2;
adler32_ssse3.obj \
adler32_fold.obj \
chunkset.obj \
- chunkset_avx.obj \
+ chunkset_avx2.obj \
chunkset_sse2.obj \
chunkset_ssse3.obj \
compare256.obj \
compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
-chunkset_avx.obj: $(SRCDIR)/arch/x86/chunkset_avx.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
+chunkset_avx2.obj: $(SRCDIR)/arch/x86/chunkset_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
chunkset_sse2.obj: $(SRCDIR)/arch/x86/chunkset_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
chunkset_ssse3.obj: $(SRCDIR)/arch/x86/chunkset_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h