check_neon_intrinsics()
if(MFPU_NEON_AVAILABLE)
add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
- set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c)
+ set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_hash_neon.c)
list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
set_property(SOURCE ${NEON_SRCS} PROPERTY COMPILE_FLAGS "${NEONFLAG} ${NOLTOFLAG}")
if(MSVC)
add_definitions(-DPOWER8_VSX_SLIDEHASH)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c)
- set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_power8.c)
+ set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c)
list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS})
set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}")
else()
check_avx2_intrinsics()
if(HAVE_AVX2_INTRIN)
add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET)
- set(AVX2_SRCS ${ARCHDIR}/slide_avx.c)
+ set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx.c)
add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c)
add_feature_info(AVX_CHUNKSET 1 "Support AVX optimized chunkset, using \"${AVX2FLAG}\"")
check_sse2_intrinsics()
if(HAVE_SSE2_INTRIN)
add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
- set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c)
+ set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_hash_sse.c)
list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
if(NOT ${ARCH} MATCHES "x86_64")
set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}")
armfeature.o armfeature.lo \
chunkset_neon.o chunkset_neon.lo \
crc32_acle.o crc32_acle.lo \
- slide_neon.o slide_neon.lo \
+ slide_hash_neon.o slide_hash_neon.lo \
insert_string_acle.o insert_string_acle.lo
adler32_neon.o:
crc32_acle.lo:
$(CC) $(SFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
-slide_neon.o:
- $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
+slide_hash_neon.o:
+ $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_neon.c
-slide_neon.lo:
- $(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
+slide_hash_neon.lo:
+ $(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_neon.c
insert_string_acle.o:
$(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
-/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
+/* slide_hash_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
* Copyright (C) 2017-2020 Mika T. Lindqvist
*
* Authors:
adler32_power8.lo \
chunkset_power8.o \
chunkset_power8.lo \
- slide_power8.o \
- slide_power8.lo
+ slide_hash_power8.o \
+ slide_hash_power8.lo
power.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c
chunkset_power8.lo:
$(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_power8.c
-slide_power8.o:
- $(CC) $(CFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_power8.c
+slide_hash_power8.o:
+ $(CC) $(CFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c
-slide_power8.lo:
- $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_power8.c
+slide_hash_power8.lo:
+ $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c
mostlyclean: clean
clean:
|:-|:-|
|deflate_quick.c|SSE4 optimized deflate strategy for use as level 1|
|crc_folding.c|SSE4 + PCLMULQDQ optimized CRC folding implementation|
-|slide_sse2.c|SSE2 optimized slide_hash|
+|slide_hash_sse.c|SSE2 optimized slide_hash|
compare258_sse.o compare258_sse.lo \
insert_string_sse.o insert_string_sse.lo \
crc_folding.o crc_folding.lo \
- slide_avx.o slide_avx.lo \
- slide_sse.o slide_sse.lo
+ slide_hash_avx.o slide_hash_avx.lo \
+ slide_hash_sse.o slide_hash_sse.lo
x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
crc_folding.lo:
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
-slide_avx.o:
- $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
+slide_hash_avx.o:
+ $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx.c
-slide_avx.lo:
- $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
+slide_hash_avx.lo:
+ $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx.c
-slide_sse.o:
- $(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+slide_hash_sse.o:
+ $(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_sse.c
-slide_sse.lo:
- $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+slide_hash_sse.lo:
+ $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_sse.c
adler32_avx.o: $(SRCDIR)/adler32_avx.c
$(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c
if test ${HAVE_AVX2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
SFLAGS="${SFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_avx.o chunkset_avx.o compare258_avx.o adler32_avx.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_avx.lo chunkset_avx.lo compare258_avx.lo adler32_avx.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_avx.o chunkset_avx.o compare258_avx.o adler32_avx.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_avx.lo chunkset_avx.lo compare258_avx.lo adler32_avx.lo"
fi
check_sse4_intrinsics
if test ${HAVE_SSE2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse.o slide_sse.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse.lo slide_sse.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse.o slide_hash_sse.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse.lo slide_hash_sse.lo"
if test $forcesse2 -eq 1; then
CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2"
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_hash_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_hash_neon.lo"
fi
fi
;;
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_hash_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_hash_neon.lo"
fi
fi
;;
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_hash_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_hash_neon.lo"
fi
fi
;;
fi
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_hash_neon.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_hash_neon.lo"
fi
fi
CFLAGS="${CFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH"
SFLAGS="${SFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH"
- ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o adler32_power8.o chunkset_power8.o slide_power8.o"
- ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo adler32_power8.lo chunkset_power8.lo slide_power8.lo"
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o adler32_power8.o chunkset_power8.o slide_hash_power8.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo adler32_power8.lo chunkset_power8.lo slide_hash_power8.lo"
fi
fi
;;
-DARM_NEON_SLIDEHASH \
-DARM_NOCHECK_NEON \
#
-OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj slide_neon.obj
+OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj slide_hash_neon.obj
# targets
all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
+slide_hash_neon.obj: $(SRCDIR)/arch/arm/slide_hash_neon.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h
-DARM_NEON_SLIDEHASH \
-DARM_NOCHECK_NEON \
#
-OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj slide_neon.obj
+OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj slide_hash_neon.obj
!endif
# targets
insert_string_roll.obj \
insert_string_sse.obj \
slide_hash.obj \
- slide_avx.obj \
- slide_sse.obj \
+ slide_hash_avx.obj \
+ slide_hash_sse.obj \
trees.obj \
uncompr.obj \
zutil.obj \
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
-slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h
+slide_hash_avx.obj: $(SRCDIR)/arch/x86/slide_hash_avx.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
+slide_hash_sse.obj: $(SRCDIR)/arch/x86/slide_hash_sse.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h