Also removed a reference to the nonexistent adler32_sse41 in test/test_adler32.cc.
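The pattern is uniform across the tree: per-function feature defines (ARM_NEON_ADLER32, ARM_NEON_CHUNKSET, ARM_NEON_SLIDEHASH, X86_SSE42_CRC_HASH, ...) collapse into a single define per instruction-set extension (ARM_NEON, X86_SSE42, PPC_VMX, POWER8_VSX, ...), and the functable then installs all of an extension's kernels behind one CPU check. A minimal sketch of the resulting guard shape, reusing names that appear in the hunks below (illustrative only, not lifted verbatim from the patch):

    /* Compile time: one define now gates every kernel of an extension. */
    #ifdef ARM_NEON
    extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
    extern void slide_hash_neon(deflate_state *s);
    #endif

    /* Run time: the functable installs them behind a single CPU check. */
    #ifdef ARM_NEON
        if (arm_cpu_has_neon) {
            ft.adler32 = &adler32_neon;
            ft.slide_hash = &slide_hash_neon;
        }
    #endif

Build scripts shrink accordingly to one -D flag per extension (e.g. -DARM_NEON instead of -DARM_NEON -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH).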
if(WITH_ACLE AND NOT "${ARCH}" MATCHES "armv[2-7]")
check_acle_compiler_flag()
if(HAVE_ACLE_FLAG)
- add_definitions(-DARM_ACLE_CRC_HASH)
+ add_definitions(-DARM_ACLE)
set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}")
list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
if(WITH_NEON)
check_neon_compiler_flag()
if(MFPU_NEON_AVAILABLE)
- add_definitions(-DARM_NEON -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
+ add_definitions(-DARM_NEON)
set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c
${ARCHDIR}/compare256_neon.c ${ARCHDIR}/slide_hash_neon.c)
list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
if(HAVE_VMX)
add_definitions(-DPPC_FEATURES)
if(HAVE_ALTIVEC)
- add_definitions(-DPPC_VMX_ADLER32)
- add_definitions(-DPPC_VMX_SLIDEHASH)
+ add_definitions(-DPPC_VMX)
set(PPC_SRCS ${ARCHDIR}/adler32_vmx.c ${ARCHDIR}/slide_hash_vmx.c)
list(APPEND ZLIB_ARCH_SRCS ${PPC_SRCS})
add_feature_info(ALTIVEC 1 "Support the AltiVec instruction set, using \"-maltivec\"")
# Power8 specific options and files
if(WITH_POWER8)
if(HAVE_POWER8_INTRIN)
- add_definitions(-DPOWER8)
+ add_definitions(-DPOWER8_VSX)
add_definitions(-DPOWER_FEATURES)
- add_definitions(-DPOWER8_VSX_ADLER32)
- add_definitions(-DPOWER8_VSX_CHUNKSET)
- add_definitions(-DPOWER8_VSX_SLIDEHASH)
set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c)
if("${ARCH}" MATCHES "powerpc64(le)?")
add_definitions(-DPOWER8_VSX_CRC32)
if(WITH_AVX2)
check_avx2_intrinsics()
if(HAVE_AVX2_INTRIN)
- add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET)
+ add_definitions(-DX86_AVX2)
set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c)
if(WITH_AVX512)
check_avx512_intrinsics()
if(HAVE_AVX512_INTRIN)
- add_definitions(-DX86_AVX512 -DX86_AVX512_ADLER32)
+ add_definitions(-DX86_AVX512)
list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c)
add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${AVX512_SRCS})
if(WITH_AVX512VNNI)
check_avx512vnni_intrinsics()
if(HAVE_AVX512VNNI_INTRIN)
- add_definitions(-DX86_AVX512VNNI -DX86_AVX512VNNI_ADLER32)
+ add_definitions(-DX86_AVX512VNNI)
add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"")
list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c)
list(APPEND ZLIB_ARCH_SRCS ${AVX512VNNI_SRCS})
if(WITH_SSE42)
check_sse42_intrinsics()
if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
- add_definitions(-DX86_SSE42_CRC_HASH -DX86_SSE42_ADLER32)
+ add_definitions(-DX86_SSE42)
set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
if(WITH_SSE2)
check_sse2_intrinsics()
if(HAVE_SSE2_INTRIN)
- add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
+ add_definitions(-DX86_SSE2)
set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
if(NOT ${ARCH} MATCHES "x86_64")
if(WITH_SSSE3)
check_ssse3_intrinsics()
if(HAVE_SSSE3_INTRIN)
- add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
+ add_definitions(-DX86_SSSE3)
set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
* Adam Stylinski <kungfujesus06@gmail.com>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
#include "neon_intrins.h"
#include "../../zbuild.h"
#include "../../adler32_p.h"
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef ARM_NEON_CHUNKSET
+#ifdef ARM_NEON
#include "neon_intrins.h"
#include "../../zbuild.h"
#include "../generic/chunk_permute_table.h"
*
*/
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
#ifdef _MSC_VER
# include <intrin.h>
#else
*
*/
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
#ifndef _MSC_VER
# include <arm_acle.h>
#endif
# include <arm_neon.h>
#endif
-#if defined(ARM_NEON_ADLER32) && !defined(__aarch64__) && !defined(_M_ARM64)
+#if defined(ARM_NEON) && !defined(__aarch64__) && !defined(_M_ARM64)
/* Compatibility shim for the _high family of functions */
#define vmull_high_u8(a, b) vmull_u8(vget_high_u8(a), vget_high_u8(b))
#define vmlal_high_u8(a, b, c) vmlal_u8(a, vget_high_u8(b), vget_high_u8(c))
#define vaddw_high_u8(a, b) vaddw_u8(a, vget_high_u8(b))
#endif
-#ifdef ARM_NEON_SLIDEHASH
+#ifdef ARM_NEON
#define vqsubq_u16_x4_x1(out, a, b) do { \
out.val[0] = vqsubq_u16(a.val[0], b); \
out.val[3] = vqsubq_u16(a.val[3], b); \
} while (0)
-#endif
-#if !defined(ARM_NEON_HASLD4) && (defined(ARM_NEON_ADLER32) || defined(ARM_NEON_SLIDEHASH))
+# ifndef ARM_NEON_HASLD4
static inline uint16x8x4_t vld1q_u16_x4(uint16_t const *a) {
uint16x8x4_t ret = (uint16x8x4_t) {{
vst1q_u16(p + 16, a.val[2]);
vst1q_u16(p + 24, a.val[3]);
}
-#endif // HASLD4 check
+# endif // HASLD4 check
+#endif
#endif // include guard ARM_NEON_INTRINS_H
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#if defined(ARM_NEON_SLIDEHASH)
+#ifdef ARM_NEON
#include "neon_intrins.h"
#include "../../zbuild.h"
#include "../../deflate.h"
* https://www.ietf.org/rfc/rfc1950.txt
*/
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
#include <altivec.h>
#include "zbuild.h"
return adler32_len_16(s1, buf, len, s2);
}
-#endif /* POWER8_VSX_ADLER32 */
+#endif /* POWER8_VSX */
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
#include <altivec.h>
#include "zbuild.h"
#include "adler32_p.h"
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef POWER8_VSX_CHUNKSET
+#ifdef POWER8_VSX
#include <altivec.h>
#include "../../zbuild.h"
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef POWER8_VSX_SLIDEHASH
+#ifdef POWER8_VSX
#define SLIDE_PPC slide_hash_power8
#include "slide_ppc_tpl.h"
-#endif /* POWER8_VSX_SLIDEHASH */
+#endif /* POWER8_VSX */
* Copyright (C) 2017-2021 Mika T. Lindqvist <postmaster@raasu.org>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef PPC_VMX_SLIDEHASH
+#ifdef PPC_VMX
#define SLIDE_PPC slide_hash_vmx
#include "slide_ppc_tpl.h"
-#endif /* PPC_VMX_SLIDEHASH */
+#endif /* PPC_VMX */
#include <immintrin.h>
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
#include "adler32_avx2_tpl.h"
#ifndef ADLER32_AVX2_P_H_
#define ADLER32_AVX2_P_H_
-#if defined(X86_AVX2_ADLER32) || defined(X86_AVX512VNNI_ADLER32)
+#if defined(X86_AVX2) || defined(X86_AVX512VNNI)
/* 32 bit horizontal sum, adapted from Agner Fog's vector library. */
static inline uint32_t hsum256(__m256i x) {
#include "../../fallback_builtins.h"
#include "adler32_avx2_p.h"
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len);
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
#include "adler32_avx512_tpl.h"
#include <immintrin.h>
#include "adler32_avx512_p.h"
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
#ifdef COPY
Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
_mm512_mask_storeu_epi8(dst, storemask, copy_vec);
#endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
return adler32_avx2(adler, src, len);
-#elif defined(X86_SSSE3_ADLER32)
+#elif defined(X86_SSSE3)
return adler32_ssse3(adler, src, len);
#else
return adler32_len_16(adler0, src, len, adler1);
* For conditions of distribution and use, see copyright notice in zlib.h
*/
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
#include "../../zbuild.h"
#include "../../adler32_p.h"
rem_peel:
if (len < 32)
-#if defined(X86_SSSE3_ADLER32)
+#if defined(X86_SSSE3)
return adler32_ssse3(adler, src, len);
#else
return adler32_len_16(adler0, src, len, adler1);
#endif
if (len < 64)
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
return adler32_avx2(adler, src, len);
-#elif defined(X86_SSE3_ADLER32)
+#elif defined(X86_SSSE3)
return adler32_ssse3(adler, src, len);
#else
return adler32_len_16(adler0, src, len, adler1);
__m256i copy_vec = _mm256_maskz_loadu_epi8(storemask, src);
_mm256_mask_storeu_epi8(dst, storemask, copy_vec);
-#if defined(X86_SSSE3_ADLER32)
+#if defined(X86_SSSE3)
return adler32_ssse3(adler, src, len);
#else
return adler32_len_16(adler0, src, len, adler1);
#include "adler32_ssse3_p.h"
#include <immintrin.h>
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
uint32_t adler0, adler1;
#include "../../adler32_p.h"
#include "adler32_ssse3_p.h"
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
#include <immintrin.h>
#ifndef ADLER32_SSSE3_P_H_
#define ADLER32_SSSE3_P_H_
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
#include <immintrin.h>
#include <stdint.h>
*/
#include "zbuild.h"
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
#include <immintrin.h>
#include "../generic/chunk_permute_table.h"
#define INSERT_STRING insert_string_sse4
#define QUICK_INSERT_STRING quick_insert_string_sse4
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
# include "../../insert_string_tpl.h"
#endif
check_avx2_intrinsics
if test ${HAVE_AVX2_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
- SFLAGS="${SFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
+ CFLAGS="${CFLAGS} -DX86_AVX2"
+ SFLAGS="${SFLAGS} -DX86_AVX2"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_avx2.o chunkset_avx.o compare256_avx2.o adler32_avx2.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_avx2.lo chunkset_avx.lo compare256_avx2.lo adler32_avx2.lo"
fi
check_avx512_intrinsics
if test ${HAVE_AVX512_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_AVX512 -DX86_AVX512_ADLER32"
- SFLAGS="${SFLAGS} -DX86_AVX512 -DX86_AVX512_ADLER32"
+ CFLAGS="${CFLAGS} -DX86_AVX512"
+ SFLAGS="${SFLAGS} -DX86_AVX512"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_avx512.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512.lo"
check_avx512vnni_intrinsics
if test ${HAVE_AVX512VNNI_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_AVX512VNNI -DX86_AVX512VNNI_ADLER32"
- SFLAGS="${SFLAGS} -DX86_AVX512VNNI -DX86_AVX512VNNI_ADLER32"
+ CFLAGS="${CFLAGS} -DX86_AVX512VNNI"
+ SFLAGS="${SFLAGS} -DX86_AVX512VNNI"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_avx512_vnni.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo"
fi
check_sse42_intrinsics
if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH -DX86_SSE42_ADLER32"
- SFLAGS="${SFLAGS} -DX86_SSE42_CRC_HASH -DX86_SSE42_ADLER32"
+ CFLAGS="${CFLAGS} -DX86_SSE42"
+ SFLAGS="${SFLAGS} -DX86_SSE42"
if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN"
check_sse2_intrinsics
if test ${HAVE_SSE2_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
- SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
+ CFLAGS="${CFLAGS} -DX86_SSE2"
+ SFLAGS="${SFLAGS} -DX86_SSE2"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o compare256_sse2.o slide_hash_sse2.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo compare256_sse2.lo slide_hash_sse2.lo"
check_ssse3_intrinsics
if test ${HAVE_SSSE3_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
- SFLAGS="${SFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
+ CFLAGS="${CFLAGS} -DX86_SSSE3"
+ SFLAGS="${SFLAGS} -DX86_SSSE3"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo"
fi
SFLAGS="${SFLAGS} -DARM_NEON_HASLD4"
fi
- CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
fi
SFLAGS="${SFLAGS} -DARM_NEON_HASLD4"
fi
- CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
fi
if test $without_optimizations -eq 0; then
if test $ACLE_AVAILABLE -eq 1; then
- CFLAGS="${CFLAGS} -DARM_ACLE_CRC_HASH"
- SFLAGS="${SFLAGS} -DARM_ACLE_CRC_HASH"
+ CFLAGS="${CFLAGS} -DARM_ACLE"
+ SFLAGS="${SFLAGS} -DARM_ACLE"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo"
SFLAGS="${SFLAGS} -DARM_NEON_HASLD4"
fi
- CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
fi
if test $native -eq 0; then
ARCH="${ARCH}+crc"
fi
- CFLAGS="${CFLAGS} -DARM_ACLE_CRC_HASH"
- SFLAGS="${SFLAGS} -DARM_ACLE_CRC_HASH"
+ CFLAGS="${CFLAGS} -DARM_ACLE"
+ SFLAGS="${SFLAGS} -DARM_ACLE"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo"
fi
if test $native -eq 0; then
ARCH="${ARCH}+simd"
fi
- CFLAGS="${CFLAGS} -DARM_NEON -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
- SFLAGS="${SFLAGS} -DARM_NEON -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
+ CFLAGS="${CFLAGS} -DARM_NEON"
+ SFLAGS="${SFLAGS} -DARM_NEON"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
fi
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power_features.lo"
fi
if test $HAVE_VMX -eq 1 -a $HAVE_ALTIVEC_INTRIN -eq 1; then
- CFLAGS="${CFLAGS} -DPPC_VMX_ADLER32 -DPPC_VMX_SLIDEHASH"
- SFLAGS="${SFLAGS} -DPPC_VMX_ADLER32 -DPPC_VMX_SLIDEHASH"
+ CFLAGS="${CFLAGS} -DPPC_VMX"
+ SFLAGS="${SFLAGS} -DPPC_VMX"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_vmx.o slide_hash_vmx.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_vmx.lo slide_hash_vmx.lo"
fi
if test $HAVE_POWER8_INTRIN -eq 1; then
- CFLAGS="${CFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH"
- SFLAGS="${SFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH"
+ CFLAGS="${CFLAGS} -DPOWER8_VSX -DPOWER_FEATURES"
+ SFLAGS="${SFLAGS} -DPOWER8_VSX -DPOWER_FEATURES"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_power8.o chunkset_power8.o slide_hash_power8.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_power8.lo chunkset_power8.lo slide_hash_power8.lo"
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
#endif
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
#endif
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
#endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
#endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
#endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
#endif
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
#endif
/* adler32 folding */
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
/* memory chunking */
extern uint32_t chunksize_c(void);
extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-#ifdef X86_SSE2_CHUNKSET
+#ifdef X86_SSE2
extern uint32_t chunksize_sse2(void);
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
#ifdef X86_SSE41
extern uint8_t* chunkmemset_safe_sse41(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
extern uint32_t chunksize_avx(void);
extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
-#ifdef ARM_NEON_CHUNKSET
+#ifdef ARM_NEON
extern uint32_t chunksize_neon(void);
extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
-#ifdef POWER8_VSX_CHUNKSET
+#ifdef POWER8_VSX
extern uint32_t chunksize_power8(void);
extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
/* inflate fast loop */
extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
-#ifdef X86_SSE2_CHUNKSET
+#ifdef X86_SSE2
extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start);
#endif
#ifdef X86_SSE41
extern void inflate_fast_sse41(PREFIX3(stream) *strm, uint32_t start);
#endif
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
extern void inflate_fast_avx(PREFIX3(stream) *strm, uint32_t start);
#endif
-#ifdef ARM_NEON_CHUNKSET
+#ifdef ARM_NEON
extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
#endif
-#ifdef POWER8_VSX_CHUNKSET
+#ifdef POWER8_VSX
extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
#endif
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
-#elif defined(POWER8_VSX_CRC32)
+#elif defined(POWER8_VSX)
extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
#elif defined(S390_CRC32_VX)
extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, size_t len);
#ifdef DEFLATE_H_
/* insert_string */
extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
-#elif defined(ARM_ACLE_CRC_HASH)
+#elif defined(ARM_ACLE)
extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
#endif
/* quick_insert_string */
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
-#elif defined(ARM_ACLE_CRC_HASH)
+#elif defined(ARM_ACLE)
extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
#endif
#ifdef X86_SSE2
extern void slide_hash_sse2(deflate_state *s);
-#elif defined(ARM_NEON_SLIDEHASH)
+#elif defined(ARM_NEON)
extern void slide_hash_neon(deflate_state *s);
#endif
-#if defined(PPC_VMX_SLIDEHASH)
+#if defined(PPC_VMX)
extern void slide_hash_vmx(deflate_state *s);
#endif
-#if defined(POWER8_VSX_SLIDEHASH)
+#if defined(POWER8_VSX)
extern void slide_hash_power8(deflate_state *s);
#endif
#ifdef X86_AVX2
/* update_hash */
extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val);
-#elif defined(ARM_ACLE_CRC_HASH)
+#elif defined(ARM_ACLE)
extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
#endif
#endif
// Select arch-optimized functions
// X86 - SSE2
-#if defined(X86_SSE2) || defined(X86_SSE2_CHUNKSET)
+#ifdef X86_SSE2
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
{
-# ifdef X86_SSE2
ft.slide_hash = &slide_hash_sse2;
-# ifdef HAVE_BUILTIN_CTZ
- ft.longest_match = &longest_match_sse2;
- ft.longest_match_slow = &longest_match_slow_sse2;
- ft.compare256 = &compare256_sse2;
-# endif
-# endif
-# ifdef X86_SSE2_CHUNKSET
ft.chunksize = &chunksize_sse2;
ft.chunkmemset_safe = &chunkmemset_safe_sse2;
ft.inflate_fast = &inflate_fast_sse2;
+# ifdef HAVE_BUILTIN_CTZ
+ ft.longest_match = &longest_match_sse2;
+ ft.longest_match_slow = &longest_match_slow_sse2;
+ ft.compare256 = &compare256_sse2;
# endif
}
#endif
// X86 - SSSE3
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
if (x86_cpu_has_ssse3)
ft.adler32 = &adler32_ssse3;
#endif
ft.inflate_fast = &inflate_fast_sse41;
}
#endif
-#ifdef X86_SSE42_ADLER32
- if (x86_cpu_has_sse42)
- ft.adler32_fold_copy = &adler32_fold_copy_sse42;
-#endif
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
if (x86_cpu_has_sse42) {
+ ft.adler32_fold_copy = &adler32_fold_copy_sse42;
ft.update_hash = &update_hash_sse4;
ft.insert_string = &insert_string_sse4;
ft.quick_insert_string = &quick_insert_string_sse4;
}
#endif
// X86 - AVX
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
if (x86_cpu_has_avx2) {
ft.chunksize = &chunksize_avx;
ft.chunkmemset_safe = &chunkmemset_safe_avx;
ft.inflate_fast = &inflate_fast_avx;
- }
-#endif
-#ifdef X86_AVX2
- if (x86_cpu_has_avx2) {
ft.slide_hash = &slide_hash_avx2;
+ ft.adler32 = &adler32_avx2;
+ ft.adler32_fold_copy = &adler32_fold_copy_avx2;
# ifdef HAVE_BUILTIN_CTZ
ft.longest_match = &longest_match_avx2;
ft.longest_match_slow = &longest_match_slow_avx2;
# endif
}
#endif
-#ifdef X86_AVX2_ADLER32
- if (x86_cpu_has_avx2) {
- ft.adler32 = &adler32_avx2;
- ft.adler32_fold_copy = &adler32_fold_copy_avx2;
- }
-#endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
if (x86_cpu_has_avx512) {
ft.adler32 = &adler32_avx512;
ft.adler32_fold_copy = &adler32_fold_copy_avx512;
}
#endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
if (x86_cpu_has_avx512vnni) {
ft.adler32 = &adler32_avx512_vnni;
ft.adler32_fold_copy = &adler32_fold_copy_avx512_vnni;
// ARM - NEON
-#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
- if (arm_cpu_has_neon) {
- ft.compare256 = &compare256_neon;
- ft.longest_match = &longest_match_neon;
- ft.longest_match_slow = &longest_match_slow_neon;
- }
-#endif
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
# ifndef ARM_NOCHECK_NEON
if (arm_cpu_has_neon)
# endif
+ {
ft.adler32 = &adler32_neon;
-#endif
-#ifdef ARM_NEON_SLIDEHASH
-# ifndef ARM_NOCHECK_NEON
- if (arm_cpu_has_neon)
-# endif
ft.slide_hash = &slide_hash_neon;
-#endif
-#ifdef ARM_NEON_CHUNKSET
- if (arm_cpu_has_neon) {
ft.chunksize = &chunksize_neon;
ft.chunkmemset_safe = &chunkmemset_safe_neon;
ft.inflate_fast = &inflate_fast_neon;
+# ifdef HAVE_BUILTIN_CTZLL
+ ft.compare256 = &compare256_neon;
+ ft.longest_match = &longest_match_neon;
+ ft.longest_match_slow = &longest_match_slow_neon;
+# endif
}
#endif
// ARM - ACLE
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
if (arm_cpu_has_crc32) {
ft.crc32 = &crc32_acle;
ft.update_hash = &update_hash_acle;
}
#endif
+
// Power - VMX
-#ifdef PPC_VMX_SLIDEHASH
- if (power_cpu_has_altivec)
- ft.slide_hash = &slide_hash_vmx;
-#endif
-#ifdef PPC_VMX_ADLER32
- if (power_cpu_has_altivec)
+#ifdef PPC_VMX
+ if (power_cpu_has_altivec) {
ft.adler32 = &adler32_vmx;
+ ft.slide_hash = &slide_hash_vmx;
+ }
#endif
// Power8 - VSX
-#ifdef POWER8_VSX_SLIDEHASH
- if (power_cpu_has_arch_2_07)
- ft.slide_hash = &slide_hash_power8;
-#endif
-#ifdef POWER8_VSX_ADLER32
- if (power_cpu_has_arch_2_07)
+#ifdef POWER8_VSX
+ if (power_cpu_has_arch_2_07) {
ft.adler32 = &adler32_power8;
+ ft.chunkmemset_safe = &chunkmemset_safe_power8;
+ ft.chunksize = &chunksize_power8;
+ ft.inflate_fast = &inflate_fast_power8;
+ ft.slide_hash = &slide_hash_power8;
+ }
#endif
#ifdef POWER8_VSX_CRC32
if (power_cpu_has_arch_2_07)
ft.crc32 = &crc32_power8;
-#endif
-#ifdef POWER8_VSX_CHUNKSET
- if (power_cpu_has_arch_2_07) {
- ft.chunksize = &chunksize_power8;
- ft.chunkmemset_safe = &chunkmemset_safe_power8;
- ft.inflate_fast = &inflate_fast_power8;
- }
#endif
// Power9
#ifdef POWER9
}
#endif
+
// S390
#ifdef S390_CRC32_VX
if (PREFIX(s390_cpu_has_vx))
BENCHMARK_ADLER32(c, adler32_c, 1);
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
BENCHMARK_ADLER32(neon, adler32_neon, arm_cpu_has_neon);
#endif
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
BENCHMARK_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec);
#endif
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
BENCHMARK_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07);
#endif
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
BENCHMARK_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3);
#endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
BENCHMARK_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2);
#endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
BENCHMARK_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512);
#endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni);
#endif
BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1);
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
/* If we inline this copy for neon, the function would go here */
//BENCHMARK_ADLER32_COPY(neon, adler32_neon, arm_cpu_has_neon);
BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, arm_cpu_has_neon);
#endif
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, power_cpu_has_altivec);
BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, power_cpu_has_altivec);
#endif
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, power_cpu_has_arch_2_07);
BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, power_cpu_has_arch_2_07);
#endif
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, x86_cpu_has_ssse3);
BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, x86_cpu_has_sse42);
#endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, x86_cpu_has_avx2);
BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, x86_cpu_has_avx2);
#endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, x86_cpu_has_avx512);
BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, x86_cpu_has_avx512);
#endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, x86_cpu_has_avx512vnni);
BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, x86_cpu_has_avx512vnni);
#endif
BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1);
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
BENCHMARK_CRC32(acle, crc32_acle, arm_cpu_has_crc32);
-#elif defined(POWER8_VSX_CRC32)
+#elif defined(POWER8_VSX)
BENCHMARK_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07);
#elif defined(S390_CRC32_VX)
BENCHMARK_CRC32(vx, PREFIX(s390_crc32_vx), PREFIX(s390_cpu_has_vx));
BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
-#ifdef ARM_NEON_SLIDEHASH
+#ifdef ARM_NEON
BENCHMARK_SLIDEHASH(neon, slide_hash_neon, arm_cpu_has_neon);
#endif
-#ifdef POWER8_VSX_SLIDEHASH
+#ifdef POWER8_VSX
BENCHMARK_SLIDEHASH(power8, slide_hash_power8, power_cpu_has_arch_2_07);
#endif
-#ifdef PPC_VMX_SLIDEHASH
+#ifdef PPC_VMX
BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, power_cpu_has_altivec);
#endif
TEST_ADLER32(c, adler32_c, 1)
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
TEST_ADLER32(neon, adler32_neon, arm_cpu_has_neon)
-#elif defined(POWER8_VSX_ADLER32)
+#elif defined(POWER8_VSX)
TEST_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07)
-#elif defined(PPC_VMX_ADLER32)
+#elif defined(PPC_VMX)
TEST_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec)
#endif
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
TEST_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3)
#endif
-#ifdef X86_SSE41_ADLER32
-TEST_ADLER32(sse41, adler32_sse41, x86_cpu_has_sse41)
-#endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
TEST_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2)
#endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
TEST_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512)
#endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni)
#endif
TEST_CRC32(braid, PREFIX(crc32_braid), 1)
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
TEST_CRC32(acle, crc32_acle, arm_cpu_has_crc32)
#elif defined(POWER8_VSX_CRC32)
TEST_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07)
!endif
WFLAGS = $(WFLAGS) \
- -DARM_ACLE_CRC_HASH \
+ -DARM_ACLE \
-D__ARM_NEON__=1 \
-DARM_NEON \
- -DARM_NEON_ADLER32 \
- -DARM_NEON_CHUNKSET \
- -DARM_NEON_SLIDEHASH \
-DARM_NOCHECK_NEON \
#
OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj
!endif
!if "$(WITH_ACLE)" != ""
-WFLAGS = $(WFLAGS) -DARM_ACLE_CRC_HASH
+WFLAGS = $(WFLAGS) -DARM_ACLE
OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj
!endif
!if "$(WITH_VFPV3)" != ""
WFLAGS = $(WFLAGS) \
-D__ARM_NEON__=1 \
-DARM_NEON \
- -DARM_NEON_ADLER32 \
- -DARM_NEON_CHUNKSET \
- -DARM_NEON_SLIDEHASH \
-DARM_NOCHECK_NEON \
#
OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj
-DX86_FEATURES \
-DX86_PCLMULQDQ_CRC \
-DX86_SSE2 \
- -DX86_SSE42_ADLER32 \
+ -DX86_SSE42 \
-DX86_SSE42_CRC_INTRIN \
- -DX86_SSE42_CRC_HASH \
- -DX86_SSSE3_ADLER32 \
- -DX86_AVX2 \
- -DX86_AVX2_ADLER32 \
- -DX86_AVX_CHUNKSET \
- -DX86_SSE2_CHUNKSET
+ -DX86_SSSE3 \
+ -DX86_AVX2
LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest
ARFLAGS = -nologo