git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Reduce the amount of different defines required for arch-specific optimizations.
author Hans Kristian Rosbach <hk-git@circlestorm.org>
Mon, 6 Feb 2023 13:41:32 +0000 (14:41 +0100)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 17 Feb 2023 14:11:25 +0000 (15:11 +0100)
Also removed a reference to a nonexistent adler32_sse41 in test/test_adler32.cc.

35 files changed:
CMakeLists.txt
arch/arm/adler32_neon.c
arch/arm/chunkset_neon.c
arch/arm/crc32_acle.c
arch/arm/insert_string_acle.c
arch/arm/neon_intrins.h
arch/arm/slide_hash_neon.c
arch/power/adler32_power8.c
arch/power/adler32_vmx.c
arch/power/chunkset_power8.c
arch/power/slide_hash_power8.c
arch/power/slide_hash_vmx.c
arch/x86/adler32_avx2.c
arch/x86/adler32_avx2_p.h
arch/x86/adler32_avx2_tpl.h
arch/x86/adler32_avx512.c
arch/x86/adler32_avx512_tpl.h
arch/x86/adler32_avx512_vnni.c
arch/x86/adler32_sse42.c
arch/x86/adler32_ssse3.c
arch/x86/adler32_ssse3_p.h
arch/x86/chunkset_avx.c
arch/x86/insert_string_sse42.c
configure
cpu_features.h
functable.c
test/benchmarks/benchmark_adler32.cc
test/benchmarks/benchmark_adler32_copy.cc
test/benchmarks/benchmark_crc32.cc
test/benchmarks/benchmark_slidehash.cc
test/test_adler32.cc
test/test_crc32.cc
win32/Makefile.a64
win32/Makefile.arm
win32/Makefile.msc

index 8aad597f98b430064db3d935aaa1ba7f7b1ff913..df5cc03e9db2df1a94fbb85706ed7bf5ef34c937 100644 (file)
@@ -616,7 +616,7 @@ if(WITH_OPTIM)
         if(WITH_ACLE AND NOT "${ARCH}" MATCHES "armv[2-7]")
             check_acle_compiler_flag()
             if(HAVE_ACLE_FLAG)
-                add_definitions(-DARM_ACLE_CRC_HASH)
+                add_definitions(-DARM_ACLE)
                 set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
                 set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}")
                 list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
@@ -630,7 +630,7 @@ if(WITH_OPTIM)
         if(WITH_NEON)
             check_neon_compiler_flag()
             if(MFPU_NEON_AVAILABLE)
-                add_definitions(-DARM_NEON -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
+                add_definitions(-DARM_NEON)
                 set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c
                     ${ARCHDIR}/compare256_neon.c ${ARCHDIR}/slide_hash_neon.c)
                 list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
@@ -668,8 +668,7 @@ if(WITH_OPTIM)
             if(HAVE_VMX)
                 add_definitions(-DPPC_FEATURES)
                 if(HAVE_ALTIVEC)
-                    add_definitions(-DPPC_VMX_ADLER32)
-                    add_definitions(-DPPC_VMX_SLIDEHASH)
+                    add_definitions(-DPPC_VMX)
                     set(PPC_SRCS ${ARCHDIR}/adler32_vmx.c ${ARCHDIR}/slide_hash_vmx.c)
                     list(APPEND ZLIB_ARCH_SRCS ${PPC_SRCS})
                     add_feature_info(ALTIVEC 1 "Support the AltiVec instruction set, using \"-maltivec\"")
@@ -682,11 +681,8 @@ if(WITH_OPTIM)
         # Power8 specific options and files
         if(WITH_POWER8)
             if(HAVE_POWER8_INTRIN)
-                add_definitions(-DPOWER8)
+                add_definitions(-DPOWER8_VSX)
                 add_definitions(-DPOWER_FEATURES)
-                add_definitions(-DPOWER8_VSX_ADLER32)
-                add_definitions(-DPOWER8_VSX_CHUNKSET)
-                add_definitions(-DPOWER8_VSX_SLIDEHASH)
                 set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c)
                 if("${ARCH}" MATCHES "powerpc64(le)?")
                     add_definitions(-DPOWER8_VSX_CRC32)
@@ -748,7 +744,7 @@ if(WITH_OPTIM)
         if(WITH_AVX2)
             check_avx2_intrinsics()
             if(HAVE_AVX2_INTRIN)
-                add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET)
+                add_definitions(-DX86_AVX2)
                 set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
                 add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
                 list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c)
@@ -766,7 +762,7 @@ if(WITH_OPTIM)
         if(WITH_AVX512)
             check_avx512_intrinsics()
             if(HAVE_AVX512_INTRIN)
-                add_definitions(-DX86_AVX512 -DX86_AVX512_ADLER32)
+                add_definitions(-DX86_AVX512)
                 list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c)
                 add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${AVX512_SRCS})
@@ -782,7 +778,7 @@ if(WITH_OPTIM)
         if(WITH_AVX512VNNI)
             check_avx512vnni_intrinsics()
             if(HAVE_AVX512VNNI_INTRIN)
-                add_definitions(-DX86_AVX512VNNI -DX86_AVX512VNNI_ADLER32)
+                add_definitions(-DX86_AVX512VNNI)
                 add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"")
                 list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c)
                 list(APPEND ZLIB_ARCH_SRCS ${AVX512VNNI_SRCS})
@@ -805,7 +801,7 @@ if(WITH_OPTIM)
         if(WITH_SSE42)
             check_sse42_intrinsics()
             if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
-                add_definitions(-DX86_SSE42_CRC_HASH -DX86_SSE42_ADLER32)
+                add_definitions(-DX86_SSE42)
                 set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
                 add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
@@ -821,7 +817,7 @@ if(WITH_OPTIM)
         if(WITH_SSE2)
             check_sse2_intrinsics()
             if(HAVE_SSE2_INTRIN)
-                add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
+                add_definitions(-DX86_SSE2)
                 set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
                 list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
                 if(NOT ${ARCH} MATCHES "x86_64")
@@ -838,7 +834,7 @@ if(WITH_OPTIM)
         if(WITH_SSSE3)
             check_ssse3_intrinsics()
             if(HAVE_SSSE3_INTRIN)
-                add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
+                add_definitions(-DX86_SSSE3)
                 set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
                 add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
index 7f898d18a71dbb0a20f622894d7fba1d22019b72..f1c43ff0474973cadbdb3bc9c6e5126b67a7013d 100644 (file)
@@ -5,7 +5,7 @@
  *   Adam Stylinski <kungfujesus06@gmail.com>
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
 #include "neon_intrins.h"
 #include "../../zbuild.h"
 #include "../../adler32_p.h"
index b119f212455d261b0935cb2fa2bd4d1b0bd1dfe8..668c0019e96da5d7da072736548591f76e0b8614 100644 (file)
@@ -2,7 +2,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef ARM_NEON_CHUNKSET
+#ifdef ARM_NEON
 #include "neon_intrins.h"
 #include "../../zbuild.h"
 #include "../generic/chunk_permute_table.h"
index 445c370aa814e1bdc1b0a259218d412b09a0e825..a4e54d7182719740ba56899e849885af3c05302c 100644 (file)
@@ -5,7 +5,7 @@
  *
 */
 
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
 #ifdef _MSC_VER
 #  include <intrin.h>
 #else
index de9902384408bc53c094ed0fa6133ab89131d0f0..9ac3ccb42f234f3200133713ca4e6de339ffbd34 100644 (file)
@@ -5,7 +5,7 @@
  *
  */
 
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
 #ifndef _MSC_VER
 #  include <arm_acle.h>
 #endif
index 06e310c9f9e613a315e12a66699e614b0b499729..d6b57f6414c402af02e8ebfd43feddbd84837534 100644 (file)
@@ -7,7 +7,7 @@
 #  include <arm_neon.h>
 #endif
 
-#if defined(ARM_NEON_ADLER32) && !defined(__aarch64__) && !defined(_M_ARM64)
+#if defined(ARM_NEON) && !defined(__aarch64__) && !defined(_M_ARM64)
 /* Compatibility shim for the _high family of functions */
 #define vmull_high_u8(a, b) vmull_u8(vget_high_u8(a), vget_high_u8(b))
 #define vmlal_high_u8(a, b, c) vmlal_u8(a, vget_high_u8(b), vget_high_u8(c))
@@ -15,7 +15,7 @@
 #define vaddw_high_u8(a, b) vaddw_u8(a, vget_high_u8(b))
 #endif
 
-#ifdef ARM_NEON_SLIDEHASH
+#ifdef ARM_NEON
 
 #define vqsubq_u16_x4_x1(out, a, b) do { \
     out.val[0] = vqsubq_u16(a.val[0], b); \
@@ -24,9 +24,8 @@
     out.val[3] = vqsubq_u16(a.val[3], b); \
 } while (0)
 
-#endif
 
-#if !defined(ARM_NEON_HASLD4) && (defined(ARM_NEON_ADLER32) || defined(ARM_NEON_SLIDEHASH))
+#  ifndef ARM_NEON_HASLD4
 
 static inline uint16x8x4_t vld1q_u16_x4(uint16_t const *a) {
     uint16x8x4_t ret = (uint16x8x4_t) {{
@@ -52,6 +51,7 @@ static inline void vst1q_u16_x4(uint16_t *p, uint16x8x4_t a) {
     vst1q_u16(p + 16, a.val[2]);
     vst1q_u16(p + 24, a.val[3]);
 }
-#endif // HASLD4 check
+#  endif // HASLD4 check
+#endif
 
 #endif // include guard ARM_NEON_INTRINS_H
index 5bb4dc505c6cd6c4c1f4bbe4288125053ff40203..a96ca11799b5a82cfdfffaf396aa5eb0ceaec277 100644 (file)
@@ -8,7 +8,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#if defined(ARM_NEON_SLIDEHASH)
+#ifdef ARM_NEON
 #include "neon_intrins.h"
 #include "../../zbuild.h"
 #include "../../deflate.h"
index 737c6f2fbcae6324a1dc003a795673b382a2f7e9..4aaea9f50b3a1d59c107311f79da06be1a033398 100644 (file)
@@ -36,7 +36,7 @@
  * https://www.ietf.org/rfc/rfc1950.txt
  */
 
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
 
 #include <altivec.h>
 #include "zbuild.h"
@@ -150,4 +150,4 @@ Z_INTERNAL uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t le
     return adler32_len_16(s1, buf, len, s2);
 }
 
-#endif /* POWER8_VSX_ADLER32 */
+#endif /* POWER8_VSX */
index 47193286a79a6da71ff5c1cf41978f6fb915b1d9..ef1649b5890927730ab8f00e08b783bc8b47a6f7 100644 (file)
@@ -5,7 +5,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
 #include <altivec.h>
 #include "zbuild.h"
 #include "adler32_p.h"
index 389be0817168a90b73369a7d1e7152626fe7a259..443aae92f116f42d625a49258c3c1ee5c4c06776 100644 (file)
@@ -2,7 +2,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef POWER8_VSX_CHUNKSET
+#ifdef POWER8_VSX
 #include <altivec.h>
 #include "../../zbuild.h"
 
index 5b078ec9f937bac94bee5be7fce0645f38b219a5..d01e0acd5661574db3537822d3c15d1b6e5ca3a9 100644 (file)
@@ -4,9 +4,9 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef POWER8_VSX_SLIDEHASH
+#ifdef POWER8_VSX
 
 #define SLIDE_PPC slide_hash_power8
 #include "slide_ppc_tpl.h"
 
-#endif /* POWER8_VSX_SLIDEHASH */
+#endif /* POWER8_VSX */
index cf9bd7b797ef702a28f9c36a1afa17fbdb3b59e8..5a87ef7d9aa0b1e54554f7ab6049238681fc0a6b 100644 (file)
@@ -2,9 +2,9 @@
  * Copyright (C) 2017-2021 Mika T. Lindqvist <postmaster@raasu.org>
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
-#ifdef PPC_VMX_SLIDEHASH
+#ifdef PPC_VMX
 
 #define SLIDE_PPC slide_hash_vmx
 #include "slide_ppc_tpl.h"
 
-#endif /* PPC_VMX_SLIDEHASH */
+#endif /* PPC_VMX */
index dcd1166f342b1bd9505eab289e955985be75b9a6..797d299e09b2efdb9fbcf49c03f4ca8728e5ff5d 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <immintrin.h>
 
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
 
 #include "adler32_avx2_tpl.h"
 
index f7079bf3eb2c73383ed6718ef6d69d8287664b1b..f0f8a4a887b13957f4a3f764ae6b1412db19268f 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef ADLER32_AVX2_P_H_
 #define ADLER32_AVX2_P_H_
 
-#if defined(X86_AVX2_ADLER32) || defined(X86_AVX512VNNI_ADLER32)
+#if defined(X86_AVX2) || defined(X86_AVX512VNNI)
 
 /* 32 bit horizontal sum, adapted from Agner Fog's vector library. */
 static inline uint32_t hsum256(__m256i x) {
index 0b2e89be43a813a0abb6dd6875726bee6bed0a53..a94f44b4f948734a81c5733eb7af6a0b3f36d321 100644 (file)
@@ -10,7 +10,7 @@
 #include "../../fallback_builtins.h"
 #include "adler32_avx2_p.h"
 
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
 extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len);
 
index c0bf0721f2c8662c69d966d70dab931e9660851a..e6ebb05dc87c5de4ff3f49ea1f12344163bb8ba9 100644 (file)
@@ -6,7 +6,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
 
 #include "adler32_avx512_tpl.h"
 
index 6ed39b45dfa5c4b5e7358a9ca2d1f172a344a401..7546afef5e34c9dbd0f3d721c1f36e44414a44e4 100644 (file)
@@ -11,7 +11,7 @@
 #include <immintrin.h>
 #include "adler32_avx512_p.h"
 
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
 
 #ifdef COPY
 Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
@@ -35,9 +35,9 @@ rem_peel:
         _mm512_mask_storeu_epi8(dst, storemask, copy_vec);
 #endif
 
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
         return adler32_avx2(adler, src, len);
-#elif defined(X86_SSSE3_ADLER32)
+#elif defined(X86_SSSE3)
         return adler32_ssse3(adler, src, len);
 #else
         return adler32_len_16(adler0, src, len, adler1);
index 42a166062fb4cbcd5aa64d68881e6b3e5aed39d6..8dcc93d05050862c89ed8b56ea53f9ace4c66bb4 100644 (file)
@@ -7,7 +7,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
 
 #include "../../zbuild.h"
 #include "../../adler32_p.h"
@@ -28,16 +28,16 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size
 
 rem_peel:
     if (len < 32)
-#if defined(X86_SSSE3_ADLER32)
+#if defined(X86_SSSE3)
         return adler32_ssse3(adler, src, len);
 #else
         return adler32_len_16(adler0, src, len, adler1);
 #endif
 
     if (len < 64)
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
         return adler32_avx2(adler, src, len);
-#elif defined(X86_SSE3_ADLER32)
+#elif defined(X86_SSE3)
         return adler32_ssse3(adler, src, len);
 #else
         return adler32_len_16(adler0, src, len, adler1);
@@ -135,7 +135,7 @@ rem_peel_copy:
         __m256i copy_vec = _mm256_maskz_loadu_epi8(storemask, src);
         _mm256_mask_storeu_epi8(dst, storemask, copy_vec);
 
-#if defined(X86_SSSE3_ADLER32)
+#if defined(X86_SSSE3)
         return adler32_ssse3(adler, src, len);
 #else
         return adler32_len_16(adler0, src, len, adler1);
index ec0513409bb044b57e1f57b21f4361f404741b9d..257a360982ed8737536929cb9a59e6a472d1ffec 100644 (file)
@@ -12,7 +12,7 @@
 #include "adler32_ssse3_p.h"
 #include <immintrin.h>
 
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
 
 Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
     uint32_t adler0, adler1;
index 1f4abba507807133637fc4b97e7b9f57c167bc04..99ce795823b7b09b0381ac015906d13ecd768712 100644 (file)
@@ -10,7 +10,7 @@
 #include "../../adler32_p.h"
 #include "adler32_ssse3_p.h"
 
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
 
 #include <immintrin.h>
 
index 0b7ddcf9da426ad6b9fe3a58a0c3491e1c110758..d7ec3fe0d5a306dca303128644cfc7c4d13181ea 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef ADLER32_SSSE3_P_H_
 #define ADLER32_SSSE3_P_H_
 
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
 
 #include <immintrin.h>
 #include <stdint.h>
index e128e8f7081978c3fee10ea513c9b920e599f378..c2df2322fe248568f8727bcd1032cd03abda10e3 100644 (file)
@@ -3,7 +3,7 @@
  */
 #include "zbuild.h"
 
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
 #include <immintrin.h>
 #include "../generic/chunk_permute_table.h"
 
index 6fe4c81e08b0c74164a199a453a86692deb66739..2668f0eaa8d010a622e3c7ec227b123e6b342bd5 100644 (file)
@@ -45,6 +45,6 @@
 #define INSERT_STRING       insert_string_sse4
 #define QUICK_INSERT_STRING quick_insert_string_sse4
 
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
 #  include "../../insert_string_tpl.h"
 #endif
index c0f9524b5ff549b7cc26cfd3c9296c2b7733fb6e..fdb5b69d7efb6647a012f2a913e93cf4d5783017 100755 (executable)
--- a/configure
+++ b/configure
@@ -1545,8 +1545,8 @@ case "${ARCH}" in
             check_avx2_intrinsics
 
             if test ${HAVE_AVX2_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
-                SFLAGS="${SFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
+                CFLAGS="${CFLAGS} -DX86_AVX2"
+                SFLAGS="${SFLAGS} -DX86_AVX2"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_hash_avx2.o chunkset_avx.o compare256_avx2.o adler32_avx2.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_hash_avx2.lo chunkset_avx.lo compare256_avx2.lo adler32_avx2.lo"
             fi
@@ -1554,8 +1554,8 @@ case "${ARCH}" in
             check_avx512_intrinsics
 
             if test ${HAVE_AVX512_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_AVX512 -DX86_AVX512_ADLER32"
-                SFLAGS="${SFLAGS} -DX86_AVX512 -DX86_AVX512_ADLER32"
+                CFLAGS="${CFLAGS} -DX86_AVX512"
+                SFLAGS="${SFLAGS} -DX86_AVX512"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_avx512.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512.lo"
 
@@ -1570,8 +1570,8 @@ case "${ARCH}" in
             check_avx512vnni_intrinsics
 
             if test ${HAVE_AVX512VNNI_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_AVX512VNNI -DX86_AVX512VNNI_ADLER32"
-                SFLAGS="${SFLAGS} -DX86_AVX512VNNI -DX86_AVX512VNNI_ADLER32"
+                CFLAGS="${CFLAGS} -DX86_AVX512VNNI"
+                SFLAGS="${SFLAGS} -DX86_AVX512VNNI"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_avx512_vnni.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo"
             fi
@@ -1589,8 +1589,8 @@ case "${ARCH}" in
             check_sse42_intrinsics
 
             if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH -DX86_SSE42_ADLER32"
-                SFLAGS="${SFLAGS} -DX86_SSE42_CRC_HASH -DX86_SSE42_ADLER32"
+                CFLAGS="${CFLAGS} -DX86_SSE42"
+                SFLAGS="${SFLAGS} -DX86_SSE42"
 
                 if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
                   CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN"
@@ -1604,8 +1604,8 @@ case "${ARCH}" in
             check_sse2_intrinsics
 
             if test ${HAVE_SSE2_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
-                SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
+                CFLAGS="${CFLAGS} -DX86_SSE2"
+                SFLAGS="${SFLAGS} -DX86_SSE2"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o compare256_sse2.o slide_hash_sse2.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo compare256_sse2.lo slide_hash_sse2.lo"
 
@@ -1618,8 +1618,8 @@ case "${ARCH}" in
             check_ssse3_intrinsics
 
             if test ${HAVE_SSSE3_INTRIN} -eq 1; then
-                CFLAGS="${CFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
-                SFLAGS="${SFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
+                CFLAGS="${CFLAGS} -DX86_SSSE3"
+                SFLAGS="${SFLAGS} -DX86_SSSE3"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo"
             fi
@@ -1785,9 +1785,6 @@ EOF
                             SFLAGS="${SFLAGS} -DARM_NEON_HASLD4"
                         fi
 
-                        CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-                        SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-
                         ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
                         ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
                     fi
@@ -1812,9 +1809,6 @@ EOF
                             SFLAGS="${SFLAGS} -DARM_NEON_HASLD4"
                         fi
 
-                        CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-                        SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-
                         ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
                         ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
                     fi
@@ -1825,8 +1819,8 @@ EOF
 
                 if test $without_optimizations -eq 0; then
                     if test $ACLE_AVAILABLE -eq 1; then
-                        CFLAGS="${CFLAGS} -DARM_ACLE_CRC_HASH"
-                        SFLAGS="${SFLAGS} -DARM_ACLE_CRC_HASH"
+                        CFLAGS="${CFLAGS} -DARM_ACLE"
+                        SFLAGS="${SFLAGS} -DARM_ACLE"
 
                         ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o"
                         ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo"
@@ -1845,9 +1839,6 @@ EOF
                             SFLAGS="${SFLAGS} -DARM_NEON_HASLD4"
                         fi
 
-                        CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-                        SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-
                         ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
                         ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
                     fi
@@ -1899,8 +1890,8 @@ EOF
                 if test $native -eq 0; then
                     ARCH="${ARCH}+crc"
                 fi
-                CFLAGS="${CFLAGS} -DARM_ACLE_CRC_HASH"
-                SFLAGS="${SFLAGS} -DARM_ACLE_CRC_HASH"
+                CFLAGS="${CFLAGS} -DARM_ACLE"
+                SFLAGS="${SFLAGS} -DARM_ACLE"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo"
             fi
@@ -1909,8 +1900,8 @@ EOF
                 if test $native -eq 0; then
                     ARCH="${ARCH}+simd"
                 fi
-                CFLAGS="${CFLAGS} -DARM_NEON -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
-                SFLAGS="${SFLAGS} -DARM_NEON -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
+                CFLAGS="${CFLAGS} -DARM_NEON"
+                SFLAGS="${SFLAGS} -DARM_NEON"
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o compare256_neon.o slide_hash_neon.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo compare256_neon.lo slide_hash_neon.lo"
             fi
@@ -1949,15 +1940,15 @@ EOF
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power_features.lo"
             fi
             if test $HAVE_VMX -eq 1 -a $HAVE_ALTIVEC_INTRIN -eq 1; then
-                CFLAGS="${CFLAGS} -DPPC_VMX_ADLER32 -DPPC_VMX_SLIDEHASH"
-                SFLAGS="${SFLAGS} -DPPC_VMX_ADLER32 -DPPC_VMX_SLIDEHASH"
+                CFLAGS="${CFLAGS} -DPPC_VMX"
+                SFLAGS="${SFLAGS} -DPPC_VMX"
 
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_vmx.o slide_hash_vmx.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_vmx.lo slide_hash_vmx.lo"
             fi
             if test $HAVE_POWER8_INTRIN -eq 1; then
-                CFLAGS="${CFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH"
-                SFLAGS="${SFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_CHUNKSET -DPOWER8_VSX_SLIDEHASH"
+                CFLAGS="${CFLAGS} -DPOWER8_VSX -DPOWER_FEATURES"
+                SFLAGS="${SFLAGS} -DPOWER8_VSX -DPOWER_FEATURES"
 
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_power8.o chunkset_power8.o slide_hash_power8.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_power8.lo chunkset_power8.lo slide_hash_power8.lo"
index f71372dd69e207ba717ba4c5d9cb7f86e175b044..c098ee2d342762677be8816ff78f9f8d3026d59a 100644 (file)
@@ -26,39 +26,39 @@ extern void cpu_check_features(void);
 typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
 
 extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
 extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
 extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
 extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
 extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
 extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
 extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
 extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 
 /* adler32 folding */
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
 extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
 extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
 extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
 extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
@@ -74,22 +74,22 @@ extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 /* memory chunking */
 extern uint32_t chunksize_c(void);
 extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
-#ifdef X86_SSE2_CHUNKSET
+#ifdef X86_SSE2
 extern uint32_t chunksize_sse2(void);
 extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
 #ifdef X86_SSE41
 extern uint8_t* chunkmemset_safe_sse41(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
 extern uint32_t chunksize_avx(void);
 extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
-#ifdef ARM_NEON_CHUNKSET
+#ifdef ARM_NEON
 extern uint32_t chunksize_neon(void);
 extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
-#ifdef POWER8_VSX_CHUNKSET
+#ifdef POWER8_VSX
 extern uint32_t chunksize_power8(void);
 extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
 #endif
@@ -102,19 +102,19 @@ typedef struct zng_stream_s zng_stream;
 
 /* inflate fast loop */
 extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
-#ifdef X86_SSE2_CHUNKSET
+#ifdef X86_SSE2
 extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start);
 #endif
 #ifdef X86_SSE41
 extern void inflate_fast_sse41(PREFIX3(stream) *strm, uint32_t start);
 #endif
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
 extern void inflate_fast_avx(PREFIX3(stream) *strm, uint32_t start);
 #endif
-#ifdef ARM_NEON_CHUNKSET
+#ifdef ARM_NEON
 extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
 #endif
-#ifdef POWER8_VSX_CHUNKSET
+#ifdef POWER8_VSX
 extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
@@ -122,9 +122,9 @@ extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
 typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
 
 extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
 extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
-#elif defined(POWER8_VSX_CRC32)
+#elif defined(POWER8_VSX)
 extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
 #elif defined(S390_CRC32_VX)
 extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, size_t len);
@@ -159,9 +159,9 @@ extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
 #ifdef DEFLATE_H_
 /* insert_string */
 extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
 extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
-#elif defined(ARM_ACLE_CRC_HASH)
+#elif defined(ARM_ACLE)
 extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
 #endif
 
@@ -213,9 +213,9 @@ extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match)
 
 /* quick_insert_string */
 extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
 extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
-#elif defined(ARM_ACLE_CRC_HASH)
+#elif defined(ARM_ACLE)
 extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
 #endif
 
@@ -224,13 +224,13 @@ typedef void (*slide_hash_func)(deflate_state *s);
 
 #ifdef X86_SSE2
 extern void slide_hash_sse2(deflate_state *s);
-#elif defined(ARM_NEON_SLIDEHASH)
+#elif defined(ARM_NEON)
 extern void slide_hash_neon(deflate_state *s);
 #endif
-#if defined(PPC_VMX_SLIDEHASH)
+#if defined(PPC_VMX)
 extern void slide_hash_vmx(deflate_state *s);
 #endif
-#if defined(POWER8_VSX_SLIDEHASH)
+#if defined(POWER8_VSX)
 extern void slide_hash_power8(deflate_state *s);
 #endif
 #ifdef X86_AVX2
@@ -239,9 +239,9 @@ extern void slide_hash_avx2(deflate_state *s);
 
 /* update_hash */
 extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
 extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val);
-#elif defined(ARM_ACLE_CRC_HASH)
+#elif defined(ARM_ACLE)
 extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
 #endif
 #endif
index b64b4bd65ab955fcae5d3ce0b0a7813e2f8cfbe9..a02aae77f0149a5b0e474008f98057a34fe89d7d 100644 (file)
@@ -56,28 +56,24 @@ static void init_functable(void) {
     // Select arch-optimized functions
 
     // X86 - SSE2
-#if defined(X86_SSE2) || defined(X86_SSE2_CHUNKSET)
+#ifdef X86_SSE2
 #  if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
     if (x86_cpu_has_sse2)
 #  endif
     {
-#  ifdef X86_SSE2
         ft.slide_hash = &slide_hash_sse2;
-#    ifdef HAVE_BUILTIN_CTZ
-        ft.longest_match = &longest_match_sse2;
-        ft.longest_match_slow = &longest_match_slow_sse2;
-        ft.compare256 = &compare256_sse2;
-#    endif
-#  endif
-#  ifdef X86_SSE2_CHUNKSET
         ft.chunksize = &chunksize_sse2;
         ft.chunkmemset_safe = &chunkmemset_safe_sse2;
         ft.inflate_fast = &inflate_fast_sse2;
+#  ifdef HAVE_BUILTIN_CTZ
+        ft.longest_match = &longest_match_sse2;
+        ft.longest_match_slow = &longest_match_slow_sse2;
+        ft.compare256 = &compare256_sse2;
 #  endif
     }
 #endif
     // X86 - SSSE3
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
     if (x86_cpu_has_ssse3)
         ft.adler32 = &adler32_ssse3;
 #endif
@@ -88,12 +84,9 @@ static void init_functable(void) {
         ft.inflate_fast = &inflate_fast_sse41;
     }
 #endif
-#ifdef X86_SSE42_ADLER32
-    if (x86_cpu_has_sse42)
-        ft.adler32_fold_copy = &adler32_fold_copy_sse42;
-#endif
-#ifdef X86_SSE42_CRC_HASH
+#ifdef X86_SSE42
     if (x86_cpu_has_sse42) {
+        ft.adler32_fold_copy = &adler32_fold_copy_sse42;
         ft.update_hash = &update_hash_sse4;
         ft.insert_string = &insert_string_sse4;
         ft.quick_insert_string = &quick_insert_string_sse4;
@@ -110,16 +103,14 @@ static void init_functable(void) {
     }
 #endif
     // X86 - AVX
-#ifdef X86_AVX_CHUNKSET
+#ifdef X86_AVX2
     if (x86_cpu_has_avx2) {
         ft.chunksize = &chunksize_avx;
         ft.chunkmemset_safe = &chunkmemset_safe_avx;
         ft.inflate_fast = &inflate_fast_avx;
-    }
-#endif
-#ifdef X86_AVX2
-    if (x86_cpu_has_avx2) {
         ft.slide_hash = &slide_hash_avx2;
+        ft.adler32 = &adler32_avx2;
+        ft.adler32_fold_copy = &adler32_fold_copy_avx2;
 #  ifdef HAVE_BUILTIN_CTZ
         ft.longest_match = &longest_match_avx2;
         ft.longest_match_slow = &longest_match_slow_avx2;
@@ -127,19 +118,13 @@ static void init_functable(void) {
 #  endif
     }
 #endif
-#ifdef X86_AVX2_ADLER32
-    if (x86_cpu_has_avx2) {
-        ft.adler32 = &adler32_avx2;
-        ft.adler32_fold_copy = &adler32_fold_copy_avx2;
-    }
-#endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
     if (x86_cpu_has_avx512) {
         ft.adler32 = &adler32_avx512;
         ft.adler32_fold_copy = &adler32_fold_copy_avx512;
     }
 #endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
     if (x86_cpu_has_avx512vnni) {
         ft.adler32 = &adler32_avx512_vnni;
         ft.adler32_fold_copy = &adler32_fold_copy_avx512_vnni;
@@ -148,34 +133,25 @@ static void init_functable(void) {
 
 
     // ARM - NEON
-#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
-    if (arm_cpu_has_neon) {
-        ft.compare256 = &compare256_neon;
-        ft.longest_match = &longest_match_neon;
-        ft.longest_match_slow = &longest_match_slow_neon;
-    }
-#endif
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
 #  ifndef ARM_NOCHECK_NEON
     if (arm_cpu_has_neon)
 #  endif
+    {
         ft.adler32 = &adler32_neon;
-#endif
-#ifdef ARM_NEON_SLIDEHASH
-#  ifndef ARM_NOCHECK_NEON
-    if (arm_cpu_has_neon)
-#  endif
         ft.slide_hash = &slide_hash_neon;
-#endif
-#ifdef ARM_NEON_CHUNKSET
-    if (arm_cpu_has_neon) {
         ft.chunksize = &chunksize_neon;
         ft.chunkmemset_safe = &chunkmemset_safe_neon;
         ft.inflate_fast = &inflate_fast_neon;
+#  ifdef HAVE_BUILTIN_CTZLL
+        ft.compare256 = &compare256_neon;
+        ft.longest_match = &longest_match_neon;
+        ft.longest_match_slow = &longest_match_slow_neon;
+#  endif
     }
 #endif
     // ARM - ACLE
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
     if (arm_cpu_has_crc32) {
         ft.crc32 = &crc32_acle;
         ft.update_hash = &update_hash_acle;
@@ -184,34 +160,27 @@ static void init_functable(void) {
     }
 #endif
 
+
     // Power - VMX
-#ifdef PPC_VMX_SLIDEHASH
-    if (power_cpu_has_altivec)
-        ft.slide_hash = &slide_hash_vmx;
-#endif
-#ifdef PPC_VMX_ADLER32
-    if (power_cpu_has_altivec)
+#ifdef PPC_VMX
+    if (power_cpu_has_altivec) {
         ft.adler32 = &adler32_vmx;
+        ft.slide_hash = &slide_hash_vmx;
+    }
 #endif
     // Power8 - VSX
-#ifdef POWER8_VSX_SLIDEHASH
-    if (power_cpu_has_arch_2_07)
-        ft.slide_hash = &slide_hash_power8;
-#endif
-#ifdef POWER8_VSX_ADLER32
-    if (power_cpu_has_arch_2_07)
+#ifdef POWER8_VSX
+    if (power_cpu_has_arch_2_07) {
         ft.adler32 = &adler32_power8;
+        ft.chunkmemset_safe = &chunkmemset_safe_power8;
+        ft.chunksize = &chunksize_power8;
+        ft.inflate_fast = &inflate_fast_power8;
+        ft.slide_hash = &slide_hash_power8;
+    }
 #endif
 #ifdef POWER8_VSX_CRC32
     if (power_cpu_has_arch_2_07)
         ft.crc32 = &crc32_power8;
-#endif
-#ifdef POWER8_VSX_CHUNKSET
-    if (power_cpu_has_arch_2_07) {
-        ft.chunksize = &chunksize_power8;
-        ft.chunkmemset_safe = &chunkmemset_safe_power8;
-        ft.inflate_fast = &inflate_fast_power8;
-    }
 #endif
     // Power9
 #ifdef POWER9
@@ -222,6 +191,7 @@ static void init_functable(void) {
     }
 #endif
 
+
     // S390
 #ifdef S390_CRC32_VX
     if (PREFIX(s390_cpu_has_vx))
index b94912ac9679944fcefd399694bbe48518a7f9e2..19691376fbb1eef7ec6fb91a26f2a740746693e1 100644 (file)
@@ -64,26 +64,26 @@ public:
 
 BENCHMARK_ADLER32(c, adler32_c, 1);
 
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
 BENCHMARK_ADLER32(neon, adler32_neon, arm_cpu_has_neon);
 #endif
 
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
 BENCHMARK_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec);
 #endif
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
 BENCHMARK_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07);
 #endif
 
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
 BENCHMARK_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3);
 #endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
 BENCHMARK_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2);
 #endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
 BENCHMARK_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512);
 #endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
 BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni);
 #endif
index 62998d41ee4538203e6847afe079ac481435d0b4..d508a004aac03ae0e63903e3b96794ce138c9be7 100644 (file)
@@ -85,34 +85,34 @@ public:
 
 BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1);
 
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
 /* If we inline this copy for neon, the function would go here */
 //BENCHMARK_ADLER32_COPY(neon, adler32_neon, arm_cpu_has_neon);
 BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, arm_cpu_has_neon);
 #endif
 
-#ifdef PPC_VMX_ADLER32
+#ifdef PPC_VMX
 //BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, power_cpu_has_altivec);
 BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, power_cpu_has_altivec);
 #endif
-#ifdef POWER8_VSX_ADLER32
+#ifdef POWER8_VSX
 //BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, power_cpu_has_arch_2_07);
 BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, power_cpu_has_arch_2_07);
 #endif
 
-#ifdef X86_SSE42_ADLER32
+#ifdef X86_SSE42
 BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, x86_cpu_has_ssse3);
 BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, x86_cpu_has_sse42);
 #endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
 BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, x86_cpu_has_avx2);
 BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, x86_cpu_has_avx2);
 #endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
 BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, x86_cpu_has_avx512);
 BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, x86_cpu_has_avx512);
 #endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
 BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, x86_cpu_has_avx512vnni);
 BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, x86_cpu_has_avx512vnni);
 #endif
index f17ebf6c115fe4cc8eb6507cc4544fb98cac2fd0..b5ecda517954c8f7c57888f6cd16341f5a0a059e 100644 (file)
@@ -57,9 +57,9 @@ public:
 
 BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1);
 
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
 BENCHMARK_CRC32(acle, crc32_acle, arm_cpu_has_crc32);
-#elif defined(POWER8_VSX_CRC32)
+#elif defined(POWER8_VSX)
 BENCHMARK_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07);
 #elif defined(S390_CRC32_VX)
 BENCHMARK_CRC32(vx, PREFIX(s390_crc32_vx), PREFIX(s390_cpu_has_vx));
index 4ec87b6d8593d0f9d9a14cc39c74a9bd863700f2..5ffa7039d0ee05f4889cb1e564c2c88431f6b1a0 100644 (file)
@@ -68,13 +68,13 @@ public:
 
 BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
 
-#ifdef ARM_NEON_SLIDEHASH
+#ifdef ARM_NEON
 BENCHMARK_SLIDEHASH(neon, slide_hash_neon, arm_cpu_has_neon);
 #endif
-#ifdef POWER8_VSX_SLIDEHASH
+#ifdef POWER8_VSX
 BENCHMARK_SLIDEHASH(power8, slide_hash_power8, power_cpu_has_arch_2_07);
 #endif
-#ifdef PPC_VMX_SLIDEHASH
+#ifdef PPC_VMX
 BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, power_cpu_has_altivec);
 #endif
 
index fa113da5aeade2ceb23f43ea3243c315b72143f9..7f88f255654fca5cb53feb542f1f91993754253a 100644 (file)
@@ -364,26 +364,23 @@ INSTANTIATE_TEST_SUITE_P(adler32, adler32_variant, testing::ValuesIn(tests));
 
 TEST_ADLER32(c, adler32_c, 1)
 
-#ifdef ARM_NEON_ADLER32
+#ifdef ARM_NEON
 TEST_ADLER32(neon, adler32_neon, arm_cpu_has_neon)
-#elif defined(POWER8_VSX_ADLER32)
+#elif defined(POWER8_VSX)
 TEST_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07)
-#elif defined(PPC_VMX_ADLER32)
+#elif defined(PPC_VMX)
 TEST_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec)
 #endif
 
-#ifdef X86_SSSE3_ADLER32
+#ifdef X86_SSSE3
 TEST_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3)
 #endif
-#ifdef X86_SSE41_ADLER32
-TEST_ADLER32(sse41, adler32_sse41, x86_cpu_has_sse41)
-#endif
-#ifdef X86_AVX2_ADLER32
+#ifdef X86_AVX2
 TEST_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2)
 #endif
-#ifdef X86_AVX512_ADLER32
+#ifdef X86_AVX512
 TEST_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512)
 #endif
-#ifdef X86_AVX512VNNI_ADLER32
+#ifdef X86_AVX512VNNI
 TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni)
 #endif
index 6b6af4bce5e259bfe096332fd306b4be588f4c80..c46d0eb80a8164aaf2f3cd0d850588fc7ac8c143 100644 (file)
@@ -208,7 +208,7 @@ INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests));
 
 TEST_CRC32(braid, PREFIX(crc32_braid), 1)
 
-#ifdef ARM_ACLE_CRC_HASH
+#ifdef ARM_ACLE
 TEST_CRC32(acle, crc32_acle, arm_cpu_has_crc32)
 #elif defined(POWER8_VSX_CRC32)
 TEST_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07)
index 8537bd5fe982c68928f4d539bf7ae564837806fa..2a0f3cfe4e7b2efbd39774500799f39267476a4f 100644 (file)
@@ -91,12 +91,9 @@ OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj
 !endif
 
 WFLAGS = $(WFLAGS) \
-       -DARM_ACLE_CRC_HASH \
+       -DARM_ACLE \
        -D__ARM_NEON__=1 \
        -DARM_NEON \
-       -DARM_NEON_ADLER32 \
-       -DARM_NEON_CHUNKSET \
-       -DARM_NEON_SLIDEHASH \
        -DARM_NOCHECK_NEON \
        #
 OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj
index 58a7fc8661b1cb513e17530e2db3e4e22f6e1481..7d3f1b58a931cb0bb88dd2bb94bd31cdcd7ce8c1 100644 (file)
@@ -95,7 +95,7 @@ OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj
 !endif
 
 !if "$(WITH_ACLE)" != ""
-WFLAGS = $(WFLAGS) -DARM_ACLE_CRC_HASH
+WFLAGS = $(WFLAGS) -DARM_ACLE
 OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj
 !endif
 !if "$(WITH_VFPV3)" != ""
@@ -106,9 +106,6 @@ CFLAGS = $(CFLAGS) $(NEON_ARCH)
 WFLAGS = $(WFLAGS) \
        -D__ARM_NEON__=1 \
        -DARM_NEON \
-       -DARM_NEON_ADLER32 \
-       -DARM_NEON_CHUNKSET \
-       -DARM_NEON_SLIDEHASH \
        -DARM_NOCHECK_NEON \
        #
 OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj
index 9c00737a8f8b192b999c0a0e44e08de1feda7741..f2f0631a15fa94a15729ec8d819e82328780dc9c 100644 (file)
@@ -30,14 +30,10 @@ WFLAGS  = \
        -DX86_FEATURES \
        -DX86_PCLMULQDQ_CRC \
        -DX86_SSE2 \
-       -DX86_SSE42_ADLER32 \
+       -DX86_SSE42 \
        -DX86_SSE42_CRC_INTRIN \
-       -DX86_SSE42_CRC_HASH \
-       -DX86_SSSE3_ADLER32 \
-       -DX86_AVX2 \
-       -DX86_AVX2_ADLER32 \
-       -DX86_AVX_CHUNKSET \
-       -DX86_SSE2_CHUNKSET
+       -DX86_SSSE3 \
+       -DX86_AVX2
 
 LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest
 ARFLAGS = -nologo