git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add option for selecting a minimum x86-64 architecture version (refs: no-c-fallback-v2, 1972/head)
author Hans Kristian Rosbach <hk-git@circlestorm.org>
Sat, 27 Sep 2025 20:23:05 +0000 (22:23 +0200)
committer Hans Kristian Rosbach <hk-git@circlestorm.org>
Sat, 27 Sep 2025 21:03:20 +0000 (23:03 +0200)
This drops compilation of optimized functions below that minimum.
It also compiles the remaining optimized functions at or below that
minimum with the instruction set of the selected version, potentially
allowing the compiler to make minor further optimizations.
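
To illustrate the mechanism, here is a minimal sketch of the preprocessor
gate this commit applies throughout x86_functions.h and functable.c
(condensed from the x86_functions.h hunk below; ZARCHVER is the define
CMake emits when WITH_X86_64_ARCHVER is 2 or higher):

    #ifdef X86_SSE2
    /* Keep the SSE2 variant only if runtime detection is in use
     * (ZARCHVER undefined) or the guaranteed baseline is still low
     * enough that no better variant supersedes it. */
    #  if !defined(ZARCHVER) || ZARCHVER <= 2
        uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
    #  endif
    #endif

Configuring with -DWITH_X86_64_ARCHVER=3, for example, defines ZARCHVER=3
and drops this declaration, and its implementation, since the AVX2
variant is then guaranteed to be available.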

14 files changed:
CMakeLists.txt
arch/x86/adler32_avx512.c
arch/x86/adler32_avx512_vnni.c
arch/x86/x86_functions.h
cmake/arch-x86.cmake [new file with mode: 0644]
functable.c
test/benchmarks/benchmark_adler32.cc
test/benchmarks/benchmark_adler32_copy.cc
test/benchmarks/benchmark_compare256.cc
test/benchmarks/benchmark_crc32.cc
test/benchmarks/benchmark_slidehash.cc
test/test_adler32.cc
test/test_compare256.cc
test/test_crc32.cc

index 5f43a2302b9b2701882892ed529dc630e8dd10b4..4227b867ff9b4009391b1057711a240b4c69f211 100644 (file)
@@ -1,7 +1,4 @@
-cmake_minimum_required(VERSION 3.5.1...3.29.0)
-if(CMAKE_VERSION VERSION_LESS 3.12)
-    cmake_policy(VERSION ${CMAKE_VERSION})
-endif()
+cmake_minimum_required(VERSION 3.12...3.29.0)
 message(STATUS "Using CMake version ${CMAKE_VERSION}")
 
 if(POLICY CMP0169)
@@ -112,10 +109,17 @@ set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exporte
 Useful when embedding into a larger library.
 Default is no prefix (empty prefix).")
 
-# Add multi-choice option
+# Add multi-choice options
 set(WITH_SANITIZER AUTO CACHE STRING "Enable sanitizer support")
 set_property(CACHE WITH_SANITIZER PROPERTY STRINGS "Memory" "Address" "Undefined" "Thread")
 
+if(${ARCH} MATCHES "x86_64")
+    set(WITH_X86_64_ARCHVER "1" CACHE STRING "Minimum arch version required, default x86-64-v1")
+    set_property(CACHE WITH_X86_64_ARCHVER PROPERTY STRINGS "1" "2" "3" "4")
+else()
+    set(WITH_X86_64_ARCHVER "0" CACHE STRING "Disabled on non-x86-64 arch")
+endif()
+
 if(BASEARCH_ARM_FOUND)
     option(WITH_ARMV8 "Build with ARMv8 CRC32 intrinsics" ON)
     option(WITH_NEON "Build with NEON intrinsics" ON)
@@ -200,10 +204,6 @@ if(NOT WITH_CRC32_CHORBA)
     add_definitions(-DWITHOUT_CHORBA)
 endif()
 
-if(${ARCH} MATCHES "x86_64")
-
-endif()
-
 if(NOT WITH_C_FALLBACK)
     add_definitions(-DNO_C_FALLBACK=1)
 endif()
@@ -1037,152 +1037,7 @@ if(WITH_OPTIM)
         endif()
 
     elseif(BASEARCH_X86_FOUND)
-        add_definitions(-DX86_FEATURES)
-        list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h)
-        if(WITH_RUNTIME_CPU_DETECTION)
-            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
-            list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
-        endif()
-        if(MSVC)
-            list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
-        endif()
-        check_xsave_intrinsics()
-        if(HAVE_XSAVE_INTRIN)
-            add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"")
-            if(WITH_RUNTIME_CPU_DETECTION)
-                set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
-            endif()
-            if(NOT (CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8.2))
-                add_definitions(-DX86_HAVE_XSAVE_INTRIN)
-            endif()
-        endif()
-        if(WITH_SSE2)
-            check_sse2_intrinsics()
-            # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true
-            if("${ARCH}" MATCHES "i[3-6]86")
-                cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF)
-            endif()
-            if(HAVE_SSE2_INTRIN)
-                add_definitions(-DX86_SSE2)
-                set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/chorba_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
-                list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
-                if(NOT ${ARCH} MATCHES "x86_64")
-                    set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}")
-                    add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable")
-                    if(FORCE_SSE2)
-                        add_definitions(-DX86_NOCHECK_SSE2)
-                    endif()
-                endif()
-            else()
-                set(WITH_SSE2 OFF)
-            endif()
-        endif()
-        if(WITH_SSSE3)
-            check_ssse3_intrinsics()
-            if(HAVE_SSSE3_INTRIN AND WITH_SSE2)
-                add_definitions(-DX86_SSSE3)
-                set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
-                add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
-                set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_SSSE3 OFF)
-            endif()
-        endif()
-        if(WITH_SSE41)
-            check_sse41_intrinsics()
-            if(HAVE_SSE41_INTRIN AND WITH_SSSE3)
-                add_definitions(-DX86_SSE41)
-                set(SSE41_SRCS ${ARCHDIR}/chorba_sse41.c)
-                list(APPEND ZLIB_ARCH_SRCS ${SSE41_SRCS})
-                set_property(SOURCE ${SSE41_SRCS} PROPERTY COMPILE_FLAGS "${SSE41FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_SSE41 OFF)
-            endif()
-        endif()
-        if(WITH_SSE42)
-            check_sse42_intrinsics()
-            if(HAVE_SSE42_INTRIN AND WITH_SSE41)
-                add_definitions(-DX86_SSE42)
-                set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c)
-                add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized adler32 hash generation, using \"${SSE42FLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
-                set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_SSE42 OFF)
-            endif()
-        endif()
-        if(WITH_PCLMULQDQ)
-            check_pclmulqdq_intrinsics()
-            if(HAVE_PCLMULQDQ_INTRIN AND WITH_SSE42)
-                add_definitions(-DX86_PCLMULQDQ_CRC)
-                set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
-                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSE42FLAG} ${PCLMULFLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
-                set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_PCLMULQDQ OFF)
-            endif()
-        endif()
-        if(WITH_AVX2)
-            check_avx2_intrinsics()
-            if(HAVE_AVX2_INTRIN AND WITH_SSE42)
-                add_definitions(-DX86_AVX2)
-                set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
-                add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
-                list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c)
-                add_feature_info(AVX2_CHUNKSET 1 "Support AVX2 optimized chunkset, using \"${AVX2FLAG}\"")
-                list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c)
-                add_feature_info(AVX2_COMPARE256 1 "Support AVX2 optimized compare256, using \"${AVX2FLAG}\"")
-                list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c)
-                add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
-                set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_AVX2 OFF)
-            endif()
-        endif()
-        if(WITH_AVX512)
-            check_avx512_intrinsics()
-            if(HAVE_AVX512_INTRIN AND WITH_AVX2)
-                add_definitions(-DX86_AVX512)
-                list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c)
-                add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"")
-                list(APPEND AVX512_SRCS ${ARCHDIR}/chunkset_avx512.c)
-                add_feature_info(AVX512_CHUNKSET 1 "Support AVX512 optimized chunkset, using \"${AVX512FLAG}\"")
-                list(APPEND AVX512_SRCS ${ARCHDIR}/compare256_avx512.c)
-                add_feature_info(AVX512_COMPARE256 1 "Support AVX512 optimized compare256, using \"${AVX512FLAG}\"")
-                list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/adler32_avx512_p.h)
-                list(APPEND ZLIB_ARCH_SRCS ${AVX512_SRCS})
-                set_property(SOURCE ${AVX512_SRCS} PROPERTY COMPILE_FLAGS "${AVX512FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_AVX512 OFF)
-            endif()
-        endif()
-        if(WITH_AVX512VNNI)
-            check_avx512vnni_intrinsics()
-            if(HAVE_AVX512VNNI_INTRIN AND WITH_AVX2)
-                add_definitions(-DX86_AVX512VNNI)
-                add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"")
-                list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c)
-                list(APPEND ZLIB_ARCH_SRCS ${AVX512VNNI_SRCS})
-                set_property(SOURCE ${AVX512VNNI_SRCS} PROPERTY COMPILE_FLAGS "${AVX512VNNIFLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_AVX512VNNI OFF)
-            endif()
-        endif()
-        if(WITH_VPCLMULQDQ)
-            check_vpclmulqdq_intrinsics()
-            if(HAVE_VPCLMULQDQ_INTRIN AND WITH_PCLMULQDQ AND WITH_AVX512)
-                add_definitions(-DX86_VPCLMULQDQ_CRC)
-                set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c)
-                add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG}\"")
-                list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS})
-                set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
-            else()
-                set(WITH_VPCLMULQDQ OFF)
-            endif()
-        endif()
+        include(cmake/arch-x86.cmake)
     endif()
 endif()
 
index 626c4807f821bbfae34849a98cf5b57bee247fd1..21d2d1b801dc2cfa8e74778bb5938370d1716bb7 100644 (file)
@@ -15,6 +15,8 @@
 #include "x86_intrins.h"
 #include "adler32_avx512_p.h"
 
+uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
+
 static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
     if (src == NULL) return 1L;
     if (len == 0) return adler;
index 4c5cfc1cadb46159655a8bf592d2badc0ed219bf..700fbf8396f42914a632308534f6a5a4afb76ede 100644 (file)
@@ -17,6 +17,8 @@
 #include "adler32_avx512_p.h"
 #include "adler32_avx2_p.h"
 
+uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
+
 Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size_t len) {
     if (src == NULL) return 1L;
     if (len == 0) return adler;
index 9cf4b4313334af6734c182a9791abfdfa9275983..1bc32eac4b42c6eb39c66a63cf1f6e4fcc6ccc6d 100644 (file)
 #endif
 
 #ifdef X86_SSE2
-uint32_t chunksize_sse2(void);
-uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+#  if !defined(ZARCHVER) || ZARCHVER == 1
+    uint32_t chunksize_sse2(void);
+    uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+    void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
+#      if !defined(WITHOUT_CHORBA)
+        uint32_t crc32_chorba_sse2(uint32_t crc32, const uint8_t *buf, size_t len);
+#      endif
+#  endif
 
-#  ifdef HAVE_BUILTIN_CTZ
-    uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
-    uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
-    uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
     void slide_hash_sse2(deflate_state *s);
+#      ifdef HAVE_BUILTIN_CTZ
+        uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
+        uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
+        uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
+#      endif
 #  endif
-    void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
-#   if !defined(WITHOUT_CHORBA)
-    uint32_t crc32_chorba_sse2(uint32_t crc32, const uint8_t *buf, size_t len);
-#   endif
 #endif
 
 #ifdef X86_SSSE3
-uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
-uint8_t* chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
-void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
+#  if !defined(ZARCHVER) || ZARCHVER == 1 || X86_AVX512VNNI
+    uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
+#  endif
+
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
+    uint8_t* chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+    void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
+#  endif
 #endif
 
 #ifdef X86_SSE41
-#   if !defined(WITHOUT_CHORBA)
+#  if !defined(WITHOUT_CHORBA)
     uint32_t crc32_chorba_sse41(uint32_t crc32, const uint8_t *buf, size_t len);
-#   endif
+#  endif
 #endif
 
 #ifdef X86_SSE42
-uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
+    uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+#  endif
 #endif
 
 #ifdef X86_AVX2
-uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint32_t chunksize_avx2(void);
-uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+#  if !defined(ZARCHVER) || ZARCHVER <= 3
+    uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
+    uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+    uint32_t chunksize_avx2(void);
+    uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+    void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
+#  endif
 
 #  ifdef HAVE_BUILTIN_CTZ
-    uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
-    uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
-    uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
+#      if !defined(ZARCHVER) || ZARCHVER <= 3
+        uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
+        uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
+        uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
+#      endif
     void slide_hash_avx2(deflate_state *s);
 #  endif
-    void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
 #endif
+
 #ifdef X86_AVX512
 uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
diff --git a/cmake/arch-x86.cmake b/cmake/arch-x86.cmake
new file mode 100644 (file)
index 0000000..c38757c
--- /dev/null
@@ -0,0 +1,241 @@
+add_definitions(-DX86_FEATURES)
+list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h)
+if(WITH_RUNTIME_CPU_DETECTION)
+    list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
+    list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
+endif()
+
+if(MSVC)
+    list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
+endif()
+
+check_xsave_intrinsics()
+check_sse2_intrinsics()
+check_ssse3_intrinsics()
+check_sse41_intrinsics()
+check_sse42_intrinsics()
+check_avx2_intrinsics()
+check_avx512_intrinsics()
+check_avx512vnni_intrinsics()
+check_pclmulqdq_intrinsics()
+check_vpclmulqdq_intrinsics()
+
+if(${ARCH} MATCHES "x86_64" AND ${WITH_X86_64_ARCHVER} GREATER_EQUAL "2" AND ${WITH_X86_64_ARCHVER} LESS_EQUAL "4")
+    if(WITH_NATIVE_INSTRUCTIONS)
+        MESSAGE(FATAL_ERROR "WITH_NATIVE_INSTRUCTIONS cannot be enabled together with WITH_X86_64_ARCHVER > 1")
+    endif()
+    add_definitions(-DZARCHVER=${WITH_X86_64_ARCHVER})
+endif()
+
+## Handle grouped instruction sets for each selected x86-64 arch version
+# x86-64-v2 (up to SSE4.2)
+if(${WITH_X86_64_ARCHVER} EQUAL "2")
+    if(WITH_SSE2 AND HAVE_SSE2_INTRIN AND WITH_SSSE3 AND HAVE_SSSE3_INTRIN AND WITH_SSE41 AND HAVE_SSE41_INTRIN
+        AND WITH_SSE42 AND HAVE_SSE42_INTRIN)
+
+        add_definitions(-DX86_SSE2 -DX86_SSSE3 -DX86_SSE41 -DX86_SSE42)
+        set(ARCHVER_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/chunkset_ssse3.c ${ARCHDIR}/chorba_sse2.c ${ARCHDIR}/chorba_sse41.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
+        set_property(SOURCE ${ARCHVER_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
+
+        add_feature_info(SSE42_ADLER32 1 "Support SSE42-accelerated adler32, using \"${SSE42FLAG}\"")
+        add_feature_info(SSSE3_CHUNKSET 1 "Support SSSE3-accelerated chunkset, using \"${SSE42FLAG}\"")
+        add_feature_info(SSE41_CHORBA 1 "Support SSE41-accelerated chorba, using \"${SSE42FLAG}\"")
+        add_feature_info(SSE2_COMPARE256 1 "Support SSE2-accelerated compare256, using \"${SSE42FLAG}\"")
+        add_feature_info(SSE2_SLIDEHASH 1 "Support SSE2-accelerated slidehash, using \"${SSE42FLAG}\"")
+
+    else()
+        MESSAGE(FATAL_ERROR "WITH_X86_64_ARCHVER=2 Requires SSE2, SSSE3, SSE41 and SSE42, and their intrinsics to be supported and enabled.")
+    endif()
+endif()
+
+# x86-64-v3 (up to AVX2)
+if(${WITH_X86_64_ARCHVER} EQUAL "3")
+    if(WITH_SSE2 AND HAVE_SSE2_INTRIN AND WITH_SSSE3 AND HAVE_SSSE3_INTRIN AND WITH_SSE41 AND HAVE_SSE41_INTRIN
+        AND WITH_SSE42 AND HAVE_SSE42_INTRIN AND WITH_AVX2 AND HAVE_AVX2_INTRIN)
+
+        add_definitions(-DX86_SSE2 -DX86_SSSE3 -DX86_SSE41 -DX86_SSE42 -DX86_AVX2)
+        set(ARCHVER_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/adler32_avx2.c ${ARCHDIR}/chunkset_avx2.c ${ARCHDIR}/chorba_sse2.c ${ARCHDIR}/chorba_sse41.c ${ARCHDIR}/compare256_avx2.c ${ARCHDIR}/slide_hash_avx2.c)
+        set_property(SOURCE ${ARCHVER_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}")
+
+        add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"")
+        add_feature_info(AVX2_CHUNKSET 1 "Support AVX2-accelerated chunkset, using \"${AVX2FLAG}\"")
+        add_feature_info(SSE41_CHORBA 1 "Support SSE41-accelerated chorba, using \"${AVX2FLAG}\"")
+        add_feature_info(AVX2_COMPARE256 1 "Support AVX2-accelerated compare256, using \"${AVX2FLAG}\"")
+        add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2-accelerated slidehash, using \"${AVX2FLAG}\"")
+
+    else()
+        MESSAGE(FATAL_ERROR "WITH_X86_64_ARCHVER=2 Requires SSE2, SSSE3, SSE41, SSE42 and AVX2, and their intrinsics to be supported and enabled.")
+    endif()
+endif()
+
+# x86-64-v4 (up to AVX512)
+if(${WITH_X86_64_ARCHVER} EQUAL "4")
+    if(WITH_SSE2 AND HAVE_SSE2_INTRIN AND WITH_SSSE3 AND HAVE_SSSE3_INTRIN AND WITH_SSE41 AND HAVE_SSE41_INTRIN
+        AND WITH_SSE42 AND HAVE_SSE42_INTRIN AND WITH_AVX2 AND HAVE_AVX2_INTRIN AND WITH_AVX512 AND HAVE_AVX512_INTRIN)
+
+        add_definitions(-DX86_SSE2 -DX86_SSSE3 -DX86_SSE41 -DX86_SSE42 -DX86_AVX2 -DX86_AVX512)
+        set(ARCHVER_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/adler32_avx2.c ${ARCHDIR}/adler32_avx512.c ${ARCHDIR}/chunkset_avx512.c ${ARCHDIR}/chorba_sse2.c ${ARCHDIR}/chorba_sse41.c ${ARCHDIR}/compare256_avx512.c ${ARCHDIR}/slide_hash_avx2.c)
+        list(APPEND ARCHVER_HDRS ${ARCHDIR}/adler32_avx512_p.h)
+        set_property(SOURCE ${ARCHVER_SRCS} PROPERTY COMPILE_FLAGS "${AVX512FLAG} ${NOLTOFLAG}")
+
+        add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"")
+        add_feature_info(AVX512_CHUNKSET 1 "Support AVX512-accelerated chunkset, using \"${AVX512FLAG}\"")
+        add_feature_info(SSE41_CHORBA 1 "Support SSE41-accelerated chorba, using \"${AVX512FLAG}\"")
+        add_feature_info(AVX512_COMPARE256 1 "Support AVX512-accelerated compare256, using \"${AVX512FLAG}\"")
+        add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2-accelerated slidehash, using \"${AVX512FLAG}\"")
+
+    else()
+        MESSAGE(FATAL_ERROR "WITH_X86_64_ARCHVER=2 Requires SSE2, SSSE3, SSE41, SSE42, AVX2 and AVX512, and their intrinsics to be supported and enabled.")
+    endif()
+endif()
+
+list(APPEND ZLIB_ARCH_SRCS ${ARCHVER_SRCS})
+list(APPEND ZLIB_ARCH_HDRS ${ARCHVER_HDRS})
+
+
+# Handle individual instruction sets (SSE2 through SSE4.2) when the baseline is x86-64-v1 or the arch is not x86-64
+if(NOT ${ARCH} MATCHES "x86_64" OR ${WITH_X86_64_ARCHVER} EQUAL "1")
+    if(WITH_SSE2)
+        if("${ARCH}" MATCHES "i[3-6]86")
+            cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF)
+        endif()
+        if(HAVE_SSE2_INTRIN)
+            add_definitions(-DX86_SSE2)
+            set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/chorba_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
+            list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
+            if(NOT ${ARCH} MATCHES "x86_64")
+                set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}")
+            endif()
+        else()
+            set(WITH_SSE2 OFF)
+        endif()
+    endif()
+
+    if(WITH_SSSE3)
+        if(HAVE_SSSE3_INTRIN AND WITH_SSE2)
+            add_definitions(-DX86_SSSE3)
+            set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
+            add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
+            list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
+            set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
+        else()
+            set(WITH_SSSE3 OFF)
+        endif()
+    endif()
+
+    if(WITH_SSE41)
+        if(HAVE_SSE41_INTRIN AND WITH_SSSE3)
+            add_definitions(-DX86_SSE41)
+            set(SSE41_SRCS ${ARCHDIR}/chorba_sse41.c)
+            list(APPEND ZLIB_ARCH_SRCS ${SSE41_SRCS})
+            set_property(SOURCE ${SSE41_SRCS} PROPERTY COMPILE_FLAGS "${SSE41FLAG} ${NOLTOFLAG}")
+        else()
+            set(WITH_SSE41 OFF)
+        endif()
+    endif()
+
+    if(WITH_SSE42)
+        if(HAVE_SSE42_INTRIN AND WITH_SSE41)
+            add_definitions(-DX86_SSE42)
+            set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c)
+            add_feature_info(SSE42_ADLER32 1 "Support SSE4.2-accelerated adler32, using \"${SSE42FLAG}\"")
+            list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
+            set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
+        else()
+            set(WITH_SSE42 OFF)
+        endif()
+    endif()
+endif()
+
+# Handle individual instruction sets above x86-64-v2 (AVX2) when the baseline is v2 or lower
+if(NOT ${ARCH} MATCHES "x86_64" OR ${WITH_X86_64_ARCHVER} LESS_EQUAL "2")
+    if(WITH_AVX2)
+        if(HAVE_AVX2_INTRIN AND WITH_SSE42)
+            add_definitions(-DX86_AVX2)
+            set(AVX2_SRCS ${ARCHDIR}/adler32_avx2.c)
+            add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"")
+            list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c)
+            add_feature_info(AVX2_CHUNKSET 1 "Support AVX2 optimized chunkset, using \"${AVX2FLAG}\"")
+            list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c)
+            add_feature_info(AVX2_COMPARE256 1 "Support AVX2 optimized compare256, using \"${AVX2FLAG}\"")
+            list(APPEND AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
+            add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
+            list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
+            set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}")
+        else()
+            set(WITH_AVX2 OFF)
+        endif()
+    endif()
+endif()
+
+# Handle individual instruction sets above x86-64-v3 (AVX512) when the baseline is v3 or lower
+if(NOT ${ARCH} MATCHES "x86_64" OR ${WITH_X86_64_ARCHVER} LESS_EQUAL "3")
+    if(WITH_AVX512)
+        if(HAVE_AVX512_INTRIN AND WITH_AVX2)
+            add_definitions(-DX86_AVX512)
+            list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c)
+            add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"")
+            list(APPEND AVX512_SRCS ${ARCHDIR}/chunkset_avx512.c)
+            add_feature_info(AVX512_CHUNKSET 1 "Support AVX512 optimized chunkset, using \"${AVX512FLAG}\"")
+            list(APPEND AVX512_SRCS ${ARCHDIR}/compare256_avx512.c)
+            add_feature_info(AVX512_COMPARE256 1 "Support AVX512 optimized compare256, using \"${AVX512FLAG}\"")
+            list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/adler32_avx512_p.h)
+            list(APPEND ZLIB_ARCH_SRCS ${AVX512_SRCS})
+            set_property(SOURCE ${AVX512_SRCS} PROPERTY COMPILE_FLAGS "${AVX512FLAG} ${NOLTOFLAG}")
+        else()
+            set(WITH_AVX512 OFF)
+        endif()
+    endif()
+endif()
+
+# Handle individual instruction sets not part of any arch version
+if(HAVE_XSAVE_INTRIN)
+    add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"")
+    if(WITH_RUNTIME_CPU_DETECTION)
+        set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
+    endif()
+    if(NOT (CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8.2))
+        add_definitions(-DX86_HAVE_XSAVE_INTRIN)
+    endif()
+endif()
+
+if(WITH_PCLMULQDQ)
+    if(HAVE_PCLMULQDQ_INTRIN AND WITH_SSE42)
+        add_definitions(-DX86_PCLMULQDQ_CRC)
+        set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
+        add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSE42FLAG} ${PCLMULFLAG}\"")
+        list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
+        set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
+    else()
+        set(WITH_PCLMULQDQ OFF)
+    endif()
+endif()
+
+if(WITH_VPCLMULQDQ)
+    if(HAVE_VPCLMULQDQ_INTRIN AND WITH_PCLMULQDQ AND WITH_AVX512)
+        add_definitions(-DX86_VPCLMULQDQ_CRC)
+        set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c)
+        add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG}\"")
+        list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS})
+        set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
+    else()
+        set(WITH_VPCLMULQDQ OFF)
+    endif()
+endif()
+
+if(WITH_AVX512VNNI)
+    if(HAVE_AVX512VNNI_INTRIN AND WITH_AVX2)
+        add_definitions(-DX86_AVX512VNNI)
+        add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"")
+        list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c)
+        if(${WITH_X86_64_ARCHVER} EQUAL "2")
+            add_definitions(-DX86_SSSE3)
+            list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_ssse3.c)
+        endif()
+        list(APPEND ZLIB_ARCH_SRCS ${AVX512VNNI_SRCS})
+        set_property(SOURCE ${AVX512VNNI_SRCS} PROPERTY COMPILE_FLAGS "${AVX512VNNIFLAG} ${NOLTOFLAG}")
+    else()
+        set(WITH_AVX512VNNI OFF)
+    endif()
+endif()
+
index 78104b23c4dcc66d0c39a48dbf98479e63195c77..e26d9e27266bf3e6c082455e9ffa882d039d172d 100644 (file)
@@ -82,30 +82,38 @@ static void init_functable(void) {
 
     // X86 - SSE2
 #ifdef X86_SSE2
-#  if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+#  if !defined(__x86_64__) && !defined(_M_X64)
     if (cf.x86.has_sse2)
 #  endif
     {
+#  if !defined(ZARCHVER) || ZARCHVER == 1
         ft.chunkmemset_safe = &chunkmemset_safe_sse2;
         ft.chunksize = &chunksize_sse2;
-#if !defined(WITHOUT_CHORBA) && !defined(NO_CHORBA_SSE)
-        ft.crc32 = &crc32_chorba_sse2;
-#endif
         ft.inflate_fast = &inflate_fast_sse2;
+#      if !defined(WITHOUT_CHORBA) && !defined(NO_CHORBA_SSE)
+        ft.crc32 = &crc32_chorba_sse2;
+#      endif
+#  endif
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
         ft.slide_hash = &slide_hash_sse2;
-#  ifdef HAVE_BUILTIN_CTZ
+#      ifdef HAVE_BUILTIN_CTZ
         ft.compare256 = &compare256_sse2;
         ft.longest_match = &longest_match_sse2;
         ft.longest_match_slow = &longest_match_slow_sse2;
+#      endif
 #  endif
     }
 #endif
     // X86 - SSSE3
 #ifdef X86_SSSE3
     if (cf.x86.has_ssse3) {
+#      if !defined(ZARCHVER) || ZARCHVER == 1
         ft.adler32 = &adler32_ssse3;
+#      endif
+#      if !defined(ZARCHVER) || ZARCHVER <= 2
         ft.chunkmemset_safe = &chunkmemset_safe_ssse3;
         ft.inflate_fast = &inflate_fast_ssse3;
+#      endif
     }
 #endif
 
@@ -121,7 +129,9 @@ static void init_functable(void) {
     // X86 - SSE4.2
 #ifdef X86_SSE42
     if (cf.x86.has_sse42) {
+#      if !defined(ZARCHVER) || ZARCHVER <= 2
         ft.adler32_fold_copy = &adler32_fold_copy_sse42;
+#      endif
     }
 #endif
     // X86 - PCLMUL
@@ -141,16 +151,20 @@ static void init_functable(void) {
      * for the shift results as an operand, eliminating several register-register moves when the original value needs
      * to remain intact. They also allow for a count operand that isn't the CL register, avoiding contention there */
     if (cf.x86.has_avx2 && cf.x86.has_bmi2) {
+#      if !defined(ZARCHVER) || ZARCHVER <= 3
         ft.adler32 = &adler32_avx2;
         ft.adler32_fold_copy = &adler32_fold_copy_avx2;
         ft.chunkmemset_safe = &chunkmemset_safe_avx2;
         ft.chunksize = &chunksize_avx2;
         ft.inflate_fast = &inflate_fast_avx2;
+#      endif
         ft.slide_hash = &slide_hash_avx2;
 #  ifdef HAVE_BUILTIN_CTZ
+#      if !defined(ZARCHVER) || ZARCHVER <= 3
         ft.compare256 = &compare256_avx2;
         ft.longest_match = &longest_match_avx2;
         ft.longest_match_slow = &longest_match_slow_avx2;
+#      endif
 #  endif
     }
 #endif
index 6c307098ddcce1bcc3178031dd4ab33cc7bb902e..ca8291cd2f89e090c8d184c8b2e4acc386c934e4 100644 (file)
@@ -63,7 +63,9 @@ public:
     } \
     BENCHMARK_REGISTER_F(adler32, name)->Arg(1)->Arg(8)->Arg(12)->Arg(16)->Arg(32)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(32<<10)->Arg(256<<10)->Arg(4096<<10)
 
+#ifndef NO_C_FALLBACK
 BENCHMARK_ADLER32(c, adler32_c, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_ADLER32(native, native_adler32, 1);
@@ -88,7 +90,9 @@ BENCHMARK_ADLER32(rvv, adler32_rvv, test_cpu_features.riscv.has_rvv);
 BENCHMARK_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3);
 #endif
 #ifdef X86_AVX2
-BENCHMARK_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2);
+#  if !defined(ZARCHVER) || ZARCHVER <= 3
+    BENCHMARK_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2);
+#  endif
 #endif
 #ifdef X86_AVX512
 BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512_common);
index 610c1c55353dbb9d7fd00c49d3bc76feda7bc342..ee0b342aaccb6394fcf24dfa7408f263c0e68e9a 100644 (file)
@@ -113,12 +113,16 @@ BENCHMARK_ADLER32_BASELINE_COPY(rvv, adler32_rvv, test_cpu_features.riscv.has_rv
 #endif
 
 #ifdef X86_SSE42
-BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, test_cpu_features.x86.has_ssse3);
-BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, test_cpu_features.x86.has_sse42);
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
+    BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, test_cpu_features.x86.has_ssse3);
+    BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, test_cpu_features.x86.has_sse42);
+#  endif
 #endif
 #ifdef X86_AVX2
-BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x86.has_avx2);
-BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2);
+#  if !defined(ZARCHVER) || ZARCHVER <= 3
+    BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x86.has_avx2);
+    BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2);
+#  endif
 #endif
 #ifdef X86_AVX512
 BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512_common);
index 689aa6e934f9692f08dfef432cc4e9904208eecb..5544bff51ac1fd45a7c4336d4580604667a49a2d 100644 (file)
@@ -75,10 +75,14 @@ BENCHMARK_COMPARE256(64, compare256_64, 1);
 #endif
 
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
+    BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
+#  endif
 #endif
 #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-BENCHMARK_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2);
+#  if !defined(ZARCHVER) || ZARCHVER <= 3
+    BENCHMARK_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2);
+#  endif
 #endif
 #if defined(X86_AVX512) && defined(HAVE_BUILTIN_CTZLL)
 BENCHMARK_COMPARE256(avx512, compare256_avx512, test_cpu_features.x86.has_avx512_common);
index d308ce2debadc6557ed9992eb9fa47842d13035d..06c2cc37f2bf3816f1b09cbc37889ce6aa712d8a 100644 (file)
@@ -72,9 +72,11 @@ BENCHMARK_CRC32(native, native_crc32, 1);
 
 #ifndef WITHOUT_CHORBA
 #   if defined(X86_SSE2) && !defined(NO_CHORBA_SSE)
-    BENCHMARK_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2);
+#       if !defined(ZARCHVER) || ZARCHVER == 1
+         BENCHMARK_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2);
+#       endif
 #       if defined(X86_SSE41) && !defined(NO_CHORBA_SSE)
-        BENCHMARK_CRC32(chorba_sse41, crc32_chorba_sse41, test_cpu_features.x86.has_sse41);
+         BENCHMARK_CRC32(chorba_sse41, crc32_chorba_sse41, test_cpu_features.x86.has_sse41);
 #       endif
 #   endif
 #endif
index 7348e4d1129cc7a551aaf3328eacf1e01e96cd71..72369ea900332b6b45d8d3dee5a16024d67ba160 100644 (file)
@@ -92,7 +92,9 @@ BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, test_cpu_features.power.has_altivec);
 BENCHMARK_SLIDEHASH(rvv, slide_hash_rvv, test_cpu_features.riscv.has_rvv);
 #endif
 #ifdef X86_SSE2
-BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2);
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
+    BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2);
+#  endif
 #endif
 #ifdef X86_AVX2
 BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, test_cpu_features.x86.has_avx2);
index 5a5912b3385070fc388b866766d1509f18abcfca..c467391bf7b7059ee0f355080d3b976d3cb64cfc 100644 (file)
@@ -385,7 +385,9 @@ TEST_ADLER32(rvv, adler32_rvv, test_cpu_features.riscv.has_rvv)
 TEST_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3)
 #endif
 #ifdef X86_AVX2
-TEST_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2)
+#  if !defined(ZARCHVER) || ZARCHVER <= 3
+    TEST_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2)
+#  endif
 #endif
 #ifdef X86_AVX512
 TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512_common)
index 1b52082ed032817d1cf132312b6834f1c25e6b33..a623ce15150741b923b247db9f56ae2925562b65 100644 (file)
@@ -74,10 +74,14 @@ TEST_COMPARE256(64, compare256_64, 1)
 #endif
 
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
+#  if !defined(ZARCHVER) || ZARCHVER <= 2
+    TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
+#  endif
 #endif
 #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-TEST_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2)
+#  if !defined(ZARCHVER) || ZARCHVER <= 3
+    TEST_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2)
+#  endif
 #endif
 #if defined(X86_AVX512) && defined(HAVE_BUILTIN_CTZLL)
 TEST_COMPARE256(avx512, compare256_avx512, test_cpu_features.x86.has_avx512_common)
index 1b728e7ca3f2f58d99945d6f4ba8522ca988c65e..1768bc31739c0314ffb2db06d1563357936820b5 100644 (file)
@@ -320,7 +320,9 @@ TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
 TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
 #endif
 #if !defined(WITHOUT_CHORBA) && defined(X86_SSE2) && !defined(NO_CHORBA_SSE)
-TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2)
+#    if !defined(ZARCHVER) || ZARCHVER == 1
+        TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2)
+#    endif
 #endif
 #if !defined(WITHOUT_CHORBA) && defined(X86_SSE41) && !defined(NO_CHORBA_SSE)
 TEST_CRC32(chorba_sse41, crc32_chorba_sse41, test_cpu_features.x86.has_sse41)