git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Improve x86 intrinsics dependencies.
author: Vladislav Shchapov <vladislav@shchapov.ru>
Sat, 20 Jan 2024 18:59:54 +0000 (23:59 +0500)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Thu, 25 Jan 2024 09:21:49 +0000 (10:21 +0100)
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
CMakeLists.txt
arch/x86/Makefile.in
arch/x86/crc32_vpclmulqdq.c
cmake/detect-intrinsics.cmake
cpu_features.h
functable.c
test/test_crc32.cc

index caaa621fe98dddc8b9a1ae6090bcd0a88aac5f2e..a1dad7c4c9c2e1cbd43062ff01e379ca5dbcfa85 100644 (file)
@@ -808,7 +808,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_SSSE3)
             check_ssse3_intrinsics()
-            if(HAVE_SSSE3_INTRIN)
+            if(HAVE_SSSE3_INTRIN AND WITH_SSE2)
                 add_definitions(-DX86_SSSE3)
                 set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
                 add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
@@ -820,7 +820,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_SSE42)
             check_sse42_intrinsics()
-            if(HAVE_SSE42_INTRIN)
+            if(HAVE_SSE42_INTRIN AND WITH_SSSE3)
                 add_definitions(-DX86_SSE42)
                 set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
                 add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
@@ -830,23 +830,21 @@ if(WITH_OPTIM)
                 set(WITH_SSE42 OFF)
             endif()
         endif()
-        if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE42)
+        if(WITH_PCLMULQDQ)
             check_pclmulqdq_intrinsics()
-            if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN)
+            if(HAVE_PCLMULQDQ_INTRIN AND WITH_SSE42)
                 add_definitions(-DX86_PCLMULQDQ_CRC)
                 set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
-                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG}\"")
+                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSE42FLAG} ${PCLMULFLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
-                set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
+                set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
             else()
                 set(WITH_PCLMULQDQ OFF)
             endif()
-        else()
-            set(WITH_PCLMULQDQ OFF)
         endif()
         if(WITH_AVX2)
             check_avx2_intrinsics()
-            if(HAVE_AVX2_INTRIN)
+            if(HAVE_AVX2_INTRIN AND WITH_SSE42)
                 add_definitions(-DX86_AVX2)
                 set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
                 add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
@@ -864,7 +862,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_AVX512)
             check_avx512_intrinsics()
-            if(HAVE_AVX512_INTRIN)
+            if(HAVE_AVX512_INTRIN AND WITH_AVX2)
                 add_definitions(-DX86_AVX512)
                 list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c)
                 add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"")
@@ -877,7 +875,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_AVX512VNNI)
             check_avx512vnni_intrinsics()
-            if(HAVE_AVX512VNNI_INTRIN)
+            if(HAVE_AVX512VNNI_INTRIN AND WITH_AVX2)
                 add_definitions(-DX86_AVX512VNNI)
                 add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"")
                 list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c)
@@ -887,19 +885,17 @@ if(WITH_OPTIM)
                 set(WITH_AVX512VNNI OFF)
             endif()
         endif()
-        if(WITH_VPCLMULQDQ AND WITH_PCLMULQDQ AND WITH_AVX512)
+        if(WITH_VPCLMULQDQ)
             check_vpclmulqdq_intrinsics()
-            if(HAVE_VPCLMULQDQ_INTRIN)
+            if(HAVE_VPCLMULQDQ_INTRIN AND WITH_PCLMULQDQ AND WITH_AVX512)
                 add_definitions(-DX86_VPCLMULQDQ_CRC)
                 set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c)
-                add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${VPCLMULFLAG} ${AVX512FLAG}\"")
+                add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS})
-                set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
+                set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
             else()
                 set(WITH_VPCLMULQDQ OFF)
             endif()
-        else()
-            set(WITH_VPCLMULQDQ OFF)
         endif()
     endif()
 endif()
index 7c052469b2987f6c375a7b56be9bbb33ee790087..f84fe6e6be51212ee9aca764d70ae17ddd10c865 100644 (file)
@@ -90,10 +90,10 @@ crc32_pclmulqdq.lo:
        $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c
 
 crc32_vpclmulqdq.o:
-       $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
+       $(CC) $(CFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
 
 crc32_vpclmulqdq.lo:
-       $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
+       $(CC) $(SFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
 
 slide_hash_avx2.o:
        $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx2.c
index ec641b43263be120d6c8282714267138b7922df7..cad35b14eefc2346f21f286d2dcdf04fee02e9f0 100644 (file)
@@ -3,7 +3,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
+#ifdef X86_VPCLMULQDQ_CRC
 
 #define X86_VPCLMULQDQ
 #define CRC32_FOLD_COPY  crc32_fold_vpclmulqdq_copy
index f2448103cd09a55c1344c2c800b37b9f1f8e58d7..b186d181f3c2592eb658ad4c6ace0025e6697e3d 100644 (file)
@@ -114,7 +114,7 @@ endmacro()
 macro(check_avx512vnni_intrinsics)
     if(CMAKE_C_COMPILER_ID MATCHES "Intel")
         if(CMAKE_HOST_UNIX OR APPLE)
-            set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
+            set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
         else()
             set(AVX512VNNIFLAG "/arch:AVX512")
         endif()
index 05ee407bf50557f6f40e1d59ebbf75ea1149b2e7..b095d79a8a9ea2c9092cfa923981ea42605abf63 100644 (file)
@@ -93,7 +93,7 @@ extern void     crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t
 extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
 extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 #endif
-#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
+#ifdef X86_VPCLMULQDQ_CRC
 extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
 extern void     crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
 extern void     crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
index 179126fcab0bea5c69a2da5d56f887960361e0e9..1cdaeab371222f149b35f19cd8ceff5ccd42d80c 100644 (file)
@@ -162,7 +162,7 @@ static void init_functable(void) {
     }
 #endif
     // X86 - VPCLMULQDQ
-#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
+#ifdef X86_VPCLMULQDQ_CRC
     if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) {
         ft.crc32 = &crc32_vpclmulqdq;
         ft.crc32_fold = &crc32_fold_vpclmulqdq;
index f194b4ccf57a6d311cd328b159ba8526ff536bf2..dcf7d48fd8d14140bafe98cc639cf18d9476555b 100644 (file)
@@ -214,8 +214,10 @@ TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32)
 TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)
 #elif defined(S390_CRC32_VX)
 TEST_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx)
-#elif defined(X86_PCLMULQDQ_CRC)
+#elif defined(X86_FEATURES)
+#  ifdef X86_PCLMULQDQ_CRC
 TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
+#  endif
 #  ifdef X86_VPCLMULQDQ_CRC
 TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq))
 #  endif