From: Vladislav Shchapov
Date: Sat, 20 Jan 2024 18:59:54 +0000 (+0500)
Subject: Improve x86 intrinsics dependencies.
X-Git-Tag: 2.2.0~115
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1aa53f40fc84cd3e9264ef9b5878d641b5ee500a;p=thirdparty%2Fzlib-ng.git

Improve x86 intrinsics dependencies.

Signed-off-by: Vladislav Shchapov
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index caaa621f..a1dad7c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -808,7 +808,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_SSSE3)
             check_ssse3_intrinsics()
-            if(HAVE_SSSE3_INTRIN)
+            if(HAVE_SSSE3_INTRIN AND WITH_SSE2)
                 add_definitions(-DX86_SSSE3)
                 set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c)
                 add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
@@ -820,7 +820,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_SSE42)
             check_sse42_intrinsics()
-            if(HAVE_SSE42_INTRIN)
+            if(HAVE_SSE42_INTRIN AND WITH_SSSE3)
                 add_definitions(-DX86_SSE42)
                 set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
                 add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
@@ -830,23 +830,21 @@ if(WITH_OPTIM)
                 set(WITH_SSE42 OFF)
             endif()
         endif()
-        if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE42)
+        if(WITH_PCLMULQDQ)
             check_pclmulqdq_intrinsics()
-            if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN)
+            if(HAVE_PCLMULQDQ_INTRIN AND WITH_SSE42)
                 add_definitions(-DX86_PCLMULQDQ_CRC)
                 set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c)
-                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG}\"")
+                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSE42FLAG} ${PCLMULFLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
-                set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
+                set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}")
             else()
                 set(WITH_PCLMULQDQ OFF)
             endif()
-        else()
-            set(WITH_PCLMULQDQ OFF)
         endif()
         if(WITH_AVX2)
             check_avx2_intrinsics()
-            if(HAVE_AVX2_INTRIN)
+            if(HAVE_AVX2_INTRIN AND WITH_SSE42)
                 add_definitions(-DX86_AVX2)
                 set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c)
                 add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
@@ -864,7 +862,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_AVX512)
             check_avx512_intrinsics()
-            if(HAVE_AVX512_INTRIN)
+            if(HAVE_AVX512_INTRIN AND WITH_AVX2)
                 add_definitions(-DX86_AVX512)
                 list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c)
                 add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"")
@@ -877,7 +875,7 @@ if(WITH_OPTIM)
         endif()
         if(WITH_AVX512VNNI)
             check_avx512vnni_intrinsics()
-            if(HAVE_AVX512VNNI_INTRIN)
+            if(HAVE_AVX512VNNI_INTRIN AND WITH_AVX2)
                 add_definitions(-DX86_AVX512VNNI)
                 add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"")
                 list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c)
@@ -887,19 +885,17 @@ if(WITH_OPTIM)
                 set(WITH_AVX512VNNI OFF)
             endif()
         endif()
-        if(WITH_VPCLMULQDQ AND WITH_PCLMULQDQ AND WITH_AVX512)
+        if(WITH_VPCLMULQDQ)
             check_vpclmulqdq_intrinsics()
-            if(HAVE_VPCLMULQDQ_INTRIN)
+            if(HAVE_VPCLMULQDQ_INTRIN AND WITH_PCLMULQDQ AND WITH_AVX512)
                 add_definitions(-DX86_VPCLMULQDQ_CRC)
                 set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c)
-                add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${VPCLMULFLAG} ${AVX512FLAG}\"")
+                add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS})
-                set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
+                set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}")
             else()
                 set(WITH_VPCLMULQDQ OFF)
             endif()
-        else()
-            set(WITH_VPCLMULQDQ OFF)
         endif()
     endif()
 endif()
diff --git a/arch/x86/Makefile.in b/arch/x86/Makefile.in
index 7c052469..f84fe6e6 100644
--- a/arch/x86/Makefile.in
+++ b/arch/x86/Makefile.in
@@ -90,10 +90,10 @@ crc32_pclmulqdq.lo:
 	$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c
 
 crc32_vpclmulqdq.o:
-	$(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
+	$(CC) $(CFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
 
 crc32_vpclmulqdq.lo:
-	$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
+	$(CC) $(SFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
 
 slide_hash_avx2.o:
 	$(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx2.c
diff --git a/arch/x86/crc32_vpclmulqdq.c b/arch/x86/crc32_vpclmulqdq.c
index ec641b43..cad35b14 100644
--- a/arch/x86/crc32_vpclmulqdq.c
+++ b/arch/x86/crc32_vpclmulqdq.c
@@ -3,7 +3,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
+#ifdef X86_VPCLMULQDQ_CRC
 
 #define X86_VPCLMULQDQ
 #define CRC32_FOLD_COPY crc32_fold_vpclmulqdq_copy
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
index f2448103..b186d181 100644
--- a/cmake/detect-intrinsics.cmake
+++ b/cmake/detect-intrinsics.cmake
@@ -114,7 +114,7 @@ endmacro()
 macro(check_avx512vnni_intrinsics)
     if(CMAKE_C_COMPILER_ID MATCHES "Intel")
         if(CMAKE_HOST_UNIX OR APPLE)
-            set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
+            set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
         else()
             set(AVX512VNNIFLAG "/arch:AVX512")
         endif()
diff --git a/cpu_features.h b/cpu_features.h
index 05ee407b..b095d79a 100644
--- a/cpu_features.h
+++ b/cpu_features.h
@@ -93,7 +93,7 @@ extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t
 extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
 extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
 #endif
-#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
+#ifdef X86_VPCLMULQDQ_CRC
 extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
 extern void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
 extern void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
diff --git a/functable.c b/functable.c
index 179126fc..1cdaeab3 100644
--- a/functable.c
+++ b/functable.c
@@ -162,7 +162,7 @@ static void init_functable(void) {
     }
 #endif
     // X86 - VPCLMULQDQ
-#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
+#ifdef X86_VPCLMULQDQ_CRC
     if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) {
         ft.crc32 = &crc32_vpclmulqdq;
         ft.crc32_fold = &crc32_fold_vpclmulqdq;
diff --git a/test/test_crc32.cc b/test/test_crc32.cc
index f194b4cc..dcf7d48f 100644
--- a/test/test_crc32.cc
+++ b/test/test_crc32.cc
@@ -214,8 +214,10 @@ TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32)
 TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)
 #elif defined(S390_CRC32_VX)
 TEST_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx)
-#elif defined(X86_PCLMULQDQ_CRC)
+#elif defined(X86_FEATURES)
+# ifdef X86_PCLMULQDQ_CRC
 TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
+# endif
 # ifdef X86_VPCLMULQDQ_CRC
 TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq))
 # endif