From: Hans Kristian Rosbach Date: Fri, 21 Jul 2023 11:43:15 +0000 (+0200) Subject: Clean up SSE4.2 support, and no longer use asm fallback or gcc builtin. X-Git-Tag: 2.1.4~52 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2167377c46e1a1a117e97573dc8acdc95c2ec85e;p=thirdparty%2Fzlib-ng.git Clean up SSE4.2 support, and no longer use asm fallback or gcc builtin. Defines changing meaning: X86_SSE42 used to mean the compiler supports crc asm fallback. X86_SSE42_CRC_INTRIN used to mean compiler supports SSE4.2 intrinsics. X86_SSE42 now means compiler supports SSE4.2 intrinsics. This therefore also fixes the adler32_sse42 checks, since those were depending on SSE4.2 intrinsics but were mistakenly checking the X86_SSE42 define. Now the X86_SSE42 define actually means what it appears to. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d5d5ae9d..9fc865928 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -827,15 +827,12 @@ if(WITH_OPTIM) endif() if(WITH_SSE42) check_sse42_intrinsics() - if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN) + if(HAVE_SSE42_INTRIN) add_definitions(-DX86_SSE42) set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c) add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"") list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS}) set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}") - if(HAVE_SSE42CRC_INTRIN) - add_definitions(-DX86_SSE42_CRC_INTRIN) - endif() else() set(WITH_SSE42 OFF) endif() diff --git a/arch/x86/insert_string_sse42.c b/arch/x86/insert_string_sse42.c index 565d92f97..ae092a7e4 100644 --- a/arch/x86/insert_string_sse42.c +++ b/arch/x86/insert_string_sse42.c @@ -5,38 +5,13 @@ * */ +#ifdef X86_SSE42 #include "../../zbuild.h" -#include <immintrin.h> -#ifdef _MSC_VER -# include <nmmintrin.h> -#endif +#include <nmmintrin.h> #include "../../deflate.h" -#ifdef X86_SSE42_CRC_INTRIN -# ifdef _MSC_VER -# define HASH_CALC(s, h, val)\ - h = _mm_crc32_u32(h, val) -# 
else -# define HASH_CALC(s, h, val)\ - h = __builtin_ia32_crc32si(h, val) -# endif -#else -# ifdef _MSC_VER -# define HASH_CALC(s, h, val) {\ - __asm mov edx, h\ - __asm mov eax, val\ - __asm crc32 eax, edx\ - __asm mov h, eax\ - } -# else -# define HASH_CALC(s, h, val) \ - __asm__ __volatile__ (\ - "crc32 %1,%0\n\t"\ - : "+r" (h)\ - : "r" (val)\ - ); -# endif -#endif +#define HASH_CALC(s, h, val)\ + h = _mm_crc32_u32(h, val) #define HASH_CALC_VAR h #define HASH_CALC_VAR_INIT uint32_t h = 0 @@ -45,6 +20,5 @@ #define INSERT_STRING insert_string_sse42 #define QUICK_INSERT_STRING quick_insert_string_sse42 -#ifdef X86_SSE42 -# include "../../insert_string_tpl.h" +#include "../../insert_string_tpl.h" #endif diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index b0b0516e3..f5cc06bf7 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -481,35 +481,18 @@ macro(check_sse42_intrinsics) set(SSE42FLAG "-msse4.2") endif() endif() - # Check whether compiler supports SSE4.2 CRC inline asm + # Check whether compiler supports SSE4.2 intrinsics set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}") check_c_source_compile_or_run( - "int main(void) { - unsigned val = 0, h = 0; - #if defined(_MSC_VER) - { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax } - #else - __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) ); - #endif - return (int)h; - }" - HAVE_SSE42CRC_INLINE_ASM - ) - # Check whether compiler supports SSE4.2 CRC intrinsics - check_c_source_compile_or_run( - "#include <immintrin.h> + "#include <nmmintrin.h> int main(void) { unsigned crc = 0; char c = 'c'; - #if defined(_MSC_VER) crc = _mm_crc32_u32(crc, c); - #else - crc = __builtin_ia32_crc32qi(crc, c); - #endif (void)crc; return 0; }" - HAVE_SSE42CRC_INTRIN + HAVE_SSE42_INTRIN ) set(CMAKE_REQUIRED_FLAGS) endmacro() diff --git a/configure b/configure index bb37b9664..7461a1dce 100755 --- a/configure +++ b/configure @@ -1447,38 +1447,23 @@ EOF } 
check_sse42_intrinsics() { - # Check whether compiler supports SSE4.2 CRC inline asm - cat > $test.c << EOF -int main(void) { - unsigned val = 0, h = 0; - __asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) ); - return (int) h; -} -EOF - if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then - echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log - HAVE_SSE42CRC_INLINE_ASM=1 - else - echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log - HAVE_SSE42CRC_INLINE_ASM=0 - fi - - # Check whether compiler supports SSE4.2 CRC intrinsics + # Check whether compiler supports SSE4.2 intrinsics cat > $test.c << EOF +#include <nmmintrin.h> int main(void) { unsigned crc = 0; char c = 'c'; - crc = __builtin_ia32_crc32qi(crc, c); + crc = _mm_crc32_u32(crc, c); (void)crc; return 0; } EOF if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then - echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log - HAVE_SSE42CRC_INTRIN=1 + echo "Checking for SSE4.2 intrinsics ... Yes." | tee -a configure.log + HAVE_SSE42_INTRIN=1 else - echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log - HAVE_SSE42CRC_INTRIN=0 + echo "Checking for SSE4.2 intrinsics ... No." 
| tee -a configure.log + HAVE_SSE42_INTRIN=0 fi } @@ -1613,15 +1598,9 @@ case "${ARCH}" in check_sse42_intrinsics - if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then + if test ${HAVE_SSE42_INTRIN} -eq 1; then CFLAGS="${CFLAGS} -DX86_SSE42" SFLAGS="${SFLAGS} -DX86_SSE42" - - if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then - CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN" - SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN" - fi - ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo" fi diff --git a/win32/Makefile.msc b/win32/Makefile.msc index 9ed26f283..3035072be 100644 --- a/win32/Makefile.msc +++ b/win32/Makefile.msc @@ -31,7 +31,6 @@ WFLAGS = \ -DX86_PCLMULQDQ_CRC \ -DX86_SSE2 \ -DX86_SSE42 \ - -DX86_SSE42_CRC_INTRIN \ -DX86_SSSE3 \ -DX86_AVX2