From: Adam Stylinski Date: Tue, 4 Jan 2022 15:37:24 +0000 (-0500) Subject: Add SSE4.1 detection X-Git-Tag: 2.1.0-beta1~453 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5ef8e3b979065c04b863b71065323c2635cdbf02;p=thirdparty%2Fzlib-ng.git Add SSE4.1 detection Code leveraging this for the adler checksum is forthcoming --- diff --git a/arch/x86/x86.c b/arch/x86/x86.c index f02e1a349..065a71703 100644 --- a/arch/x86/x86.c +++ b/arch/x86/x86.c @@ -24,6 +24,7 @@ Z_INTERNAL int x86_cpu_has_avx512; Z_INTERNAL int x86_cpu_has_avx512vnni; Z_INTERNAL int x86_cpu_has_sse2; Z_INTERNAL int x86_cpu_has_ssse3; +Z_INTERNAL int x86_cpu_has_sse41; Z_INTERNAL int x86_cpu_has_sse42; Z_INTERNAL int x86_cpu_has_pclmulqdq; Z_INTERNAL int x86_cpu_has_tzcnt; @@ -78,6 +79,7 @@ void Z_INTERNAL x86_check_features(void) { x86_cpu_has_sse2 = edx & 0x4000000; x86_cpu_has_ssse3 = ecx & 0x200; + x86_cpu_has_sse41 = ecx & 0x80000; x86_cpu_has_sse42 = ecx & 0x100000; x86_cpu_has_pclmulqdq = ecx & 0x2; x86_cpu_well_suited_avx512 = 0; diff --git a/arch/x86/x86.h b/arch/x86/x86.h index 4274ed09f..80da6f32a 100644 --- a/arch/x86/x86.h +++ b/arch/x86/x86.h @@ -11,6 +11,7 @@ extern int x86_cpu_has_avx512; extern int x86_cpu_has_avx512vnni; extern int x86_cpu_has_sse2; extern int x86_cpu_has_ssse3; +extern int x86_cpu_has_sse41; extern int x86_cpu_has_sse42; extern int x86_cpu_has_pclmulqdq; extern int x86_cpu_has_tzcnt; diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index 9f7a97190..2facf5051 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -294,20 +294,48 @@ macro(check_ssse3_intrinsics) ) endmacro() -macro(check_sse4_intrinsics) +macro(check_sse41_intrinsics) if(CMAKE_C_COMPILER_ID MATCHES "Intel") if(CMAKE_HOST_UNIX OR APPLE) - set(SSE4FLAG "-msse4.2") + set(SSE41FLAG "-msse4.1") else() - set(SSE4FLAG "/arch:SSE4.2") + set(SSE41FLAG "/arch:SSE4.1") endif() elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR 
CMAKE_C_COMPILER_ID MATCHES "Clang") if(NOT NATIVEFLAG) - set(SSE4FLAG "-msse4.2") + set(SSE41FLAG "-msse4.1") + endif() + endif() + # Check whether compiler supports SSE4.1 intrinsics + set(CMAKE_REQUIRED_FLAGS "${SSE41FLAG}") + check_c_source_compile_or_run( + "#include <smmintrin.h> + int main(void) { + __m128i u, v, w; + u = _mm_set1_epi8(1); + v = _mm_set1_epi8(2); + w = _mm_sad_epu8(u, v); + (void)w; + return 0; + }" + HAVE_SSE41_INTRIN + ) +endmacro() + +macro(check_sse42_intrinsics) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(SSE42FLAG "-msse4.2") + else() + set(SSE42FLAG "/arch:SSE4.2") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + if(NOT NATIVEFLAG) + set(SSE42FLAG "-msse4.2") endif() endif() # Check whether compiler supports SSE4 CRC inline asm - set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}") + set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG}") check_c_source_compile_or_run( "int main(void) { unsigned val = 0, h = 0; diff --git a/configure b/configure index 0d79bca50..5b1c5e65b 100755 --- a/configure +++ b/configure @@ -109,7 +109,7 @@ avx512vnniflag="-mavx512vnni ${avx512flag}" avx2flag="-mavx2" sse2flag="-msse2" ssse3flag="-mssse3" -sse4flag="-msse4" +sse41flag="-msse4.1" sse42flag="-msse4.2" pclmulflag="-mpclmul" acleflag= @@ -1232,7 +1232,30 @@ EOF fi } -check_sse4_intrinsics() { +check_sse41_intrinsics() { + # Check whether compiler supports SSE4.1 intrinsics + cat > $test.c << EOF +#include <smmintrin.h> +int main(void) +{ + __m128i u, v, w; + u = _mm_set1_epi8(1); + v = _mm_set1_epi8(2); + w = _mm_sad_epu8(u, v); + (void)w; + return 0; +} +EOF + if try ${CC} ${CFLAGS} ${sse41flag} $test.c; then + echo "Checking for SSE4.1 intrinsics ... Yes." | tee -a configure.log + HAVE_SSE41_INTRIN=1 + else + echo "Checking for SSE4.1 intrinsics ... No." 
| tee -a configure.log + HAVE_SSE41_INTRIN=0 + fi +} + +check_sse42_intrinsics() { # Check whether compiler supports SSE4 CRC inline asm cat > $test.c << EOF int main(void) { @@ -1418,7 +1441,16 @@ case "${ARCH}" in ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo" fi - check_sse4_intrinsics + check_sse41_intrinsics + + if test ${HAVE_SSE41_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_SSE41_ADLER32" + SFLAGS="${SFLAGS} -DX86_SSE41_ADLER32" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse41.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse41.o" + fi + + check_sse42_intrinsics if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH" @@ -1432,6 +1464,7 @@ case "${ARCH}" in ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} insert_string_sse.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo" fi + if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR" SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR" @@ -1876,7 +1909,8 @@ echo sharedlibdir = $sharedlibdir >> configure.log echo uname = $uname >> configure.log echo sse2flag = $sse2flag >> configure.log echo ssse3flag = $ssse3flag >> configure.log -echo sse4flag = $sse4flag >> configure.log +echo sse41flag = $sse41flag >> configure.log +echo sse42flag = $sse42flag >> configure.log echo pclmulflag = $pclmulflag >> configure.log echo acleflag = $acleflag >> configure.log echo neonflag = $neonflag >> configure.log @@ -2012,7 +2046,8 @@ sed < $SRCDIR/$ARCHDIR/Makefile.in " /^AVX512VNNIFLAG *=/s#=.*#=$avx512vnniflag# /^SSE2FLAG *=/s#=.*#=$sse2flag# /^SSSE3FLAG *=/s#=.*#=$ssse3flag# -/^SSE4FLAG *=/s#=.*#=$sse4flag# +/^SSE41FLAG *=/s#=.*#=$sse41flag# +/^SSE42FLAG *=/s#=.*#=$sse42flag# /^PCLMULFLAG *=/s#=.*#=$pclmulflag# /^ACLEFLAG *=/s#=.*#=$acleflag# /^NEONFLAG *=/s#=.*#=$neonflag#