git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Add SSE4.1 detection
author: Adam Stylinski <kungfujesus06@gmail.com>
Tue, 4 Jan 2022 15:37:24 +0000 (10:37 -0500)
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 8 Jan 2022 18:27:28 +0000 (19:27 +0100)
Code leveraging this for the adler checksum is forthcoming

arch/x86/x86.c
arch/x86/x86.h
cmake/detect-intrinsics.cmake
configure

index f02e1a349110743260f6da4b2fea08e48ae79540..065a717032d8dd3a1f9579314956a598b3ceb6a6 100644 (file)
@@ -24,6 +24,7 @@ Z_INTERNAL int x86_cpu_has_avx512;
 Z_INTERNAL int x86_cpu_has_avx512vnni;
 Z_INTERNAL int x86_cpu_has_sse2;
 Z_INTERNAL int x86_cpu_has_ssse3;
+Z_INTERNAL int x86_cpu_has_sse41;
 Z_INTERNAL int x86_cpu_has_sse42;
 Z_INTERNAL int x86_cpu_has_pclmulqdq;
 Z_INTERNAL int x86_cpu_has_tzcnt;
@@ -78,6 +79,7 @@ void Z_INTERNAL x86_check_features(void) {
 
     x86_cpu_has_sse2 = edx & 0x4000000;
     x86_cpu_has_ssse3 = ecx & 0x200;
+    x86_cpu_has_sse41 = ecx & 0x80000;
     x86_cpu_has_sse42 = ecx & 0x100000;
     x86_cpu_has_pclmulqdq = ecx & 0x2;
     x86_cpu_well_suited_avx512 = 0;
index 4274ed09f6b770cbc30566a5686e41fe8ff6fc6f..80da6f32aac3a85a6702a4ee338713be539b8b64 100644 (file)
@@ -11,6 +11,7 @@ extern int x86_cpu_has_avx512;
 extern int x86_cpu_has_avx512vnni;
 extern int x86_cpu_has_sse2;
 extern int x86_cpu_has_ssse3;
+extern int x86_cpu_has_sse41;
 extern int x86_cpu_has_sse42;
 extern int x86_cpu_has_pclmulqdq;
 extern int x86_cpu_has_tzcnt;
index 9f7a971907dd4e4dd33010f6fbc161bbd9ad8209..2facf505139d11cd61102020d99cc252ce5df9a4 100644 (file)
@@ -294,20 +294,48 @@ macro(check_ssse3_intrinsics)
     )
 endmacro()
 
-macro(check_sse4_intrinsics)
+macro(check_sse41_intrinsics)
     if(CMAKE_C_COMPILER_ID MATCHES "Intel")
         if(CMAKE_HOST_UNIX OR APPLE)
-            set(SSE4FLAG "-msse4.2")
+            set(SSE41FLAG "-msse4.1")
         else()
-            set(SSE4FLAG "/arch:SSE4.2")
+            set(SSE41FLAG "/arch:SSE4.1")
         endif()
     elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
         if(NOT NATIVEFLAG)
-            set(SSE4FLAG "-msse4.2")
+            set(SSE41FLAG "-msse4.1")
+        endif()
+    endif()
+    # Check whether compiler supports SSE4.1 intrinsics (probe uses _mm_min_epi32, which requires SSE4.1; _mm_sad_epu8 is only SSE2)
+    set(CMAKE_REQUIRED_FLAGS "${SSE41FLAG}")
+    check_c_source_compile_or_run(
+        "#include <smmintrin.h>
+        int main(void) {
+            __m128i u, v, w;
+            u = _mm_set1_epi8(1);
+            v = _mm_set1_epi8(2);
+            w = _mm_min_epi32(u, v);
+            (void)w;
+            return 0;
+        }"
+        HAVE_SSE41_INTRIN
+    )
+endmacro()
+
+macro(check_sse42_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        if(CMAKE_HOST_UNIX OR APPLE)
+            set(SSE42FLAG "-msse4.2")
+        else()
+            set(SSE42FLAG "/arch:SSE4.2")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+        if(NOT NATIVEFLAG)
+            set(SSE42FLAG "-msse4.2")
         endif()
     endif()
     # Check whether compiler supports SSE4 CRC inline asm
-    set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
+    set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG}")
     check_c_source_compile_or_run(
         "int main(void) {
             unsigned val = 0, h = 0;
index 0d79bca50e97d1b20bbb98e48287f282f61a402a..5b1c5e65b265098f22ad67885457d1785e566575 100755 (executable)
--- a/configure
+++ b/configure
@@ -109,7 +109,7 @@ avx512vnniflag="-mavx512vnni ${avx512flag}"
 avx2flag="-mavx2"
 sse2flag="-msse2"
 ssse3flag="-mssse3"
-sse4flag="-msse4"
+sse41flag="-msse4.1"
 sse42flag="-msse4.2"
 pclmulflag="-mpclmul"
 acleflag=
@@ -1232,7 +1232,30 @@ EOF
     fi
 }
 
-check_sse4_intrinsics() {
+check_sse41_intrinsics() {
+    # Check whether compiler supports SSE4.1 intrinsics (probe uses the SSE4.1-only _mm_min_epi32); sets HAVE_SSE41_INTRIN to 1 or 0
+    cat > $test.c << EOF
+#include <smmintrin.h>
+int main(void)
+{
+    __m128i u, v, w;
+    u = _mm_set1_epi8(1);
+    v = _mm_set1_epi8(2);
+    w = _mm_min_epi32(u, v);
+    (void)w;
+    return 0;
+}
+EOF
+    if try ${CC} ${CFLAGS} ${sse41flag} $test.c; then
+        echo "Checking for SSE4.1 intrinsics ... Yes." | tee -a configure.log
+        HAVE_SSE41_INTRIN=1
+    else
+        echo "Checking for SSE4.1 intrinsics ... No." | tee -a configure.log
+        HAVE_SSE41_INTRIN=0
+    fi
+}
+
+check_sse42_intrinsics() {
     # Check whether compiler supports SSE4 CRC inline asm
     cat > $test.c << EOF
 int main(void) {
@@ -1418,7 +1441,16 @@ case "${ARCH}" in
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo"
             fi
 
-            check_sse4_intrinsics
+            check_sse41_intrinsics
+
+            if test ${HAVE_SSE41_INTRIN} -eq 1; then  # enable the SSE4.1 adler32 define and objects only when the probe succeeded
+                CFLAGS="${CFLAGS} -DX86_SSE41_ADLER32"
+                SFLAGS="${SFLAGS} -DX86_SSE41_ADLER32"
+                ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse41.o"
+                ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse41.lo"  # shared objects use .lo, like the other ARCH_SHARED_OBJS entries
+            fi
+
+            check_sse42_intrinsics
 
             if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH"
@@ -1432,6 +1464,7 @@ case "${ARCH}" in
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} insert_string_sse.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo"
             fi
+
             if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
                 SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
@@ -1876,7 +1909,8 @@ echo sharedlibdir = $sharedlibdir >> configure.log
 echo uname = $uname >> configure.log
 echo sse2flag = $sse2flag >> configure.log
 echo ssse3flag = $ssse3flag >> configure.log
-echo sse4flag = $sse4flag >> configure.log
+echo sse41flag = $sse41flag >> configure.log
+echo sse42flag = $sse42flag >> configure.log
 echo pclmulflag = $pclmulflag >> configure.log
 echo acleflag = $acleflag >> configure.log
 echo neonflag = $neonflag >> configure.log
@@ -2012,7 +2046,8 @@ sed < $SRCDIR/$ARCHDIR/Makefile.in "
 /^AVX512VNNIFLAG *=/s#=.*#=$avx512vnniflag#
 /^SSE2FLAG *=/s#=.*#=$sse2flag#
 /^SSSE3FLAG *=/s#=.*#=$ssse3flag#
-/^SSE4FLAG *=/s#=.*#=$sse4flag#
+/^SSE41FLAG *=/s#=.*#=$sse41flag#
+/^SSE42FLAG *=/s#=.*#=$sse42flag#
 /^PCLMULFLAG *=/s#=.*#=$pclmulflag#
 /^ACLEFLAG *=/s#=.*#=$acleflag#
 /^NEONFLAG *=/s#=.*#=$neonflag#