git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Fix AVX512-VNNI compile flags.
author Hans Kristian Rosbach <hk-git@circlestorm.org>
Sat, 13 May 2023 11:17:04 +0000 (13:17 +0200)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Sat, 13 May 2023 18:15:00 +0000 (20:15 +0200)
cmake/detect-intrinsics.cmake
configure

index c20c9e16074b7d1939683d034b5f6ea451091720..60f34b80bf4cde7e142609db0d2f7713f98e6cfb 100644 (file)
@@ -100,7 +100,13 @@ macro(check_avx512vnni_intrinsics)
         if(NOT NATIVEFLAG)
             set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
             if(NOT CMAKE_GENERATOR_TOOLSET MATCHES "ClangCl")
-                set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake")
+                check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
+                if(HAVE_CASCADE_LAKE)
+                    set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake")
+                else()
+                    set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=skylake-avx512")
+                endif()
+                unset(HAVE_CASCADE_LAKE)
             endif()
         endif()
     elseif(MSVC)
index 1f3ef7a02995081a4d11b60d21b5133888fe7105..145cd6fb6ff14398bede907a9189e9de38551e7a 100755 (executable)
--- a/configure
+++ b/configure
@@ -106,7 +106,7 @@ forcesse2=0
 # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
 # instruction scheduling unless you specify a reasonable -mtune= target
 avx512flag="-mavx512f -mavx512dq -mavx512bw -mavx512vl"
-avx512vnniflag="-mavx512vnni ${avx512flag}"
+avx512vnniflag="${avx512flag} -mavx512vnni"
 avx2flag="-mavx2"
 sse2flag="-msse2"
 ssse3flag="-mssse3"
@@ -1570,9 +1570,11 @@ case "${ARCH}" in
 
             if test ${MTUNE_CASCADELAKE_AVAILABLE} -eq 1; then
                 avx512flag="${avx512flag} -mtune=cascadelake"
+                avx512flag="${avx512vnniflag} -mtune=cascadelake"
             else
                 if test ${MTUNE_SKYLAKE_AVX512_AVAILABLE} -eq 1; then
                     avx512flag="${avx512flag} -mtune=skylake-avx512"
+                    avx512flag="${avx512vnniflag} -mtune=skylake-avx512"
                 fi
             fi