# Build the AVX512-VNNI flag string when NATIVEFLAG is not set: the AVX512
# feature flags plus, outside the ClangCl toolset, a -mtune= target.
if(NOT NATIVEFLAG)
set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
if(NOT CMAKE_GENERATOR_TOOLSET MATCHES "ClangCl")
- set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake")
# Probe for -mtune=cascadelake and fall back to skylake-avx512 when the
# compiler rejects it. The tuned string is stored back into AVX512VNNIFLAG
# so the separate AVX512FLAG variable is left untouched.
+ check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
+ if(HAVE_CASCADE_LAKE)
+ set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake")
+ else()
+ set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=skylake-avx512")
+ endif()
# Drop the probe result from the current scope once the flag is chosen.
+ unset(HAVE_CASCADE_LAKE)
endif()
endif()
elseif(MSVC)
# For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
# instruction scheduling unless you specify a reasonable -mtune= target
# Per-instruction-set compiler flag strings. The VNNI set is the base AVX512
# set with -mavx512vnni appended after it.
avx512flag="-mavx512f -mavx512dq -mavx512bw -mavx512vl"
-avx512vnniflag="-mavx512vnni ${avx512flag}"
+avx512vnniflag="${avx512flag} -mavx512vnni"
avx2flag="-mavx2"
sse2flag="-msse2"
ssse3flag="-mssse3"
# Append a -mtune= target to both the AVX512 and the AVX512-VNNI flag sets:
# cascadelake when MTUNE_CASCADELAKE_AVAILABLE is 1, otherwise skylake-avx512
# when MTUNE_SKYLAKE_AVX512_AVAILABLE is 1, otherwise no -mtune= is added.
# Each set is extended from its own previous value so that avx512flag does
# not pick up -mavx512vnni and avx512vnniflag actually receives the tuning.
if test ${MTUNE_CASCADELAKE_AVAILABLE} -eq 1; then
avx512flag="${avx512flag} -mtune=cascadelake"
+ avx512vnniflag="${avx512vnniflag} -mtune=cascadelake"
else
if test ${MTUNE_SKYLAKE_AVX512_AVAILABLE} -eq 1; then
avx512flag="${avx512flag} -mtune=skylake-avx512"
+ avx512vnniflag="${avx512vnniflag} -mtune=skylake-avx512"
fi
fi