From: Mika Lindqvist Date: Sun, 2 Nov 2025 16:57:16 +0000 (+0200) Subject: Initial support for nVidia toolchain X-Git-Tag: 2.3.0-rc1~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7bee4b6ee49d6219fc81a779ec1b50a22a3262e8;p=thirdparty%2Fzlib-ng.git Initial support for nVidia toolchain * Supports native and non-native builds for x86_64 using CMake --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f9bd2c26..15654f49b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -230,7 +230,7 @@ elseif(MSVC) set(NEONFLAG "/arch:VFPv4") endif() endif() -elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") +elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") # Enable warnings in GCC and Clang set(WARNFLAGS -Wall) set(WARNFLAGS_MAINTAINER -Wextra) @@ -276,6 +276,9 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") message(STATUS "ARM floating point arch not auto-detected") endif() endif() + elseif(BASEARCH_X86_FOUND AND CMAKE_C_COMPILER_ID MATCHES "NVHPC") + # nVidia compiler defaults to native build, so set target to any x86-64 processor + add_compile_options(-tp px) endif() # Disable LTO unless Native Instructions are enabled if(FNO_LTO_AVAILABLE) @@ -309,6 +312,12 @@ if(WITH_NATIVE_INSTRUCTIONS) endif() # Fall through endif() + if(CMAKE_C_COMPILER_ID MATCHES "NVHPC") + check_c_compiler_flag("-tp native" HAVE_TP_NATIVE) + if(HAVE_TP_NATIVE) + set(NATIVEFLAG "-tp native") + endif() + endif() endif() if(NATIVEFLAG) # Apply flags to all source files and compilation checks diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index 568b8bd7a..e7592f95b 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -109,7 +109,7 @@ macro(check_avx512_intrinsics) else() set(AVX512FLAG "/arch:AVX512") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal # instruction scheduling unless you specify a reasonable -mtune= target set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mbmi2") @@ -147,7 +147,7 @@ macro(check_avx512vnni_intrinsics) else() set(AVX512VNNIFLAG "/arch:AVX512") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni -mbmi2") if(NOT MSVC) check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE) @@ -188,7 +188,7 @@ macro(check_avx2_intrinsics) else() set(AVX2FLAG "/arch:AVX2") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(AVX2FLAG "-mavx2 -mbmi2") elseif(MSVC) set(AVX2FLAG "/arch:AVX2") @@ -297,7 +297,7 @@ endmacro() macro(check_pclmulqdq_intrinsics) if(NOT NATIVEFLAG) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(PCLMULFLAG "-mpclmul") endif() endif() @@ -320,7 +320,7 @@ endmacro() macro(check_vpclmulqdq_intrinsics) if(NOT NATIVEFLAG) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(VPCLMULFLAG "-mvpclmulqdq -mavx512f") endif() endif() @@ -551,7 +551,7 @@ macro(check_sse2_intrinsics) if(NOT "${ARCH}" MATCHES "x86_64") set(SSE2FLAG "/arch:SSE2") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(SSE2FLAG "-msse2") endif() endif() @@ -574,7 +574,7 @@ macro(check_ssse3_intrinsics) else() set(SSSE3FLAG "/arch:SSSE3") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(SSSE3FLAG "-mssse3") endif() endif() @@ -599,7 +599,7 @@ macro(check_sse41_intrinsics) else() set(SSE41FLAG "/arch:SSE4.1") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(SSE41FLAG "-msse4.1") endif() endif() @@ -622,7 +622,7 @@ macro(check_sse42_intrinsics) else() set(SSE42FLAG "/arch:SSE4.2") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC") set(SSE42FLAG "-msse4.2") endif() endif()