git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Modify cmake to apply native flag when using intrinsic flags same as configure.
author Nathan Moinvaziri <nathan@nathanm.com>
Sun, 28 Jun 2020 19:37:50 +0000 (12:37 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Thu, 2 Jul 2020 11:10:03 +0000 (13:10 +0200)
CMakeLists.txt

index 5385565a28935274e7758642a8bd16c745f9f7dc..b0c6ee0ad8195956a62ae1ed0a432d33ce93685f 100644 (file)
@@ -146,9 +146,6 @@ if(WITH_GZFILEOP)
 endif()
 
 if(${CMAKE_C_COMPILER} MATCHES "icc" OR ${CMAKE_C_COMPILER} MATCHES "icpc" OR ${CMAKE_C_COMPILER} MATCHES "icl")
-    if(WITH_NATIVE_INSTRUCTIONS)
-        message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration")
-    endif()
     if(CMAKE_HOST_UNIX OR APPLE)
         set(WARNFLAGS "-w3")
         set(WARNFLAGS_MAINTAINER "-w3 -Wcheck -Wremarks")
@@ -170,6 +167,9 @@ if(${CMAKE_C_COMPILER} MATCHES "icc" OR ${CMAKE_C_COMPILER} MATCHES "icpc" OR ${
             set(SSE4FLAG "/arch:SSE4.2")
         endif()
     endif()
+    if(WITH_NATIVE_INSTRUCTIONS)
+        message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration")
+    endif()
 elseif(MSVC)
     # TODO. ICC can be used through MSVC. I'm not sure if we'd ever see that combination
     # (who'd use cmake from an IDE...) but checking for ICC before checking for MSVC should
@@ -252,19 +252,6 @@ else()
                 set(PCLMULFLAG "-mpclmul")
             endif()
         endif()
-    else()
-        if(BASEARCH_ARM_FOUND)
-            set(ACLEFLAG "${NATIVEFLAG}")
-            if("${ARCH}" MATCHES "aarch64")
-                set(NEONFLAG "${NATIVEFLAG}")
-            endif()
-        elseif(BASEARCH_X86_FOUND)
-            set(AVX2FLAG ${NATIVEFLAG})
-            set(SSE2FLAG ${NATIVEFLAG})
-            set(SSSE3FLAG ${NATIVEFLAG})
-            set(SSE4FLAG ${NATIVEFLAG})
-            set(PCLMULFLAG ${NATIVEFLAG})
-        endif()
     endif()
 endif()
 
@@ -319,6 +306,12 @@ if(WITH_CODE_COVERAGE)
     endif()
 endif()
 
+# Set native instruction set compiler flag
+if(WITH_NATIVE_INSTRUCTIONS AND DEFINED NATIVEFLAG)
+    # Apply flag to all source files and compilation checks
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NATIVEFLAG}")
+endif()
+
 #
 # Check for standard/system includes
 #
@@ -482,11 +475,7 @@ if(BASEARCH_PPC_FOUND)
     set(CMAKE_REQUIRED_FLAGS)
 elseif(BASEARCH_X86_FOUND)
     # Check whether compiler supports SSE2 intrinsics
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
-    endif()
+    set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
     check_c_source_compile_or_run(
         "#include <immintrin.h>
         int main(void)
@@ -498,11 +487,7 @@ elseif(BASEARCH_X86_FOUND)
         HAVE_SSE2_INTRIN
     )
     # Check whether compiler supports SSSE3 intrinsics
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG}")
-    endif()
+    set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG}")
     check_c_source_compile_or_run(
         "#include <immintrin.h>
 
@@ -518,11 +503,7 @@ elseif(BASEARCH_X86_FOUND)
         HAVE_SSSE3_INTRIN
     )
     # Check whether compiler supports SSE4 CRC inline asm
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
-    endif()
+    set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
     check_c_source_compile_or_run(
         "int main(void)
         {
@@ -568,11 +549,7 @@ elseif(BASEARCH_X86_FOUND)
         HAVE_SSE42CMPSTR_INTRIN
     )
     # Check whether compiler supports PCLMULQDQ intrinsics
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
-    endif()
+    set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
     if(NOT (APPLE AND ${ARCH} MATCHES "i386"))
         # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
         check_c_source_compile_or_run(
@@ -591,11 +568,7 @@ elseif(BASEARCH_X86_FOUND)
         set(HAVE_PCLMULQDQ_INTRIN NO)
     endif()
     # Check whether compiler supports AVX2 intrinsics
-    if(WITH_NATIVE_INSTRUCTIONS)
-        set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
-    else()
-        set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG}")
-    endif()
+    set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG}")
     check_c_source_compile_or_run(
         "#include <immintrin.h>
         int main(void) {
@@ -639,17 +612,6 @@ if(WITH_INFLATE_ALLOW_INVALID_DIST)
     message(STATUS "Inflate zero data for invalid distances enabled")
 endif()
 
-#
-# Macro to set the given intrinsics option to the specified files,
-# or ${NATIVEFLAG} (-march=native) if that is appropriate and possible.
-#
-macro(set_intrinsics_option flag)
-    if(WITH_NATIVE_INSTRUCTIONS AND NATIVEFLAG)
-        set_property(SOURCE ${ARGN} PROPERTY COMPILE_FLAGS ${NATIVEFLAG})
-    else()
-        set_property(SOURCE ${ARGN} PROPERTY COMPILE_FLAGS ${flag})
-    endif()
-endmacro()
 
 set(ZLIB_ARCH_SRCS)
 set(ZLIB_ARCH_HDRS)
@@ -678,7 +640,7 @@ if(WITH_OPTIM)
             add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
             set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c)
             list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
-            set_intrinsics_option("${NEONFLAG}" ${NEON_SRCS})
+            set_property(SOURCE ${NEON_SRCS} PROPERTY COMPILE_FLAGS "${NEONFLAG}")
             if(MSVC)
                 add_definitions(-D__ARM_NEON__)
             endif()
@@ -688,7 +650,7 @@ if(WITH_OPTIM)
         if(WITH_ACLE AND NOT MSVC)
             add_definitions(-DARM_ACLE_CRC_HASH)
             set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
-            set_intrinsics_option("${ACLEFLAG}" ${ACLE_SRCS})
+            set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG}")
             list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
             add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
         endif()
@@ -702,7 +664,7 @@ if(WITH_OPTIM)
             list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c)
             set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/slide_hash_power8.c)
             list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS})
-            set_intrinsics_option("${POWER8FLAG}" ${POWER8_SRCS})
+            set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG}")
         endif()
     elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
         if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
@@ -733,14 +695,14 @@ if(WITH_OPTIM)
             list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx.c)
             add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"")
             list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
-            set_intrinsics_option("${AVX2FLAG}" ${AVX2_SRCS})
+            set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG}")
         endif()
         if(WITH_SSE4 AND (HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN))
             add_definitions(-DX86_SSE42_CRC_HASH)
             set(SSE42_SRCS ${ARCHDIR}/insert_string_sse.c)
             add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE4FLAG}\"")
             list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
-            set_intrinsics_option("${SSE4FLAG}" ${SSE42_SRCS})
+            set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE4FLAG}")
             if(HAVE_SSE42CRC_INTRIN)
                 add_definitions(-DX86_SSE42_CRC_INTRIN)
             endif()
@@ -750,14 +712,14 @@ if(WITH_OPTIM)
             set(SSE42_SRCS ${ARCHDIR}/compare258_sse.c)
             add_feature_info(SSE42_COMPARE258 1 "Support SSE4.2 optimized compare258, using \"${SSE4FLAG}\"")
             list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
-            set_intrinsics_option("${SSE4FLAG}" ${SSE42_SRCS})
+            set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE4FLAG}")
         endif()
         if(WITH_SSE2 AND HAVE_SSE2_INTRIN)
             add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
             set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c)
             list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
             if(NOT ${ARCH} MATCHES "x86_64")
-                set_intrinsics_option("${SSE2FLAG}" ${SSE2_SRCS})
+                set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG}")
                 add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable")
                 if(FORCE_SSE2)
                     add_definitions(-DX86_NOCHECK_SSE2)
@@ -769,18 +731,14 @@ if(WITH_OPTIM)
             set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
             add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
             list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
-            set_intrinsics_option("${SSSE3FLAG}" ${SSSE3_SRCS})
+            set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG}")
         endif()
-        if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN)
+        if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4)
             add_definitions(-DX86_PCLMULQDQ_CRC)
             set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c)
+            add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG}\"")
             list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS})
-            set_intrinsics_option("${SSE4FLAG} ${PCLMULFLAG}" ${PCLMULQDQ_SRCS})
-            if(HAVE_SSE42CRC_INLINE_ASM)
-                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${PCLMULFLAG}\"")
-            else()
-                add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${PCLMULFLAG} ${SSE4FLAG}\"")
-            endif()
+            set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG}")
         endif()
     endif()
 endif()