git.ipfire.org Git - thirdparty/zlib-ng.git/commitdiff
Only run checks for intrinsics if optimizations are enabled.
author Nathan Moinvaziri <nathan@solidstatenetworks.com>
Sat, 20 Mar 2021 21:10:14 +0000 (14:10 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Sun, 13 Jun 2021 18:52:56 +0000 (20:52 +0200)
CMakeLists.txt
cmake/detect-intrinsics.cmake [new file with mode: 0644]
configure

index 9462652893f32cc0394884c891b14ed7ed5a77b5..9fcef690425401ab44d2696e1d6db1c6f581ded6 100644 (file)
@@ -49,6 +49,7 @@ include(FeatureSummary)
 include(cmake/detect-arch.cmake)
 include(cmake/detect-install-dirs.cmake)
 include(cmake/detect-coverage.cmake)
+include(cmake/detect-intrinsics.cmake)
 include(cmake/detect-sanitizer.cmake)
 
 if(CMAKE_TOOLCHAIN_FILE)
@@ -139,23 +140,11 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
         set(WARNFLAGS "-w3")
         set(WARNFLAGS_MAINTAINER "-w3 -Wcheck -Wremarks")
         set(WARNFLAGS_DISABLE "")
-        if(BASEARCH_X86_FOUND)
-            set(AVX2FLAG "-mavx2")
-            set(SSE2FLAG "-msse2")
-            set(SSSE3FLAG "-mssse3")
-            set(SSE4FLAG "-msse4.2")
-        endif()
     else()
         set(WARNFLAGS "/W3")
         set(WARNFLAGS_MAINTAINER "/W5")
         set(WARNFLAGS_DISABLE "")
-        if(BASEARCH_X86_FOUND)
-            set(AVX2FLAG "/arch:AVX2")
-            set(SSE2FLAG "/arch:SSE2")
-            set(SSSE3FLAG "/arch:SSSE3")
-            set(SSE4FLAG "/arch:SSE4.2")
         endif()
-    endif()
     if(WITH_NATIVE_INSTRUCTIONS)
         message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration")
     endif()
@@ -177,38 +166,22 @@ elseif(MSVC)
         if(NOT "${ARCH}" MATCHES "aarch64")
             set(NEONFLAG "/arch:VFPv4")
         endif()
-    elseif(BASEARCH_X86_FOUND)
-        if(NOT "${ARCH}" MATCHES "x86_64")
-            set(SSE2FLAG "/arch:SSE2")
         endif()
-    endif()
     if(WITH_NATIVE_INSTRUCTIONS)
         message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration")
     endif()
-else()
-    # catch all GNU C compilers as well as Clang and AppleClang
-    if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-        set(__GNUC__ ON)
-    endif()
+elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
     # Enable warnings in GCC and Clang
-    if(__GNUC__)
         set(WARNFLAGS "-Wall")
         set(WARNFLAGS_MAINTAINER "-Wextra -Wpedantic")
         set(WARNFLAGS_DISABLE "-Wno-implicit-fallthrough")
-    endif()
     if(WITH_NATIVE_INSTRUCTIONS)
-        if(__GNUC__)
             if(BASEARCH_PPC_FOUND)
                 set(NATIVEFLAG "-mcpu=native")
             else()
                 set(NATIVEFLAG "-march=native")
             endif()
         else()
-            message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
-        endif()
-    endif()
-    if(NOT NATIVEFLAG)
-        if(__GNUC__)
             if(BASEARCH_ARM_FOUND)
                 if("${ARCH}" MATCHES "arm" AND NOT CMAKE_C_FLAGS MATCHES "-mfloat-abi")
                     # Auto-detect support for ARM floating point ABI
@@ -228,31 +201,7 @@ else()
                         message(STATUS "ARM floating point arch not auto-detected")
                     endif()
                 endif()
-                # NEON
-                if("${ARCH}" MATCHES "aarch64")
-                    set(NEONFLAG "-march=armv8-a+simd")
-                else()
-                    # Check whether -mfpu=neon is available
-                    set(CMAKE_REQUIRED_FLAGS "-mfpu=neon")
-                    check_c_source_compiles(
-                        "int main() { return 0; }"
-                        MFPU_NEON_AVAILABLE FAIL_REGEX "not supported")
-                    set(CMAKE_REQUIRED_FLAGS)
-                    if(MFPU_NEON_AVAILABLE)
-                        set(NEONFLAG "-mfpu=neon")
                     endif()
-                endif()
-                # ACLE
-                set(ACLEFLAG "-march=armv8-a+crc")
-            elseif(BASEARCH_PPC_FOUND)
-                set(POWER8FLAG "-mcpu=power8")
-            elseif(BASEARCH_X86_FOUND)
-                set(AVX2FLAG "-mavx2")
-                set(SSE2FLAG "-msse2")
-                set(SSSE3FLAG "-mssse3")
-                set(SSE4FLAG "-msse4")
-                set(PCLMULFLAG "-mpclmul")
-            endif()
             # Check whether -fno-lto is available
             set(CMAKE_REQUIRED_FLAGS "-fno-lto")
             check_c_source_compiles(
@@ -263,6 +212,9 @@ else()
                 set(NOLTOFLAG "-fno-lto")
             endif()
         endif()
+else()
+    if(WITH_NATIVE_INSTRUCTIONS)
+        message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
     endif()
 endif()
 
@@ -483,119 +435,7 @@ if(MSVC)
     add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE)
 endif()
 
-if(BASEARCH_PPC_FOUND)
-    # Check if we have what we need for POWER8 optimizations
-    set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}")
-    check_c_source_compiles(
-        "#include <sys/auxv.h>
-        int main() {
-            return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
-        }"
-        HAVE_POWER8
-    )
-    set(CMAKE_REQUIRED_FLAGS)
-elseif(BASEARCH_X86_FOUND)
-    # Check whether compiler supports SSE2 instrinics
-    set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
-    check_c_source_compile_or_run(
-        "#include <immintrin.h>
-        int main(void) {
-            __m128i zero = _mm_setzero_si128();
-            (void)zero;
-            return 0;
-        }"
-        HAVE_SSE2_INTRIN
-    )
-    # Check whether compiler supports SSSE3 intrinsics
-    set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG}")
-    check_c_source_compile_or_run(
-        "#include <immintrin.h>
-        int main(void) {
-            __m128i u, v, w;
-            u = _mm_set1_epi32(1);
-            v = _mm_set1_epi32(2);
-            w = _mm_hadd_epi32(u, v);
-            (void)w;
-            return 0;
-        }"
-        HAVE_SSSE3_INTRIN
-    )
-    # Check whether compiler supports SSE4 CRC inline asm
-    set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
-    check_c_source_compile_or_run(
-        "int main(void) {
-            unsigned val = 0, h = 0;
-        #if defined(_MSC_VER)
-            { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov val, eax }
-        #else
-            __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
-        #endif
-            return (int)h;
-        }"
-        HAVE_SSE42CRC_INLINE_ASM
-    )
-    # Check whether compiler supports SSE4 CRC intrinsics
-    check_c_source_compile_or_run(
-        "#include <immintrin.h>
-        int main(void) {
-            unsigned crc = 0;
-            char c = 'c';
-        #if defined(_MSC_VER)
-            crc = _mm_crc32_u32(crc, c);
-        #else
-            crc = __builtin_ia32_crc32qi(crc, c);
-        #endif
-            (void)crc;
-            return 0;
-        }"
-        HAVE_SSE42CRC_INTRIN
-    )
-    # Check whether compiler supports SSE4.2 compare string instrinics
-    check_c_source_compile_or_run(
-        "#include <immintrin.h>
-        int main(void) {
-            unsigned char a[64] = { 0 };
-            unsigned char b[64] = { 0 };
-            __m128i xmm_src0, xmm_src1;
-            xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
-            xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
-            return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
-        }"
-        HAVE_SSE42CMPSTR_INTRIN
-    )
-    # Check whether compiler supports PCLMULQDQ intrinsics
-    set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
-    if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
-        # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
-        check_c_source_compile_or_run(
-            "#include <immintrin.h>
-            int main(void) {
-                __m128i a = _mm_setzero_si128();
-                __m128i b = _mm_setzero_si128();
-                __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
-                (void)c;
-                return 0;
-            }"
-            HAVE_PCLMULQDQ_INTRIN
-        )
-    else()
-        set(HAVE_PCLMULQDQ_INTRIN NO)
-    endif()
-    # Check whether compiler supports AVX2 intrinics
-    set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG}")
-    check_c_source_compile_or_run(
-        "#include <immintrin.h>
-        int main(void) {
-            __m256i x = _mm256_set1_epi16(2);
-            const __m256i y = _mm256_set1_epi16(1);
-            x = _mm256_subs_epu16(x, y);
-            (void)x;
-            return 0;
-        }"
-        HAVE_AVX2_INTRIN
-    )
-    set(CMAKE_REQUIRED_FLAGS)
-
+if(BASEARCH_X86_FOUND)
     # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true
     if("${ARCH}" MATCHES "i[3-6]86")
         cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF)
@@ -717,13 +557,18 @@ if(WITH_OPTIM)
         list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm.h)
         list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/armfeature.c)
         if(WITH_ACLE AND NOT MSVC)
+            check_acle_intrinsics()
+            if(HAVE_ACLE_INTRIN)
             add_definitions(-DARM_ACLE_CRC_HASH)
             set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
             set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}")
             list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
             add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
         endif()
+        endif()
         if(WITH_NEON)
+            check_neon_intrinsics()
+            if(MFPU_NEON_AVAILABLE)
             add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
             set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c)
             list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
@@ -734,8 +579,11 @@ if(WITH_OPTIM)
             add_feature_info(NEON_ADLER32 1 "Support NEON instructions in adler32, using \"${NEONFLAG}\"")
             add_feature_info(NEON_SLIDEHASH 1 "Support NEON instructions in slide_hash, using \"${NEONFLAG}\"")
         endif()
+        endif()
     elseif(BASEARCH_PPC_FOUND)
-        if(WITH_POWER8 AND HAVE_POWER8)
+        if(WITH_POWER8)
+            check_power8_intrinsics()
+            if(HAVE_POWER8_INTRIN)
             add_definitions(-DPOWER8)
             add_definitions(-DPOWER_FEATURES)
             add_definitions(-DPOWER8_VSX_ADLER32)
@@ -746,6 +594,7 @@ if(WITH_OPTIM)
             list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS})
             set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}")
         endif()
+        endif()
     elseif(BASEARCH_S360_FOUND)
         if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
             list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c)
@@ -765,7 +614,9 @@ if(WITH_OPTIM)
         if(MSVC)
             list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
         endif()
-        if(WITH_AVX2 AND HAVE_AVX2_INTRIN)
+        if(WITH_AVX2)
+            check_avx2_intrinsics()
+                if(HAVE_AVX2_INTRIN)
             add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET)
             set(AVX2_SRCS ${ARCHDIR}/slide_avx.c)
             add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
@@ -778,7 +629,10 @@ if(WITH_OPTIM)
             list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
             set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}")
         endif()
-        if(WITH_SSE4 AND (HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN))
+        endif()
+        if(WITH_SSE4)
+            check_sse4_intrinsics()
+            if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
             add_definitions(-DX86_SSE42_CRC_HASH)
             set(SSE42_SRCS ${ARCHDIR}/insert_string_sse.c)
             add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE4FLAG}\"")
@@ -795,7 +649,10 @@ if(WITH_OPTIM)
             list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
             set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE4FLAG} ${NOLTOFLAG}")
         endif()
-        if(WITH_SSE2 AND HAVE_SSE2_INTRIN)
+        endif()
+        if(WITH_SSE2)
+            check_sse2_intrinsics()
+            if(HAVE_SSE2_INTRIN)
             add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
             set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c)
             list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
@@ -807,14 +664,20 @@ if(WITH_OPTIM)
                 endif()
             endif()
         endif()
-        if(WITH_SSSE3 AND HAVE_SSSE3_INTRIN)
+        endif()
+        if(WITH_SSSE3)
+            check_ssse3_intrinsics()
+            if(HAVE_SSSE3_INTRIN)
             add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
             set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
             add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
             list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
             set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
         endif()
-        if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4)
+        endif()
+        if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE4)
+            check_pclmulqdq_intrinsics()
+            if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN)
             add_definitions(-DX86_PCLMULQDQ_CRC)
             set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c)
             add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG}\"")
@@ -823,6 +686,8 @@ if(WITH_OPTIM)
         endif()
     endif()
 endif()
+endif()
+
 message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}")
 
 #============================================================================
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
new file mode 100644 (file)
index 0000000..a360b7e
--- /dev/null
@@ -0,0 +1,221 @@
+# detect-intrinsics.cmake -- Detect compiler intrinsics support
+# Licensed under the Zlib license, see LICENSE.md for details
+
+# check_acle_intrinsics()
+#   Probes whether the C compiler accepts the flag stored in ACLEFLAG by
+#   compiling an empty main() with it.
+#   Being a macro, it writes ACLEFLAG and CMAKE_REQUIRED_FLAGS in the
+#   caller's scope.
+#   Result variable: HAVE_ACLE_INTRIN
+macro(check_acle_intrinsics)
+    # Only pick an explicit -march when NATIVEFLAG is not in effect;
+    # otherwise ACLEFLAG keeps whatever value the caller already has.
+    if(NOT NATIVEFLAG)
+        set(ACLEFLAG "-march=armv8-a+crc")
+    endif()
+
+    # Build the probe with the candidate flag; compiler output matching
+    # "not supported" is treated as a failure.
+    set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}")
+    check_c_source_compiles("int main() { return 0; }" HAVE_ACLE_INTRIN
+        FAIL_REGEX "not supported")
+    # Do not let the probe flag leak into later checks.
+    unset(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_avx2_intrinsics()
+#   Chooses AVX2FLAG for the active compiler, then hands a small program that
+#   uses 256-bit integer intrinsics from <immintrin.h> to
+#   check_c_source_compile_or_run().
+#   Being a macro, it writes AVX2FLAG and CMAKE_REQUIRED_FLAGS in the
+#   caller's scope.
+#   Result variable: HAVE_AVX2_INTRIN
+macro(check_avx2_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        # Intel compiler: MSVC-style switch unless on a Unix host or Apple.
+        if(NOT (CMAKE_HOST_UNIX OR APPLE))
+            set(AVX2FLAG "/arch:AVX2")
+        else()
+            set(AVX2FLAG "-mavx2")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        # GCC/Clang: explicit flag only when NATIVEFLAG is not in effect.
+        set(AVX2FLAG "-mavx2")
+    endif()
+
+    # Check whether compiler supports AVX2 intrinsics
+    set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG}")
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+        int main(void) {
+            __m256i x = _mm256_set1_epi16(2);
+            const __m256i y = _mm256_set1_epi16(1);
+            x = _mm256_subs_epu16(x, y);
+            (void)x;
+            return 0;
+        }"
+        HAVE_AVX2_INTRIN
+    )
+    # Do not let the probe flag leak into later checks.
+    unset(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_neon_intrinsics()
+#   Chooses NEONFLAG for GCC/Clang (unless NATIVEFLAG is in effect) and probes
+#   whether the compiler accepts it by compiling an empty main().
+#   For other compilers NEONFLAG is used as the caller left it.
+#   Being a macro, it writes NEONFLAG and CMAKE_REQUIRED_FLAGS in the
+#   caller's scope.
+#   Result variable: MFPU_NEON_AVAILABLE
+macro(check_neon_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        if(NOT "${ARCH}" MATCHES "aarch64")
+            # ARCH does not match aarch64: request NEON through the FPU option.
+            set(NEONFLAG "-mfpu=neon")
+        else()
+            # ARCH matches aarch64: request SIMD through -march.
+            set(NEONFLAG "-march=armv8-a+simd")
+        endif()
+    endif()
+
+    # Build the probe with the candidate flag; compiler output matching
+    # "not supported" is treated as a failure.
+    set(CMAKE_REQUIRED_FLAGS "${NEONFLAG}")
+    check_c_source_compiles("int main() { return 0; }" MFPU_NEON_AVAILABLE
+        FAIL_REGEX "not supported")
+    # Do not let the probe flag leak into later checks.
+    unset(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_pclmulqdq_intrinsics()
+#   Chooses PCLMULFLAG for GCC/Clang (unless NATIVEFLAG is in effect) and hands
+#   a program calling _mm_clmulepi64_si128() to check_c_source_compile_or_run().
+#   The probe is skipped, and the result forced OFF, for Apple with an i386 ARCH.
+#   Being a macro, it writes PCLMULFLAG, CMAKE_REQUIRED_FLAGS and possibly
+#   HAVE_PCLMULQDQ_INTRIN in the caller's scope.
+#   Result variable: HAVE_PCLMULQDQ_INTRIN
+macro(check_pclmulqdq_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        set(PCLMULFLAG "-mpclmul")
+    endif()
+
+    if(APPLE AND "${ARCH}" MATCHES "i386")
+        # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
+        set(HAVE_PCLMULQDQ_INTRIN OFF)
+    else()
+        # Check whether compiler supports PCLMULQDQ intrinsics
+        set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
+        check_c_source_compile_or_run(
+            "#include <immintrin.h>
+            int main(void) {
+                __m128i a = _mm_setzero_si128();
+                __m128i b = _mm_setzero_si128();
+                __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
+                (void)c;
+                return 0;
+            }"
+            HAVE_PCLMULQDQ_INTRIN
+        )
+        # Do not let the probe flag leak into later checks.
+        unset(CMAKE_REQUIRED_FLAGS)
+    endif()
+endmacro()
+
+# check_power8_intrinsics()
+#   Chooses POWER8FLAG for GCC/Clang (unless NATIVEFLAG is in effect) and
+#   checks that a program including <sys/auxv.h> and testing
+#   getauxval(AT_HWCAP2) against PPC_FEATURE2_ARCH_2_07 compiles with it.
+#   Being a macro, it writes POWER8FLAG and CMAKE_REQUIRED_FLAGS in the
+#   caller's scope.
+#   Result variable: HAVE_POWER8_INTRIN
+macro(check_power8_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        set(POWER8FLAG "-mcpu=power8")
+    endif()
+
+    # Check if we have what we need for POWER8 optimizations
+    set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}")
+    check_c_source_compiles(
+        "#include <sys/auxv.h>
+        int main() {
+            return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+        }"
+        HAVE_POWER8_INTRIN
+    )
+    # Do not let the probe flag leak into later checks.
+    unset(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_sse2_intrinsics()
+#   Chooses SSE2FLAG for the active compiler, then hands a program calling
+#   _mm_setzero_si128() to check_c_source_compile_or_run().
+#   Being a macro, it writes SSE2FLAG and CMAKE_REQUIRED_FLAGS in the
+#   caller's scope.
+#   Result variable: HAVE_SSE2_INTRIN
+macro(check_sse2_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        # Intel compiler: MSVC-style switch unless on a Unix host or Apple.
+        if(NOT (CMAKE_HOST_UNIX OR APPLE))
+            set(SSE2FLAG "/arch:SSE2")
+        else()
+            set(SSE2FLAG "-msse2")
+        endif()
+    elseif(MSVC)
+        # Kept as a nested test on purpose: an MSVC build whose ARCH matches
+        # x86_64 must stop here and not fall through to the GNU/Clang branch.
+        if(NOT "${ARCH}" MATCHES "x86_64")
+            set(SSE2FLAG "/arch:SSE2")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        # GCC/Clang: explicit flag only when NATIVEFLAG is not in effect.
+        set(SSE2FLAG "-msse2")
+    endif()
+
+    # Check whether compiler supports SSE2 intrinsics
+    set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+        int main(void) {
+            __m128i zero = _mm_setzero_si128();
+            (void)zero;
+            return 0;
+        }"
+        HAVE_SSE2_INTRIN
+    )
+    # Do not let the probe flag leak into later checks.
+    unset(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_ssse3_intrinsics()
+#   Chooses SSSE3FLAG for the active compiler, then hands a program calling
+#   _mm_hadd_epi32() to check_c_source_compile_or_run().
+#   Being a macro, it writes SSSE3FLAG and CMAKE_REQUIRED_FLAGS in the
+#   caller's scope.
+#   Result variable: HAVE_SSSE3_INTRIN
+macro(check_ssse3_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        if(CMAKE_HOST_UNIX OR APPLE)
+            set(SSSE3FLAG "-mssse3")
+        else()
+            set(SSSE3FLAG "/arch:SSSE3")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+        # Explicit flag only when NATIVEFLAG is not in effect.
+        if(NOT NATIVEFLAG)
+            set(SSSE3FLAG "-mssse3")
+        endif()
+    endif()
+    # Check whether compiler supports SSSE3 intrinsics
+    set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG}")
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+        int main(void) {
+            __m128i u, v, w;
+            u = _mm_set1_epi32(1);
+            v = _mm_set1_epi32(2);
+            w = _mm_hadd_epi32(u, v);
+            (void)w;
+            return 0;
+        }"
+        HAVE_SSSE3_INTRIN
+    )
+    # Clear the probe flag, as the other check_*_intrinsics macros do. Since
+    # this is a macro, leaving it set would leak the SSSE3 flag into every
+    # later compile check made from the caller's scope.
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_sse4_intrinsics()
+#   Chooses SSE4FLAG for the active compiler, then hands three programs to
+#   check_c_source_compile_or_run(), all using the same flag:
+#     HAVE_SSE42CRC_INLINE_ASM - crc32 instruction via inline assembly
+#     HAVE_SSE42CRC_INTRIN     - crc32 via intrinsic/builtin
+#     HAVE_SSE42CMPSTR_INTRIN  - _mm_cmpestri() string compare intrinsic
+#   Being a macro, it writes SSE4FLAG and CMAKE_REQUIRED_FLAGS in the
+#   caller's scope.
+macro(check_sse4_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        # Intel compiler: MSVC-style switch unless on a Unix host or Apple.
+        if(NOT (CMAKE_HOST_UNIX OR APPLE))
+            set(SSE4FLAG "/arch:SSE4.2")
+        else()
+            set(SSE4FLAG "-msse4.2")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        # GCC/Clang: explicit flag only when NATIVEFLAG is not in effect.
+        set(SSE4FLAG "-msse4")
+    endif()
+
+    # The flag is set once and shared by all three probes below.
+    set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
+
+    # Check whether compiler supports SSE4 CRC inline asm
+    check_c_source_compile_or_run(
+        "int main(void) {
+            unsigned val = 0, h = 0;
+        #if defined(_MSC_VER)
+            { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov val, eax }
+        #else
+            __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
+        #endif
+            return (int)h;
+        }"
+        HAVE_SSE42CRC_INLINE_ASM
+    )
+
+    # Check whether compiler supports SSE4 CRC intrinsics
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+        int main(void) {
+            unsigned crc = 0;
+            char c = 'c';
+        #if defined(_MSC_VER)
+            crc = _mm_crc32_u32(crc, c);
+        #else
+            crc = __builtin_ia32_crc32qi(crc, c);
+        #endif
+            (void)crc;
+            return 0;
+        }"
+        HAVE_SSE42CRC_INTRIN
+    )
+
+    # Check whether compiler supports SSE4.2 compare string intrinsics
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+        int main(void) {
+            unsigned char a[64] = { 0 };
+            unsigned char b[64] = { 0 };
+            __m128i xmm_src0, xmm_src1;
+            xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
+            xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
+            return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
+        }"
+        HAVE_SSE42CMPSTR_INTRIN
+    )
+
+    # Do not let the probe flag leak into later checks.
+    unset(CMAKE_REQUIRED_FLAGS)
+endmacro()
index 8cd48786ee6ec2cb34c95712d5ad3f24c9b4015b..3e69da8976f2599547c672bee5e9ed2756dda275 100755 (executable)
--- a/configure
+++ b/configure
@@ -972,76 +972,116 @@ else
     echo "Checking for __builtin_ctzll ... No." | tee -a configure.log
 fi
 
-# Check for SSE2 intrinsics
-case "${ARCH}" in
-    i386 | i486 | i586 | i686 | x86_64)
-        cat > $test.c << EOF
+check_avx2_intrinsics() {
+    # Check whether compiler supports AVX2 intrinsics
+    cat > $test.c << EOF
 #include <immintrin.h>
 int main(void) {
-    __m128i zero = _mm_setzero_si128();
-    (void)zero;
+    __m256i x = _mm256_set1_epi16(2);
+    const __m256i y = _mm256_set1_epi16(1);
+    x = _mm256_subs_epu16(x, y);
+    (void)x;
     return 0;
 }
 EOF
-        if try ${CC} ${CFLAGS} ${sse2flag} $test.c; then
-            echo "Checking for SSE2 intrinsics ... Yes." | tee -a configure.log
-            HAVE_SSE2_INTRIN=1
-        else
-            echo "Checking for SSE2 intrinsics ... No." | tee -a configure.log
-            HAVE_SSE2_INTRIN=0
-        fi
-        ;;
-esac
+    if try ${CC} ${CFLAGS} ${avx2flag} $test.c; then
+        echo "Checking for AVX2 intrinsics ... Yes." | tee -a configure.log
+        HAVE_AVX2_INTRIN=1
+    else
+        echo "Checking for AVX2 intrinsics ... No." | tee -a configure.log
+        HAVE_AVX2_INTRIN=0
+    fi
+}
 
-# Check for SSSE3 intrinsics
-case "${ARCH}" in
-    i386 | i486 | i586 | i686 | x86_64)
-        cat > $test.c << EOF
-#include <x86intrin.h>
-int main(void)
-{
-    __m128i u, v, w;
-    u = _mm_set1_epi32(1);
-    v = _mm_set1_epi32(2);
-    w = _mm_hadd_epi32(u, v);
-    (void)w;
+check_neon_intrinsics() {
+    # Check whether -mfpu=neon is available on ARM processors.
+    cat > $test.c << EOF
+int main() { return 0; }
+EOF
+    if try $CC -c $CFLAGS -mfpu=neon $test.c; then
+        MFPU_NEON_AVAILABLE=1
+        echo "Check whether -mfpu=neon is available ... Yes." | tee -a configure.log
+    else
+        MFPU_NEON_AVAILABLE=0
+        echo "Check whether -mfpu=neon is available ... No." | tee -a configure.log
+    fi
+}
+
+check_pclmulqdq_intrinsics() {
+    # Check whether compiler supports PCLMULQDQ intrinsics
+    cat > $test.c << EOF
+#include <immintrin.h>
+#include <wmmintrin.h>
+int main(void) {
+    __m128i a = _mm_setzero_si128();
+    __m128i b = _mm_setzero_si128();
+    __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
+    (void)c;
     return 0;
 }
 EOF
-        if try ${CC} ${CFLAGS} ${ssse3flag} $test.c; then
-            echo "Checking for SSSE3 intrinsics ... Yes." | tee -a configure.log
-            HAVE_SSSE3_INTRIN=1
-        else
-            echo "Checking for SSSE3 intrinsics ... No." | tee -a configure.log
-            HAVE_SSSE3_INTRIN=0
-        fi
-        ;;
-esac
+    if try ${CC} ${CFLAGS} ${pclmulflag} $test.c; then
+        echo "Checking for PCLMULQDQ intrinsics ... Yes." | tee -a configure.log
+        HAVE_PCLMULQDQ_INTRIN=1
+    else
+        echo "Checking for PCLMULQDQ intrinsics ... No." | tee -a configure.log
+        HAVE_PCLMULQDQ_INTRIN=0
+    fi
+}
 
-# Check for SSE4.2 CRC inline assembly
-case "${ARCH}" in
-    i386 | i486 | i586 | i686 | x86_64)
-        cat > $test.c << EOF
+check_power8_intrinsics() {
+    # Check whether features needed by POWER optimisations are available
+    cat > $test.c << EOF
+#include <sys/auxv.h>
+int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); }
+EOF
+    if try $CC -c $CFLAGS -mcpu=power8 $test.c; then
+        HAVE_POWER8_INTRIN=1
+        echo "Check whether POWER8 instructions are available ... Yes." | tee -a configure.log
+    else
+        HAVE_POWER8_INTRIN=0
+        echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log
+    fi
+}
+
+check_sse2_intrinsics() {
+    # Check whether compiler supports SSE2 intrinsics
+    cat > $test.c << EOF
+#include <immintrin.h>
+int main(void) {
+    __m128i zero = _mm_setzero_si128();
+    (void)zero;
+    return 0;
+}
+EOF
+    if try ${CC} ${CFLAGS} ${sse2flag} $test.c; then
+        echo "Checking for SSE2 intrinsics ... Yes." | tee -a configure.log
+        HAVE_SSE2_INTRIN=1
+    else
+        echo "Checking for SSE2 intrinsics ... No." | tee -a configure.log
+        HAVE_SSE2_INTRIN=0
+    fi
+}
+
+check_sse4_intrinsics() {
+    # Check whether compiler supports SSE4 CRC inline asm
+    cat > $test.c << EOF
 int main(void) {
     unsigned val = 0, h = 0;
     __asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) );
     return (int) h;
 }
 EOF
-        if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
-            echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
-            HAVE_SSE42CRC_INLINE_ASM=1
-        else
-            echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
-            HAVE_SSE42CRC_INLINE_ASM=0
-        fi
-        ;;
-esac
+    if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
+        echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
+        HAVE_SSE42CRC_INLINE_ASM=1
+    else
+        echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
+        HAVE_SSE42CRC_INLINE_ASM=0
+    fi
 
-# Check for SSE4.2 CRC intrinsics
-case "${ARCH}" in
-    i386 | i486 | i586 | i686 | x86_64)
-        cat > $test.c << EOF
+    # Check whether compiler supports SSE4.2 CRC intrinsics
+    cat > $test.c << EOF
 int main(void) {
     unsigned crc = 0;
     char c = 'c';
@@ -1050,20 +1090,16 @@ int main(void) {
     return 0;
 }
 EOF
-        if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
-            echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
-            HAVE_SSE42CRC_INTRIN=1
-        else
-            echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
-            HAVE_SSE42CRC_INTRIN=0
-        fi
-        ;;
-esac
+    if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
+        echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
+        HAVE_SSE42CRC_INTRIN=1
+    else
+        echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
+        HAVE_SSE42CRC_INTRIN=0
+    fi
 
-# Check for SSE4.2 compare string intrinsics
-case "${ARCH}" in
-    i386 | i486 | i586 | i686 | x86_64)
-        cat > $test.c << EOF
+    # Check whether compiler supports SSE4.2 compare string intrinsics
+    cat > $test.c << EOF
 #include <immintrin.h>
 int main(void)
 {
@@ -1075,38 +1111,40 @@ int main(void)
     return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
 }
 EOF
-        if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
-            echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log
-            HAVE_SSE42CMPSTR_INTRIN=1
-        else
-            echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log
-            HAVE_SSE42CMPSTR_INTRIN=0
-        fi
-        ;;
-esac
+    if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
+        echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log
+        HAVE_SSE42CMPSTR_INTRIN=1
+    else
+        echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log
+        HAVE_SSE42CMPSTR_INTRIN=0
+    fi
+}
 
-# Check for PCLMULQDQ intrinsics
-case "${ARCH}" in
-    i386 | i486 | i586 | i686 | x86_64)
-        cat > $test.c << EOF
-#include <immintrin.h>
-#include <wmmintrin.h>
-int main(void) {
-    __m128i a = _mm_setzero_si128();
-    __m128i b = _mm_setzero_si128();
-    __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
-    (void)c;
+check_ssse3_intrinsics() {
+    # Check whether compiler supports SSSE3 intrinsics
+    cat > $test.c << EOF
+#include <x86intrin.h>
+int main(void)
+{
+    __m128i u, v, w;
+    u = _mm_set1_epi32(1);
+    v = _mm_set1_epi32(2);
+    w = _mm_hadd_epi32(u, v);
+    (void)w;
     return 0;
 }
 EOF
-        if try ${CC} ${CFLAGS} ${pclmulflag} $test.c; then
-            echo "Checking for PCLMULQDQ intrinsics ... Yes." | tee -a configure.log
-            HAVE_PCLMULQDQ_INTRIN=1
-        else
-            echo "Checking for PCLMULQDQ intrinsics ... No." | tee -a configure.log
-            HAVE_PCLMULQDQ_INTRIN=0
-        fi
+    if try ${CC} ${CFLAGS} ${ssse3flag} $test.c; then
+        echo "Checking for SSSE3 intrinsics ... Yes." | tee -a configure.log
+        HAVE_SSSE3_INTRIN=1
+    else
+        echo "Checking for SSSE3 intrinsics ... No." | tee -a configure.log
+        HAVE_SSSE3_INTRIN=0
+    fi
+}
 
+case "${ARCH}" in
+    i386 | i486 | i586 | i686 | x86_64)
         # Enable deflate_medium at level 1
         if test $without_new_strategies -eq 1; then
             CFLAGS="${CFLAGS} -DNO_QUICK_STRATEGY"
@@ -1120,75 +1158,6 @@ EOF
         ;;
 esac
 
-# Check for AVX2 intrinsics
-case "${ARCH}" in
-    i386 | i486 | i586 | i686 | x86_64)
-        cat > $test.c << EOF
-#include <immintrin.h>
-int main(void) {
-    __m256i x = _mm256_set1_epi16(2);
-    const __m256i y = _mm256_set1_epi16(1);
-    x = _mm256_subs_epu16(x, y);
-    (void)x;
-    return 0;
-}
-EOF
-        if try ${CC} ${CFLAGS} ${avx2flag} $test.c; then
-            echo "Checking for AVX2 intrinsics ... Yes." | tee -a configure.log
-            HAVE_AVX2_INTRIN=1
-        else
-            echo "Checking for AVX2 intrinsics ... No." | tee -a configure.log
-            HAVE_AVX2_INTRIN=0
-        fi
-        ;;
-esac
-
-
-# Check whether -mfpu=neon is available on ARM processors.
-case "${ARCH}" in
-    arm*)
-        cat > $test.c << EOF
-int main() { return 0; }
-EOF
-        if try $CC -c $CFLAGS -mfpu=neon $test.c; then
-            MFPU_NEON_AVAILABLE=1
-            echo "Check whether -mfpu=neon is available ... Yes." | tee -a configure.log
-        else
-            MFPU_NEON_AVAILABLE=0
-            echo "Check whether -mfpu=neon is available ... No." | tee -a configure.log
-        fi
-        ;;
-esac
-
-# Check whether features needed by POWER optimisations are available
-case "${ARCH}" in
-    powerpc*)
-        cat > $test.c << EOF
-#include <sys/auxv.h>
-int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); }
-EOF
-        if try $CC -c $CFLAGS -mcpu=power8 $test.c; then
-            HAVE_POWER8=1
-            echo "Check whether POWER8 instructions are available ... Yes." | tee -a configure.log
-        else
-            HAVE_POWER8=0
-            echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log
-        fi
-esac
-
-# Check whether sys/sdt.h is available
-cat > $test.c << EOF
-#include <sys/sdt.h>
-int main() { return 0; }
-EOF
-if try ${CC} ${CFLAGS} $test.c; then
-    echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log
-    CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H"
-    SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H"
-else
-    echo "Checking for sys/sdt.h ... No." | tee -a configure.log
-fi
-
 ARCHDIR='arch/generic'
 ARCH_STATIC_OBJS=''
 ARCH_SHARED_OBJS=''
@@ -1210,6 +1179,8 @@ case "${ARCH}" in
             ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o"
             ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo"
 
+            check_avx2_intrinsics
+
             if test ${HAVE_AVX2_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
                 SFLAGS="${SFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
@@ -1217,6 +1188,8 @@ case "${ARCH}" in
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_avx.lo chunkset_avx.lo compare258_avx.lo adler32_avx.lo"
             fi
 
+            check_sse4_intrinsics
+
             if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH"
                 SFLAGS="${SFLAGS} -DX86_SSE42_CRC_HASH"
@@ -1230,6 +1203,8 @@ case "${ARCH}" in
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo"
             fi
 
+            check_sse4_intrinsics
+
             if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
                 SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
@@ -1238,6 +1213,8 @@ case "${ARCH}" in
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} compare258_sse.lo"
             fi
 
+            check_sse2_intrinsics
+
             if test ${HAVE_SSE2_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
                 SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
@@ -1250,6 +1227,8 @@ case "${ARCH}" in
                 fi
             fi
 
+            check_ssse3_intrinsics
+
             if test ${HAVE_SSSE3_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
                 SFLAGS="${SFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
@@ -1257,6 +1236,8 @@ case "${ARCH}" in
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo"
             fi
 
+            check_pclmulqdq_intrinsics
+
             if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
                 SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC"
@@ -1351,6 +1332,10 @@ EOF
             SFLAGS="${SFLAGS} ${floatabi}"
         fi
 
+        if test $without_optimizations -eq 0; then
+            check_neon_intrinsics
+        fi
+
         case "${ARCH}" in
             armv[345]*)
                 if test $without_optimizations -eq 0; then
@@ -1527,7 +1512,10 @@ EOF
         ARCHDIR=arch/power
 
         if test $without_optimizations -eq 0; then
-            if test $HAVE_POWER8 -eq 1; then
+
+            check_power8_intrinsics
+
+            if test $HAVE_POWER8_INTRIN -eq 1; then
                 CFLAGS="${CFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_SLIDEHASH"
                 SFLAGS="${SFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_SLIDEHASH"