include(cmake/detect-arch.cmake)
include(cmake/detect-install-dirs.cmake)
include(cmake/detect-coverage.cmake)
+include(cmake/detect-intrinsics.cmake)
include(cmake/detect-sanitizer.cmake)
if(CMAKE_TOOLCHAIN_FILE)
set(WARNFLAGS "-w3")
set(WARNFLAGS_MAINTAINER "-w3 -Wcheck -Wremarks")
set(WARNFLAGS_DISABLE "")
- if(BASEARCH_X86_FOUND)
- set(AVX2FLAG "-mavx2")
- set(SSE2FLAG "-msse2")
- set(SSSE3FLAG "-mssse3")
- set(SSE4FLAG "-msse4.2")
- endif()
else()
set(WARNFLAGS "/W3")
set(WARNFLAGS_MAINTAINER "/W5")
set(WARNFLAGS_DISABLE "")
- if(BASEARCH_X86_FOUND)
- set(AVX2FLAG "/arch:AVX2")
- set(SSE2FLAG "/arch:SSE2")
- set(SSSE3FLAG "/arch:SSSE3")
- set(SSE4FLAG "/arch:SSE4.2")
endif()
- endif()
if(WITH_NATIVE_INSTRUCTIONS)
message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration")
endif()
if(NOT "${ARCH}" MATCHES "aarch64")
set(NEONFLAG "/arch:VFPv4")
endif()
- elseif(BASEARCH_X86_FOUND)
- if(NOT "${ARCH}" MATCHES "x86_64")
- set(SSE2FLAG "/arch:SSE2")
endif()
- endif()
if(WITH_NATIVE_INSTRUCTIONS)
message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration")
endif()
-else()
- # catch all GNU C compilers as well as Clang and AppleClang
- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(__GNUC__ ON)
- endif()
+elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
# Enable warnings in GCC and Clang
- if(__GNUC__)
set(WARNFLAGS "-Wall")
set(WARNFLAGS_MAINTAINER "-Wextra -Wpedantic")
set(WARNFLAGS_DISABLE "-Wno-implicit-fallthrough")
- endif()
if(WITH_NATIVE_INSTRUCTIONS)
- if(__GNUC__)
if(BASEARCH_PPC_FOUND)
set(NATIVEFLAG "-mcpu=native")
else()
set(NATIVEFLAG "-march=native")
endif()
else()
- message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
- endif()
- endif()
- if(NOT NATIVEFLAG)
- if(__GNUC__)
if(BASEARCH_ARM_FOUND)
if("${ARCH}" MATCHES "arm" AND NOT CMAKE_C_FLAGS MATCHES "-mfloat-abi")
# Auto-detect support for ARM floating point ABI
message(STATUS "ARM floating point arch not auto-detected")
endif()
endif()
- # NEON
- if("${ARCH}" MATCHES "aarch64")
- set(NEONFLAG "-march=armv8-a+simd")
- else()
- # Check whether -mfpu=neon is available
- set(CMAKE_REQUIRED_FLAGS "-mfpu=neon")
- check_c_source_compiles(
- "int main() { return 0; }"
- MFPU_NEON_AVAILABLE FAIL_REGEX "not supported")
- set(CMAKE_REQUIRED_FLAGS)
- if(MFPU_NEON_AVAILABLE)
- set(NEONFLAG "-mfpu=neon")
endif()
- endif()
- # ACLE
- set(ACLEFLAG "-march=armv8-a+crc")
- elseif(BASEARCH_PPC_FOUND)
- set(POWER8FLAG "-mcpu=power8")
- elseif(BASEARCH_X86_FOUND)
- set(AVX2FLAG "-mavx2")
- set(SSE2FLAG "-msse2")
- set(SSSE3FLAG "-mssse3")
- set(SSE4FLAG "-msse4")
- set(PCLMULFLAG "-mpclmul")
- endif()
# Check whether -fno-lto is available
set(CMAKE_REQUIRED_FLAGS "-fno-lto")
check_c_source_compiles(
set(NOLTOFLAG "-fno-lto")
endif()
endif()
+else()
+ if(WITH_NATIVE_INSTRUCTIONS)
+ message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
endif()
endif()
add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE)
endif()
-if(BASEARCH_PPC_FOUND)
- # Check if we have what we need for POWER8 optimizations
- set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}")
- check_c_source_compiles(
- "#include <sys/auxv.h>
- int main() {
- return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
- }"
- HAVE_POWER8
- )
- set(CMAKE_REQUIRED_FLAGS)
-elseif(BASEARCH_X86_FOUND)
- # Check whether compiler supports SSE2 instrinics
- set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
- check_c_source_compile_or_run(
- "#include <immintrin.h>
- int main(void) {
- __m128i zero = _mm_setzero_si128();
- (void)zero;
- return 0;
- }"
- HAVE_SSE2_INTRIN
- )
- # Check whether compiler supports SSSE3 intrinsics
- set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG}")
- check_c_source_compile_or_run(
- "#include <immintrin.h>
- int main(void) {
- __m128i u, v, w;
- u = _mm_set1_epi32(1);
- v = _mm_set1_epi32(2);
- w = _mm_hadd_epi32(u, v);
- (void)w;
- return 0;
- }"
- HAVE_SSSE3_INTRIN
- )
- # Check whether compiler supports SSE4 CRC inline asm
- set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
- check_c_source_compile_or_run(
- "int main(void) {
- unsigned val = 0, h = 0;
- #if defined(_MSC_VER)
- { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov val, eax }
- #else
- __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
- #endif
- return (int)h;
- }"
- HAVE_SSE42CRC_INLINE_ASM
- )
- # Check whether compiler supports SSE4 CRC intrinsics
- check_c_source_compile_or_run(
- "#include <immintrin.h>
- int main(void) {
- unsigned crc = 0;
- char c = 'c';
- #if defined(_MSC_VER)
- crc = _mm_crc32_u32(crc, c);
- #else
- crc = __builtin_ia32_crc32qi(crc, c);
- #endif
- (void)crc;
- return 0;
- }"
- HAVE_SSE42CRC_INTRIN
- )
- # Check whether compiler supports SSE4.2 compare string instrinics
- check_c_source_compile_or_run(
- "#include <immintrin.h>
- int main(void) {
- unsigned char a[64] = { 0 };
- unsigned char b[64] = { 0 };
- __m128i xmm_src0, xmm_src1;
- xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
- xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
- return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
- }"
- HAVE_SSE42CMPSTR_INTRIN
- )
- # Check whether compiler supports PCLMULQDQ intrinsics
- set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
- if(NOT (APPLE AND "${ARCH}" MATCHES "i386"))
- # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
- check_c_source_compile_or_run(
- "#include <immintrin.h>
- int main(void) {
- __m128i a = _mm_setzero_si128();
- __m128i b = _mm_setzero_si128();
- __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
- (void)c;
- return 0;
- }"
- HAVE_PCLMULQDQ_INTRIN
- )
- else()
- set(HAVE_PCLMULQDQ_INTRIN NO)
- endif()
- # Check whether compiler supports AVX2 intrinics
- set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG}")
- check_c_source_compile_or_run(
- "#include <immintrin.h>
- int main(void) {
- __m256i x = _mm256_set1_epi16(2);
- const __m256i y = _mm256_set1_epi16(1);
- x = _mm256_subs_epu16(x, y);
- (void)x;
- return 0;
- }"
- HAVE_AVX2_INTRIN
- )
- set(CMAKE_REQUIRED_FLAGS)
-
+if(BASEARCH_X86_FOUND)
# FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true
if("${ARCH}" MATCHES "i[3-6]86")
cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/armfeature.c)
if(WITH_ACLE AND NOT MSVC)
+ check_acle_intrinsics()
+ if(HAVE_ACLE_INTRIN)
add_definitions(-DARM_ACLE_CRC_HASH)
set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}")
list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
endif()
+ endif()
if(WITH_NEON)
+ check_neon_intrinsics()
+ if(MFPU_NEON_AVAILABLE)
add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH)
set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c)
list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS})
add_feature_info(NEON_ADLER32 1 "Support NEON instructions in adler32, using \"${NEONFLAG}\"")
add_feature_info(NEON_SLIDEHASH 1 "Support NEON instructions in slide_hash, using \"${NEONFLAG}\"")
endif()
+ endif()
elseif(BASEARCH_PPC_FOUND)
- if(WITH_POWER8 AND HAVE_POWER8)
+ if(WITH_POWER8)
+ check_power8_intrinsics()
+ if(HAVE_POWER8_INTRIN)
add_definitions(-DPOWER8)
add_definitions(-DPOWER_FEATURES)
add_definitions(-DPOWER8_VSX_ADLER32)
list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS})
set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}")
endif()
+ endif()
elseif(BASEARCH_S360_FOUND)
if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c)
if(MSVC)
list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
endif()
- if(WITH_AVX2 AND HAVE_AVX2_INTRIN)
+ if(WITH_AVX2)
+ check_avx2_intrinsics()
+ if(HAVE_AVX2_INTRIN)
add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET)
set(AVX2_SRCS ${ARCHDIR}/slide_avx.c)
add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS})
set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}")
endif()
- if(WITH_SSE4 AND (HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN))
+ endif()
+ if(WITH_SSE4)
+ check_sse4_intrinsics()
+ if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
add_definitions(-DX86_SSE42_CRC_HASH)
set(SSE42_SRCS ${ARCHDIR}/insert_string_sse.c)
add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE4FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE4FLAG} ${NOLTOFLAG}")
endif()
- if(WITH_SSE2 AND HAVE_SSE2_INTRIN)
+ endif()
+ if(WITH_SSE2)
+ check_sse2_intrinsics()
+ if(HAVE_SSE2_INTRIN)
add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH)
set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c)
list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
endif()
endif()
endif()
- if(WITH_SSSE3 AND HAVE_SSSE3_INTRIN)
+ endif()
+ if(WITH_SSSE3)
+ check_ssse3_intrinsics()
+ if(HAVE_SSSE3_INTRIN)
add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
endif()
- if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4)
+ endif()
+ if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE4)
+ check_pclmulqdq_intrinsics()
+ if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN)
add_definitions(-DX86_PCLMULQDQ_CRC)
set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c)
add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG}\"")
endif()
endif()
endif()
+endif()
+
message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}")
#============================================================================
--- /dev/null
+# detect-intrinsics.cmake -- Detect compiler intrinsics support
+# Licensed under the Zlib license, see LICENSE.md for details
+
+# check_acle_intrinsics()
+#
+# Probes whether the C compiler accepts the ACLE (CRC) architecture flag.
+# Being a macro, every variable below is set in the caller's scope:
+#   ACLEFLAG          - set to "-march=armv8-a+crc" unless NATIVEFLAG is set
+#   HAVE_ACLE_INTRIN  - result of compiling a trivial program with ACLEFLAG
+#   CMAKE_REQUIRED_FLAGS is used for the probe and cleared again afterwards.
+macro(check_acle_intrinsics)
+    if(NOT NATIVEFLAG)
+        set(ACLEFLAG "-march=armv8-a+crc")
+    endif()
+    # Compile an empty main() with the flag; compiler output matching
+    # "not supported" is treated as a failed check.
+    set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}")
+    check_c_source_compiles("int main() { return 0; }" HAVE_ACLE_INTRIN FAIL_REGEX "not supported")
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_avx2_intrinsics()
+#
+# Chooses the AVX2 compiler flag and tests whether AVX2 intrinsics build.
+# Being a macro, every variable below is set in the caller's scope:
+#   AVX2FLAG          - flag selected for the detected compiler (left untouched
+#                       for GNU/Clang when NATIVEFLAG is set, and for any
+#                       compiler not listed here)
+#   HAVE_AVX2_INTRIN  - result of the intrinsics probe
+#   CMAKE_REQUIRED_FLAGS is used for the probe and cleared again afterwards.
+macro(check_avx2_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        # Intel compilers take MSVC-style switches off Unix hosts.
+        if(NOT (CMAKE_HOST_UNIX OR APPLE))
+            set(AVX2FLAG "/arch:AVX2")
+        else()
+            set(AVX2FLAG "-mavx2")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        set(AVX2FLAG "-mavx2")
+    endif()
+    # Check whether compiler supports AVX2 intrinsics
+    set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG}")
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+ int main(void) {
+ __m256i x = _mm256_set1_epi16(2);
+ const __m256i y = _mm256_set1_epi16(1);
+ x = _mm256_subs_epu16(x, y);
+ (void)x;
+ return 0;
+ }"
+        HAVE_AVX2_INTRIN
+    )
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_neon_intrinsics()
+#
+# Chooses the NEON compiler flag and tests whether the compiler accepts it.
+# Being a macro, every variable below is set in the caller's scope:
+#   NEONFLAG             - set for GNU/Clang unless NATIVEFLAG is set;
+#                          otherwise whatever the caller already defined
+#   MFPU_NEON_AVAILABLE  - result of compiling a trivial program with NEONFLAG
+#   CMAKE_REQUIRED_FLAGS is used for the probe and cleared again afterwards.
+macro(check_neon_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        if(NOT "${ARCH}" MATCHES "aarch64")
+            set(NEONFLAG "-mfpu=neon")
+        else()
+            set(NEONFLAG "-march=armv8-a+simd")
+        endif()
+    endif()
+    # Compile an empty main() with the flag; compiler output matching
+    # "not supported" is treated as a failed check.
+    set(CMAKE_REQUIRED_FLAGS "${NEONFLAG}")
+    check_c_source_compiles("int main() { return 0; }" MFPU_NEON_AVAILABLE FAIL_REGEX "not supported")
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_pclmulqdq_intrinsics()
+#
+# Chooses the PCLMULQDQ compiler flag and tests whether the carry-less
+# multiply intrinsic builds. Being a macro, every variable below is set in
+# the caller's scope:
+#   PCLMULFLAG             - "-mpclmul" for GNU/Clang unless NATIVEFLAG is set
+#   HAVE_PCLMULQDQ_INTRIN  - probe result, or OFF on 32-bit Apple targets
+#   CMAKE_REQUIRED_FLAGS is used for the probe and cleared again afterwards.
+macro(check_pclmulqdq_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        set(PCLMULFLAG "-mpclmul")
+    endif()
+    if(APPLE AND "${ARCH}" MATCHES "i386")
+        # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
+        set(HAVE_PCLMULQDQ_INTRIN OFF)
+    else()
+        # Check whether compiler supports PCLMULQDQ intrinsics
+        set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}")
+        check_c_source_compile_or_run(
+            "#include <immintrin.h>
+ int main(void) {
+ __m128i a = _mm_setzero_si128();
+ __m128i b = _mm_setzero_si128();
+ __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
+ (void)c;
+ return 0;
+ }"
+            HAVE_PCLMULQDQ_INTRIN
+        )
+        set(CMAKE_REQUIRED_FLAGS)
+    endif()
+endmacro()
+
+# check_power8_intrinsics()
+#
+# Chooses the POWER8 compiler flag and tests whether the pieces needed for
+# the POWER8 code paths compile. Being a macro, every variable below is set
+# in the caller's scope:
+#   POWER8FLAG          - "-mcpu=power8" for GNU/Clang unless NATIVEFLAG is set
+#   HAVE_POWER8_INTRIN  - result of the compile probe
+#   CMAKE_REQUIRED_FLAGS is used for the probe and cleared again afterwards.
+macro(check_power8_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        set(POWER8FLAG "-mcpu=power8")
+    endif()
+    # The probe needs <sys/auxv.h>, getauxval(), AT_HWCAP2 and
+    # PPC_FEATURE2_ARCH_2_07 to be available at compile time.
+    set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}")
+    check_c_source_compiles(
+        "#include <sys/auxv.h>
+ int main() {
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+ }"
+        HAVE_POWER8_INTRIN
+    )
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_sse2_intrinsics()
+#
+# Chooses the SSE2 compiler flag and tests whether SSE2 intrinsics build.
+# Being a macro, every variable below is set in the caller's scope:
+#   SSE2FLAG          - flag selected for the detected compiler; not set for
+#                       MSVC on x86_64, nor for GNU/Clang when NATIVEFLAG is set
+#   HAVE_SSE2_INTRIN  - result of the intrinsics probe
+#   CMAKE_REQUIRED_FLAGS is used for the probe and cleared again afterwards.
+macro(check_sse2_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        # Intel compilers take MSVC-style switches off Unix hosts.
+        if(NOT (CMAKE_HOST_UNIX OR APPLE))
+            set(SSE2FLAG "/arch:SSE2")
+        else()
+            set(SSE2FLAG "-msse2")
+        endif()
+    elseif(MSVC)
+        # Kept as a nested test so an MSVC x86_64 build never falls through
+        # to the GNU/Clang branch below.
+        if(NOT "${ARCH}" MATCHES "x86_64")
+            set(SSE2FLAG "/arch:SSE2")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        set(SSE2FLAG "-msse2")
+    endif()
+    # Check whether compiler supports SSE2 intrinsics
+    set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}")
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+ int main(void) {
+ __m128i zero = _mm_setzero_si128();
+ (void)zero;
+ return 0;
+ }"
+        HAVE_SSE2_INTRIN
+    )
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_ssse3_intrinsics()
+#
+# Chooses the SSSE3 compiler flag and tests whether SSSE3 intrinsics build.
+# Being a macro, every variable below is set in the caller's scope:
+#   SSSE3FLAG          - flag selected for the detected compiler (left
+#                        untouched for GNU/Clang when NATIVEFLAG is set, and
+#                        for any compiler not listed here)
+#   HAVE_SSSE3_INTRIN  - result of the intrinsics probe
+#   CMAKE_REQUIRED_FLAGS is used for the probe and cleared again afterwards.
+macro(check_ssse3_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        if(CMAKE_HOST_UNIX OR APPLE)
+            set(SSSE3FLAG "-mssse3")
+        else()
+            set(SSSE3FLAG "/arch:SSSE3")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+        if(NOT NATIVEFLAG)
+            set(SSSE3FLAG "-mssse3")
+        endif()
+    endif()
+    # Check whether compiler supports SSSE3 intrinsics
+    set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG}")
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+ int main(void) {
+ __m128i u, v, w;
+ u = _mm_set1_epi32(1);
+ v = _mm_set1_epi32(2);
+ w = _mm_hadd_epi32(u, v);
+ (void)w;
+ return 0;
+ }"
+        HAVE_SSSE3_INTRIN
+    )
+    # Clear the probe flags so they do not leak into whatever compile check
+    # the caller runs next (a macro shares the caller's variable scope).
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+# check_sse4_intrinsics()
+#
+# Chooses the SSE4 compiler flag and runs three probes with it. Being a
+# macro, every variable below is set in the caller's scope:
+#   SSE4FLAG                  - flag selected for the detected compiler (left
+#                               untouched for GNU/Clang when NATIVEFLAG is
+#                               set, and for any compiler not listed here)
+#   HAVE_SSE42CRC_INLINE_ASM  - crc32 instruction usable via inline assembly
+#   HAVE_SSE42CRC_INTRIN      - crc32 usable via intrinsic/builtin
+#   HAVE_SSE42CMPSTR_INTRIN   - _mm_cmpestri string-compare intrinsic usable
+#   CMAKE_REQUIRED_FLAGS is set once for all three probes and cleared at the end.
+macro(check_sse4_intrinsics)
+    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+        # Intel compilers take MSVC-style switches off Unix hosts.
+        if(NOT (CMAKE_HOST_UNIX OR APPLE))
+            set(SSE4FLAG "/arch:SSE4.2")
+        else()
+            set(SSE4FLAG "-msse4.2")
+        endif()
+    elseif(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang" AND NOT NATIVEFLAG)
+        set(SSE4FLAG "-msse4")
+    endif()
+    set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
+    # Probe 1: SSE4 CRC via inline assembly
+    check_c_source_compile_or_run(
+        "int main(void) {
+ unsigned val = 0, h = 0;
+ #if defined(_MSC_VER)
+ { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov val, eax }
+ #else
+ __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
+ #endif
+ return (int)h;
+ }"
+        HAVE_SSE42CRC_INLINE_ASM
+    )
+    # Probe 2: SSE4 CRC via intrinsics
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+ int main(void) {
+ unsigned crc = 0;
+ char c = 'c';
+ #if defined(_MSC_VER)
+ crc = _mm_crc32_u32(crc, c);
+ #else
+ crc = __builtin_ia32_crc32qi(crc, c);
+ #endif
+ (void)crc;
+ return 0;
+ }"
+        HAVE_SSE42CRC_INTRIN
+    )
+    # Probe 3: SSE4.2 compare-string intrinsics
+    check_c_source_compile_or_run(
+        "#include <immintrin.h>
+ int main(void) {
+ unsigned char a[64] = { 0 };
+ unsigned char b[64] = { 0 };
+ __m128i xmm_src0, xmm_src1;
+ xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
+ xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
+ return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
+ }"
+        HAVE_SSE42CMPSTR_INTRIN
+    )
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
echo "Checking for __builtin_ctzll ... No." | tee -a configure.log
fi
-# Check for SSE2 intrinsics
-case "${ARCH}" in
- i386 | i486 | i586 | i686 | x86_64)
- cat > $test.c << EOF
+check_avx2_intrinsics() {
+ # Check whether compiler supports AVX2 intrinsics
+ cat > $test.c << EOF
#include <immintrin.h>
int main(void) {
- __m128i zero = _mm_setzero_si128();
- (void)zero;
+ __m256i x = _mm256_set1_epi16(2);
+ const __m256i y = _mm256_set1_epi16(1);
+ x = _mm256_subs_epu16(x, y);
+ (void)x;
return 0;
}
EOF
- if try ${CC} ${CFLAGS} ${sse2flag} $test.c; then
- echo "Checking for SSE2 intrinsics ... Yes." | tee -a configure.log
- HAVE_SSE2_INTRIN=1
- else
- echo "Checking for SSE2 intrinsics ... No." | tee -a configure.log
- HAVE_SSE2_INTRIN=0
- fi
- ;;
-esac
+ if try ${CC} ${CFLAGS} ${avx2flag} $test.c; then
+ echo "Checking for AVX2 intrinsics ... Yes." | tee -a configure.log
+ HAVE_AVX2_INTRIN=1
+ else
+ echo "Checking for AVX2 intrinsics ... No." | tee -a configure.log
+ HAVE_AVX2_INTRIN=0
+ fi
+}
-# Check for SSSE3 intrinsics
-case "${ARCH}" in
- i386 | i486 | i586 | i686 | x86_64)
- cat > $test.c << EOF
-#include <x86intrin.h>
-int main(void)
-{
- __m128i u, v, w;
- u = _mm_set1_epi32(1);
- v = _mm_set1_epi32(2);
- w = _mm_hadd_epi32(u, v);
- (void)w;
+check_neon_intrinsics() {
+    # Probe whether the compiler accepts -mfpu=neon (ARM processors) by
+    # compiling a trivial program through the script's `try` helper.
+    # Sets MFPU_NEON_AVAILABLE to 1 or 0 and logs the outcome to
+    # stdout and configure.log.
+    cat << EOF > $test.c
+int main() { return 0; }
+EOF
+    if try $CC -c $CFLAGS -mfpu=neon $test.c; then
+        echo "Check whether -mfpu=neon is available ... Yes." | tee -a configure.log
+        MFPU_NEON_AVAILABLE=1
+    else
+        echo "Check whether -mfpu=neon is available ... No." | tee -a configure.log
+        MFPU_NEON_AVAILABLE=0
+    fi
+}
+
+check_pclmulqdq_intrinsics() {
+ # Check whether compiler supports PCLMULQDQ intrinsics
+ cat > $test.c << EOF
+#include <immintrin.h>
+#include <wmmintrin.h>
+int main(void) {
+ __m128i a = _mm_setzero_si128();
+ __m128i b = _mm_setzero_si128();
+ __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
+ (void)c;
return 0;
}
EOF
- if try ${CC} ${CFLAGS} ${ssse3flag} $test.c; then
- echo "Checking for SSSE3 intrinsics ... Yes." | tee -a configure.log
- HAVE_SSSE3_INTRIN=1
- else
- echo "Checking for SSSE3 intrinsics ... No." | tee -a configure.log
- HAVE_SSSE3_INTRIN=0
- fi
- ;;
-esac
+ if try ${CC} ${CFLAGS} ${pclmulflag} $test.c; then
+ echo "Checking for PCLMULQDQ intrinsics ... Yes." | tee -a configure.log
+ HAVE_PCLMULQDQ_INTRIN=1
+ else
+ echo "Checking for PCLMULQDQ intrinsics ... No." | tee -a configure.log
+ HAVE_PCLMULQDQ_INTRIN=0
+ fi
+}
-# Check for SSE4.2 CRC inline assembly
-case "${ARCH}" in
- i386 | i486 | i586 | i686 | x86_64)
- cat > $test.c << EOF
+check_power8_intrinsics() {
+    # Probe whether the features needed by the POWER optimisations are
+    # available: the test program must compile with -mcpu=power8 and needs
+    # <sys/auxv.h>, getauxval(), AT_HWCAP2 and PPC_FEATURE2_ARCH_2_07.
+    # Sets HAVE_POWER8_INTRIN to 1 or 0 and logs the outcome to
+    # stdout and configure.log.
+    cat << EOF > $test.c
+#include <sys/auxv.h>
+int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); }
+EOF
+    if try $CC -c $CFLAGS -mcpu=power8 $test.c; then
+        echo "Check whether POWER8 instructions are available ... Yes." | tee -a configure.log
+        HAVE_POWER8_INTRIN=1
+    else
+        echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log
+        HAVE_POWER8_INTRIN=0
+    fi
+}
+
+check_sse2_intrinsics() {
+    # Probe whether the compiler can build SSE2 intrinsics when given
+    # ${sse2flag}, using the script's `try` helper.
+    # Sets HAVE_SSE2_INTRIN to 1 or 0 and logs the outcome to
+    # stdout and configure.log.
+    cat << EOF > $test.c
+#include <immintrin.h>
+int main(void) {
+ __m128i zero = _mm_setzero_si128();
+ (void)zero;
+ return 0;
+}
+EOF
+    if try ${CC} ${CFLAGS} ${sse2flag} $test.c; then
+        HAVE_SSE2_INTRIN=1
+        echo "Checking for SSE2 intrinsics ... Yes." | tee -a configure.log
+    else
+        HAVE_SSE2_INTRIN=0
+        echo "Checking for SSE2 intrinsics ... No." | tee -a configure.log
+    fi
+}
+
+check_sse4_intrinsics() {
+ # Check whether compiler supports SSE4 CRC inline asm
+ cat > $test.c << EOF
int main(void) {
unsigned val = 0, h = 0;
__asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) );
return (int) h;
}
EOF
- if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
- echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
- HAVE_SSE42CRC_INLINE_ASM=1
- else
- echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
- HAVE_SSE42CRC_INLINE_ASM=0
- fi
- ;;
-esac
+ if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
+ echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
+ HAVE_SSE42CRC_INLINE_ASM=1
+ else
+ echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
+ HAVE_SSE42CRC_INLINE_ASM=0
+ fi
-# Check for SSE4.2 CRC intrinsics
-case "${ARCH}" in
- i386 | i486 | i586 | i686 | x86_64)
- cat > $test.c << EOF
+ # Check whether compiler supports SSE4.2 CRC intrinsics
+ cat > $test.c << EOF
int main(void) {
unsigned crc = 0;
char c = 'c';
return 0;
}
EOF
- if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
- echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
- HAVE_SSE42CRC_INTRIN=1
- else
- echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
- HAVE_SSE42CRC_INTRIN=0
- fi
- ;;
-esac
+ if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
+ echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
+ HAVE_SSE42CRC_INTRIN=1
+ else
+ echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
+ HAVE_SSE42CRC_INTRIN=0
+ fi
-# Check for SSE4.2 compare string intrinsics
-case "${ARCH}" in
- i386 | i486 | i586 | i686 | x86_64)
- cat > $test.c << EOF
+ # Check whether compiler supports SSE4.2 compare string intrinsics
+ cat > $test.c << EOF
#include <immintrin.h>
int main(void)
{
return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
}
EOF
- if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
- echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log
- HAVE_SSE42CMPSTR_INTRIN=1
- else
- echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log
- HAVE_SSE42CMPSTR_INTRIN=0
- fi
- ;;
-esac
+ if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
+ echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log
+ HAVE_SSE42CMPSTR_INTRIN=1
+ else
+ echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log
+ HAVE_SSE42CMPSTR_INTRIN=0
+ fi
+}
-# Check for PCLMULQDQ intrinsics
-case "${ARCH}" in
- i386 | i486 | i586 | i686 | x86_64)
- cat > $test.c << EOF
-#include <immintrin.h>
-#include <wmmintrin.h>
-int main(void) {
- __m128i a = _mm_setzero_si128();
- __m128i b = _mm_setzero_si128();
- __m128i c = _mm_clmulepi64_si128(a, b, 0x10);
- (void)c;
+check_ssse3_intrinsics() {
+ # Check whether compiler supports SSSE3 intrinsics
+ cat > $test.c << EOF
+#include <x86intrin.h>
+int main(void)
+{
+ __m128i u, v, w;
+ u = _mm_set1_epi32(1);
+ v = _mm_set1_epi32(2);
+ w = _mm_hadd_epi32(u, v);
+ (void)w;
return 0;
}
EOF
- if try ${CC} ${CFLAGS} ${pclmulflag} $test.c; then
- echo "Checking for PCLMULQDQ intrinsics ... Yes." | tee -a configure.log
- HAVE_PCLMULQDQ_INTRIN=1
- else
- echo "Checking for PCLMULQDQ intrinsics ... No." | tee -a configure.log
- HAVE_PCLMULQDQ_INTRIN=0
- fi
+ if try ${CC} ${CFLAGS} ${ssse3flag} $test.c; then
+ echo "Checking for SSSE3 intrinsics ... Yes." | tee -a configure.log
+ HAVE_SSSE3_INTRIN=1
+ else
+ echo "Checking for SSSE3 intrinsics ... No." | tee -a configure.log
+ HAVE_SSSE3_INTRIN=0
+ fi
+}
+case "${ARCH}" in
+ i386 | i486 | i586 | i686 | x86_64)
# Enable deflate_medium at level 1
if test $without_new_strategies -eq 1; then
CFLAGS="${CFLAGS} -DNO_QUICK_STRATEGY"
;;
esac
-# Check for AVX2 intrinsics
-case "${ARCH}" in
- i386 | i486 | i586 | i686 | x86_64)
- cat > $test.c << EOF
-#include <immintrin.h>
-int main(void) {
- __m256i x = _mm256_set1_epi16(2);
- const __m256i y = _mm256_set1_epi16(1);
- x = _mm256_subs_epu16(x, y);
- (void)x;
- return 0;
-}
-EOF
- if try ${CC} ${CFLAGS} ${avx2flag} $test.c; then
- echo "Checking for AVX2 intrinsics ... Yes." | tee -a configure.log
- HAVE_AVX2_INTRIN=1
- else
- echo "Checking for AVX2 intrinsics ... No." | tee -a configure.log
- HAVE_AVX2_INTRIN=0
- fi
- ;;
-esac
-
-
-# Check whether -mfpu=neon is available on ARM processors.
-case "${ARCH}" in
- arm*)
- cat > $test.c << EOF
-int main() { return 0; }
-EOF
- if try $CC -c $CFLAGS -mfpu=neon $test.c; then
- MFPU_NEON_AVAILABLE=1
- echo "Check whether -mfpu=neon is available ... Yes." | tee -a configure.log
- else
- MFPU_NEON_AVAILABLE=0
- echo "Check whether -mfpu=neon is available ... No." | tee -a configure.log
- fi
- ;;
-esac
-
-# Check whether features needed by POWER optimisations are available
-case "${ARCH}" in
- powerpc*)
- cat > $test.c << EOF
-#include <sys/auxv.h>
-int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); }
-EOF
- if try $CC -c $CFLAGS -mcpu=power8 $test.c; then
- HAVE_POWER8=1
- echo "Check whether POWER8 instructions are available ... Yes." | tee -a configure.log
- else
- HAVE_POWER8=0
- echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log
- fi
-esac
-
-# Check whether sys/sdt.h is available
-cat > $test.c << EOF
-#include <sys/sdt.h>
-int main() { return 0; }
-EOF
-if try ${CC} ${CFLAGS} $test.c; then
- echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log
- CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H"
- SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H"
-else
- echo "Checking for sys/sdt.h ... No." | tee -a configure.log
-fi
-
ARCHDIR='arch/generic'
ARCH_STATIC_OBJS=''
ARCH_SHARED_OBJS=''
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo"
+ check_avx2_intrinsics
+
if test ${HAVE_AVX2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
SFLAGS="${SFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_avx.lo chunkset_avx.lo compare258_avx.lo adler32_avx.lo"
fi
+ check_sse4_intrinsics
+
if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH"
SFLAGS="${SFLAGS} -DX86_SSE42_CRC_HASH"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo"
fi
+ check_sse4_intrinsics
+
if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} compare258_sse.lo"
fi
+ check_sse2_intrinsics
+
if test ${HAVE_SSE2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
fi
fi
+ check_ssse3_intrinsics
+
if test ${HAVE_SSSE3_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
SFLAGS="${SFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo"
fi
+ check_pclmulqdq_intrinsics
+
if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC"
SFLAGS="${SFLAGS} ${floatabi}"
fi
+ if test $without_optimizations -eq 0; then
+ check_neon_intrinsics
+ fi
+
case "${ARCH}" in
armv[345]*)
if test $without_optimizations -eq 0; then
ARCHDIR=arch/power
if test $without_optimizations -eq 0; then
- if test $HAVE_POWER8 -eq 1; then
+
+ check_power8_intrinsics
+
+ if test $HAVE_POWER8_INTRIN -eq 1; then
CFLAGS="${CFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_SLIDEHASH"
SFLAGS="${SFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_SLIDEHASH"