endif()
if(WITH_SSE42)
check_sse42_intrinsics()
- if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
+ if(HAVE_SSE42_INTRIN)
add_definitions(-DX86_SSE42)
set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
- if(HAVE_SSE42CRC_INTRIN)
- add_definitions(-DX86_SSE42_CRC_INTRIN)
- endif()
else()
set(WITH_SSE42 OFF)
endif()
*
*/
+#ifdef X86_SSE42
#include "../../zbuild.h"
-#include <immintrin.h>
-#ifdef _MSC_VER
-# include <nmmintrin.h>
-#endif
+#include <nmmintrin.h>
#include "../../deflate.h"
-#ifdef X86_SSE42_CRC_INTRIN
-# ifdef _MSC_VER
-# define HASH_CALC(s, h, val)\
- h = _mm_crc32_u32(h, val)
-# else
-# define HASH_CALC(s, h, val)\
- h = __builtin_ia32_crc32si(h, val)
-# endif
-#else
-# ifdef _MSC_VER
-# define HASH_CALC(s, h, val) {\
- __asm mov edx, h\
- __asm mov eax, val\
- __asm crc32 eax, edx\
- __asm mov h, eax\
- }
-# else
-# define HASH_CALC(s, h, val) \
- __asm__ __volatile__ (\
- "crc32 %1,%0\n\t"\
- : "+r" (h)\
- : "r" (val)\
- );
-# endif
-#endif
+#define HASH_CALC(s, h, val)\
+ h = _mm_crc32_u32(h, val)
#define HASH_CALC_VAR h
#define HASH_CALC_VAR_INIT uint32_t h = 0
#define INSERT_STRING insert_string_sse42
#define QUICK_INSERT_STRING quick_insert_string_sse42
-#ifdef X86_SSE42
-# include "../../insert_string_tpl.h"
+#include "../../insert_string_tpl.h"
#endif
set(SSE42FLAG "-msse4.2")
endif()
endif()
- # Check whether compiler supports SSE4.2 CRC inline asm
+ # Check whether compiler supports SSE4.2 intrinsics
set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}")
check_c_source_compile_or_run(
- "int main(void) {
- unsigned val = 0, h = 0;
- #if defined(_MSC_VER)
- { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax }
- #else
- __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
- #endif
- return (int)h;
- }"
- HAVE_SSE42CRC_INLINE_ASM
- )
- # Check whether compiler supports SSE4.2 CRC intrinsics
- check_c_source_compile_or_run(
- "#include <immintrin.h>
+ "#include <nmmintrin.h>
int main(void) {
unsigned crc = 0;
char c = 'c';
- #if defined(_MSC_VER)
crc = _mm_crc32_u32(crc, c);
- #else
- crc = __builtin_ia32_crc32qi(crc, c);
- #endif
(void)crc;
return 0;
}"
- HAVE_SSE42CRC_INTRIN
+ HAVE_SSE42_INTRIN
)
set(CMAKE_REQUIRED_FLAGS)
endmacro()
}
check_sse42_intrinsics() {
- # Check whether compiler supports SSE4.2 CRC inline asm
- cat > $test.c << EOF
-int main(void) {
- unsigned val = 0, h = 0;
- __asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) );
- return (int) h;
-}
-EOF
- if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
- echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
- HAVE_SSE42CRC_INLINE_ASM=1
- else
- echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
- HAVE_SSE42CRC_INLINE_ASM=0
- fi
-
- # Check whether compiler supports SSE4.2 CRC intrinsics
+ # Check whether compiler supports SSE4.2 intrinsics
cat > $test.c << EOF
+#include <nmmintrin.h>
int main(void) {
unsigned crc = 0;
char c = 'c';
- crc = __builtin_ia32_crc32qi(crc, c);
+ crc = _mm_crc32_u32(crc, c);
(void)crc;
return 0;
}
EOF
if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
- echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
- HAVE_SSE42CRC_INTRIN=1
+ echo "Checking for SSE4.2 intrinsics ... Yes." | tee -a configure.log
+ HAVE_SSE42_INTRIN=1
else
- echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
- HAVE_SSE42CRC_INTRIN=0
+ echo "Checking for SSE4.2 intrinsics ... No." | tee -a configure.log
+ HAVE_SSE42_INTRIN=0
fi
}
check_sse42_intrinsics
- if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
+ if test ${HAVE_SSE42_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42"
SFLAGS="${SFLAGS} -DX86_SSE42"
-
- if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN"
- SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN"
- fi
-
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo"
fi
-DX86_PCLMULQDQ_CRC \
-DX86_SSE2 \
-DX86_SSE42 \
- -DX86_SSE42_CRC_INTRIN \
-DX86_SSSE3 \
-DX86_AVX2