Z_INTERNAL int x86_cpu_has_avx512vnni;
Z_INTERNAL int x86_cpu_has_sse2;
Z_INTERNAL int x86_cpu_has_ssse3;
+Z_INTERNAL int x86_cpu_has_sse41;
Z_INTERNAL int x86_cpu_has_sse42;
Z_INTERNAL int x86_cpu_has_pclmulqdq;
Z_INTERNAL int x86_cpu_has_tzcnt;
x86_cpu_has_sse2 = edx & 0x4000000;
x86_cpu_has_ssse3 = ecx & 0x200;
+ x86_cpu_has_sse41 = ecx & 0x80000;
x86_cpu_has_sse42 = ecx & 0x100000;
x86_cpu_has_pclmulqdq = ecx & 0x2;
x86_cpu_well_suited_avx512 = 0;
extern int x86_cpu_has_avx512vnni;
extern int x86_cpu_has_sse2;
extern int x86_cpu_has_ssse3;
+extern int x86_cpu_has_sse41;
extern int x86_cpu_has_sse42;
extern int x86_cpu_has_pclmulqdq;
extern int x86_cpu_has_tzcnt;
)
endmacro()
-macro(check_sse4_intrinsics)
+macro(check_sse41_intrinsics)
+ # Detect whether the C compiler accepts SSE4.1 intrinsics.
+ # Sets SSE41FLAG for the Intel and GNU/Clang compilers handled below and
+ # stores the probe result in HAVE_SSE41_INTRIN.
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
if(CMAKE_HOST_UNIX OR APPLE)
- set(SSE4FLAG "-msse4.2")
+ set(SSE41FLAG "-msse4.1")
else()
- set(SSE4FLAG "/arch:SSE4.2")
+ set(SSE41FLAG "/arch:SSE4.1")
endif()
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
if(NOT NATIVEFLAG)
- set(SSE4FLAG "-msse4.2")
+ set(SSE41FLAG "-msse4.1")
+ endif()
+ endif()
+ # Check whether compiler supports SSE4.1 intrinsics.
+ # The probe must use an intrinsic that SSE4.1 introduced: _mm_min_epi32
+ # (PMINSD) is one, whereas _mm_sad_epu8 is plain SSE2 and would let the
+ # check pass on toolchains that lack SSE4.1 support.
+ set(CMAKE_REQUIRED_FLAGS "${SSE41FLAG}")
+ check_c_source_compile_or_run(
+ "#include <smmintrin.h>
+ int main(void) {
+ __m128i u, v, w;
+ u = _mm_set1_epi32(1);
+ v = _mm_set1_epi32(2);
+ w = _mm_min_epi32(u, v);
+ (void)w;
+ return 0;
+ }"
+ HAVE_SSE41_INTRIN
+ )
+endmacro()
+
+macro(check_sse42_intrinsics)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE)
+ set(SSE42FLAG "-msse4.2")
+ else()
+ set(SSE42FLAG "/arch:SSE4.2")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ if(NOT NATIVEFLAG)
+ set(SSE42FLAG "-msse4.2")
endif()
endif()
# Check whether compiler supports SSE4 CRC inline asm
- set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}")
+ set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG}")
check_c_source_compile_or_run(
"int main(void) {
unsigned val = 0, h = 0;
avx2flag="-mavx2"
sse2flag="-msse2"
ssse3flag="-mssse3"
-sse4flag="-msse4"
+sse41flag="-msse4.1"
sse42flag="-msse4.2"
pclmulflag="-mpclmul"
acleflag=
fi
}
-check_sse4_intrinsics() {
+check_sse41_intrinsics() {
+    # Check whether compiler supports SSE4.1 intrinsics.
+    # Writes a probe program to $test.c, compiles it with ${sse41flag} and
+    # sets HAVE_SSE41_INTRIN to 1 on success or 0 on failure.
+    # The probe calls _mm_min_epi32 (PMINSD), an instruction introduced by
+    # SSE4.1; _mm_sad_epu8 is SSE2 and would not prove SSE4.1 support.
+    cat > $test.c << EOF
+#include <smmintrin.h>
+int main(void)
+{
+    __m128i u, v, w;
+    u = _mm_set1_epi32(1);
+    v = _mm_set1_epi32(2);
+    w = _mm_min_epi32(u, v);
+    (void)w;
+    return 0;
+}
+EOF
+    if try ${CC} ${CFLAGS} ${sse41flag} $test.c; then
+        echo "Checking for SSE4.1 intrinsics ... Yes." | tee -a configure.log
+        HAVE_SSE41_INTRIN=1
+    else
+        echo "Checking for SSE4.1 intrinsics ... No." | tee -a configure.log
+        HAVE_SSE41_INTRIN=0
+    fi
+}
+
+check_sse42_intrinsics() {
# Check whether compiler supports SSE4 CRC inline asm
cat > $test.c << EOF
int main(void) {
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_avx512_vnni.lo"
fi
- check_sse4_intrinsics
+ check_sse41_intrinsics
+
+ # Enable the SSE4.1 Adler-32 code path when the probe above succeeded.
+ # The shared-library list takes the .lo object name, like the neighbouring
+ # ARCH_SHARED_OBJS entries (adler32_avx512_vnni.lo, insert_string_sse.lo);
+ # the static list keeps the .o name.
+ if test ${HAVE_SSE41_INTRIN} -eq 1; then
+     CFLAGS="${CFLAGS} -DX86_SSE41_ADLER32"
+     SFLAGS="${SFLAGS} -DX86_SSE41_ADLER32"
+     ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse41.o"
+     ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse41.lo"
+ fi
+
+ check_sse42_intrinsics
if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} insert_string_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo"
fi
+
if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
echo uname = $uname >> configure.log
echo sse2flag = $sse2flag >> configure.log
echo ssse3flag = $ssse3flag >> configure.log
-echo sse4flag = $sse4flag >> configure.log
+echo sse41flag = $sse41flag >> configure.log
+echo sse42flag = $sse42flag >> configure.log
echo pclmulflag = $pclmulflag >> configure.log
echo acleflag = $acleflag >> configure.log
echo neonflag = $neonflag >> configure.log
/^AVX512VNNIFLAG *=/s#=.*#=$avx512vnniflag#
/^SSE2FLAG *=/s#=.*#=$sse2flag#
/^SSSE3FLAG *=/s#=.*#=$ssse3flag#
-/^SSE4FLAG *=/s#=.*#=$sse4flag#
+/^SSE41FLAG *=/s#=.*#=$sse41flag#
+/^SSE42FLAG *=/s#=.*#=$sse42flag#
/^PCLMULFLAG *=/s#=.*#=$pclmulflag#
/^ACLEFLAG *=/s#=.*#=$acleflag#
/^NEONFLAG *=/s#=.*#=$neonflag#