From: Cameron Cawley Date: Tue, 28 Mar 2023 21:05:20 +0000 (+0100) Subject: Clean up SSE4.2 detection X-Git-Tag: 2.1.0-beta1~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b1aafe5c674909ab134301b86c41f1badc3fa83b;p=thirdparty%2Fzlib-ng.git Clean up SSE4.2 detection --- diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index b4b5897f..d50a2dcf 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -105,12 +105,12 @@ jobs: cmake-args: -DWITH_SSE2=OFF -DWITH_SANITIZER=Undefined codecov: ubuntu_gcc_no_sse2 - - name: Ubuntu GCC No SSE4 UBSAN + - name: Ubuntu GCC No SSE4.2 UBSAN os: ubuntu-latest compiler: gcc cxx-compiler: g++ - cmake-args: -DWITH_SSE4=OFF -DWITH_SANITIZER=Undefined - codecov: ubuntu_gcc_no_sse4 + cmake-args: -DWITH_SSE42=OFF -DWITH_SANITIZER=Undefined + codecov: ubuntu_gcc_no_sse42 - name: Ubuntu GCC No PCLMULQDQ UBSAN os: ubuntu-latest diff --git a/CMakeLists.txt b/CMakeLists.txt index ffde3d29..285974c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -810,9 +810,8 @@ if(WITH_OPTIM) if(HAVE_SSE42CRC_INTRIN) add_definitions(-DX86_SSE42_CRC_INTRIN) endif() - endif() - if(NOT HAVE_SSE42CRC_INLINE_ASM AND NOT HAVE_SSE42CRC_INTRIN AND NOT HAVE_SSE42CMPSTR_INTRIN) - set(WITH_SSE4 OFF) + else() + set(WITH_SSE42 OFF) endif() endif() if(WITH_SSE2) diff --git a/arch/x86/insert_string_sse42.c b/arch/x86/insert_string_sse42.c index 2668f0ea..565d92f9 100644 --- a/arch/x86/insert_string_sse42.c +++ b/arch/x86/insert_string_sse42.c @@ -41,9 +41,9 @@ #define HASH_CALC_VAR h #define HASH_CALC_VAR_INIT uint32_t h = 0 -#define UPDATE_HASH update_hash_sse4 -#define INSERT_STRING insert_string_sse4 -#define QUICK_INSERT_STRING quick_insert_string_sse4 +#define UPDATE_HASH update_hash_sse42 +#define INSERT_STRING insert_string_sse42 +#define QUICK_INSERT_STRING quick_insert_string_sse42 #ifdef X86_SSE42 # include "../../insert_string_tpl.h" diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake index 186d87d8..0491d53b 100644 --- a/cmake/detect-intrinsics.cmake +++ b/cmake/detect-intrinsics.cmake @@ -447,7 +447,7 @@ macro(check_sse42_intrinsics) set(SSE42FLAG "-msse4.2") endif() endif() - # Check whether compiler supports SSE4 CRC inline asm + # Check whether compiler supports SSE4.2 CRC inline asm set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}") check_c_source_compile_or_run( "int main(void) { @@ -461,7 +461,7 @@ macro(check_sse42_intrinsics) }" HAVE_SSE42CRC_INLINE_ASM ) - # Check whether compiler supports SSE4 CRC intrinsics + # Check whether compiler supports SSE4.2 CRC intrinsics check_c_source_compile_or_run( "#include int main(void) { @@ -477,19 +477,6 @@ macro(check_sse42_intrinsics) }" HAVE_SSE42CRC_INTRIN ) - # Check whether compiler supports SSE4.2 compare string intrinsics - check_c_source_compile_or_run( - "#include - int main(void) { - unsigned char a[64] = { 0 }; - unsigned char b[64] = { 0 }; - __m128i xmm_src0, xmm_src1; - xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a); - xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b); - return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0); - }" - HAVE_SSE42CMPSTR_INTRIN - ) set(CMAKE_REQUIRED_FLAGS) endmacro() diff --git a/configure b/configure index 896b0b8a..2867302b 100755 --- a/configure +++ b/configure @@ -262,7 +262,6 @@ if test $native -eq 1; then avx2flag="" sse2flag="" ssse3flag="" - sse4flag="" sse42flag="" pclmulflag="" vpclmulflag="" @@ -1415,7 +1414,7 @@ EOF } check_sse42_intrinsics() { - # Check whether compiler supports SSE4 CRC inline asm + # Check whether compiler supports SSE4.2 CRC inline asm cat > $test.c << EOF int main(void) { unsigned val = 0, h = 0; @@ -1448,27 +1447,6 @@ EOF echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log HAVE_SSE42CRC_INTRIN=0 fi - - # Check whether compiler supports SSE4.2 compare string intrinsics - cat > $test.c << EOF -#include -int main(void) -{ - unsigned char a[64] = { 0 }; - unsigned char b[64] = { 0 }; - __m128i xmm_src0, xmm_src1; - xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a); - xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b); - return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0); -} -EOF - if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then - echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log - HAVE_SSE42CMPSTR_INTRIN=1 - else - echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log - HAVE_SSE42CMPSTR_INTRIN=0 - fi } check_ssse3_intrinsics() { diff --git a/cpu_features.h b/cpu_features.h index e47f9478..c742f32c 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -181,7 +181,7 @@ extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); /* insert_string */ extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count); #ifdef X86_SSE42 -extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count); +extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count); #elif defined(ARM_ACLE) extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count); #endif @@ -235,7 +235,7 @@ extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match) /* quick_insert_string */ extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str); #ifdef X86_SSE42 -extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str); +extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str); #elif defined(ARM_ACLE) extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str); #endif @@ -261,7 +261,7 @@ extern void slide_hash_avx2(deflate_state *s); /* update_hash */ extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val); #ifdef X86_SSE42 -extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val); +extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val); #elif defined(ARM_ACLE) extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val); #endif diff --git a/functable.c b/functable.c index 4212da09..f9e8163d 100644 --- a/functable.c +++ b/functable.c @@ -87,9 +87,9 @@ static void init_functable(void) { #ifdef X86_SSE42 if (cf.x86.has_sse42) { ft.adler32_fold_copy = &adler32_fold_copy_sse42; - ft.insert_string = &insert_string_sse4; - ft.quick_insert_string = &quick_insert_string_sse4; - ft.update_hash = &update_hash_sse4; + ft.insert_string = &insert_string_sse42; + ft.quick_insert_string = &quick_insert_string_sse42; + ft.update_hash = &update_hash_sse42; } #endif // X86 - PCLMUL