From: Vladislav Shchapov Date: Fri, 19 Jan 2024 18:42:19 +0000 (+0500) Subject: Remove always true arch conditions. X-Git-Tag: 2.2.0~119 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b856b7351791bf7ea175c96bb89039cdc6c9910;p=thirdparty%2Fzlib-ng.git Remove always true arch conditions. Signed-off-by: Vladislav Shchapov --- diff --git a/arch/x86/adler32_avx2.c b/arch/x86/adler32_avx2.c index 5b0d32d7..38e7f068 100644 --- a/arch/x86/adler32_avx2.c +++ b/arch/x86/adler32_avx2.c @@ -15,17 +15,9 @@ #include "adler32_avx2_p.h" #include "x86_intrins.h" -#ifdef X86_SSE42 extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len); -#define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d) -#define sub32(a, b, c) adler32_ssse3(a, b, c) -#else -#define copy_sub32(a, b, c, d) adler32_copy_len_16(adler0, c, b, d, adler1) -#define sub32(a, b, c) adler32_len_16(adler0, b, c, adler1) -#endif - static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) { if (src == NULL) return 1L; if (len == 0) return adler; @@ -43,9 +35,9 @@ rem_peel: } } else if (len < 32) { if (COPY) { - return copy_sub32(adler, dst, src, len); + return adler32_fold_copy_sse42(adler, dst, src, len); } else { - return sub32(adler, src, len); + return adler32_ssse3(adler, src, len); } } diff --git a/arch/x86/adler32_avx512.c b/arch/x86/adler32_avx512.c index 83d521ab..cc6bfa64 100644 --- a/arch/x86/adler32_avx512.c +++ b/arch/x86/adler32_avx512.c @@ -32,13 +32,7 @@ rem_peel: _mm512_mask_storeu_epi8(dst, storemask, copy_vec); } -#ifdef X86_AVX2 return adler32_avx2(adler, src, len); -#elif defined(X86_SSSE3) - return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif } __m512i vbuf, vs1_0, vs3; diff --git a/arch/x86/adler32_avx512_vnni.c b/arch/x86/adler32_avx512_vnni.c index b8ccd9ac..3dd26d7d 100644 --- a/arch/x86/adler32_avx512_vnni.c +++ b/arch/x86/adler32_avx512_vnni.c @@ -27,20 +27,10 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size rem_peel: if (len < 32) -#if defined(X86_SSSE3) return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif if (len < 64) -#ifdef X86_AVX2 return adler32_avx2(adler, src, len); -#elif defined(X86_SSE3) - return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif const __m512i dot2v = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, @@ -134,11 +124,7 @@ rem_peel_copy: __m256i copy_vec = _mm256_maskz_loadu_epi8(storemask, src); _mm256_mask_storeu_epi8(dst, storemask, copy_vec); -#if defined(X86_SSSE3) return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif } const __m256i dot2v = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, diff --git a/arch/x86/chunkset_ssse3.c b/arch/x86/chunkset_ssse3.c index c06d1b37..7ff16a4f 100644 --- a/arch/x86/chunkset_ssse3.c +++ b/arch/x86/chunkset_ssse3.c @@ -7,7 +7,7 @@ /* This requires SSE2 support. While it's implicit with SSSE3, we can minimize * code size by sharing the chunkcopy functions, which will certainly compile * to identical machine code */ -#if defined(X86_SSSE3) && defined(X86_SSE2) +#if defined(X86_SSSE3) #include #include "../generic/chunk_permute_table.h" diff --git a/chunkset_tpl.h b/chunkset_tpl.h index f909a125..250fdc36 100644 --- a/chunkset_tpl.h +++ b/chunkset_tpl.h @@ -5,7 +5,7 @@ #include "zbuild.h" #include -#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) +#if CHUNK_SIZE == 32 && defined(X86_SSSE3) extern uint8_t* chunkmemset_ssse3(uint8_t *out, unsigned dist, unsigned len); #endif @@ -98,7 +98,7 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */ Assert(dist > 0, "chunkmemset cannot have a distance 0"); /* Only AVX2 */ -#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) +#if CHUNK_SIZE == 32 && defined(X86_SSSE3) if (len <= 16) { return chunkmemset_ssse3(out, dist, len); } diff --git a/functable.c b/functable.c index 37c4aeef..179126fc 100644 --- a/functable.c +++ b/functable.c @@ -110,10 +110,8 @@ static void init_functable(void) { #ifdef X86_SSSE3 if (cf.x86.has_ssse3) { ft.adler32 = &adler32_ssse3; -# ifdef X86_SSE2 ft.chunkmemset_safe = &chunkmemset_safe_ssse3; ft.inflate_fast = &inflate_fast_ssse3; -# endif } #endif // X86 - SSE4.2