From c6bfa9698eafa77eece7ee2d29fbe51d602b41d7 Mon Sep 17 00:00:00 2001
From: Vladislav Shchapov
Date: Wed, 18 Jun 2025 18:43:26 +0500
Subject: [PATCH] Simplify compatibility intrinsics for x86

Signed-off-by: Vladislav Shchapov
---
NOTE(review): line breaks and leading indentation in this patch were
reconstructed from a whitespace-collapsed copy; hunk line counts match the
@@ headers and the diffstat, but confirm context-line whitespace against the
target tree (or apply with --ignore-whitespace) before relying on it.

 arch/x86/chorba_sse2.c  | 12 ++++++------
 arch/x86/chorba_sse41.c |  2 +-
 arch/x86/x86_intrins.h  | 16 +++++-----------
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/arch/x86/chorba_sse2.c b/arch/x86/chorba_sse2.c
index 61dbb955..ac98e994 100644
--- a/arch/x86/chorba_sse2.c
+++ b/arch/x86/chorba_sse2.c
@@ -33,7 +33,7 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_
     uint64_t next4 = 0;
     uint64_t next5 = 0;
 
-    __m128i next12 = _mm_cvtsi64x_si128(next1);
+    __m128i next12 = _mm_cvtsi64_si128(next1);
     __m128i next34 = _mm_setzero_si128();
     __m128i next56 = _mm_setzero_si128();
     __m128i ab1, ab2, ab3, ab4, cd1, cd2, cd3, cd4;
@@ -802,11 +802,11 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_
         next56 = _mm_unpackhi_epi64(cd4, _mm_setzero_si128());
     }
 
-    next1 = _mm_cvtsi128_si64x(next12);
-    next2 = _mm_cvtsi128_si64x(_mm_unpackhi_epi64(next12, next12));
-    next3 = _mm_cvtsi128_si64x(next34);
-    next4 = _mm_cvtsi128_si64x(_mm_unpackhi_epi64(next34, next34));
-    next5 = _mm_cvtsi128_si64x(next56);
+    next1 = _mm_cvtsi128_si64(next12);
+    next2 = _mm_cvtsi128_si64(_mm_unpackhi_epi64(next12, next12));
+    next3 = _mm_cvtsi128_si64(next34);
+    next4 = _mm_cvtsi128_si64(_mm_unpackhi_epi64(next34, next34));
+    next5 = _mm_cvtsi128_si64(next56);
 
     /* Skip the call to memcpy */
     size_t copy_len = len - i;
diff --git a/arch/x86/chorba_sse41.c b/arch/x86/chorba_sse41.c
index b179f225..53d6e156 100644
--- a/arch/x86/chorba_sse41.c
+++ b/arch/x86/chorba_sse41.c
@@ -93,7 +93,7 @@ static Z_FORCEINLINE uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t c
     }
 
     /* We need to mix this in */
-    __m128i init_crc = _mm_cvtsi64x_si128(crc);
+    __m128i init_crc = _mm_cvtsi64_si128(crc);
     crc = 0;
 
     size_t i = 0;
diff --git a/arch/x86/x86_intrins.h b/arch/x86/x86_intrins.h
index b2775583..47e94341 100644
--- a/arch/x86/x86_intrins.h
+++ b/arch/x86/x86_intrins.h
@@ -98,31 +98,25 @@ static inline __m512i _mm512_zextsi128_si512(__m128i a) {
 #if !defined(_M_AMD64)
 /* So, while we can't move directly to a GPR, hopefully this move to
  * a stack resident variable doesn't equate to something awful */
-static inline int64_t _mm_cvtsi128_si64x(__m128i a) {
+static inline int64_t _mm_cvtsi128_si64(__m128i a) {
     union { __m128i v; int64_t i; } u;
     u.v = a;
     return u.i;
 }
 
-static inline __m128i _mm_cvtsi64x_si128(int64_t a) {
+static inline __m128i _mm_cvtsi64_si128(int64_t a) {
     return _mm_set_epi64x(0, a);
 }
 #endif
 #endif
 
-
-#if defined(__clang__)
-#define _mm_cvtsi64x_si128(v) _mm_cvtsi64_si128(v)
-#define _mm_cvtsi128_si64x(v) _mm_cvtsi128_si64(v)
-#endif
-
-#if defined(__GNUC__) && !defined( __x86_64__) && !defined(__clang__)
-static inline int64_t _mm_cvtsi128_si64x(__m128i a) {
+#if defined(__GNUC__) && defined(__i386__) && !defined(__clang__)
+static inline int64_t _mm_cvtsi128_si64(__m128i a) {
     union { __m128i v; int64_t i; } u;
     u.v = a;
     return u.i;
 }
-#define _mm_cvtsi64x_si128(a) _mm_set_epi64x(0, a)
+#define _mm_cvtsi64_si128(a) _mm_set_epi64x(0, a)
 #endif
 
 #endif // include guard X86_INTRINS_H
-- 
2.47.2