uint64_t next4 = 0;
uint64_t next5 = 0;
- __m128i next12 = _mm_cvtsi64x_si128(next1);
+ __m128i next12 = _mm_cvtsi64_si128(next1);
__m128i next34 = _mm_setzero_si128();
__m128i next56 = _mm_setzero_si128();
__m128i ab1, ab2, ab3, ab4, cd1, cd2, cd3, cd4;
next56 = _mm_unpackhi_epi64(cd4, _mm_setzero_si128());
}
- next1 = _mm_cvtsi128_si64x(next12);
- next2 = _mm_cvtsi128_si64x(_mm_unpackhi_epi64(next12, next12));
- next3 = _mm_cvtsi128_si64x(next34);
- next4 = _mm_cvtsi128_si64x(_mm_unpackhi_epi64(next34, next34));
- next5 = _mm_cvtsi128_si64x(next56);
+ next1 = _mm_cvtsi128_si64(next12);
+ next2 = _mm_cvtsi128_si64(_mm_unpackhi_epi64(next12, next12));
+ next3 = _mm_cvtsi128_si64(next34);
+ next4 = _mm_cvtsi128_si64(_mm_unpackhi_epi64(next34, next34));
+ next5 = _mm_cvtsi128_si64(next56);
/* Skip the call to memcpy */
size_t copy_len = len - i;
#if !defined(_M_AMD64)
/* So, while we can't move directly to a GPR, hopefully this move to
* a stack resident variable doesn't equate to something awful */
-static inline int64_t _mm_cvtsi128_si64x(__m128i a) {
+static inline int64_t _mm_cvtsi128_si64(__m128i a) {
union { __m128i v; int64_t i; } u;
u.v = a;
return u.i;
}
-static inline __m128i _mm_cvtsi64x_si128(int64_t a) {
+static inline __m128i _mm_cvtsi64_si128(int64_t a) {
return _mm_set_epi64x(0, a);
}
#endif
#endif
-
-#if defined(__clang__)
-#define _mm_cvtsi64x_si128(v) _mm_cvtsi64_si128(v)
-#define _mm_cvtsi128_si64x(v) _mm_cvtsi128_si64(v)
-#endif
-
-#if defined(__GNUC__) && !defined( __x86_64__) && !defined(__clang__)
-static inline int64_t _mm_cvtsi128_si64x(__m128i a) {
+#if defined(__GNUC__) && defined(__i386__) && !defined(__clang__)
+static inline int64_t _mm_cvtsi128_si64(__m128i a) {
union { __m128i v; int64_t i; } u;
u.v = a;
return u.i;
}
-#define _mm_cvtsi64x_si128(a) _mm_set_epi64x(0, a)
+#define _mm_cvtsi64_si128(a) _mm_set_epi64x(0, a)
#endif
#endif // include guard X86_INTRINS_H
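For context, a minimal usage sketch (not part of the patch): with the fallbacks above, the plain-named moves compile on both 64-bit targets (where emmintrin.h declares them) and 32-bit targets (where the shims fill in), so callers like the chunk-copy code no longer need the x-suffixed spellings or the old clang remap macros. The header name "x86_intrins.h" and the round_trip64 helper are illustrative assumptions, not taken from the patch.

#include <stdint.h>
#include <emmintrin.h>
#include "x86_intrins.h"   /* assumed filename, per the X86_INTRINS_H guard */

/* Round-trip a 64-bit value through an XMM register using only the
 * plain-named intrinsics (or their 32-bit fallbacks). */
uint64_t round_trip64(uint64_t value) {
    __m128i v = _mm_cvtsi64_si128((int64_t)value);  /* scalar -> low 64 bits of XMM */
    v = _mm_add_epi64(v, _mm_set1_epi64x(1));       /* some 64-bit lane work */
    return (uint64_t)_mm_cvtsi128_si64(v);          /* low 64 bits of XMM -> scalar */
}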