fix some AVX512 function names to fix the AVX512 build failure; also rename the expand* functions to broadcast*

author Konstantinos Margaritis <markos@freevec.org>

Mon, 15 Feb 2021 11:51:11 +0000 (13:51 +0200)

committer Konstantinos Margaritis <markos@users.noreply.github.com>

Mon, 15 Feb 2021 11:54:19 +0000 (13:54 +0200)
author Konstantinos Margaritis <markos@freevec.org>
Mon, 15 Feb 2021 11:51:11 +0000 (13:51 +0200)
committer Konstantinos Margaritis <markos@users.noreply.github.com>
Mon, 15 Feb 2021 11:54:19 +0000 (13:54 +0200)
diff --git a/src/util/arch/x86/simd_utils.h b/src/util/arch/x86/simd_utils.h

index fd13d6766cd3744ed3984e5dc1a479f046bf49b0..52b4eb65e0b491fbe0cf1d95e0b743f82f288f7d 100644 (file)
--- a/src/util/arch/x86/simd_utils.h
+++ b/src/util/arch/x86/simd_utils.h
@@ -188,7 +188,7 @@ static really_inline m128 or128(m128 a, m128 b) {
  }
  
  #if defined(HAVE_AVX512VBMI)
-static really_inline m512 expand128(m128 a) {
+static really_inline m512 broadcast128(m128 a) {
      return _mm512_broadcast_i32x4(a);
  }
  #endif
@@ -381,7 +381,7 @@ static really_inline m256 or256(m256 a, m256 b) {
  }
  
  #if defined(HAVE_AVX512VBMI)
-static really_inline m512 expand256(m256 a) {
+static really_inline m512 broadcast256(m256 a) {
      return _mm512_broadcast_i64x4(a);
  }
  #endif
@@ -450,7 +450,7 @@ static really_inline m256 loadu256(const void *ptr) {
      return _mm256_loadu_si256((const m256 *)ptr);
  }
  
-static really_inline
+static really_really_inline
  m256 loadu_maskz_m256(__mmask32 k, const void *ptr) {
      return _mm256_maskz_loadu_epi8(k, ptr);
  }
@@ -535,7 +535,7 @@ m128 movdq_lo(m256 x) {
  #define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed)
  #define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
  #define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
-#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a))
+#define extractlow64from256(a) movq(cast256to128(a))
  #define extractlow32from256(a) movd(cast256to128(a))
  #define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b)
  #define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b)
@@ -591,7 +591,7 @@ static really_inline u32 movd512(const m512 in) {
  static really_inline u64a movq512(const m512 in) {
      // NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in),
      //       so we use 2-step convertions to work around.
-    return _mm_cvtsi128_si64(_mm512_castsi512_si128(in));
+    return movq(_mm512_castsi512_si128(in));
  }
  
  static really_inline
@@ -639,7 +639,7 @@ m512 set1_8x64(u64a a) {
  }
  
  static really_inline
-m512 set16x32(u32 a) {
+m512 set1_16x32(u32 a) {
      return _mm512_set1_epi32(a);
  }
  
@@ -652,7 +652,7 @@ m512 set8x64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0,
  
  static really_inline
  m512 swap256in512(m512 a) {
-    m512 idx = set512_64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL);
+    m512 idx = set8x64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL);
      return vpermq512(idx, a);
  }
  
@@ -683,7 +683,7 @@ m512 sub_u8_m512(m512 a, m512 b) {
  
  static really_inline m512
  add512(m512 a, m512 b) {
-    return _mm512_add_epu64(a, b);
+    return _mm512_add_epi64(a, b);
  }
  
  static really_inline
@@ -697,7 +697,7 @@ m512 or512(m512 a, m512 b) {
  }
  
  #if defined(HAVE_AVX512VBMI)
-static really_inline m512 expand384(m384 a) {
+static really_inline m512 broadcast384(m384 a) {
      u64a *lo = (u64a*)&a.lo;
      u64a *mid = (u64a*)&a.mid;
      u64a *hi = (u64a*)&a.hi;
diff --git a/src/util/uniform_ops.h b/src/util/uniform_ops.h

index 262104aca2d9af9c775daccc58f7f7575ec30fe3..1c39c936d816305b844b89e71b40f7f89c3c3e26 100644 (file)
--- a/src/util/uniform_ops.h
+++ b/src/util/uniform_ops.h
@@ -102,10 +102,10 @@
  #define or_m512(a, b)       (or512(a, b))
  
  #if defined(HAVE_AVX512VBMI)
-#define expand_m128(a)      (expand128(a))
-#define expand_m256(a)      (expand256(a))
-#define expand_m384(a)      (expand384(a))
-#define expand_m512(a)      (a)
+#define broadcast_m128(a)      (broadcast128(a))
+#define broadcast_m256(a)      (broadcast256(a))
+#define broadcast_m384(a)      (broadcast384(a))
+#define broadcast_m512(a)      (a)
  
  #define shuffle_byte_m128(a, b)       (pshufb_m512(b, a))
  #define shuffle_byte_m256(a, b)       (vpermb512(a, b))
author	Konstantinos Margaritis <markos@freevec.org>
	Mon, 15 Feb 2021 11:51:11 +0000 (13:51 +0200)
committer	Konstantinos Margaritis <markos@users.noreply.github.com>
	Mon, 15 Feb 2021 11:54:19 +0000 (13:54 +0200)
src/util/arch/x86/simd_utils.h		patch | blob | blame | history
src/util/uniform_ops.h		patch | blob | blame | history