[VSX] add algorithm for alignr w/o use of immediates

author Konstantinos Margaritis <konstantinos@vectorcamp.gr>

Tue, 6 Sep 2022 21:01:54 +0000 (00:01 +0300)

committer Konstantinos Margaritis <konstantinos@vectorcamp.gr>

Tue, 6 Sep 2022 21:01:54 +0000 (00:01 +0300)
author Konstantinos Margaritis <konstantinos@vectorcamp.gr>
Tue, 6 Sep 2022 21:01:54 +0000 (00:01 +0300)
committer Konstantinos Margaritis <konstantinos@vectorcamp.gr>
Tue, 6 Sep 2022 21:01:54 +0000 (00:01 +0300)
diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h

index 32014e541a5133c2b82fd4d19d8f4315bbe3345c..ea1766b264f8e3bee7e9e55f4fedcfb73e86a3ee 100644 (file)
--- a/src/util/arch/ppc64el/simd_utils.h
+++ b/src/util/arch/ppc64el/simd_utils.h
@@ -313,12 +313,18 @@ m128 palignr_imm(m128 r, m128 l, int offset) {
  
  static really_really_inline
  m128 palignr(m128 r, m128 l, int offset) {
-#if defined(HS_OPTIMIZE)
-    // need a faster way to do this.
-    return palignr_imm(r, l, offset);
-#else
-    return palignr_imm(r, l, offset);
+    if (offset == 0) return l;   // offset 0: result is l unchanged
+    if (offset == 16) return r;  // offset 16: result is r unchanged
+#if defined(HAVE__BUILTIN_CONSTANT_P)
+    if (__builtin_constant_p(offset)) {  // offset known at compile time: use the single vec_sld form
+        return (m128)vec_sld((int8x16_t)(r), (int8x16_t)(l), 16 - offset);
+    }
  #endif
+    m128 sl = (m128) vec_splats((uint8_t) (offset << 3));  // runtime offset: offset * 8, replicated into every byte
+    m128 sr = (m128) vec_splats((uint8_t) ((16 - offset) << 3));  // complementary amount; NOTE(review): presumably 0 < offset < 16 here - confirm callers
+    m128 rhs = (m128) vec_slo((uint8x16_t) r, (uint8x16_t) sr);  // r shifted left by octets, amount taken from sr
+    m128 lhs = (m128) vec_sro((uint8x16_t) l, (uint8x16_t) sl);  // l shifted right by octets, amount taken from sl
+    return or128(lhs, rhs);  // merge the two shifted parts into one vector
  }
  
  #undef CASE_ALIGN_VECTORS
author	Konstantinos Margaritis <konstantinos@vectorcamp.gr>
	Tue, 6 Sep 2022 21:01:54 +0000 (00:01 +0300)
committer	Konstantinos Margaritis <konstantinos@vectorcamp.gr>
	Tue, 6 Sep 2022 21:01:54 +0000 (00:01 +0300)