blockSingleMask implementations for ARCH_PPC64 added

author apostolos <apostolos.tapsas@vectorcamp.gr>

Thu, 14 Oct 2021 12:56:13 +0000 (15:56 +0300)

committer apostolos <apostolos.tapsas@vectorcamp.gr>

Thu, 14 Oct 2021 12:56:13 +0000 (15:56 +0300)
author apostolos <apostolos.tapsas@vectorcamp.gr>
Thu, 14 Oct 2021 12:56:13 +0000 (15:56 +0300)
committer apostolos <apostolos.tapsas@vectorcamp.gr>
Thu, 14 Oct 2021 12:56:13 +0000 (15:56 +0300)
diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp

index e7f3f6c94e836b4239d69a2d08df0a02bb67ec63..83ab428b0b79da20581ce17911b0b9a59161b8a5 100644 (file)
--- a/src/nfa/shufti_simd.hpp
+++ b/src/nfa/shufti_simd.hpp
@@ -56,6 +56,8 @@ SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi,
  #include "x86/shufti.hpp"
  #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
  #include "arm/shufti.hpp"
+#elif defined(ARCH_PPC64EL)
+#include "ppc64el/shufti.hpp"
  #endif
  
  template <uint16_t S>
diff --git a/src/nfa/truffle_simd.hpp b/src/nfa/truffle_simd.hpp

index 8d61722bbb9948e8ee801a36011376e7b7fcb950..b3a82266e1a8df29479de5dbf5014d8a55fe36fb 100644 (file)
--- a/src/nfa/truffle_simd.hpp
+++ b/src/nfa/truffle_simd.hpp
@@ -49,6 +49,8 @@ const SuperVector<S> blockSingleMask(SuperVector<S> shuf_mask_lo_highclear, Supe
  #include "x86/truffle.hpp"
  #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
  #include "arm/truffle.hpp"
+#elif defined(ARCH_PPC64EL)
+#include "ppc64el/truffle.hpp"
  #endif
  
  template <uint16_t S>
diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp

index 478a195fe5186bb45508ccab1cd0c417dbf82961..89fe89c6796dd77f6f868d89d29cff0caf3b9d94 100644 (file)
--- a/src/util/supervector/arch/ppc64el/impl.cpp
+++ b/src/util/supervector/arch/ppc64el/impl.cpp
@@ -444,7 +444,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl(uint8_t const N) const
  }
  
  template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_8  (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_8  (uint8_t const UNUSED N) const
  {
      //if (N == 0) return *this;
      //if (N == 16) return Zeroes();
@@ -456,7 +456,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8  (uint8_t const N) const
  }
  
  template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const UNUSED N) const
  {
      //if (N == 0) return *this;
      //if (N == 16) return Zeroes();
@@ -468,7 +468,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
  }
  
  template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const UNUSED N) const
  {
      //if (N == 0) return *this;
      //if (N == 16) return Zeroes();
@@ -480,7 +480,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
  }
  
  template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const UNUSED N) const
  {
      //if (N == 0) return *this;
      //if (N == 16) return Zeroes();
@@ -492,7 +492,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
  }
  
  template <>
-really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N) const
  {
      //if (N == 0) return *this;
      //if (N == 16) return Zeroes();
@@ -594,12 +594,6 @@ really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint
      return mask & v;
  }
  
-template<>
-really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b)
-{
-    return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]);
-}
-
  template<>
  really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
  {   
@@ -626,6 +620,24 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in
      return *this;
  }
  
+template<>
+template<>
+really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
+{
+    return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]);
+}
+
+template<>
+template<>
+really_inline SuperVector<16> SuperVector<16>::pshufb<true>(SuperVector<16> b)
+{
+    /* On Intel, if bit 0x80 of an index byte is set, the result byte is zero; otherwise the selected lane is (index & 0xf).
+       In NEON, if the index is >= 16, the result is zero; otherwise it is that lane. NOTE(review): this is the ppc64el implementation - confirm the NEON remark applies here.
+       btranslated is b masked with 0x8f (keeps bit 0x80 and the low nibble) before it is handed to pshufb<false>.  */
+    SuperVector<16> btranslated = b & SuperVector<16>::dup_s8(0x8f);
+    return pshufb<false>(btranslated);
+}
+
  
  template<>
  really_inline SuperVector<16> SuperVector<16>::pshufb_maskz(SuperVector<16> b, uint8_t const len)
@@ -635,6 +647,8 @@ really_inline SuperVector<16> SuperVector<16>::pshufb_maskz(SuperVector<16> b, u
  }
  
  
+
+/*
  template<>
  really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const N)
  {
@@ -661,4 +675,5 @@ really_inline SuperVector<16> SuperVector<16>::rshift128(uint8_t const N)
  {
      return *this >> N;
  }
+*/
  #endif
author	apostolos <apostolos.tapsas@vectorcamp.gr>
	Thu, 14 Oct 2021 12:56:13 +0000 (15:56 +0300)
committer	apostolos <apostolos.tapsas@vectorcamp.gr>
	Thu, 14 Oct 2021 12:56:13 +0000 (15:56 +0300)
src/nfa/shufti_simd.hpp		patch \| blob \| blame \| history
src/nfa/truffle_simd.hpp		patch \| blob \| blame \| history
src/util/supervector/arch/ppc64el/impl.cpp		patch \| blob \| blame \| history