Shuffle simd and SuperVector implementations as well as their tests really fixed

author Apostolos Tapsas <apostolos@vm01.debian11>

Sun, 24 Oct 2021 16:52:12 +0000 (16:52 +0000)

committer apostolos <apostolos.tapsas@vectorcamp.gr>

Mon, 25 Oct 2021 06:19:30 +0000 (09:19 +0300)
author Apostolos Tapsas <apostolos@vm01.debian11>
Sun, 24 Oct 2021 16:52:12 +0000 (16:52 +0000)
committer apostolos <apostolos.tapsas@vectorcamp.gr>
Mon, 25 Oct 2021 06:19:30 +0000 (09:19 +0300)
diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h

index a1728e6a8ef93b96873f349608187e4f5b3c9b94..365d47296e76a1de2eefe20a21e1f06d05faf6d9 100644 (file)
--- a/src/nfa/limex_shuffle.h
+++ b/src/nfa/limex_shuffle.h
@@ -45,7 +45,6 @@
  static really_inline
  u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
      m128 shuffled = pshufb_m128(s, permute);
-    print_m128_16x8("shufled", shuffled);
      m128 compared = and128(shuffled, compare);
      u16 rv = ~movemask128(eq128(compared, shuffled));
      return (u32)rv;
diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h

index 9e8c59bf6ce1c8704aec0688de55fcb6c6c32f2c..107ca110627a40f8a53c0dd966bd0337794a6061 100644 (file)
--- a/src/util/arch/ppc64el/simd_utils.h
+++ b/src/util/arch/ppc64el/simd_utils.h
@@ -462,7 +462,9 @@ char testbit128(m128 val, unsigned int n) {
  
  static really_inline
  m128 pshufb_m128(m128 a, m128 b) {
-    return (m128) vec_perm((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
+    uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b, (uint8x16_t)vec_splats((uint8_t)0x80));
+    uint8x16_t res = vec_perm ((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
+    return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)zeroes128(), (uint8x16_t)mask);
  }
  
  static really_inline
diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp

index dc318c826dcd1d215fa121d02881e6de5b74b9ba..0af136a55684457193e603f2f0f980e62d12f177 100644 (file)
--- a/src/util/supervector/arch/ppc64el/impl.cpp
+++ b/src/util/supervector/arch/ppc64el/impl.cpp
@@ -603,7 +603,9 @@ template<>
  template<>
  really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
  {
-    return (m128) vec_perm((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
+    uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b.u.v128[0], (uint8x16_t)vec_splats((uint8_t)0x80));
+    uint8x16_t res = vec_perm ((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
+    return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)vec_splat_s8(0), (uint8x16_t)mask);
  }
  
  template<>
diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp

index b7c1b4f5c1c29dfb3fac640647782b81b1c7758b..038c61930a2480a25c850a1eeb158ce1033d4ce2 100644 (file)
--- a/unit/internal/shuffle.cpp
+++ b/unit/internal/shuffle.cpp
@@ -187,7 +187,7 @@ TEST(Shuffle, PackedExtract128_1) {
          // shuffle a single 1 bit to the front
          m128 permute, compare;
          build_pshufb_masks_onebit(i, &permute, &compare);
-           EXPECT_EQ(1U, packedExtract128(setbit<m128>(i), permute, compare));
+       EXPECT_EQ(1U, packedExtract128(setbit<m128>(i), permute, compare));
          EXPECT_EQ(1U, packedExtract128(ones128(), permute, compare));
          // we should get zero out of these cases
          EXPECT_EQ(0U, packedExtract128(zeroes128(), permute, compare));
@@ -199,7 +199,7 @@ TEST(Shuffle, PackedExtract128_1) {
      }
  }
  
-/*
+
  TEST(Shuffle, PackedExtract_templatized_128_1) {
      // Try all possible one-bit masks
      for (unsigned int i = 0; i < 128; i++) {
@@ -218,7 +218,7 @@ TEST(Shuffle, PackedExtract_templatized_128_1) {
          }
      }
  }
-*/
+
  
  
  #if defined(HAVE_AVX2)
diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp

index 2085c9df3bf0d4e3e05acca67bc234a5afda1817..037230d0a32077a9ec168ac482f44d742de39121 100644 (file)
--- a/unit/internal/simd_utils.cpp
+++ b/unit/internal/simd_utils.cpp
@@ -849,15 +849,22 @@ TEST(SimdUtilsTest, pshufb_m128) {
      }
      u8 vec2[16];
      for (int i=0; i<16; i++) {
-        vec2[i]=i + (rand() % 16 + 0);
-    } 
+        vec2[i]=i + (rand() % 15 + 0);
+    }
+
      m128 v1 = loadu128(vec);
      m128 v2 = loadu128(vec2);
-    m128 vres = pshufb_m128(v1, v2);
+    m128 vres = pshufb_m128(v1, v2); 
+    
      u8 res[16];
-    store128(res, vres);
+    storeu128(res, vres);
+
      for (int i=0; i<16; i++) {
-        ASSERT_EQ(vec[vec2[i] % 16 ], res[i]);
+       if(vec2[i] & 0x80){
+          ASSERT_EQ(res[i], 0);
+        }else{    
+           ASSERT_EQ(vec[vec2[i] % 16 ], res[i]);
+       }
      }
  }
  
diff --git a/unit/internal/supervector.cpp b/unit/internal/supervector.cpp

index 4be93aa8ce3a51b09bd9a498357a75e2d2961edf..9c5f8f3ac3355acc9e1128413284e5902c6e8903 100644 (file)
--- a/unit/internal/supervector.cpp
+++ b/unit/internal/supervector.cpp
@@ -286,7 +286,11 @@ TEST(SuperVectorUtilsTest,pshufb128c) {
      auto SP2 = SuperVector<16>::loadu(vec2);
      auto SResult = SP1.template pshufb<true>(SP2);
      for (int i=0; i<16; i++) {
-        ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]);
+       if(vec2[i] & 0x80){
+          ASSERT_EQ(SResult.u.u8[i], 0);
+       }else{
+           ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]);
+       }
      }
  }
author	Apostolos Tapsas <apostolos@vm01.debian11>
	Sun, 24 Oct 2021 16:52:12 +0000 (16:52 +0000)
committer	apostolos <apostolos.tapsas@vectorcamp.gr>
	Mon, 25 Oct 2021 06:19:30 +0000 (09:19 +0300)
src/nfa/limex_shuffle.h		patch \| blob \| blame \| history
src/util/arch/ppc64el/simd_utils.h		patch \| blob \| blame \| history
src/util/supervector/arch/ppc64el/impl.cpp		patch \| blob \| blame \| history
unit/internal/shuffle.cpp		patch \| blob \| blame \| history
unit/internal/simd_utils.cpp		patch \| blob \| blame \| history
unit/internal/supervector.cpp		patch \| blob \| blame \| history