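shufti: slightly faster short shufti operation

In the short-block helpers (fwdBlockShort, revBlockShort and fwdBlockShort2),
build the 256-bit shuffle input directly with
combine2x128(rshift64_m128(chars, 4), chars) instead of broadcasting chars
with set2x128() and then applying a variable per-element shift via
_mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0)).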

author Matthew Barr <matthew.barr@intel.com>

Tue, 13 Sep 2016 05:07:31 +0000 (15:07 +1000)

committer Matthew Barr <matthew.barr@intel.com>

Fri, 2 Dec 2016 00:33:51 +0000 (11:33 +1100)
author Matthew Barr <matthew.barr@intel.com>
Tue, 13 Sep 2016 05:07:31 +0000 (15:07 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Fri, 2 Dec 2016 00:33:51 +0000 (11:33 +1100)
diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c

index 2e63be9fdca846c4f1a5ac0b559d088e3197eb48..d68b1b0475b61b50ce0a740a4cbca79db1698a71 100644
--- a/src/nfa/shufti.c
+++ b/src/nfa/shufti.c
@@ -307,8 +307,7 @@ static really_inline
  const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf,
                          const m256 low4bits) {
      // do the hi and lo shuffles in the one avx register
-    m256 c = set2x128(chars);
-    c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
+    m256 c = combine2x128(rshift64_m128(chars, 4), chars);
      c = and256(c, low4bits);
      m256 c_shuf = vpshufb(mask, c);
      m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
@@ -439,8 +438,7 @@ static really_inline
  const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf,
                          const m256 low4bits) {
      // do the hi and lo shuffles in the one avx register
-    m256 c = set2x128(chars);
-    c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
+    m256 c = combine2x128(rshift64_m128(chars, 4), chars);
      c = and256(c, low4bits);
      m256 c_shuf = vpshufb(mask, c);
      m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
@@ -564,8 +562,7 @@ static really_inline
  const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf,
                           const m256 low4bits) {
      // do the hi and lo shuffles in the one avx register
-    m256 c = set2x128(chars);
-    c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
+    m256 c = combine2x128(rshift64_m128(chars, 4), chars);
      c = and256(c, low4bits);
      m256 c_shuf1 = vpshufb(mask1, c);
      m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1);
author	Matthew Barr <matthew.barr@intel.com>
	Tue, 13 Sep 2016 05:07:31 +0000 (15:07 +1000)
committer	Matthew Barr <matthew.barr@intel.com>
	Fri, 2 Dec 2016 00:33:51 +0000 (11:33 +1100)