const m256 low4bits) {
// do the hi and lo shuffles in the one avx register
m256 c = set2x128(chars);
- c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4));
+ c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
c = and256(c, low4bits);
m256 c_shuf = vpshufb(mask, c);
m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
const m256 low4bits) {
// do the hi and lo shuffles in the one avx register
m256 c = set2x128(chars);
- c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4));
+ c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
c = and256(c, low4bits);
m256 c_shuf = vpshufb(mask, c);
m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf));
const m256 low4bits) {
// do the hi and lo shuffles in the one avx register
m256 c = set2x128(chars);
- c = _mm256_srlv_epi64(c, _mm256_set_epi64x(0, 0, 4, 4));
+ c = _mm256_srlv_epi64(c, _mm256_set_epi64x(4, 4, 0, 0));
c = and256(c, low4bits);
m256 c_shuf1 = vpshufb(mask1, c);
m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1);
neg_mask &= 0xffff;
array<u8, 32> nib_mask;
array<u8, 16> bucket_select_mask_16;
- copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin());
- copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin() + 16);
+ copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
+ copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
bucket_select_mask_16.begin());
auto ri = make_unique<RoseInstrCheckShufti16x8>
}
static really_inline
m256 combine2x128(m128 hi, m128 lo) {
    // Concatenate two 128-bit vectors into one 256-bit vector.
    //
    // The m256 struct stores its low 128-bit lane first, so the brace
    // initializer must list `lo` before `hi`. This matches the intrinsic
    // form used elsewhere in this file: _mm256_set_m128i(hi, lo) /
    // insert128to256(cast128to256(lo), hi, 1), both of which put `hi`
    // in the upper lane and `lo` in the lower lane.
    m256 rv = {lo, hi};
    return rv;
}
#if defined(_mm256_set_m128i)
return _mm256_set_m128i(hi, lo);
#else
- return insert128to256(cast128to256(hi), lo, 1);
+ return insert128to256(cast128to256(lo), hi, 1);
#endif
}
#endif //AVX2