#include "shufticompile.h"
#include "ue2common.h"
#include "util/charreach.h"
+#include "util/container.h"
#include "util/ue2_containers.h"
#include <array>
return bit_index;
}
-void shuftiBuildDoubleMasks(const CharReach &onechar,
+/** Lane-wise bitwise OR of two four-entry nibble-mask tuples; \a a is taken by
+ * value, updated in place and returned. */
+static
+array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) {
+    for (size_t lane = 0; lane < a.size(); lane++) {
+        a[lane] |= b[lane];
+    }
+    return a;
+}
+
+
+#define MAX_BUCKETS 8
+/** For every bit set in \a nibble_mask, clear bit \a bucket in the matching
+ * entry of \a byte_mask (a clear bit marks the entry as belonging to that
+ * bucket). */
+static
+void set_buckets_from_mask(u16 nibble_mask, u32 bucket,
+                           array<u8, 16> &byte_mask) {
+    assert(bucket < MAX_BUCKETS);
+
+    // Widen to 32 bits so the LSB helper can consume the mask bit by bit.
+    for (u32 remaining = nibble_mask; remaining != 0;) {
+        const u32 nibble = findAndClearLSB_32(&remaining);
+        byte_mask[nibble] &= ~(1 << bucket);
+    }
+}
+/**
+ * Build the four 16-entry nibble lookup tables for a set of one- and two-byte
+ * literals: lo1/hi1 are indexed by the low/high nibble of the first byte,
+ * lo2/hi2 by the low/high nibble of the second byte.
+ *
+ * Each literal is first expressed as four 16-bit nibble sets. Sets that differ
+ * in only one of the four positions are then merged so that several literals
+ * can share a bucket. Bucket k is recorded as bit k being CLEAR in every table
+ * entry that the bucket accepts.
+ *
+ * \param onechar one-byte literals; their second byte is treated as a wildcard
+ * \param twochar two-byte literals as (first byte, second byte) pairs
+ * \param lo1,hi1,lo2,hi2 outputs; sizeof(m128) bytes are copied to each
+ * \return false, without writing the outputs, if more than MAX_BUCKETS buckets
+ *         remain after merging; true once the tables have been written
+ */
+bool shuftiBuildDoubleMasks(const CharReach &onechar,
const flat_set<pair<u8, u8>> &twochar,
m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2) {
DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(),
twochar.size());
- assert(onechar.count() + twochar.size() <= 8);
-
array<u8, 16> lo1_a;
array<u8, 16> lo2_a;
array<u8, 16> hi1_a;
hi1_a.fill(0xff);
hi2_a.fill(0xff);
- u32 i = 0;
-
// two-byte literals
- for (flat_set<pair<u8, u8>>::const_iterator it = twochar.begin();
- it != twochar.end(); ++it, i++) {
- DEBUG_PRINTF("%u: %02hhx %02hhx\n", i, it->first, it->second);
- u8 b1 = it->first & 0xf;
- u8 t1 = it->first >> 4;
- u8 b2 = it->second & 0xf;
- u8 t2 = it->second >> 4;
-
- lo1_a[b1] &= ~(1 << i);
- hi1_a[t1] &= ~(1 << i);
- lo2_a[b2] &= ~(1 << i);
- hi2_a[t2] &= ~(1 << i);
+ vector<array<u16, 4>> nibble_masks; // per literal: {lo1, hi1, lo2, hi2} nibble sets
+ for (const auto &p : twochar) {
+ DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second);
+ u16 a_lo = 1U << (p.first & 0xf); // one-hot: bit n set <=> nibble value n
+ u16 a_hi = 1U << (p.first >> 4);
+ u16 b_lo = 1U << (p.second & 0xf);
+ u16 b_hi = 1U << (p.second >> 4);
+ nibble_masks.push_back({a_lo, a_hi, b_lo, b_hi});
}
// one-byte literals (second byte is a wildcard)
for (size_t it = onechar.find_first(); it != CharReach::npos;
- it = onechar.find_next(it), i++) {
- DEBUG_PRINTF("%u: %02hhx\n", i, (u8)it);
- u8 b1 = it & 0xf;
- u8 t1 = it >> 4;
-
- lo1_a[b1] &= ~(1 << i);
- hi1_a[t1] &= ~(1 << i);
+ it = onechar.find_next(it)) {
+ DEBUG_PRINTF("%02hhx\n", (u8)it);
+ nibble_masks.push_back({(u16)(1U << (it & 0xf)), (u16)(1U << (it >> 4)),
+ 0xffff, 0xffff}); // all 16 values allowed for both second-byte nibbles
+ }
- for (int j = 0; j < 16; j++) {
- lo2_a[j] &= ~(1 << i);
- hi2_a[j] &= ~(1 << i);
+ /* try to merge strings into shared buckets: for each of the four nibble
+  * positions in turn, masks that agree on the other three positions are
+  * OR-ed together. Because only one position differs, the merged mask
+  * accepts exactly the union of what the originals accepted. */
+ for (u32 i = 0; i < 4; i++) {
+ map<array<u16, 4>, array<u16, 4>> new_masks; // key: mask with position i zeroed
+ for (const auto &a : nibble_masks) {
+ auto key = a;
+ key[i] = 0; // ignore position i when grouping
+ if (!contains(new_masks, key)) {
+ new_masks[key] = a;
+ } else {
+ new_masks[key] = or_array(new_masks[key], a);
+ }
}
+ nibble_masks.clear();
+ for (const auto &e : new_masks) {
+ nibble_masks.push_back(e.second); // merged masks feed the next position's pass
+ }
+ }
+
+ if (nibble_masks.size() > MAX_BUCKETS) { // each bucket needs its own bit in a u8 table entry
+ DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size());
+ return false;
+ }
+
+ u32 i = 0; // bucket index, i.e. the bit cleared in the table entries
+ for (const auto &a : nibble_masks) {
+ set_buckets_from_mask(a[0], i, lo1_a);
+ set_buckets_from_mask(a[1], i, hi1_a);
+ set_buckets_from_mask(a[2], i, lo2_a);
+ set_buckets_from_mask(a[3], i, hi2_a);
+ i++;
}
memcpy(lo1, lo1_a.data(), sizeof(m128));
memcpy(lo2, lo2_a.data(), sizeof(m128));
memcpy(hi1, hi1_a.data(), sizeof(m128));
memcpy(hi2, hi2_a.data(), sizeof(m128));
+
+ return true;
}
#ifdef DUMP_SUPPORT
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
lits.insert(make_pair('a', 'B'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m,
+ &lo2m, &hi2m);
+ ASSERT_TRUE(rv);
u8 *lo1 = (u8 *)&lo1m;
u8 *lo2 = (u8 *)&lo2m;
lits.insert(make_pair('a','z'));
lits.insert(make_pair('B','z'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m,
+ &lo2m, &hi2m);
+ ASSERT_TRUE(rv);
u8 *lo1 = (u8 *)&lo1m;
u8 *lo2 = (u8 *)&lo2m;
lits.insert(make_pair('A','z'));
lits.insert(make_pair('b','z'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m,
+ &lo2m, &hi2m);
+ ASSERT_TRUE(rv);
u8 *lo1 = (u8 *)&lo1m;
u8 *lo2 = (u8 *)&lo2m;
CharReach bytes;
bytes.set('X');
- shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, &lo2m, &hi2m);
+ bool rv = shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m,
+ &lo2m, &hi2m);
+ ASSERT_TRUE(rv);
u8 *lo1 = (u8 *)&lo1m;
u8 *lo2 = (u8 *)&lo2m;
lo1['B' % 16] | hi1['B' >> 4] | lo2['X' % 16] | hi2['X' >> 4]);
}
+TEST(DoubleShufti, BuildMask6) {
+    // Twelve literals: every first byte in {a, A, b, B} paired with every
+    // second byte in {z, y, x}. The mask build is expected to succeed.
+    const u8 firsts[] = {'a', 'A', 'b', 'B'};
+    const u8 seconds[] = {'z', 'y', 'x'};
+
+    flat_set<pair<u8, u8>> lits;
+    for (u8 second : seconds) {
+        for (u8 first : firsts) {
+            lits.insert(make_pair(first, second));
+        }
+    }
+
+    m128 lo1m, hi1m, lo2m, hi2m;
+    bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m,
+                                     &lo2m, &hi2m);
+    ASSERT_TRUE(rv);
+
+    u8 *lo1 = (u8 *)&lo1m;
+    u8 *lo2 = (u8 *)&lo2m;
+    u8 *hi1 = (u8 *)&hi1m;
+    u8 *hi2 = (u8 *)&hi2m;
+
+    // For each literal the OR of its four table entries must not be 0xff,
+    // i.e. at least one bit is clear in all four entries.
+    for (u8 second : seconds) {
+        for (u8 first : firsts) {
+            ASSERT_NE(0xff, lo1[first % 16] | hi1[first >> 4] |
+                                lo2[second % 16] | hi2[second >> 4]);
+        }
+    }
+}
+
+TEST(DoubleShufti, BuildMask7) {
+    // Twelve two-byte literals taken as consecutive character pairs:
+    // "ab", "cd", ..., "wx". The mask build is expected to report failure.
+    const char pairs[] = "abcdefghijklmnopqrstuvwx";
+
+    flat_set<pair<u8, u8>> lits;
+    for (size_t i = 0; i + 1 < sizeof(pairs) - 1; i += 2) {
+        lits.insert(make_pair((u8)pairs[i], (u8)pairs[i + 1]));
+    }
+
+    m128 lo1m, hi1m, lo2m, hi2m;
+    bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m,
+                                     &lo2m, &hi2m);
+    ASSERT_FALSE(rv);
+}
+
TEST(DoubleShufti, ExecNoMatch1) {
m128 lo1, hi1, lo2, hi2;
lits.insert(make_pair('a','b'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1,
+ &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
lits.insert(make_pair('b','a'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
lits.insert(make_pair('a','b'));
lits.insert(make_pair('B','b'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
lits.insert(make_pair('b','a'));
lits.insert(make_pair('b','B'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
lits.insert(make_pair('V','e'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
lits.insert(make_pair('e','V'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
lits.insert(make_pair('a','b'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
/* 0123456789012345678901234567890 */
char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb";
lits.insert(make_pair('a','a'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
/* 0123456789012345678901234567890 */
char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
lits.insert(make_pair('B','a'));
lits.insert(make_pair('a','a'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
/* 0123456789012345678901234567890 */
char t1[] = "bbbbbbbbbbbbbbbbbBaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
lits.insert(make_pair('C','a'));
lits.insert(make_pair('c','a'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
-
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
/* 0123456789012345678901234567890 */
char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
lits.insert(make_pair('a','C'));
lits.insert(make_pair('a','c'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
-
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
/* 0123456789012345678901234567890 */
char t1[] = "bbbbbbbbbbbbbbbbbaAaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb";
lits.insert(make_pair('a','A'));
- shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
// just one one-byte literal
onebyte.set('a');
- shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
onebyte.set('a');
twobyte.insert(make_pair('x', 'y'));
- shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
char t2[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
onebyte.set('a');
twobyte.insert(make_pair('x', 'y'));
- shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2);
+ bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2);
+ ASSERT_TRUE(rv);
const int len = 420;
char t1[len + 1];