}
static
-void doAccelCommon(NGHolder &g,
- ue2::unordered_map<NFAVertex, AccelScheme> &accel_map,
- const ue2::unordered_map<NFAVertex, u32> &state_ids,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- const u32 num_states, limex_accel_info *accel,
- const CompileContext &cc) {
+bool is_too_wide(const AccelScheme &as) {
+ return as.cr.count() > MAX_MERGED_ACCEL_STOPS;
+}
+
+static
+void fillAccelInfo(build_info &bi) {
+ if (!bi.do_accel) {
+ return;
+ }
+
+ NGHolder &g = bi.h;
+ limex_accel_info &accel = bi.accel;
+ unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
+ const CompileContext &cc = bi.cc;
+ const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
+ const u32 num_states = bi.num_states;
+
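+ /* populate the accel map with candidate schemes, then drop states we
+ * cannot accelerate from; the result must fit in NFA_MAX_ACCEL_STATES */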
+ nfaFindAccelSchemes(g, br_cyclic, &accel_map);
+ filterAccelStates(g, bi.tops, &accel_map);
+
+ assert(accel_map.size() <= NFA_MAX_ACCEL_STATES);
+
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
vector<NFAVertex> astates;
}
}
- if (containsBadSubset(*accel, state_set, effective_sds)) {
+ if (containsBadSubset(accel, state_set, effective_sds)) {
DEBUG_PRINTF("accel %u has bad subset\n", i);
continue; /* if a subset failed to build we would too */
}
const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy);
AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic,
- allow_wide);
- if (as.cr.count() > MAX_MERGED_ACCEL_STOPS) {
+ allow_wide, true);
+ if (is_too_wide(as)) {
DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i,
as.cr.count(), MAX_MERGED_ACCEL_STOPS);
continue;
}
- DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset);
+ DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
+ as.double_offset);
// try multibyte acceleration first
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
- precalcAccel &pa = accel->precalc[state_set];
+ precalcAccel &pa = accel.precalc[state_set];
useful |= state_set;
// if we successfully built a multibyte accel scheme, use that
pa.single_offset = as.offset;
pa.single_cr = as.cr;
-
- if (states.size() == 1) {
- DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front());
- if (pa.single_cr.count() > b.stop1.count()) {
- /* insert this information into the precalc accel info as it is
- * better than the single scheme */
- pa.double_offset = b.offset;
- pa.double_lits = b.stop2;
- pa.double_cr = b.stop1;
- }
- }
+ if (!as.double_byte.empty()) {
+ pa.double_offset = as.double_offset;
+ pa.double_lits = as.double_byte;
+ pa.double_cr = as.double_cr;
+ }
}
for (const auto &m : accel_map) {
state_set.set(state_id);
bool is_multi = false;
- auto p_it = accel->precalc.find(state_set);
- if (p_it != accel->precalc.end()) {
+ auto p_it = accel.precalc.find(state_set);
+ if (p_it != accel.precalc.end()) {
const precalcAccel &pa = p_it->second;
offset = max(pa.double_offset, pa.single_offset);
is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
assert(offset <= MAX_ACCEL_DEPTH);
}
- accel->accelerable.insert(v);
- if (!is_multi)
- findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]);
- }
-}
-
-static
-void fillAccelInfo(build_info &bi) {
- if (!bi.do_accel) {
- return;
+ accel.accelerable.insert(v);
+ if (!is_multi) {
+ findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
+ }
}
-
- nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map);
- filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map);
- assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES);
- doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic,
- bi.num_states, &bi.accel, bi.cc);
}
/** The AccelAux structure has large alignment specified, and this makes some
}
}
-static
-void buildTwoByteStops(flat_set<pair<u8, u8>> &twobyte, const CharReach &cr1,
- const CharReach &cr2) {
- for (size_t c1 = cr1.find_first(); c1 != cr1.npos; c1 = cr1.find_next(c1)) {
- for (size_t c2 = cr2.find_first(); c2 != cr2.npos;
- c2 = cr2.find_next(c2)) {
- twobyte.emplace((u8)c1, (u8)c2);
- }
- }
-}
-
-static
-void findStopLiteralsAtVertex(NFAVertex v, const NGHolder &g,
- DoubleAccelInfo &build) {
- DEBUG_PRINTF("state %u\n", g[v].index);
-
- // double-byte accel is possible: calculate all single- and double-byte
- // accel literals.
- const CharReach &cr1 = g[v].char_reach;
-
- if (edge(v, g.accept, g).second) {
- // If this first byte is an accept state, it must contribute a
- // single-byte escape. We can still go on and calculate additional
- // double-byte ones, though.
- /* TODO: fix for rose */
- build.stop1 |= cr1;
- }
-
- flat_set<pair<u8, u8>> twobyte; // for just this starting state
- bool single = false;
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == g.accept || w == g.acceptEod) {
- continue;
- }
- const CharReach &cr2 = g[w].char_reach;
- size_t count = cr1.count() * cr2.count() + build.stop2.size();
- if (count > 0 && count <= 8) { // can't do more than 8 two-byte
- buildTwoByteStops(twobyte, cr1, cr2);
- } else {
- // two many two-byte literals, add the first byte as single
- single = true;
- break;
- }
- }
-
- if (single || twobyte.empty()) {
- assert(!cr1.none());
- build.stop1 |= cr1;
- } else {
- assert(!twobyte.empty());
- build.stop2.insert(twobyte.begin(), twobyte.end());
- }
-}
-
-static
-bool is_bit5_insensitive(const flat_set<pair<u8, u8>> &stop) {
- if (stop.size() != 4) {
- return false;
- }
-
- const u8 a = stop.begin()->first & CASE_CLEAR;
- const u8 b = stop.begin()->second & CASE_CLEAR;
-
- for (flat_set<pair<u8, u8>>::const_iterator it = stop.begin();
- it != stop.end(); ++it) {
- if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-bool is_dverm(const DoubleAccelInfo &a) {
- if (a.stop1.any()) {
- return false;
- }
-
- if (a.stop2.size() == 1) {
- return true;
- }
-
- return is_bit5_insensitive(a.stop2);
-}
-
-static
-bool is_double_better(const DoubleAccelInfo &a, const DoubleAccelInfo &b) {
- /* Note: this is not an operator< */
-
- if (a.stop2.empty()) {
- return false;
- }
-
- if (b.stop2.empty()) {
- return true;
- }
-
- if (a.stop1.count() > b.stop1.count()) {
- return false;
- }
-
- if (a.stop1.count() < b.stop1.count()) {
- return true;
- }
-
- bool a_dvm = is_dverm(a);
- bool b_dvm = is_dverm(b);
-
- if (b_dvm && !a_dvm) {
- return false;
- }
-
- if (!b_dvm && a_dvm) {
- return true;
- }
-
- if (a.stop2.size() > b.stop2.size()) {
- return false;
- }
-
- if (a.stop2.size() < b.stop2.size()) {
- return true;
- }
-
- return a.offset < b.offset;
-}
-
-/** \brief Find the escape literals for a two byte accel at the given accel
- * offset */
-static
-void findDoubleAccel(const NGHolder &g, NFAVertex v, u32 accel_offset,
- DoubleAccelInfo &build) {
- DEBUG_PRINTF("find double accel +%u for vertex %u\n", accel_offset,
- g[v].index);
- build.offset = accel_offset;
-
- // Our accel state contributes single-byte escapes
- build.stop1 |= ~g[v].char_reach;
-
- flat_set<NFAVertex> searchStates; // states that contribute stop literals
- searchStates.insert(v); /* TODO: verify */
-
- /* Note: We cannot search past an accepting state */
- /* TODO: remove restriction for non-callback generating */
- flat_set<NFAVertex> nextStates;
-
- insert(&nextStates, adjacent_vertices(v, g));
- nextStates.erase(v);
- nextStates.erase(g.accept);
- nextStates.erase(g.acceptEod);
-
- searchStates.swap(nextStates);
- nextStates.clear();
-
- // subsequent iterations are simpler, just follow all edges
- for (u32 j = 1; j <= accel_offset; j++) {
- for (auto u : searchStates) {
- insert(&nextStates, adjacent_vertices(u, g));
- if (edge(u, g.accept, g).second) {
- nextStates.clear();
- break;
- }
- nextStates.erase(g.accept);
- nextStates.erase(g.acceptEod);
- }
-
- searchStates.swap(nextStates);
- nextStates.clear();
- }
-
- vector<NFAVertex> sorted;
- insert(&sorted, sorted.end(), searchStates);
- sort(sorted.begin(), sorted.end(), make_index_ordering(g));
- for (auto sv : sorted) {
- findStopLiteralsAtVertex(sv, g, build);
- }
-}
-
-DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v) {
- DoubleAccelInfo rv;
- for (u32 offset = 0; offset <= MAX_ACCEL_DEPTH; offset++) {
- DoubleAccelInfo b_temp;
- findDoubleAccel(g, v, offset, b_temp);
- if (is_double_better(b_temp, rv)) {
- rv = b_temp;
- }
- }
-
- return rv;
-}
-
static
void findPaths(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr,
}
static
-AccelScheme merge(const AccelScheme &a, const AccelScheme &b) {
- return AccelScheme(a.cr | b.cr, MAX(a.offset, b.offset));
+AccelScheme merge(AccelScheme a, const AccelScheme &b) {
+ a.cr |= b.cr;
+ ENSURE_AT_LEAST(&a.offset, b.offset);
+ a.double_cr |= b.double_cr;
+ insert(&a.double_byte, b.double_byte);
+ ENSURE_AT_LEAST(&a.double_offset, b.double_offset);
+ return a;
}
static
}
}
-#ifdef DEBUG
+static
+AccelScheme make_double_accel(AccelScheme as, CharReach cr_1,
+ const CharReach &cr_2_in, u32 offset_in) {
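+ /* characters already covered by the scheme's single-byte stops (double_cr)
+ * do not need to appear in the two-byte literal pairs */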
+ cr_1 &= ~as.double_cr;
+ CharReach cr_2 = cr_2_in & ~as.double_cr;
+ u32 offset = offset_in;
+
+ if (cr_1.none()) {
+ DEBUG_PRINTF("empty first element\n");
+ as.double_offset = offset;
+ return as;
+ }
+
+ if (cr_2_in != cr_2 || cr_2.none()) {
+ offset = offset_in + 1;
+ }
+
+ size_t two_count = cr_1.count() * cr_2.count();
+
+ DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);
+
+ if (!two_count) {
+ DEBUG_PRINTF("empty element\n");
+ as.double_offset = offset;
+ return as;
+ }
+
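+ /* at most 8 two-byte literal pairs can be used; if the cross product is
+ * larger, fall back to adding the smaller of the two classes to the
+ * single-byte stops */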
+ if (two_count > 8) {
+ if (cr_2.count() < cr_1.count()) {
+ as.double_cr |= cr_2;
+ offset = offset_in + 1;
+ } else {
+ as.double_cr |= cr_1;
+ }
+ } else {
+ for (auto i = cr_1.find_first(); i != CharReach::npos;
+ i = cr_1.find_next(i)) {
+ for (auto j = cr_2.find_first(); j != CharReach::npos;
+ j = cr_2.find_next(j)) {
+ as.double_byte.insert(make_pair(i, j));
+ }
+ }
+ }
+
+ as.double_offset = offset;
+ DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
+ as.double_byte.size(), as.double_cr.count(), as.offset);
+ return as;
+}
+static
+void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
+ vector<vector<CharReach> >::const_iterator pe,
+ const AccelScheme &curr, AccelScheme *best) {
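+ /* build a candidate double-byte stop for each adjacent pair of classes on
+ * the first path, then recurse over the remaining paths with each
+ * candidate merged in, keeping whichever completed scheme compares best */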
+ assert(curr.offset <= MAX_ACCEL_DEPTH);
+ DEBUG_PRINTF("paths left %zu\n", pe - pb);
+ if (pb == pe) {
+ *best = curr;
+ return;
+ }
+
+ DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
+
+ vector<AccelScheme> priority_path;
+ u32 i = 0;
+ for (vector<CharReach>::const_iterator p = pb->begin();
+ p != pb->end() && next(p) != pb->end();
+ ++p, i++) {
+ priority_path.push_back(make_double_accel(curr, *p, *next(p), i));
+ }
+
+ sort(priority_path.begin(), priority_path.end());
+
+ DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
+ best->double_byte.size(), best->double_cr.count(),
+ best->offset);
+
+ for (vector<AccelScheme>::const_iterator it = priority_path.begin();
+ it != priority_path.end(); ++it) {
+
+ AccelScheme in = merge(curr, *it);
+ DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
+ in.double_byte.size(), in.double_cr.count(), in.offset);
+
+ if (in > *best) {
+ DEBUG_PRINTF("worse\n");
+ continue;
+ }
+ AccelScheme temp = *best;
+ findDoubleBest(pb + 1, pe, in, &temp);
+ if (temp < *best) {
+ *best = temp;
+ DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
+ best->double_byte.size(), best->double_cr.count(),
+ best->offset);
+ }
+ }
+}
+
+#ifdef DEBUG
static
void dumpPaths(const vector<vector<CharReach> > &paths) {
for (vector<vector<CharReach> >::const_iterator p = paths.begin();
#endif
}
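+/* limit on the number of distinct paths considered when looking for a
+ * double-byte scheme; above this, paths are shortened and re-unified */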
+#define MAX_DOUBLE_ACCEL_PATHS 10
+
+static
+AccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
+ const CharReach &terminating) {
+ DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
+ terminating.count());
+ unifyPathsLastSegment(paths);
+ AccelScheme curr;
+ curr.double_cr = terminating;
+ curr.offset = 0;
+ /* if there are too many paths, shorten the paths to reduce the number of
+ * distinct paths we have to consider */
+ while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
+ for (auto &p : paths) {
+ if (p.empty()) {
+ return curr;
+ }
+ p.pop_back();
+ }
+ unifyPathsLastSegment(paths);
+ }
+
+ if (paths.empty()) {
+ return curr;
+ }
+
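+ /* seed with a scheme that stops on every byte so that any usable
+ * double-byte scheme found by the search compares better and replaces it */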
+ AccelScheme best;
+ best.double_cr = CharReach::dot();
+ findDoubleBest(paths.begin(), paths.end(), curr, &best);
+ curr = best;
+ DEBUG_PRINTF("da %zu pairs, %zu singles\n", curr.double_byte.size(),
+ curr.double_cr.count());
+ return curr;
+}
+
AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
- const CharReach &terminating) {
+ const CharReach &terminating,
+ bool look_for_double_byte) {
+ AccelScheme da;
+
+ if (look_for_double_byte) {
+ da = findBestDoubleAccelScheme(paths, terminating);
+ }
+
improvePaths(paths);
DEBUG_PRINTF("we have %zu paths\n", paths.size());
if (paths.size() > 40) {
- return AccelScheme(); /* too many paths to explore */
+ return da; /* too many paths to explore */
}
/* if we were smart we would do something netflowy on the paths to find the
assert(offset <= best.offset);
best.offset = offset;
+ /* merge best single and best double */
+ if (!da.double_byte.empty() && da.double_byte.size() <= 8
+ && da.double_cr.count() < best.cr.count()) {
+ best.double_byte = da.double_byte;
+ best.double_cr = da.double_cr;
+ best.double_offset = da.double_offset;
+ }
+
return best;
}
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
const vector<CharReach> &refined_cr,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- bool allow_wide) {
+ bool allow_wide, bool look_for_double_byte) {
CharReach terminating;
for (auto v : verts) {
if (!hasSelfLoop(v, g)) {
reverse(it->begin(), it->end());
}
- return findBestAccelScheme(std::move(paths), terminating);
+ return findBestAccelScheme(std::move(paths), terminating,
+ look_for_double_byte);
}
NFAVertex get_sds_or_proxy(const NGHolder &g) {
}
}
- // Look for one byte accel schemes verm/shufti;
+ // Look for offset accel schemes (verm/shufti).
vector<NFAVertex> verts(1, v);
- *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide);
+ *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
DEBUG_PRINTF("as width %zu\n", as->cr.count());
return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
}
u32 offset,
ue2::flat_set<NFAVertex> *friends);
-struct DoubleAccelInfo {
- DoubleAccelInfo() : offset(0) {}
- u32 offset; //!< offset correction to apply
- CharReach stop1; //!< single-byte accel stop literals
- flat_set<std::pair<u8, u8>> stop2; //!< double-byte accel stop literals
-};
-
-DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v);
-
struct AccelScheme {
AccelScheme(const CharReach &cr_in, u32 offset_in)
: cr(cr_in), offset(offset_in) {
// Don't use ORDER_CHECK as it will (stupidly) eval count() too many
// times.
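+ /* schemes with a usable double-byte stop set sort first; among those,
+ * prefer fewer single-byte escapes, then smaller and caseless literal
+ * sets */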
+ size_t a_dcount = double_cr.count();
+ size_t b_dcount = b.double_cr.count();
+
+ bool feasible_double_a
+ = !a.double_byte.empty() && a.double_byte.size() <= 8;
+ bool feasible_double_b
+ = !b.double_byte.empty() && b.double_byte.size() <= 8;
+
+ if (feasible_double_a != feasible_double_b) {
+ return feasible_double_a > feasible_double_b;
+ }
+
+ if (feasible_double_a) {
+ if (a_dcount != b_dcount) {
+ return a_dcount < b_dcount;
+ }
+
+ if ((a.double_byte.size() == 1) != (b.double_byte.size() == 1)) {
+ return a.double_byte.size() < b.double_byte.size();
+ }
+
+ bool cd_a = isCaselessDouble(a.double_byte);
+ bool cd_b = isCaselessDouble(b.double_byte);
+ if (cd_a != cd_b) {
+ return cd_a > cd_b;
+ }
+ ORDER_CHECK(double_byte.size());
+ ORDER_CHECK(double_offset);
+ }
+
const size_t a_count = cr.count(), b_count = b.cr.count();
if (a_count != b_count) {
return a_count < b_count;
/* TODO: give bonus if one is a 'caseless' character */
ORDER_CHECK(offset);
ORDER_CHECK(cr);
+ ORDER_CHECK(double_byte);
+ ORDER_CHECK(double_cr);
+ ORDER_CHECK(double_offset);
return false;
}
return b < *this;
}
+ ue2::flat_set<std::pair<u8, u8> > double_byte;
CharReach cr;
+ CharReach double_cr;
u32 offset;
+ u32 double_offset = 0;
};
NFAVertex get_sds_or_proxy(const NGHolder &g);
AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,
const std::vector<CharReach> &refined_cr,
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- bool allow_wide);
+ bool allow_wide, bool look_for_double_byte = false);
AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths,
- const CharReach &terminating);
+ const CharReach &terminating,
+ bool look_for_double_byte = false);
-/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
+/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). If
+ * a single-byte accel scheme is found, it is placed into *as.
+ */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
const std::vector<CharReach> &refined_cr,
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,