static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
static const size_t MERGE_GROUP_SIZE_MAX = 200;
+static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
static const size_t DFA_CHUNK_SIZE_MAX = 200;
}
}
+static
+bool stringsCanFinishAtSameSpot(const ue2_literal &u,
+ ue2_literal::const_iterator v_b,
+ ue2_literal::const_iterator v_e) {
+ ue2_literal::const_iterator u_e = u.end();
+ ue2_literal::const_iterator u_b = u.begin();
+
+ while (u_e != u_b && v_e != v_b) {
+ --u_e;
+ --v_e;
+
+ if (!overlaps(*u_e, *v_e)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
/**
- * Prefix analysis: For lit1 with delay1 and lit2 with delay2, let L be the
- * length of the largest suffix of lit1 that is a prefix of lit2. A merge is
- * bad if L - delay1 > len(lit2) - delay2.
+ * Check that if after u has been seen, that it is impossible for the arrival of
+ * v to require the inspection of an engine earlier than u did.
+ *
+ * Let delta be the earliest that v can be seen after u (may be zero)
*
- * OR if we would have to check 2 literals of differing lags at the same
- * point.
+ * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
+ * ==> - ulag <= delta - vlag
+ * ==> vlag - ulag <= delta
*/
static
bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
const rose_literal_id &vl, const u32 vlag) {
- DEBUG_PRINTF("%s %s\n", escapeString(ul.s).c_str(),
- escapeString(vl.s).c_str());
- if (ulag != vlag && (vl.delay || ul.delay || isSuffix(ul.s, vl.s))) {
- /* rose literals should not be delayed anyway */
+ DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
+ escapeString(vl.s).c_str(), vlag);
+
+ if (vl.delay || ul.delay) {
+ /* engine related literals should not be delayed anyway */
return false;
}
- // Note that maxOverlap also picks up infixes.
- size_t overlap = maxOverlap(ul, vl);
- if (overlap < ulag) {
- return true; /* avoiding underflow */
+ if (ulag >= vlag) {
+ assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
+ return true;
}
- return overlap - ulag <= vl.elength() - vlag;
-}
-
-bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
- RoseVertex v) {
- assert(u != v);
- const auto &ulits = tbi.g[u].literals;
- const auto &vlits = tbi.g[v].literals;
+ size_t min_allowed_delta = vlag - ulag;
+ DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta);
- // We cannot merge roses that prefix literals in different tables.
- if (tbi.literals.right.at(*ulits.begin()).table !=
- tbi.literals.right.at(*vlits.begin()).table) {
- DEBUG_PRINTF("literals in different tables\n");
- return false;
+ for (size_t i = 0; i < min_allowed_delta; i++) {
+ if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+ DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
+ return false;
+ }
}
- const left_id u_left(tbi.g[u].left), v_left(tbi.g[v].left);
+ DEBUG_PRINTF("OK\n");
+ return true;
+}
+
+static
+bool hasSameEngineType(const RoseVertexProps &u_prop,
+ const RoseVertexProps &v_prop) {
+ const left_id u_left(u_prop.left), v_left(v_prop.left);
if (u_left.haig() || v_left.haig()) {
if (u_left.graph() != v_left.graph()) {
}
}
+ return true;
+}
+
+static
+bool compatibleLiteralsForMerge(
+ const vector<pair<const rose_literal_id *, u32>> &ulits,
+ const vector<pair<const rose_literal_id *, u32>> &vlits) {
+ assert(!ulits.empty());
+ assert(!vlits.empty());
+
+ // We cannot merge engines that prefix literals in different tables.
+ if (ulits[0].first->table != vlits[0].first->table) {
+ DEBUG_PRINTF("literals in different tables\n");
+ return false;
+ }
+
+ /* An engine requires that all accesses to it are ordered by offsets. (ie,
+ we can not check an engine's state at offset Y, if we have already
+ checked its status at offset X and X > Y). If we can not establish that
+ the literals used for triggering will statisfy this property, then it is
+ not safe to merge the engine. */
+ for (const auto &ue : ulits) {
+ const rose_literal_id &ul = *ue.first;
+ u32 ulag = ue.second;
+
+ if (ul.delay) {
+ return false; // We don't handle delayed cases yet.
+ }
+
+ for (const auto &ve : vlits) {
+ const rose_literal_id &vl = *ve.first;
+ u32 vlag = ve.second;
+
+ if (vl.delay) {
+ return false; // We don't handle delayed cases yet.
+ }
+
+ if (!checkPrefix(ul, ulag, vl, vlag)
+ || !checkPrefix(vl, vlag, ul, ulag)) {
+ DEBUG_PRINTF("prefix check failed\n");
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
+ RoseVertex v) {
+ assert(u != v);
+
+ if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
+ return false;
+ }
+
// UE-1675: in block mode, we want to be a little more selective -- only
// merge prefix roses when the literal sets are the same.
if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) {
assert(tbi.isRootSuccessor(v));
- if (ulits != vlits) {
+
+ if (tbi.g[u].literals != tbi.g[v].literals) {
DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
return false;
}
}
}
- // We accept any pair of literal sets A and B where no literal in A
- // contains a literal in B and no literal in B contains a literal in A.
-
- const u32 ulag = tbi.g[u].left.lag;
- const u32 vlag = tbi.g[v].left.lag;
-
- for (const u32 &ulit : ulits) {
- const rose_literal_id &ul = tbi.literals.right.at(ulit);
-
- if (ul.delay) {
- return false; // We don't handle delayed cases here.
- }
-
- for (const u32 &vlit : vlits) {
- const rose_literal_id &vl = tbi.literals.right.at(vlit);
+ u32 ulag = tbi.g[u].left.lag;
+ vector<pair<const rose_literal_id *, u32>> ulits;
+ ulits.reserve(tbi.g[u].literals.size());
+ for (u32 id : tbi.g[u].literals) {
+ ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
+ }
- if (vl.delay) {
- return false; // We don't handle delayed cases here.
- }
+ u32 vlag = tbi.g[v].left.lag;
+ vector<pair<const rose_literal_id *, u32>> vlits;
+ vlits.reserve(tbi.g[v].literals.size());
+ for (u32 id : tbi.g[v].literals) {
+ vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
+ }
- if (!checkPrefix(ul, ulag, vl, vlag) ||
- !checkPrefix(vl, vlag, ul, ulag)) {
- DEBUG_PRINTF("prefix check failed\n");
- return false;
- }
- }
+ if (!compatibleLiteralsForMerge(ulits, vlits)) {
+ return false;
}
- DEBUG_PRINTF("roses on %zu and %zu are mergeable\n",
- tbi.g[u].idx, tbi.g[v].idx);
+ DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx,
+ tbi.g[v].idx);
return true;
}
+/* We cannot merge an engine, if a trigger literal and a post literal overlap
+ * in such a way that engine status needs to be check at a point before the
+ * engine's current location.
+ *
+ * i.e., for a trigger literal u and a pos literal v,
+ * where delta is the earliest v can appear after t,
+ * we require that v_loc - v_lag >= u_loc
+ * ==> u_loc + delta - v_lag >= u_loc
+ * ==> delta >= v_lag
+ *
+ */
static
-bool mergeableDelays(const RoseBuildImpl &tbi, const flat_set<u32> &ulits,
- const flat_set<u32> &vlits, u32 vlag) {
- for (const u32 &ulit : ulits) {
- const rose_literal_id &ul = tbi.literals.right.at(ulit);
- assert(!ul.delay); // this should never have got this far?
- for (const u32 vlit : vlits) {
- const rose_literal_id &vl = tbi.literals.right.at(vlit);
- assert(!vl.delay); // this should never have got this far?
-
- DEBUG_PRINTF("%s %s (lag %u, overlap %zu)\n",
- escapeString(ul.s).c_str(),
- escapeString(vl.s).c_str(), vlag,
- maxOverlap(ul, vl));
- size_t l = vl.elength() - maxOverlap(ul, vl);
- if (vlag > l) {
- DEBUG_PRINTF("failed lag check!\n");
- return false;
- }
+bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
+ u32 vlag) {
+ DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
+ escapeString(vl.s).c_str(), vlag);
+
+ for (size_t i = 0; i < vlag; i++) {
+ if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+ DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
+ return false;
}
}
+
+ DEBUG_PRINTF("OK\n");
return true;
}
-static
+static never_inline
bool checkPredDelays(const RoseBuildImpl &tbi, const deque<RoseVertex> &v1,
const deque<RoseVertex> &v2) {
- set<RoseVertex> preds;
+ flat_set<RoseVertex> preds;
for (auto v : v1) {
insert(&preds, inv_adjacent_vertices(v, tbi.g));
}
+ flat_set<u32> pred_lits;
+
+ /* No need to examine delays of a common pred - as it must already have
+ * survived the delay checks.
+ *
+ * This is important when the pred is in the anchored table as
+ * the literal is no longer available. */
+ flat_set<RoseVertex> known_good_preds;
+ for (auto v : v2) {
+ insert(&known_good_preds, inv_adjacent_vertices(v, tbi.g));
+ }
+
for (auto u : preds) {
- const auto &pred_lits = tbi.g[u].literals;
- for (auto v : v2) {
- u32 vlag = tbi.g[v].left.lag;
- DEBUG_PRINTF("consider (%zu, %zu) lag=%u\n", tbi.g[u].idx,
- tbi.g[v].idx, vlag);
- if (edge_by_target(u, v, tbi.g).second) {
- /* no need to examine delays as it is a common pred - so checks
- * must already have survived the delay checks.
- * This is important when the pred is in the anchored table as
- * the literal is no longer available. */
- DEBUG_PRINTF("ok, also %zu is also a pred of %zu\n",
- tbi.g[u].idx, tbi.g[v].idx);
- continue;
- }
- if (!mergeableDelays(tbi, pred_lits, tbi.g[v].literals, vlag)) {
- return false;
+ if (!contains(known_good_preds, &u)) {
+ insert(&pred_lits, tbi.g[u].literals);
+ }
+ }
+
+ vector<const rose_literal_id *> pred_rose_lits;
+ pred_rose_lits.reserve(pred_lits.size());
+ for (const auto &p : pred_lits) {
+ pred_rose_lits.push_back(&tbi.literals.right.at(p));
+ }
+
+ for (auto v : v2) {
+ u32 vlag = tbi.g[v].left.lag;
+ if (!vlag) {
+ continue;
+ }
+
+ for (const u32 vlit : tbi.g[v].literals) {
+ const rose_literal_id &vl = tbi.literals.right.at(vlit);
+ assert(!vl.delay); // this should never have got this far?
+ for (const auto &ul : pred_rose_lits) {
+ assert(!ul->delay); // this should never have got this far?
+
+ if (!checkPredDelay(*ul, vl, vlag)) {
+ return false;
+ }
}
}
}
bool mergeableRoseVertices(const RoseBuildImpl &tbi,
const deque<RoseVertex> &verts1,
const deque<RoseVertex> &verts2) {
- for (auto v1 : verts1) {
- for (auto v2 : verts2) {
- if (!mergeableRoseVertices(tbi, v1, v2)) {
+ assert(!verts1.empty());
+ assert(!verts2.empty());
+
+ RoseVertex u_front = verts1.front();
+ RoseVertex v_front = verts2.front();
+
+ /* all vertices must have the same engine type: assume all verts in each
+ * group are already of the same type */
+ if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
+ return false;
+ }
+
+ bool is_prefix = tbi.isRootSuccessor(u_front);
+
+ /* We cannot merge prefixes/vertices if they are successors of different
+ * root vertices: similarly, assume the grouped vertices are compatible */
+ if (is_prefix) {
+ assert(tbi.isRootSuccessor(v_front));
+ set<RoseVertex> u_preds;
+ set<RoseVertex> v_preds;
+ insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
+ insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));
+
+ if (u_preds != v_preds) {
+ return false;
+ }
+ }
+
+ vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+ for (auto a : verts1) {
+ // UE-1675: in block mode, we want to be a little more selective --
+ // only merge prefix roses when the literal sets are the same.
+ if (!tbi.cc.streaming && is_prefix) {
+ assert(tbi.isRootSuccessor(a));
+
+ if (tbi.g[u_front].literals != tbi.g[a].literals) {
+ DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
return false;
}
}
+
+ u32 ulag = tbi.g[a].left.lag;
+ for (u32 id : tbi.g[a].literals) {
+ ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
+ }
+ }
+
+ vector<pair<const rose_literal_id *, u32>> vlits;
+ for (auto a : verts2) {
+ // UE-1675: in block mode, we want to be a little more selective --
+ // only merge prefix roses when the literal sets are the same.
+ if (!tbi.cc.streaming && is_prefix) {
+ assert(tbi.isRootSuccessor(a));
+
+ if (tbi.g[u_front].literals != tbi.g[a].literals) {
+ DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
+ return false;
+ }
+ }
+
+ u32 vlag = tbi.g[a].left.lag;
+ for (u32 id : tbi.g[a].literals) {
+ vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
+ }
+ }
+
+ if (!compatibleLiteralsForMerge(ulits, vlits)) {
+ return false;
}
// Check preds are compatible as well.
- if (!checkPredDelays(tbi, verts1, verts2) ||
- !checkPredDelays(tbi, verts2, verts1)) {
+ if (!checkPredDelays(tbi, verts1, verts2)
+ || !checkPredDelays(tbi, verts2, verts1)) {
return false;
}
}
static
-void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) {
+void mergeCastleChunk(RoseBuildImpl &tbi, RoseBouquet &cands) {
+ /* caller must have already ensured that candidates have the same reach */
RoseGraph &g = tbi.g;
- DEBUG_PRINTF("%zu castle rose merge candidates\n", roses.size());
+ DEBUG_PRINTF("%zu castle rose merge candidates\n", cands.size());
deque<left_id> merged;
- for (auto it = roses.begin(); it != roses.end(); ++it) {
+ for (auto it = cands.begin(); it != cands.end(); ++it) {
left_id r1 = *it;
CastleProto &castle1 = *r1.castle();
- const deque<RoseVertex> &verts1 = roses.vertices(r1);
+ const deque<RoseVertex> &verts1 = cands.vertices(r1);
merged.clear();
- for (auto jt = next(it); jt != roses.end(); ++jt) {
+ for (auto jt = next(it); jt != cands.end(); ++jt) {
left_id r2 = *jt;
CastleProto &castle2 = *r2.castle();
- const deque<RoseVertex> &verts2 = roses.vertices(r2);
+ const deque<RoseVertex> &verts2 = cands.vertices(r2);
if (castle1.repeats.size() == castle1.max_occupancy) {
DEBUG_PRINTF("castle1 has hit max occupancy\n");
break; // next castle1
}
- if (castle1.reach() != castle2.reach()) {
- DEBUG_PRINTF("different reach\n");
- continue; // next castle2
- }
+ assert(castle1.reach() == castle2.reach());
if (!mergeableRoseVertices(tbi, verts1, verts2)) {
DEBUG_PRINTF("not mergeable\n");
}
}
- roses.insert(r1, verts2);
+ cands.insert(r1, verts2);
merged.push_back(r2);
}
DEBUG_PRINTF("%zu roses merged\n", merged.size());
- roses.erase_all(merged.begin(), merged.end());
+ cands.erase_all(merged.begin(), merged.end());
}
}
for (auto &m : by_reach) {
DEBUG_PRINTF("%zu castles for reach: %s\n", m.second.size(),
describeClass(m.first).c_str());
- RoseBouquet &roses = m.second;
- deque<RoseBouquet> rose_groups;
- chunkBouquets(roses, rose_groups, MERGE_GROUP_SIZE_MAX);
- roses.clear();
+ RoseBouquet &candidates = m.second;
+ deque<RoseBouquet> cand_groups;
+ chunkBouquets(candidates, cand_groups, MERGE_CASTLE_GROUP_SIZE_MAX);
+ candidates.clear();
- for (auto &group : rose_groups) {
- mergeCastleRoses(tbi, group);
+ for (auto &group : cand_groups) {
+ mergeCastleChunk(tbi, group);
}
}
}