return true;
}
+/**
+ * \brief Measure how a candidate cut divides the graph.
+ *
+ * Collects the vertices of \p g that find_unreachable() reports for the cut
+ * vertices \p vv and expresses their count as a fraction of all vertices in
+ * \p g. A fraction above one half is mirrored (1 - x), so the returned value
+ * never exceeds 0.5.
+ *
+ * NOTE(review): presumably a value close to 0.5 indicates a balanced split;
+ * confirm against the semantics of find_unreachable().
+ */
+static
+double calcSplitRatio(const NGHolder &g, const vector<NFAVertex> &vv) {
+    flat_set<NFAVertex> not_reachable;
+    /* output parameter: must be the address of the local set */
+    find_unreachable(g, vv, &not_reachable);
+    double rv = (double)not_reachable.size() / num_vertices(g);
+    /* fold the ratio so that x and (1 - x) compare as equal */
+    rv = rv > 0.5 ? 1 - rv : rv;
+
+    return rv;
+}
+
+/**
+ * \brief Count the literals in \p s whose length is strictly below \p limit.
+ */
+static
+size_t shorter_than(const set<ue2_literal> &s, size_t limit) {
+    size_t n_short = 0;
+
+    for (const auto &lit : s) {
+        if (lit.length() < limit) {
+            ++n_short;
+        }
+    }
+
+    return n_short;
+}
+
+/**
+ * \brief Length of the shortest literal in \p s.
+ *
+ * Returns ~0U when \p s is empty.
+ */
+static
+u32 min_len(const set<ue2_literal> &s) {
+    u32 shortest = ~0U;
+
+    for (const auto &lit : s) {
+        const u32 len = (u32)lit.length();
+        if (len < shortest) {
+            shortest = len;
+        }
+    }
+
+    return shortest;
+}
+
+/**
+ * \brief Smallest minStringPeriod() value over the literals in \p s.
+ *
+ * Returns ~0U when \p s is empty. The result is also written to the debug log.
+ */
+static
+u32 min_period(const set<ue2_literal> &s) {
+    u32 smallest = ~0U;
+
+    for (const auto &lit : s) {
+        const u32 period = (u32)minStringPeriod(lit);
+        if (period < smallest) {
+            smallest = period;
+        }
+    }
+
+    DEBUG_PRINTF("min period %u\n", smallest);
+    return smallest;
+}
+
namespace {
/**
* Information on a cut: vertices and literals.
bool creates_anchored = false;
bool creates_transient = false;
+ double split_ratio = 0;
};
+#define LAST_CHANCE_STRONG_LEN 1
+
/**
- * \brief Comparator class for sorting LitCollection::lits.
- *
- * This is separated out from LitCollection itself as passing LitCollection to
- * std::sort() would incur a (potentially expensive) copy.
+ * \brief Comparator class for comparing different literal cuts.
*/
class LitComparator {
public:
- LitComparator(const NGHolder &g_in, bool sa, bool st)
- : g(g_in), seeking_anchored(sa), seeking_transient(st) {}
+ LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc)
+ : g(g_in), seeking_anchored(sa), seeking_transient(st),
+ last_chance(lc) {}
bool operator()(const unique_ptr<VertLitInfo> &a,
const unique_ptr<VertLitInfo> &b) const {
assert(a && b);
}
}
+ if (last_chance
+ && min_len(a->lit) > LAST_CHANCE_STRONG_LEN
+ && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) {
+ DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio,
+ b->split_ratio);
+ return a->split_ratio < b->split_ratio;
+ }
+
u64a score_a = scoreSet(a->lit);
u64a score_b = scoreSet(b->lit);
bool seeking_anchored;
bool seeking_transient;
+ bool last_chance;
};
}
-static
-size_t shorter_than(const set<ue2_literal> &s, size_t limit) {
- size_t count = 0;
-
- for (const auto &lit : s) {
- if (lit.length() < limit) {
- count++;
- }
- }
-
- return count;
-}
-
-static
-u32 min_len(const set<ue2_literal> &s) {
- u32 rv = ~0U;
-
- for (const auto &lit : s) {
- rv = min(rv, (u32)lit.length());
- }
-
- return rv;
-}
-
-static
-u32 min_period(const set<ue2_literal> &s) {
- u32 rv = ~0U;
-
- for (const auto &lit : s) {
- rv = min(rv, (u32)minStringPeriod(lit));
- }
- DEBUG_PRINTF("min period %u\n", rv);
- return rv;
-}
-
#define MIN_ANCHORED_LEN 2
+#define MIN_ANCHORED_DESPERATE_LEN 1
+/* anchored here means that the cut creates a 'usefully' anchored LHS */
static
bool validateRoseLiteralSetQuality(const set<ue2_literal> &s, u64a score,
bool anchored, u32 min_allowed_floating_len,
- bool desperation) {
+ bool desperation, bool last_chance) {
u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN
: min_allowed_floating_len;
+ if (anchored && last_chance) {
+ min_allowed_len = MIN_ANCHORED_DESPERATE_LEN;
+ }
+ if (last_chance) {
+ desperation = true;
+ }
+
+ DEBUG_PRINTF("validating%s set, min allowed len %u\n",
+ anchored ? " anchored" : "", min_allowed_len);
assert(none_of(begin(s), end(s), bad_mixed_sensitivity));
if (s.size() > 10 /* magic number is magic */
|| s_min_len < min_allowed_len
|| (s_min_period <= 1 && min_allowed_len != 1)) {
+ DEBUG_PRINTF("candidate may be bad\n");
ok = false;
}
const set<NFAVertex> &a_dom,
vector<unique_ptr<VertLitInfo>> *lits,
u32 min_allowed_len, bool desperation,
- const CompileContext &cc) {
+ bool last_chance, const CompileContext &cc) {
assert(depths || !seeking_anchored);
map<NFAVertex, u64a> scores;
}
if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len,
- desperation)) {
+ desperation, last_chance)) {
continue;
}
const set<NFAVertex> *allowed,
vector<unique_ptr<VertLitInfo>> *lits,
u32 min_allowed_len, bool desperation,
- const CompileContext &cc) {
+ bool last_chance, const CompileContext &cc) {
/* This allows us to get more places to split the graph as we are not
limited to points where there is a single vertex to split at. */
}
if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len,
- desperation)) {
+ desperation, last_chance)) {
goto next_cand;
}
bool for_prefix, u32 min_len,
const set<NFAVertex> *allowed_cand,
const set<NFAVertex> *disallowed_cand,
+ bool last_chance,
const CompileContext &cc) {
assert(!for_prefix || depths);
DEBUG_PRINTF("|cand| = %zu\n", cand.size());
bool seeking_anchored = for_prefix;
- bool seeking_transient = for_prefix; //cc.streaming;
+ bool seeking_transient = for_prefix;
- /* TODO: revisit when backstop goes away */
bool desperation = for_prefix && cc.streaming;
vector<unique_ptr<VertLitInfo>> lits; /**< sorted list of potential cuts */
getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len,
- desperation, cc);
+ desperation, last_chance, cc);
getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand,
- &lits, min_len, desperation, cc);
+ &lits, min_len, desperation, last_chance, cc);
if (lits.empty()) {
DEBUG_PRINTF("no literals found\n");
}
}
- auto cmp = LitComparator(g, seeking_anchored, seeking_transient);
+ if (last_chance) {
+ for (auto &a : lits) {
+ a->split_ratio = calcSplitRatio(g, a->vv);
+ }
+ }
+
+ auto cmp = LitComparator(g, seeking_anchored, seeking_transient,
+ last_chance);
unique_ptr<VertLitInfo> best = move(lits.back());
lits.pop_back();
set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength,
- nullptr, &bad_vertices, cc);
+ nullptr, &bad_vertices, false, cc);
+}
+
+/**
+ * \brief Search for a literal cut of \p g with the last-chance flag enabled.
+ *
+ * Only valid for outfix, infix and suffix graphs (asserted). The vertices
+ * returned by poisonVertices() are handed to findBestSplit() as the set of
+ * disallowed candidates; no depth information and no allowed-candidate set are
+ * supplied.
+ */
+static
+unique_ptr<VertLitInfo> findBestLastChanceSplit(const NGHolder &g,
+                                                const RoseInGraph &vg,
+                                                const vector<RoseInEdge> &ee,
+                                                const CompileContext &cc) {
+    assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX);
+
+    const bool for_prefix = false;
+    const bool last_chance = true;
+    set<NFAVertex> poisoned = poisonVertices(g, vg, ee, cc.grey);
+
+    return findBestSplit(g, /* depths */ nullptr, for_prefix,
+                         cc.grey.minRoseLiteralLength,
+                         /* allowed_cand */ nullptr, &poisoned, last_chance,
+                         cc);
}
static
const vector<NFAVertexDepth> &depths,
const RoseInGraph &vg,
const vector<RoseInEdge> &ee,
+ bool last_chance,
const CompileContext &cc) {
- assert(g.kind == NFA_PREFIX);
+ assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX);
set<NFAVertex> bad_vertices = poisonVertices(g, vg, ee, cc.grey);
auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength,
- nullptr, &bad_vertices, cc);
+ nullptr, &bad_vertices, last_chance, cc);
/* large back edges may prevent us identifying anchored or transient cases
* properly - use a simple walk instead */
return nullptr;
}
return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen,
- &cleanSplits, nullptr, cc);
+ &cleanSplits, nullptr, false, cc);
}
static
}
static
-void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) {
+void avoidOutfixes(RoseInGraph &vg, bool last_chance,
+ const CompileContext &cc) {
STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n");
- if (num_vertices(vg) > 2) {
- /* must be at least one literal aside from start and accept */
- return;
- }
+ assert(num_vertices(vg) == 2);
+ assert(num_edges(vg) == 1);
RoseInEdge e = *edges(vg).first;
renumber_vertices(h);
renumber_edges(h);
- unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, {e}, cc);
+ unique_ptr<VertLitInfo> split = findBestNormalSplit(h, vg, {e}, cc);
if (split && splitRoseEdge(h, vg, {e}, *split)) {
DEBUG_PRINTF("split on simple literal\n");
- } else {
- doNetflowCut(h, nullptr, vg, {e}, false, cc.grey);
+ return;
}
+
+ if (last_chance) {
+ /* look for a prefix split as it allows us to accept very weak anchored
+ * literals. */
+ vector<NFAVertexDepth> depths;
+ calcDepths(h, depths);
+
+ split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc);
+
+ if (split && splitRoseEdge(h, vg, {e}, *split)) {
+ DEBUG_PRINTF("split on simple literal\n");
+ return;
+ }
+ }
+
+ doNetflowCut(h, nullptr, vg, {e}, false, cc.grey);
}
static
return true;
}
- unique_ptr<VertLitInfo> split = findBestPrefixSplit(h, depths, vg, ee, cc);
+ auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc);
if (split && (split->creates_transient || split->creates_anchored)
&& splitRoseEdge(h, vg, ee, *split)) {
if (vli.lit.empty()
|| !validateRoseLiteralSetQuality(vli.lit, score, false, min_len,
- false)) {
+ false, false)) {
return false;
}
}
}
unique_ptr<VertLitInfo> split;
+ bool last_chance = true;
if (h.kind == NFA_PREFIX) {
vector<NFAVertexDepth> depths;
calcDepths(h, depths);
- split = findBestPrefixSplit(h, depths, vg, edges, cc);
+ split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc);
} else {
- split = findBestNormalSplit(h, vg, edges, cc);
+ split = findBestLastChanceSplit(h, vg, edges, cc);
}
if (split && splitRoseEdge(h, vg, edges, *split)) {
bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes,
bool final_chance, const ReportManager &rm,
const CompileContext &cc) {
- DEBUG_PRINTF("checking for impl\n");
+ DEBUG_PRINTF("checking for impl %d\n", final_chance);
bool changed = false;
bool need_to_recalc = false;
u32 added_count = 0;
}
static
-RoseInGraph doInitialVioletTransform(const NGHolder &h,
+RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance,
const CompileContext &cc) {
assert(!can_never_match(h));
DEBUG_PRINTF("hello world\n");
/* Step 1: avoid outfixes as we always have to run them. */
- avoidOutfixes(vg, cc);
+ avoidOutfixes(vg, last_chance, cc);
if (num_vertices(vg) <= 2) {
return vg; /* unable to transform pattern */
bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter,
bool last_chance, const ReportManager &rm,
const CompileContext &cc) {
- auto vg = doInitialVioletTransform(h, cc);
+ auto vg = doInitialVioletTransform(h, last_chance, cc);
if (num_vertices(vg) <= 2) {
return false;
}
bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter,
const CompileContext &cc) {
- auto vg = doInitialVioletTransform(h, cc);
+ auto vg = doInitialVioletTransform(h, true, cc);
if (num_vertices(vg) <= 2) {
return false;
}