/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
return v;
}
-// Returns the first and last vertices.
-static
-pair<NFAVertex, NFAVertex> addLiteralVertices(const RoseGraph &g,
- const RoseLiteralMap &literals,
- const RoseVertex &t_v,
- NGHolder &out) {
- // We have limited cases that we support: one literal of arbitrary length,
- // or a bunch of literals of length one that just become a vertex with
- // their reach unioned together.
-
- // TODO: generalise this and handle more cases.
-
- const auto &litids = g[t_v].literals;
- if (litids.size() > 1) {
- // Multiple literals of len 1.
- CharReach v_cr;
- for (const auto &lit_id : litids) {
- const rose_literal_id &litv = literals.right.at(lit_id);
- assert(litv.s.length() == 1);
- v_cr |= *litv.s.begin();
- }
-
- NFAVertex v = addHolderVertex(v_cr, out);
- return make_pair(v, v);
- }
-
- // Otherwise, we have a single literal, could be of arbitrary length.
- assert(litids.size() == 1);
- u32 lit_id = *(litids.begin());
- const rose_literal_id &litv = literals.right.at(lit_id);
- assert(!litv.s.empty());
-
- ue2_literal::const_iterator it = litv.s.begin(), ite = litv.s.end();
- NFAVertex first = addHolderVertex(*it, out), last = first;
- for (++it; it != ite; ++it) {
- NFAVertex v = addHolderVertex(*it, out);
- add_edge(last, v, out);
- last = v;
- }
-
- return make_pair(first, last);
-}
-
-static
-unique_ptr<NGHolder> convertLeafToHolder(const RoseGraph &g,
- const RoseEdge &t_e,
- const RoseLiteralMap &literals) {
- RoseVertex t_v = target(t_e, g); // leaf vertex for demolition.
- u32 minBound = g[t_e].minBound;
- u32 maxBound = g[t_e].maxBound;
-
- const CharReach dot = CharReach::dot();
-
- assert(!g[t_v].left);
-
- auto out = ue2::make_unique<NGHolder>(NFA_SUFFIX);
-
- // Repeats wired to the start of the graph.
- DEBUG_PRINTF("bounds [%u, %u]\n", minBound, maxBound);
- u32 i = 1;
- NFAVertex last = out->start;
- for (; i <= minBound; i++) {
- NFAVertex v = addHolderVertex(dot, *out);
- add_edge(last, v, *out);
- last = v;
- }
- NFAVertex last_mand = last;
- if (maxBound != ROSE_BOUND_INF) {
- for (; i <= maxBound; i++) {
- NFAVertex v = addHolderVertex(dot, *out);
- add_edge(last_mand, v, *out);
- if (last != last_mand) {
- add_edge(last, v, *out);
- }
- last = v;
- }
- } else {
- if (minBound) {
- add_edge(last_mand, last_mand, *out);
- } else {
- NFAVertex v = addHolderVertex(dot, *out);
- add_edge(last_mand, v, *out);
- add_edge(v, v, *out);
- last = v;
- }
- }
-
- setTops(*out);
-
- // Literal vertices wired to accept.
- NFAVertex litfirst, litlast;
- tie(litfirst, litlast) = addLiteralVertices(g, literals, t_v, *out);
- add_edge(last, litfirst, *out);
- if (last != last_mand) {
- add_edge(last_mand, litfirst, *out);
- }
- add_edge(litlast, out->accept, *out);
- insert(&(*out)[litlast].reports, g[t_v].reports);
- return out;
-}
-
-static
-bool areLiteralsConvertible(const RoseLiteralMap &literals,
- const flat_set<u32> &ids) {
- // Every literal in v must have the same length.
-
- // TODO: at the moment, we only handle two cases in construction: (a) one
- // literal of arbitrary length, and (b) many literals, but all with length
- // 1.
-
- if (ids.empty()) {
- return false;
- }
-
- auto it = ids.begin(), ite = ids.end();
- const size_t len = literals.right.at(*it).elength();
-
- // Note: len may be 0 for cases with special literals, like EOD prefixes.
-
- if (len != 1 && ids.size() != 1) {
- DEBUG_PRINTF("more than one literal of len > 1\n");
- return false;
- }
-
- // Check the others all have the same length.
- while (++it != ite) {
- if (literals.right.at(*it).elength() != len) {
- DEBUG_PRINTF("literals have different lengths\n");
- return false;
- }
- }
-
- return true;
-}
-
-// Returns true if the given vertex doesn't qualify as a bad leaf to be eaten
-// by an NFA.
-static
-bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) {
- const RoseGraph &g = tbi.g;
-
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("more than one in-edge\n");
- return true;
- }
-
- const RoseEdge &e = *(in_edges(v, g).first);
- RoseVertex u = source(e, g);
-
- if (!g[u].reports.empty()) {
- DEBUG_PRINTF("pred has accept\n");
- return true;
- }
-
- if (g[u].suffix) {
- // TODO: this could be handled by adding new vertices to the existing
- // suffix.
- DEBUG_PRINTF("pred already has suffix\n");
- return true;
- }
-
- if (tbi.isAnyStart(u)) {
- DEBUG_PRINTF("fail start\n");
- return true;
- }
-
- if (tbi.isAnchored(u)) {
- /* TODO need to check for possible anchored queue overflow? maybe? */
- DEBUG_PRINTF("fail anchored\n");
- return true;
- }
-
- if (g[v].reports.empty() || g[v].eod_accept) {
- DEBUG_PRINTF("bad accept\n");
- return true;
- }
-
- if (g[v].suffix) {
- DEBUG_PRINTF("suffix\n");
- return true;
- }
-
- if (g[v].left) {
- /* TODO: we really should handle this case as we would be checking
- * an nfa each time. However it requires completely different graph
- * fiddling logic */
- DEBUG_PRINTF("rose prefix action\n");
- return true;
- }
-
- if (!areLiteralsConvertible(tbi.literals, g[v].literals)) {
- DEBUG_PRINTF("fail length\n");
- return true;
- }
-
- u32 max_lit_len = tbi.maxLiteralLen(v);
-
- u32 maxbound = max_lit_len == 1 ? 124 : 32; // arbitrary magic numbers
- if (g[e].maxBound > maxbound && g[e].maxBound != ROSE_BOUND_INF) {
- DEBUG_PRINTF("fail maxbound (%u)\n", maxbound);
- return true;
- }
-
- if (g[e].maxBound == ROSE_BOUND_INF) {
- /* slightly risky as nfa won't die */
- DEBUG_PRINTF("fail: .*\n");
- return true;
- }
-
- return false;
-}
-
-// Find all of the leaves with literals whose length is <= len.
-static
-void findBadLeaves(RoseBuildImpl &tbi, set<RoseVertex> &bad) {
- RoseGraph &g = tbi.g;
- u32 len = tbi.cc.grey.roseMaxBadLeafLength;
-
- for (const auto &m : tbi.literals.right) {
- if (m.second.s.length() > len) {
- continue;
- }
- u32 lid = m.first;
- DEBUG_PRINTF("%u is a short lit (length %zu)\n", lid,
- m.second.s.length());
-
- if (tbi.isDelayed(lid)) {
- DEBUG_PRINTF("delayed, skipping!\n");
- continue;
- }
-
- const rose_literal_info &info = tbi.literal_info[lid];
-
- for (auto v : info.vertices) {
- if (!isLeafNode(v, g)) {
- continue;
- }
- if (isUnconvertibleLeaf(tbi, v)) {
- continue; // we don't want to touch it
- }
-
- // This leaf may have a predecessor with more than one successor,
- // in which case we want to clone the pred just to support this
- // leaf.
- const RoseEdge &e = *in_edges(v, g).first;
- RoseVertex u = source(e, g);
- if (out_degree(u, g) != 1) {
- DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].index);
- RoseVertex u2 = tbi.cloneVertex(u);
- for (const auto &e_in : in_edges_range(u, g)) {
- add_edge(source(e_in, g), u2, g[e_in], g);
- }
- add_edge(u2, v, g[e], g);
- remove_edge(e, g);
- }
-
- DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].index);
- bad.insert(v);
- }
- }
-}
-
-void convertBadLeaves(RoseBuildImpl &tbi) {
- RoseGraph &g = tbi.g;
- set<RoseVertex> bad;
- findBadLeaves(tbi, bad);
- DEBUG_PRINTF("found %zu bad leaves\n", bad.size());
-
- if (bad.empty()) {
- return;
- }
-
- vector<RoseVertex> dead;
- for (auto v : bad) {
- assert(in_degree(v, g));
-
- const RoseEdge &e = *(in_edges(v, g).first);
-
- shared_ptr<NGHolder> h = convertLeafToHolder(g, e, tbi.literals);
- if (num_vertices(*h) >= NFA_MAX_STATES) {
- assert(0); // too big!
- continue;
- }
-
- RoseVertex u = source(e, g);
- assert(!g[u].suffix);
- g[u].suffix.graph = h;
- DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].index, h.get());
-
- dead.push_back(v);
- }
-
- tbi.removeVertices(dead);
-}
-
static
size_t suffixFloodLen(const ue2_literal &s) {
if (s.empty()) {