git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
ng_execute: update interface to use flat_set
author: Justin Viiret <justin.viiret@intel.com>
Fri, 13 Nov 2015 03:36:28 +0000 (14:36 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
Wed, 18 Nov 2015 04:27:17 +0000 (15:27 +1100)
This changes all the execute_graph() interfaces so that instead of
mutating a std::set of vertices, they accept an initial flat_set of
states and return a resultant flat_set of states after execution.

(Note that execute_graph() still uses bitsets internally.)

This is both faster and more flexible.

src/nfagraph/ng_execute.cpp
src/nfagraph/ng_execute.h
src/nfagraph/ng_som.cpp
src/nfagraph/ng_som_util.cpp
src/rose/rose_build_compile.cpp

index aebfa7123905039c37794aaed868d7681df6ff65..92bef73796398f9e1f4735c0a008b1c13d693ff3 100644 (file)
@@ -125,61 +125,62 @@ void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
 }
 
 static
-void fillStateBitset(const NGHolder &g, const set<NFAVertex> &in,
-                     dynamic_bitset<> &out) {
-    out.reset();
-    for (auto v : in) {
+dynamic_bitset<> makeStateBitset(const NGHolder &g,
+                                 const flat_set<NFAVertex> &in) {
+    dynamic_bitset<> work_states(num_vertices(g));
+    for (const auto &v : in) {
         u32 idx = g[v].index;
-        out.set(idx);
+        work_states.set(idx);
     }
+    return work_states;
 }
 
 static
-void fillVertexSet(const dynamic_bitset<> &in,
-                   const vector<StateInfo> &info, set<NFAVertex> &out) {
-    out.clear();
+flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
+                                const vector<StateInfo> &info) {
+    flat_set<NFAVertex> out;
     for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
         out.insert(info[i].vertex);
     }
+    return out;
 }
 
 static
-void fillInfoTable(const NGHolder &g, vector<StateInfo> &info) {
-    info.resize(num_vertices(g));
+vector<StateInfo> makeInfoTable(const NGHolder &g) {
+    vector<StateInfo> info(num_vertices(g));
     for (auto v : vertices_range(g)) {
         u32 idx = g[v].index;
         const CharReach &cr = g[v].char_reach;
         assert(idx < info.size());
         info[idx] = StateInfo(v, cr);
     }
+    return info;
 }
 
-void execute_graph(const NGHolder &g, const ue2_literal &input,
-                   set<NFAVertex> *states, bool kill_sds) {
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
+                                  const flat_set<NFAVertex> &initial_states,
+                                  bool kill_sds) {
     assert(hasCorrectlyNumberedVertices(g));
 
-    vector<StateInfo> info;
-    fillInfoTable(g, info);
-    dynamic_bitset<> work_states(num_vertices(g));
-    fillStateBitset(g, *states, work_states);
+    auto info = makeInfoTable(g);
+    auto work_states = makeStateBitset(g, initial_states);
 
     execute_graph_i(g, info, input, &work_states, kill_sds);
 
-    fillVertexSet(work_states, info, *states);
+    return getVertices(work_states, info);
 }
 
-void execute_graph(const NGHolder &g, const vector<CharReach> &input,
-                   set<NFAVertex> *states) {
+flat_set<NFAVertex> execute_graph(const NGHolder &g,
+                                  const vector<CharReach> &input,
+                                  const flat_set<NFAVertex> &initial_states) {
     assert(hasCorrectlyNumberedVertices(g));
 
-    vector<StateInfo> info;
-    fillInfoTable(g, info);
-    dynamic_bitset<> work_states(num_vertices(g));
-    fillStateBitset(g, *states, work_states);
+    auto info = makeInfoTable(g);
+    auto work_states = makeStateBitset(g, initial_states);
 
     execute_graph_i(g, info, input, &work_states, false);
 
-    fillVertexSet(work_states, info, *states);
+    return getVertices(work_states, info);
 }
 
 typedef boost::reverse_graph<const NFAGraph, const NFAGraph &> RevNFAGraph;
@@ -276,9 +277,10 @@ private:
 };
 } // namespace
 
-void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
-                   const set<NFAVertex> &input_start_states,
-                   set<NFAVertex> *states) {
+flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
+                                  const NGHolder &input_dag,
+                                  const flat_set<NFAVertex> &input_start_states,
+                                  const flat_set<NFAVertex> &initial_states) {
     DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
                  num_vertices(running_g), num_vertices(input_dag));
     assert(hasCorrectlyNumberedVertices(running_g));
@@ -290,10 +292,8 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
     RevNFAGraph revg(input_dag.g);
     map<NFAVertex, dynamic_bitset<> > dfs_states;
 
-    vector<StateInfo> info;
-    fillInfoTable(running_g, info);
-    dynamic_bitset<> input_fs(num_vertices(running_g));
-    fillStateBitset(running_g, *states, input_fs);
+    auto info = makeInfoTable(running_g);
+    auto input_fs = makeStateBitset(running_g, initial_states);
 
     for (auto v : input_start_states) {
         dfs_states[v] = input_fs;
@@ -303,21 +303,25 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
                       eg_visitor(running_g, info, input_dag, dfs_states),
                       make_assoc_property_map(colours));
 
-    fillVertexSet(dfs_states[input_dag.accept], info, *states);
+    auto states = getVertices(dfs_states[input_dag.accept], info);
 
 #ifdef DEBUG
-        DEBUG_PRINTF("  output rstates:");
-        for (auto v : *states) {
-            printf(" %u", running_g[v].index);
-        }
-        printf("\n");
+    DEBUG_PRINTF("  output rstates:");
+    for (const auto &v : states) {
+        printf(" %u", running_g[v].index);
+    }
+    printf("\n");
 #endif
+
+    return states;
 }
 
-void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
-                   set<NFAVertex> *states) {
-    set<NFAVertex> input_start_states = {input_dag.start, input_dag.startDs};
-    execute_graph(running_g, input_dag, input_start_states, states);
+flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
+                                  const NGHolder &input_dag,
+                                  const flat_set<NFAVertex> &initial_states) {
+    auto input_start_states = {input_dag.start, input_dag.startDs};
+    return execute_graph(running_g, input_dag, input_start_states,
+                         initial_states);
 }
 
 } // namespace ue2
index 80fdcbd57c8b551e774db222f42cbaeffe39318f..e2c7c72d041a2580957eb2335b26a3c59a44279c 100644 (file)
@@ -35,8 +35,8 @@
 #define NG_EXECUTE_H
 
 #include "ng_holder.h"
+#include "util/ue2_containers.h"
 
-#include <set>
 #include <vector>
 
 namespace ue2 {
@@ -44,23 +44,25 @@ namespace ue2 {
 class CharReach;
 struct ue2_literal;
 
-void execute_graph(const NGHolder &g, const ue2_literal &input,
-                   std::set<NFAVertex> *states, bool kill_sds = false);
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
+                                  const flat_set<NFAVertex> &initial,
+                                  bool kill_sds = false);
 
-void execute_graph(const NGHolder &g, const std::vector<CharReach> &input,
-                   std::set<NFAVertex> *states);
+flat_set<NFAVertex> execute_graph(const NGHolder &g,
+                                  const std::vector<CharReach> &input,
+                                  const flat_set<NFAVertex> &initial);
 
 /** on exit, states contains any state which may still be enabled after
  * receiving an input which corresponds to some path through the input_dag from
  * start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
  */
-void execute_graph(const NGHolder &g, const NGHolder &input_dag,
-                   std::set<NFAVertex> *states);
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
+                                  const flat_set<NFAVertex> &initial);
 
 /* as above, but able to specify the source states for the input graph */
-void execute_graph(const NGHolder &g, const NGHolder &input_dag,
-                   const std::set<NFAVertex> &input_start_states,
-                   std::set<NFAVertex> *states);
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
+                                  const flat_set<NFAVertex> &input_start_states,
+                                  const flat_set<NFAVertex> &initial);
 
 } // namespace ue2
 
index 90ebb5c3aa4d370468039c91497d15cd69f15a71..f26b62aa60e152bee31c974e28dbaa250b324bed 100644 (file)
@@ -266,7 +266,7 @@ bool validateEXSL(const NGHolder &g,
     const vector<CharReach> escapes_vec(1, escapes);
     const vector<CharReach> notescapes_vec(1, ~escapes);
 
-    set<NFAVertex> states;
+    ue2::flat_set<NFAVertex> states;
     /* turn on all states past the prefix */
     DEBUG_PRINTF("region %u is cutover\n", region);
     for (auto v : vertices_range(g)) {
@@ -276,20 +276,20 @@ bool validateEXSL(const NGHolder &g,
     }
 
     /* process the escapes */
-    execute_graph(g, escapes_vec, &states);
+    states = execute_graph(g, escapes_vec, states);
 
     /* flood with any number of not escapes */
-    set<NFAVertex> prev_states;
+    ue2::flat_set<NFAVertex> prev_states;
     while (prev_states != states) {
         prev_states = states;
-        execute_graph(g, notescapes_vec, &states);
+        states = execute_graph(g, notescapes_vec, states);
         insert(&states, prev_states);
     }
 
     /* find input starts to use for when we are running the prefix through as
      * when the escape character arrives we may be in matching the prefix
      * already */
-    set<NFAVertex> prefix_start_states;
+    ue2::flat_set<NFAVertex> prefix_start_states;
     for (auto v : vertices_range(prefix)) {
         if (v != prefix.accept && v != prefix.acceptEod
             /* and as we have already made it past the prefix once */
@@ -298,11 +298,12 @@ bool validateEXSL(const NGHolder &g,
         }
     }
 
-    execute_graph(prefix, escapes_vec, &prefix_start_states);
+    prefix_start_states =
+        execute_graph(prefix, escapes_vec, prefix_start_states);
 
     assert(contains(prefix_start_states, prefix.startDs));
     /* see what happens after we feed it the prefix */
-    execute_graph(g, prefix, prefix_start_states, &states);
+    states = execute_graph(g, prefix, prefix_start_states, states);
 
     for (auto v : states) {
         assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
index 7f487f89586642d253c9a73dad153a7b6aec78e9..a0829451dc180af93d645f21ec2ee9565051de7a 100644 (file)
@@ -136,7 +136,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
         return false;
     }
 
-    set<NFAVertex> states;
+    ue2::flat_set<NFAVertex> states;
     /* turn on all states (except starts - avoid suffix matches) */
     /* If we were doing (1) we would also except states leading to accepts -
        avoid prefix matches */
@@ -149,7 +149,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
     }
 
     /* run the prefix the main graph */
-    execute_graph(p, p, &states);
+    states = execute_graph(p, p, states);
 
     for (auto v : states) {
         /* need to check if this vertex may represent an infix match - ie
@@ -313,7 +313,7 @@ bool sentClearsTail(const NGHolder &g,
      */
 
     u32 first_bad_region = ~0U;
-    set<NFAVertex> states;
+    ue2::flat_set<NFAVertex> states;
     /* turn on all states */
     DEBUG_PRINTF("region %u is cutover\n", last_head_region);
     for (auto v : vertices_range(g)) {
@@ -327,7 +327,7 @@ bool sentClearsTail(const NGHolder &g,
     }
 
     /* run the prefix the main graph */
-    execute_graph(g, sent, &states);
+    states = execute_graph(g, sent, states);
 
     /* .. and check if we are left with anything in the tail region */
     for (auto v : states) {
index 34e7626980be825214befa50520c2b7e6be162c6..a2bd971e21e2d982b607d75f05ef49f38fed9936 100644 (file)
@@ -1631,20 +1631,23 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left,
     assert(left.graph());
     const NGHolder &h = *left.graph();
 
+    ue2::flat_set<NFAVertex> all_states;
+    insert(&all_states, vertices(h));
+    assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
+    DEBUG_PRINTF("removing sds\n");
+    all_states.erase(h.startDs);
+
+    ue2::flat_set<NFAVertex> states;
+
     /* check each pred literal to see if they all kill previous graph
      * state */
     for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
         const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id);
         const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
 
-        set<NFAVertex> states;
-        insert(&states, vertices(h));
-        assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
-        DEBUG_PRINTF("removing sds\n");
-        states.erase(h.startDs);
         DEBUG_PRINTF("running graph %zu\n", states.size());
-        execute_graph(h, s, &states, true);
-        DEBUG_PRINTF("ran\n");
+        states = execute_graph(h, s, all_states, true);
+        DEBUG_PRINTF("ran, %zu states on\n", states.size());
 
         if (!states.empty()) {
             return false;