git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
reduce memory use in ng_small_literal_set/ng_literal_decorated
author: Alex Coyte <a.coyte@intel.com>
Thu, 29 Oct 2015 03:35:02 +0000 (14:35 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
Fri, 30 Oct 2015 00:28:37 +0000 (11:28 +1100)
These passes kept temporary strings/paths alive longer than was needed, which
led to high memory usage during these passes in pathological cases.

src/nfagraph/ng_literal_decorated.cpp
src/nfagraph/ng_small_literal_set.cpp

index 652fd14a8b353196e7d69e59f8462e7ff5d497e3..02b25a730f2ed820fcb6cc58f0bfc34eeade83eb 100644 (file)
@@ -67,6 +67,7 @@ static
 bool findPaths(const NGHolder &g, vector<Path> &paths) {
     vector<NFAVertex> order = getTopoOrdering(g);
 
+    vector<size_t> read_count(num_vertices(g));
     vector<vector<Path>> built(num_vertices(g));
 
     for (auto it = order.rbegin(); it != order.rend(); ++it) {
@@ -74,6 +75,11 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
         auto &out = built[g[v].index];
         assert(out.empty());
 
+        read_count[g[v].index] = out_degree(v, g);
+
+        DEBUG_PRINTF("setting read_count to %zu for %u\n",
+                      read_count[g[v].index], g[v].index);
+
         if (v == g.start || v == g.startDs) {
             out.push_back({v});
             continue;
@@ -94,6 +100,9 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
                 continue;
             }
 
+            assert(!built[g[u].index].empty());
+            assert(read_count[g[u].index]);
+
             for (const auto &p : built[g[u].index]) {
                 out.push_back(p);
                 out.back().push_back(v);
@@ -105,6 +114,13 @@ bool findPaths(const NGHolder &g, vector<Path> &paths) {
                     return false;
                 }
             }
+
+            read_count[g[u].index]--;
+            if (!read_count[g[u].index]) {
+                DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
+                built[g[u].index].clear();
+                built[g[u].index].shrink_to_fit();
+            }
         }
     }
 
index 89cb0ff8b7389743ffb13811fdad0bd72b5b7c68..b5867bb91ff2b365e884a2797391d6825f1ef594 100644 (file)
@@ -118,10 +118,15 @@ bool findLiterals(const NGHolder &g,
     vector<NFAVertex> order = getTopoOrdering(g);
 
     vector<set<sls_literal>> built(num_vertices(g));
+    vector<size_t> read_count(num_vertices(g));
 
     for (auto it = order.rbegin(); it != order.rend(); ++it) {
         NFAVertex v = *it;
         set<sls_literal> &out = built[g[v].index];
+        read_count[g[v].index] = out_degree(v, g);
+
+        DEBUG_PRINTF("setting read_count to %zu for %u\n",
+                      read_count[g[v].index], g[v].index);
 
         assert(out.empty());
         if (v == g.start) {
@@ -149,7 +154,10 @@ bool findLiterals(const NGHolder &g,
             }
 
             set<sls_literal> &in = built[g[u].index];
+            DEBUG_PRINTF("getting from %u (%zu reads to go)\n",
+                          g[u].index, read_count[g[u].index]);
             assert(!in.empty());
+            assert(read_count[g[u].index]);
 
             for (const sls_literal &lit : in) {
                 if (accept) {
@@ -171,10 +179,18 @@ bool findLiterals(const NGHolder &g,
                     out.insert(lit.append((u8)c, nocase));
 
                     if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
+                        DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
+                                      literals->size());
                         return false;
                     }
                 }
             }
+
+            read_count[g[u].index]--;
+            if (!read_count[g[u].index]) {
+                DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index);
+                in.clear();
+            }
         }
     }
 
@@ -206,6 +222,8 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
         return false;
     }
 
+    DEBUG_PRINTF("looking for literals\n");
+
     map<sls_literal, ue2::flat_set<ReportID>> literals;
     if (!findLiterals(g, &literals)) {
         DEBUG_PRINTF(":(\n");