git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
rose_build_groups: move assignGroupsToLiterals
author Justin Viiret <justin.viiret@intel.com>
Thu, 2 Jun 2016 03:10:42 +0000 (13:10 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Fri, 8 Jul 2016 00:47:07 +0000 (10:47 +1000)
src/rose/rose_build_compile.cpp
src/rose/rose_build_groups.cpp

index 7f3501097594c3b9e0e726324a6480e4214c15dc..a6d18f67d99c0e1071f14492017f652f3d173289 100644 (file)
@@ -69,7 +69,6 @@
 #include <algorithm>
 #include <functional>
 #include <map>
-#include <queue>
 #include <set>
 #include <string>
 #include <vector>
 #include <boost/range/adaptor/map.hpp>
 
 using namespace std;
-using boost::adaptors::map_keys;
 using boost::adaptors::map_values;
 
 namespace ue2 {
 
-#define ROSE_LONG_LITERAL_LEN 8
-
 #define ANCHORED_REHOME_MIN_FLOATING 800
 #define ANCHORED_REHOME_MIN_FLOATING_SHORT 50
 #define ANCHORED_REHOME_ALLOW_SHORT 20
 #define ANCHORED_REHOME_DEEP 25
 #define ANCHORED_REHOME_SHORT_LEN 3
 
-static
-bool superStrong(const rose_literal_id &lit) {
-    if (lit.s.length() < ROSE_LONG_LITERAL_LEN) {
-        return false;
-    }
-
-    const u32 EXPECTED_FDR_BUCKET_LENGTH = 8;
-
-    assert(lit.s.length() >= EXPECTED_FDR_BUCKET_LENGTH);
-    size_t len = lit.s.length();
-    const string &s = lit.s.get_string();
-
-    for (size_t i = 1; i < EXPECTED_FDR_BUCKET_LENGTH; i++) {
-        if (s[len - 1 - i] != s[len - 1]) {
-            return true; /* we have at least some variation in the tail */
-        }
-    }
-    DEBUG_PRINTF("lit '%s' is not superstrong due to tail\n",
-                 escapeString(s).c_str());
-    return false;
-}
-
 rose_group RoseBuildImpl::getGroups(RoseVertex v) const {
     rose_group groups = 0;
 
@@ -863,274 +837,6 @@ bool RoseBuildImpl::hasFinalId(u32 id) const {
     return literal_info.at(id).final_id != MO_INVALID_IDX;
 }
 
-static
-bool eligibleForAlwaysOnGroup(const RoseBuildImpl &tbi, u32 id) {
-    /* returns true if it or any of its delay versions have root role */
-    for (auto v : tbi.literal_info[id].vertices) {
-        if (tbi.isRootSuccessor(v)) {
-            NGHolder *h = tbi.g[v].left.graph.get();
-            if (!h || proper_out_degree(h->startDs, *h)) {
-                return true;
-            }
-        }
-    }
-
-    for (u32 delayed_id : tbi.literal_info[id].delayed_ids) {
-        for (auto v : tbi.literal_info[delayed_id].vertices) {
-            if (tbi.isRootSuccessor(v)) {
-                NGHolder *h = tbi.g[v].left.graph.get();
-                if (!h || proper_out_degree(h->startDs, *h)) {
-                    return true;
-                }
-            }
-        }
-    }
-
-    return false;
-}
-
-static
-bool requires_group_assignment(const rose_literal_id &lit,
-                               const rose_literal_info &info) {
-    if (lit.delay) { /* we will check the shadow's master */
-        return false;
-    }
-
-    if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) {
-        return false;
-    }
-
-    // If we already have a group applied, skip.
-    if (info.group_mask) {
-        return false;
-    }
-
-    if (info.vertices.empty() && info.delayed_ids.empty()) {
-        DEBUG_PRINTF("literal is good for nothing\n");
-        return false;
-    }
-
-    return true;
-}
-
-static
-rose_group calcLocalGroup(const RoseVertex v, const RoseGraph &g,
-                          const deque<rose_literal_info> &literal_info,
-                          const bool small_literal_count) {
-    rose_group local_group = 0;
-
-    for (auto u : inv_adjacent_vertices_range(v, g)) {
-        /* In small cases, ensure that siblings have the same rose parentage to
-         * allow rose squashing. In larger cases, don't do this as groups are
-         * probably too scarce. */
-        for (auto w : adjacent_vertices_range(u, g)) {
-            if (!small_literal_count || g[v].left == g[w].left) {
-                for (u32 lit_id : g[w].literals) {
-                    local_group |= literal_info[lit_id].group_mask;
-                }
-            } else {
-                DEBUG_PRINTF("not sibling different mother %zu %zu\n",
-                             g[v].idx, g[w].idx);
-            }
-        }
-    }
-
-    return local_group;
-}
-
-/* group constants */
-#define MAX_LIGHT_LITERAL_CASE 200 /* allow rose to affect group decisions below
-                                    * this */
-
-static
-flat_set<RoseVertex> getAssociatedVertices(const RoseBuildImpl &build, u32 id) {
-    flat_set<RoseVertex> out;
-    const auto &info = build.literal_info[id];
-    insert(&out, info.vertices);
-    for (const auto &delayed : info.delayed_ids) {
-        insert(&out, build.literal_info[delayed].vertices);
-    }
-    return out;
-}
-
-static
-u32 next_available_group(u32 counter, u32 min_start_group) {
-    counter++;
-    if (counter == ROSE_GROUPS_MAX) {
-        DEBUG_PRINTF("resetting groups\n");
-        counter = min_start_group;
-    }
-
-    return counter;
-}
-
-// Assigns groups to literals in the general case, when we have more literals
-// than available groups.
-void RoseBuildImpl::assignGroupsToLiterals() {
-    bool small_literal_count = literal_info.size() <= MAX_LIGHT_LITERAL_CASE;
-
-    map<u8, u32> groupCount; /* group index to number of members */
-
-    u32 counter = 0;
-    u32 group_always_on = 0;
-
-    // First pass: handle always on literals.
-    for (const auto &e : literals.right) {
-        u32 id = e.first;
-        const rose_literal_id &lit = e.second;
-        rose_literal_info &info = literal_info[id];
-
-        if (!requires_group_assignment(lit, info)) {
-            continue;
-        }
-
-        // If this literal has a root role, we always have to search for it
-        // anyway, so it goes in the always-on group.
-        /* We could end up squashing it if it is followed by a .* */
-        if (eligibleForAlwaysOnGroup(*this, id)) {
-            info.group_mask = 1ULL << group_always_on;
-            groupCount[group_always_on]++;
-            continue;
-        }
-    }
-
-    u32 group_long_lit;
-    if (groupCount[group_always_on]) {
-        DEBUG_PRINTF("%u always on literals\n", groupCount[group_always_on]);
-        group_long_lit = group_always_on;
-        counter++;
-    } else {
-        group_long_lit = counter;
-        counter++;
-    }
-
-    u32 min_start_group = counter;
-    priority_queue<pair<pair<s32, s32>, u32> > pq;
-
-    // Second pass: the other literals.
-    for (const auto &e : literals.right) {
-        u32 id = e.first;
-        const rose_literal_id &lit = e.second;
-        rose_literal_info &info = literal_info[id];
-
-        if (!requires_group_assignment(lit, info)) {
-            continue;
-        }
-
-        assert(!eligibleForAlwaysOnGroup(*this, id));
-        pq.push(make_pair(make_pair(-(s32)literal_info[id].vertices.size(),
-                                    -(s32)lit.s.length()), id));
-    }
-
-    vector<u32> long_lits;
-    while (!pq.empty()) {
-        u32 id = pq.top().second;
-        pq.pop();
-        UNUSED const rose_literal_id &lit = literals.right.at(id);
-        DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id,
-                     literal_info[id].vertices.size(), lit.s.length());
-
-        u8 group_id = 0;
-        rose_group group = ~0ULL;
-        for (auto v : getAssociatedVertices(*this, id)) {
-            rose_group local_group = calcLocalGroup(v, g, literal_info,
-                                                    small_literal_count);
-            group &= local_group;
-            if (!group) {
-                break;
-            }
-        }
-
-        if (group == ~0ULL) {
-            goto boring;
-        }
-
-        group &= ~((1ULL << min_start_group) - 1); /* ensure the purity of the
-                                                    * always_on groups */
-        if (!group) {
-            goto boring;
-        }
-
-        group_id = ctz64(group);
-
-        /* TODO: fairness */
-        DEBUG_PRINTF("picking sibling group %hhd\n", group_id);
-        literal_info[id].group_mask = 1ULL << group_id;
-        groupCount[group_id]++;
-
-        continue;
-
-    boring:
-        /* long literals will either be stuck in a mega group or spread around
-         * depending on availability */
-        if (superStrong(lit)) {
-            long_lits.push_back(id);
-            continue;
-        }
-
-        // Other literals are assigned to our remaining groups round-robin.
-        group_id = counter;
-
-        DEBUG_PRINTF("picking boring group %hhd\n", group_id);
-        literal_info[id].group_mask = 1ULL << group_id;
-        groupCount[group_id]++;
-        counter = next_available_group(counter, min_start_group);
-    }
-
-    /* spread long literals out amongst unused groups if any, otherwise stick
-     * them in the always on the group */
-
-    if (groupCount[counter]) {
-        DEBUG_PRINTF("sticking long literals in the image of the always on\n");
-        for (u32 lit_id : long_lits) {
-            literal_info[lit_id].group_mask = 1ULL << group_long_lit;
-            groupCount[group_long_lit]++;
-        }
-    } else {
-        u32 min_long_counter = counter;
-        DEBUG_PRINTF("base long lit group = %u\n", min_long_counter);
-        for (u32 lit_id : long_lits) {
-            u8 group_id = counter;
-            literal_info[lit_id].group_mask = 1ULL << group_id;
-            groupCount[group_id]++;
-            counter = next_available_group(counter, min_long_counter);
-        }
-    }
-
-    /* assign delayed literals to the same group as their parent */
-    for (const auto &e : literals.right) {
-        u32 id = e.first;
-        const rose_literal_id &lit = e.second;
-
-        if (!lit.delay) {
-            continue;
-        }
-
-        u32 parent = literal_info[id].undelayed_id;
-        DEBUG_PRINTF("%u is shadow picking up groups from %u\n", id, parent);
-        assert(literal_info[parent].undelayed_id == parent);
-        assert(literal_info[parent].group_mask);
-        literal_info[id].group_mask = literal_info[parent].group_mask;
-        /* don't increment the group count - these don't really exist */
-    }
-
-    DEBUG_PRINTF("populate group to literal mapping\n");
-    for (const u32 id : literals.right | map_keys) {
-        rose_group groups = literal_info[id].group_mask;
-        while (groups) {
-            u32 group_id = findAndClearLSB_64(&groups);
-            group_to_literal[group_id].insert(id);
-        }
-    }
-
-    /* find how many groups we allocated */
-    for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) {
-        if (groupCount[i]) {
-            group_end = MAX(group_end, i + 1);
-        }
-    }
-}
-
 bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const {
     for (u32 lit_id : g[v].literals) {
         if (literals.right.at(lit_id).delay) {
index f99ac171f67a03f6fc3fe4286f1856252791e061..127731be57e7b280f6a8fc19fd4ff30eb565ba4f 100644 (file)
 
 #include "rose_build_groups.h"
 
+#include <queue>
 #include <vector>
 
 #include <boost/graph/topological_sort.hpp>
+#include <boost/range/adaptor/map.hpp>
 #include <boost/range/adaptor/reversed.hpp>
 
 using namespace std;
+using boost::adaptors::map_keys;
 
 namespace ue2 {
 
+#define ROSE_LONG_LITERAL_LEN 8
+
+/**
+ * \brief True if the literal is at least ROSE_LONG_LITERAL_LEN characters long
+ * and its final EXPECTED_FDR_BUCKET_LENGTH characters are not all identical.
+ */
+static
+bool superStrong(const rose_literal_id &lit) {
+    const size_t lit_len = lit.s.length();
+    if (lit_len < ROSE_LONG_LITERAL_LEN) {
+        return false;
+    }
+
+    const u32 EXPECTED_FDR_BUCKET_LENGTH = 8;
+    assert(lit_len >= EXPECTED_FDR_BUCKET_LENGTH);
+
+    const string &text = lit.s.get_string();
+    const char last = text[lit_len - 1];
+
+    /* Walk backwards over the rest of the tail looking for any character
+     * that differs from the final one. */
+    for (size_t back = 1; back < EXPECTED_FDR_BUCKET_LENGTH; back++) {
+        if (text[lit_len - 1 - back] != last) {
+            return true; /* the tail is not a single repeated character */
+        }
+    }
+
+    DEBUG_PRINTF("lit '%s' is not superstrong due to tail\n",
+                 escapeString(text).c_str());
+    return false;
+}
+
+/**
+ * \brief Returns true if literal \p id, or any of its delayed versions, has a
+ * vertex that is a root successor and whose leftfix graph is either absent or
+ * has a non-zero proper out-degree on startDs.
+ */
+static
+bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) {
+    // Per-vertex test shared by the literal itself and its delayed versions.
+    auto has_root_role = [&build](const RoseVertex v) -> bool {
+        if (!build.isRootSuccessor(v)) {
+            return false;
+        }
+        NGHolder *holder = build.g[v].left.graph.get();
+        return !holder || proper_out_degree(holder->startDs, *holder);
+    };
+
+    const auto &info = build.literal_info[id];
+
+    for (auto v : info.vertices) {
+        if (has_root_role(v)) {
+            return true;
+        }
+    }
+
+    for (u32 delayed_id : info.delayed_ids) {
+        for (auto v : build.literal_info[delayed_id].vertices) {
+            if (has_root_role(v)) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+/**
+ * \brief Decides whether the group assignment passes need to pick a group for
+ * this literal.
+ *
+ * Returns false for delayed literals (handled via their undelayed master),
+ * literals in the anchored or event tables, literals that already carry a
+ * group mask, and literals with neither vertices nor delayed ids.
+ */
+static
+bool requires_group_assignment(const rose_literal_id &lit,
+                               const rose_literal_info &info) {
+    /* Checked in the same order as before: shadow literal, table, then any
+     * group that has already been applied. */
+    if (lit.delay || lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT
+        || info.group_mask) {
+        return false;
+    }
+
+    const bool has_users =
+        !info.vertices.empty() || !info.delayed_ids.empty();
+    if (!has_users) {
+        DEBUG_PRINTF("literal is good for nothing\n");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * \brief ORs together the group masks of the literals on the siblings of
+ * \p v, i.e. the vertices adjacent to each of v's inverse-adjacent vertices.
+ *
+ * When \p small_literal_count is set, only siblings whose left is equal to
+ * that of \p v contribute.
+ */
+static
+rose_group calcLocalGroup(const RoseVertex v, const RoseGraph &g,
+                          const deque<rose_literal_info> &literal_info,
+                          const bool small_literal_count) {
+    rose_group mask = 0;
+
+    for (auto parent : inv_adjacent_vertices_range(v, g)) {
+        for (auto sibling : adjacent_vertices_range(parent, g)) {
+            /* In small cases, siblings must share the same rose parentage to
+             * allow rose squashing. In larger cases groups are probably too
+             * scarce to be this picky. */
+            if (small_literal_count && !(g[v].left == g[sibling].left)) {
+                DEBUG_PRINTF("not sibling different mother %zu %zu\n",
+                             g[v].idx, g[sibling].idx);
+                continue;
+            }
+
+            for (u32 lit_id : g[sibling].literals) {
+                mask |= literal_info[lit_id].group_mask;
+            }
+        }
+    }
+
+    return mask;
+}
+
+/* group constants */
+#define MAX_LIGHT_LITERAL_CASE 200 /* allow rose to affect group decisions below
+                                    * this */
+
+/**
+ * \brief Collects the vertices of literal \p id together with the vertices of
+ * each of its delayed ids into a single set.
+ */
+static
+flat_set<RoseVertex> getAssociatedVertices(const RoseBuildImpl &build, u32 id) {
+    const auto &lit_info = build.literal_info[id];
+
+    flat_set<RoseVertex> vertices;
+    insert(&vertices, lit_info.vertices);
+
+    for (const auto &delayed_id : lit_info.delayed_ids) {
+        const auto &delayed_info = build.literal_info[delayed_id];
+        insert(&vertices, delayed_info.vertices);
+    }
+
+    return vertices;
+}
+
+/**
+ * \brief Returns the group index following \p counter, wrapping round to
+ * \p min_start_group once ROSE_GROUPS_MAX is reached.
+ */
+static
+u32 next_available_group(u32 counter, u32 min_start_group) {
+    const u32 candidate = counter + 1;
+    if (candidate != ROSE_GROUPS_MAX) {
+        return candidate;
+    }
+
+    DEBUG_PRINTF("resetting groups\n");
+    return min_start_group;
+}
+
+// Assigns groups to literals in the general case, when we have more literals
+// than available groups.
+//
+// The work is done in passes over the literal table:
+//  1. literals eligible for the always-on group are placed in group 0;
+//  2. the remaining literals that still need a group are queued and handled
+//     in priority order: each takes a group shared by the siblings of all of
+//     its associated vertices if there is one, otherwise it is either
+//     deferred (superStrong literals) or given a group round-robin;
+//  3. the deferred long literals are spread over groups starting at the
+//     current counter, or all placed in group_long_lit;
+//  4. delayed literals copy the group mask of their undelayed parent.
+// Finally group_to_literal and group_end are updated from the result.
+void RoseBuildImpl::assignGroupsToLiterals() {
+    bool small_literal_count = literal_info.size() <= MAX_LIGHT_LITERAL_CASE;
+
+    map<u8, u32> groupCount; /* group index to number of members */
+
+    // counter is the next group index to hand out; group_always_on is the
+    // index used for always-on literals.
+    u32 counter = 0;
+    u32 group_always_on = 0;
+
+    // First pass: handle always on literals.
+    for (const auto &e : literals.right) {
+        u32 id = e.first;
+        const rose_literal_id &lit = e.second;
+        rose_literal_info &info = literal_info[id];
+
+        if (!requires_group_assignment(lit, info)) {
+            continue;
+        }
+
+        // If this literal has a root role, we always have to search for it
+        // anyway, so it goes in the always-on group.
+        /* We could end up squashing it if it is followed by a .* */
+        if (eligibleForAlwaysOnGroup(*this, id)) {
+            info.group_mask = 1ULL << group_always_on;
+            groupCount[group_always_on]++;
+            continue;
+        }
+    }
+
+    // Note: counter and group_always_on are both still 0 here, so either
+    // branch leaves group_long_lit == 0 and counter == 1; only the debug
+    // output differs.
+    u32 group_long_lit;
+    if (groupCount[group_always_on]) {
+        DEBUG_PRINTF("%u always on literals\n", groupCount[group_always_on]);
+        group_long_lit = group_always_on;
+        counter++;
+    } else {
+        group_long_lit = counter;
+        counter++;
+    }
+
+    // Groups below min_start_group are never handed out by the second pass.
+    u32 min_start_group = counter;
+    // Keys are ((-vertex count, -literal length), id). The values are negated
+    // so that the max-first priority_queue pops the literals with the fewest
+    // vertices first, then the shortest literals.
+    priority_queue<pair<pair<s32, s32>, u32> > pq;
+
+    // Second pass: the other literals.
+    for (const auto &e : literals.right) {
+        u32 id = e.first;
+        const rose_literal_id &lit = e.second;
+        rose_literal_info &info = literal_info[id];
+
+        if (!requires_group_assignment(lit, info)) {
+            continue;
+        }
+
+        // Eligible literals received a group mask in the first pass and so
+        // were rejected by requires_group_assignment above.
+        assert(!eligibleForAlwaysOnGroup(*this, id));
+        pq.push(make_pair(make_pair(-(s32)literal_info[id].vertices.size(),
+                                    -(s32)lit.s.length()), id));
+    }
+    vector<u32> long_lits;
+    while (!pq.empty()) {
+        u32 id = pq.top().second;
+        pq.pop();
+        UNUSED const rose_literal_id &lit = literals.right.at(id);
+        DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id,
+                     literal_info[id].vertices.size(), lit.s.length());
+
+        u8 group_id = 0;
+        // Intersect the sibling groups seen from every associated vertex,
+        // stopping early once nothing is left in common.
+        rose_group group = ~0ULL;
+        for (auto v : getAssociatedVertices(*this, id)) {
+            rose_group local_group = calcLocalGroup(v, g, literal_info,
+                                                    small_literal_count);
+            group &= local_group;
+            if (!group) {
+                break;
+            }
+        }
+
+        // The mask was never narrowed, so the siblings tell us nothing.
+        if (group == ~0ULL) {
+            goto boring;
+        }
+
+        group &= ~((1ULL << min_start_group) - 1); /* ensure the purity of the
+                                                    * always_on groups */
+        if (!group) {
+            goto boring;
+        }
+
+        // NOTE(review): ctz64 presumably yields the index of the lowest set
+        // bit, i.e. the lowest-numbered shared group -- confirm.
+        group_id = ctz64(group);
+
+        /* TODO: fairness */
+        DEBUG_PRINTF("picking sibling group %hhd\n", group_id);
+        literal_info[id].group_mask = 1ULL << group_id;
+        groupCount[group_id]++;
+
+        continue;
+
+    boring:
+        /* long literals will either be stuck in a mega group or spread around
+         * depending on availability */
+        if (superStrong(lit)) {
+            long_lits.push_back(id);
+            continue;
+        }
+
+        // Other literals are assigned to our remaining groups round-robin.
+        group_id = counter;
+
+        DEBUG_PRINTF("picking boring group %hhd\n", group_id);
+        literal_info[id].group_mask = 1ULL << group_id;
+        groupCount[group_id]++;
+        counter = next_available_group(counter, min_start_group);
+    }
+
+    /* spread long literals out amongst unused groups if any, otherwise stick
+     * them in the always on group */
+
+    // A non-zero count at the current counter position means that group
+    // already has members.
+    if (groupCount[counter]) {
+        DEBUG_PRINTF("sticking long literals in the image of the always on\n");
+        for (u32 lit_id : long_lits) {
+            literal_info[lit_id].group_mask = 1ULL << group_long_lit;
+            groupCount[group_long_lit]++;
+        }
+    } else {
+        // Wrap back to min_long_counter rather than min_start_group, so the
+        // long literals only use groups from the current counter upwards.
+        u32 min_long_counter = counter;
+        DEBUG_PRINTF("base long lit group = %u\n", min_long_counter);
+        for (u32 lit_id : long_lits) {
+            u8 group_id = counter;
+            literal_info[lit_id].group_mask = 1ULL << group_id;
+            groupCount[group_id]++;
+            counter = next_available_group(counter, min_long_counter);
+        }
+    }
+    /* assign delayed literals to the same group as their parent */
+    for (const auto &e : literals.right) {
+        u32 id = e.first;
+        const rose_literal_id &lit = e.second;
+
+        if (!lit.delay) {
+            continue;
+        }
+
+        u32 parent = literal_info[id].undelayed_id;
+        DEBUG_PRINTF("%u is shadow picking up groups from %u\n", id, parent);
+        assert(literal_info[parent].undelayed_id == parent);
+        assert(literal_info[parent].group_mask);
+        literal_info[id].group_mask = literal_info[parent].group_mask;
+        /* don't increment the group count - these don't really exist */
+    }
+
+    DEBUG_PRINTF("populate group to literal mapping\n");
+    for (const u32 id : literals.right | map_keys) {
+        rose_group groups = literal_info[id].group_mask;
+        // NOTE(review): findAndClearLSB_64 presumably returns the index of
+        // the lowest set bit and clears it in groups -- confirm.
+        while (groups) {
+            u32 group_id = findAndClearLSB_64(&groups);
+            group_to_literal[group_id].insert(id);
+        }
+    }
+
+    /* find how many groups we allocated */
+    // group_end ends up at least one past the highest group with members.
+    for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) {
+        if (groupCount[i]) {
+            group_end = MAX(group_end, i + 1);
+        }
+    }
+}
+
 /**
  * \brief Returns a mapping from each graph vertex v to the intersection of the
  * groups switched on by all of the paths leading up to (and including) v from