ng_equivalence: reduce compile time on large cases

author Justin Viiret <justin.viiret@intel.com>

Thu, 2 Jun 2016 06:25:36 +0000 (16:25 +1000)

committer Matthew Barr <matthew.barr@intel.com>

Fri, 8 Jul 2016 00:47:33 +0000 (10:47 +1000)
author Justin Viiret <justin.viiret@intel.com>
Thu, 2 Jun 2016 06:25:36 +0000 (16:25 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Fri, 8 Jul 2016 00:47:33 +0000 (10:47 +1000)
diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp

index b8e5a8d6dcbc93ff95b9615b74af2beaa9e25ae4..d0ab7c4af1a97675ff5ad4e5d09f22bf3758845c 100644 (file)
--- a/src/nfagraph/ng_equivalence.cpp
+++ b/src/nfagraph/ng_equivalence.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -38,7 +38,7 @@
  #include "ng_util.h"
  #include "util/compile_context.h"
  #include "util/graph_range.h"
-#include "util/order_check.h"
+#include "util/ue2_containers.h"
  
  #include <algorithm>
  #include <set>
@@ -53,9 +53,8 @@ using boost::ptr_vector;
  namespace ue2 {
  
  enum EquivalenceType {
-    LEFT_EQUIVALENCE = 0,
+    LEFT_EQUIVALENCE,
      RIGHT_EQUIVALENCE,
-    MAX_EQUIVALENCE
  };
  
  namespace {
@@ -91,7 +90,6 @@ public:
  }
  
  typedef ue2::unordered_set<VertexInfo *, VertexInfoPtrCmp> VertexInfoSet;
-typedef ue2::unordered_map<unsigned, VertexInfoSet> ClassMap;
  
  // compare two vertex info pointers on their vertex index
  bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
@@ -118,27 +116,34 @@ public:
          DepthMinMax d1;
          DepthMinMax d2;
      };
-    ClassInfo(const NGHolder &g, VertexInfo &vi, ClassDepth &d_in,
+    ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in,
                EquivalenceType eq)
-        : vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr),
-          depth(d_in) {
-
-        // hackety-hack!
-        node_type = g[vi.v].index;
-        if (node_type > N_SPECIALS) {
-            // we treat all regular vertices the same
-            node_type = N_SPECIALS;
-        }
-
-        // get all the adjacent vertices' CharReach
-        adjacent_cr = eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr;
-
-        if (eq == RIGHT_EQUIVALENCE) {
-            rs = g[vi.v].reports;
-        }
+        : /* reports only matter for right-equiv */
+          rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()),
+          vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr),
+          adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr),
+          /* treat non-special vertices the same */
+          node_type(min(g[vi.v].index, u32{N_SPECIALS})), depth(d_in) {}
+
+    bool operator==(const ClassInfo &b) const {
+        return node_type == b.node_type && depth.d1 == b.depth.d1 &&
+               depth.d2 == b.depth.d2 && cr == b.cr &&
+               adjacent_cr == b.adjacent_cr && edge_top == b.edge_top &&
+               vertex_flags == b.vertex_flags && rs == b.rs;
      }
  
-    bool operator<(const ClassInfo &b) const;
+    friend size_t hash_value(const ClassInfo &c) {
+        size_t val = 0;
+        boost::hash_combine(val, boost::hash_range(begin(c.rs), end(c.rs)));
+        boost::hash_combine(val, c.vertex_flags);
+        boost::hash_combine(val, c.edge_top);
+        boost::hash_combine(val, c.cr);
+        boost::hash_combine(val, c.adjacent_cr);
+        boost::hash_combine(val, c.node_type);
+        boost::hash_combine(val, c.depth.d1);
+        boost::hash_combine(val, c.depth.d2);
+        return val;
+    }
  
  private:
      flat_set<ReportID> rs; /* for right equiv only */
@@ -200,26 +205,12 @@ public:
          return q.capacity();
      }
  private:
-    set<unsigned> ids; //!< stores id's, for uniqueness
+    unordered_set<unsigned> ids; //!< stores id's, for uniqueness
      vector<unsigned> q; //!< vector of id's that we use as FILO.
  };
  
  }
  
-bool ClassInfo::operator<(const ClassInfo &b) const {
-    const ClassInfo &a = *this;
-
-    ORDER_CHECK(node_type);
-    ORDER_CHECK(depth.d1);
-    ORDER_CHECK(depth.d2);
-    ORDER_CHECK(cr);
-    ORDER_CHECK(adjacent_cr);
-    ORDER_CHECK(edge_top);
-    ORDER_CHECK(vertex_flags);
-    ORDER_CHECK(rs);
-    return false;
-}
-
  static
  bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
      unsigned nonSpecialVertices = 0;
@@ -286,9 +277,14 @@ bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
  
  // populate VertexInfo table
  static
-void getVertexInfos(const NGHolder &g, ptr_vector<VertexInfo> &infos) {
+ptr_vector<VertexInfo> getVertexInfos(const NGHolder &g) {
+    const size_t num_verts = num_vertices(g);
+
+    ptr_vector<VertexInfo> infos;
+    infos.reserve(num_verts * 2);
+
      vector<VertexInfo *> vertex_map; // indexed by vertex_index property
-    vertex_map.resize(num_vertices(g));
+    vertex_map.resize(num_verts);
  
      for (auto v : vertices_range(g)) {
          VertexInfo *vi = new VertexInfo(v, g);
@@ -323,14 +319,24 @@ void getVertexInfos(const NGHolder &g, ptr_vector<VertexInfo> &infos) {
          }
          assert(!hasEdgeAsserts(cur_vi.v, g));
      }
+
+    return infos;
  }
  
  // store equivalence class in VertexInfo for each vertex
  static
-void partitionGraph(ptr_vector<VertexInfo> &infos, ClassMap &classes,
-                    WorkQueue &work_queue, const NGHolder &g,
-                    EquivalenceType eq) {
-    map<ClassInfo, unsigned> classinfomap;
+vector<VertexInfoSet> partitionGraph(ptr_vector<VertexInfo> &infos,
+                                     WorkQueue &work_queue, const NGHolder &g,
+                                     EquivalenceType eq) {
+    const size_t num_verts = infos.size();
+
+    vector<VertexInfoSet> classes;
+    unordered_map<ClassInfo, unsigned> classinfomap;
+
+    // assume we will have lots of classes, so we don't waste time resizing
+    // these structures.
+    classes.reserve(num_verts);
+    classinfomap.reserve(num_verts);
  
      // get distances from start (or accept) for all vertices
      // only one of them is used at a time, never both
@@ -356,28 +362,25 @@ void partitionGraph(ptr_vector<VertexInfo> &infos, ClassMap &classes,
  
          auto ii = classinfomap.find(ci);
          if (ii == classinfomap.end()) {
-            unsigned new_class = classinfomap.size();
-            vi.equivalence_class = new_class;
-
-            classinfomap[ci] = new_class;
-
-            // insert this vertex into the class map
-            VertexInfoSet &vertices = classes[new_class];
-            vertices.insert(&vi);
+            // vertex is in a new equivalence class by itself.
+            unsigned eq_class = classes.size();
+            vi.equivalence_class = eq_class;
+            classes.push_back({&vi});
+            classinfomap.emplace(move(ci), eq_class);
          } else {
+            // vertex is added to an existing class.
              unsigned eq_class = ii->second;
              vi.equivalence_class = eq_class;
-
-            // insert this vertex into the class map
-            VertexInfoSet &vertices = classes[eq_class];
-            vertices.insert(&vi);
+            classes.at(eq_class).insert(&vi);
  
              // we now know that this particular class has more than one
              // vertex, so we add it to the work queue
              work_queue.push(eq_class);
          }
      }
-    DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classinfomap.size());
+
+    DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size());
+    return classes;
  }
  
  // generalized equivalence processing (left and right)
@@ -388,7 +391,7 @@ void partitionGraph(ptr_vector<VertexInfo> &infos, ClassMap &classes,
  // equivalence, predecessors for right equivalence) classes get revalidated in
  // case of a split.
  static
-void equivalence(ClassMap &classmap, WorkQueue &work_queue,
+void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue,
                   EquivalenceType eq_type) {
      // now, go through the work queue until it's empty
      map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
@@ -397,12 +400,11 @@ void equivalence(ClassMap &classmap, WorkQueue &work_queue,
      WorkQueue reval_queue(work_queue.capacity());
  
      while (!work_queue.empty()) {
-
          // dequeue our class from the work queue
          unsigned cur_class = work_queue.pop();
  
          // get all vertices in current equivalence class
-        VertexInfoSet &cur_class_vertices = classmap[cur_class];
+        VertexInfoSet &cur_class_vertices = classes.at(cur_class);
  
          if (cur_class_vertices.size() < 2) {
              continue;
@@ -445,16 +447,20 @@ void equivalence(ClassMap &classmap, WorkQueue &work_queue,
  
              // start from the second class
              for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
-                unsigned new_class = classmap.size();
                  const VertexInfoSet &vertices_to_split = tmi->second;
-                VertexInfoSet &new_class_vertices = classmap[new_class];
+                unsigned new_class = classes.size();
+                VertexInfoSet new_class_vertices;
  
                  for (VertexInfo *vi : vertices_to_split) {
                      vi->equivalence_class = new_class;
-                    cur_class_vertices.erase(vi);
+                    // note: we cannot use the cur_class_vertices ref, as it is
+                    // invalidated by modifications to the classes vector.
+                    classes[cur_class].erase(vi);
                      new_class_vertices.insert(vi);
                  }
-                if (tmi->first.find(cur_class) != tmi->first.end()) {
+                classes.push_back(move(new_class_vertices));
+
+                if (contains(tmi->first, cur_class)) {
                      reval_queue.push(new_class);
                  }
              }
@@ -619,16 +625,15 @@ void mergeClass(ptr_vector<VertexInfo> &infos, NGHolder &g, unsigned eq_class,
  // vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
  // report behaviour with a single vertex).
  static
-bool mergeEquivalentClasses(ClassMap &classmap, ptr_vector<VertexInfo> &infos,
-                            NGHolder &g) {
+bool mergeEquivalentClasses(vector<VertexInfoSet> &classes,
+                            ptr_vector<VertexInfo> &infos, NGHolder &g) {
      bool merged = false;
      set<NFAVertex> toRemove;
  
      // go through all classes and merge classes with more than one vertex
-    for (auto &cm : classmap) {
+    for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) {
          // get all vertices in current equivalence class
-        unsigned eq_class = cm.first;
-        VertexInfoSet &cur_class_vertices = cm.second;
+        VertexInfoSet &cur_class_vertices = classes[eq_class];
  
          // we don't care for single-vertex classes
          if (cur_class_vertices.size() > 1) {
@@ -644,6 +649,26 @@ bool mergeEquivalentClasses(ClassMap &classmap, ptr_vector<VertexInfo> &infos,
      return merged;
  }
  
+static
+bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
+    // create a list of equivalence classes to check
+    WorkQueue work_queue(num_vertices(g));
+
+    // get information on every vertex in the graph
+    // new vertices are allocated here, and stored in infos
+    ptr_vector<VertexInfo> infos = getVertexInfos(g);
+
+    // partition the graph
+    auto classes = partitionGraph(infos, work_queue, g, eq_type);
+
+    // do equivalence processing
+    equivalence(classes, work_queue, eq_type);
+
+    // replace equivalent classes with single vertices
+    // new vertices are (possibly) allocated here, and stored in infos
+    return mergeEquivalentClasses(classes, infos, g);
+}
+
  bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
      if (!cc.grey.equivalenceEnable) {
          DEBUG_PRINTF("equivalence processing disabled in grey box\n");
@@ -661,34 +686,8 @@ bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
  
      // take note if we have merged any vertices
      bool merge = false;
-
-    for (int eqi = 0; eqi < MAX_EQUIVALENCE; ++eqi) {
-        // map of all information pertaining a vertex
-        ptr_vector<VertexInfo> infos;
-        ClassMap classes;
-
-        // create a list of equivalence classes to check
-        WorkQueue work_queue(num_vertices(g));
-        EquivalenceType eq_type = (EquivalenceType) eqi;
-
-        // resize the vector, make room for twice the vertices we have
-        infos.reserve(num_vertices(g) * 2);
-
-        // get information on every vertex in the graph
-        // new vertices are allocated here, and stored in infos
-        getVertexInfos(g, infos);
-
-        // partition the graph
-        partitionGraph(infos, classes, work_queue, g, eq_type);
-
-        // do equivalence processing
-        equivalence(classes, work_queue, eq_type);
-
-        // replace equivalent classes with single vertices
-        // new vertices are (possibly) allocated here, and stored in infos
-        merge |= mergeEquivalentClasses(classes, infos, g);
-    }
-
+    merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE);
+    merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE);
      return merge;
  }
author	Justin Viiret <justin.viiret@intel.com>
	Thu, 2 Jun 2016 06:25:36 +0000 (16:25 +1000)
committer	Matthew Barr <matthew.barr@intel.com>
	Fri, 8 Jul 2016 00:47:33 +0000 (10:47 +1000)