Rework literal overlap checks for merging engines

author Alex Coyte <a.coyte@intel.com>

Wed, 2 Dec 2015 04:15:02 +0000 (15:15 +1100)

committer Matthew Barr <matthew.barr@intel.com>

Tue, 1 Mar 2016 00:10:24 +0000 (11:10 +1100)
author Alex Coyte <a.coyte@intel.com>
Wed, 2 Dec 2015 04:15:02 +0000 (15:15 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Tue, 1 Mar 2016 00:10:24 +0000 (11:10 +1100)
diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp

index 4c55a41f42880f1a75e94aec3cebc20953a89e05..e42e0aca6b377a966159839268e14264aaac0032 100644 (file)
--- a/src/rose/rose_build_merge.cpp
+++ b/src/rose/rose_build_merge.cpp
@@ -93,6 +93,7 @@ static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
  static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
  static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
  static const size_t MERGE_GROUP_SIZE_MAX = 200;
+static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
  
  /** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
  static const size_t DFA_CHUNK_SIZE_MAX = 200;
@@ -799,47 +800,69 @@ static void chunkBouquets(const Bouquet<EngineRef> &in,
      }
  }
  
+static
+bool stringsCanFinishAtSameSpot(const ue2_literal &u,
+                                ue2_literal::const_iterator v_b,
+                                ue2_literal::const_iterator v_e) {
+    ue2_literal::const_iterator u_e = u.end();
+    ue2_literal::const_iterator u_b = u.begin();
+
+    while (u_e != u_b && v_e != v_b) {
+        --u_e;
+        --v_e;
+
+        if (!overlaps(*u_e, *v_e)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
  /**
- * Prefix analysis: For lit1 with delay1 and lit2 with delay2, let L be the
- * length of the largest suffix of lit1 that is a prefix of lit2. A merge is
- * bad if L - delay1 > len(lit2) - delay2.
+ * Check that, once u has been seen, it is impossible for the arrival of v to
+ * require the inspection of an engine earlier than u did.
+ *
+ * Let delta be the earliest that v can be seen after u (may be zero)
   *
- * OR if we would have to check 2 literals of differing lags at the same
- * point.
+ * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
+ * ==> - ulag <= delta - vlag
+ * ==> vlag - ulag <= delta
   */
  static
  bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
                   const rose_literal_id &vl, const u32 vlag) {
-    DEBUG_PRINTF("%s %s\n", escapeString(ul.s).c_str(),
-                 escapeString(vl.s).c_str());
-    if (ulag != vlag && (vl.delay || ul.delay || isSuffix(ul.s, vl.s))) {
-        /* rose literals should not be delayed anyway */
+    DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
+                 escapeString(vl.s).c_str(), vlag);
+
+    if (vl.delay || ul.delay) {
+        /* engine related literals should not be delayed anyway */
          return false;
      }
  
-    // Note that maxOverlap also picks up infixes.
-    size_t overlap = maxOverlap(ul, vl);
-    if (overlap < ulag) {
-        return true; /* avoiding underflow */
+    if (ulag >= vlag) {
+        assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
+        return true;
      }
-    return overlap - ulag <= vl.elength() - vlag;
-}
-
-bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
-                           RoseVertex v) {
-    assert(u != v);
  
-    const auto &ulits = tbi.g[u].literals;
-    const auto &vlits = tbi.g[v].literals;
+    size_t min_allowed_delta = vlag - ulag;
+    DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta);
  
-    // We cannot merge roses that prefix literals in different tables.
-    if (tbi.literals.right.at(*ulits.begin()).table !=
-            tbi.literals.right.at(*vlits.begin()).table) {
-        DEBUG_PRINTF("literals in different tables\n");
-        return false;
+    for (size_t i = 0; i < min_allowed_delta; i++) {
+        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
+            return false;
+        }
      }
  
-    const left_id u_left(tbi.g[u].left), v_left(tbi.g[v].left);
+    DEBUG_PRINTF("OK\n");
+    return true;
+}
+
+static
+bool hasSameEngineType(const RoseVertexProps &u_prop,
+                       const RoseVertexProps &v_prop) {
+    const left_id u_left(u_prop.left), v_left(v_prop.left);
  
      if (u_left.haig() || v_left.haig()) {
          if (u_left.graph() != v_left.graph()) {
@@ -859,11 +882,68 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
          }
      }
  
+    return true;
+}
+
+static
+bool compatibleLiteralsForMerge(
+                     const vector<pair<const rose_literal_id *, u32>> &ulits,
+                     const vector<pair<const rose_literal_id *, u32>> &vlits) {
+    assert(!ulits.empty());
+    assert(!vlits.empty());
+
+    // We cannot merge engines that prefix literals in different tables.
+    if (ulits[0].first->table != vlits[0].first->table) {
+        DEBUG_PRINTF("literals in different tables\n");
+        return false;
+    }
+
+    /* An engine requires that all accesses to it are ordered by offsets. (ie,
+       we can not check an engine's state at offset Y, if we have already
+       checked its status at offset X and X > Y). If we can not establish that
+       the literals used for triggering will satisfy this property, then it is
+       not safe to merge the engine. */
+    for (const auto &ue : ulits) {
+        const rose_literal_id &ul = *ue.first;
+        u32 ulag = ue.second;
+
+        if (ul.delay) {
+            return false; // We don't handle delayed cases yet.
+        }
+
+        for (const auto &ve : vlits) {
+            const rose_literal_id &vl = *ve.first;
+            u32 vlag = ve.second;
+
+            if (vl.delay) {
+                return false; // We don't handle delayed cases yet.
+            }
+
+            if (!checkPrefix(ul, ulag, vl, vlag)
+                || !checkPrefix(vl, vlag, ul, ulag)) {
+                DEBUG_PRINTF("prefix check failed\n");
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
+                           RoseVertex v) {
+    assert(u != v);
+
+    if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
+        return false;
+    }
+
      // UE-1675: in block mode, we want to be a little more selective -- only
      // merge prefix roses when the literal sets are the same.
      if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) {
          assert(tbi.isRootSuccessor(v));
-        if (ulits != vlits) {
+
+        if (tbi.g[u].literals != tbi.g[v].literals) {
              DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
              return false;
          }
@@ -883,88 +963,104 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
          }
      }
  
-    // We accept any pair of literal sets A and B where no literal in A
-    // contains a literal in B and no literal in B contains a literal in A.
-
-    const u32 ulag = tbi.g[u].left.lag;
-    const u32 vlag = tbi.g[v].left.lag;
-
-    for (const u32 &ulit : ulits) {
-        const rose_literal_id &ul = tbi.literals.right.at(ulit);
-
-        if (ul.delay) {
-            return false; // We don't handle delayed cases here.
-        }
-
-        for (const u32 &vlit : vlits) {
-            const rose_literal_id &vl = tbi.literals.right.at(vlit);
+    u32 ulag = tbi.g[u].left.lag;
+    vector<pair<const rose_literal_id *, u32>> ulits;
+    ulits.reserve(tbi.g[u].literals.size());
+    for (u32 id : tbi.g[u].literals) {
+        ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
+    }
  
-            if (vl.delay) {
-                return false; // We don't handle delayed cases here.
-            }
+    u32 vlag = tbi.g[v].left.lag;
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    vlits.reserve(tbi.g[v].literals.size());
+    for (u32 id : tbi.g[v].literals) {
+        vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
+    }
  
-            if (!checkPrefix(ul, ulag, vl, vlag) ||
-                !checkPrefix(vl, vlag, ul, ulag)) {
-                DEBUG_PRINTF("prefix check failed\n");
-                return false;
-            }
-        }
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
      }
  
-    DEBUG_PRINTF("roses on %zu and %zu are mergeable\n",
-                 tbi.g[u].idx, tbi.g[v].idx);
+    DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx,
+                 tbi.g[v].idx);
      return true;
  }
  
+/* We cannot merge an engine, if a trigger literal and a post literal overlap
+ * in such a way that engine status needs to be checked at a point before the
+ * engine's current location.
+ *
+ * i.e., for a trigger literal u and a post literal v,
+ * where delta is the earliest v can appear after u,
+ * we require that v_loc - v_lag >= u_loc
+ * ==> u_loc + delta - v_lag >= u_loc
+ * ==> delta >= v_lag
+ *
+ */
  static
-bool mergeableDelays(const RoseBuildImpl &tbi, const flat_set<u32> &ulits,
-                     const flat_set<u32> &vlits, u32 vlag) {
-    for (const u32 &ulit : ulits) {
-        const rose_literal_id &ul = tbi.literals.right.at(ulit);
-        assert(!ul.delay); // this should never have got this far?
-        for (const u32 vlit : vlits) {
-            const rose_literal_id &vl = tbi.literals.right.at(vlit);
-            assert(!vl.delay); // this should never have got this far?
-
-            DEBUG_PRINTF("%s %s (lag %u, overlap %zu)\n",
-                         escapeString(ul.s).c_str(),
-                         escapeString(vl.s).c_str(), vlag,
-                         maxOverlap(ul, vl));
-            size_t l = vl.elength() - maxOverlap(ul, vl);
-            if (vlag > l) {
-                DEBUG_PRINTF("failed lag check!\n");
-                return false;
-            }
+bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
+                    u32 vlag) {
+    DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
+                 escapeString(vl.s).c_str(), vlag);
+
+    for (size_t i = 0; i < vlag; i++) {
+        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
+            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
+            return false;
          }
      }
+
+    DEBUG_PRINTF("OK\n");
      return true;
  }
  
-static
+static never_inline
  bool checkPredDelays(const RoseBuildImpl &tbi, const deque<RoseVertex> &v1,
                       const deque<RoseVertex> &v2) {
-    set<RoseVertex> preds;
+    flat_set<RoseVertex> preds;
      for (auto v : v1) {
          insert(&preds, inv_adjacent_vertices(v, tbi.g));
      }
  
+    flat_set<u32> pred_lits;
+
+    /* No need to examine delays of a common pred - as it must already have
+     * survived the delay checks.
+     *
+     * This is important when the pred is in the anchored table as
+     * the literal is no longer available. */
+    flat_set<RoseVertex> known_good_preds;
+    for (auto v : v2) {
+        insert(&known_good_preds, inv_adjacent_vertices(v, tbi.g));
+    }
+
      for (auto u : preds) {
-        const auto &pred_lits = tbi.g[u].literals;
-        for (auto v : v2) {
-            u32 vlag = tbi.g[v].left.lag;
-            DEBUG_PRINTF("consider (%zu, %zu) lag=%u\n", tbi.g[u].idx,
-                         tbi.g[v].idx, vlag);
-            if (edge_by_target(u, v, tbi.g).second) {
-                /* no need to examine delays as it is a common pred - so checks
-                 * must already have survived the delay checks.
-                 * This is important when the pred is in the anchored table as
-                 * the literal is no longer available. */
-                DEBUG_PRINTF("ok, also %zu is also a pred of %zu\n",
-                             tbi.g[u].idx, tbi.g[v].idx);
-                continue;
-            }
-            if (!mergeableDelays(tbi, pred_lits, tbi.g[v].literals, vlag)) {
-                return false;
+        if (!contains(known_good_preds, u)) { /* skip preds common to both */
+            insert(&pred_lits, tbi.g[u].literals);
+        }
+    }
+
+    vector<const rose_literal_id *> pred_rose_lits;
+    pred_rose_lits.reserve(pred_lits.size());
+    for (const auto &p : pred_lits) {
+        pred_rose_lits.push_back(&tbi.literals.right.at(p));
+    }
+
+    for (auto v : v2) {
+        u32 vlag = tbi.g[v].left.lag;
+        if (!vlag) {
+            continue;
+        }
+
+        for (const u32 vlit : tbi.g[v].literals) {
+            const rose_literal_id &vl = tbi.literals.right.at(vlit);
+            assert(!vl.delay); // this should never have got this far?
+            for (const auto &ul : pred_rose_lits) {
+                assert(!ul->delay); // this should never have got this far?
+
+                if (!checkPredDelay(*ul, vl, vlag)) {
+                    return false;
+                }
              }
          }
      }
@@ -976,17 +1072,79 @@ static
  bool mergeableRoseVertices(const RoseBuildImpl &tbi,
                             const deque<RoseVertex> &verts1,
                             const deque<RoseVertex> &verts2) {
-    for (auto v1 : verts1) {
-        for (auto v2 : verts2) {
-            if (!mergeableRoseVertices(tbi, v1, v2)) {
+    assert(!verts1.empty());
+    assert(!verts2.empty());
+
+    RoseVertex u_front = verts1.front();
+    RoseVertex v_front = verts2.front();
+
+    /* all vertices must have the same engine type: assume all verts in each
+     * group are already of the same type */
+    if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
+        return false;
+    }
+
+    bool is_prefix = tbi.isRootSuccessor(u_front);
+
+    /* We cannot merge prefixes/vertices if they are successors of different
+     * root vertices: similarly, assume the grouped vertices are compatible */
+    if (is_prefix) {
+        assert(tbi.isRootSuccessor(v_front));
+        set<RoseVertex> u_preds;
+        set<RoseVertex> v_preds;
+        insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
+        insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));
+
+        if (u_preds != v_preds) {
+            return false;
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
+    for (auto a : verts1) {
+        // UE-1675: in block mode, we want to be a little more selective --
+        // only merge prefix roses when the literal sets are the same.
+        if (!tbi.cc.streaming && is_prefix) {
+            assert(tbi.isRootSuccessor(a));
+
+            if (tbi.g[u_front].literals != tbi.g[a].literals) {
+                DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
                  return false;
              }
          }
+
+        u32 ulag = tbi.g[a].left.lag;
+        for (u32 id : tbi.g[a].literals) {
+            ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
+        }
+    }
+
+    vector<pair<const rose_literal_id *, u32>> vlits;
+    for (auto a : verts2) {
+        // UE-1675: in block mode, we want to be a little more selective --
+        // only merge prefix roses when the literal sets are the same.
+        if (!tbi.cc.streaming && is_prefix) {
+            assert(tbi.isRootSuccessor(a));
+
+            if (tbi.g[u_front].literals != tbi.g[a].literals) {
+                DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
+                return false;
+            }
+        }
+
+        u32 vlag = tbi.g[a].left.lag;
+        for (u32 id : tbi.g[a].literals) {
+            vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
+        }
+    }
+
+    if (!compatibleLiteralsForMerge(ulits, vlits)) {
+        return false;
      }
  
      // Check preds are compatible as well.
-    if (!checkPredDelays(tbi, verts1, verts2) ||
-        !checkPredDelays(tbi, verts2, verts1)) {
+    if (!checkPredDelays(tbi, verts1, verts2)
+        || !checkPredDelays(tbi, verts2, verts1)) {
          return false;
      }
  
@@ -1741,33 +1899,31 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) {
  }
  
  static
-void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) {
+void mergeCastleChunk(RoseBuildImpl &tbi, RoseBouquet &cands) {
+    /* caller must have already ensured that candidates have the same reach */
      RoseGraph &g = tbi.g;
-    DEBUG_PRINTF("%zu castle rose merge candidates\n", roses.size());
+    DEBUG_PRINTF("%zu castle rose merge candidates\n", cands.size());
  
      deque<left_id> merged;
  
-    for (auto it = roses.begin(); it != roses.end(); ++it) {
+    for (auto it = cands.begin(); it != cands.end(); ++it) {
          left_id r1 = *it;
          CastleProto &castle1 = *r1.castle();
-        const deque<RoseVertex> &verts1 = roses.vertices(r1);
+        const deque<RoseVertex> &verts1 = cands.vertices(r1);
  
          merged.clear();
  
-        for (auto jt = next(it); jt != roses.end(); ++jt) {
+        for (auto jt = next(it); jt != cands.end(); ++jt) {
              left_id r2 = *jt;
              CastleProto &castle2 = *r2.castle();
-            const deque<RoseVertex> &verts2 = roses.vertices(r2);
+            const deque<RoseVertex> &verts2 = cands.vertices(r2);
  
              if (castle1.repeats.size() == castle1.max_occupancy) {
                  DEBUG_PRINTF("castle1 has hit max occupancy\n");
                  break; // next castle1
              }
  
-            if (castle1.reach() != castle2.reach()) {
-                DEBUG_PRINTF("different reach\n");
-                continue; // next castle2
-            }
+            assert(castle1.reach() == castle2.reach());
  
              if (!mergeableRoseVertices(tbi, verts1, verts2)) {
                  DEBUG_PRINTF("not mergeable\n");
@@ -1793,12 +1949,12 @@ void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) {
                  }
              }
  
-            roses.insert(r1, verts2);
+            cands.insert(r1, verts2);
              merged.push_back(r2);
          }
  
          DEBUG_PRINTF("%zu roses merged\n", merged.size());
-        roses.erase_all(merged.begin(), merged.end());
+        cands.erase_all(merged.begin(), merged.end());
      }
  }
  
@@ -1924,13 +2080,13 @@ void mergeCastleLeftfixes(RoseBuildImpl &tbi) {
      for (auto &m : by_reach) {
          DEBUG_PRINTF("%zu castles for reach: %s\n", m.second.size(),
                       describeClass(m.first).c_str());
-        RoseBouquet &roses = m.second;
-        deque<RoseBouquet> rose_groups;
-        chunkBouquets(roses, rose_groups, MERGE_GROUP_SIZE_MAX);
-        roses.clear();
+        RoseBouquet &candidates = m.second;
+        deque<RoseBouquet> cand_groups;
+        chunkBouquets(candidates, cand_groups, MERGE_CASTLE_GROUP_SIZE_MAX);
+        candidates.clear();
  
-        for (auto &group : rose_groups) {
-            mergeCastleRoses(tbi, group);
+        for (auto &group : cand_groups) {
+            mergeCastleChunk(tbi, group);
          }
      }
  }
author	Alex Coyte <a.coyte@intel.com>
	Wed, 2 Dec 2015 04:15:02 +0000 (15:15 +1100)
committer	Matthew Barr <matthew.barr@intel.com>
	Tue, 1 Mar 2016 00:10:24 +0000 (11:10 +1100)