rose: use program offsets directly in lit tables
author Justin Viiret <justin.viiret@intel.com>
Sun, 29 Jan 2017 22:14:03 +0000 (09:14 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 26 Apr 2017 04:46:48 +0000 (14:46 +1000)
src/rose/match.c
src/rose/rose_build_anchored.cpp
src/rose/rose_build_anchored.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_impl.h
src/rose/rose_build_matchers.cpp
src/rose/rose_build_matchers.h

diff --git a/src/rose/match.c b/src/rose/match.c
index 9a702804e4fad4519ef097a2186a8f09381a41fd..c7c73d2577c4da9ba2d2d27f1ebb35bbed8933fd 100644 (file)
@@ -85,19 +85,13 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
 
     DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);
 
-    const u32 *delayRebuildPrograms =
-        getByOffset(t, t->litDelayRebuildProgramOffset);
-    assert(id < t->literalCount);
-    const u32 program = delayRebuildPrograms[id];
-
-    if (program) {
-        const u64a som = 0;
-        const size_t match_len = end - start + 1;
-        const u8 flags = 0;
-        UNUSED hwlmcb_rv_t rv = roseRunProgram(t, scratch, program, som,
-                                               real_end, match_len, flags);
-        assert(rv != HWLM_TERMINATE_MATCHING);
-    }
+    assert(id < t->size); // id is a program offset
+    const u64a som = 0;
+    const size_t match_len = end - start + 1;
+    const u8 flags = 0;
+    UNUSED hwlmcb_rv_t rv =
+        roseRunProgram(t, scratch, id, som, real_end, match_len, flags);
+    assert(rv != HWLM_TERMINATE_MATCHING);
 
     /* we are just repopulating the delay queue, groups should be
      * already set from the original scan. */
@@ -245,12 +239,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
                              struct hs_scratch *scratch, u64a end,
                              size_t match_len, u32 id) {
     DEBUG_PRINTF("id=%u\n", id);
-    const u32 *programs = getByOffset(t, t->litProgramOffset);
-    assert(id < t->literalCount);
+    assert(id < t->size); // id is an offset into bytecode
     const u64a som = 0;
     const u8 flags = 0;
-    return roseRunProgram_i(t, scratch, programs[id], som, end, match_len,
-                            flags);
+    return roseRunProgram_i(t, scratch, id, som, end, match_len, flags);
 }
 
 static rose_inline
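
In effect, the runtime no longer dereferences a per-literal program table on every match: the id reported by the HWLM literal matcher is already a byte offset into the Rose bytecode. Below is a minimal before/after sketch of that dispatch, using only names that appear in the hunks above; the real callbacks also handle the delay queue, buffer bounds and group state, which are elided here.

    /* Old scheme (removed above): the reported id was a final_id, used to
     * index a table of program offsets stored in the bytecode. */
    const u32 *programs = getByOffset(t, t->litProgramOffset);
    hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[id], som, end,
                                    match_len, flags);

    /* New scheme: id is already the byte offset of the literal's program,
     * so it is passed straight through with no extra table read. */
    hwlmcb_rv_t rv = roseRunProgram(t, scratch, id, som, end, match_len,
                                    flags);
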
diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp
index ea565eaa46ae73e53b800cc249ef4243695c739f..77eef142a5d51148ba21fd16733dc3c2dce99c69 100644 (file)
@@ -208,8 +208,8 @@ void remapAnchoredReports(RoseBuildImpl &build) {
  * raw_dfa with program offsets.
  */
 static
-void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms,
-                        const map<u32, u32> &final_to_frag_map) {
+void remapIdsToPrograms(raw_dfa &rdfa,
+                        const map<u32, LitFragment> &final_to_frag_map) {
     for (dstate &ds : rdfa.states) {
         assert(ds.reports_eod.empty()); // Not used in anchored matcher.
         if (ds.reports.empty()) {
@@ -219,9 +219,8 @@ void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms,
         flat_set<ReportID> new_reports;
         for (auto final_id : ds.reports) {
             assert(contains(final_to_frag_map, final_id));
-            auto frag_id = final_to_frag_map.at(final_id);
-            assert(frag_id < litPrograms.size());
-            new_reports.insert(litPrograms.at(frag_id));
+            auto &frag = final_to_frag_map.at(final_id);
+            new_reports.insert(frag.lit_program_offset);
         }
         ds.reports = move(new_reports);
     }
@@ -849,8 +848,8 @@ vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build) {
 
 aligned_unique_ptr<anchored_matcher_info>
 buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
-                     const vector<u32> &litPrograms,
-                     const map<u32, u32> &final_to_frag_map, size_t *asize) {
+                     const map<u32, LitFragment> &final_to_frag_map,
+                     size_t *asize) {
     const CompileContext &cc = build.cc;
 
     if (dfas.empty()) {
@@ -860,7 +859,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
     }
 
     for (auto &rdfa : dfas) {
-        remapIdsToPrograms(rdfa, litPrograms, final_to_frag_map);
+        remapIdsToPrograms(rdfa, final_to_frag_map);
     }
 
     vector<aligned_unique_ptr<NFA>> nfas;
diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h
index fa379ff6ff1629db1d33de950d536cb13dc62931..3d411bd7c210d00a331064c1cff5976d99dad43d 100644 (file)
@@ -30,7 +30,7 @@
 #define ROSE_BUILD_ANCHORED
 
 #include "ue2common.h"
-#include "rose_build.h"
+#include "rose_build_impl.h"
 #include "nfagraph/ng_holder.h"
 #include "util/alloc.h"
 
@@ -59,8 +59,7 @@ std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build);
  */
 aligned_unique_ptr<anchored_matcher_info>
 buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas,
-                     const std::vector<u32> &litPrograms,
-                     const std::map<u32, u32> &final_to_frag_map,
+                     const std::map<u32, LitFragment> &final_to_frag_map,
                      size_t *asize);
 
 u32 anchoredStateSize(const anchored_matcher_info &atable);
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 321d1ca241dda51f3da50d2d0b49ae0c7695157e..92e9aa59f63c3c5e8ebbdbae87dcce62cb247c82 100644 (file)
@@ -213,10 +213,6 @@ struct build_context : boost::noncopyable {
      * that have already been pushed into the engine_blob. */
     ue2::unordered_map<u32, u32> engineOffsets;
 
-    /** \brief Literal programs, indexed by final_id, after they have been
-     * written to the engine_blob. */
-    vector<u32> litPrograms;
-
     /** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
      * that need hash table support. */
     vector<ue2_case_string> longLiterals;
@@ -4578,6 +4574,10 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc,
 static
 u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
                              const flat_set<u32> &final_ids) {
+    if (!build.cc.streaming) {
+        return 0; // We only do delayed rebuild in streaming mode.
+    }
+
     RoseProgram program;
 
     for (const auto &final_id : final_ids) {
@@ -4649,9 +4649,9 @@ rose_literal_id getFragment(const rose_literal_id &lit) {
     return frag;
 }
 
-map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
+map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build) {
     u32 frag_id = 0;
-    map<u32, u32> final_to_frag;
+    map<u32, LitFragment> final_to_frag;
 
     map<rose_literal_id, vector<u32>> frag_lits;
     for (const auto &m : build.final_id_to_literal) {
@@ -4660,21 +4660,21 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
         assert(!lit_ids.empty());
 
         if (lit_ids.size() > 1) {
-            final_to_frag.emplace(final_id, frag_id++);
+            final_to_frag.emplace(final_id, LitFragment(frag_id++));
             continue;
         }
 
         const auto lit_id = *lit_ids.begin();
         const auto &lit = build.literals.right.at(lit_id);
         if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
-            final_to_frag.emplace(final_id, frag_id++);
+            final_to_frag.emplace(final_id, LitFragment(frag_id++));
             continue;
         }
 
         // Combining fragments that squash their groups is unsafe.
         const auto &info = build.literal_info[lit_id];
         if (info.squash_group) {
-            final_to_frag.emplace(final_id, frag_id++);
+            final_to_frag.emplace(final_id, LitFragment(frag_id++));
             continue;
         }
 
@@ -4689,7 +4689,7 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
                      as_string_list(m.second).c_str());
         for (const auto final_id : m.second) {
             assert(!contains(final_to_frag, final_id));
-            final_to_frag.emplace(final_id, frag_id);
+            final_to_frag.emplace(final_id, LitFragment(frag_id));
         }
         frag_id++;
     }
@@ -4709,11 +4709,11 @@ map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
 static
 tuple<u32, u32, u32>
 buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
-                     const map<u32, u32> &final_to_frag_map) {
+                     map<u32, LitFragment> &final_to_frag_map) {
     // Build a reverse mapping from fragment -> final_id.
     map<u32, flat_set<u32>> frag_to_final_map;
     for (const auto &m : final_to_frag_map) {
-        frag_to_final_map[m.second].insert(m.first);
+        frag_to_final_map[m.second.fragment_id].insert(m.first);
     }
 
     const u32 num_fragments = verify_u32(frag_to_final_map.size());
@@ -4721,7 +4721,7 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
 
     auto lit_edge_map = findEdgesByLiteral(build);
 
-    bc.litPrograms.resize(num_fragments);
+    vector<u32> litPrograms(num_fragments);
     vector<u32> delayRebuildPrograms(num_fragments);
 
     for (u32 frag_id = 0; frag_id != num_fragments; ++frag_id) {
@@ -4729,14 +4729,20 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
         DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag_id,
                      as_string_list(final_ids).c_str());
 
-        bc.litPrograms[frag_id] =
+        litPrograms[frag_id] =
             writeLiteralProgram(build, bc, final_ids, lit_edge_map);
         delayRebuildPrograms[frag_id] =
             buildDelayRebuildProgram(build, bc, final_ids);
     }
 
+    // Update LitFragment entries.
+    for (auto &frag : final_to_frag_map | map_values) {
+        frag.lit_program_offset = litPrograms[frag.fragment_id];
+        frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
+    }
+
     u32 litProgramsOffset =
-        bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms));
+        bc.engine_blob.add(begin(litPrograms), end(litPrograms));
     u32 delayRebuildProgramsOffset = bc.engine_blob.add(
         begin(delayRebuildPrograms), end(delayRebuildPrograms));
 
@@ -5513,8 +5519,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     // Build anchored matcher.
     size_t asize = 0;
     u32 amatcherOffset = 0;
-    auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms,
-                                       final_to_frag_map, &asize);
+    auto atable =
+        buildAnchoredMatcher(*this, anchored_dfas, final_to_frag_map, &asize);
     if (atable) {
         currOffset = ROUNDUP_CL(currOffset);
         amatcherOffset = currOffset;
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h
index bfdca80ca4bb269086e91e2e14558ad1d46ae79e..e615d42ba7a61fc4bdcfbb65a9494b90a0de746e 100644 (file)
@@ -642,7 +642,14 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
 bool canImplementGraphs(const RoseBuildImpl &tbi);
 #endif
 
-std::map<u32, u32> groupByFragment(const RoseBuildImpl &build);
+struct LitFragment {
+    explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {}
+    u32 fragment_id;
+    u32 lit_program_offset = 0;
+    u32 delay_program_offset = 0;
+};
+
+std::map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build);
 
 } // namespace ue2
 
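Taken together with the buildLiteralPrograms() hunk above, the intended life cycle of a LitFragment looks roughly like the consolidated sketch below. It is assembled from code in this commit rather than being a standalone function: the blob writes, the boost map_values adaptor, and the litPrograms/delayRebuildPrograms construction are elided, and final_id/delay_rebuild stand in for values supplied by the caller.

    std::map<u32, LitFragment> final_to_frag_map = groupByFragment(build);

    // Once each fragment's programs have been written into the engine
    // blob, record their byte offsets on the fragment itself.
    for (auto &m : final_to_frag_map) {
        LitFragment &frag = m.second;
        frag.lit_program_offset = litPrograms[frag.fragment_id];
        frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
    }

    // Matcher construction then hands the offset straight to HWLM as the
    // literal's id (delay-rebuild matchers use the delay program instead).
    const LitFragment &frag = final_to_frag_map.at(final_id);
    u32 id = delay_rebuild ? frag.delay_program_offset
                           : frag.lit_program_offset;
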
diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp
index 9f7709737af53d4d8d48e95bdb615fe9d2ecaccb..c725b39d39f7ee7b81ad2b5dbe247a6b0dc3cbf8 100644 (file)
@@ -637,12 +637,12 @@ u64a literalMinReportOffset(const RoseBuildImpl &build,
 
 static
 map<u32, hwlm_group_t> makeFragGroupMap(const RoseBuildImpl &build,
-                 const map<u32, u32> &final_to_frag_map) {
+                 const map<u32, LitFragment> &final_to_frag_map) {
     map<u32, hwlm_group_t> frag_to_group;
 
     for (const auto &m : final_to_frag_map) {
         u32 final_id = m.first;
-        u32 frag_id = m.second;
+        u32 frag_id = m.second.fragment_id;
         hwlm_group_t groups = 0;
         const auto &lits = build.final_id_to_literal.at(final_id);
         for (auto lit_id : lits) {
@@ -665,7 +665,7 @@ void trim_to_suffix(Container &c, size_t len) {
 }
 
 MatcherProto makeMatcherProto(const RoseBuildImpl &build,
-                              const map<u32, u32> &final_to_frag_map,
+                              const map<u32, LitFragment> &final_to_frag_map,
                               rose_literal_table table, bool delay_rebuild,
                               size_t max_len, u32 max_offset) {
     MatcherProto mp;
@@ -758,9 +758,11 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
     for (auto &lit : mp.lits) {
         u32 final_id = lit.id;
         assert(contains(final_to_frag_map, final_id));
-        lit.id = final_to_frag_map.at(final_id);
-        assert(contains(frag_group_map, lit.id));
-        lit.groups = frag_group_map.at(lit.id);
+        const auto &frag = final_to_frag_map.at(final_id);
+        lit.id = delay_rebuild ? frag.delay_program_offset
+                               : frag.lit_program_offset;
+        assert(contains(frag_group_map, frag.fragment_id));
+        lit.groups = frag_group_map.at(frag.fragment_id);
     }
 
     sort_and_unique(mp.lits);
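
The net effect on each HWLM literal is that its id and its group mask now come from different places: the id becomes a Rose program offset, while groups are still keyed by fragment. An annotated form of the loop in the hunk above (names are all from this commit; the assertions are elided):

    for (auto &lit : mp.lits) {
        // At this point lit.id is still a final_id.
        const LitFragment &frag = final_to_frag_map.at(lit.id);
        // The id handed to HWLM becomes a program offset into the bytecode...
        lit.id = delay_rebuild ? frag.delay_program_offset
                               : frag.lit_program_offset;
        // ...while the group mask is still looked up by fragment id.
        lit.groups = frag_group_map.at(frag.fragment_id);
    }
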
@@ -803,7 +805,7 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp,
 
 aligned_unique_ptr<HWLM>
 buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
-                     const map<u32, u32> &final_to_frag_map,
+                     const map<u32, LitFragment> &final_to_frag_map,
                      rose_group *fgroups, size_t *fsize,
                      size_t *historyRequired) {
     *fsize = 0;
@@ -841,7 +843,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
 
 aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
     const RoseBuildImpl &build, size_t longLitLengthThreshold,
-    const map<u32, u32> &final_to_frag_map, size_t *drsize) {
+    const map<u32, LitFragment> &final_to_frag_map, size_t *drsize) {
     *drsize = 0;
 
     if (!build.cc.streaming) {
@@ -871,7 +873,8 @@ aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
 
 aligned_unique_ptr<HWLM>
 buildSmallBlockMatcher(const RoseBuildImpl &build,
-                       const map<u32, u32> &final_to_frag_map, size_t *sbsize) {
+                       const map<u32, LitFragment> &final_to_frag_map,
+                       size_t *sbsize) {
     *sbsize = 0;
 
     if (build.cc.streaming) {
@@ -931,7 +934,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
 
 aligned_unique_ptr<HWLM>
 buildEodAnchoredMatcher(const RoseBuildImpl &build,
-                        const map<u32, u32> &final_to_frag_map, size_t *esize) {
+                        const map<u32, LitFragment> &final_to_frag_map,
+                        size_t *esize) {
     *esize = 0;
 
     auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,
diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h
index b06d460fe424b733d1d1230296c066c78b7c4fad..d7da113c4dc27b7ded08ae0f99716edc553e4ecd 100644 (file)
@@ -67,30 +67,30 @@ struct MatcherProto {
  * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
  * only lead to a pattern match after max_offset may be excluded.
  */
-MatcherProto makeMatcherProto(const RoseBuildImpl &build,
-                              const std::map<u32, u32> &final_to_frag_map,
-                              rose_literal_table table, bool delay_rebuild,
-                              size_t max_len, u32 max_offset = ROSE_BOUND_INF);
-
-aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
-                            size_t longLitLengthThreshold,
-                            const std::map<u32, u32> &final_to_frag_map,
-                            rose_group *fgroups,
-                            size_t *fsize,
-                            size_t *historyRequired);
+MatcherProto
+makeMatcherProto(const RoseBuildImpl &build,
+                 const std::map<u32, LitFragment> &final_to_frag_map,
+                 rose_literal_table table, bool delay_rebuild, size_t max_len,
+                 u32 max_offset = ROSE_BOUND_INF);
+
+aligned_unique_ptr<HWLM>
+buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
+                     const std::map<u32, LitFragment> &final_to_frag_map,
+                     rose_group *fgroups, size_t *fsize,
+                     size_t *historyRequired);
 
 aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
     const RoseBuildImpl &build, size_t longLitLengthThreshold,
-    const std::map<u32, u32> &final_to_frag_map, size_t *drsize);
+    const std::map<u32, LitFragment> &final_to_frag_map, size_t *drsize);
 
 aligned_unique_ptr<HWLM>
 buildSmallBlockMatcher(const RoseBuildImpl &build,
-                       const std::map<u32, u32> &final_to_frag_map,
+                       const std::map<u32, LitFragment> &final_to_frag_map,
                        size_t *sbsize);
 
 aligned_unique_ptr<HWLM>
 buildEodAnchoredMatcher(const RoseBuildImpl &build,
-                        const std::map<u32, u32> &final_to_frag_map,
+                        const std::map<u32, LitFragment> &final_to_frag_map,
                         size_t *esize);
 
 void findMoreLiteralMasks(RoseBuildImpl &build);