git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
rose: build a separate delay rebuild matcher
author Justin Viiret <justin.viiret@intel.com>
Wed, 25 Jan 2017 03:45:12 +0000 (14:45 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 26 Apr 2017 04:46:48 +0000 (14:46 +1000)
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_dump.cpp
src/rose/rose_build_matchers.cpp
src/rose/rose_build_matchers.h
src/rose/rose_dump.cpp
src/rose/rose_internal.h
src/rose/stream.c

index 736e0d357bb868ff25b6715d8a79aa4a1472019f..321d1ca241dda51f3da50d2d0b49ae0c7695157e 100644 (file)
@@ -5535,6 +5535,17 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
         bc.resources.has_floating = true;
     }
 
+    // Build delay rebuild HWLM matcher.
+    size_t drsize = 0;
+    auto drtable = buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold,
+                                            final_to_frag_map, &drsize);
+    u32 drmatcherOffset = 0;
+    if (drtable) {
+        currOffset = ROUNDUP_CL(currOffset);
+        drmatcherOffset = currOffset;
+        currOffset += verify_u32(drsize);
+    }
+
     // Build EOD-anchored HWLM matcher.
     size_t esize = 0;
     auto etable = buildEodAnchoredMatcher(*this, final_to_frag_map, &esize);
@@ -5632,6 +5643,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
         assert(fmatcherOffset);
         memcpy(ptr + fmatcherOffset, ftable.get(), fsize);
     }
+    if (drtable) {
+        assert(drmatcherOffset);
+        memcpy(ptr + drmatcherOffset, drtable.get(), drsize);
+    }
     if (etable) {
         assert(ematcherOffset);
         memcpy(ptr + ematcherOffset, etable.get(), esize);
@@ -5724,6 +5739,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->ematcherOffset = ematcherOffset;
     engine->sbmatcherOffset = sbmatcherOffset;
     engine->fmatcherOffset = fmatcherOffset;
+    engine->drmatcherOffset = drmatcherOffset;
     engine->longLitTableOffset = longLitTableOffset;
     engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
     engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
index 495d6f363646b95accbbb8c575c7da55cf5a7aea..abd3462971e6a516c5cd280e3045d5e0e1f3bce6 100644 (file)
@@ -507,23 +507,27 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
 
     const auto final_to_frag_map = groupByFragment(build);
 
-    auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED,
+    auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, false,
                                longLitLengthThreshold);
     dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits);
 
-    mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
+    mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
                           longLitLengthThreshold);
     dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits);
 
-    mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,
+    mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true,
+                          longLitLengthThreshold);
+    dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", mp.lits);
+
+    mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, false,
                           build.ematcher_region_size);
     dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits);
 
     if (!build.cc.streaming) {
-        mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
+        mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
                               ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
         auto mp2 = makeMatcherProto(build, final_to_frag_map,
-                                    ROSE_ANCHORED_SMALL_BLOCK,
+                                    ROSE_ANCHORED_SMALL_BLOCK, false,
                                     ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
         mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits));
         dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits);
index e16a0ac7ad696458fad2c3dc528461a9291c3028..9f7709737af53d4d8d48e95bdb615fe9d2ecaccb 100644 (file)
@@ -666,8 +666,8 @@ void trim_to_suffix(Container &c, size_t len) {
 
 MatcherProto makeMatcherProto(const RoseBuildImpl &build,
                               const map<u32, u32> &final_to_frag_map,
-                              rose_literal_table table, size_t max_len,
-                              u32 max_offset) {
+                              rose_literal_table table, bool delay_rebuild,
+                              size_t max_len, u32 max_offset) {
     MatcherProto mp;
 
     for (const auto &e : build.literals.right) {
@@ -694,6 +694,13 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
         DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(),
                      lit.length());
 
+        // When building the delay rebuild table, we only want to include
+        // literals that have delayed variants.
+        if (delay_rebuild && info.delayed_ids.empty()) {
+            DEBUG_PRINTF("not needed for delay rebuild\n");
+            continue;
+        }
+
         if (max_offset != ROSE_BOUND_INF) {
             u64a min_report = literalMinReportOffset(build, e.second, info);
             if (min_report > max_offset) {
@@ -802,7 +809,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
     *fsize = 0;
     *fgroups = 0;
 
-    auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
+    auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
                                longLitLengthThreshold);
     if (mp.lits.empty()) {
         DEBUG_PRINTF("empty floating matcher\n");
@@ -832,6 +839,36 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
     return hwlm;
 }
 
+aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
+    const RoseBuildImpl &build, size_t longLitLengthThreshold,
+    const map<u32, u32> &final_to_frag_map, size_t *drsize) {
+    *drsize = 0; // stays 0 on every path that returns nullptr
+    // Builds the HWLM table used for delay rebuild; streaming mode only.
+    if (!build.cc.streaming) {
+        DEBUG_PRINTF("not streaming\n");
+        return nullptr;
+    }
+    // delay_rebuild=true drops literals that have no delayed variants.
+    auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true,
+                               longLitLengthThreshold);
+    if (mp.lits.empty()) {
+        DEBUG_PRINTF("empty delay rebuild matcher\n");
+        return nullptr;
+    }
+    // Literals were selected, so a null result from hwlmBuild is fatal.
+    auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups());
+    if (!hwlm) {
+        throw CompileError("Unable to generate bytecode.");
+    }
+
+    buildAccel(build, mp, *hwlm);
+    // Report the table's size in bytes through the out-parameter.
+    *drsize = hwlmSize(hwlm.get());
+    assert(*drsize);
+    DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", *drsize);
+    return hwlm;
+}
+
 aligned_unique_ptr<HWLM>
 buildSmallBlockMatcher(const RoseBuildImpl &build,
                        const map<u32, u32> &final_to_frag_map, size_t *sbsize) {
@@ -849,7 +886,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
         return nullptr;
     }
 
-    auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
+    auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
                                ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
     if (mp.lits.empty()) {
         DEBUG_PRINTF("no floating table\n");
@@ -861,7 +898,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
 
     auto mp_anchored =
         makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED_SMALL_BLOCK,
-                         ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
+                         false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
     if (mp_anchored.lits.empty()) {
         DEBUG_PRINTF("no small-block anchored literals\n");
         return nullptr;
@@ -898,7 +935,7 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build,
     *esize = 0;
 
     auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,
-                               build.ematcher_region_size);
+                               false, build.ematcher_region_size);
 
     if (mp.lits.empty()) {
         DEBUG_PRINTF("no eod anchored literals\n");
index a1817307038daba49b124cac1137898441264040..b06d460fe424b733d1d1230296c066c78b7c4fad 100644 (file)
@@ -69,8 +69,8 @@ struct MatcherProto {
  */
 MatcherProto makeMatcherProto(const RoseBuildImpl &build,
                               const std::map<u32, u32> &final_to_frag_map,
-                              rose_literal_table table, size_t max_len,
-                              u32 max_offset = ROSE_BOUND_INF);
+                              rose_literal_table table, bool delay_rebuild,
+                              size_t max_len, u32 max_offset = ROSE_BOUND_INF);
 
 aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
                             size_t longLitLengthThreshold,
@@ -79,6 +79,10 @@ aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
                             size_t *fsize,
                             size_t *historyRequired);
 
+aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
+    const RoseBuildImpl &build, size_t longLitLengthThreshold,
+    const std::map<u32, u32> &final_to_frag_map, size_t *drsize);
+
 aligned_unique_ptr<HWLM>
 buildSmallBlockMatcher(const RoseBuildImpl &build,
                        const std::map<u32, u32> &final_to_frag_map,
index 96f496886d385cb206a55c93045d3aed420f95ac..d83f8f9eb4a8a2708740d00ac9d708dee7375db4 100644 (file)
@@ -108,6 +108,11 @@ const HWLM *getFloatingMatcher(const RoseEngine *t) {
     return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset);
 }
 
+static // resolves drmatcherOffset to the HWLM table; callers null-check it
+const HWLM *getDelayRebuildMatcher(const RoseEngine *t) {
+    return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset);
+}
+
 static
 const HWLM *getEodMatcher(const RoseEngine *t) {
     return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset);
@@ -1158,6 +1163,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
 
     const void *atable = getAnchoredMatcher(t);
     const HWLM *ftable = getFloatingMatcher(t);
+    const HWLM *drtable = getDelayRebuildMatcher(t);
     const HWLM *etable = getEodMatcher(t);
     const HWLM *sbtable = getSmallBlockMatcher(t);
 
@@ -1212,6 +1218,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
     } else {
         fprintf(f, "\n");
     }
+    fprintf(f, " - delay-rb matcher  : %zu bytes\n",
+            drtable ? hwlmSize(drtable) : 0);
     fprintf(f, " - eod-anch matcher  : %zu bytes over last %u bytes\n",
             etable ? hwlmSize(etable) : 0, t->ematcherRegionSize);
     fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n",
@@ -1274,6 +1282,11 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
         hwlmPrintStats(ftable, f);
     }
 
+    if (drtable) {
+        fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n");
+        hwlmPrintStats(drtable, f);
+    }
+
     if (etable) {
         fprintf(f, "\nEOD-anchored literal matcher stats:\n\n");
         hwlmPrintStats(etable, f);
@@ -1322,6 +1335,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, amatcherOffset);
     DUMP_U32(t, ematcherOffset);
     DUMP_U32(t, fmatcherOffset);
+    DUMP_U32(t, drmatcherOffset);
     DUMP_U32(t, sbmatcherOffset);
     DUMP_U32(t, longLitTableOffset);
     DUMP_U32(t, amatcherMinWidth);
index bf6e9a864440e848fa28bdfc6ca4ce39df0613c4..8e55a37d1d176e7aeb7786ddb781cbe2733aa846 100644 (file)
@@ -326,6 +326,7 @@ struct RoseEngine {
     u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
     u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
     u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
+    u32 drmatcherOffset; // offset of the delayed rebuild table (bytes)
     u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
     u32 longLitTableOffset; // offset of the long literal table
     u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
index 17139b25670f35af52e0c7589e96bd4e225282aa..31224276eee1bd7f12fd3bd646a1794d9344ce97 100644 (file)
@@ -412,16 +412,22 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state,
 }
 
 static really_inline
-void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
-                struct hs_scratch *scratch) {
+void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) {
     assert(!can_stop_matching(scratch));
+    /* Scans history with the dedicated delay rebuild matcher, if any. */
+    if (!t->drmatcherOffset) { /* offset 0: engine has no such table */
+        DEBUG_PRINTF("no delayed rebuild table\n");
+        return;
+    }
+    /* Table is located through its bytecode offset, not passed in. */
+    const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset);
     size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
     const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
     DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
 
     scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
 
-    hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch,
+    hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch,
              scratch->tctxt.groups);
     assert(!can_stop_matching(scratch));
 }
@@ -637,13 +643,13 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
 
         if (!flen) {
             if (rebuild) { /* rebuild floating delayed match stuff */
-                do_rebuild(t, ftable, scratch);
+                do_rebuild(t, scratch);
             }
             goto flush_delay_and_exit;
         }
 
         if (rebuild) { /* rebuild floating delayed match stuff */
-            do_rebuild(t, ftable, scratch);
+            do_rebuild(t, scratch);
         }
 
         if (flen + offset <= t->floatingMinDistance) {