]> git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Rose: use program for boundary reports
authorJustin Viiret <justin.viiret@intel.com>
Thu, 3 Mar 2016 04:36:10 +0000 (15:36 +1100)
committerMatthew Barr <matthew.barr@intel.com>
Wed, 20 Apr 2016 03:34:54 +0000 (13:34 +1000)
Use the program to handle report lists at boundaries, rather than the
previous list-of-reports approach.

src/rose/match.c
src/rose/rose.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_internal.h
src/runtime.c

index b2b0238de2b463c46118cbd63a707f54dd102dcc..9163cba3a66c52e8c6ba60e387d1f6895aaf6843 100644 (file)
@@ -573,3 +573,39 @@ hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id,
     assert(id < rose->literalCount);
     return roseRunProgram(rose, scratch, programs[id], real_end, match_len, 0);
 }
+
+/**
+ * \brief Execute a boundary report program.
+ *
+ * Returns MO_HALT_MATCHING if the stream is exhausted or the user has
+ * instructed us to halt, or MO_CONTINUE_MATCHING otherwise.
+ */
+int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
+                           u64a stream_offset, struct hs_scratch *scratch) {
+    DEBUG_PRINTF("running boundary program at offset %u\n", program);
+
+    if (can_stop_matching(scratch)) {
+        DEBUG_PRINTF("can stop matching\n");
+        return MO_HALT_MATCHING;
+    }
+
+    if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) {
+        /* we cannot delay the initialization of the som deduper logs any longer
+         * as we are reporting matches. This is done explicitly as we are
+         * shortcutting the som handling in the vacuous repeats as we know they
+         * all come from non-som patterns. */
+        fatbit_clear(scratch->deduper.som_log[0]);
+        fatbit_clear(scratch->deduper.som_log[1]);
+        scratch->deduper.som_log_dirty = 0;
+    }
+
+    const size_t match_len = 0;
+    const char in_anchored = 0;
+    hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, stream_offset,
+                                    match_len, in_anchored);
+    if (rv == HWLM_TERMINATE_MATCHING) {
+        return MO_HALT_MATCHING;
+    }
+
+    return MO_CONTINUE_MATCHING;
+}
index 5dc57bc2ec94bada970f2cc4c7180bca0d9b80db..a9058379e29bbda543d92e97a39d9681b0960c4d 100644 (file)
@@ -131,4 +131,7 @@ void roseEodExec(const struct RoseEngine *t, u64a offset,
 hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id,
                                     void *context);
 
+int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
+                           u64a stream_offset, struct hs_scratch *scratch);
+
 #endif // ROSE_H
index c2d9bccedb7fa023e2e90fcd9b78f1e35759423b..bb11b1a74f261ad9f912a34626ca870a296add47 100644 (file)
@@ -1941,70 +1941,6 @@ struct DerivedBoundaryReports {
     set<ReportID> report_at_0_eod_full;
 };
 
-static
-void reserveBoundaryReports(const BoundaryReports &boundary,
-                            const DerivedBoundaryReports &dboundary,
-                            RoseBoundaryReports *out, u32 *currOffset) {
-    u32 curr = *currOffset;
-    curr = ROUNDUP_N(curr, alignof(ReportID));
-    memset(out, 0, sizeof(*out));
-
-    /* report lists are + 1 in size due to terminator */
-    if (!boundary.report_at_eod.empty()) {
-        out->reportEodOffset = curr;
-        curr += sizeof(ReportID) * (boundary.report_at_eod.size() + 1);
-    }
-    if (!boundary.report_at_0.empty()) {
-        out->reportZeroOffset = curr;
-        curr += sizeof(ReportID) * (boundary.report_at_0.size() + 1);
-    }
-    if (!dboundary.report_at_0_eod_full.empty()) {
-        out->reportZeroEodOffset = curr;
-        curr += sizeof(ReportID) * (dboundary.report_at_0_eod_full.size() + 1);
-    }
-
-    DEBUG_PRINTF("report ^:  %zu\n", boundary.report_at_0.size());
-    DEBUG_PRINTF("report $:  %zu\n", boundary.report_at_eod.size());
-    DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
-
-    *currOffset = curr;
-}
-
-static
-void fillInBoundaryReports(RoseEngine *engine, u32 offset,
-                           const set<ReportID> &rl) {
-    if (rl.empty()) {
-        return;
-    }
-
-    u32 *out = (u32 *)((char *)engine + offset);
-    assert(ISALIGNED(out));
-
-    for (ReportID r : rl) {
-        *out = r;
-        ++out;
-    }
-
-    *out = MO_INVALID_IDX;
-}
-
-static
-void populateBoundaryReports(RoseEngine *engine,
-                             const BoundaryReports &boundary,
-                             const DerivedBoundaryReports &dboundary,
-                             const RoseBoundaryReports &offsets) {
-    engine->boundary.reportEodOffset = offsets.reportEodOffset;
-    engine->boundary.reportZeroOffset = offsets.reportZeroOffset;
-    engine->boundary.reportZeroEodOffset = offsets.reportZeroEodOffset;
-
-    fillInBoundaryReports(engine, offsets.reportEodOffset,
-                          boundary.report_at_eod);
-    fillInBoundaryReports(engine, offsets.reportZeroOffset,
-                          boundary.report_at_0);
-    fillInBoundaryReports(engine, offsets.reportZeroEodOffset,
-                          dboundary.report_at_0_eod_full);
-}
-
 static
 void fillInReportInfo(RoseEngine *engine, u32 reportOffset,
                       const ReportManager &rm, const vector<Report> &reports) {
@@ -2917,6 +2853,43 @@ vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
     return program;
 }
 
+static
+u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc,
+                         const set<ReportID> &reports) {
+    if (reports.empty()) {
+        return 0;
+    }
+
+    const bool has_som = false;
+    vector<RoseInstruction> program;
+    for (const auto &id : reports) {
+        makeReport(build, bc, id, has_som, program);
+    }
+    return writeProgram(bc, flattenProgram({program}));
+}
+
+static
+RoseBoundaryReports
+makeBoundaryPrograms(RoseBuildImpl &build, build_context &bc,
+                     const BoundaryReports &boundary,
+                     const DerivedBoundaryReports &dboundary) {
+    RoseBoundaryReports out;
+    memset(&out, 0, sizeof(out));
+
+    DEBUG_PRINTF("report ^:  %zu\n", boundary.report_at_0.size());
+    DEBUG_PRINTF("report $:  %zu\n", boundary.report_at_eod.size());
+    DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
+
+    out.reportEodOffset =
+        writeBoundaryProgram(build, bc, boundary.report_at_eod);
+    out.reportZeroOffset =
+        writeBoundaryProgram(build, bc, boundary.report_at_0);
+    out.reportZeroEodOffset =
+        writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full);
+
+    return out;
+}
+
 static
 void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
     const auto &g = build.g;
@@ -3788,6 +3761,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
         bc.resources.has_anchored = true;
     }
 
+    auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary);
+
     // Build NFAs
     set<u32> no_retrigger_queues;
     bool mpv_as_outfix;
@@ -3928,9 +3903,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     u32 activeLeftCount = leftInfoTable.size();
     u32 rosePrefixCount = countRosePrefixes(leftInfoTable);
 
-    RoseBoundaryReports boundary_out;
-    reserveBoundaryReports(boundary, dboundary, &boundary_out, &currOffset);
-
     u32 rev_nfa_table_offset;
     vector<u32> rev_nfa_offsets;
     prepSomRevNfas(ssm, &rev_nfa_table_offset, &rev_nfa_offsets, &currOffset);
@@ -4098,7 +4070,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->asize = verify_u32(asize);
     engine->ematcherRegionSize = ematcher_region_size;
     engine->floatingStreamState = verify_u32(floatingStreamStateRequired);
-    populateBoundaryReports(engine.get(), boundary, dboundary, boundary_out);
+
+    engine->boundary.reportEodOffset = boundary_out.reportEodOffset;
+    engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset;
+    engine->boundary.reportZeroEodOffset = boundary_out.reportZeroEodOffset;
 
     write_out(&engine->state_init, (char *)engine.get(), state_scatter,
               state_scatter_aux_offset);
index e9edbc154919c9d5abe17fadd796b0f3b1e98d20..37ff9168a68c437c65355b8fcc25ea8ed7c05175 100644 (file)
@@ -235,14 +235,17 @@ struct RoseStateOffsets {
 };
 
 struct RoseBoundaryReports {
-    u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of
-                          * MO_INVALID_IDX terminated list to report at EOD */
-    u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of
-                           * MO_INVALID_IDX terminated list to report at offset
-                           * 0 */
-    u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of
-                              * MO_INVALID_IDX terminated list to report if eod
-                              * is at offset 0. Superset of other lists. */
+    /** \brief 0 if no reports list, otherwise offset of program to run to
+     * deliver reports at EOD. */
+    u32 reportEodOffset;
+
+    /** \brief 0 if no reports list, otherwise offset of program to run to
+     * deliver reports at offset 0. */
+    u32 reportZeroOffset;
+
+    /** \brief 0 if no reports list, otherwise offset of program to run to
+     * deliver reports if EOD is at offset 0. Superset of other programs. */
+    u32 reportZeroEodOffset;
 };
 
 /* NFA Queue Assignment
index 5f3579186191f5817fab0ff9a1a975f8e1bf705f..8a0cb9f98e9242c9edf25f9412cfd6d02dca3f5b 100644 (file)
@@ -260,57 +260,6 @@ SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) {
     return is_simple ? outfixSimpleSomSomAdaptor : outfixSomSomAdaptor;
 }
 
-/**
- * \brief Fire callbacks for a boundary report list.
- *
- * Returns MO_HALT_MATCHING if the user has instructed us to halt, and
- * MO_CONTINUE_MATCHING otherwise.
- */
-
-static never_inline
-int processReportList(const struct RoseEngine *rose, u32 base_offset,
-                      u64a stream_offset, hs_scratch_t *scratch) {
-    DEBUG_PRINTF("running report list at offset %u\n", base_offset);
-
-    if (told_to_stop_matching(scratch)) {
-        DEBUG_PRINTF("matching has been terminated\n");
-        return MO_HALT_MATCHING;
-    }
-
-    if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) {
-        /* we cannot delay the initialization of the som deduper logs any longer
-         * as we are reporting matches. This is done explicitly as we are
-         * shortcutting the som handling in the vacuous repeats as we know they
-         * all come from non-som patterns. */
-
-        fatbit_clear(scratch->deduper.som_log[0]);
-        fatbit_clear(scratch->deduper.som_log[1]);
-        scratch->deduper.som_log_dirty = 0;
-    }
-
-    const ReportID *report = getByOffset(rose, base_offset);
-
-    /* never required to do som as vacuous reports are always external */
-
-    if (rose->simpleCallback) {
-        for (; *report != MO_INVALID_IDX; report++) {
-            int rv = roseSimpleAdaptor(stream_offset, *report, scratch);
-            if (rv == MO_HALT_MATCHING) {
-                return MO_HALT_MATCHING;
-            }
-        }
-    } else {
-        for (; *report != MO_INVALID_IDX; report++) {
-            int rv = roseAdaptor(stream_offset, *report, scratch);
-            if (rv == MO_HALT_MATCHING) {
-                return MO_HALT_MATCHING;
-            }
-        }
-    }
-
-    return MO_CONTINUE_MATCHING;
-}
-
 /** \brief Initialise SOM state. Used in both block and streaming mode. */
 static really_inline
 void initSomState(const struct RoseEngine *rose, char *state) {
@@ -488,15 +437,15 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length,
 
     if (!length) {
         if (rose->boundary.reportZeroEodOffset) {
-            processReportList(rose, rose->boundary.reportZeroEodOffset, 0,
-                              scratch);
+            roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0,
+                                   scratch);
         }
         goto set_retval;
     }
 
     if (rose->boundary.reportZeroOffset) {
-        int rv = processReportList(rose, rose->boundary.reportZeroOffset, 0,
-                                   scratch);
+        int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
+                                        0, scratch);
         if (rv == MO_HALT_MATCHING) {
             goto set_retval;
         }
@@ -559,8 +508,8 @@ done_scan:
     }
 
     if (rose->boundary.reportEodOffset) {
-        processReportList(rose, rose->boundary.reportEodOffset, length,
-                          scratch);
+        roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length,
+                               scratch);
     }
 
 set_retval:
@@ -727,25 +676,28 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
                      getHistory(state, rose, id->offset),
                      getHistoryAmount(rose, id->offset), id->offset, status, 0);
 
+    // Rose program execution (used for some report paths) depends on these
+    // values being initialised.
+    scratch->tctxt.lastMatchOffset = 0;
+    scratch->tctxt.minMatchOffset = id->offset;
+
     if (rose->somLocationCount) {
         loadSomFromStream(scratch, id->offset);
     }
 
     if (!id->offset) {
         if (rose->boundary.reportZeroEodOffset) {
-            int rv = processReportList(rose, rose->boundary.reportZeroEodOffset,
-                                       0, scratch);
+            int rv = roseRunBoundaryProgram(
+                rose, rose->boundary.reportZeroEodOffset, 0, scratch);
             if (rv == MO_HALT_MATCHING) {
-                scratch->core_info.status |= STATUS_TERMINATED;
                 return;
             }
         }
     } else {
         if (rose->boundary.reportEodOffset) {
-            int rv = processReportList(rose, rose->boundary.reportEodOffset,
-                              id->offset, scratch);
+            int rv = roseRunBoundaryProgram(
+                rose, rose->boundary.reportEodOffset, id->offset, scratch);
             if (rv == MO_HALT_MATCHING) {
-                scratch->core_info.status |= STATUS_TERMINATED;
                 return;
             }
         }
@@ -978,9 +930,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
 
     if (!id->offset && rose->boundary.reportZeroOffset) {
         DEBUG_PRINTF("zero reports\n");
-        processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch);
-        if (unlikely(can_stop_matching(scratch))) {
-            DEBUG_PRINTF("stream is broken, halting scan\n");
+        int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
+                                        0, scratch);
+        if (rv == MO_HALT_MATCHING) {
+            DEBUG_PRINTF("halting scan\n");
             setStreamStatus(state, scratch->core_info.status);
             if (told_to_stop_matching(scratch)) {
                 return HS_SCAN_TERMINATED;