git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
rose: add RECORD_ANCHORED instruction to program
author Justin Viiret <justin.viiret@intel.com>
Thu, 14 Jul 2016 00:05:47 +0000 (10:05 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 10 Aug 2016 04:59:10 +0000 (14:59 +1000)
Moves recordAnchoredLiteralMatch from an unconditional call in the
anchored callback to being driven by a program instruction.

src/rose/match.c
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_dump.cpp
src/rose/rose_program.h

index 15d3534c38edc00531fce9f3c37857ecaf825eef..95cb141eafa50fced8f537bb549e166559e311cc 100644 (file)
@@ -112,28 +112,6 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
     return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained);
 }
 
-static rose_inline
-void recordAnchoredLiteralMatch(const struct RoseEngine *t,
-                                struct hs_scratch *scratch, u32 literal_id,
-                                u64a end) {
-    assert(end);
-    struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
-
-    DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);
-
-    if (!bf64_set(&scratch->al_log_sum, end - 1)) {
-        // first time, clear row
-        DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count);
-        fatbit_clear(anchoredLiteralRows[end - 1]);
-    }
-
-    u32 rel_idx = literal_id - t->anchored_base_id;
-    DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
-                 t->anchored_count);
-    assert(rel_idx < t->anchored_count);
-    fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
-}
-
 hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
                                  struct hs_scratch *scratch, u32 event,
                                  u64a top_squash_distance, u64a end,
@@ -254,10 +232,6 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
 
     DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
 
-    if (real_end > t->floatingMinLiteralMatchOffset) {
-        recordAnchoredLiteralMatch(t, scratch, id, real_end);
-    }
-
     return MO_CONTINUE_MATCHING;
 }
 
index fdaa2e0712b5de9f26e523a809a3585d3657056e..fef41269caafbea91b6908e688dfc1cbc9a32782 100644 (file)
@@ -165,6 +165,33 @@ void rosePushDelayedMatch(const struct RoseEngine *t,
     fatbit_set(slot, delay_count, delay_index);
 }
 
+static rose_inline
+void recordAnchoredLiteralMatch(const struct RoseEngine *t,
+                                struct hs_scratch *scratch, u32 literal_id,
+                                u64a end) {
+    assert(end);
+
+    if (end <= t->floatingMinLiteralMatchOffset) {
+        return;
+    }
+
+    struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
+
+    DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);
+
+    if (!bf64_set(&scratch->al_log_sum, end - 1)) {
+        // first time, clear row
+        DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count);
+        fatbit_clear(anchoredLiteralRows[end - 1]);
+    }
+
+    u32 rel_idx = literal_id - t->anchored_base_id;
+    DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
+                 t->anchored_count);
+    assert(rel_idx < t->anchored_count);
+    fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
+}
+
 static rose_inline
 char roseLeftfixCheckMiracles(const struct RoseEngine *t,
                               const struct LeftNfaInfo *left,
@@ -1226,6 +1253,11 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(RECORD_ANCHORED) {
+                recordAnchoredLiteralMatch(t, scratch, ri->id, end);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CATCH_UP) {
                 if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
index 37e6ae1320c6993565ece01eec0d6d445e810cf9..5cd8161b384250749815de90ecd8c2b2d798dabc 100644 (file)
@@ -204,6 +204,7 @@ public:
         case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix;
         case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
         case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
+        case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored;
         case ROSE_INSTR_CATCH_UP: return &u.catchUp;
         case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv;
         case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
@@ -255,6 +256,7 @@ public:
         case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix);
         case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
         case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
+        case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored);
         case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp);
         case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv);
         case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
@@ -305,6 +307,7 @@ public:
         ROSE_STRUCT_CHECK_PREFIX checkPrefix;
         ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
         ROSE_STRUCT_PUSH_DELAYED pushDelayed;
+        ROSE_STRUCT_RECORD_ANCHORED recordAnchored;
         ROSE_STRUCT_CATCH_UP catchUp;
         ROSE_STRUCT_CATCH_UP_MPV catchUpMpv;
         ROSE_STRUCT_SOM_ADJUST somAdjust;
@@ -4432,6 +4435,49 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
     program.push_back(move(ri));
 }
 
+static
+u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
+    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
+    assert(!lit_vertices.empty());
+
+    u32 max_offset = 0;
+    for (const auto &v : lit_vertices) {
+        max_offset = max(max_offset, build.g[v].max_offset);
+    }
+
+    return max_offset;
+}
+
+static
+void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
+                                   build_context &bc, u32 final_id,
+                                   vector<RoseInstruction> &program) {
+    assert(contains(build.final_id_to_literal, final_id));
+    const auto &lit_ids = build.final_id_to_literal.at(final_id);
+
+    // Must be anchored.
+    assert(!lit_ids.empty());
+    if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) {
+        return;
+    }
+
+    // If this anchored literal can never match past
+    // floatingMinLiteralMatchOffset, we will never have to record it.
+    u32 max_offset = 0;
+    for (u32 lit_id : lit_ids) {
+        assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED);
+        max_offset = max(max_offset, findMaxOffset(build, lit_id));
+    }
+
+    if (max_offset <= bc.floatingMinLiteralMatchOffset) {
+        return;
+    }
+
+    auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED);
+    ri.u.recordAnchored.id = final_id;
+    program.push_back(move(ri));
+}
+
 static
 u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
     const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
@@ -4589,10 +4635,18 @@ vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
         root_programs.push_back(role_prog);
     }
 
-    // Literal may squash groups.
     if (final_id != MO_INVALID_IDX) {
-        root_programs.push_back({});
-        makeGroupSquashInstruction(build, final_id, root_programs.back());
+        vector<RoseInstruction> prog;
+
+        // Literal may squash groups.
+        makeGroupSquashInstruction(build, final_id, prog);
+
+        // Literal may be anchored and need to be recorded.
+        makeRecordAnchoredInstruction(build, bc, final_id, prog);
+
+        if (!prog.empty()) {
+            root_programs.push_back(move(prog));
+        }
     }
 
     vector<RoseInstruction> root_program;
index c483443c0346654a690ad6ddbf806ccf742ff459..dedd8fcf4c288644569b9e75fd4389d015476566 100644 (file)
@@ -337,6 +337,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(RECORD_ANCHORED) {
+                os << "    id " << ri->id << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CATCH_UP) {}
             PROGRAM_NEXT_INSTRUCTION
 
index 6ca117ea139c726db16771aeb022ff1e223b3759..545e190fe126ff349b03558f7083238ad8d8662e 100644 (file)
@@ -55,6 +55,7 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_INFIX,       //!< Infix engine must be in accept state.
     ROSE_INSTR_CHECK_PREFIX,      //!< Prefix engine must be in accept state.
     ROSE_INSTR_PUSH_DELAYED,      //!< Push delayed literal matches.
+    ROSE_INSTR_RECORD_ANCHORED,   //!< Record an anchored literal match.
     ROSE_INSTR_CATCH_UP,          //!< Catch up engines, anchored matches.
     ROSE_INSTR_CATCH_UP_MPV,      //!< Catch up the MPV.
     ROSE_INSTR_SOM_ADJUST,        //!< Set SOM from a distance to EOM.
@@ -207,6 +208,11 @@ struct ROSE_STRUCT_PUSH_DELAYED {
     u32 index; // Delay literal index (relative to first delay lit).
 };
 
+struct ROSE_STRUCT_RECORD_ANCHORED {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 id; //!< Literal ID.
+};
+
 struct ROSE_STRUCT_CATCH_UP {
     u8 code; //!< From enum RoseInstructionCode.
 };