git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
eod: move engine checks into ENGINES_EOD instr
author Justin Viiret <justin.viiret@intel.com>
Thu, 9 Jun 2016 04:41:15 +0000 (14:41 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Fri, 8 Jul 2016 00:54:07 +0000 (10:54 +1000)
src/rose/eod.c
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_dump.cpp
src/rose/rose_internal.h
src/rose/rose_program.h

index 32702bed79e9ca186bde24f3e2f4cf2d30d2d56c..4961a728c704ab7b59158040a1b94c1902719e14 100644 (file)
@@ -122,65 +122,6 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
     return MO_CONTINUE_MATCHING;
 }
 
-/**
- * \brief Check for (and deliver) reports from active output-exposed (suffix
- * or outfix) NFAs.
- *
- * \return MO_HALT_MATCHING if the user instructs us to stop.
- */
-static rose_inline
-int roseCheckNfaEod(const struct RoseEngine *t, struct hs_scratch *scratch,
-                    u64a offset, const char is_streaming) {
-    if (!t->eodNfaIterOffset) {
-        DEBUG_PRINTF("no engines that report at EOD\n");
-        return MO_CONTINUE_MATCHING;
-    }
-
-    /* data, len is used for state decompress, should be full available data */
-    u8 key = 0;
-    if (is_streaming) {
-        const u8 *eod_data = scratch->core_info.hbuf;
-        size_t eod_len = scratch->core_info.hlen;
-        key = eod_len ? eod_data[eod_len - 1] : 0;
-    }
-
-    const u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
-    const u32 aaCount = t->activeArrayCount;
-
-    const struct mmbit_sparse_iter *it = getByOffset(t, t->eodNfaIterOffset);
-    assert(ISALIGNED(it));
-
-    u32 idx = 0;
-    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
-
-    for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state);
-         qi != MMB_INVALID;
-         qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) {
-        const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-        const struct NFA *nfa = getNfaByInfo(t, info);
-
-        DEBUG_PRINTF("checking nfa %u\n", qi);
-        assert(nfaAcceptsEod(nfa));
-
-        char *fstate = scratch->fullState + info->fullStateOffset;
-        const char *sstate = scratch->core_info.state + info->stateOffset;
-
-        if (is_streaming) {
-            // Decompress stream state.
-            nfaExpandState(nfa, fstate, sstate, offset, key);
-        }
-
-        if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor,
-                               roseReportSomAdaptor,
-                               scratch) == MO_HALT_MATCHING) {
-            DEBUG_PRINTF("user instructed us to stop\n");
-            return MO_HALT_MATCHING;
-        }
-    }
-
-    return MO_CONTINUE_MATCHING;
-}
-
 static rose_inline
 void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset,
                             struct hs_scratch *scratch) {
@@ -269,10 +210,6 @@ void roseEodExec_i(const struct RoseEngine *t, u64a offset,
         return;
     }
 
-    if (roseCheckNfaEod(t, scratch, offset, is_streaming) == MO_HALT_MATCHING) {
-        return;
-    }
-
     if (!t->eodIterProgramOffset && !t->ematcherOffset) {
         DEBUG_PRINTF("no eod accepts\n");
         return;
index a913ae27c390e7b238884d58c0d92613b71aa952..a656c7151acb9ba2b48f49c435ba877a9124f010 100644 (file)
@@ -800,6 +800,57 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) {
     return end >= min_bound && end <= max_bound;
 }
 
+static rose_inline
+hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose,
+                           struct hs_scratch *scratch, u64a offset,
+                           u32 iter_offset) {
+    const char is_streaming = rose->mode != HS_MODE_BLOCK;
+
+    /* data, len is used for state decompress, should be full available data */
+    u8 key = 0;
+    if (is_streaming) {
+        const u8 *eod_data = scratch->core_info.hbuf;
+        size_t eod_len = scratch->core_info.hlen;
+        key = eod_len ? eod_data[eod_len - 1] : 0;
+    }
+
+    const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state);
+    const u32 aaCount = rose->activeArrayCount;
+
+    const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset);
+    assert(ISALIGNED(it));
+
+    u32 idx = 0;
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state);
+         qi != MMB_INVALID;
+         qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) {
+        const struct NfaInfo *info = getNfaInfoByQueue(rose, qi);
+        const struct NFA *nfa = getNfaByInfo(rose, info);
+
+        DEBUG_PRINTF("checking nfa %u\n", qi);
+        assert(nfaAcceptsEod(nfa));
+
+        char *fstate = scratch->fullState + info->fullStateOffset;
+        const char *sstate = scratch->core_info.state + info->stateOffset;
+
+        if (is_streaming) {
+            // Decompress stream state.
+            nfaExpandState(nfa, fstate, sstate, offset, key);
+        }
+
+        if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor,
+                               roseReportSomAdaptor,
+                               scratch) == MO_HALT_MATCHING) {
+            DEBUG_PRINTF("user instructed us to stop\n");
+            return HWLM_TERMINATE_MATCHING;
+        }
+    }
+
+    return HWLM_CONTINUE_MATCHING;
+}
+
 static
 void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
                     const char from_mpv) {
@@ -1301,6 +1352,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(ENGINES_EOD) {
+                if (roseEnginesEod(t, scratch, end, ri->iter_offset) ==
+                    HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(END) {
                 DEBUG_PRINTF("finished\n");
                 return HWLM_CONTINUE_MATCHING;
index ea602017bb051379415ac1150509a851cac64fb7..904f8df9a30598a93f6b6ac5d3bca448bc1f49a1 100644 (file)
@@ -223,6 +223,7 @@ public:
         case ROSE_INSTR_CHECK_STATE: return &u.checkState;
         case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
         case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
+        case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod;
         case ROSE_INSTR_END: return &u.end;
         }
         assert(0);
@@ -269,6 +270,7 @@ public:
         case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState);
         case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
         case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
+        case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod);
         case ROSE_INSTR_END: return sizeof(u.end);
         }
         assert(0);
@@ -314,6 +316,7 @@ public:
         ROSE_STRUCT_CHECK_STATE checkState;
         ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
         ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
+        ROSE_STRUCT_ENGINES_EOD enginesEod;
         ROSE_STRUCT_END end;
     } u;
 
@@ -3532,7 +3535,7 @@ u32 addPredBlocks(build_context &bc,
  * Returns the pair (program offset, sparse iter offset).
  */
 static
-pair<u32, u32> makeSparseIterProgram(build_context &bc,
+vector<RoseInstruction> makeSparseIterProgram(build_context &bc,
                     map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
                     const vector<RoseInstruction> &root_program,
                     const vector<RoseInstruction> &pre_program) {
@@ -3548,7 +3551,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
     // Add blocks to deal with non-root edges (triggered by sparse iterator or
     // mmbit_isset checks). This operation will flatten the program up to this
     // point.
-    u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false);
+    addPredBlocks(bc, predProgramLists, program, false);
 
     // If we have a root program, replace the END instruction with it. Note
     // that the root program has already been flattened.
@@ -3559,8 +3562,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
         program.insert(end(program), begin(root_program), end(root_program));
     }
 
-    applyFinalSpecialisation(program);
-    return {writeProgram(bc, program), iter_offset};
+    return program;
 }
 
 static
@@ -3778,8 +3780,9 @@ vector<RoseInstruction> buildLitInitialProgram(RoseBuildImpl &build,
 }
 
 static
-u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
-                        const vector<RoseEdge> &lit_edges) {
+vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
+                                            build_context &bc, u32 final_id,
+                                            const vector<RoseEdge> &lit_edges) {
     const auto &g = build.g;
 
     DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size());
@@ -3831,7 +3834,19 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
 
     // Put it all together.
     return makeSparseIterProgram(bc, predProgramLists, root_program,
-                                 pre_program).first;
+                                 pre_program);
+}
+
+static
+u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
+                        const vector<RoseEdge> &lit_edges) {
+    auto program = buildLiteralProgram(build, bc, final_id, lit_edges);
+    if (program.empty()) {
+        return 0;
+    }
+    // Note: already flattened.
+    applyFinalSpecialisation(program);
+    return writeProgram(bc, program);
 }
 
 static
@@ -3904,7 +3919,7 @@ pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
         const auto &lit_edges = lit_edge_map[finalId];
 
         litPrograms[finalId] =
-            buildLiteralProgram(build, bc, finalId, lit_edges);
+            writeLiteralProgram(build, bc, finalId, lit_edges);
         delayRebuildPrograms[finalId] =
             buildDelayRebuildProgram(build, bc, finalId);
     }
@@ -4020,33 +4035,53 @@ pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
 }
 
 static
-u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
-    if (build.eod_event_literal_id == MO_INVALID_IDX) {
-        return 0;
+u32 writeEodProgram(RoseBuildImpl &build, build_context &bc,
+                    u32 eodNfaIterOffset) {
+    vector<RoseInstruction> program;
+
+    if (build.eod_event_literal_id != MO_INVALID_IDX) {
+        const RoseGraph &g = build.g;
+        const auto &lit_info =
+            build.literal_info.at(build.eod_event_literal_id);
+        assert(lit_info.delayed_ids.empty());
+        assert(!lit_info.squash_group);
+        assert(!lit_info.requires_benefits);
+
+        // Collect all edges leading into EOD event literal vertices.
+        vector<RoseEdge> edge_list;
+        for (const auto &v : lit_info.vertices) {
+            for (const auto &e : in_edges_range(v, g)) {
+                edge_list.push_back(e);
+            }
+        }
+
+        // Sort edge list for determinism, prettiness.
+        sort(begin(edge_list), end(edge_list),
+             [&g](const RoseEdge &a, const RoseEdge &b) {
+                 return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
+                        tie(g[source(b, g)].idx, g[target(b, g)].idx);
+             });
+
+        program = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
     }
 
-    const RoseGraph &g = build.g;
-    const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
-    assert(lit_info.delayed_ids.empty());
-    assert(!lit_info.squash_group);
-    assert(!lit_info.requires_benefits);
-
-    // Collect all edges leading into EOD event literal vertices.
-    vector<RoseEdge> edge_list;
-    for (const auto &v : lit_info.vertices) {
-        for (const auto &e : in_edges_range(v, g)) {
-            edge_list.push_back(e);
+    if (eodNfaIterOffset) {
+        auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD);
+        ri.u.enginesEod.iter_offset = eodNfaIterOffset;
+        if (!program.empty()) {
+            assert(program.back().code() == ROSE_INSTR_END);
+            program.pop_back();
         }
+        program.push_back(move(ri));
+        program = flattenProgram({program});
     }
 
-    // Sort edge list for determinism, prettiness.
-    sort(begin(edge_list), end(edge_list),
-         [&g](const RoseEdge &a, const RoseEdge &b) {
-             return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
-                    tie(g[source(b, g)].idx, g[target(b, g)].idx);
-         });
+    if (program.empty()) {
+        return 0;
+    }
 
-    return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
+    applyFinalSpecialisation(program);
+    return writeProgram(bc, program);
 }
 
 static
@@ -4210,7 +4245,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     tie(litProgramOffset, litDelayRebuildProgramOffset) =
         buildLiteralPrograms(*this, bc);
 
-    u32 eodProgramOffset = writeEodProgram(*this, bc);
+    u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
     u32 eodIterProgramOffset;
     u32 eodIterOffset;
     tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc);
@@ -4412,7 +4447,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->eodProgramOffset = eodProgramOffset;
     engine->eodIterProgramOffset = eodIterProgramOffset;
     engine->eodIterOffset = eodIterOffset;
-    engine->eodNfaIterOffset = eodNfaIterOffset;
 
     engine->lastByteHistoryIterOffset = lastByteOffset;
 
index ad776780139225a630bd9684bfd143369ed5f039..59f7f7512d4184b13ead763e4ee7f1d57761b7b2 100644 (file)
@@ -476,6 +476,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(ENGINES_EOD) {
+                os << "    iter_offset " << ri->iter_offset << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(END) { return; }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -1022,7 +1027,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, eodProgramOffset);
     DUMP_U32(t, eodIterProgramOffset);
     DUMP_U32(t, eodIterOffset);
-    DUMP_U32(t, eodNfaIterOffset);
     DUMP_U32(t, lastByteHistoryIterOffset);
     DUMP_U32(t, minWidth);
     DUMP_U32(t, minWidthExcludingBoundaries);
index bbe0b1b61fa3947ad8d2854e8caf070d1cc7a1ad..2e921542fcb5c1472067c6a6cea4b5dff9f8c517 100644 (file)
@@ -380,9 +380,6 @@ struct RoseEngine {
     u32 eodIterProgramOffset; // or 0 if no eod iterator program
     u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
 
-    /** \brief Offset to sparse iter over outfix/suffix NFAs that accept EOD. */
-    u32 eodNfaIterOffset;
-
     u32 lastByteHistoryIterOffset; // if non-zero
 
     /** \brief Minimum number of bytes required to match. */
index 5c57bf548cc9811c45c267041c9db4035044be28..b89611171891d35c3715bc92da52b3cf784cedf6 100644 (file)
@@ -96,6 +96,10 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_STATE,       //!< Test a single bit in the state multibit.
     ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
     ROSE_INSTR_SPARSE_ITER_NEXT,  //!< Continue running sparse iter over states.
+
+    /** \brief Check outfixes and suffixes for EOD and fire reports if so. */
+    ROSE_INSTR_ENGINES_EOD,
+
     ROSE_INSTR_END                //!< End of program.
 };
 
@@ -352,6 +356,11 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
+struct ROSE_STRUCT_ENGINES_EOD {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
+};
+
 struct ROSE_STRUCT_END {
     u8 code; //!< From enum RoseInstructionCode.
 };