git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Rose: move more report handling work into program
author: Justin Viiret <justin.viiret@intel.com>
Mon, 18 Jan 2016 00:56:01 +0000 (11:56 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
Tue, 1 Mar 2016 00:32:01 +0000 (11:32 +1100)
Move report preconditions (bounds, exhaustion, etc) into program
instructions and use a more direct path to the user match callback than
the adaptor functions.

Report handling has been moved to new file src/report.h. Reporting from
EOD now uses the same instructions as normal report handling, rather
than its own.

Jump target tracking in rose_build_bytecode.cpp has been cleaned up.

CMakeLists.txt
src/report.h [new file with mode: 0644]
src/rose/eod.c
src/rose/match.c
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_dump.cpp
src/rose/rose_program.h
src/runtime.c
src/som/som_runtime.c
src/util/report.h

index 1abab0fe026641165e682ec85c0ec9203e8a34c7..7ae459d84009648325d5afec755529790c8a3389 100644 (file)
@@ -384,6 +384,7 @@ set (hs_exec_SRCS
     src/ue2common.h
     src/alloc.c
     src/allocator.h
+    src/report.h
     src/runtime.c
     src/fdr/fdr.c
     src/fdr/fdr.h
diff --git a/src/report.h b/src/report.h
new file mode 100644 (file)
index 0000000..2fff3b9
--- /dev/null
@@ -0,0 +1,531 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime functions to do with reports, inlined into callers.
+ */
+
+#ifndef REPORT_H
+#define REPORT_H
+
+#include "hs_internal.h"
+#include "hs_runtime.h"
+#include "scratch.h"
+#include "ue2common.h"
+#include "nfa/callback.h"
+#include "nfa/nfa_internal.h"
+#include "rose/runtime.h"
+#include "som/som_runtime.h"
+#include "util/exhaust.h"
+#include "util/fatbit.h"
+#include "util/internal_report.h"
+
+/**
+ * \brief Check whether a match satisfies a minimum length requirement.
+ *
+ * \param min_len Required minimum match length; asserted to be non-zero.
+ * \param from_offset Start offset of the match, or HS_OFFSET_PAST_HORIZON.
+ * \param to_offset End offset of the match.
+ * \return Non-zero if from_offset is HS_OFFSET_PAST_HORIZON or if
+ * (to_offset - from_offset) is at least min_len; zero otherwise.
+ */
+static really_inline
+int satisfiesMinLength(u64a min_len, u64a from_offset,
+                       u64a to_offset) {
+    assert(min_len);
+
+    /* A start offset past the horizon is accepted without a length check. */
+    if (from_offset == HS_OFFSET_PAST_HORIZON) {
+        DEBUG_PRINTF("SOM beyond horizon\n");
+        return 1;
+    }
+
+    DEBUG_PRINTF("match len=%llu, min len=%llu\n", to_offset - from_offset,
+                 min_len);
+    /* Unsigned subtraction: the callers in this file assert that
+     * from_offset <= to_offset before calling. */
+    return to_offset - from_offset >= min_len;
+}
+
+/** \brief Result of the dedupe step (dedupeCatchup(), dedupeCatchupSom()). */
+enum DedupeResult {
+    DEDUPE_CONTINUE, //!< Continue with match, not a dupe.
+    DEDUPE_SKIP, //!< Don't report this match, dupe or delayed due to SOM.
+    DEDUPE_HALT //!< User instructed us to stop matching.
+};
+
+/**
+ * \brief Update the dedupe logs in scratch for a report and decide whether
+ * the report should go on to the user callback.
+ *
+ * \param rose Engine; supplies dkeyCount, the size used for the dedupe logs.
+ * \param ri Report being raised (dkey, type, quashSom, offsetAdjust are read).
+ * \param scratch Scratch space holding the match_deduper state.
+ * \param offset Offset compared against deduper->current_report_offset.
+ * \param from_offset Start of match; recorded for SOM reports.
+ * \param to_offset End of match; its parity selects the log slot used.
+ * \param do_som Non-zero to flush stored SOM matches on an offset change and
+ * to log SOM reports instead of letting them continue.
+ * \return DEDUPE_HALT if flushStoredSomMatches() returned non-zero,
+ * DEDUPE_SKIP if the report was already logged at this offset or was stored
+ * in the SOM log, DEDUPE_CONTINUE otherwise.
+ */
+static really_inline
+enum DedupeResult dedupeCatchup(const struct RoseEngine *rose,
+                                const struct internal_report *ri,
+                                struct hs_scratch *scratch, u64a offset,
+                                u64a from_offset, u64a to_offset,
+                                const char do_som) {
+    DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u, do_som=%d\n", offset,
+                 from_offset, to_offset, ri->dkey, do_som);
+    DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom);
+    const u32 dkey = ri->dkey;
+    /* Without SOM handling and without a dedupe key, no state is touched. */
+    if (!do_som && dkey == MO_INVALID_IDX) {
+        DEBUG_PRINTF("nothing to do\n");
+        return DEDUPE_CONTINUE;
+    }
+
+    struct match_deduper *deduper = &scratch->deduper;
+    /* First report seen at a new offset: reset the dedupe logs. */
+    if (offset != deduper->current_report_offset) {
+        assert(deduper->current_report_offset == ~0ULL ||
+               deduper->current_report_offset < offset);
+        if (offset == deduper->current_report_offset + 1) {
+            /* Advanced by exactly one: only this offset's slot is cleared. */
+            fatbit_clear(deduper->log[offset % 2]);
+        } else {
+            fatbit_clear(deduper->log[0]);
+            fatbit_clear(deduper->log[1]);
+        }
+
+        /* A non-zero return from the flush is reported as a halt. */
+        if (do_som && flushStoredSomMatches(scratch, offset)) {
+            return DEDUPE_HALT;
+        }
+        deduper->current_report_offset = offset;
+    }
+
+    if (dkey != MO_INVALID_IDX) {
+        const u32 dkeyCount = rose->dkeyCount;
+        const s32 offset_adj = ri->offsetAdjust;
+        if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) {
+            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
+            assert(offset_adj == 0 || offset_adj == -1);
+            if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) {
+                /* we have already raised this report at this offset, squash
+                 * dupe match. */
+                DEBUG_PRINTF("dedupe\n");
+                return DEDUPE_SKIP;
+            }
+        } else if (do_som) {
+            /* SOM external event */
+            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
+            assert(offset_adj == 0 || offset_adj == -1);
+            u64a *starts = deduper->som_start_log[to_offset % 2];
+            /* Keep the smallest start offset seen for this dkey; the first
+             * entry for the dkey simply stores from_offset. */
+            if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) {
+                starts[dkey] = MIN(starts[dkey], from_offset);
+            } else {
+                starts[dkey] = from_offset;
+            }
+            DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]);
+
+            /* Dirty flag: value 1 for reports with a non-zero offset
+             * adjustment, value 2 for unadjusted reports. */
+            if (offset_adj) {
+                deduper->som_log_dirty |= 1;
+            } else {
+                deduper->som_log_dirty |= 2;
+            }
+
+            /* The match is stored in the SOM log rather than reported now. */
+            return DEDUPE_SKIP;
+        }
+    }
+
+    return DEDUPE_CONTINUE;
+}
+
+/**
+ * \brief SOM variant of dedupeCatchup(): always flushes stored SOM matches
+ * when the report offset changes and has no "nothing to do" early exit.
+ *
+ * \param rose Engine; supplies dkeyCount, the size used for the dedupe logs.
+ * \param ri Report being raised (dkey, quashSom, offsetAdjust are read).
+ * \param scratch Scratch space holding the match_deduper state.
+ * \param offset Offset compared against deduper->current_report_offset.
+ * \param from_offset Start of match; recorded for non-quashed reports.
+ * \param to_offset End of match; its parity selects the log slot used.
+ * \return DEDUPE_HALT if flushStoredSomMatches() returned non-zero,
+ * DEDUPE_SKIP if the report was already logged at this offset or was stored
+ * in the SOM log, DEDUPE_CONTINUE otherwise (including reports with no dkey).
+ */
+static really_inline
+enum DedupeResult dedupeCatchupSom(const struct RoseEngine *rose,
+                                   const struct internal_report *ri,
+                                   struct hs_scratch *scratch, u64a offset,
+                                   u64a from_offset, u64a to_offset) {
+    DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u\n", offset,
+                 from_offset, to_offset, ri->dkey);
+    DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom);
+
+    struct match_deduper *deduper = &scratch->deduper;
+    /* First report seen at a new offset: reset the dedupe logs. */
+    if (offset != deduper->current_report_offset) {
+        assert(deduper->current_report_offset == ~0ULL ||
+               deduper->current_report_offset < offset);
+        if (offset == deduper->current_report_offset + 1) {
+            /* Advanced by exactly one: only this offset's slot is cleared. */
+            fatbit_clear(deduper->log[offset % 2]);
+        } else {
+            fatbit_clear(deduper->log[0]);
+            fatbit_clear(deduper->log[1]);
+        }
+
+        /* A non-zero return from the flush is reported as a halt. */
+        if (flushStoredSomMatches(scratch, offset)) {
+            return DEDUPE_HALT;
+        }
+        deduper->current_report_offset = offset;
+    }
+
+    const u32 dkey = ri->dkey;
+    if (dkey != MO_INVALID_IDX) {
+        const u32 dkeyCount = rose->dkeyCount;
+        const s32 offset_adj = ri->offsetAdjust;
+        if (ri->quashSom) {
+            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
+            assert(offset_adj == 0 || offset_adj == -1);
+            if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) {
+                /* we have already raised this report at this offset, squash
+                 * dupe match. */
+                DEBUG_PRINTF("dedupe\n");
+                return DEDUPE_SKIP;
+            }
+        } else {
+            /* SOM external event */
+            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
+            assert(offset_adj == 0 || offset_adj == -1);
+            u64a *starts = deduper->som_start_log[to_offset % 2];
+            /* Keep the smallest start offset seen for this dkey; the first
+             * entry for the dkey simply stores from_offset. */
+            if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) {
+                starts[dkey] = MIN(starts[dkey], from_offset);
+            } else {
+                starts[dkey] = from_offset;
+            }
+            DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]);
+
+            /* Dirty flag: value 1 for reports with a non-zero offset
+             * adjustment, value 2 for unadjusted reports. */
+            if (offset_adj) {
+                deduper->som_log_dirty |= 1;
+            } else {
+                deduper->som_log_dirty |= 2;
+            }
+
+            /* The match is stored in the SOM log rather than reported now. */
+            return DEDUPE_SKIP;
+        }
+    }
+
+    return DEDUPE_CONTINUE;
+}
+
+/**
+ * \brief Check a report's preconditions, run dedupe and, if it survives,
+ * deliver it to the user callback stored in scratch->core_info.
+ *
+ * \param offset Offset at which the internal match was raised.
+ * \param id Internal report ID; asserted to be valid.
+ * \param scratch Scratch space; supplies the engine, user callback, user
+ * context and exhaustion vector.
+ * \param is_simple If non-zero, the bounds check, the exhaustion check and
+ * the marking of the exhaustion key are all skipped.
+ * \param do_som If non-zero, SOM handling is enabled: the start offset is
+ * obtained via handleSomExternal() for non-EXTERNAL_CALLBACK reports and the
+ * minimum length is enforced.
+ * \return MO_HALT_MATCHING if matching was already stopped or a halt was
+ * requested; MO_CONTINUE_MATCHING if the report was already exhausted or its
+ * exhaustion key was marked; ROSE_CONTINUE_MATCHING_NO_EXHAUST otherwise
+ * (match out of bounds, too short, or no exhaustion key marked).
+ */
+static really_inline
+int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch,
+                  char is_simple, char do_som) {
+    assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
+    assert(scratch);
+    assert(scratch->magic == SCRATCH_MAGIC);
+
+    struct core_info *ci = &scratch->core_info;
+    const struct RoseEngine *rose = ci->rose;
+    DEBUG_PRINTF("internal report %u\n", id);
+    const struct internal_report *ri = getInternalReport(rose, id);
+
+    assert(isExternalReport(ri)); /* only external reports should reach here */
+
+    s32 offset_adj = ri->offsetAdjust;
+    u64a to_offset = offset;
+    u64a from_offset = 0;
+
+    u32 flags = 0;
+#ifndef RELEASE_BUILD
+    if (offset_adj) {
+        // alert testing tools that we've got adjusted matches
+        flags |= HS_MATCH_FLAG_ADJUSTED;
+    }
+#endif
+
+    DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
+                 "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch,
+                 offset_adj);
+
+    if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */
+        DEBUG_PRINTF("pre broken - halting\n");
+        return MO_HALT_MATCHING;
+    }
+
+    /* Bounds are checked against the unadjusted offset. */
+    if (!is_simple && ri->hasBounds) {
+        assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
+        assert(ri->minOffset <= ri->maxOffset);
+        if (offset < ri->minOffset || offset > ri->maxOffset) {
+            DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
+                         offset, ri->minOffset, ri->maxOffset);
+            return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
+        }
+    }
+
+    if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
+        DEBUG_PRINTF("ate exhausted match\n");
+        return MO_CONTINUE_MATCHING;
+    }
+
+    if (ri->type == EXTERNAL_CALLBACK) {
+        from_offset = 0;
+    } else if (do_som) {
+        from_offset = handleSomExternal(scratch, ri, to_offset);
+    }
+
+    /* Apply the report's offset adjustment only after the bounds check and
+     * the SOM lookup, both of which use the unadjusted offset. */
+    to_offset += offset_adj;
+    assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
+
+    if (do_som && ri->minLength) {
+        if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) {
+            return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
+        }
+        /* The start offset was only needed for the length check. */
+        if (ri->quashSom) {
+            from_offset = 0;
+        }
+    }
+
+    DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
+                 from_offset, to_offset, ri->onmatch, ci->userContext);
+
+    int halt = 0;
+
+    /* Dedupe is keyed on the unadjusted offset. */
+    enum DedupeResult dedupe_rv = dedupeCatchup(rose, ri, scratch, offset,
+                                                from_offset, to_offset, do_som);
+    switch (dedupe_rv) {
+    case DEDUPE_HALT:
+        halt = 1;
+        goto exit;
+    case DEDUPE_SKIP:
+        halt = 0;
+        goto exit;
+    case DEDUPE_CONTINUE:
+        break;
+    }
+
+    halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
+                            flags, ci->userContext);
+exit:
+    if (halt) {
+        DEBUG_PRINTF("callback requested to terminate matches\n");
+        ci->broken = BROKEN_FROM_USER;
+        return MO_HALT_MATCHING;
+    }
+
+    /* Reached for both delivered and DEDUPE_SKIP reports: the exhaustion key
+     * is marked in either case. */
+    if (!is_simple && ri->ekey != END_EXHAUST) {
+        markAsMatched(ci->exhaustionVector, ri->ekey);
+        return MO_CONTINUE_MATCHING;
+    } else {
+        return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
+    }
+}
+
+/**
+ * \brief Deliver the given report to the user callback.
+ *
+ * Assumes all preconditions (bounds, exhaustion etc) have been checked and
+ * that dedupe catchup has been done.
+ *
+ * These preconditions are only asserted here, not enforced: the report must
+ * be of type EXTERNAL_CALLBACK with no minLength and no quashSom set.
+ *
+ * \param offset Match offset, before the report's offsetAdjust is applied.
+ * \param id Internal report ID; asserted to be valid.
+ * \param scratch Scratch space; supplies the engine, user callback, user
+ * context and exhaustion vector.
+ * \param is_exhaustible If non-zero, the report's ekey (asserted to be valid)
+ * is marked in the exhaustion vector once the callback has returned zero.
+ * \return MO_HALT_MATCHING if the user callback returned non-zero;
+ * MO_CONTINUE_MATCHING if the exhaustion key was marked;
+ * ROSE_CONTINUE_MATCHING_NO_EXHAUST otherwise.
+ */
+static really_inline
+int roseDeliverReport(u64a offset, ReportID id, struct hs_scratch *scratch,
+                      char is_exhaustible) {
+    assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
+    assert(scratch);
+    assert(scratch->magic == SCRATCH_MAGIC);
+
+    struct core_info *ci = &scratch->core_info;
+    const struct RoseEngine *rose = ci->rose;
+    DEBUG_PRINTF("internal report %u\n", id);
+    const struct internal_report *ri = getInternalReport(rose, id);
+
+    assert(isExternalReport(ri)); /* only external reports should reach here */
+
+    const s32 offset_adj = ri->offsetAdjust;
+    u32 flags = 0;
+#ifndef RELEASE_BUILD
+    if (offset_adj) {
+        // alert testing tools that we've got adjusted matches
+        flags |= HS_MATCH_FLAG_ADJUSTED;
+    }
+#endif
+
+    DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
+                 "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch,
+                 offset_adj);
+
+    assert(!can_stop_matching(scratch));
+    assert(!ri->hasBounds ||
+           (offset >= ri->minOffset && offset <= ri->maxOffset));
+    assert(ri->type == EXTERNAL_CALLBACK);
+    assert(!ri->minLength);
+    assert(!ri->quashSom);
+    assert(ri->ekey == INVALID_EKEY ||
+           !isExhausted(ci->exhaustionVector, ri->ekey));
+
+    /* This path always reports a start offset of zero. */
+    u64a from_offset = 0;
+    u64a to_offset = offset + offset_adj;
+
+    DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
+                 from_offset, to_offset, ri->onmatch, ci->userContext);
+
+    int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset,
+                                to_offset, flags, ci->userContext);
+    if (halt) {
+        DEBUG_PRINTF("callback requested to terminate matches\n");
+        ci->broken = BROKEN_FROM_USER;
+        return MO_HALT_MATCHING;
+    }
+
+    if (is_exhaustible) {
+        assert(ri->ekey != INVALID_EKEY);
+        markAsMatched(ci->exhaustionVector, ri->ekey);
+        return MO_CONTINUE_MATCHING;
+    } else {
+        return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
+    }
+}
+
+/**
+ * \brief Check a SOM report's preconditions, run SOM dedupe and, if it
+ * survives, deliver it to the user callback stored in scratch->core_info.
+ *
+ * \param from_offset Start offset of the match, or HS_OFFSET_PAST_HORIZON.
+ * \param to_offset End offset of the match, before offsetAdjust is applied.
+ * \param id Internal report ID; asserted to be valid.
+ * \param scratch Scratch space; supplies the engine, user callback, user
+ * context and exhaustion vector.
+ * \param is_simple If non-zero, the bounds check, the exhaustion check, the
+ * minimum length check and the marking of the exhaustion key are skipped.
+ * \return MO_HALT_MATCHING if matching was already stopped or a halt was
+ * requested; MO_CONTINUE_MATCHING in every other case.
+ */
+static really_inline
+int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id,
+                     struct hs_scratch *scratch, char is_simple) {
+    assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
+    assert(scratch);
+    assert(scratch->magic == SCRATCH_MAGIC);
+
+    u32 flags = 0;
+
+    struct core_info *ci = &scratch->core_info;
+    const struct RoseEngine *rose = ci->rose;
+    const struct internal_report *ri = getInternalReport(rose, id);
+
+    /* internal events should be handled by rose directly */
+    assert(ri->type == EXTERNAL_CALLBACK);
+
+    DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
+                 "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch,
+                 ri->offsetAdjust);
+
+    if (unlikely(can_stop_matching(scratch))) {
+        DEBUG_PRINTF("pre broken - halting\n");
+        return MO_HALT_MATCHING;
+    }
+
+    /* Bounds are checked against the unadjusted end offset. */
+    if (!is_simple && ri->hasBounds) {
+        assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
+        if (to_offset < ri->minOffset || to_offset > ri->maxOffset) {
+            DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
+                         to_offset, ri->minOffset, ri->maxOffset);
+            return MO_CONTINUE_MATCHING;
+        }
+    }
+
+    int halt = 0;
+
+    /* halt is still zero here, so the exit path returns
+     * MO_CONTINUE_MATCHING. */
+    if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
+        DEBUG_PRINTF("ate exhausted match\n");
+        goto exit;
+    }
+
+    /* Keep the unadjusted end offset: dedupe is keyed on it. */
+    u64a offset = to_offset;
+
+    to_offset += ri->offsetAdjust;
+    assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
+
+    if (!is_simple && ri->minLength) {
+        if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) {
+            return MO_CONTINUE_MATCHING;
+        }
+        /* The start offset was only needed for the length check. */
+        if (ri->quashSom) {
+            from_offset = 0;
+        }
+    }
+
+    DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
+                 from_offset, to_offset, ri->onmatch, ci->userContext);
+
+#ifndef RELEASE_BUILD
+    if (ri->offsetAdjust != 0) {
+        // alert testing tools that we've got adjusted matches
+        flags |= HS_MATCH_FLAG_ADJUSTED;
+    }
+#endif
+
+    enum DedupeResult dedupe_rv =
+        dedupeCatchupSom(rose, ri, scratch, offset, from_offset, to_offset);
+    switch (dedupe_rv) {
+    case DEDUPE_HALT:
+        halt = 1;
+        goto exit;
+    case DEDUPE_SKIP:
+        halt = 0;
+        goto exit;
+    case DEDUPE_CONTINUE:
+        break;
+    }
+
+    halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
+                            flags, ci->userContext);
+
+    /* Marked whatever the callback returned; the dedupe paths above jump
+     * past this and do not mark the key. */
+    if (!is_simple) {
+        markAsMatched(ci->exhaustionVector, ri->ekey);
+    }
+
+exit:
+    if (halt) {
+        DEBUG_PRINTF("callback requested to terminate matches\n");
+        ci->broken = BROKEN_FROM_USER;
+        return MO_HALT_MATCHING;
+    }
+
+    return MO_CONTINUE_MATCHING;
+}
+
+/**
+ * \brief Deliver the given SOM report to the user callback.
+ *
+ * Assumes all preconditions (bounds, exhaustion etc) have been checked and
+ * that dedupe catchup has been done.
+ *
+ * These preconditions (including minimum length and, for quashSom reports,
+ * a zero from_offset) are only asserted here, not enforced.
+ *
+ * \param from_offset Start offset of the match, or HS_OFFSET_PAST_HORIZON.
+ * \param to_offset End offset of the match, before offsetAdjust is applied.
+ * \param id Internal report ID; asserted to be valid.
+ * \param scratch Scratch space; supplies the engine, user callback, user
+ * context and exhaustion vector.
+ * \param is_exhaustible If non-zero, the report's ekey (asserted to be valid)
+ * is marked in the exhaustion vector once the callback has returned zero.
+ * \return MO_HALT_MATCHING if the user callback returned non-zero;
+ * MO_CONTINUE_MATCHING if the exhaustion key was marked;
+ * ROSE_CONTINUE_MATCHING_NO_EXHAUST otherwise.
+ */
+static really_inline
+int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID id,
+                         struct hs_scratch *scratch, char is_exhaustible) {
+    assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
+    assert(scratch);
+    assert(scratch->magic == SCRATCH_MAGIC);
+
+    u32 flags = 0;
+
+    struct core_info *ci = &scratch->core_info;
+    const struct RoseEngine *rose = ci->rose;
+    const struct internal_report *ri = getInternalReport(rose, id);
+
+    assert(isExternalReport(ri)); /* only external reports should reach here */
+
+    DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
+                 "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch,
+                 ri->offsetAdjust);
+
+    assert(!can_stop_matching(scratch));
+    assert(!ri->hasBounds ||
+           (to_offset >= ri->minOffset && to_offset <= ri->maxOffset));
+    assert(ri->ekey == INVALID_EKEY ||
+           !isExhausted(ci->exhaustionVector, ri->ekey));
+
+    /* The bounds assertion above uses the unadjusted end offset; the
+     * adjustment is applied only now. */
+    to_offset += ri->offsetAdjust;
+    assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
+
+    assert(!ri->minLength ||
+           satisfiesMinLength(ri->minLength, from_offset, to_offset));
+    assert(!ri->quashSom || from_offset == 0);
+
+    DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
+                 from_offset, to_offset, ri->onmatch, ci->userContext);
+
+#ifndef RELEASE_BUILD
+    if (ri->offsetAdjust != 0) {
+        // alert testing tools that we've got adjusted matches
+        flags |= HS_MATCH_FLAG_ADJUSTED;
+    }
+#endif
+
+    int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset,
+                                to_offset, flags, ci->userContext);
+
+    if (halt) {
+        DEBUG_PRINTF("callback requested to terminate matches\n");
+        ci->broken = BROKEN_FROM_USER;
+        return MO_HALT_MATCHING;
+    }
+
+    if (is_exhaustible) {
+        assert(ri->ekey != INVALID_EKEY);
+        markAsMatched(ci->exhaustionVector, ri->ekey);
+        return MO_CONTINUE_MATCHING;
+    } else {
+        return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
+    }
+}
+
+#endif // REPORT_H
index 7bbf8faf123a8c6b38d7773239145e49e43531c3..ade45727b14ec642a608364305a246e8e9111730 100644 (file)
@@ -47,9 +47,9 @@ void initContext(const struct RoseEngine *t, char *state, u64a offset,
     tctxt->cb = callback;
     tctxt->cb_som = som_callback;
     tctxt->lastMatchOffset = 0;
-    tctxt->minMatchOffset = 0;
-    tctxt->minNonMpvMatchOffset = 0;
-    tctxt->next_mpv_offset = 0;
+    tctxt->minMatchOffset = offset;
+    tctxt->minNonMpvMatchOffset = offset;
+    tctxt->next_mpv_offset = offset;
     tctxt->curr_anchored_loc = MMB_INVALID;
     tctxt->curr_row_offset = 0;
 
@@ -146,14 +146,16 @@ int eodNfaSomCallback(u64a from_offset, u64a to_offset, ReportID report,
 /**
  * \brief Check for (and deliver) reports from active output-exposed (suffix
  * or outfix) NFAs.
+ *
+ * \return MO_HALT_MATCHING if the user instructs us to stop.
  */
 static rose_inline
-void roseCheckNfaEod(const struct RoseEngine *t, char *state,
+int roseCheckNfaEod(const struct RoseEngine *t, char *state,
                      struct hs_scratch *scratch, u64a offset,
                      const char is_streaming) {
     if (!t->eodNfaIterOffset) {
         DEBUG_PRINTF("no engines that report at EOD\n");
-        return;
+        return MO_CONTINUE_MATCHING;
     }
 
     /* data, len is used for state decompress, should be full available data */
@@ -194,9 +196,11 @@ void roseCheckNfaEod(const struct RoseEngine *t, char *state,
                                eodNfaSomCallback,
                                scratch) == MO_HALT_MATCHING) {
             DEBUG_PRINTF("user instructed us to stop\n");
-            return;
+            return MO_HALT_MATCHING;
         }
     }
+
+    return MO_CONTINUE_MATCHING;
 }
 
 static rose_inline
@@ -283,7 +287,10 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset,
         return;
     }
 
-    roseCheckNfaEod(t, state, scratch, offset, is_streaming);
+    if (roseCheckNfaEod(t, state, scratch, offset, is_streaming) ==
+        MO_HALT_MATCHING) {
+        return;
+    }
 
     if (!t->eodIterProgramOffset && !t->ematcherOffset) {
         DEBUG_PRINTF("no eod accepts\n");
@@ -291,8 +298,7 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset,
     }
 
     // Handle pending EOD reports.
-    int itrv = roseEodRunIterator(t, offset, scratch);
-    if (itrv == MO_HALT_MATCHING) {
+    if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) {
         return;
     }
 
@@ -303,15 +309,17 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset,
         mmbit_clear(getRoleState(state), t->rolesWithStateCount);
         mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount);
 
-        hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming);
-        if (rv == HWLM_TERMINATE_MATCHING) {
+        if (roseEodRunMatcher(t, offset, scratch, is_streaming) ==
+            HWLM_TERMINATE_MATCHING) {
             return;
         }
 
         cleanupAfterEodMatcher(t, state, offset, scratch);
 
         // Fire any new EOD reports.
-        roseEodRunIterator(t, offset, scratch);
+        if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) {
+            return;
+        }
 
         roseCheckEodSuffixes(t, state, offset, scratch);
     }
index 6397b90eed9acf3515f9928b0e9ef3dee93b772f..a91c036598cce5eb504913f0b128685f2d47c9aa 100644 (file)
@@ -253,33 +253,53 @@ event_enqueued:
     return HWLM_CONTINUE_MATCHING;
 }
 
+/* handles the firing of external matches
+ *
+ * Fires the report callback stored in the Rose context (tctxt->cb) for
+ * report id at offset end. The caller must already have caught up to end:
+ * this is asserted (end == minMatchOffset), not performed here.
+ *
+ * Returns HWLM_TERMINATE_MATCHING if the callback returned MO_HALT_MATCHING,
+ * HWLM_CONTINUE_MATCHING if it returned ROSE_CONTINUE_MATCHING_NO_EXHAUST,
+ * and the result of roseHaltIfExhausted() otherwise. */
+static rose_inline
+hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end,
+                            struct hs_scratch *scratch) {
+    struct RoseContext *tctxt = &scratch->tctxt;
+
+    assert(end == tctxt->minMatchOffset);
+    DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end);
+    updateLastMatchOffset(tctxt, end);
+
+    int cb_rv = tctxt->cb(end, id, scratch);
+    if (cb_rv == MO_HALT_MATCHING) {
+        DEBUG_PRINTF("termination requested\n");
+        return HWLM_TERMINATE_MATCHING;
+    }
+
+    /* No exhaustion key was marked, so the exhaustion check is skipped. */
+    if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+        return HWLM_CONTINUE_MATCHING;
+    }
+
+    return roseHaltIfExhausted(t, scratch);
+}
+
 /* handles catchup, som, cb, etc */
 static really_inline
 hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state,
                              struct RoseContext *tctxt, ReportID id,
                              u64a offset, char in_anchored) {
-    const struct internal_report *ri = getInternalReport(t, id);
+    struct hs_scratch *scratch = tctxtToScratch(tctxt);
 
-    if (ri) {
-        // Mildly cheesy performance hack: if this report is already exhausted,
-        // we can quash the match here.
-        if (ri->ekey != INVALID_EKEY) {
-            const struct hs_scratch *scratch = tctxtToScratch(tctxt);
-            if (isExhausted(scratch->core_info.exhaustionVector, ri->ekey)) {
-                DEBUG_PRINTF("eating exhausted match (report %u, ekey %u)\n",
-                             ri->onmatch, ri->ekey);
-                return HWLM_CONTINUE_MATCHING;
-            }
-        }
+    if (roseCatchUpTo(t, state, offset, scratch, in_anchored) ==
+        HWLM_TERMINATE_MATCHING) {
+        return HWLM_TERMINATE_MATCHING;
+    }
 
+    const struct internal_report *ri = getInternalReport(t, id);
+    if (ri) {
         if (isInternalSomReport(ri)) {
-            return roseHandleSom(t, state, id, offset, tctxt, in_anchored);
+            roseHandleSom(t, scratch, id, offset);
+            return HWLM_CONTINUE_MATCHING;
         } else if (ri->type == INTERNAL_ROSE_CHAIN) {
             return roseCatchUpAndHandleChainMatch(t, state, id, offset, tctxt,
                                                   in_anchored);
         }
     }
-    return roseHandleMatch(t, state, id, offset, tctxt, in_anchored);
+
+    return roseHandleMatch(t, id, offset, scratch);
 }
 
 static really_inline
index f76689f4aebe606da00c46a0311d9853e60b613c..1e1356e146c9edc4d4624c4b488eaee47b7adb76 100644 (file)
@@ -34,6 +34,7 @@
 #include "infix.h"
 #include "match.h"
 #include "miracle.h"
+#include "report.h"
 #include "rose.h"
 #include "rose_internal.h"
 #include "rose_program.h"
@@ -566,29 +567,20 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi,
     pushQueueSom(q, topEvent, loc, start);
 }
 
-/* handles the firing of external matches */
 static rose_inline
-hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, char *state,
-                            ReportID id, u64a end, struct RoseContext *tctxt,
-                            char in_anchored) {
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
-    if (roseCatchUpTo(t, state, end, scratch, in_anchored)
-        == HWLM_TERMINATE_MATCHING) {
-        return HWLM_TERMINATE_MATCHING;
-    }
-
-    assert(end == tctxt->minMatchOffset);
-    DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end);
-    updateLastMatchOffset(tctxt, end);
+hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
+                       ReportID id, u64a end, char is_exhaustible) {
+    assert(end == scratch->tctxt.minMatchOffset);
+    DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end);
+    updateLastMatchOffset(&scratch->tctxt, end);
 
-    int cb_rv = tctxt->cb(end, id, scratch);
+    int cb_rv = roseDeliverReport(end, id, scratch, is_exhaustible);
     if (cb_rv == MO_HALT_MATCHING) {
         DEBUG_PRINTF("termination requested\n");
         return HWLM_TERMINATE_MATCHING;
     }
 
-    if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+    if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
         return HWLM_CONTINUE_MATCHING;
     }
 
@@ -613,76 +605,38 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
     return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0);
 }
 
-static rose_inline
-hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, char *state, u64a end,
-                           struct RoseContext *tctxt, char in_anchored) {
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
-    // In SOM processing, we may be able to limit or entirely avoid catchup.
-
-    DEBUG_PRINTF("entry\n");
-
-    if (end == tctxt->minMatchOffset) {
-        DEBUG_PRINTF("already caught up\n");
-        return HWLM_CONTINUE_MATCHING;
-    }
-
-    DEBUG_PRINTF("catching up all NFAs\n");
-    if (roseCatchUpTo(t, state, end, scratch, in_anchored)
-        == HWLM_TERMINATE_MATCHING) {
-        return HWLM_TERMINATE_MATCHING;
-    }
-    updateMinMatchOffset(tctxt, end);
-    return HWLM_CONTINUE_MATCHING;
-}
-
 static really_inline
-hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, char *state, ReportID id,
-                          u64a end, struct RoseContext *tctxt,
-                          char in_anchored) {
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
+void roseHandleSom(const struct RoseEngine *t, struct hs_scratch *scratch,
+                   ReportID id, u64a end) {
     DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end,
-                  tctxt->minMatchOffset);
+                  scratch->tctxt.minMatchOffset);
 
     // Reach into reports and handle internal reports that just manipulate SOM
     // slots ourselves, rather than going through the callback.
 
-    if (roseSomCatchup(t, state, end, tctxt, in_anchored)
-        == HWLM_TERMINATE_MATCHING) {
-        return HWLM_TERMINATE_MATCHING;
-    }
+    assert(end == scratch->tctxt.minMatchOffset);
+    DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end);
+    updateLastMatchOffset(&scratch->tctxt, end);
 
     const struct internal_report *ri = getInternalReport(t, id);
     handleSomInternal(scratch, ri, end);
-
-    return HWLM_CONTINUE_MATCHING;
 }
 
 static rose_inline
-hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state,
-                               ReportID id, u64a start, u64a end,
-                               struct RoseContext *tctxt, char in_anchored) {
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
-    if (roseCatchUpTo(t, state, end, scratch, in_anchored)
-        == HWLM_TERMINATE_MATCHING) {
-        return HWLM_TERMINATE_MATCHING;
-    }
-
-    DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id,
-                 start, end);
-    DEBUG_PRINTF("    last match %llu\n", tctxt->lastMatchOffset);
-    assert(end == tctxt->minMatchOffset);
-
-    updateLastMatchOffset(tctxt, end);
-    int cb_rv = tctxt->cb_som(start, end, id, scratch);
+hwlmcb_rv_t roseReportSom(const struct RoseEngine *t,
+                          struct hs_scratch *scratch, ReportID id, u64a start,
+                          u64a end, char is_exhaustible) {
+    assert(end == scratch->tctxt.minMatchOffset);
+    DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end);
+    updateLastMatchOffset(&scratch->tctxt, end);
+
+    int cb_rv = roseDeliverSomReport(start, end, id, scratch, is_exhaustible);
     if (cb_rv == MO_HALT_MATCHING) {
         DEBUG_PRINTF("termination requested\n");
         return HWLM_TERMINATE_MATCHING;
     }
 
-    if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+    if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
         return HWLM_CONTINUE_MATCHING;
     }
 
@@ -690,23 +644,19 @@ hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state,
 }
 
 static rose_inline
-hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, char *state,
-                             ReportID id, u64a start, u64a end,
-                             struct RoseContext *tctxt, char in_anchored) {
+void roseHandleSomSom(const struct RoseEngine *t, ReportID id, u64a start,
+                      u64a end, struct hs_scratch *scratch) {
     DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n",
-                  id, start, end, tctxt->minMatchOffset);
+                  id, start, end, scratch->tctxt.minMatchOffset);
 
     // Reach into reports and handle internal reports that just manipulate SOM
     // slots ourselves, rather than going through the callback.
 
-    if (roseSomCatchup(t, state, end, tctxt, in_anchored)
-        == HWLM_TERMINATE_MATCHING) {
-        return HWLM_TERMINATE_MATCHING;
-    }
+    assert(end == scratch->tctxt.minMatchOffset);
+    updateLastMatchOffset(&scratch->tctxt, end);
 
     const struct internal_report *ri = getInternalReport(t, id);
-    setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end);
-    return HWLM_CONTINUE_MATCHING;
+    setSomFromSomAware(scratch, ri, start, end);
 }
 
 static really_inline
@@ -848,14 +798,11 @@ u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi,
 }
 
 static rose_inline
-char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
-    assert(max_bound <= ROSE_BOUND_INF);
+char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) {
+    DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end,
+                 min_bound, max_bound);
     assert(min_bound <= max_bound);
-
-    if (end < min_bound) {
-        return 0;
-    }
-    return max_bound == ROSE_BOUND_INF || end <= max_bound;
+    return end >= min_bound && end <= max_bound;
 }
 
 
@@ -956,9 +903,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             PROGRAM_NEXT_INSTRUCTION
 
             PROGRAM_CASE(CHECK_BOUNDS) {
-                if (!in_anchored &&
-                    !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) {
-                    DEBUG_PRINTF("failed root bounds check\n");
+                if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) {
+                    DEBUG_PRINTF("failed bounds check\n");
                     assert(ri->fail_jump); // must progress
                     pc += ri->fail_jump;
                     continue;
@@ -1003,6 +949,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CATCH_UP) {
+                if (roseCatchUpTo(t, scratch->core_info.state, end, scratch,
+                                  in_anchored) == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SOM_ADJUST) {
                 assert(ri->distance <= end);
                 som = end - ri->distance;
@@ -1016,6 +970,20 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(SOM_FROM_REPORT) {
+                const struct internal_report *ir =
+                    getInternalReport(t, ri->report);
+                som = handleSomExternal(scratch, ir, end);
+                DEBUG_PRINTF("som from report %u is %llu\n", ri->report, som);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(SOM_ZERO) {
+                DEBUG_PRINTF("setting SOM to zero\n");
+                som = 0;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(TRIGGER_INFIX) {
                 roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel,
                                  tctxt);
@@ -1033,13 +1001,40 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT) {
-                if (roseHandleMatch(t, scratch->core_info.state,
-                                    ri->report, end, tctxt,
-                                    in_anchored) == HWLM_TERMINATE_MATCHING) {
+            PROGRAM_CASE(DEDUPE) {
+                const struct internal_report *ir =
+                    getInternalReport(t, ri->report);
+                const char do_som = t->hasSom; // FIXME: constant propagate
+                enum DedupeResult rv = dedupeCatchup(
+                    t, ir, scratch, end, som, end + ir->offsetAdjust, do_som);
+                switch (rv) {
+                case DEDUPE_HALT:
                     return HWLM_TERMINATE_MATCHING;
+                case DEDUPE_SKIP:
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                case DEDUPE_CONTINUE:
+                    break;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(DEDUPE_SOM) {
+                const struct internal_report *ir =
+                    getInternalReport(t, ri->report);
+                enum DedupeResult rv = dedupeCatchupSom(
+                    t, ir, scratch, end, som, end + ir->offsetAdjust);
+                switch (rv) {
+                case DEDUPE_HALT:
+                    return HWLM_TERMINATE_MATCHING;
+                case DEDUPE_SKIP:
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                case DEDUPE_CONTINUE:
+                    break;
                 }
-                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -1053,18 +1048,32 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT_EOD) {
-                if (tctxt->cb(end, ri->report, scratch) == MO_HALT_MATCHING) {
+            PROGRAM_CASE(REPORT_SOM_INT) {
+                roseHandleSom(t, scratch, ri->report, end);
+                work_done = 1;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(REPORT_SOM_AWARE) {
+                roseHandleSomSom(t, ri->report, som, end, scratch);
+                work_done = 1;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(REPORT) {
+                const char is_exhaustible = 0;
+                if (roseReport(t, scratch, ri->report, end, is_exhaustible) ==
+                    HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
                 work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT_SOM_INT) {
-                if (roseHandleSom(t, scratch->core_info.state, ri->report,
-                                  end, tctxt,
-                                  in_anchored) == HWLM_TERMINATE_MATCHING) {
+            PROGRAM_CASE(REPORT_EXHAUST) {
+                const char is_exhaustible = 1;
+                if (roseReport(t, scratch, ri->report, end, is_exhaustible) ==
+                    HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
                 work_done = 1;
@@ -1072,25 +1081,57 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             PROGRAM_NEXT_INSTRUCTION
 
             PROGRAM_CASE(REPORT_SOM) {
-                if (roseHandleSomSom(t, scratch->core_info.state,
-                                     ri->report, som, end, tctxt,
-                                     in_anchored) == HWLM_TERMINATE_MATCHING) {
+                const char is_exhaustible = 0;
+                if (roseReportSom(t, scratch, ri->report, som, end,
+                                  is_exhaustible) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
                 work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT_SOM_KNOWN) {
-                if (roseHandleSomMatch(t, scratch->core_info.state, ri->report,
-                                       som, end, tctxt, in_anchored) ==
-                    HWLM_TERMINATE_MATCHING) {
+            PROGRAM_CASE(REPORT_SOM_EXHAUST) {
+                const char is_exhaustible = 1;
+                if (roseReportSom(t, scratch, ri->report, som, end,
+                                  is_exhaustible) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
                 work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_EXHAUSTED) {
+                DEBUG_PRINTF("check ekey %u\n", ri->ekey);
+                assert(ri->ekey != INVALID_EKEY);
+                assert(ri->ekey < t->ekeyCount);
+                const char *evec = scratch->core_info.exhaustionVector;
+                if (isExhausted(evec, ri->ekey)) {
+                    DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
+                                 ri->ekey);
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MIN_LENGTH) {
+                DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length,
+                             ri->end_adj);
+                assert(ri->min_length > 0);
+                assert(ri->end_adj == 0 || ri->end_adj == -1);
+                assert(som == HS_OFFSET_PAST_HORIZON || som <= end);
+                if (som != HS_OFFSET_PAST_HORIZON &&
+                    ((end + ri->end_adj) - som < ri->min_length)) {
+                    DEBUG_PRINTF("failed check, match len %llu\n",
+                                 (u64a)((end + ri->end_adj) - som));
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SET_STATE) {
                 DEBUG_PRINTF("set state index %u\n", ri->index);
                 mmbit_set(getRoleState(scratch->core_info.state),
index a042eb2c5d1de666ce119d6d38989fe3003f9b18..c7c0891a86521388a8bd45efe467f424e113d81c 100644 (file)
@@ -141,25 +141,42 @@ struct left_build_info {
     vector<LookEntry> lookaround; // alternative implementation to the NFA
 };
 
+/**
+ * \brief Symbolic jump targets for instructions that may jump.
+ *
+ * flattenProgram() turns these into byte offsets before bytecode is written.
+ */
+enum class JumpTarget {
+    NO_JUMP,        //!< Instruction does not jump.
+    PROGRAM_END,    //!< Jump to the final END instruction of the program.
+    NEXT_BLOCK,     //!< Jump to start of next block (sparse iter check, etc).
+    FIXUP_DONE,     //!< Jump offset already written by flattenProgram().
+};
+
 /** \brief Role instruction model used at compile time. */
 class RoseInstruction {
 public:
-    RoseInstruction() {
-        memset(&u, 0, sizeof(u));
-        u.end.code = ROSE_INSTR_END;
-    }
-
-    explicit RoseInstruction(enum RoseInstructionCode c) {
+    RoseInstruction(enum RoseInstructionCode c, JumpTarget j) : target(j) {
         memset(&u, 0, sizeof(u));
         u.end.code = c;
     }
 
+    explicit RoseInstruction(enum RoseInstructionCode c)
+        : RoseInstruction(c, JumpTarget::NO_JUMP) {}
+
     bool operator<(const RoseInstruction &a) const {
+        if (code() != a.code()) {
+            return code() < a.code();
+        }
+        if (target != a.target) {
+            return target < a.target;
+        }
         return memcmp(&u, &a.u, sizeof(u)) < 0;
     }
 
     bool operator==(const RoseInstruction &a) const {
-        return memcmp(&u, &a.u, sizeof(u)) == 0;
+        return code() == a.code() && target == a.target &&
+               memcmp(&u, &a.u, sizeof(u)) == 0;
     }
 
     enum RoseInstructionCode code() const {
@@ -180,16 +197,24 @@ public:
         case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix;
         case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
         case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
+        case ROSE_INSTR_CATCH_UP: return &u.catchUp;
         case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
         case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix;
+        case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport;
+        case ROSE_INSTR_SOM_ZERO: return &u.somZero;
         case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix;
         case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix;
-        case ROSE_INSTR_REPORT: return &u.report;
+        case ROSE_INSTR_DEDUPE: return &u.dedupe;
+        case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom;
         case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain;
-        case ROSE_INSTR_REPORT_EOD: return &u.reportEod;
         case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt;
+        case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSomAware;
+        case ROSE_INSTR_REPORT: return &u.report;
+        case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust;
         case ROSE_INSTR_REPORT_SOM: return &u.reportSom;
-        case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown;
+        case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust;
+        case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted;
+        case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength;
         case ROSE_INSTR_SET_STATE: return &u.setState;
         case ROSE_INSTR_SET_GROUPS: return &u.setGroups;
         case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups;
@@ -214,16 +239,24 @@ public:
         case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix);
         case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
         case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
+        case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp);
         case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
         case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix);
+        case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport);
+        case ROSE_INSTR_SOM_ZERO: return sizeof(u.somZero);
         case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix);
         case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix);
-        case ROSE_INSTR_REPORT: return sizeof(u.report);
+        case ROSE_INSTR_DEDUPE: return sizeof(u.dedupe);
+        case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom);
         case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain);
-        case ROSE_INSTR_REPORT_EOD: return sizeof(u.reportEod);
         case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt);
+        case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSomAware);
+        case ROSE_INSTR_REPORT: return sizeof(u.report);
+        case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust);
         case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom);
-        case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown);
+        case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust);
+        case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted);
+        case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength);
         case ROSE_INSTR_SET_STATE: return sizeof(u.setState);
         case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups);
         case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups);
@@ -232,6 +265,7 @@ public:
         case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
         case ROSE_INSTR_END: return sizeof(u.end);
         }
+        assert(0);
         return 0;
     }
 
@@ -246,16 +280,24 @@ public:
         ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix;
         ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
         ROSE_STRUCT_PUSH_DELAYED pushDelayed;
+        ROSE_STRUCT_CATCH_UP catchUp;
         ROSE_STRUCT_SOM_ADJUST somAdjust;
         ROSE_STRUCT_SOM_LEFTFIX somLeftfix;
+        ROSE_STRUCT_SOM_FROM_REPORT somFromReport;
+        ROSE_STRUCT_SOM_ZERO somZero;
         ROSE_STRUCT_TRIGGER_INFIX triggerInfix;
         ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix;
-        ROSE_STRUCT_REPORT report;
+        ROSE_STRUCT_DEDUPE dedupe;
+        ROSE_STRUCT_DEDUPE_SOM dedupeSom;
         ROSE_STRUCT_REPORT_CHAIN reportChain;
-        ROSE_STRUCT_REPORT_EOD reportEod;
         ROSE_STRUCT_REPORT_SOM_INT reportSomInt;
+        ROSE_STRUCT_REPORT_SOM_AWARE reportSomAware;
+        ROSE_STRUCT_REPORT report;
+        ROSE_STRUCT_REPORT_EXHAUST reportExhaust;
         ROSE_STRUCT_REPORT_SOM reportSom;
-        ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown;
+        ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust;
+        ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted;
+        ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength;
         ROSE_STRUCT_SET_STATE setState;
         ROSE_STRUCT_SET_GROUPS setGroups;
         ROSE_STRUCT_SQUASH_GROUPS squashGroups;
@@ -264,11 +306,15 @@ public:
         ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
         ROSE_STRUCT_END end;
     } u;
+
+    JumpTarget target;
 };
 
 static
 size_t hash_value(const RoseInstruction &ri) {
     size_t val = 0;
+    boost::hash_combine(val, ri.code());
+    boost::hash_combine(val, ri.target);
     const char *bytes = (const char *)ri.get();
     const size_t len = ri.length();
     for (size_t i = 0; i < len; i++) {
@@ -2619,61 +2665,100 @@ flattenProgram(const vector<vector<RoseInstruction>> &programs) {
     vector<RoseInstruction> out;
 
     vector<u32> offsets; // offset of each instruction (bytes)
-    vector<u32> targets; // jump target for each instruction
+    vector<u32> blocks; // track which block we're in
+    vector<u32> block_offsets; // start offsets for each block
 
-    DEBUG_PRINTF("%zu programs\n", programs.size());
+    DEBUG_PRINTF("%zu program blocks\n", programs.size());
 
     size_t curr_offset = 0;
     for (const auto &program : programs) {
-        DEBUG_PRINTF("program with %zu instructions\n", program.size());
+        DEBUG_PRINTF("block with %zu instructions\n", program.size());
+        block_offsets.push_back(curr_offset);
         for (const auto &ri : program) {
+            assert(ri.code() != ROSE_INSTR_END);
             out.push_back(ri);
             offsets.push_back(curr_offset);
+            blocks.push_back(block_offsets.size() - 1);
             curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
         }
-        for (size_t i = 0; i < program.size(); i++) {
-            targets.push_back(curr_offset);
-        }
     }
 
-    // Add an END instruction.
+    // Add a final END instruction, which is its own block.
     out.emplace_back(ROSE_INSTR_END);
+    block_offsets.push_back(curr_offset);
     offsets.push_back(curr_offset);
-    targets.push_back(curr_offset);
 
-    assert(targets.size() == out.size());
     assert(offsets.size() == out.size());
 
     for (size_t i = 0; i < out.size(); i++) {
         auto &ri = out[i];
+
+        u32 jump_target = 0;
+        switch (ri.target) {
+        case JumpTarget::NO_JUMP:
+        case JumpTarget::FIXUP_DONE:
+            continue; // Next instruction.
+        case JumpTarget::PROGRAM_END:
+            assert(i != out.size() - 1);
+            jump_target = offsets.back();
+            break;
+        case JumpTarget::NEXT_BLOCK:
+            assert(blocks[i] + 1 < block_offsets.size());
+            jump_target = block_offsets[blocks[i] + 1];
+            break;
+        }
+
+        // We currently always make progress and never jump backwards.
+        assert(jump_target > offsets[i]);
+        assert(jump_target <= offsets.back());
+        u32 jump_val = jump_target - offsets[i];
+
         switch (ri.code()) {
         case ROSE_INSTR_ANCHORED_DELAY:
-            assert(targets[i] > offsets[i]); // jumps always progress
-            ri.u.anchoredDelay.done_jump = targets[i] - offsets[i];
+            ri.u.anchoredDelay.done_jump = jump_val;
             break;
         case ROSE_INSTR_CHECK_ONLY_EOD:
-            assert(targets[i] > offsets[i]);
-            ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i];
+            ri.u.checkOnlyEod.fail_jump = jump_val;
             break;
         case ROSE_INSTR_CHECK_BOUNDS:
-            assert(targets[i] > offsets[i]);
-            ri.u.checkBounds.fail_jump = targets[i] - offsets[i];
+            ri.u.checkBounds.fail_jump = jump_val;
             break;
         case ROSE_INSTR_CHECK_NOT_HANDLED:
-            assert(targets[i] > offsets[i]);
-            ri.u.checkNotHandled.fail_jump = targets[i] - offsets[i];
+            ri.u.checkNotHandled.fail_jump = jump_val;
             break;
         case ROSE_INSTR_CHECK_LOOKAROUND:
-            assert(targets[i] > offsets[i]);
-            ri.u.checkLookaround.fail_jump = targets[i] - offsets[i];
+            ri.u.checkLookaround.fail_jump = jump_val;
             break;
         case ROSE_INSTR_CHECK_LEFTFIX:
-            assert(targets[i] > offsets[i]);
-            ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i];
+            ri.u.checkLeftfix.fail_jump = jump_val;
+            break;
+        case ROSE_INSTR_DEDUPE:
+            ri.u.dedupe.fail_jump = jump_val;
+            break;
+        case ROSE_INSTR_DEDUPE_SOM:
+            ri.u.dedupeSom.fail_jump = jump_val;
+            break;
+        case ROSE_INSTR_CHECK_EXHAUSTED:
+            ri.u.checkExhausted.fail_jump = jump_val;
+            break;
+        case ROSE_INSTR_CHECK_MIN_LENGTH:
+            ri.u.checkMinLength.fail_jump = jump_val;
+            break;
+        case ROSE_INSTR_CHECK_STATE:
+            ri.u.checkState.fail_jump = jump_val;
+            break;
+        case ROSE_INSTR_SPARSE_ITER_BEGIN:
+            ri.u.sparseIterBegin.fail_jump = jump_val;
+            break;
+        case ROSE_INSTR_SPARSE_ITER_NEXT:
+            ri.u.sparseIterNext.fail_jump = jump_val;
             break;
         default:
+            assert(0); // Unhandled opcode?
             break;
         }
+
+        ri.target = JumpTarget::FIXUP_DONE;
     }
 
     return out;
@@ -2689,6 +2774,13 @@ u32 writeProgram(build_context &bc, const vector<RoseInstruction> &program) {
     assert(program.back().code() == ROSE_INSTR_END);
     assert(program.size() >= 1);
 
+    // This program must have been flattened; i.e. all check instructions must
+    // have their jump offsets set.
+    assert(all_of(begin(program), end(program), [](const RoseInstruction &ri) {
+        return ri.target == JumpTarget::NO_JUMP ||
+               ri.target == JumpTarget::FIXUP_DONE;
+    }));
+
     auto it = bc.program_cache.find(program);
     if (it != end(bc.program_cache)) {
         DEBUG_PRINTF("reusing cached program at %u\n", it->second);
@@ -2877,7 +2969,8 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
     }
     u32 look_count = verify_u32(look.size());
 
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND);
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND,
+                              JumpTarget::NEXT_BLOCK);
     ri.u.checkLookaround.index = look_idx;
     ri.u.checkLookaround.count = look_count;
     program.push_back(ri);
@@ -2898,7 +2991,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v,
     assert(!build.cc.streaming ||
            build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG);
 
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX);
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX, JumpTarget::NEXT_BLOCK);
     ri.u.checkLeftfix.queue = lni.queue;
     ri.u.checkLeftfix.lag = build.g[v].left.lag;
     ri.u.checkLeftfix.report = build.g[v].left.leftfix_report;
@@ -2906,7 +2999,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v,
 }
 
 static
-void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc,
+void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc,
                            RoseVertex v, vector<RoseInstruction> &program) {
     // Only relevant for roles that can be triggered by the anchored table.
     if (!build.isAnchored(v)) {
@@ -2919,11 +3012,150 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc,
         return;
     }
 
-    auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY);
+    auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY,
+                              JumpTarget::NEXT_BLOCK);
     ri.u.anchoredDelay.groups = build.g[v].groups;
     program.push_back(ri);
 }
 
+static
+void makeDedupe(const ReportID id, vector<RoseInstruction> &report_block) {
+    // Append a DEDUPE check whose failure jump targets the next block.
+    report_block.emplace_back(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK);
+    report_block.back().u.dedupe.report = id;
+}
+
+static
+void makeDedupeSom(const ReportID id, vector<RoseInstruction> &report_block) {
+    // Append a DEDUPE_SOM check whose failure jump targets the next block.
+    report_block.emplace_back(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK);
+    report_block.back().u.dedupeSom.report = id;
+}
+
+static
+void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som,
+                vector<RoseInstruction> &program) {
+    assert(id < build.rm.numReports());
+    const Report &report = build.rm.getReport(id);
+
+    vector<RoseInstruction> report_block;
+
+    // If this report has an exhaustion key, we can check it in the program
+    // rather than waiting until we're in the callback adaptor.
+    if (report.ekey != INVALID_EKEY) {
+        auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED,
+                                  JumpTarget::NEXT_BLOCK);
+        ri.u.checkExhausted.ekey = report.ekey;
+        report_block.push_back(move(ri));
+    }
+
+    // Similarly, we can handle min/max offset checks.
+    if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) {
+        auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS,
+                                  JumpTarget::NEXT_BLOCK);
+        ri.u.checkBounds.min_bound = report.minOffset;
+        ri.u.checkBounds.max_bound = report.maxOffset;
+        report_block.push_back(move(ri));
+    }
+
+    // Catch up -- everything except the INTERNAL_ROSE_CHAIN report needs this.
+    // TODO: this could be floated in front of all the reports and only done
+    // once. Note: emitted into program, i.e. ahead of report_block's checks.
+    if (report.type != INTERNAL_ROSE_CHAIN) {
+        program.emplace_back(ROSE_INSTR_CATCH_UP);
+    }
+
+    // External SOM reports need their SOM value calculated.
+    if (isExternalSomReport(report)) {
+        auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT);
+        ri.u.somFromReport.report = id;
+        report_block.push_back(move(ri));
+    }
+
+    // Min length constraint.
+    if (report.minLength > 0) {
+        assert(build.hasSom);
+        auto ri = RoseInstruction(ROSE_INSTR_CHECK_MIN_LENGTH,
+                                  JumpTarget::NEXT_BLOCK);
+        ri.u.checkMinLength.end_adj = report.offsetAdjust;
+        ri.u.checkMinLength.min_length = report.minLength;
+        report_block.push_back(move(ri));
+    }
+
+    if (report.quashSom) {
+        report_block.emplace_back(ROSE_INSTR_SOM_ZERO);
+    }
+
+    switch (report.type) {
+    case EXTERNAL_CALLBACK:
+        if (!has_som) {
+            makeDedupe(id, report_block);
+            if (report.ekey == INVALID_EKEY) {
+                report_block.emplace_back(ROSE_INSTR_REPORT);
+                report_block.back().u.report.report = id;
+            } else {
+                report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST);
+                report_block.back().u.reportExhaust.report = id;
+            }
+        } else { // has_som
+            makeDedupeSom(id, report_block);
+            if (report.ekey == INVALID_EKEY) {
+                report_block.emplace_back(ROSE_INSTR_REPORT_SOM);
+                report_block.back().u.reportSom.report = id;
+            } else {
+                report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST);
+                report_block.back().u.reportSomExhaust.report = id;
+            }
+        }
+        break;
+    case INTERNAL_SOM_LOC_SET:
+    case INTERNAL_SOM_LOC_SET_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_COPY:
+    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
+    case INTERNAL_SOM_LOC_SET_FROM:
+    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
+        if (has_som) {
+            report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE);
+            report_block.back().u.reportSomAware.report = id;
+        } else {
+            report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT);
+            report_block.back().u.reportSomInt.report = id;
+        }
+        break;
+    case INTERNAL_ROSE_CHAIN:
+        report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN);
+        report_block.back().u.reportChain.report = id;
+        break;
+    case EXTERNAL_CALLBACK_SOM_REL:
+    case EXTERNAL_CALLBACK_SOM_STORED:
+    case EXTERNAL_CALLBACK_SOM_ABS:
+    case EXTERNAL_CALLBACK_SOM_REV_NFA:
+        makeDedupeSom(id, report_block);
+        if (report.ekey == INVALID_EKEY) {
+            report_block.emplace_back(ROSE_INSTR_REPORT_SOM);
+            report_block.back().u.reportSom.report = id;
+        } else {
+            report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST);
+            report_block.back().u.reportSomExhaust.report = id;
+        }
+        break;
+    default:
+        assert(0);
+        throw CompileError("Unable to generate bytecode.");
+    }
+
+    assert(!report_block.empty());
+    report_block = flattenProgram({report_block});
+    assert(report_block.back().code() == ROSE_INSTR_END);
+    report_block.pop_back();
+    insert(&program, program.end(), report_block);
+}
+
 static
 void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v,
                      vector<RoseInstruction> &program) {
@@ -2947,25 +3179,8 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v,
         has_som = true;
     }
 
-    // Write program instructions for reports.
     for (ReportID id : g[v].reports) {
-        assert(id < build.rm.numReports());
-        const Report &ir = build.rm.getReport(id);
-        if (isInternalSomReport(ir)) {
-            auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM
-                                              : ROSE_INSTR_REPORT_SOM_INT);
-            ri.u.report.report = id;
-            program.push_back(ri);
-        } else if (ir.type == INTERNAL_ROSE_CHAIN) {
-            auto ri = RoseInstruction(ROSE_INSTR_REPORT_CHAIN);
-            ri.u.report.report = id;
-            program.push_back(ri);
-        } else {
-            auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM_KNOWN
-                                              : ROSE_INSTR_REPORT);
-            ri.u.report.report = id;
-            program.push_back(ri);
-        }
+        makeReport(build, id, has_som, program);
     }
 }
 
@@ -3093,10 +3308,10 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
     // Use the minimum literal length.
     u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));
 
-    u32 min_bound = g[e].minBound + lit_length;
-    u32 max_bound = g[e].maxBound == ROSE_BOUND_INF
-                        ? ROSE_BOUND_INF
-                        : g[e].maxBound + lit_length;
+    u64a min_bound = g[e].minBound + lit_length;
+    u64a max_bound = g[e].maxBound == ROSE_BOUND_INF
+                         ? ROSE_BOUND_INF
+                         : g[e].maxBound + lit_length;
 
     if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
         assert(g[u].max_offset != ROSE_BOUND_INF);
@@ -3110,7 +3325,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
     assert(max_bound <= ROSE_BOUND_INF);
     assert(min_bound <= max_bound);
 
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS);
+    // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET
+    // (max value of a u64a) to represent ROSE_BOUND_INF.
+    if (max_bound == ROSE_BOUND_INF) {
+        max_bound = MAX_OFFSET;
+    }
+
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK);
     ri.u.checkBounds.min_bound = min_bound;
     ri.u.checkBounds.max_bound = max_bound;
 
@@ -3138,7 +3359,8 @@ vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
 
     if (onlyAtEod(build, v)) {
         DEBUG_PRINTF("only at eod\n");
-        program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD));
+        program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD,
+                                          JumpTarget::NEXT_BLOCK));
     }
 
     if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
@@ -3287,7 +3509,8 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
 static
 void makeRoleCheckNotHandled(build_context &bc, RoseVertex v,
                              vector<RoseInstruction> &program) {
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED);
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED,
+                              JumpTarget::NEXT_BLOCK);
 
     u32 handled_key;
     if (contains(bc.handledKeys, v)) {
@@ -3328,48 +3551,42 @@ vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc,
 static
 u32 addPredBlocksSingle(
     map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-    u32 curr_offset, vector<RoseInstruction> &program) {
-    assert(predProgramLists.size() == 1);
+    vector<RoseInstruction> &program) {
 
-    u32 pred_state = predProgramLists.begin()->first;
-    auto subprog = flattenProgram(predProgramLists.begin()->second);
+    vector<vector<RoseInstruction>> prog_blocks;
 
-    // Check our pred state.
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE);
-    ri.u.checkState.index = pred_state;
-    program.push_back(ri);
-    curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+    for (const auto &m : predProgramLists) {
+        const u32 &pred_state = m.first;
+        auto subprog = flattenProgram(m.second);
 
-    // Add subprogram.
-    for (const auto &ri : subprog) {
-        program.push_back(ri);
-        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+        // Check our pred state.
+        auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE,
+                                  JumpTarget::NEXT_BLOCK);
+        ri.u.checkState.index = pred_state;
+        subprog.insert(begin(subprog), ri);
+        assert(subprog.back().code() == ROSE_INSTR_END);
+        subprog.pop_back();
+        prog_blocks.push_back(move(subprog));
     }
 
-    const u32 end_offset =
-        curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+    auto prog = flattenProgram(prog_blocks);
+    program.insert(end(program), begin(prog), end(prog));
+    return 0; // No iterator.
+}
 
-    // Fix up the instruction operands.
-    curr_offset = 0;
-    for (size_t i = 0; i < program.size(); i++) {
-        auto &ri = program[i];
-        switch (ri.code()) {
-        case ROSE_INSTR_CHECK_STATE:
-            ri.u.checkState.fail_jump = end_offset - curr_offset;
-            break;
-        default:
-            break;
-        }
-        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+static
+u32 programLength(const vector<RoseInstruction> &program) {
+    u32 len = 0;
+    for (const auto &ri : program) {
+        len += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
     }
-
-    return 0; // No iterator.
+    return len;
 }
 
 static
 u32 addPredBlocksMulti(build_context &bc,
                     map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                    u32 curr_offset, vector<RoseInstruction> &program) {
+                    vector<RoseInstruction> &program) {
     assert(!predProgramLists.empty());
 
     // First, add the iterator itself.
@@ -3386,10 +3603,12 @@ u32 addPredBlocksMulti(build_context &bc,
 
     // Construct our program, starting with the SPARSE_ITER_BEGIN
     // instruction, keeping track of the jump offset for each sub-program.
+    vector<RoseInstruction> sparse_program;
     vector<u32> jump_table;
 
-    program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
-    curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+    sparse_program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN,
+                                             JumpTarget::PROGRAM_END));
+    u32 curr_offset = programLength(program) + programLength(sparse_program);
 
     for (const auto &e : predProgramLists) {
         DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
@@ -3402,62 +3621,61 @@ u32 addPredBlocksMulti(build_context &bc,
             // with a SPARSE_ITER_NEXT.
             assert(!subprog.empty());
             assert(subprog.back().code() == ROSE_INSTR_END);
-            subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
+            subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT,
+                                             JumpTarget::PROGRAM_END);
         }
 
-        for (const auto &ri : subprog) {
-            program.push_back(ri);
-            curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
-        }
+        curr_offset += programLength(subprog);
+        insert(&sparse_program, end(sparse_program), subprog);
     }
 
-    const u32 end_offset =
-        curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+    // Strip the END instruction from the last block.
+    assert(sparse_program.back().code() == ROSE_INSTR_END);
+    sparse_program.pop_back();
+
+    sparse_program = flattenProgram({sparse_program});
 
     // Write the jump table into the bytecode.
     const u32 jump_table_offset =
         add_to_engine_blob(bc, begin(jump_table), end(jump_table));
 
-    // Fix up the instruction operands.
+    // Write jump table and iterator offset into sparse iter instructions.
     auto keys_it = begin(keys);
-    curr_offset = 0;
-    for (size_t i = 0; i < program.size(); i++) {
-        auto &ri = program[i];
+    for (auto &ri : sparse_program) {
         switch (ri.code()) {
         case ROSE_INSTR_SPARSE_ITER_BEGIN:
             ri.u.sparseIterBegin.iter_offset = iter_offset;
             ri.u.sparseIterBegin.jump_table = jump_table_offset;
-            ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
             break;
         case ROSE_INSTR_SPARSE_ITER_NEXT:
             ri.u.sparseIterNext.iter_offset = iter_offset;
             ri.u.sparseIterNext.jump_table = jump_table_offset;
             assert(keys_it != end(keys));
             ri.u.sparseIterNext.state = *keys_it++;
-            ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
             break;
         default:
             break;
         }
-        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
     }
 
+    program.insert(end(program), begin(sparse_program), end(sparse_program));
+
     return iter_offset;
 }
 
 static
 u32 addPredBlocks(build_context &bc,
                   map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                  u32 curr_offset, vector<RoseInstruction> &program,
+                  vector<RoseInstruction> &program,
                   bool force_sparse_iter) {
     const size_t num_preds = predProgramLists.size();
     if (num_preds == 0) {
         program = flattenProgram({program});
         return 0; // No iterator.
     } else if (!force_sparse_iter && num_preds == 1) {
-        return addPredBlocksSingle(predProgramLists, curr_offset, program);
+        return addPredBlocksSingle(predProgramLists, program);
     } else {
-        return addPredBlocksMulti(bc, predProgramLists, curr_offset, program);
+        return addPredBlocksMulti(bc, predProgramLists, program);
     }
 }
 
@@ -3481,8 +3699,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
     // Add blocks to deal with non-root edges (triggered by sparse iterator or
     // mmbit_isset checks). This operation will flatten the program up to this
     // point.
-    u32 iter_offset =
-        addPredBlocks(bc, predProgramLists, curr_offset, program, false);
+    u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false);
 
     // If we have a root program, replace the END instruction with it. Note
     // that the root program has already been flattened.
@@ -3823,10 +4040,8 @@ vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build,
         makeRoleCheckNotHandled(bc, v, program);
     }
 
-    for (const auto &report : g[v].reports) {
-        auto ri = RoseInstruction(ROSE_INSTR_REPORT_EOD);
-        ri.u.report.report = report;
-        program.push_back(ri);
+    for (const auto &id : g[v].reports) {
+        makeReport(build, id, false, program);
     }
 
     return program;
@@ -3870,7 +4085,7 @@ pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
 
     // Note: we force the use of a sparse iterator for the EOD program so we
     // can easily guard EOD execution at runtime.
-    u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true);
+    u32 iter_offset = addPredBlocks(bc, predProgramLists, program, true);
 
     assert(program.size() > 1);
     return {writeProgram(bc, program), iter_offset};
index 25ec7bae66dca36da9b3f6c4d80ae523f0336b31..adf737264d896c87f7bb87293dc2908c91f4b1b9 100644 (file)
@@ -41,6 +41,7 @@
 #include "nfa/nfa_dump_api.h"
 #include "nfa/nfa_internal.h"
 #include "util/dump_charclass.h"
+#include "util/internal_report.h"
 #include "util/multibit_internal.h"
 #include "util/multibit.h"
 
@@ -152,6 +153,61 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
     }
 }
 
+/**
+ * \brief Collect every value yielded by walking sparse iterator \p it over a
+ * multibit of \p num_bits bits in which all bits are switched on.
+ *
+ * Returns an empty vector when \p num_bits is zero; otherwise the values are
+ * returned in the order the iterator produces them.
+ */
+static
+vector<u32> sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) {
+    vector<u32> keys;
+
+    // Nothing to walk for an empty multibit.
+    if (num_bits == 0) {
+        return keys;
+    }
+
+    // Temporary multibit and iterator state, used only for this walk.
+    vector<u8> bits(mmbit_size(num_bits), u8{0xff}); // All bits on.
+    vector<mmbit_sparse_state> state(MAX_SPARSE_ITER_STATES);
+
+    const u8 *b = bits.data();
+    mmbit_sparse_state *s = state.data();
+
+    // Step through the iterator until it signals MMB_INVALID, recording each
+    // value it returns along the way.
+    u32 idx = 0;
+    u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s);
+    while (i != MMB_INVALID) {
+        keys.push_back(i);
+        i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s);
+    }
+
+    return keys;
+}
+
+/**
+ * \brief Print the jump table used by a SPARSE_ITER_BEGIN instruction.
+ *
+ * Emits one line per key yielded by the instruction's sparse iterator (walked
+ * over t->rolesWithStateCount bits), pairing each key with the next u32 entry
+ * read from the jump table, shown as "+N".
+ */
+static
+void dumpJumpTable(ofstream &os, const RoseEngine *t,
+                   const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) {
+    // Resolve both instruction operands through loadFromByteCodeOffset.
+    auto *it =
+        (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset);
+    auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table);
+
+    // Jump entries are consumed sequentially, one per key.
+    // NOTE(review): assumes the table holds at least one entry per key
+    // yielded by the iterator -- confirm against the bytecode builder.
+    for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) {
+        os << "      " << std::setw(4) << std::setfill(' ') << key << " : +"
+           << *jumps << endl;
+        ++jumps;
+    }
+}
+
+/**
+ * \brief Print a one-line summary of the internal_report at index \p report.
+ *
+ * The type and onmatch fields are always printed; ekey and dkey are printed
+ * only when they differ from INVALID_EKEY and MO_INVALID_IDX respectively.
+ */
+static
+void dumpReport(ofstream &os, const RoseEngine *t, ReportID report) {
+    // Index into the internal_report array located at t->intReportOffset.
+    const auto *ir =
+        (const internal_report *)loadFromByteCodeOffset(t, t->intReportOffset) +
+        report;
+    // NOTE(review): presumably widened to u32 so that a narrow integer type
+    // is streamed as a number rather than as a character -- confirm.
+    os << "      type=" << u32{ir->type};
+    os << ", onmatch=" << ir->onmatch;
+    if (ir->ekey != INVALID_EKEY) {
+        os << ", ekey=" << ir->ekey;
+    }
+    if (ir->dkey != MO_INVALID_IDX) {
+        os << ", dkey=" << ir->dkey;
+    }
+
+    os << endl;
+}
+
 static
 string dumpStrMask(const u8 *mask, size_t len) {
     ostringstream oss;
@@ -211,6 +267,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_BOUNDS) {
+                os << "    min_bound " << ri->min_bound << endl;
+                os << "    max_bound " << ri->max_bound << endl;
+                os << "    fail_jump +" << ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_NOT_HANDLED) {
                 os << "    key " << ri->key << endl;
                 os << "    fail_jump +" << ri->fail_jump << endl;
@@ -239,6 +302,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CATCH_UP) {}
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SOM_ADJUST) {
                 os << "    distance " << ri->distance << endl;
             }
@@ -250,6 +316,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(SOM_FROM_REPORT) {
+                os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(SOM_ZERO) {}
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(TRIGGER_INFIX) {
                 os << "    queue " << ri->queue << endl;
                 os << "    event " << ri->event << endl;
@@ -263,33 +338,72 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT) {
+            PROGRAM_CASE(DEDUPE) {
                 os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
+                os << "    fail_jump +" << ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT_CHAIN) {
+            PROGRAM_CASE(DEDUPE_SOM) {
                 os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
+                os << "    fail_jump +" << ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT_EOD) {
+            PROGRAM_CASE(REPORT_CHAIN) {
                 os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
             }
             PROGRAM_NEXT_INSTRUCTION
 
             PROGRAM_CASE(REPORT_SOM_INT) {
                 os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(REPORT_SOM_AWARE) {
+                os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(REPORT) {
+                os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(REPORT_EXHAUST) {
+                os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
             }
             PROGRAM_NEXT_INSTRUCTION
 
             PROGRAM_CASE(REPORT_SOM) {
                 os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(REPORT_SOM_KNOWN) {
+            PROGRAM_CASE(REPORT_SOM_EXHAUST) {
                 os << "    report " << ri->report << endl;
+                dumpReport(os, t, ri->report);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_EXHAUSTED) {
+                os << "    ekey " << ri->ekey << endl;
+                os << "    fail_jump +" << ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MIN_LENGTH) {
+                os << "    end_adj " << ri->end_adj << endl;
+                os << "    min_length " << ri->min_length << endl;
+                os << "    fail_jump +" << ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -319,6 +433,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             PROGRAM_CASE(SPARSE_ITER_BEGIN) {
                 os << "    iter_offset " << ri->iter_offset << endl;
                 os << "    jump_table " << ri->jump_table << endl;
+                dumpJumpTable(os, t, ri);
                 os << "    fail_jump +" << ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
index 37017ca0a2cbe6e4f9329083984681a4e1ce4952..81852f097ba989110891286792048a50e6c0691f 100644 (file)
@@ -51,16 +51,33 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_LOOKAROUND,  //!< Lookaround check.
     ROSE_INSTR_CHECK_LEFTFIX,     //!< Leftfix must be in accept state.
     ROSE_INSTR_PUSH_DELAYED,      //!< Push delayed literal matches.
+    ROSE_INSTR_CATCH_UP,          //!< Catch up engines, anchored matches.
     ROSE_INSTR_SOM_ADJUST,        //!< Set SOM from a distance to EOM.
     ROSE_INSTR_SOM_LEFTFIX,       //!< Acquire SOM from a leftfix engine.
+    ROSE_INSTR_SOM_FROM_REPORT,   //!< Acquire SOM from an internal_report.
+    ROSE_INSTR_SOM_ZERO,          //!< Set SOM to zero.
     ROSE_INSTR_TRIGGER_INFIX,     //!< Trigger an infix engine.
     ROSE_INSTR_TRIGGER_SUFFIX,    //!< Trigger a suffix engine.
-    ROSE_INSTR_REPORT,            //!< Fire an ordinary report.
+    ROSE_INSTR_DEDUPE,            //!< Run deduplication for report.
+    ROSE_INSTR_DEDUPE_SOM,        //!< Run deduplication for SOM report.
     ROSE_INSTR_REPORT_CHAIN,      //!< Fire a chained report (MPV).
-    ROSE_INSTR_REPORT_EOD,        //!< Fire a callback at EOD time.
     ROSE_INSTR_REPORT_SOM_INT,    //!< Manipulate SOM only.
-    ROSE_INSTR_REPORT_SOM,        //!< Manipulate SOM and report.
-    ROSE_INSTR_REPORT_SOM_KNOWN,  //!< Rose role knows its SOM offset.
+    ROSE_INSTR_REPORT_SOM_AWARE,  //!< Manipulate SOM from SOM-aware source.
+
+    /** \brief Fire a report. */
+    ROSE_INSTR_REPORT,
+
+    /** \brief Fire an exhaustible report. */
+    ROSE_INSTR_REPORT_EXHAUST,
+
+    /** \brief Fire a SOM report. */
+    ROSE_INSTR_REPORT_SOM,
+
+    /** \brief Fire an exhaustible SOM report. */
+    ROSE_INSTR_REPORT_SOM_EXHAUST,
+
+    ROSE_INSTR_CHECK_EXHAUSTED,   //!< Check if an ekey has already been set.
+    ROSE_INSTR_CHECK_MIN_LENGTH,  //!< Check (EOM - SOM) against min length.
     ROSE_INSTR_SET_STATE,         //!< Switch a state index on.
     ROSE_INSTR_SET_GROUPS,        //!< Set some literal group bits.
     ROSE_INSTR_SQUASH_GROUPS,     //!< Conditionally turn off some groups.
@@ -106,8 +123,8 @@ struct ROSE_STRUCT_CHECK_ONLY_EOD {
 
 struct ROSE_STRUCT_CHECK_BOUNDS {
     u8 code; //!< From enum RoseInstructionCode.
-    u32 min_bound; //!< Min distance from zero.
-    u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF).
+    u64a min_bound; //!< Min distance from zero.
+    u64a max_bound; //!< Max distance from zero.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
@@ -138,6 +155,10 @@ struct ROSE_STRUCT_PUSH_DELAYED {
     u32 index; // Delay literal index (relative to first delay lit).
 };
 
+/** \brief Layout of the CATCH_UP instruction; it has no operands beyond the
+ * instruction code. */
+struct ROSE_STRUCT_CATCH_UP {
+    u8 code; //!< From enum RoseInstructionCode.
+};
+
 struct ROSE_STRUCT_SOM_ADJUST {
     u8 code; //!< From enum RoseInstructionCode.
     u32 distance; //!< Distance to EOM.
@@ -149,6 +170,15 @@ struct ROSE_STRUCT_SOM_LEFTFIX {
     u32 lag; //!< Lag of leftfix for this case.
 };
 
+/** \brief Layout of the SOM_FROM_REPORT instruction, which names the report
+ * that SOM is to be taken from. */
+struct ROSE_STRUCT_SOM_FROM_REPORT {
+    u8 code; //!< From enum RoseInstructionCode.
+    ReportID report; //!< EXTERNAL_CALLBACK_SOM_* report to use.
+};
+
+/** \brief Layout of the SOM_ZERO instruction; it has no operands beyond the
+ * instruction code. */
+struct ROSE_STRUCT_SOM_ZERO {
+    u8 code; //!< From enum RoseInstructionCode.
+};
+
 struct ROSE_STRUCT_TRIGGER_INFIX {
     u8 code; //!< From enum RoseInstructionCode.
     u8 cancel; //!< Cancels previous top event.
@@ -162,17 +192,19 @@ struct ROSE_STRUCT_TRIGGER_SUFFIX {
     u32 event; //!< Queue event, from MQE_*.
 };
 
-struct ROSE_STRUCT_REPORT {
+struct ROSE_STRUCT_DEDUPE {
     u8 code; //!< From enum RoseInstructionCode.
     ReportID report;
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
-struct ROSE_STRUCT_REPORT_CHAIN {
+struct ROSE_STRUCT_DEDUPE_SOM {
     u8 code; //!< From enum RoseInstructionCode.
     ReportID report;
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
-struct ROSE_STRUCT_REPORT_EOD {
+struct ROSE_STRUCT_REPORT_CHAIN {
     u8 code; //!< From enum RoseInstructionCode.
     ReportID report;
 };
@@ -182,16 +214,49 @@ struct ROSE_STRUCT_REPORT_SOM_INT {
     ReportID report;
 };
 
+/** \brief Layout of the REPORT_SOM_AWARE instruction: instruction code plus
+ * the ID of the report it refers to. */
+struct ROSE_STRUCT_REPORT_SOM_AWARE {
+    u8 code; //!< From enum RoseInstructionCode.
+    ReportID report;
+};
+
+/** \brief Layout of the REPORT instruction: instruction code plus the ID of
+ * the report it refers to. */
+struct ROSE_STRUCT_REPORT {
+    u8 code; //!< From enum RoseInstructionCode.
+    ReportID report;
+};
+
+/** \brief Layout of the REPORT_EXHAUST instruction: instruction code plus the
+ * ID of the report it refers to. */
+struct ROSE_STRUCT_REPORT_EXHAUST {
+    u8 code; //!< From enum RoseInstructionCode.
+    ReportID report;
+};
+
 struct ROSE_STRUCT_REPORT_SOM {
     u8 code; //!< From enum RoseInstructionCode.
     ReportID report;
 };
 
-struct ROSE_STRUCT_REPORT_SOM_KNOWN {
+/** \brief Layout of the REPORT_SOM_EXHAUST instruction: instruction code plus
+ * the ID of the report it refers to. */
+struct ROSE_STRUCT_REPORT_SOM_EXHAUST {
+    u8 code; //!< From enum RoseInstructionCode.
+    ReportID report;
+};
+
+// NOTE(review): no ROSE_INSTR_REPORT_SOM_EXT opcode or dump case is visible
+// alongside this struct -- confirm whether it is still needed.
+struct ROSE_STRUCT_REPORT_SOM_EXT {
     u8 code; //!< From enum RoseInstructionCode.
     ReportID report;
 };
 
+/** \brief Layout of the CHECK_EXHAUSTED instruction: an exhaustion key to
+ * test and a forward jump to take on failure. */
+struct ROSE_STRUCT_CHECK_EXHAUSTED {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 ekey; //!< Exhaustion key to check.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+/** \brief Layout of the CHECK_MIN_LENGTH instruction: an adjustment applied
+ * to EOM, the minimum SOM-to-EOM distance required, and a forward jump to
+ * take on failure. */
+struct ROSE_STRUCT_CHECK_MIN_LENGTH {
+    u8 code; //!< From enum RoseInstructionCode.
+    s32 end_adj; //!< Offset adjustment to add to EOM first.
+    u64a min_length; //!< Minimum distance from SOM to EOM.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 struct ROSE_STRUCT_SET_STATE {
     u8 code; //!< From enum RoseInstructionCode.
     u32 index; //!< State index in multibit.
index d51db18b3d1b1032787cdcacbb590aa538605b33..e38434fdcbbbbe33fc199a09d5ba071879a91695 100644 (file)
@@ -47,6 +47,7 @@
 #include "rose/rose.h"
 #include "rose/runtime.h"
 #include "database.h"
+#include "report.h"
 #include "scratch.h"
 #include "som/som_runtime.h"
 #include "som/som_stream.h"
@@ -56,8 +57,6 @@
 #include "util/fatbit.h"
 #include "util/multibit.h"
 
-#define DEDUPE_MATCHES
-
 static really_inline
 void prefetch_data(const char *data, unsigned length) {
     __builtin_prefetch(data);
@@ -170,306 +169,6 @@ void setBroken(char *state, u8 broken) {
     ts->broken = broken;
 }
 
-static really_inline
-int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch,
-                  char is_simple, char do_som) {
-    assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
-    assert(scratch);
-    assert(scratch->magic == SCRATCH_MAGIC);
-
-    struct core_info *ci = &scratch->core_info;
-    const struct RoseEngine *rose = ci->rose;
-    DEBUG_PRINTF("internal report %u\n", id);
-    const struct internal_report *ri = getInternalReport(rose, id);
-
-    assert(isExternalReport(ri)); /* only external reports should reach here */
-
-    s32 offset_adj = ri->offsetAdjust;
-    UNUSED u32 dkey = ri->dkey;
-    u64a to_offset = offset;
-    u64a from_offset = 0;
-    UNUSED u32 dkeyCount = rose->dkeyCount;
-
-    u32 flags = 0;
-#ifndef RELEASE_BUILD
-    if (offset_adj) {
-        // alert testing tools that we've got adjusted matches
-        flags |= HS_MATCH_FLAG_ADJUSTED;
-    }
-#endif
-
-    DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
-                 "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch,
-                 offset_adj);
-
-    if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */
-        DEBUG_PRINTF("pre broken - halting\n");
-        return MO_HALT_MATCHING;
-    }
-
-    if (!is_simple && ri->hasBounds) {
-        assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
-        assert(ri->minOffset <= ri->maxOffset);
-        if (offset < ri->minOffset || offset > ri->maxOffset) {
-            DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
-                         offset, ri->minOffset, ri->maxOffset);
-            return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
-        }
-    }
-
-    if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
-        DEBUG_PRINTF("ate exhausted match\n");
-        return MO_CONTINUE_MATCHING;
-    }
-
-    if (ri->type == EXTERNAL_CALLBACK) {
-        from_offset = 0;
-    } else if (do_som) {
-        from_offset = handleSomExternal(scratch, ri, to_offset);
-    }
-
-    to_offset += offset_adj;
-    assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
-
-    if (do_som && ri->minLength) {
-        if (from_offset != HS_OFFSET_PAST_HORIZON &&
-                (to_offset - from_offset < ri->minLength)) {
-            return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
-        }
-        if (ri->quashSom) {
-            from_offset = 0;
-        }
-    }
-
-    DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
-                 from_offset, to_offset, ri->onmatch, ci->userContext);
-
-    int halt = 0;
-
-    if (do_som || dkey != MO_INVALID_IDX) {
-        if (offset != scratch->deduper.current_report_offset) {
-            assert(scratch->deduper.current_report_offset == ~0ULL ||
-                   scratch->deduper.current_report_offset < offset);
-            if (offset == scratch->deduper.current_report_offset + 1) {
-                fatbit_clear(scratch->deduper.log[offset % 2]);
-            } else {
-                fatbit_clear(scratch->deduper.log[0]);
-                fatbit_clear(scratch->deduper.log[1]);
-            }
-
-            DEBUG_PRINTF("adj dedupe offset %hhd\n", do_som);
-            if (do_som) {
-                halt = flushStoredSomMatches(scratch, offset);
-                if (halt) {
-                    goto exit;
-                }
-            }
-            scratch->deduper.current_report_offset = offset;
-        }
-    }
-
-#ifdef DEDUPE_MATCHES
-    if (dkey != MO_INVALID_IDX) {
-        if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) {
-            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
-            assert(offset_adj == 0 || offset_adj == -1);
-            if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount,
-                           dkey)) {
-                /* we have already raised this report at this offset, squash dupe
-                 * match. */
-                DEBUG_PRINTF("dedupe\n");
-                goto exit;
-            }
-        } else if (do_som) {
-            /* SOM external event */
-            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
-            assert(offset_adj == 0 || offset_adj == -1);
-            u64a *starts = scratch->deduper.som_start_log[to_offset % 2];
-            if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount,
-                           dkey)) {
-                starts[dkey] = MIN(starts[dkey], from_offset);
-            } else {
-                starts[dkey] = from_offset;
-            }
-
-            if (offset_adj) {
-                scratch->deduper.som_log_dirty |= 1;
-            } else {
-                scratch->deduper.som_log_dirty |= 2;
-            }
-
-            goto exit;
-        }
-    }
-#endif
-
-    halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
-                            flags, ci->userContext);
-#ifdef DEDUPE_MATCHES
-exit:
-#endif
-    if (halt) {
-        DEBUG_PRINTF("callback requested to terminate matches\n");
-
-        setBroken(ci->state, BROKEN_FROM_USER);
-        ci->broken = BROKEN_FROM_USER;
-
-        return MO_HALT_MATCHING;
-    }
-
-    if (!is_simple && ri->ekey != END_EXHAUST) {
-        markAsMatched(ci->exhaustionVector, ri->ekey);
-        return MO_CONTINUE_MATCHING;
-    } else {
-        return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
-    }
-}
-
-static really_inline
-int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id,
-                     struct hs_scratch *scratch, char is_simple) {
-    assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
-    assert(scratch);
-    assert(scratch->magic == SCRATCH_MAGIC);
-
-    u32 flags = 0;
-
-    struct core_info *ci = &scratch->core_info;
-    const struct RoseEngine *rose = ci->rose;
-    const struct internal_report *ri = getInternalReport(rose, id);
-
-    /* internal events should be handled by rose directly */
-    assert(ri->type == EXTERNAL_CALLBACK);
-
-    DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
-                 "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch,
-                 ri->offsetAdjust);
-
-    if (unlikely(can_stop_matching(scratch))) {
-        DEBUG_PRINTF("pre broken - halting\n");
-        return MO_HALT_MATCHING;
-    }
-
-    if (!is_simple && ri->hasBounds) {
-        assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
-        if (to_offset < ri->minOffset || to_offset > ri->maxOffset) {
-            DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
-                         to_offset, ri->minOffset, ri->maxOffset);
-            return MO_CONTINUE_MATCHING;
-        }
-    }
-
-    int halt = 0;
-
-    if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
-        DEBUG_PRINTF("ate exhausted match\n");
-        goto do_return;
-    }
-
-#ifdef DEDUPE_MATCHES
-    u64a offset = to_offset;
-#endif
-
-    to_offset += ri->offsetAdjust;
-    assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
-
-    if (!is_simple && ri->minLength) {
-        if (from_offset != HS_OFFSET_PAST_HORIZON &&
-                (to_offset - from_offset < ri->minLength)) {
-            return MO_CONTINUE_MATCHING;
-        }
-        if (ri->quashSom) {
-            from_offset = 0;
-        }
-    }
-
-    DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
-                 from_offset, to_offset, ri->onmatch, ci->userContext);
-
-#ifndef RELEASE_BUILD
-    if (ri->offsetAdjust != 0) {
-        // alert testing tools that we've got adjusted matches
-        flags |= HS_MATCH_FLAG_ADJUSTED;
-    }
-#endif
-
-#ifdef DEDUPE_MATCHES
-    u32 dkeyCount = rose->dkeyCount;
-
-    if (offset != scratch->deduper.current_report_offset) {
-
-        assert(scratch->deduper.current_report_offset == ~0ULL
-               || scratch->deduper.current_report_offset < offset);
-        if (offset == scratch->deduper.current_report_offset + 1) {
-            fatbit_clear(scratch->deduper.log[offset % 2]);
-        } else {
-            fatbit_clear(scratch->deduper.log[0]);
-            fatbit_clear(scratch->deduper.log[1]);
-        }
-
-        halt = flushStoredSomMatches(scratch, offset);
-        if (halt) {
-            goto do_return;
-        }
-
-        scratch->deduper.current_report_offset = offset;
-    }
-
-    u32 dkey = ri->dkey;
-    if (dkey != MO_INVALID_IDX) {
-        if (ri->quashSom) {
-            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
-            assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1);
-            if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount,
-                           dkey)) {
-                /* we have already raised this report at this offset, squash
-                 * dupe match. */
-                DEBUG_PRINTF("dedupe\n");
-                goto do_return;
-            }
-        } else {
-            /* SOM external event */
-            DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
-            assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1);
-            u64a *starts = scratch->deduper.som_start_log[to_offset % 2];
-            if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount,
-                           dkey)) {
-                starts[dkey] = MIN(starts[dkey], from_offset);
-            } else {
-                starts[dkey] = from_offset;
-            }
-
-            if (ri->offsetAdjust) {
-                scratch->deduper.som_log_dirty |= 1;
-            } else {
-                scratch->deduper.som_log_dirty |= 2;
-            }
-
-            goto do_return;
-        }
-    }
-#endif
-
-    halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
-                            flags, ci->userContext);
-
-    if (!is_simple) {
-        markAsMatched(ci->exhaustionVector, ri->ekey);
-    }
-
-do_return:
-    if (halt) {
-        DEBUG_PRINTF("callback requested to terminate matches\n");
-
-        setBroken(ci->state, BROKEN_FROM_USER);
-        ci->broken = BROKEN_FROM_USER;
-
-        return MO_HALT_MATCHING;
-    }
-
-    return MO_CONTINUE_MATCHING;
-}
-
 static really_inline
 hwlmcb_rv_t multiDirectAdaptor(u64a real_end, ReportID direct_id, void *context,
                                struct core_info *ci, char is_simple,
@@ -1055,8 +754,7 @@ hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags,
 static really_inline
 void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
     const struct RoseEngine *rose = id->rose;
-    char *state = getMultiState(id);
-    u8 broken = getBroken(state);
+    u8 broken = scratch->core_info.broken;
 
     if (broken) {
         DEBUG_PRINTF("stream already broken\n");
@@ -1076,8 +774,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
 static never_inline
 void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
     const struct RoseEngine *t = id->rose;
-    char *state = getMultiState(id);
-    u8 broken = getBroken(state);
+    u8 broken = scratch->core_info.broken;
 
     if (broken) {
         DEBUG_PRINTF("stream already broken\n");
@@ -1372,9 +1069,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
     if (!id->offset && rose->boundary.reportZeroOffset) {
         DEBUG_PRINTF("zero reports\n");
         processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch);
-        broken = getBroken(state);
+        broken = scratch->core_info.broken;
         if (unlikely(broken)) {
             DEBUG_PRINTF("stream is broken, halting scan\n");
+            setBroken(state, broken);
             if (broken == BROKEN_FROM_USER) {
                 return HS_SCAN_TERMINATED;
             } else {
@@ -1400,7 +1098,6 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
     if (rose->hasSom && !told_to_stop_matching(scratch)) {
         int halt = flushStoredSomMatches(scratch, ~0ULL);
         if (halt) {
-            setBroken(state, BROKEN_FROM_USER);
             scratch->core_info.broken = BROKEN_FROM_USER;
         }
     }
@@ -1413,6 +1110,7 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
             storeSomToStream(scratch, id->offset);
         }
     } else if (told_to_stop_matching(scratch)) {
+        setBroken(state, BROKEN_FROM_USER);
         return HS_SCAN_TERMINATED;
     } else { /* exhausted */
         setBroken(state, BROKEN_EXHAUSTED);
index 23f2b2827a14e268e71c1f527c6b1db19dfac28c..418fcbab096b37ac9762fdc9cb332b2f824abdf4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -487,6 +487,7 @@ int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log,
         int halt = ci->userCallback(onmatch, from_offset, offset, flags,
                                     ci->userContext);
         if (halt) {
+            ci->broken = BROKEN_FROM_USER;
             return 1;
         }
     }
index 0e5bccf818c0732c2cebddcd85909377eb07d877..c4f3bd8c02b1961e407a1a891215154d19c7747e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -184,6 +184,11 @@ bool isExternalReport(const Report &r) {
     return true;
 }
 
+static inline // True iff r passes isExternalReport() and its type is not EXTERNAL_CALLBACK.
+bool isExternalSomReport(const Report &r) {
+    return r.type != EXTERNAL_CALLBACK && isExternalReport(r); // NOTE(review): presumably leaves only the SOM-carrying external report types; confirm against the report type enum.
+}
+
 static inline
 bool operator<(const Report &a, const Report &b) {
     ORDER_CHECK(type);