src/rose/match.h
src/rose/match.c
src/rose/miracle.h
+ src/rose/program_runtime.h
src/rose/runtime.h
src/rose/rose.h
src/rose/rose_internal.h
#include "catchup.h"
#include "match.h"
+#include "program_runtime.h"
#include "rose.h"
#include "util/fatbit.h"
}
static rose_inline
-int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
+int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch) {
- if (!t->eodIterOffset) {
+ if (!t->eodIterProgramOffset) {
return MO_CONTINUE_MATCHING;
}
- DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset);
+ DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
- const u32 *programTable = getByOffset(t, t->eodProgramTableOffset);
- const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
- assert(ISALIGNED(programTable));
- assert(ISALIGNED(it));
-
- // Sparse iterator state was allocated earlier
- struct mmbit_sparse_state *s = scratch->sparse_iter_state;
- struct fatbit *handled_roles = scratch->handled_roles;
-
- const u32 numStates = t->rolesWithStateCount;
-
- void *role_state = getRoleState(state);
- u32 idx = 0;
- u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);
-
- fatbit_clear(handled_roles);
-
- int work_done = 0; // not read from in this path.
-
- for (; i != MMB_INVALID;
- i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
- DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
- u32 programOffset = programTable[idx];
- u64a som = 0;
- if (roseRunRoleProgram(t, programOffset, offset, &som,
- &(scratch->tctxt),
- &work_done) == HWLM_TERMINATE_MATCHING) {
- return MO_HALT_MATCHING;
- }
+ int work_done = 0;
+ if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0,
+ &work_done) == HWLM_TERMINATE_MATCHING) {
+ return MO_HALT_MATCHING;
}
return MO_CONTINUE_MATCHING;
}
}
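+/* Runs the unconditional EOD program attached to the Rose engine, if there is
+ * one. This replaces the old special EOD event literal path. */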
+static rose_inline
+int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
+ struct hs_scratch *scratch) {
+ if (!t->eodProgramOffset) {
+ return MO_CONTINUE_MATCHING;
+ }
+
+ DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset);
+
+ // There should be no pending delayed literals.
+ assert(!scratch->tctxt.filledDelayedSlots);
+
+ int work_done = 0;
+ if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0,
+ &work_done) == HWLM_TERMINATE_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
static really_inline
void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch, const char is_streaming) {
assert(!scratch->core_info.buf || !scratch->core_info.hbuf);
assert(!can_stop_matching(scratch));
- // Fire the special EOD event literal.
- if (t->hasEodEventLiteral) {
- DEBUG_PRINTF("firing eod event id %u at offset %llu\n",
- t->eodLiteralId, offset);
- const struct core_info *ci = &scratch->core_info;
- size_t len = ci->buf ? ci->len : ci->hlen;
- assert(len || !ci->buf); /* len may be 0 if no history is required
- * (bounds checks only can lead to this) */
-
- roseRunEvent(len, t->eodLiteralId, &scratch->tctxt);
- if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("user told us to stop\n");
- return;
- }
+ // Run the unconditional EOD program.
+ if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) {
+ return;
}
roseCheckNfaEod(t, state, scratch, offset, is_streaming);
- if (!t->eodIterOffset && !t->ematcherOffset) {
+ if (!t->eodIterProgramOffset && !t->ematcherOffset) {
DEBUG_PRINTF("no eod accepts\n");
return;
}
// Handle pending EOD reports.
- int itrv = roseEodRunIterator(t, state, offset, scratch);
+ int itrv = roseEodRunIterator(t, offset, scratch);
if (itrv == MO_HALT_MATCHING) {
return;
}
cleanupAfterEodMatcher(t, state, offset, scratch);
// Fire any new EOD reports.
- roseEodRunIterator(t, state, offset, scratch);
+ roseEodRunIterator(t, offset, scratch);
roseCheckEodSuffixes(t, state, offset, scratch);
}
#include "infix.h"
#include "match.h"
#include "miracle.h"
+#include "program_runtime.h"
#include "rose_program.h"
#include "rose.h"
#include "som/som_runtime.h"
assert(id < t->literalCount);
const struct RoseLiteral *tl = &getLiteralTable(t)[id];
- DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n",
- id, tl->minDepth, tl->groups);
+ DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups);
pushDelayedMatches(tl, real_end, tctx);
return tctx->groups;
}
-static really_inline
-hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t,
- struct hs_scratch *scratch, u32 qi, s64a loc,
- char is_mpv, char in_anchored,
- char in_catchup) {
- struct RoseContext *tctxt = &scratch->tctxt;
- u8 *aa = getActiveLeafArray(t, tctxt->state);
- struct fatbit *activeQueues = scratch->aqa;
- u32 aaCount = t->activeArrayCount;
- u32 qCount = t->queueCount;
-
- struct mq *q = &scratch->queues[qi];
- DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n",
- q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset);
- if (q_cur_loc(q) == loc) {
- /* too many tops enqueued at the one spot; need to flatten this queue.
- * We can use the full catchups as it will short circuit as we are
- * already at this location. It also saves waking everybody up */
- pushQueueNoMerge(q, MQE_END, loc);
- nfaQueueExec(q->nfa, q, loc);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else if (!in_catchup) {
- if (is_mpv) {
- tctxt->next_mpv_offset = 0; /* force us to catch the mpv */
- if (loc + scratch->core_info.buf_offset
- <= tctxt->minNonMpvMatchOffset) {
- DEBUG_PRINTF("flushing chained\n");
- if (roseCatchUpMPV(t, tctxt->state, loc, scratch)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- goto done_queue_empty;
- }
- }
-
- if (roseCatchUpTo(t, tctxt->state, loc + scratch->core_info.buf_offset,
- scratch, in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- } else {
- /* we must be a chained nfa */
- assert(is_mpv);
- DEBUG_PRINTF("flushing chained\n");
- tctxt->next_mpv_offset = 0; /* force us to catch the mpv */
- if (roseCatchUpMPV(t, tctxt->state, loc, scratch)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-done_queue_empty:
- if (!mmbit_set(aa, aaCount, qi)) {
- initQueue(q, qi, t, tctxt);
- nfaQueueInitState(q->nfa, q);
- pushQueueAt(q, 0, MQE_START, loc);
- fatbit_set(activeQueues, qCount, qi);
- }
-
- assert(!isQueueFull(q));
-
- if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
- if (!scratch->core_info.broken) {
- scratch->core_info.broken = BROKEN_EXHAUSTED;
- }
- tctxt->groups = 0;
- DEBUG_PRINTF("termination requested\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
-hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t,
- struct hs_scratch *scratch, u32 qi, s64a loc,
- char in_anchored) {
- return ensureQueueFlushed_i(t, scratch, qi, loc, 0, in_anchored, 0);
-}
-
static really_inline
hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 qi, s64a loc,
in_chained);
}
-static rose_inline
-hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t,
- u32 qi, u32 top, u64a som,
- u64a end, struct RoseContext *tctxt,
- char in_anchored) {
- DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top);
-
- u8 *aa = getActiveLeafArray(t, tctxt->state);
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
- const u32 aaCount = t->activeArrayCount;
- const u32 qCount = t->queueCount;
- struct mq *q = &scratch->queues[qi];
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- const struct NFA *nfa = getNfaByInfo(t, info);
-
- struct core_info *ci = &scratch->core_info;
- s64a loc = (s64a)end - ci->buf_offset;
- assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);
-
- if (!mmbit_set(aa, aaCount, qi)) {
- initQueue(q, qi, t, tctxt);
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, loc);
- fatbit_set(scratch->aqa, qCount, qi);
- } else if (info->no_retrigger) {
- DEBUG_PRINTF("yawn\n");
- /* nfa only needs one top; we can go home now */
- return HWLM_CONTINUE_MATCHING;
- } else if (!fatbit_set(scratch->aqa, qCount, qi)) {
- initQueue(q, qi, t, tctxt);
- loadStreamState(nfa, q, 0);
- pushQueueAt(q, 0, MQE_START, 0);
- } else if (isQueueFull(q)) {
- DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
- if (info->eod) {
- /* can catch up suffix independently no pq */
- q->context = NULL;
- pushQueueNoMerge(q, MQE_END, loc);
- nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else if (ensureQueueFlushed(t, scratch, qi, loc, in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID));
- pushQueueSom(q, top, loc, som);
-
- if (q_cur_loc(q) == (s64a)ci->len && !info->eod) {
- /* we may not run the nfa; need to ensure state is fine */
- DEBUG_PRINTF("empty run\n");
- pushQueueNoMerge(q, MQE_END, loc);
- char alive = nfaQueueExec(nfa, q, loc);
- if (alive) {
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- mmbit_unset(aa, aaCount, qi);
- fatbit_unset(scratch->aqa, qCount, qi);
- }
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
static rose_inline
void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId,
u64a end) {
mmbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
}
-/* handles the firing of external matches */
-static rose_inline
-hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id,
- u64a end, struct RoseContext *tctxt,
- char in_anchored) {
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
- if (roseCatchUpTo(t, state, end, scratch, in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
- assert(end == tctxt->minMatchOffset);
- DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end);
- updateLastMatchOffset(tctxt, end);
-
- int cb_rv = tctxt->cb(end, id, tctxt->userCtx);
- if (cb_rv == MO_HALT_MATCHING) {
- DEBUG_PRINTF("termination requested\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
- return HWLM_CONTINUE_MATCHING;
- }
-
- if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
- if (!scratch->core_info.broken) {
- scratch->core_info.broken = BROKEN_EXHAUSTED;
- }
- tctxt->groups = 0;
- DEBUG_PRINTF("termination requested\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r,
u64a end, struct RoseContext *tctxt,
char in_anchored, char in_catchup) {
return HWLM_CONTINUE_MATCHING;
}
-/* catches up engines enough to ensure any earlier mpv triggers are enqueued
- * and then adds the trigger to the mpv queue. Must not be called during catch
- * up */
-static rose_inline
-hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
- u8 *state, ReportID r, u64a end,
- struct RoseContext *tctxt,
- char in_anchored) {
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
- if (roseCatchUpMpvFeeders(t, state, end, scratch, in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
- return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0);
-}
-
-static rose_inline
-hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end,
- struct RoseContext *tctxt, char in_anchored) {
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
- // In SOM processing, we may be able to limit or entirely avoid catchup.
-
- DEBUG_PRINTF("entry\n");
-
- if (end == tctxt->minMatchOffset) {
- DEBUG_PRINTF("already caught up\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
- DEBUG_PRINTF("catching up all NFAs\n");
- if (roseCatchUpTo(t, state, end, scratch, in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- updateMinMatchOffset(tctxt, end);
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
-hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id,
- u64a end, struct RoseContext *tctxt,
- char in_anchored) {
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
-
- DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end,
- tctxt->minMatchOffset);
-
- // Reach into reports and handle internal reports that just manipulate SOM
- // slots ourselves, rather than going through the callback.
-
- if (roseSomCatchup(t, state, end, tctxt, in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
- const struct internal_report *ri = getInternalReport(t, id);
- handleSomInternal(scratch, ri, end);
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static rose_inline
-hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state,
- ReportID id, u64a start, u64a end,
- struct RoseContext *tctxt, char in_anchored) {
- if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
- DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id,
- start, end);
- DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset);
- assert(end == tctxt->minMatchOffset);
-
- updateLastMatchOffset(tctxt, end);
- int cb_rv = tctxt->cb_som(start, end, id, tctxt->userCtx);
- if (cb_rv == MO_HALT_MATCHING) {
- DEBUG_PRINTF("termination requested\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
- return HWLM_CONTINUE_MATCHING;
- }
-
- struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
- if (isAllExhausted(t, ci->exhaustionVector)) {
- if (!ci->broken) {
- ci->broken = BROKEN_EXHAUSTED;
- }
- tctxt->groups = 0;
- DEBUG_PRINTF("termination requested\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static rose_inline
-hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, u8 *state, ReportID id,
- u64a start, u64a end, struct RoseContext *tctxt,
- char in_anchored) {
- DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n",
- id, start, end, tctxt->minMatchOffset);
-
- // Reach into reports and handle internal reports that just manipulate SOM
- // slots ourselves, rather than going through the callback.
-
- if (roseSomCatchup(t, state, end, tctxt, in_anchored)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
- const struct internal_report *ri = getInternalReport(t, id);
- setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end);
- return HWLM_CONTINUE_MATCHING;
-}
-
-static rose_inline
-char rosePrefixCheckMiracles(const struct RoseEngine *t,
- const struct LeftNfaInfo *left,
- struct core_info *ci, struct mq *q, u64a end) {
- if (left->transient) {
- // Miracles won't help us with transient leftfix engines; they only
- // scan for a limited time anyway.
- return 1;
- }
-
- if (!left->stopTable) {
- return 1;
- }
-
- DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex);
-
- const s64a begin_loc = q_cur_loc(q);
- const s64a end_loc = end - ci->buf_offset;
-
- s64a miracle_loc;
- if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
- goto found_miracle;
- }
-
- if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
- &miracle_loc)) {
- goto found_miracle;
- }
-
- return 1;
-
-found_miracle:
- DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
- assert(miracle_loc >= begin_loc);
-
- // If we're a prefix, then a miracle effectively results in us needing to
- // re-init our state and start fresh.
- if (!left->infix) {
- if (miracle_loc != begin_loc) {
- DEBUG_PRINTF("re-init prefix state\n");
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, miracle_loc);
- pushQueueAt(q, 1, MQE_TOP, miracle_loc);
- nfaQueueInitState(q->nfa, q);
- }
- return 1;
- }
-
- // Otherwise, we're an infix. Remove tops before the miracle from the queue
- // and re-init at that location.
-
- q_skip_forward_to(q, miracle_loc);
-
- if (q_last_type(q) == MQE_START) {
- DEBUG_PRINTF("miracle caused infix to die\n");
- return 0;
- }
-
- DEBUG_PRINTF("re-init infix state\n");
- assert(q->items[q->cur].type == MQE_START);
- q->items[q->cur].location = miracle_loc;
- nfaQueueInitState(q->nfa, q);
-
- return 1;
-}
-
-static really_inline
-char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag,
- ReportID leftfixReport, u64a end,
- struct RoseContext *tctxt) {
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
- struct core_info *ci = &scratch->core_info;
-
- u32 ri = queueToLeftIndex(t, qi);
- const struct LeftNfaInfo *left = getLeftTable(t) + ri;
-
- DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n",
- (left->transient ? "transient" : "active"),
- (left->infix ? "infix" : "prefix"),
- ri, qi, leftfixLag, left->maxLag);
-
- assert(leftfixLag <= left->maxLag);
-
- struct mq *q = scratch->queues + qi;
- u32 qCount = t->queueCount;
- u32 arCount = t->activeLeftCount;
-
- if (!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, ri)) {
- DEBUG_PRINTF("engine is dead nothing to see here\n");
- return 0;
- }
-
- if (unlikely(end < leftfixLag)) {
- assert(0); /* lag is the literal length */
- return 0;
- }
-
- if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset
- && !fatbit_isset(scratch->aqa, qCount, qi)
- && isZombie(t, tctxt->state, left)) {
- DEBUG_PRINTF("zombie\n");
- return 1;
- }
-
- if (!fatbit_set(scratch->aqa, qCount, qi)) {
- DEBUG_PRINTF("initing q %u\n", qi);
- initRoseQueue(t, qi, left, tctxt);
- if (ci->buf_offset) { // there have been writes before us!
- s32 sp;
- if (left->transient) {
- sp = -(s32)ci->hlen;
- } else {
- sp = -(s32)loadRoseDelay(t, tctxt->state, left);
- }
-
- /* transient nfas are always started fresh -> state not maintained
- * at stream boundary */
-
- pushQueueAt(q, 0, MQE_START, sp);
- if (left->infix || (ci->buf_offset + sp > 0 && !left->transient)) {
- loadStreamState(q->nfa, q, sp);
- } else {
- pushQueueAt(q, 1, MQE_TOP, sp);
- nfaQueueInitState(q->nfa, q);
- }
- } else { // first write ever
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- nfaQueueInitState(q->nfa, q);
- }
- }
-
- s64a loc = (s64a)end - ci->buf_offset - leftfixLag;
- assert(loc >= q_cur_loc(q));
- assert(leftfixReport != MO_INVALID_IDX);
-
- if (left->transient) {
- s64a start_loc = loc - left->transient;
- if (q_cur_loc(q) < start_loc) {
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, start_loc);
- pushQueueAt(q, 1, MQE_TOP, start_loc);
- nfaQueueInitState(q->nfa, q);
- }
- }
-
- if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) {
- if (left->infix) {
- if (infixTooOld(q, loc)) {
- DEBUG_PRINTF("infix %u died of old age\n", ri);
- scratch->tctxt.groups &= left->squash_mask;
- mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri);
- return 0;
- }
-
- reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth);
- }
-
- if (!rosePrefixCheckMiracles(t, left, ci, q, end)) {
- DEBUG_PRINTF("leftfix %u died due to miracle\n", ri);
- scratch->tctxt.groups &= left->squash_mask;
- mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri);
- return 0;
- }
-
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- pushQueueNoMerge(q, MQE_END, loc);
-
- char rv = nfaQueueExecRose(q->nfa, q, leftfixReport);
- if (!rv) { /* nfa is dead */
- DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri);
- mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri);
- assert(!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount,
- ri));
- tctxt->groups &= left->squash_mask;
- return 0;
- }
-
- // Queue must have next start loc before we call nfaInAcceptState.
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
-
- DEBUG_PRINTF("checking for report %u\n", leftfixReport);
- DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
- return rv == MO_MATCHES_PENDING;
- } else {
- DEBUG_PRINTF("checking for report %u\n", leftfixReport);
- char rv = nfaInAcceptState(q->nfa, leftfixReport, q);
- DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
- return rv;
- }
-}
-
-static rose_inline
-void roseSetRole(const struct RoseEngine *t, u8 *state,
- struct RoseContext *tctxt, u32 stateIndex, u8 depth) {
- DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth);
- mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex);
- update_depth(tctxt, depth);
-}
-
-static rose_inline
-void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi,
- u32 topEvent, u8 cancel, struct RoseContext *tctxt) {
- struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
- s64a loc = (s64a)end - ci->buf_offset;
-
- u32 ri = queueToLeftIndex(t, qi);
- assert(topEvent < MQE_INVALID);
-
- const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
- assert(!left->transient);
-
- DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent);
-
- struct mq *q = tctxtToScratch(tctxt)->queues + qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-
- u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state);
- const u32 arCount = t->activeLeftCount;
- char alive = mmbit_set(activeLeftArray, arCount, ri);
-
- if (alive && info->no_retrigger) {
- DEBUG_PRINTF("yawn\n");
- return;
- }
-
- struct fatbit *aqa = tctxtToScratch(tctxt)->aqa;
- const u32 qCount = t->queueCount;
-
- if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset &&
- !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) {
- DEBUG_PRINTF("yawn - zombie\n");
- return;
- }
-
- if (cancel) {
- DEBUG_PRINTF("dominating top: (re)init\n");
- fatbit_set(aqa, qCount, qi);
- initRoseQueue(t, qi, left, tctxt);
- pushQueueAt(q, 0, MQE_START, loc);
- nfaQueueInitState(q->nfa, q);
- } else if (!fatbit_set(aqa, qCount, qi)) {
- DEBUG_PRINTF("initing %u\n", qi);
- initRoseQueue(t, qi, left, tctxt);
- if (alive) {
- s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left);
- pushQueueAt(q, 0, MQE_START, sp);
- loadStreamState(q->nfa, q, sp);
- } else {
- pushQueueAt(q, 0, MQE_START, loc);
- nfaQueueInitState(q->nfa, q);
- }
- } else if (!alive) {
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- nfaQueueInitState(q->nfa, q);
- } else if (isQueueFull(q)) {
- reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth);
-
- if (isQueueFull(q)) {
- /* still full - reduceQueue did nothing */
- DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi,
- q->end - q->cur);
- pushQueueNoMerge(q, MQE_END, loc);
- nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
-
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- }
- }
-
- pushQueueSom(q, topEvent, loc, start);
-}
-
-static really_inline
-int reachHasBit(const u8 *reach, u8 c) {
- return !!(reach[c / 8U] & (u8)1U << (c % 8U));
-}
-
-/**
- * \brief Scan around a literal, checking that that "lookaround" reach masks
- * are satisfied.
- */
-static rose_inline
-int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex,
- u32 lookaroundCount, u64a end,
- struct RoseContext *tctxt) {
- assert(lookaroundIndex != MO_INVALID_IDX);
- assert(lookaroundCount > 0);
-
- const struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
- DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
- ci->buf_offset, ci->buf_offset + ci->len);
-
- const u8 *base = (const u8 *)t;
- const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
- const s8 *look = look_base + lookaroundIndex;
- const s8 *look_end = look + lookaroundCount;
- assert(look < look_end);
-
- const u8 *reach_base = base + t->lookaroundReachOffset;
- const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN;
-
- // The following code assumes that the lookaround structures are ordered by
- // increasing offset.
-
- const s64a base_offset = end - ci->buf_offset;
- DEBUG_PRINTF("base_offset=%lld\n", base_offset);
- DEBUG_PRINTF("first look has offset %d\n", *look);
-
- // If our first check tells us we need to look at an offset before the
- // start of the stream, this role cannot match.
- if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) {
- DEBUG_PRINTF("too early, fail\n");
- return 0;
- }
-
- // Skip over offsets that are before the history buffer.
- do {
- s64a offset = base_offset + *look;
- if (offset >= -(s64a)ci->hlen) {
- goto in_history;
- }
- DEBUG_PRINTF("look=%d before history\n", *look);
- look++;
- reach += REACH_BITVECTOR_LEN;
- } while (look < look_end);
-
- // History buffer.
- DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
- for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
- in_history:
- ;
- s64a offset = base_offset + *look;
- DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
-
- if (offset >= 0) {
- DEBUG_PRINTF("in buffer\n");
- goto in_buffer;
- }
-
- assert(offset >= -(s64a)ci->hlen && offset < 0);
- u8 c = ci->hbuf[ci->hlen + offset];
- if (!reachHasBit(reach, c)) {
- DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
- return 0;
- }
- }
-
- // Current buffer.
- DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
- for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
- in_buffer:
- ;
- s64a offset = base_offset + *look;
- DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
-
- if (offset >= (s64a)ci->len) {
- DEBUG_PRINTF("in the future\n");
- break;
- }
-
- assert(offset >= 0 && offset < (s64a)ci->len);
- u8 c = ci->buf[offset];
- if (!reachHasBit(reach, c)) {
- DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
- return 0;
- }
- }
-
- DEBUG_PRINTF("OK :)\n");
- return 1;
-}
-
-static
-int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id,
- void *context) {
- u64a *som = context;
- *som = MIN(*som, from_offset);
- return MO_CONTINUE_MATCHING;
-}
-
-static rose_inline
-u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi,
- UNUSED const u32 leftfixLag,
- struct RoseContext *tctxt) {
- u32 ri = queueToLeftIndex(t, qi);
-
- UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri;
-
- DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n",
- left->transient ? "transient" : "active", ri, qi,
- leftfixLag, left->maxLag);
-
- assert(leftfixLag <= left->maxLag);
-
- struct mq *q = tctxtToScratch(tctxt)->queues + qi;
-
- u64a start = ~0ULL;
-
- /* switch the callback + context for a fun one */
- q->som_cb = roseNfaEarliestSom;
- q->context = &start;
-
- nfaReportCurrentMatches(q->nfa, q);
-
- /* restore the old callback + context */
- q->som_cb = roseNfaSomAdaptor;
- q->context = NULL;
- DEBUG_PRINTF("earliest som is %llu\n", start);
- return start;
-}
-
-static rose_inline
-char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
- assert(max_bound <= ROSE_BOUND_INF);
- assert(min_bound <= max_bound);
-
- if (end < min_bound) {
- return 0;
- }
- return max_bound == ROSE_BOUND_INF || end <= max_bound;
-}
-
-#define PROGRAM_CASE(name) \
- case ROSE_INSTR_##name: { \
- DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \
- const struct ROSE_STRUCT_##name *ri = \
- (const struct ROSE_STRUCT_##name *)pc;
-
-#define PROGRAM_NEXT_INSTRUCTION \
- pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
- break; \
- }
-
-static really_inline
-hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset,
- u64a end, u64a *som, struct RoseContext *tctxt,
- char in_anchored, int *work_done) {
- DEBUG_PRINTF("program begins at offset %u\n", programOffset);
-
- assert(programOffset);
- assert(programOffset < t->size);
-
- const char *pc = getByOffset(t, programOffset);
-
- assert(*(const u8 *)pc != ROSE_INSTR_END);
-
- for (;;) {
- assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
- u8 code = *(const u8 *)pc;
- assert(code <= ROSE_INSTR_END);
-
- switch ((enum RoseInstructionCode)code) {
- PROGRAM_CASE(ANCHORED_DELAY) {
- if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
- DEBUG_PRINTF("delay until playback\n");
- update_depth(tctxt, ri->depth);
- tctxt->groups |= ri->groups;
- *work_done = 1;
- assert(ri->done_jump); // must progress
- pc += ri->done_jump;
- continue;
- }
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(CHECK_ONLY_EOD) {
- struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
- if (end != ci->buf_offset + ci->len) {
- DEBUG_PRINTF("should only match at end of data\n");
- assert(ri->fail_jump); // must progress
- pc += ri->fail_jump;
- continue;
- }
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(CHECK_BOUNDS) {
- if (!in_anchored &&
- !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) {
- DEBUG_PRINTF("failed root bounds check\n");
- assert(ri->fail_jump); // must progress
- pc += ri->fail_jump;
- continue;
- }
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(CHECK_NOT_HANDLED) {
- struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles;
- if (fatbit_set(handled, t->handledKeyCount, ri->key)) {
- DEBUG_PRINTF("key %u already set\n", ri->key);
- assert(ri->fail_jump); // must progress
- pc += ri->fail_jump;
- continue;
- }
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(CHECK_LOOKAROUND) {
- if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) {
- DEBUG_PRINTF("failed lookaround check\n");
- assert(ri->fail_jump); // must progress
- pc += ri->fail_jump;
- continue;
- }
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(CHECK_LEFTFIX) {
- if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end,
- tctxt)) {
- DEBUG_PRINTF("failed lookaround check\n");
- assert(ri->fail_jump); // must progress
- pc += ri->fail_jump;
- continue;
- }
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(SOM_ADJUST) {
- assert(ri->distance <= end);
- *som = end - ri->distance;
- DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, *som);
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(SOM_LEFTFIX) {
- *som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt);
- DEBUG_PRINTF("som from leftfix is %llu\n", *som);
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(TRIGGER_INFIX) {
- roseTriggerInfix(t, *som, end, ri->queue, ri->event, ri->cancel,
- tctxt);
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(TRIGGER_SUFFIX) {
- if (roseHandleSuffixTrigger(t, ri->queue, ri->event, *som, end,
- tctxt, in_anchored) ==
- HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(REPORT) {
- if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt,
- in_anchored) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(REPORT_CHAIN) {
- if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report,
- end, tctxt, in_anchored) ==
- HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(REPORT_EOD) {
- if (tctxt->cb(end, ri->report, tctxt->userCtx) ==
- MO_HALT_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(REPORT_SOM_INT) {
- if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt,
- in_anchored) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(REPORT_SOM) {
- if (roseHandleSomSom(t, tctxt->state, ri->report, *som, end,
- tctxt,
- in_anchored) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(REPORT_SOM_KNOWN) {
- if (roseHandleSomMatch(t, tctxt->state, ri->report, *som, end,
- tctxt, in_anchored) ==
- HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(SET_STATE) {
- roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth);
- *work_done = 1;
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(SET_GROUPS) {
- tctxt->groups |= ri->groups;
- DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups,
- tctxt->groups);
- }
- PROGRAM_NEXT_INSTRUCTION
-
- PROGRAM_CASE(END) {
- DEBUG_PRINTF("finished\n");
- return HWLM_CONTINUE_MATCHING;
- }
- PROGRAM_NEXT_INSTRUCTION
- }
- }
-
- assert(0); // unreachable
- return HWLM_CONTINUE_MATCHING;
-}
-
-#undef PROGRAM_CASE
-#undef PROGRAM_NEXT_INSTRUCTION
-
-hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset,
- u64a end, u64a *som, struct RoseContext *tctxt,
- int *work_done) {
- return roseRunRoleProgram_i(t, programOffset, end, som, tctxt, 0,
- work_done);
-}
-
-static really_inline
-void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) {
- assert(tl->squashesGroup);
-
- // we should be squashing a single group
- assert(popcount64(tl->groups) == 1);
-
- DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n",
- ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups);
-
- tctxt->groups &= ~tl->groups;
-}
-
-// Run the sparse iterator for this literal and use that to discover which
-// roles to consider.
-/* Note: uses the stashed sparse iter state; cannot be called from
- * anybody else who is using it */
-/* Note: uses the handled role mmbit; cannot be called from
- * anybody else who is using it (nobody else should be) */
-/* non-root roles should not occur in any anchored context */
-static really_inline
-hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t,
- const struct RoseLiteral *tl, u64a end,
- struct RoseContext *tctxt) {
- /* assert(!tctxt->in_anchored); */
- /* assert(!tctxt->in_anch_playback); */
- const u32 *iterProgram = getByOffset(t, tl->iterProgramOffset);
- const struct mmbit_sparse_iter *it = getByOffset(t, tl->iterOffset);
- assert(ISALIGNED(iterProgram));
- assert(ISALIGNED(it));
-
- // Sparse iterator state was allocated earlier
- struct mmbit_sparse_state *s = tctxtToScratch(tctxt)->sparse_iter_state;
- struct fatbit *handled_roles = tctxtToScratch(tctxt)->handled_roles;
-
- const u32 numStates = t->rolesWithStateCount;
-
- void *role_state = getRoleState(tctxt->state);
- u32 idx = 0;
- int work_done = 0; // set to 1 if we actually process any roles
- u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);
-
- fatbit_clear(handled_roles);
-
- for (; i != MMB_INVALID;
- i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
- u32 programOffset = iterProgram[idx];
- DEBUG_PRINTF("pred state %u (iter idx=%u) is on -> program %u\n", i,
- idx, programOffset);
-
- // If this bit is switched on in the sparse iterator, it must be
- // driving a program.
- assert(programOffset);
-
- u64a som = 0ULL;
- if (roseRunRoleProgram_i(t, programOffset, end, &som, tctxt, 0,
- &work_done) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- // If we've actually handled any roles, we might need to apply this
- // literal's squash mask to our groups as well.
- if (work_done && tl->squashesGroup) {
- roseSquashGroup(tctxt, tl);
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-// Walk the set of root roles (roles with depth 1) associated with this literal
-// and set them on.
-static really_inline
-char roseWalkRootRoles_i(const struct RoseEngine *t,
- const struct RoseLiteral *tl, u64a end,
- struct RoseContext *tctxt, char in_anchored) {
- if (!tl->rootProgramOffset) {
- return 1;
- }
-
- DEBUG_PRINTF("running literal root program at %u\n", tl->rootProgramOffset);
-
- u64a som = 0;
- int work_done = 0;
-
- if (roseRunRoleProgram_i(t, tl->rootProgramOffset, end, &som, tctxt,
- in_anchored,
- &work_done) == HWLM_TERMINATE_MATCHING) {
- return 0;
- }
-
- // If we've actually handled any roles, we might need to apply this
- // literal's squash mask to our groups as well.
- if (work_done && tl->squashesGroup) {
- roseSquashGroup(tctxt, tl);
- }
-
- return 1;
-}
-
-static never_inline
-char roseWalkRootRoles_A(const struct RoseEngine *t,
- const struct RoseLiteral *tl, u64a end,
- struct RoseContext *tctxt) {
- return roseWalkRootRoles_i(t, tl, end, tctxt, 1);
-}
-
-static never_inline
-char roseWalkRootRoles_N(const struct RoseEngine *t,
- const struct RoseLiteral *tl, u64a end,
- struct RoseContext *tctxt) {
- return roseWalkRootRoles_i(t, tl, end, tctxt, 0);
-}
-
-static really_inline
-char roseWalkRootRoles(const struct RoseEngine *t,
- const struct RoseLiteral *tl, u64a end,
- struct RoseContext *tctxt, char in_anchored,
- char in_anch_playback) {
- assert(!in_anch_playback || tl->rootProgramOffset);
- if (!in_anch_playback && !tl->rootProgramOffset) {
- return 1;
- }
-
- if (in_anchored) {
- return roseWalkRootRoles_A(t, tl, end, tctxt);
- } else {
- return roseWalkRootRoles_N(t, tl, end, tctxt);
- }
-}
-
/* handles catchup, som, cb, etc */
static really_inline
hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, u8 *state,
assert(id < t->literalCount);
const struct RoseLiteral *tl = &getLiteralTable(t)[id];
- assert(tl->rootProgramOffset);
+ assert(tl->programOffset);
assert(!tl->delay_mask);
- DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n", id,
- tl->minDepth, tl->groups);
+ DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups);
if (real_end <= t->floatingMinLiteralMatchOffset) {
roseFlushLastByteHistory(t, state, real_end, tctxt);
tctxt->lastEndOffset = real_end;
}
- /* anchored literals are root only */
- if (!roseWalkRootRoles(t, tl, real_end, tctxt, 1, 0)) {
- rv = HWLM_TERMINATE_MATCHING;
- }
-
- DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth,
- tctxt->groups);
-
- if (rv == HWLM_TERMINATE_MATCHING) {
+ int work_done = 0;
+ if (roseRunProgram(t, tl->programOffset, real_end, tctxt, 1, &work_done) ==
+ HWLM_TERMINATE_MATCHING) {
assert(can_stop_matching(tctxtToScratch(tctxt)));
DEBUG_PRINTF("caller requested termination\n");
return MO_HALT_MATCHING;
}
+ // If we've actually handled any roles, we might need to apply this
+ // literal's squash mask to our groups as well.
+ if (work_done && tl->squashesGroup) {
+ roseSquashGroup(tctxt, tl);
+ }
+
+ DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth,
+ tctxt->groups);
+
if (real_end > t->floatingMinLiteralMatchOffset) {
recordAnchoredLiteralMatch(tctxt, id, real_end);
}
assert(id < t->literalCount);
const struct RoseLiteral *tl = &getLiteralTable(t)[id];
- DEBUG_PRINTF("lit id=%u, minDepth=%u, groups=0x%016llx\n", id, tl->minDepth,
- tl->groups);
+ DEBUG_PRINTF("lit id=%u, groups=0x%016llx\n", id, tl->groups);
if (do_group_check && !(tl->groups & tctxt->groups)) {
DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n");
return HWLM_CONTINUE_MATCHING;
}
- if (tl->minDepth > tctxt->depth) {
- DEBUG_PRINTF("IGNORE: minDepth=%u > %u\n", tl->minDepth, tctxt->depth);
- goto root_roles;
- }
-
- /* the depth checks will normally prevent roles without a spare iterator
- * from reaching here (root roles) (and only root roles should be seen
- * during anch play back). */
- assert(tl->iterOffset == ROSE_OFFSET_INVALID || !in_anch_playback);
- if (tl->iterOffset != ROSE_OFFSET_INVALID && !in_anch_playback) {
- hwlmcb_rv_t rv = roseWalkSparseIterator(t, tl, end, tctxt);
+ int work_done = 0;
- if (rv == HWLM_TERMINATE_MATCHING) {
+ if (tl->programOffset) {
+ DEBUG_PRINTF("running program at %u\n", tl->programOffset);
+ if (roseRunProgram(t, tl->programOffset, end, tctxt, 0, &work_done) ==
+ HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
+
}
-root_roles:
- // Process "root roles", i.e. depth 1 roles for this literal
- if (!roseWalkRootRoles(t, tl, end, tctxt, 0 /* in_anchored */,
- in_anch_playback)) {
- return HWLM_TERMINATE_MATCHING;
+ // If we've actually handled any roles, we might need to apply this
+ // literal's squash mask to our groups as well.
+ if (work_done && tl->squashesGroup) {
+ roseSquashGroup(tctxt, tl);
}
return HWLM_CONTINUE_MATCHING;
DEBUG_PRINTF("user requested halt\n");
return HWLM_TERMINATE_MATCHING;
}
-
-// Specialised cut-down roseCallback for running ROSE_EVENT "literals", like the
-// EOD one.
-void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt) {
- const struct RoseEngine *t = tctxt->t;
- struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
- u64a real_end = ci->buf_offset - ci->hlen + end;
-
- DEBUG_PRINTF("EVENT id=%u offset=%llu\n", id, real_end);
-
- // Caller should guard against broken stream.
- assert(!can_stop_matching(tctxtToScratch(tctxt)));
-
- // Shouldn't be here if we're a real literal with benefits.
- assert(id >= t->nonbenefits_base_id);
-
- // At the moment, this path is only used for the EOD event.
- assert(id == t->eodLiteralId);
-
- // There should be no pending delayed literals.
- assert(!tctxt->filledDelayedSlots);
-
- // Note: we throw away the return value.
- roseProcessMatch_i(t, real_end, id, tctxt, 0, 0, 0);
-
- DEBUG_PRINTF("DONE depth=%hhu, groups=0x%016llx\n", tctxt->depth,
- tctxt->groups);
-}
hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
void *ctx);
int roseAnchoredCallback(u64a end, u32 id, void *ctx);
-void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt);
/* Common code, used all over Rose runtime */
scratch->sparse_iter_state);
}
-hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset,
- u64a end, u64a *som, struct RoseContext *tctxt,
- int *work_done);
-
#endif
--- /dev/null
+++ src/rose/program_runtime.h
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
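+/*
+ * Inline runtime helpers used by the Rose program interpreter. These
+ * functions were previously internal to match.c and are shared here so that
+ * both the literal-match and EOD paths can run programs.
+ */
+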
+#ifndef PROGRAM_RUNTIME_H
+#define PROGRAM_RUNTIME_H
+
+#include "catchup.h"
+#include "counting_miracle.h"
+#include "infix.h"
+#include "match.h"
+#include "miracle.h"
+#include "rose.h"
+#include "rose_internal.h"
+#include "rose_program.h"
+#include "rose_types.h"
+#include "runtime.h"
+#include "scratch.h"
+#include "ue2common.h"
+#include "util/fatbit.h"
+#include "util/multibit.h"
+
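+/* Checks for a "miracle" (a guaranteed stop character) in the region a
+ * prefix/infix leftfix is about to scan and, if found, re-initialises the
+ * engine there. Returns 0 if the miracle means an infix cannot be alive. */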
+static rose_inline
+char rosePrefixCheckMiracles(const struct RoseEngine *t,
+ const struct LeftNfaInfo *left,
+ struct core_info *ci, struct mq *q, u64a end) {
+ if (left->transient) {
+ // Miracles won't help us with transient leftfix engines; they only
+ // scan for a limited time anyway.
+ return 1;
+ }
+
+ if (!left->stopTable) {
+ return 1;
+ }
+
+ DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex);
+
+ const s64a begin_loc = q_cur_loc(q);
+ const s64a end_loc = end - ci->buf_offset;
+
+ s64a miracle_loc;
+ if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
+ goto found_miracle;
+ }
+
+ if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
+ &miracle_loc)) {
+ goto found_miracle;
+ }
+
+ return 1;
+
+found_miracle:
+ DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
+ assert(miracle_loc >= begin_loc);
+
+ // If we're a prefix, then a miracle effectively results in us needing to
+ // re-init our state and start fresh.
+ if (!left->infix) {
+ if (miracle_loc != begin_loc) {
+ DEBUG_PRINTF("re-init prefix state\n");
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, miracle_loc);
+ pushQueueAt(q, 1, MQE_TOP, miracle_loc);
+ nfaQueueInitState(q->nfa, q);
+ }
+ return 1;
+ }
+
+ // Otherwise, we're an infix. Remove tops before the miracle from the queue
+ // and re-init at that location.
+
+ q_skip_forward_to(q, miracle_loc);
+
+ if (q_last_type(q) == MQE_START) {
+ DEBUG_PRINTF("miracle caused infix to die\n");
+ return 0;
+ }
+
+ DEBUG_PRINTF("re-init infix state\n");
+ assert(q->items[q->cur].type == MQE_START);
+ q->items[q->cur].location = miracle_loc;
+ nfaQueueInitState(q->nfa, q);
+
+ return 1;
+}
+
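+/* Ensures queue qi has room for further events: flattens the queue in place
+ * when everything is enqueued at the current location, otherwise catches up
+ * engines so that the queue can be emptied safely. */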
+static really_inline
+hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t,
+ struct hs_scratch *scratch, u32 qi, s64a loc,
+ char is_mpv, char in_anchored,
+ char in_catchup) {
+ struct RoseContext *tctxt = &scratch->tctxt;
+ u8 *aa = getActiveLeafArray(t, tctxt->state);
+ struct fatbit *activeQueues = scratch->aqa;
+ u32 aaCount = t->activeArrayCount;
+ u32 qCount = t->queueCount;
+
+ struct mq *q = &scratch->queues[qi];
+ DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n",
+ q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset);
+ if (q_cur_loc(q) == loc) {
+ /* too many tops enqueued at the one spot; need to flatten this queue.
+ * We can use the full catchups as it will short circuit as we are
+ * already at this location. It also saves waking everybody up */
+ pushQueueNoMerge(q, MQE_END, loc);
+ nfaQueueExec(q->nfa, q, loc);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else if (!in_catchup) {
+ if (is_mpv) {
+ tctxt->next_mpv_offset = 0; /* force us to catch the mpv */
+ if (loc + scratch->core_info.buf_offset
+ <= tctxt->minNonMpvMatchOffset) {
+ DEBUG_PRINTF("flushing chained\n");
+ if (roseCatchUpMPV(t, tctxt->state, loc, scratch)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ goto done_queue_empty;
+ }
+ }
+
+ if (roseCatchUpTo(t, tctxt->state, loc + scratch->core_info.buf_offset,
+ scratch, in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ } else {
+ /* we must be a chained nfa */
+ assert(is_mpv);
+ DEBUG_PRINTF("flushing chained\n");
+ tctxt->next_mpv_offset = 0; /* force us to catch the mpv */
+ if (roseCatchUpMPV(t, tctxt->state, loc, scratch)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+done_queue_empty:
+ if (!mmbit_set(aa, aaCount, qi)) {
+ initQueue(q, qi, t, tctxt);
+ nfaQueueInitState(q->nfa, q);
+ pushQueueAt(q, 0, MQE_START, loc);
+ fatbit_set(activeQueues, qCount, qi);
+ }
+
+ assert(!isQueueFull(q));
+
+ if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
+ if (!scratch->core_info.broken) {
+ scratch->core_info.broken = BROKEN_EXHAUSTED;
+ }
+ tctxt->groups = 0;
+ DEBUG_PRINTF("termination requested\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
+hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t,
+ struct hs_scratch *scratch, u32 qi, s64a loc,
+ char in_anchored) {
+ return ensureQueueFlushed_i(t, scratch, qi, loc, 0, in_anchored, 0);
+}
+
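+/* Delivers a top event to a suffix engine's queue, activating or catching up
+ * the queue first if required. */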
+static rose_inline
+hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t,
+ u32 qi, u32 top, u64a som,
+ u64a end, struct RoseContext *tctxt,
+ char in_anchored) {
+ DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top);
+
+ u8 *aa = getActiveLeafArray(t, tctxt->state);
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+ const u32 aaCount = t->activeArrayCount;
+ const u32 qCount = t->queueCount;
+ struct mq *q = &scratch->queues[qi];
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ const struct NFA *nfa = getNfaByInfo(t, info);
+
+ struct core_info *ci = &scratch->core_info;
+ s64a loc = (s64a)end - ci->buf_offset;
+ assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);
+
+ if (!mmbit_set(aa, aaCount, qi)) {
+ initQueue(q, qi, t, tctxt);
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, loc);
+ fatbit_set(scratch->aqa, qCount, qi);
+ } else if (info->no_retrigger) {
+ DEBUG_PRINTF("yawn\n");
+ /* nfa only needs one top; we can go home now */
+ return HWLM_CONTINUE_MATCHING;
+ } else if (!fatbit_set(scratch->aqa, qCount, qi)) {
+ initQueue(q, qi, t, tctxt);
+ loadStreamState(nfa, q, 0);
+ pushQueueAt(q, 0, MQE_START, 0);
+ } else if (isQueueFull(q)) {
+ DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
+ if (info->eod) {
+ /* can catch up suffix independently; no priority queue needed */
+ q->context = NULL;
+ pushQueueNoMerge(q, MQE_END, loc);
+ nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else if (ensureQueueFlushed(t, scratch, qi, loc, in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID));
+ pushQueueSom(q, top, loc, som);
+
+ if (q_cur_loc(q) == (s64a)ci->len && !info->eod) {
+ /* we may not run the nfa; need to ensure state is fine */
+ DEBUG_PRINTF("empty run\n");
+ pushQueueNoMerge(q, MQE_END, loc);
+ char alive = nfaQueueExec(nfa, q, loc);
+ if (alive) {
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ mmbit_unset(aa, aaCount, qi);
+ fatbit_unset(scratch->aqa, qCount, qi);
+ }
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
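+/* Runs the leftfix (prefix/infix) engine for queue qi up to the location
+ * implied by end and the literal's lag, and reports whether it can accept
+ * the given leftfix report there. */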
+static really_inline
+char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag,
+ ReportID leftfixReport, u64a end,
+ struct RoseContext *tctxt) {
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+ struct core_info *ci = &scratch->core_info;
+
+ u32 ri = queueToLeftIndex(t, qi);
+ const struct LeftNfaInfo *left = getLeftTable(t) + ri;
+
+ DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n",
+ (left->transient ? "transient" : "active"),
+ (left->infix ? "infix" : "prefix"),
+ ri, qi, leftfixLag, left->maxLag);
+
+ assert(leftfixLag <= left->maxLag);
+
+ struct mq *q = scratch->queues + qi;
+ u32 qCount = t->queueCount;
+ u32 arCount = t->activeLeftCount;
+
+ if (!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, ri)) {
+ DEBUG_PRINTF("engine is dead nothing to see here\n");
+ return 0;
+ }
+
+ if (unlikely(end < leftfixLag)) {
+ assert(0); /* lag is the literal length */
+ return 0;
+ }
+
+ if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset
+ && !fatbit_isset(scratch->aqa, qCount, qi)
+ && isZombie(t, tctxt->state, left)) {
+ DEBUG_PRINTF("zombie\n");
+ return 1;
+ }
+
+ if (!fatbit_set(scratch->aqa, qCount, qi)) {
+ DEBUG_PRINTF("initing q %u\n", qi);
+ initRoseQueue(t, qi, left, tctxt);
+ if (ci->buf_offset) { // there have been writes before us!
+ s32 sp;
+ if (left->transient) {
+ sp = -(s32)ci->hlen;
+ } else {
+ sp = -(s32)loadRoseDelay(t, tctxt->state, left);
+ }
+
+ /* transient nfas are always started fresh -> state not maintained
+ * at stream boundary */
+
+ pushQueueAt(q, 0, MQE_START, sp);
+ if (left->infix || (ci->buf_offset + sp > 0 && !left->transient)) {
+ loadStreamState(q->nfa, q, sp);
+ } else {
+ pushQueueAt(q, 1, MQE_TOP, sp);
+ nfaQueueInitState(q->nfa, q);
+ }
+ } else { // first write ever
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ nfaQueueInitState(q->nfa, q);
+ }
+ }
+
+ s64a loc = (s64a)end - ci->buf_offset - leftfixLag;
+ assert(loc >= q_cur_loc(q));
+ assert(leftfixReport != MO_INVALID_IDX);
+
+ if (left->transient) {
+ s64a start_loc = loc - left->transient;
+ if (q_cur_loc(q) < start_loc) {
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, start_loc);
+ pushQueueAt(q, 1, MQE_TOP, start_loc);
+ nfaQueueInitState(q->nfa, q);
+ }
+ }
+
+ if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) {
+ if (left->infix) {
+ if (infixTooOld(q, loc)) {
+ DEBUG_PRINTF("infix %u died of old age\n", ri);
+ scratch->tctxt.groups &= left->squash_mask;
+ mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri);
+ return 0;
+ }
+
+ reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth);
+ }
+
+ if (!rosePrefixCheckMiracles(t, left, ci, q, end)) {
+ DEBUG_PRINTF("leftfix %u died due to miracle\n", ri);
+ scratch->tctxt.groups &= left->squash_mask;
+ mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri);
+ return 0;
+ }
+
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ pushQueueNoMerge(q, MQE_END, loc);
+
+ char rv = nfaQueueExecRose(q->nfa, q, leftfixReport);
+ if (!rv) { /* nfa is dead */
+ DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri);
+ mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri);
+ assert(!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount,
+ ri));
+ tctxt->groups &= left->squash_mask;
+ return 0;
+ }
+
+ // Queue must have next start loc before we call nfaInAcceptState.
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+
+ DEBUG_PRINTF("checking for report %u\n", leftfixReport);
+ DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
+ return rv == MO_MATCHES_PENDING;
+ } else {
+ DEBUG_PRINTF("checking for report %u\n", leftfixReport);
+ char rv = nfaInAcceptState(q->nfa, leftfixReport, q);
+ DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
+ return rv;
+ }
+}
+
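+/* Switches on a role's state bit and updates the depth tracking. */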
+static rose_inline
+void roseSetRole(const struct RoseEngine *t, u8 *state,
+ struct RoseContext *tctxt, u32 stateIndex, u8 depth) {
+ DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth);
+ mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex);
+ update_depth(tctxt, depth);
+}
+
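+/* Pushes a top event onto an infix engine's queue, (re)initialising or
+ * catching up the queue as needed. */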
+static rose_inline
+void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi,
+ u32 topEvent, u8 cancel, struct RoseContext *tctxt) {
+ struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+ s64a loc = (s64a)end - ci->buf_offset;
+
+ u32 ri = queueToLeftIndex(t, qi);
+ assert(topEvent < MQE_INVALID);
+
+ const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
+ assert(!left->transient);
+
+ DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent);
+
+ struct mq *q = tctxtToScratch(tctxt)->queues + qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+
+ u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state);
+ const u32 arCount = t->activeLeftCount;
+ char alive = mmbit_set(activeLeftArray, arCount, ri);
+
+ if (alive && info->no_retrigger) {
+ DEBUG_PRINTF("yawn\n");
+ return;
+ }
+
+ struct fatbit *aqa = tctxtToScratch(tctxt)->aqa;
+ const u32 qCount = t->queueCount;
+
+ if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset &&
+ !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) {
+ DEBUG_PRINTF("yawn - zombie\n");
+ return;
+ }
+
+ if (cancel) {
+ DEBUG_PRINTF("dominating top: (re)init\n");
+ fatbit_set(aqa, qCount, qi);
+ initRoseQueue(t, qi, left, tctxt);
+ pushQueueAt(q, 0, MQE_START, loc);
+ nfaQueueInitState(q->nfa, q);
+ } else if (!fatbit_set(aqa, qCount, qi)) {
+ DEBUG_PRINTF("initing %u\n", qi);
+ initRoseQueue(t, qi, left, tctxt);
+ if (alive) {
+ s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left);
+ pushQueueAt(q, 0, MQE_START, sp);
+ loadStreamState(q->nfa, q, sp);
+ } else {
+ pushQueueAt(q, 0, MQE_START, loc);
+ nfaQueueInitState(q->nfa, q);
+ }
+ } else if (!alive) {
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ nfaQueueInitState(q->nfa, q);
+ } else if (isQueueFull(q)) {
+ reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth);
+
+ if (isQueueFull(q)) {
+ /* still full - reduceQueue did nothing */
+ DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi,
+ q->end - q->cur);
+ pushQueueNoMerge(q, MQE_END, loc);
+ nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
+
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ }
+ }
+
+ pushQueueSom(q, topEvent, loc, start);
+}
+
+/* handles the firing of external matches */
+static rose_inline
+hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id,
+ u64a end, struct RoseContext *tctxt,
+ char in_anchored) {
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+
+ if (roseCatchUpTo(t, state, end, scratch, in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ assert(end == tctxt->minMatchOffset);
+ DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end);
+ updateLastMatchOffset(tctxt, end);
+
+ int cb_rv = tctxt->cb(end, id, tctxt->userCtx);
+ if (cb_rv == MO_HALT_MATCHING) {
+ DEBUG_PRINTF("termination requested\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
+ if (!scratch->core_info.broken) {
+ scratch->core_info.broken = BROKEN_EXHAUSTED;
+ }
+ tctxt->groups = 0;
+ DEBUG_PRINTF("termination requested\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+/* Catches up engines enough to ensure that any earlier MPV triggers are
+ * enqueued, then adds this trigger to the MPV queue. Must not be called
+ * during catch up. */
+static rose_inline
+hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
+ u8 *state, ReportID r, u64a end,
+ struct RoseContext *tctxt,
+ char in_anchored) {
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+
+ if (roseCatchUpMpvFeeders(t, state, end, scratch, in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0);
+}
+
+static rose_inline
+hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end,
+ struct RoseContext *tctxt, char in_anchored) {
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+
+ // In SOM processing, we may be able to limit or entirely avoid catchup.
+
+ DEBUG_PRINTF("entry\n");
+
+ if (end == tctxt->minMatchOffset) {
+ DEBUG_PRINTF("already caught up\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ DEBUG_PRINTF("catching up all NFAs\n");
+ if (roseCatchUpTo(t, state, end, scratch, in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ updateMinMatchOffset(tctxt, end);
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
+hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id,
+ u64a end, struct RoseContext *tctxt,
+ char in_anchored) {
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+
+ DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end,
+ tctxt->minMatchOffset);
+
+ // Reach into reports and handle internal reports that just manipulate SOM
+ // slots ourselves, rather than going through the callback.
+
+ if (roseSomCatchup(t, state, end, tctxt, in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ const struct internal_report *ri = getInternalReport(t, id);
+ handleSomInternal(scratch, ri, end);
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
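+/* Fires a SOM report through the SOM callback after catching up, checking
+ * for termination and exhaustion. */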
+static rose_inline
+hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state,
+ ReportID id, u64a start, u64a end,
+ struct RoseContext *tctxt, char in_anchored) {
+ if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id,
+ start, end);
+ DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset);
+ assert(end == tctxt->minMatchOffset);
+
+ updateLastMatchOffset(tctxt, end);
+ int cb_rv = tctxt->cb_som(start, end, id, tctxt->userCtx);
+ if (cb_rv == MO_HALT_MATCHING) {
+ DEBUG_PRINTF("termination requested\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+ if (isAllExhausted(t, ci->exhaustionVector)) {
+ if (!ci->broken) {
+ ci->broken = BROKEN_EXHAUSTED;
+ }
+ tctxt->groups = 0;
+ DEBUG_PRINTF("termination requested\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static rose_inline
+hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, u8 *state, ReportID id,
+ u64a start, u64a end, struct RoseContext *tctxt,
+ char in_anchored) {
+ DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n",
+ id, start, end, tctxt->minMatchOffset);
+
+ // Reach into reports and handle internal reports that just manipulate SOM
+ // slots ourselves, rather than going through the callback.
+
+ if (roseSomCatchup(t, state, end, tctxt, in_anchored)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ const struct internal_report *ri = getInternalReport(t, id);
+ setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end);
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
+int reachHasBit(const u8 *reach, u8 c) {
+ return !!(reach[c / 8U] & (u8)1U << (c % 8U));
+}
+
+/**
+ * \brief Scan around a literal, checking that the "lookaround" reach masks
+ * are satisfied.
+ */
+static rose_inline
+int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex,
+ u32 lookaroundCount, u64a end,
+ struct RoseContext *tctxt) {
+ assert(lookaroundIndex != MO_INVALID_IDX);
+ assert(lookaroundCount > 0);
+
+ const struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+ DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+ ci->buf_offset, ci->buf_offset + ci->len);
+
+ const u8 *base = (const u8 *)t;
+ const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
+ const s8 *look = look_base + lookaroundIndex;
+ const s8 *look_end = look + lookaroundCount;
+ assert(look < look_end);
+
+ const u8 *reach_base = base + t->lookaroundReachOffset;
+ const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN;
+
+ // The following code assumes that the lookaround structures are ordered by
+ // increasing offset.
+
+ const s64a base_offset = end - ci->buf_offset;
+ DEBUG_PRINTF("base_offset=%lld\n", base_offset);
+ DEBUG_PRINTF("first look has offset %d\n", *look);
+
+ // If our first check tells us we need to look at an offset before the
+ // start of the stream, this role cannot match.
+ if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) {
+ DEBUG_PRINTF("too early, fail\n");
+ return 0;
+ }
+
+ // Skip over offsets that are before the history buffer.
+ do {
+ s64a offset = base_offset + *look;
+ if (offset >= -(s64a)ci->hlen) {
+ goto in_history;
+ }
+ DEBUG_PRINTF("look=%d before history\n", *look);
+ look++;
+ reach += REACH_BITVECTOR_LEN;
+ } while (look < look_end);
+
+ // History buffer.
+ DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
+ for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
+ in_history:
+ ;
+ s64a offset = base_offset + *look;
+ DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+ if (offset >= 0) {
+ DEBUG_PRINTF("in buffer\n");
+ goto in_buffer;
+ }
+
+ assert(offset >= -(s64a)ci->hlen && offset < 0);
+ u8 c = ci->hbuf[ci->hlen + offset];
+ if (!reachHasBit(reach, c)) {
+ DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+ return 0;
+ }
+ }
+ // Current buffer.
+ DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
+ for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
+ in_buffer:
+ ;
+ s64a offset = base_offset + *look;
+ DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
+
+ if (offset >= (s64a)ci->len) {
+ DEBUG_PRINTF("in the future\n");
+ break;
+ }
+
+ assert(offset >= 0 && offset < (s64a)ci->len);
+ u8 c = ci->buf[offset];
+ if (!reachHasBit(reach, c)) {
+ DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+ return 0;
+ }
+ }
+
+ DEBUG_PRINTF("OK :)\n");
+ return 1;
+}
+
+static
+int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id,
+ void *context) {
+ u64a *som = context;
+ *som = MIN(*som, from_offset);
+ return MO_CONTINUE_MATCHING;
+}
+
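+/* Returns the earliest SOM reported by the given Haig leftfix's current
+ * matches, by temporarily swapping in the roseNfaEarliestSom callback. */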
+static rose_inline
+u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi,
+ UNUSED const u32 leftfixLag,
+ struct RoseContext *tctxt) {
+ u32 ri = queueToLeftIndex(t, qi);
+
+ UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri;
+
+ DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n",
+ left->transient ? "transient" : "active", ri, qi,
+ leftfixLag, left->maxLag);
+
+ assert(leftfixLag <= left->maxLag);
+
+ struct mq *q = tctxtToScratch(tctxt)->queues + qi;
+
+ u64a start = ~0ULL;
+
+    /* temporarily switch the callback + context to collect the earliest SOM */
+ q->som_cb = roseNfaEarliestSom;
+ q->context = &start;
+
+ nfaReportCurrentMatches(q->nfa, q);
+
+ /* restore the old callback + context */
+ q->som_cb = roseNfaSomAdaptor;
+ q->context = NULL;
+ DEBUG_PRINTF("earliest som is %llu\n", start);
+ return start;
+}
+
+static rose_inline
+char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
+ assert(max_bound <= ROSE_BOUND_INF);
+ assert(min_bound <= max_bound);
+
+ if (end < min_bound) {
+ return 0;
+ }
+ return max_bound == ROSE_BOUND_INF || end <= max_bound;
+}
+
+
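+/* PROGRAM_CASE casts the program counter to the instruction's ROSE_STRUCT_*
+ * type; PROGRAM_NEXT_INSTRUCTION advances the program counter by that
+ * structure's size, rounded up to ROSE_INSTR_MIN_ALIGN. */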
+#define PROGRAM_CASE(name) \
+ case ROSE_INSTR_##name: { \
+ DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \
+ const struct ROSE_STRUCT_##name *ri = \
+ (const struct ROSE_STRUCT_##name *)pc;
+
+#define PROGRAM_NEXT_INSTRUCTION \
+ pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
+ break; \
+ }
+
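+/**
+ * \brief Interpreter for Rose programs: executes the instruction stream at
+ * programOffset until ROSE_INSTR_END is reached or a callback requests that
+ * matching terminate.
+ */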
+static really_inline
+hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
+ u64a end, struct RoseContext *tctxt,
+ char in_anchored, int *work_done) {
+ DEBUG_PRINTF("program begins at offset %u\n", programOffset);
+
+ assert(programOffset);
+ assert(programOffset < t->size);
+
+ const char *pc_base = getByOffset(t, programOffset);
+ const char *pc = pc_base;
+
+ u64a som = 0;
+
+ assert(*(const u8 *)pc != ROSE_INSTR_END);
+
+ for (;;) {
+ assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
+ u8 code = *(const u8 *)pc;
+ assert(code <= ROSE_INSTR_END);
+
+ switch ((enum RoseInstructionCode)code) {
+ PROGRAM_CASE(ANCHORED_DELAY) {
+ if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
+ DEBUG_PRINTF("delay until playback\n");
+ update_depth(tctxt, ri->depth);
+ tctxt->groups |= ri->groups;
+ *work_done = 1;
+ assert(ri->done_jump); // must progress
+ pc += ri->done_jump;
+ continue;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_DEPTH) {
+ DEBUG_PRINTF("current depth %u, check min depth %u\n",
+ tctxt->depth, ri->min_depth);
+ if (ri->min_depth > tctxt->depth) {
+ DEBUG_PRINTF("failed depth check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_ONLY_EOD) {
+ struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+ if (end != ci->buf_offset + ci->len) {
+ DEBUG_PRINTF("should only match at end of data\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_BOUNDS) {
+ if (!in_anchored &&
+ !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) {
+ DEBUG_PRINTF("failed root bounds check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_NOT_HANDLED) {
+ struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles;
+ if (fatbit_set(handled, t->handledKeyCount, ri->key)) {
+ DEBUG_PRINTF("key %u already set\n", ri->key);
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_LOOKAROUND) {
+ if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) {
+ DEBUG_PRINTF("failed lookaround check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_LEFTFIX) {
+ if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end,
+ tctxt)) {
+                    DEBUG_PRINTF("failed leftfix check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SOM_ADJUST) {
+ assert(ri->distance <= end);
+ som = end - ri->distance;
+ DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som);
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SOM_LEFTFIX) {
+ som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt);
+ DEBUG_PRINTF("som from leftfix is %llu\n", som);
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(TRIGGER_INFIX) {
+ roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel,
+ tctxt);
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(TRIGGER_SUFFIX) {
+ if (roseHandleSuffixTrigger(t, ri->queue, ri->event, som, end,
+ tctxt, in_anchored) ==
+ HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(REPORT) {
+ if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt,
+ in_anchored) == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(REPORT_CHAIN) {
+ if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report,
+ end, tctxt, in_anchored) ==
+ HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(REPORT_EOD) {
+ if (tctxt->cb(end, ri->report, tctxt->userCtx) ==
+ MO_HALT_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(REPORT_SOM_INT) {
+ if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt,
+ in_anchored) == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(REPORT_SOM) {
+ if (roseHandleSomSom(t, tctxt->state, ri->report, som, end,
+ tctxt,
+ in_anchored) == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(REPORT_SOM_KNOWN) {
+ if (roseHandleSomMatch(t, tctxt->state, ri->report, som, end,
+ tctxt, in_anchored) ==
+ HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_STATE) {
+ roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth);
+ *work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_GROUPS) {
+ tctxt->groups |= ri->groups;
+ DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups,
+ tctxt->groups);
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SPARSE_ITER_BEGIN) {
+ DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset);
+ const struct mmbit_sparse_iter *it =
+ getByOffset(t, ri->iter_offset);
+ assert(ISALIGNED(it));
+
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+ struct mmbit_sparse_state *s = scratch->sparse_iter_state;
+
+ u32 idx = 0;
+ u32 i = mmbit_sparse_iter_begin(getRoleState(tctxt->state),
+ t->rolesWithStateCount, &idx,
+ it, s);
+ if (i == MMB_INVALID) {
+ DEBUG_PRINTF("no states in sparse iter are on\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+
+ fatbit_clear(scratch->handled_roles);
+
+ const u32 *jumps = getByOffset(t, ri->jump_table);
+ DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx,
+ jumps[idx]);
+ pc = pc_base + jumps[idx];
+ continue;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SPARSE_ITER_NEXT) {
+ DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset,
+ ri->state);
+ const struct mmbit_sparse_iter *it =
+ getByOffset(t, ri->iter_offset);
+ assert(ISALIGNED(it));
+
+ struct hs_scratch *scratch = tctxtToScratch(tctxt);
+ struct mmbit_sparse_state *s = scratch->sparse_iter_state;
+
+ u32 idx = 0;
+ u32 i = mmbit_sparse_iter_next(getRoleState(tctxt->state),
+ t->rolesWithStateCount,
+ ri->state, &idx, it, s);
+ if (i == MMB_INVALID) {
+ DEBUG_PRINTF("no more states in sparse iter are on\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ continue;
+ }
+
+ const u32 *jumps = getByOffset(t, ri->jump_table);
+ DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx,
+ jumps[idx]);
+ pc = pc_base + jumps[idx];
+ continue;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(END) {
+ DEBUG_PRINTF("finished\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+ }
+ }
+
+ assert(0); // unreachable
+ return HWLM_CONTINUE_MATCHING;
+}
+
+#undef PROGRAM_CASE
+#undef PROGRAM_NEXT_INSTRUCTION
+
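+/* Clears the (single) group that the given literal squashes from the
+ * currently active groups. */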
+static rose_inline
+void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) {
+ assert(tl->squashesGroup);
+
+ // we should be squashing a single group
+ assert(popcount64(tl->groups) == 1);
+
+ DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n",
+ ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups);
+
+ tctxt->groups &= ~tl->groups;
+}
+
+#endif // PROGRAM_RUNTIME_H
RoseCallback callback, RoseCallbackSom som_callback,
void *context);
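+/* Returns non-zero if there is EOD work to do for this block: an EOD matcher
+ * or EOD program to run, active outfix/suffix engines, or states live in the
+ * EOD sparse iterator. */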
+static really_inline
+int roseBlockHasEodWork(const struct RoseEngine *t,
+ struct hs_scratch *scratch) {
+ if (t->ematcherOffset) {
+ DEBUG_PRINTF("eod matcher to run\n");
+ return 1;
+ }
+
+ if (t->eodProgramOffset) {
+ DEBUG_PRINTF("has eod program\n");
+ return 1;
+ }
+
+ void *state = scratch->core_info.state;
+ if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
+ DEBUG_PRINTF("active outfix/suffix engines\n");
+ return 1;
+ }
+
+ if (t->eodIterOffset) {
+ u32 idx;
+ const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
+ struct mmbit_sparse_state *s = scratch->sparse_iter_state;
+ if (mmbit_sparse_iter_begin(getRoleState(state), t->rolesWithStateCount,
+ &idx, it, s) != MMB_INVALID) {
+ DEBUG_PRINTF("eod iter has states on\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
/* assumes core_info in scratch has been init to point to data */
static really_inline
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch,
return;
}
- struct mmbit_sparse_state *s = scratch->sparse_iter_state;
- const u32 numStates = t->rolesWithStateCount;
- u8 *state = (u8 *)scratch->core_info.state;
- void *role_state = getRoleState(state);
- u32 idx = 0;
- const struct mmbit_sparse_iter *it
- = (const void *)((const u8 *)t + t->eodIterOffset);
-
- if (!t->ematcherOffset && !t->hasEodEventLiteral
- && !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)
- && (!t->eodIterOffset
- || mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s)
- == MMB_INVALID)) {
+ if (!roseBlockHasEodWork(t, scratch)) {
+ DEBUG_PRINTF("no eod work\n");
return;
}
const void *get() const {
switch (code()) {
+ case ROSE_INSTR_CHECK_DEPTH: return &u.checkDepth;
case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod;
case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds;
case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown;
case ROSE_INSTR_SET_STATE: return &u.setState;
case ROSE_INSTR_SET_GROUPS: return &u.setGroups;
+ case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
+ case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
case ROSE_INSTR_END: return &u.end;
}
assert(0);
size_t length() const {
switch (code()) {
+ case ROSE_INSTR_CHECK_DEPTH: return sizeof(u.checkDepth);
case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod);
case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds);
case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown);
case ROSE_INSTR_SET_STATE: return sizeof(u.setState);
case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups);
+ case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
+ case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
case ROSE_INSTR_END: return sizeof(u.end);
}
return 0;
}
union {
+ ROSE_STRUCT_CHECK_DEPTH checkDepth;
ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod;
ROSE_STRUCT_CHECK_BOUNDS checkBounds;
ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown;
ROSE_STRUCT_SET_STATE setState;
ROSE_STRUCT_SET_GROUPS setGroups;
+ ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
+ ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
ROSE_STRUCT_END end;
} u;
};
*/
static
vector<RoseInstruction>
-flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) {
+flattenProgram(const vector<vector<RoseInstruction>> &programs) {
vector<RoseInstruction> out;
vector<u32> offsets; // offset of each instruction (bytes)
assert(targets[i] > offsets[i]); // jumps always progress
ri.u.anchoredDelay.done_jump = targets[i] - offsets[i];
break;
+ case ROSE_INSTR_CHECK_DEPTH:
+ assert(targets[i] > offsets[i]);
+ ri.u.checkDepth.fail_jump = targets[i] - offsets[i];
+ break;
case ROSE_INSTR_CHECK_ONLY_EOD:
assert(targets[i] > offsets[i]);
ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i];
}
static
-u32 writeRoleProgram(build_context &bc, vector<RoseInstruction> &program) {
- DEBUG_PRINTF("writing %zu instructions\n", program.size());
+u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) {
+ if (program.empty()) {
+ DEBUG_PRINTF("no program\n");
+ return 0;
+ }
+ DEBUG_PRINTF("writing %zu instructions\n", program.size());
u32 programOffset = 0;
for (const auto &ri : program) {
u32 offset =
return false;
}
-/* creates (and adds to rose) a sparse iterator visiting pred states/roles,
- * returns a pair:
- * - the offset of the itermap
- * - the offset for the sparse iterator.
- */
-static
-pair<u32, u32> addPredSparseIter(build_context &bc,
- const map<u32, u32> &predPrograms) {
- vector<u32> keys;
- vector<u32> programTable;
- for (const auto &elem : predPrograms) {
- keys.push_back(elem.first);
- programTable.push_back(elem.second);
- }
-
- vector<mmbit_sparse_iter> iter;
- mmbBuildSparseIterator(iter, keys, bc.numStates);
- assert(!iter.empty());
- DEBUG_PRINTF("iter size = %zu\n", iter.size());
-
- u32 iterOffset = addIteratorToTable(bc, iter);
- u32 programTableOffset =
- add_to_engine_blob(bc, begin(programTable), end(programTable));
- return make_pair(programTableOffset, iterOffset);
-}
-
static
void fillLookaroundTables(char *look_base, char *reach_base,
const vector<LookEntry> &look_vec) {
* literal entry */
const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id);
const rose_literal_info &arb_lit_info = **lit_infos.begin();
- const auto &vertices = arb_lit_info.vertices;
literalTable.push_back(RoseLiteral());
RoseLiteral &tl = literalTable.back();
assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED
|| tbi.literals.right.at(literalId).table == ROSE_EVENT);
- // Minimum depth based on this literal's roles.
- tl.minDepth = calcMinDepth(bc.depths, vertices);
-
- DEBUG_PRINTF("lit %u: role minDepth=%u\n", final_id, tl.minDepth);
-
// If this literal squashes its group behind it, store that data too
tl.squashesGroup = arb_lit_info.squash_group;
}
static
-vector<RoseInstruction> makeRoleProgram(RoseBuildImpl &build, build_context &bc,
- const RoseEdge &e) {
+vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
+ const RoseEdge &e) {
const RoseGraph &g = build.g;
auto v = target(e, g);
return program;
}
-static
-void findRootEdges(const RoseBuildImpl &build, RoseVertex src,
- map<u32, flat_set<RoseEdge>> &root_edges_map) {
- const auto &g = build.g;
- for (const auto &e : out_edges_range(src, g)) {
- const auto &v = target(e, g);
- if (build.hasDirectFinalId(v)) {
- continue; // Skip direct reports.
- }
- for (auto lit_id : g[v].literals) {
- assert(lit_id < build.literal_info.size());
- u32 final_id = build.literal_info.at(lit_id).final_id;
- if (final_id != MO_INVALID_IDX) {
- root_edges_map[final_id].insert(e);
- }
- }
- }
-}
-
-static
-void buildRootRolePrograms(RoseBuildImpl &build, build_context &bc,
- vector<RoseLiteral> &literalTable) {
- const auto &g = build.g;
-
- map<u32, flat_set<RoseEdge>> root_edges_map; // lit id -> root edges
- findRootEdges(build, build.root, root_edges_map);
- findRootEdges(build, build.anchored_root, root_edges_map);
-
- for (u32 id = 0; id < literalTable.size(); id++) {
- const auto &root_edges = root_edges_map[id];
- DEBUG_PRINTF("lit %u has %zu root edges\n", id, root_edges.size());
-
- // Sort edges by (source, target) vertex indices to ensure
- // deterministic program construction.
- vector<RoseEdge> ordered_edges(begin(root_edges), end(root_edges));
- sort(begin(ordered_edges), end(ordered_edges),
- [&g](const RoseEdge &a, const RoseEdge &b) {
- return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
- tie(g[source(b, g)].idx, g[target(b, g)].idx);
- });
-
- vector<vector<RoseInstruction>> root_prog;
- for (const auto &e : ordered_edges) {
- DEBUG_PRINTF("edge (%zu,%zu)\n", g[source(e, g)].idx,
- g[target(e, g)].idx);
- auto role_prog = makeRoleProgram(build, bc, e);
- if (role_prog.empty()) {
- continue;
- }
- root_prog.push_back(role_prog);
- }
-
- RoseLiteral &tl = literalTable[id];
- if (root_prog.empty()) {
- tl.rootProgramOffset = 0;
- continue;
- }
-
- auto final_program = flattenRoleProgram(root_prog);
- tl.rootProgramOffset = writeRoleProgram(bc, final_program);
- }
-}
-
static
void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
const auto &g = build.g;
}
static
-vector<RoseInstruction> makeSparseIterProgram(RoseBuildImpl &build,
- build_context &bc,
- const RoseEdge &e) {
+vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc,
+ const RoseEdge &e) {
const RoseGraph &g = build.g;
const RoseVertex v = target(e, g);
- auto program = makeRoleProgram(build, bc, e);
+ auto program = makeProgram(build, bc, e);
if (hasGreaterInDegree(1, v, g)) {
// Only necessary when there is more than one pred.
return program;
}
+/**
+ * Returns the pair (program offset, sparse iter offset).
+ */
static
-void buildLitSparseIter(RoseBuildImpl &build, build_context &bc,
- vector<RoseVertex> &verts, RoseLiteral &tl) {
- const auto &g = build.g;
+pair<u32, u32> makeSparseIterProgram(build_context &bc,
+ map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+ const vector<RoseVertex> &verts,
+ const vector<RoseInstruction> &root_program) {
+ vector<RoseInstruction> program;
+ u32 iter_offset = 0;
+
+ if (!predProgramLists.empty()) {
+ // First, add the iterator itself.
+ vector<u32> keys;
+ for (const auto &elem : predProgramLists) {
+ keys.push_back(elem.first);
+ }
+ DEBUG_PRINTF("%zu keys: %s\n", keys.size(),
+ as_string_list(keys).c_str());
+
+ vector<mmbit_sparse_iter> iter;
+ mmbBuildSparseIterator(iter, keys, bc.numStates);
+ assert(!iter.empty());
+ iter_offset = addIteratorToTable(bc, iter);
+
+ // Construct our program, starting with the SPARSE_ITER_BEGIN
+ // instruction, keeping track of the jump offset for each sub-program.
+ vector<u32> jump_table;
+ u32 curr_offset = 0;
+
+ // Add a pre-check for min depth, if it's useful.
+ if (!verts.empty()) {
+ u32 min_depth = calcMinDepth(bc.depths, verts);
+ if (min_depth > 1) {
+ auto ri = RoseInstruction(ROSE_INSTR_CHECK_DEPTH);
+ ri.u.checkDepth.min_depth = min_depth;
+ program.push_back(ri);
+ curr_offset = ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+ }
+ }
- if (verts.empty()) {
- // This literal has no non-root roles => no sparse iter
- tl.iterOffset = ROSE_OFFSET_INVALID;
- tl.iterProgramOffset = 0;
- return;
- }
+ program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
+ curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
- // Deterministic ordering.
- sort(begin(verts), end(verts),
- [&g](RoseVertex a, RoseVertex b) { return g[a].idx < g[b].idx; });
+ for (const auto &e : predProgramLists) {
+ DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
+ curr_offset);
+ jump_table.push_back(curr_offset);
+ auto subprog = flattenProgram(e.second);
- // pred state id -> list of programs
- map<u32, vector<vector<RoseInstruction>>> predProgramLists;
+ if (e.first != keys.back()) {
+ // For all but the last subprogram, replace the END instruction
+ // with a SPARSE_ITER_NEXT.
+ assert(!subprog.empty());
+ assert(subprog.back().code() == ROSE_INSTR_END);
+ subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
+ }
- for (const auto &v : verts) {
- DEBUG_PRINTF("vertex %zu\n", g[v].idx);
- for (const auto &e : in_edges_range(v, g)) {
- const auto &u = source(e, g);
- if (build.isAnyStart(u)) {
- continue; // Root roles are not handled with sparse iterator.
+ for (const auto &ri : subprog) {
+ program.push_back(ri);
+ curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
}
+ }
- assert(contains(bc.roleStateIndices, u));
- u32 pred_state = bc.roleStateIndices.at(u);
+ const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(),
+ ROSE_INSTR_MIN_ALIGN);
- DEBUG_PRINTF("pred %zu (state %u)\n", g[u].idx, pred_state);
+ // Write the jump table into the bytecode.
+ const u32 jump_table_offset =
+ add_to_engine_blob(bc, begin(jump_table), end(jump_table));
- auto program = makeSparseIterProgram(build, bc, e);
- predProgramLists[pred_state].push_back(program);
+ // Fix up the instruction operands.
+ auto keys_it = begin(keys);
+ curr_offset = 0;
+ for (size_t i = 0; i < program.size(); i++) {
+ auto &ri = program[i];
+ switch (ri.code()) {
+ case ROSE_INSTR_CHECK_DEPTH:
+ ri.u.checkDepth.fail_jump = end_offset - curr_offset;
+ break;
+ case ROSE_INSTR_SPARSE_ITER_BEGIN:
+ ri.u.sparseIterBegin.iter_offset = iter_offset;
+ ri.u.sparseIterBegin.jump_table = jump_table_offset;
+ ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
+ break;
+ case ROSE_INSTR_SPARSE_ITER_NEXT:
+ ri.u.sparseIterNext.iter_offset = iter_offset;
+ ri.u.sparseIterNext.jump_table = jump_table_offset;
+ assert(keys_it != end(keys));
+ ri.u.sparseIterNext.state = *keys_it++;
+ ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
+ break;
+ default:
+ break;
+ }
+ curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
}
}
- map<u32, u32> predPrograms;
- for (const auto &e : predProgramLists) {
- auto program = flattenRoleProgram(e.second);
- u32 offset = writeRoleProgram(bc, program);
- predPrograms.emplace(e.first, offset);
+ // If we have a root program, replace the END instruction with it. Note
+ // that the root program has already been flattened.
+ if (!root_program.empty()) {
+ if (!program.empty()) {
+ assert(program.back().code() == ROSE_INSTR_END);
+ program.pop_back();
+ }
+ program.insert(end(program), begin(root_program), end(root_program));
}
- tie(tl.iterProgramOffset, tl.iterOffset) =
- addPredSparseIter(bc, predPrograms);
+ return {writeProgram(bc, program), iter_offset};
}
-// Build sparse iterators for literals.
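+/**
+ * \brief Builds the program for a literal from its graph edges: non-root
+ * edges are dispatched via a sparse iterator over predecessor states, with
+ * any root-edge program appended as an unconditional tail.
+ */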
static
-void buildSparseIter(RoseBuildImpl &build, build_context &bc,
- vector<RoseLiteral> &literalTable) {
- const RoseGraph &g = build.g;
+u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
+ const vector<RoseEdge> &lit_edges) {
+ const auto &g = build.g;
+
+ DEBUG_PRINTF("%zu lit edges\n", lit_edges.size());
+
+ // pred state id -> list of programs
+ map<u32, vector<vector<RoseInstruction>>> predProgramLists;
+ vector<RoseVertex> nonroot_verts;
+
+ // Construct sparse iter sub-programs.
+ for (const auto &e : lit_edges) {
+ const auto &u = source(e, g);
+ if (build.isAnyStart(u)) {
+ continue; // Root roles are not handled with sparse iterator.
+ }
+ DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].idx,
+ g[target(e, g)].idx);
+ assert(contains(bc.roleStateIndices, u));
+ u32 pred_state = bc.roleStateIndices.at(u);
+ auto program = makePredProgram(build, bc, e);
+ predProgramLists[pred_state].push_back(program);
+ nonroot_verts.push_back(target(e, g));
+ }
+
+ // Construct sub-program for handling root roles.
+ vector<vector<RoseInstruction>> root_programs;
+ for (const auto &e : lit_edges) {
+ const auto &u = source(e, g);
+ if (!build.isAnyStart(u)) {
+ continue;
+ }
+ DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx);
+ auto role_prog = makeProgram(build, bc, e);
+ if (role_prog.empty()) {
+ continue;
+ }
+ root_programs.push_back(role_prog);
+ }
+
+ vector<RoseInstruction> root_program;
+ if (!root_programs.empty()) {
+ root_program = flattenProgram(root_programs);
+ }
+
+ // Put it all together.
+ return makeSparseIterProgram(bc, predProgramLists, nonroot_verts,
+ root_program).first;
+}
+
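+/**
+ * \brief Maps each final literal ID to its graph edges, deduplicated and
+ * sorted by (source, target) vertex index for determinism.
+ */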
+static
+map<u32, vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
+ // Use a set of edges while building the map to cull duplicates.
+ map<u32, flat_set<RoseEdge>> unique_lit_edge_map;
- // Find all our non-root roles.
- ue2::unordered_map<u32, vector<RoseVertex>> litNonRootVertices;
- for (const auto &v : vertices_range(g)) {
- if (build.isRootSuccessor(v)) {
+ const auto &g = build.g;
+ for (const auto &e : edges_range(g)) {
+ const auto &v = target(e, g);
+ if (build.hasDirectFinalId(v)) {
+ // Skip direct reports, which do not have RoseLiteral entries.
continue;
}
for (const auto &lit_id : g[v].literals) {
+ assert(lit_id < build.literal_info.size());
u32 final_id = build.literal_info.at(lit_id).final_id;
- litNonRootVertices[final_id].push_back(v);
+ if (final_id != MO_INVALID_IDX) {
+ unique_lit_edge_map[final_id].insert(e);
+ }
}
}
+ // Build output map, sorting edges by (source, target) vertex index.
+ map<u32, vector<RoseEdge>> lit_edge_map;
+ for (const auto &m : unique_lit_edge_map) {
+ auto edge_list = vector<RoseEdge>(begin(m.second), end(m.second));
+ sort(begin(edge_list), end(edge_list),
+ [&g](const RoseEdge &a, const RoseEdge &b) {
+ return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
+ tie(g[source(b, g)].idx, g[target(b, g)].idx);
+ });
+ lit_edge_map.emplace(m.first, edge_list);
+ }
+
+ return lit_edge_map;
+}
+
+/** \brief Build the interpreter program for each literal. */
+static
+void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
+ vector<RoseLiteral> &literalTable) {
+ auto lit_edge_map = findEdgesByLiteral(build);
+
for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) {
- buildLitSparseIter(build, bc, litNonRootVertices[finalId],
- literalTable[finalId]);
+ const auto &lit_edges = lit_edge_map[finalId];
+ u32 offset = buildLiteralProgram(build, bc, lit_edges);
+ literalTable[finalId].programOffset = offset;
}
}
return program;
}
-/* returns a pair containing the iter map offset and iter offset */
+/**
+ * Returns the pair (program offset, sparse iter offset).
+ */
static
-pair<u32, u32> buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) {
+pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
const RoseGraph &g = build.g;
// pred state id -> list of programs
return {0, 0};
}
- map<u32, u32> predPrograms;
- for (const auto &e : predProgramLists) {
- DEBUG_PRINTF("pred %u has %zu programs\n", e.first, e.second.size());
- auto program = flattenRoleProgram(e.second);
- u32 offset = writeRoleProgram(bc, program);
- predPrograms.emplace(e.first, offset);
+ return makeSparseIterProgram(bc, predProgramLists, {}, {});
+}
+
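+/**
+ * \brief Builds the unconditional EOD program from the edges into the
+ * special EOD event literal's vertices, if one is in use.
+ */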
+static
+u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
+ if (build.eod_event_literal_id == MO_INVALID_IDX) {
+ return 0;
}
- return addPredSparseIter(bc, predPrograms);
+ const RoseGraph &g = build.g;
+ const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
+ assert(lit_info.delayed_ids.empty());
+ assert(!lit_info.squash_group);
+ assert(!lit_info.requires_benefits);
+
+ // Collect all edges leading into EOD event literal vertices.
+ vector<RoseEdge> edge_list;
+ for (const auto &v : lit_info.vertices) {
+ insert(&edge_list, edge_list.end(), in_edges(v, g));
+ }
+
+ // Sort edge list for determinism, prettiness.
+ sort(begin(edge_list), end(edge_list),
+ [&g](const RoseEdge &a, const RoseEdge &b) {
+ return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
+ tie(g[source(b, g)].idx, g[target(b, g)].idx);
+ });
+
+ return buildLiteralProgram(build, bc, edge_list);
}
static
vector<RoseLiteral> literalTable;
buildLiteralTable(*this, bc, literalTable);
- buildSparseIter(*this, bc, literalTable);
+ buildLiteralPrograms(*this, bc, literalTable);
+ u32 eodProgramOffset = writeEodProgram(*this, bc);
+ u32 eodIterProgramOffset;
u32 eodIterOffset;
- u32 eodProgramTableOffset;
- tie(eodProgramTableOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc);
+ tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc);
vector<mmbit_sparse_iter> activeLeftIter;
buildActiveLeftIter(leftInfoTable, activeLeftIter);
throw ResourceLimitError();
}
- // Write root programs for literals into the engine blob.
- buildRootRolePrograms(*this, bc, literalTable);
-
u32 amatcherOffset = 0;
u32 fmatcherOffset = 0;
u32 ematcherOffset = 0;
= anchoredReportInverseMapOffset;
engine->multidirectOffset = multidirectOffset;
+ engine->eodProgramOffset = eodProgramOffset;
+ engine->eodIterProgramOffset = eodIterProgramOffset;
engine->eodIterOffset = eodIterOffset;
- engine->eodProgramTableOffset = eodProgramTableOffset;
engine->lastByteHistoryIterOffset = lastByteOffset;
write_out(&engine->state_init, (char *)engine.get(), state_scatter,
state_scatter_aux_offset);
- if (eod_event_literal_id != MO_INVALID_IDX) {
- engine->hasEodEventLiteral = 1;
- DEBUG_PRINTF("eod literal id=%u, final_id=%u\n", eod_event_literal_id,
- literal_info.at(eod_event_literal_id).final_id);
- engine->eodLiteralId = literal_info.at(eod_event_literal_id).final_id;
- }
-
if (anchoredIsMulti(*engine)) {
DEBUG_PRINTF("multiple anchored dfas\n");
engine->maxSafeAnchoredDROffset = 1;
continue;
}
+ // The special EOD event literal has its own program and does not need
+ // a real literal ID.
+ if (i == tbi.eod_event_literal_id) {
+ assert(tbi.eod_event_literal_id != MO_INVALID_IDX);
+ continue;
+ }
+
const rose_literal_info &info = tbi.literal_info[i];
if (info.requires_benefits) {
assert(!tbi.isDelayed(i));
#include "rose_build_impl.h"
#include "rose/rose_dump.h"
#include "rose_internal.h"
-#include "rose_program.h"
#include "ue2common.h"
#include "nfa/nfa_internal.h"
#include "nfagraph/ng_dump.h"
return count_if(tl, tl_end, pred);
}
-static
-size_t literalsWithDepth(const RoseEngine *t, u8 depth) {
- return literalsWithPredicate(
- t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; });
-}
-
static
size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
rose_group mask = ~((1ULL << from) - 1);
}
static
-void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) {
+void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
const char *pc_base = pc;
for (;;) {
u8 code = *(const u8 *)pc;
}
PROGRAM_NEXT_INSTRUCTION
+ PROGRAM_CASE(CHECK_DEPTH) {
+ os << " min_depth " << u32{ri->min_depth} << endl;
+ os << " fail_jump +" << ri->fail_jump << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
PROGRAM_CASE(CHECK_ONLY_EOD) {
os << " fail_jump +" << ri->fail_jump << endl;
}
}
PROGRAM_NEXT_INSTRUCTION
+ PROGRAM_CASE(SPARSE_ITER_BEGIN) {
+ os << " iter_offset " << ri->iter_offset << endl;
+ os << " jump_table " << ri->jump_table << endl;
+ os << " fail_jump +" << ri->fail_jump << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SPARSE_ITER_NEXT) {
+ os << " iter_offset " << ri->iter_offset << endl;
+ os << " jump_table " << ri->jump_table << endl;
+ os << " state " << ri->state << endl;
+ os << " fail_jump +" << ri->fail_jump << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
PROGRAM_CASE(END) { return; }
PROGRAM_NEXT_INSTRUCTION
#undef PROGRAM_CASE
#undef PROGRAM_NEXT_INSTRUCTION
-static
-void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset,
- u32 programTableOffset) {
- const auto *it =
- (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset);
- const u32 *programTable =
- (const u32 *)loadFromByteCodeOffset(t, programTableOffset);
-
- // Construct a full multibit.
- const u32 total_bits = t->rolesWithStateCount;
- const vector<u8> bits(mmbit_size(total_bits), u8{0xff});
-
- struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES];
- u32 idx = 0;
- for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s);
- i != MMB_INVALID;
- i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) {
- u32 programOffset = programTable[idx];
- os << "Sparse Iter Program " << idx << " triggered by state " << i
- << " @ " << programOffset << ":" << endl;
- dumpRoleProgram(os, t, (const char *)t + programOffset);
- }
-}
-
static
void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename);
os << "Literal " << i << endl;
os << "---------------" << endl;
- if (lit->rootProgramOffset) {
- os << "Root Program @ " << lit->rootProgramOffset << ":" << endl;
- dumpRoleProgram(os, t, base + lit->rootProgramOffset);
- } else {
- os << "<No Root Program>" << endl;
- }
-
- if (lit->iterOffset != ROSE_OFFSET_INVALID) {
- dumpSparseIterPrograms(os, t, lit->iterOffset,
- lit->iterProgramOffset);
+ if (lit->programOffset) {
+ os << "Program @ " << lit->programOffset << ":" << endl;
+ dumpProgram(os, t, base + lit->programOffset);
} else {
- os << "<No Sparse Iter Programs>" << endl;
+ os << "<No Program>" << endl;
}
os << endl;
static
void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename);
+ const char *base = (const char *)t;
+
+ os << "Unconditional EOD Program:" << endl;
- if (t->eodIterOffset) {
- dumpSparseIterPrograms(os, t, t->eodIterOffset,
- t->eodProgramTableOffset);
+ if (t->eodProgramOffset) {
+ dumpProgram(os, t, base + t->eodProgramOffset);
+ os << endl;
+ } else {
+ os << "<No EOD Program>" << endl;
+ }
+
+ os << "Sparse Iter EOD Program:" << endl;
+
+ if (t->eodIterProgramOffset) {
+ dumpProgram(os, t, base + t->eodIterProgramOffset);
} else {
- os << "<No EOD Iter Programs>" << endl;
+ os << "<No EOD Iter Program>" << endl;
}
os.close();
literalsWithPredicate(
t, [](const RoseLiteral &l) { return l.squashesGroup != 0; }));
fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id);
- fprintf(f, " - with root program : %zu\n",
- literalsWithPredicate(t, [](const RoseLiteral &l) {
- return l.rootProgramOffset != 0;
- }));
- fprintf(f, " - with sparse iter : %zu\n",
- literalsWithPredicate(t, [](const RoseLiteral &l) {
- return l.iterOffset != ROSE_OFFSET_INVALID;
- }));
+ fprintf(f, " - with program : %zu\n",
+ literalsWithPredicate(
+ t, [](const RoseLiteral &l) { return l.programOffset != 0; }));
fprintf(f, " - in groups ::\n");
fprintf(f, " + weak : %zu\n",
literalsInGroups(t, 0, t->group_weak_end));
fprintf(f, " + general : %zu\n",
literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8));
- u32 depth1 = literalsWithDepth(t, 1);
- u32 depth2 = literalsWithDepth(t, 2);
- u32 depth3 = literalsWithDepth(t, 3);
- u32 depth4 = literalsWithDepth(t, 4);
- u32 depthN = t->literalCount - (depth1 + depth2 + depth3 + depth4);
-
- fprintf(f, "\nLiteral depths:\n");
- fprintf(f, " minimum depth 1 : %u\n", depth1);
- fprintf(f, " minimum depth 2 : %u\n", depth2);
- fprintf(f, " minimum depth 3 : %u\n", depth3);
- fprintf(f, " minimum depth 4 : %u\n", depth4);
- fprintf(f, " minimum depth >4 : %u\n", depthN);
-
fprintf(f, "\n");
fprintf(f, " minWidth : %u\n", t->minWidth);
fprintf(f, " minWidthExcludingBoundaries : %u\n",
DUMP_U8(t, hasFloatingDirectReports);
DUMP_U8(t, noFloatingRoots);
DUMP_U8(t, requiresEodCheck);
- DUMP_U8(t, hasEodEventLiteral);
DUMP_U8(t, hasOutfixesInSmallBlock);
DUMP_U8(t, runtimeImpl);
DUMP_U8(t, mpvTriggeredByLeaf);
DUMP_U32(t, roseCount);
DUMP_U32(t, lookaroundTableOffset);
DUMP_U32(t, lookaroundReachOffset);
+ DUMP_U32(t, eodProgramOffset);
+ DUMP_U32(t, eodIterProgramOffset);
DUMP_U32(t, eodIterOffset);
- DUMP_U32(t, eodProgramTableOffset);
DUMP_U32(t, lastByteHistoryIterOffset);
DUMP_U32(t, minWidth);
DUMP_U32(t, minWidthExcludingBoundaries);
DUMP_U32(t, somRevOffsetOffset);
DUMP_U32(t, group_weak_end);
DUMP_U32(t, floatingStreamState);
- DUMP_U32(t, eodLiteralId);
fprintf(f, "}\n");
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
}
/** \brief Structure representing a literal. */
struct RoseLiteral {
/**
- * \brief Role program to run unconditionally when this literal is seen.
+ * \brief Program to run when this literal is seen.
*
* Offset is relative to RoseEngine, or zero for no program.
*/
- u32 rootProgramOffset;
-
- /**
- * \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over
- * predecessor states.
- *
- * Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no
- * iterator.
- */
- u32 iterOffset;
-
- /**
- * \brief Table of role programs to run when triggered by the sparse
- * iterator, indexed by dense sparse iter index.
- *
- * Offset is relative to RoseEngine, zero for no programs.
- */
- u32 iterProgramOffset;
+ u32 programOffset;
/** \brief Bitset of groups that cause this literal to fire. */
rose_group groups;
- /**
- * \brief The minimum depth of this literal in the Rose graph (for depths
- * greater than 1).
- */
- u8 minDepth;
-
/**
* \brief True if this literal switches off its group behind it when it
* sets a role.
u8 noFloatingRoots; /* only need to run the anchored table if something
* matched in the anchored table */
u8 requiresEodCheck; /* stuff happens at eod time */
- u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time.
u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
in small block scans. */
u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
* bytes each) */
- u32 eodIterOffset; // or 0 if no eod iterator
- u32 eodProgramTableOffset;
+    u32 eodProgramOffset; //!< Unconditional EOD program, otherwise 0.
+    u32 eodIterProgramOffset; //!< EOD sparse iterator program, otherwise 0.
+    u32 eodIterOffset; //!< EOD sparse iterator, otherwise 0.
u32 lastByteHistoryIterOffset; // if non-zero
u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
u32 group_weak_end; /* end of weak groups, debugging only */
u32 floatingStreamState; // size in bytes
- u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0.
struct scatter_full_plan state_init;
};
/** \brief Role program instruction opcodes. */
enum RoseInstructionCode {
ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
+ ROSE_INSTR_CHECK_DEPTH, //!< Check minimum graph depth.
ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
ROSE_INSTR_SET_STATE, //!< Switch a state index on.
ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
+ ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
+ ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
ROSE_INSTR_END //!< End of program.
};
struct ROSE_STRUCT_ANCHORED_DELAY {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u8 depth; //!< Depth for this state.
rose_group groups; //!< Bitmask.
u32 done_jump; //!< Jump forward this many bytes if successful.
};
+struct ROSE_STRUCT_CHECK_DEPTH {
+ u8 code; //!< From enum RoseInstructionCode.
+ u8 min_depth; //!< Minimum depth of this literal in the Rose graph.
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
struct ROSE_STRUCT_CHECK_ONLY_EOD {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_STRUCT_CHECK_BOUNDS {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 min_bound; //!< Min distance from zero.
u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF).
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_STRUCT_CHECK_NOT_HANDLED {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 key; //!< Key in the "handled_roles" fatbit in scratch.
u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
};
struct ROSE_STRUCT_CHECK_LOOKAROUND {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 index;
u32 count;
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
struct ROSE_STRUCT_CHECK_LEFTFIX {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 queue; //!< Queue of leftfix to check.
u32 lag; //!< Lag of leftfix for this case.
ReportID report; //!< ReportID of leftfix to check.
};
struct ROSE_STRUCT_SOM_ADJUST {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 distance; //!< Distance to EOM.
};
struct ROSE_STRUCT_SOM_LEFTFIX {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 queue; //!< Queue index of leftfix providing SOM.
u32 lag; //!< Lag of leftfix for this case.
};
struct ROSE_STRUCT_TRIGGER_INFIX {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u8 cancel; //!< Cancels previous top event.
u32 queue; //!< Queue index of infix.
u32 event; //!< Queue event, from MQE_*.
};
struct ROSE_STRUCT_TRIGGER_SUFFIX {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u32 queue; //!< Queue index of suffix.
u32 event; //!< Queue event, from MQE_*.
};
struct ROSE_STRUCT_REPORT {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
ReportID report;
};
struct ROSE_STRUCT_REPORT_CHAIN {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
ReportID report;
};
struct ROSE_STRUCT_REPORT_EOD {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
ReportID report;
};
struct ROSE_STRUCT_REPORT_SOM_INT {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
ReportID report;
};
struct ROSE_STRUCT_REPORT_SOM {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
ReportID report;
};
struct ROSE_STRUCT_REPORT_SOM_KNOWN {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
ReportID report;
};
struct ROSE_STRUCT_SET_STATE {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
u8 depth; //!< Depth for this state.
u32 index; //!< State index in multibit.
};
struct ROSE_STRUCT_SET_GROUPS {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
rose_group groups; //!< Bitmask.
};
+/**
+ * Note that the offsets in the jump table are always relative to the start of
+ * the program, not the current instruction.
+ */
+struct ROSE_STRUCT_SPARSE_ITER_BEGIN {
+ u8 code; //!< From enum RoseInstructionCode.
+ u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
+ u32 jump_table; //!< Offset of jump table indexed by sparse iterator.
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+/**
+ * Note that the offsets in the jump table are always relative to the start of
+ * the program, not the current instruction.
+ */
+struct ROSE_STRUCT_SPARSE_ITER_NEXT {
+ u8 code; //!< From enum RoseInstructionCode.
+ u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
+ u32 jump_table; //!< Offset of jump table indexed by sparse iterator.
+    u32 state; //!< Current state index.
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
struct ROSE_STRUCT_END {
- u8 code; //!< From enum RoseRoleInstructionCode.
+ u8 code; //!< From enum RoseInstructionCode.
};
#endif // ROSE_ROSE_PROGRAM_H