git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Rose: Move all literal operations into program
author: Justin Viiret <justin.viiret@intel.com>
        Fri, 18 Dec 2015 04:24:52 +0000 (15:24 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
        Tue, 1 Mar 2016 00:23:56 +0000 (11:23 +1100)
Replace the RoseLiteral structure with more program instructions; now,
instead of each literal ID leading to a RoseLiteral, it simply has a
program to run (and a delay rebuild program).

This commit also makes some other improvements:

 * CHECK_STATE instruction, for use instead of a sparse iterator over a
   single element.
 * Elide some checks (CHECK_LIT_EARLY, ANCHORED_DELAY, etc) when not
   needed.
 * Flatten PUSH_DELAYED behaviour to one instruction per delayed
   literal, rather than the mask/index-list approach used before.
 * Simple program cache at compile time for deduplication.

src/rose/eod.c
src/rose/match.c
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_compile.cpp
src/rose/rose_build_impl.h
src/rose/rose_build_misc.cpp
src/rose/rose_build_util.h
src/rose/rose_dump.cpp
src/rose/rose_internal.h
src/rose/rose_program.h

index ef9873882bef97a6c7c6758f1924b1fbd4c5e591..b95a952e4cf8db10c104f9d17658bf1c0e65cb10 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -114,9 +114,9 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
 
     DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
 
-    int work_done = 0;
-    if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0,
-                       &work_done) == HWLM_TERMINATE_MATCHING) {
+    const size_t match_len = 0;
+    if (roseRunProgram(t, t->eodIterProgramOffset, offset, match_len,
+                       &(scratch->tctxt), 0) == HWLM_TERMINATE_MATCHING) {
         return MO_HALT_MATCHING;
     }
 
@@ -233,9 +233,9 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
     // There should be no pending delayed literals.
     assert(!scratch->tctxt.filledDelayedSlots);
 
-    int work_done = 0;
-    if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0,
-                       &work_done) == HWLM_TERMINATE_MATCHING) {
+    const size_t match_len = 0;
+    if (roseRunProgram(t, t->eodProgramOffset, offset, match_len,
+                       &scratch->tctxt, 0) == HWLM_TERMINATE_MATCHING) {
         return MO_HALT_MATCHING;
     }
 
index 89f0674e773728b093da3bd19f6d31171ff87fb4..72f2a167c3cf35644ba81deb7ecbb38e03bf8c7f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -71,123 +71,6 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) {
 }
 #endif
 
-static rose_inline
-int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind,
-                      const u8 *and_mask, const u8 *exp_mask) {
-    DEBUG_PRINTF("am offset = %zu, em offset = %zu\n",
-                 and_mask - (const u8 *)tctxt->t,
-                 exp_mask - (const u8 *)tctxt->t);
-    const u8 *data;
-
-    // If the check works over part of the history and part of the buffer, we
-    // create a temporary copy of the data in here so it's contiguous.
-    u8 temp[MAX_MASK2_WIDTH];
-
-    struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
-    s64a buffer_offset = (s64a)end - ci->buf_offset;
-    DEBUG_PRINTF("rel offset %lld\n", buffer_offset);
-    if (buffer_offset >= mask_rewind) {
-        data = ci->buf + buffer_offset - mask_rewind;
-        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
-                     ci->buf, mask_rewind);
-    } else if (buffer_offset <= 0) {
-        data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind;
-        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
-                     ci->buf, mask_rewind);
-    } else {
-        u32 shortfall = mask_rewind - buffer_offset;
-        DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall,
-                     mask_rewind, ci->hlen);
-        data = temp;
-        memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall);
-        memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall);
-    }
-
-#ifdef DEBUG
-    DEBUG_PRINTF("DATA: ");
-    for (u32 i = 0; i < mask_rewind; i++) {
-        printf("%c", ourisprint(data[i]) ? data[i] : '?');
-    }
-    printf(" (len=%u)\n", mask_rewind);
-#endif
-
-    u32 len = mask_rewind;
-    while (len >= sizeof(u64a)) {
-        u64a a = unaligned_load_u64a(data);
-        a &= *(const u64a *)and_mask;
-        if (a != *(const u64a *)exp_mask) {
-            DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask);
-            return 0;
-        }
-        data += sizeof(u64a);
-        and_mask += sizeof(u64a);
-        exp_mask += sizeof(u64a);
-        len -= sizeof(u64a);
-    }
-
-    while (len) {
-        u8 a = *data;
-        a &= *and_mask;
-        if (a != *exp_mask) {
-            DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx  em%02hhx\n", a,
-                          *data, *and_mask, *exp_mask);
-            return 0;
-        }
-        data++;
-        and_mask++;
-        exp_mask++;
-        len--;
-    }
-
-    return 1;
-}
-
-static
-int roseCheckLiteralBenefits(u64a end, size_t mask_rewind, u32 id,
-                             struct RoseContext *tctxt) {
-    const struct RoseEngine *t = tctxt->t;
-    const struct lit_benefits *lbi = getLiteralBenefitsTable(t) + id;
-    return roseCheckBenefits(tctxt, end, mask_rewind, lbi->and_mask.a8,
-                             lbi->expected.e8);
-}
-
-static rose_inline
-void pushDelayedMatches(const struct RoseLiteral *tl, u64a offset,
-                        struct RoseContext *tctxt) {
-    u32 delay_mask = tl->delay_mask;
-    if (!delay_mask) {
-        return;
-    }
-
-    u32 delay_count = tctxt->t->delay_count;
-    u8 *delaySlotBase = getDelaySlots(tctxtToScratch(tctxt));
-    size_t delaySlotSize = tctxt->t->delay_slot_size;
-    assert(tl->delayIdsOffset != ROSE_OFFSET_INVALID);
-    const u32 *delayIds = getByOffset(tctxt->t, tl->delayIdsOffset);
-    assert(ISALIGNED(delayIds));
-
-    while (delay_mask) {
-        u32 src_slot_index = findAndClearLSB_32(&delay_mask);
-        u32 slot_index = (src_slot_index + offset) & DELAY_MASK;
-        u8 *slot = delaySlotBase + delaySlotSize * slot_index;
-
-        if (offset + src_slot_index <= tctxt->delayLastEndOffset) {
-            DEBUG_PRINTF("skip too late\n");
-            goto next;
-        }
-
-        DEBUG_PRINTF("pushing tab %u into slot %u\n", *delayIds, slot_index);
-        if (!(tctxt->filledDelayedSlots & (1U << slot_index))) {
-            tctxt->filledDelayedSlots |= 1U << slot_index;
-            mmbit_clear(slot, delay_count);
-        }
-
-        mmbit_set(slot, delay_count, *delayIds);
-    next:
-        delayIds++;
-    }
-}
-
 hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
                                      void *ctx) {
     struct hs_scratch *scratch = ctx;
@@ -211,17 +94,17 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
         return tctx->groups;
     }
 
-    if (id < t->nonbenefits_base_id
-        && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) {
-        return tctx->groups;
-    }
-
     assert(id < t->literalCount);
-    const struct RoseLiteral *tl = &getLiteralTable(t)[id];
-
-    DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups);
+    const u32 *delayRebuildPrograms =
+        getByOffset(t, t->litDelayRebuildProgramOffset);
+    const u32 programOffset = delayRebuildPrograms[id];
 
-    pushDelayedMatches(tl, real_end, tctx);
+    if (programOffset) {
+        const size_t match_len = end - start + 1;
+        UNUSED hwlmcb_rv_t rv =
+            roseRunProgram(t, programOffset, real_end, match_len, tctx, 0);
+        assert(rv != HWLM_TERMINATE_MATCHING);
+    }
 
     /* we are just repopulating the delay queue, groups should be
      * already set from the original scan. */
@@ -465,31 +348,28 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) {
     }
 
     assert(id < t->literalCount);
-    const struct RoseLiteral *tl = &getLiteralTable(t)[id];
-    assert(tl->programOffset);
-    assert(!tl->delay_mask);
+    const u32 *programs = getByOffset(t, t->litProgramOffset);
+    const u32 programOffset = programs[id];
+    assert(programOffset);
+
+    // Anchored literals are never delayed.
+    assert(!((const u32 *)getByOffset(t, t->litDelayRebuildProgramOffset))[id]);
 
-    DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups);
+    DEBUG_PRINTF("literal id=%u\n", id);
 
     if (real_end <= t->floatingMinLiteralMatchOffset) {
         roseFlushLastByteHistory(t, state, real_end, tctxt);
         tctxt->lastEndOffset = real_end;
     }
 
-    int work_done = 0;
-    if (roseRunProgram(t, tl->programOffset, real_end, tctxt, 1, &work_done) ==
+    const size_t match_len = 0;
+    if (roseRunProgram(t, programOffset, real_end, match_len, tctxt, 1) ==
         HWLM_TERMINATE_MATCHING) {
         assert(can_stop_matching(tctxtToScratch(tctxt)));
         DEBUG_PRINTF("caller requested termination\n");
         return MO_HALT_MATCHING;
     }
 
-    // If we've actually handled any roles, we might need to apply this
-    // literal's squash mask to our groups as well.
-    if (work_done && tl->squashesGroup) {
-        roseSquashGroup(tctxt, tl);
-    }
-
     DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
 
     if (real_end > t->floatingMinLiteralMatchOffset) {
@@ -502,9 +382,10 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) {
 // Rose match-processing workhorse
 /* assumes not in_anchored */
 static really_inline
-hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id,
-                               struct RoseContext *tctxt, char do_group_check,
-                               char in_delay_play, char in_anch_playback) {
+hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end,
+                               size_t match_len, u32 id,
+                               struct RoseContext *tctxt, char in_delay_play,
+                               char in_anch_playback) {
     /* assert(!tctxt->in_anchored); */
     u8 *state = tctxt->state;
 
@@ -536,63 +417,30 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id,
     }
 
     assert(id < t->literalCount);
-    const struct RoseLiteral *tl = &getLiteralTable(t)[id];
-    DEBUG_PRINTF("lit id=%u, groups=0x%016llx\n", id, tl->groups);
-
-    if (do_group_check && !(tl->groups & tctxt->groups)) {
-        DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n");
-        return HWLM_CONTINUE_MATCHING;
-    }
-
-    assert(!in_delay_play || !tl->delay_mask);
-    if (!in_delay_play) {
-        pushDelayedMatches(tl, end, tctxt);
-    }
-
-    if (end < t->floatingMinLiteralMatchOffset) {
-        DEBUG_PRINTF("too soon\n");
-        assert(!in_delay_play); /* should not have been enqueued */
-        /* continuing on may result in pushing global time back */
-        return HWLM_CONTINUE_MATCHING;
-    }
-
-    int work_done = 0;
-
-    if (tl->programOffset) {
-        DEBUG_PRINTF("running program at %u\n", tl->programOffset);
-        if (roseRunProgram(t, tl->programOffset, end, tctxt, 0, &work_done) ==
-            HWLM_TERMINATE_MATCHING) {
-            return HWLM_TERMINATE_MATCHING;
-        }
-
-    }
-
-    // If we've actually handled any roles, we might need to apply this
-    // literal's squash mask to our groups as well.
-    if (work_done && tl->squashesGroup) {
-        roseSquashGroup(tctxt, tl);
-    }
-
-    return HWLM_CONTINUE_MATCHING;
+    const u32 *programs = getByOffset(t, t->litProgramOffset);
+    return roseRunProgram(t, programs[id], end, match_len, tctxt, 0);
 }
 
-
 static never_inline
-hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end, u32 id,
-                                    struct RoseContext *tctxt) {
-    return roseProcessMatch_i(t, end, id, tctxt, 1, 1, 0);
+hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end,
+                                    u32 id, struct RoseContext *tctxt) {
+    size_t match_len = 0;
+    return roseProcessMatch_i(t, end, match_len, id, tctxt, 1, 0);
 }
 
 static never_inline
-hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, u64a end,
-                                            u32 id, struct RoseContext *tctxt) {
-    return roseProcessMatch_i(t, end, id, tctxt, 0, 0, 1);
+hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t,
+                                            u64a end, u32 id,
+                                            struct RoseContext *tctxt) {
+    size_t match_len = 0;
+    return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 1);
 }
 
 static really_inline
-hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, u32 id,
+hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end,
+                                 size_t match_len, u32 id,
                                  struct RoseContext *tctxt) {
-    return roseProcessMatch_i(t, end, id, tctxt, 1, 0, 0);
+    return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 0);
 }
 
 static rose_inline
@@ -839,11 +687,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) {
         return HWLM_TERMINATE_MATCHING;
     }
 
-    if (id < tctx->t->nonbenefits_base_id
-        && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) {
-        return tctx->groups;
-    }
-
     hwlmcb_rv_t rv = flushQueuedLiterals(tctx, real_end);
     /* flushDelayed may have advanced tctx->lastEndOffset */
 
@@ -856,7 +699,8 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) {
         return HWLM_TERMINATE_MATCHING;
     }
 
-    rv = roseProcessMainMatch(tctx->t, real_end, id, tctx);
+    size_t match_len = end - start + 1;
+    rv = roseProcessMainMatch(tctx->t, real_end, match_len, id, tctx);
 
     DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups);
 
index 6ba86ca658ce465dcb0c6945ff4426768a558128..b4d4aeeed1fa2098a3958112dd1cd907798a5b24 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 #include "runtime.h"
 #include "scratch.h"
 #include "ue2common.h"
+#include "util/compare.h"
 #include "util/fatbit.h"
 #include "util/multibit.h"
 
+static rose_inline
+int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind,
+                      const u8 *and_mask, const u8 *exp_mask) {
+    DEBUG_PRINTF("am offset = %zu, em offset = %zu\n",
+                 and_mask - (const u8 *)tctxt->t,
+                 exp_mask - (const u8 *)tctxt->t);
+    const u8 *data;
+
+    // If the check works over part of the history and part of the buffer, we
+    // create a temporary copy of the data in here so it's contiguous.
+    u8 temp[MAX_MASK2_WIDTH];
+
+    struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+    s64a buffer_offset = (s64a)end - ci->buf_offset;
+    DEBUG_PRINTF("rel offset %lld\n", buffer_offset);
+    if (buffer_offset >= mask_rewind) {
+        data = ci->buf + buffer_offset - mask_rewind;
+        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
+                     ci->buf, mask_rewind);
+    } else if (buffer_offset <= 0) {
+        data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind;
+        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
+                     ci->buf, mask_rewind);
+    } else {
+        u32 shortfall = mask_rewind - buffer_offset;
+        DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall,
+                     mask_rewind, ci->hlen);
+        data = temp;
+        memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall);
+        memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall);
+    }
+
+#ifdef DEBUG
+    DEBUG_PRINTF("DATA: ");
+    for (u32 i = 0; i < mask_rewind; i++) {
+        printf("%c", ourisprint(data[i]) ? data[i] : '?');
+    }
+    printf(" (len=%u)\n", mask_rewind);
+#endif
+
+    u32 len = mask_rewind;
+    while (len >= sizeof(u64a)) {
+        u64a a = unaligned_load_u64a(data);
+        a &= *(const u64a *)and_mask;
+        if (a != *(const u64a *)exp_mask) {
+            DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask);
+            return 0;
+        }
+        data += sizeof(u64a);
+        and_mask += sizeof(u64a);
+        exp_mask += sizeof(u64a);
+        len -= sizeof(u64a);
+    }
+
+    while (len) {
+        u8 a = *data;
+        a &= *and_mask;
+        if (a != *exp_mask) {
+            DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx  em%02hhx\n", a,
+                          *data, *and_mask, *exp_mask);
+            return 0;
+        }
+        data++;
+        and_mask++;
+        exp_mask++;
+        len--;
+    }
+
+    return 1;
+}
+
+static rose_inline
+void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay,
+                          u32 delay_index, u64a offset,
+                          struct RoseContext *tctxt) {
+    assert(delay);
+
+    const u32 src_slot_index = delay;
+    u32 slot_index = (src_slot_index + offset) & DELAY_MASK;
+
+    if (offset + src_slot_index <= tctxt->delayLastEndOffset) {
+        DEBUG_PRINTF("skip too late\n");
+        return;
+    }
+
+    const u32 delay_count = t->delay_count;
+    u8 *slot = getDelaySlots(tctxtToScratch(tctxt)) +
+               (t->delay_slot_size * slot_index);
+
+    DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index);
+    if (!(tctxt->filledDelayedSlots & (1U << slot_index))) {
+        tctxt->filledDelayedSlots |= 1U << slot_index;
+        mmbit_clear(slot, delay_count);
+    }
+
+    mmbit_set(slot, delay_count, delay_index);
+}
+
 static rose_inline
 char rosePrefixCheckMiracles(const struct RoseEngine *t,
                              const struct LeftNfaInfo *left,
@@ -782,10 +881,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
     break;                                                                     \
     }
 
-static really_inline
+static rose_inline
 hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
-                           u64a end, struct RoseContext *tctxt,
-                           char in_anchored, int *work_done) {
+                           u64a end, size_t match_len,
+                           struct RoseContext *tctxt, char in_anchored) {
     DEBUG_PRINTF("program begins at offset %u\n", programOffset);
 
     assert(programOffset);
@@ -800,6 +899,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
     // and SPARSE_ITER_NEXT instructions.
     struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
 
+    // If this program has an effect, work_done will be set to one (which may
+    // allow the program to squash groups).
+    int work_done = 0;
+
     assert(*(const u8 *)pc != ROSE_INSTR_END);
 
     for (;;) {
@@ -812,7 +915,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                 if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
                     DEBUG_PRINTF("delay until playback\n");
                     tctxt->groups |= ri->groups;
-                    *work_done = 1;
+                    work_done = 1;
                     assert(ri->done_jump); // must progress
                     pc += ri->done_jump;
                     continue;
@@ -820,6 +923,35 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_LIT_MASK) {
+                assert(match_len);
+                if (!roseCheckBenefits(tctxt, end, match_len, ri->and_mask.a8,
+                                       ri->cmp_mask.a8)) {
+                    DEBUG_PRINTF("halt: failed mask check\n");
+                    return HWLM_CONTINUE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_LIT_EARLY) {
+                if (end < t->floatingMinLiteralMatchOffset) {
+                    DEBUG_PRINTF("halt: too soon, min offset=%u\n",
+                                 t->floatingMinLiteralMatchOffset);
+                    return HWLM_CONTINUE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_GROUPS) {
+                DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
+                             tctxt->groups, ri->groups);
+                if (!(ri->groups & tctxt->groups)) {
+                    DEBUG_PRINTF("halt: no groups are set\n");
+                    return HWLM_CONTINUE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_ONLY_EOD) {
                 struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
                 if (end != ci->buf_offset + ci->len) {
@@ -874,6 +1006,11 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(PUSH_DELAYED) {
+                rosePushDelayedMatch(t, ri->delay, ri->index, end, tctxt);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SOM_ADJUST) {
                 assert(ri->distance <= end);
                 som = end - ri->distance;
@@ -890,7 +1027,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             PROGRAM_CASE(TRIGGER_INFIX) {
                 roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel,
                                  tctxt);
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -900,7 +1037,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -909,7 +1046,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                                     in_anchored) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -919,7 +1056,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -928,7 +1065,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     MO_HALT_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -937,7 +1074,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                                   in_anchored) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -947,7 +1084,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                                      in_anchored) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -957,7 +1094,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -965,7 +1102,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                 DEBUG_PRINTF("set state index %u\n", ri->index);
                 mmbit_set(getRoleState(tctxt->state), t->rolesWithStateCount,
                           ri->index);
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -976,6 +1113,28 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(SQUASH_GROUPS) {
+                assert(popcount64(ri->groups) == 63); // Squash only one group.
+                if (work_done) {
+                    tctxt->groups &= ri->groups;
+                    DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups,
+                                 tctxt->groups);
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_STATE) {
+                DEBUG_PRINTF("check state %u\n", ri->index);
+                if (!mmbit_isset(getRoleState(tctxt->state),
+                                 t->rolesWithStateCount, ri->index)) {
+                    DEBUG_PRINTF("state not on\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SPARSE_ITER_BEGIN) {
                 DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset);
                 const struct mmbit_sparse_iter *it =
@@ -1045,17 +1204,4 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
 #undef PROGRAM_CASE
 #undef PROGRAM_NEXT_INSTRUCTION
 
-static rose_inline
-void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) {
-    assert(tl->squashesGroup);
-
-    // we should be squashing a single group
-    assert(popcount64(tl->groups) == 1);
-
-    DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n",
-                 ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups);
-
-    tctxt->groups &= ~tl->groups;
-}
-
 #endif // PROGRAM_RUNTIME_H
index 5f6541919cb2cfc67f20ff74baf4987c90503ecc..9444005da78c176f8b661d5500b12c923b26b257 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -170,12 +170,16 @@ public:
 
     const void *get() const {
         switch (code()) {
+        case ROSE_INSTR_CHECK_LIT_MASK: return &u.checkLitMask;
+        case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly;
+        case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups;
         case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod;
         case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds;
         case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
         case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround;
         case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix;
         case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
+        case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
         case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
         case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix;
         case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix;
@@ -188,6 +192,8 @@ public:
         case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown;
         case ROSE_INSTR_SET_STATE: return &u.setState;
         case ROSE_INSTR_SET_GROUPS: return &u.setGroups;
+        case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups;
+        case ROSE_INSTR_CHECK_STATE: return &u.checkState;
         case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
         case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
         case ROSE_INSTR_END: return &u.end;
@@ -198,12 +204,16 @@ public:
 
     size_t length() const {
         switch (code()) {
+        case ROSE_INSTR_CHECK_LIT_MASK: return sizeof(u.checkLitMask);
+        case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly);
+        case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups);
         case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod);
         case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds);
         case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
         case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround);
         case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix);
         case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
+        case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
         case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
         case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix);
         case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix);
@@ -216,6 +226,8 @@ public:
         case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown);
         case ROSE_INSTR_SET_STATE: return sizeof(u.setState);
         case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups);
+        case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups);
+        case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState);
         case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
         case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
         case ROSE_INSTR_END: return sizeof(u.end);
@@ -224,12 +236,16 @@ public:
     }
 
     union {
+        ROSE_STRUCT_CHECK_LIT_MASK checkLitMask;
+        ROSE_STRUCT_CHECK_LIT_EARLY checkLitEarly;
+        ROSE_STRUCT_CHECK_GROUPS checkGroups;
         ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod;
         ROSE_STRUCT_CHECK_BOUNDS checkBounds;
         ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
         ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround;
         ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix;
         ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
+        ROSE_STRUCT_PUSH_DELAYED pushDelayed;
         ROSE_STRUCT_SOM_ADJUST somAdjust;
         ROSE_STRUCT_SOM_LEFTFIX somLeftfix;
         ROSE_STRUCT_TRIGGER_INFIX triggerInfix;
@@ -242,12 +258,25 @@ public:
         ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown;
         ROSE_STRUCT_SET_STATE setState;
         ROSE_STRUCT_SET_GROUPS setGroups;
+        ROSE_STRUCT_SQUASH_GROUPS squashGroups;
+        ROSE_STRUCT_CHECK_STATE checkState;
         ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
         ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
         ROSE_STRUCT_END end;
     } u;
 };
 
+static
+size_t hash_value(const RoseInstruction &ri) {
+    size_t val = 0;
+    const char *bytes = (const char *)ri.get();
+    const size_t len = ri.length();
+    for (size_t i = 0; i < len; i++) {
+        boost::hash_combine(val, bytes[i]);
+    }
+    return val;
+}
+
 struct build_context : boost::noncopyable {
     /** \brief information about engines to the left of a vertex */
     map<RoseVertex, left_build_info> leftfix_info;
@@ -270,6 +299,10 @@ struct build_context : boost::noncopyable {
      * up iterators in early misc. */
     map<vector<mmbit_sparse_iter>, u32> iterCache;
 
+    /** \brief Simple cache of programs written to engine blob, used for
+     * deduplication. */
+    ue2::unordered_map<vector<RoseInstruction>, u32> program_cache;
+
     /** \brief LookEntry list cache, so that we don't have to go scanning
      * through the full list to find cases we've used already. */
     ue2::unordered_map<vector<LookEntry>, size_t> lookaround_cache;
@@ -284,6 +317,9 @@ struct build_context : boost::noncopyable {
      * that have already been pushed into the engine_blob. */
     ue2::unordered_map<u32, u32> engineOffsets;
 
+    /** \brief Minimum offset of a match from the floating table. */
+    u32 floatingMinLiteralMatchOffset = 0;
+
     /** \brief Contents of the Rose bytecode immediately following the
      * RoseEngine. */
     vector<char, AlignedAllocator<char, 64>> engine_blob;
@@ -1453,31 +1489,6 @@ void updateNfaState(const build_context &bc, RoseStateOffsets *so,
     }
 }
 
-static
-void buildLitBenefits(const RoseBuildImpl &tbi, RoseEngine *engine,
-                      u32 base_lits_benefits_offset) {
-    lit_benefits *lba = (lit_benefits *)((char *)engine
-                                         + base_lits_benefits_offset);
-    DEBUG_PRINTF("base offset %u\n", base_lits_benefits_offset);
-    for (u32 i = 0; i < tbi.nonbenefits_base_id; i++) {
-        assert(contains(tbi.final_id_to_literal, i));
-        assert(tbi.final_id_to_literal.at(i).size() == 1);
-        u32 lit_id = *tbi.final_id_to_literal.at(i).begin();
-        const ue2_literal &s = tbi.literals.right.at(lit_id).s;
-        DEBUG_PRINTF("building mask for lit %u (fid %u) %s\n", lit_id, i,
-                     dumpString(s).c_str());
-        assert(s.length() <= MAX_MASK2_WIDTH);
-        u32 j = 0;
-        for (const auto &e : s) {
-            lba[i].and_mask.a8[j] = e.nocase ? 0 : CASE_BIT;
-            lba[i].expected.e8[j] = e.nocase ? 0 : (CASE_BIT & e.c);
-            DEBUG_PRINTF("a%02hhx e%02hhx\n", lba[i].and_mask.a8[j],
-                         lba[i].expected.e8[j]);
-            j++;
-        }
-    }
-}
-
 /* does not include history requirements for outfixes or literal matchers */
 u32 RoseBuildImpl::calcHistoryRequired() const {
     u32 m = cc.grey.minHistoryAvailable;
@@ -2232,11 +2243,11 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre
 }
 
 static
-u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) {
-    const RoseGraph &g = tbi.g;
+u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build) {
+    const RoseGraph &g = build.g;
     u32 minWidth = ROSE_BOUND_INF;
     for (auto v : vertices_range(g)) {
-        if (tbi.isAnchored(v) || tbi.isVirtualVertex(v)) {
+        if (build.isAnchored(v) || build.isVirtualVertex(v)) {
             DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].idx);
             continue;
         }
@@ -2656,12 +2667,21 @@ flattenProgram(const vector<vector<RoseInstruction>> &programs) {
 }
 
 static
-u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) {
+u32 writeProgram(build_context &bc, const vector<RoseInstruction> &program) {
     if (program.empty()) {
         DEBUG_PRINTF("no program\n");
         return 0;
     }
 
+    assert(program.back().code() == ROSE_INSTR_END);
+    assert(program.size() >= 1);
+
+    auto it = bc.program_cache.find(program);
+    if (it != end(bc.program_cache)) {
+        DEBUG_PRINTF("reusing cached program at %u\n", it->second);
+        return it->second;
+    }
+
     DEBUG_PRINTF("writing %zu instructions\n", program.size());
     u32 programOffset = 0;
     for (const auto &ri : program) {
@@ -2674,6 +2694,7 @@ u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) {
         }
     }
     DEBUG_PRINTF("program begins at offset %u\n", programOffset);
+    bc.program_cache.emplace(program, programOffset);
     return programOffset;
 }
 
@@ -2764,72 +2785,6 @@ bool hasBoundaryReports(const BoundaryReports &boundary) {
     return false;
 }
 
-static
-void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc,
-                        vector<RoseLiteral> &literalTable) {
-    const u32 final_id = verify_u32(literalTable.size());
-    assert(contains(tbi.final_id_to_literal, final_id));
-    const UNUSED u32 literalId = *tbi.final_id_to_literal.at(final_id).begin();
-    /* all literal ids associated with this final id should result in identical
-     * literal entry */
-    const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id);
-    const rose_literal_info &arb_lit_info = **lit_infos.begin();
-
-    literalTable.push_back(RoseLiteral());
-    RoseLiteral &tl = literalTable.back();
-    memset(&tl, 0, sizeof(tl));
-
-    tl.groups = 0;
-    for (const auto &li : lit_infos) {
-        tl.groups |= li->group_mask;
-    }
-
-    assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED
-           || tbi.literals.right.at(literalId).table == ROSE_EVENT);
-
-    // If this literal squashes its group behind it, store that data too
-    tl.squashesGroup = arb_lit_info.squash_group;
-
-    // Setup the delay stuff
-    const auto &children = arb_lit_info.delayed_ids;
-    if (children.empty()) {
-        tl.delay_mask = 0;
-        tl.delayIdsOffset = ROSE_OFFSET_INVALID;
-    } else {
-        map<u32, u32> local_delay_map; // delay -> relative child id
-        for (const auto &int_id : children) {
-            const rose_literal_id &child_literal = tbi.literals.right.at(int_id);
-            u32 child_id = tbi.literal_info[int_id].final_id;
-            u32 delay_index = child_id - tbi.delay_base_id;
-            tl.delay_mask |= 1U << child_literal.delay;
-            local_delay_map[child_literal.delay] = delay_index;
-        }
-
-        vector<u32> delayIds;
-        for (const auto &did : local_delay_map | map_values) {
-            delayIds.push_back(did);
-        }
-
-        tl.delayIdsOffset = add_to_engine_blob(bc, delayIds.begin(),
-                                               delayIds.end());
-
-    }
-
-    assert(!tbi.literals.right.at(literalId).delay || !tl.delay_mask);
-}
-
-// Construct the literal table.
-static
-void buildLiteralTable(const RoseBuildImpl &tbi, build_context &bc,
-                       vector<RoseLiteral> &literalTable) {
-    size_t numLiterals = tbi.final_id_to_literal.size();
-    literalTable.reserve(numLiterals);
-
-    for (size_t i = 0; i < numLiterals; ++i) {
-        createLiteralEntry(tbi, bc, literalTable);
-    }
-}
-
 /**
  * \brief True if the given vertex is a role that can only be switched on at
  * EOD.
@@ -2945,8 +2900,11 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc,
         return;
     }
 
-    // TODO: also limit to matches that can occur after
-    // floatingMinLiteralMatchOffset.
+    // If this match cannot occur after floatingMinLiteralMatchOffset, we do
+    // not need this check.
+    if (build.g[v].max_offset <= bc.floatingMinLiteralMatchOffset) {
+        return;
+    }
 
     auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY);
     ri.u.anchoredDelay.groups = build.g[v].groups;
@@ -3112,6 +3070,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
     const RoseGraph &g = build.g;
     const RoseVertex u = source(e, g);
 
+    // We know that we can trust the anchored table (DFA) to always deliver us
+    // literals at the correct offset.
+    if (build.isAnchored(v)) {
+        DEBUG_PRINTF("literal in anchored table, skipping bounds check\n");
+        return;
+    }
+
     // Use the minimum literal length.
     u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));
 
@@ -3347,97 +3312,171 @@ vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc,
     return program;
 }
 
-/**
- * Returns the pair (program offset, sparse iter offset).
- */
 static
-pair<u32, u32> makeSparseIterProgram(build_context &bc,
-                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                    const vector<RoseInstruction> &root_program) {
-    vector<RoseInstruction> program;
-    u32 iter_offset = 0;
-
-    if (!predProgramLists.empty()) {
-        // First, add the iterator itself.
-        vector<u32> keys;
-        for (const auto &elem : predProgramLists) {
-            keys.push_back(elem.first);
-        }
-        DEBUG_PRINTF("%zu keys: %s\n", keys.size(),
-                     as_string_list(keys).c_str());
-
-        vector<mmbit_sparse_iter> iter;
-        mmbBuildSparseIterator(iter, keys, bc.numStates);
-        assert(!iter.empty());
-        iter_offset = addIteratorToTable(bc, iter);
-
-        // Construct our program, starting with the SPARSE_ITER_BEGIN
-        // instruction, keeping track of the jump offset for each sub-program.
-        vector<u32> jump_table;
-        u32 curr_offset = 0;
-
-        program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
-        curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
-
-        for (const auto &e : predProgramLists) {
-            DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
-                         curr_offset);
-            jump_table.push_back(curr_offset);
-            auto subprog = flattenProgram(e.second);
-
-            if (e.first != keys.back()) {
-                // For all but the last subprogram, replace the END instruction
-                // with a SPARSE_ITER_NEXT.
-                assert(!subprog.empty());
-                assert(subprog.back().code() == ROSE_INSTR_END);
-                subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
-            }
+u32 addPredBlocksSingle(
+    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+    u32 curr_offset, vector<RoseInstruction> &program) {
+    assert(predProgramLists.size() == 1);
 
-            for (const auto &ri : subprog) {
-                program.push_back(ri);
-                curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
-            }
+    u32 pred_state = predProgramLists.begin()->first;
+    auto subprog = flattenProgram(predProgramLists.begin()->second);
+
+    // Check our pred state.
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE);
+    ri.u.checkState.index = pred_state;
+    program.push_back(ri);
+    curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    // Add subprogram.
+    for (const auto &ri : subprog) {
+        program.push_back(ri);
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
+
+    const u32 end_offset =
+        curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    // Fix up the instruction operands.
+    curr_offset = 0;
+    for (size_t i = 0; i < program.size(); i++) {
+        auto &ri = program[i];
+        switch (ri.code()) {
+        case ROSE_INSTR_CHECK_STATE:
+            ri.u.checkState.fail_jump = end_offset - curr_offset;
+            break;
+        default:
+            break;
         }
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
 
-        const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(),
-                                                       ROSE_INSTR_MIN_ALIGN);
+    return 0; // No iterator.
+}
 
-        // Write the jump table into the bytecode.
-        const u32 jump_table_offset =
-            add_to_engine_blob(bc, begin(jump_table), end(jump_table));
+static
+u32 addPredBlocksMulti(build_context &bc,
+                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+                    u32 curr_offset, vector<RoseInstruction> &program) {
+    assert(!predProgramLists.empty());
 
-        // Fix up the instruction operands.
-        auto keys_it = begin(keys);
-        curr_offset = 0;
-        for (size_t i = 0; i < program.size(); i++) {
-            auto &ri = program[i];
-            switch (ri.code()) {
-            case ROSE_INSTR_SPARSE_ITER_BEGIN:
-                ri.u.sparseIterBegin.iter_offset = iter_offset;
-                ri.u.sparseIterBegin.jump_table = jump_table_offset;
-                ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
-                break;
-            case ROSE_INSTR_SPARSE_ITER_NEXT:
-                ri.u.sparseIterNext.iter_offset = iter_offset;
-                ri.u.sparseIterNext.jump_table = jump_table_offset;
-                assert(keys_it != end(keys));
-                ri.u.sparseIterNext.state = *keys_it++;
-                ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
-                break;
-            default:
-                break;
-            }
+    // First, add the iterator itself.
+    vector<u32> keys;
+    for (const auto &elem : predProgramLists) {
+        keys.push_back(elem.first);
+    }
+    DEBUG_PRINTF("%zu keys: %s\n", keys.size(), as_string_list(keys).c_str());
+
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, keys, bc.numStates);
+    assert(!iter.empty());
+    u32 iter_offset = addIteratorToTable(bc, iter);
+
+    // Construct our program, starting with the SPARSE_ITER_BEGIN
+    // instruction, keeping track of the jump offset for each sub-program.
+    vector<u32> jump_table;
+
+    program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
+    curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    for (const auto &e : predProgramLists) {
+        DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
+                     curr_offset);
+        jump_table.push_back(curr_offset);
+        auto subprog = flattenProgram(e.second);
+
+        if (e.first != keys.back()) {
+            // For all but the last subprogram, replace the END instruction
+            // with a SPARSE_ITER_NEXT.
+            assert(!subprog.empty());
+            assert(subprog.back().code() == ROSE_INSTR_END);
+            subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
+        }
+
+        for (const auto &ri : subprog) {
+            program.push_back(ri);
             curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
         }
     }
 
+    const u32 end_offset =
+        curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    // Write the jump table into the bytecode.
+    const u32 jump_table_offset =
+        add_to_engine_blob(bc, begin(jump_table), end(jump_table));
+
+    // Fix up the instruction operands.
+    auto keys_it = begin(keys);
+    curr_offset = 0;
+    for (size_t i = 0; i < program.size(); i++) {
+        auto &ri = program[i];
+        switch (ri.code()) {
+        case ROSE_INSTR_SPARSE_ITER_BEGIN:
+            ri.u.sparseIterBegin.iter_offset = iter_offset;
+            ri.u.sparseIterBegin.jump_table = jump_table_offset;
+            ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
+            break;
+        case ROSE_INSTR_SPARSE_ITER_NEXT:
+            ri.u.sparseIterNext.iter_offset = iter_offset;
+            ri.u.sparseIterNext.jump_table = jump_table_offset;
+            assert(keys_it != end(keys));
+            ri.u.sparseIterNext.state = *keys_it++;
+            ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
+            break;
+        default:
+            break;
+        }
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
+
+    return iter_offset;
+}
+
+static
+u32 addPredBlocks(build_context &bc,
+                  map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+                  u32 curr_offset, vector<RoseInstruction> &program,
+                  bool force_sparse_iter) {
+    const size_t num_preds = predProgramLists.size();
+    if (num_preds == 0) {
+        program = flattenProgram({program});
+        return 0; // No iterator.
+    } else if (!force_sparse_iter && num_preds == 1) {
+        return addPredBlocksSingle(predProgramLists, curr_offset, program);
+    } else {
+        return addPredBlocksMulti(bc, predProgramLists, curr_offset, program);
+    }
+}
+
+/**
+ * Returns the pair (program offset, sparse iter offset).
+ */
+static
+pair<u32, u32> makeSparseIterProgram(build_context &bc,
+                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+                    const vector<RoseInstruction> &root_program,
+                    const vector<RoseInstruction> &pre_program) {
+    vector<RoseInstruction> program;
+    u32 curr_offset = 0;
+
+    // Add pre-program first.
+    for (const auto &ri : pre_program) {
+        program.push_back(ri);
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
+
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks). This operation will flatten the program up to this
+    // point.
+    u32 iter_offset =
+        addPredBlocks(bc, predProgramLists, curr_offset, program, false);
+
     // If we have a root program, replace the END instruction with it. Note
     // that the root program has already been flattened.
+    assert(!program.empty());
+    assert(program.back().code() == ROSE_INSTR_END);
     if (!root_program.empty()) {
-        if (!program.empty()) {
-            assert(program.back().code() == ROSE_INSTR_END);
-            program.pop_back();
-        }
+        program.pop_back();
         program.insert(end(program), begin(root_program), end(root_program));
     }
 
@@ -3445,15 +3484,182 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
 }
 
 static
-u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
+void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id,
+                                 vector<RoseInstruction> &program) {
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+    const auto &arb_lit_info = **lit_infos.begin();
+    if (arb_lit_info.delayed_ids.empty()) {
+        return;
+    }
+
+    for (const auto &int_id : arb_lit_info.delayed_ids) {
+        const auto &child_literal = build.literals.right.at(int_id);
+        u32 child_id = build.literal_info[int_id].final_id;
+        u32 delay_index = child_id - build.delay_base_id;
+
+        DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id,
+                     child_literal.delay, child_id);
+
+        auto ri = RoseInstruction(ROSE_INSTR_PUSH_DELAYED);
+        ri.u.pushDelayed.delay = verify_u8(child_literal.delay);
+        ri.u.pushDelayed.index = delay_index;
+        program.push_back(move(ri));
+    }
+}
+
+static
+void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id,
+                               vector<RoseInstruction> &program) {
+    assert(contains(build.final_id_to_literal, final_id));
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+
+    rose_group groups = 0;
+    for (const auto &li : lit_infos) {
+        groups |= li->group_mask;
+    }
+
+    if (!groups) {
+        return;
+    }
+
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_GROUPS);
+    ri.u.checkGroups.groups = groups;
+    program.push_back(move(ri));
+}
+
+static
+void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id,
+                                 vector<RoseInstruction> &program) {
+    assert(contains(build.final_id_to_literal, final_id));
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+    assert(!lit_infos.empty());
+
+    if (!lit_infos.front()->requires_benefits) {
+        return;
+    }
+
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_MASK);
+
+    assert(build.final_id_to_literal.at(final_id).size() == 1);
+    u32 lit_id = *build.final_id_to_literal.at(final_id).begin();
+    const ue2_literal &s = build.literals.right.at(lit_id).s;
+    DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id,
+                 final_id, dumpString(s).c_str());
+    assert(s.length() <= MAX_MASK2_WIDTH);
+    u32 i = 0;
+    for (const auto &e : s) {
+        ri.u.checkLitMask.and_mask.a8[i] = e.nocase ? 0 : CASE_BIT;
+        ri.u.checkLitMask.cmp_mask.a8[i] = e.nocase ? 0 : (CASE_BIT & e.c);
+        i++;
+    }
+
+    program.push_back(move(ri));
+}
+
+static
+void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
+                                vector<RoseInstruction> &program) {
+    assert(contains(build.final_id_to_literal, final_id));
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+
+    if (!lit_infos.front()->squash_group) {
+        return;
+    }
+
+    rose_group groups = 0;
+    for (const auto &li : lit_infos) {
+        groups |= li->group_mask;
+    }
+
+    if (!groups) {
+        return;
+    }
+
+    DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups);
+
+    auto ri = RoseInstruction(ROSE_INSTR_SQUASH_GROUPS);
+    ri.u.squashGroups.groups = ~groups; // Negated, so we can just AND it in.
+    program.push_back(move(ri));
+}
+
+static
+void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
+                                  u32 final_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  vector<RoseInstruction> &program) {
+    if (lit_edges.empty()) {
+        return;
+    }
+
+    if (bc.floatingMinLiteralMatchOffset == 0) {
+        return;
+    }
+
+    RoseVertex v = target(lit_edges.front(), build.g);
+    if (!build.isFloating(v)) {
+        return;
+    }
+
+    const auto &lit_ids = build.final_id_to_literal.at(final_id);
+    if (lit_ids.empty()) {
+        return;
+    }
+
+    size_t min_offset = SIZE_MAX;
+    for (u32 lit_id : lit_ids) {
+        const auto &lit = build.literals.right.at(lit_id);
+        min_offset = min(min_offset, lit.elength());
+    }
+
+    DEBUG_PRINTF("%zu lits, min_offset=%zu\n", lit_ids.size(), min_offset);
+
+    // If we can't match before the min offset, we don't need the check.
+    if (min_offset >= bc.floatingMinLiteralMatchOffset) {
+        DEBUG_PRINTF("no need for check, min is %u\n",
+                      bc.floatingMinLiteralMatchOffset);
+        return;
+    }
+
+    program.push_back(RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY));
+}
+
+static
+vector<RoseInstruction> buildLitInitialProgram(RoseBuildImpl &build,
+                                    build_context &bc, u32 final_id,
+                                    const vector<RoseEdge> &lit_edges) {
+    vector<RoseInstruction> pre_program;
+
+    // No initial program for EOD.
+    if (final_id == MO_INVALID_IDX) {
+        return pre_program;
+    }
+
+    DEBUG_PRINTF("final_id %u\n", final_id);
+
+    // Check lit mask.
+    makeCheckLitMaskInstruction(build, final_id, pre_program);
+
+    // Check literal groups.
+    makeGroupCheckInstruction(build, final_id, pre_program);
+
+    // Add instructions for pushing delayed matches, if there are any.
+    makePushDelayedInstructions(build, final_id, pre_program);
+
+    // Add pre-check for early literals in the floating table.
+    makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, pre_program);
+
+    return pre_program;
+}
+
+static
+u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
                         const vector<RoseEdge> &lit_edges) {
     const auto &g = build.g;
 
-    DEBUG_PRINTF("%zu lit edges\n", lit_edges.size());
+    DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size());
 
     // pred state id -> list of programs
     map<u32, vector<vector<RoseInstruction>>> predProgramLists;
-    vector<RoseVertex> nonroot_verts;
 
     // Construct sparse iter sub-programs.
     for (const auto &e : lit_edges) {
@@ -3467,7 +3673,6 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
         u32 pred_state = bc.roleStateIndices.at(u);
         auto program = makePredProgram(build, bc, e);
         predProgramLists[pred_state].push_back(program);
-        nonroot_verts.push_back(target(e, g));
     }
 
     // Construct sub-program for handling root roles.
@@ -3485,13 +3690,39 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
         root_programs.push_back(role_prog);
     }
 
+    // Literal may squash groups.
+    if (final_id != MO_INVALID_IDX) {
+        root_programs.push_back({});
+        makeGroupSquashInstruction(build, final_id, root_programs.back());
+    }
+
     vector<RoseInstruction> root_program;
     if (!root_programs.empty()) {
         root_program = flattenProgram(root_programs);
     }
 
+    auto pre_program = buildLitInitialProgram(build, bc, final_id, lit_edges);
+
     // Put it all together.
-    return makeSparseIterProgram(bc, predProgramLists, root_program).first;
+    return makeSparseIterProgram(bc, predProgramLists, root_program,
+                                 pre_program).first;
+}
+
+static
+u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
+                             u32 final_id) {
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+    const auto &arb_lit_info = **lit_infos.begin();
+    if (arb_lit_info.delayed_ids.empty()) {
+        return 0; // No delayed IDs, no work to do.
+    }
+
+    vector<RoseInstruction> program;
+    makeCheckLitMaskInstruction(build, final_id, program);
+    makePushDelayedInstructions(build, final_id, program);
+    assert(!program.empty());
+    program = flattenProgram({program});
+    return writeProgram(bc, program);
 }
 
 static
@@ -3530,17 +3761,35 @@ map<u32, vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
     return lit_edge_map;
 }
 
-/** \brief Build the interpreter program for each literal. */
+/**
+ * \brief Build the interpreter programs for each literal.
+ *
+ * Returns the base of the literal program list and the base of the delay
+ * rebuild program list.
+ */
 static
-void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
-                     vector<RoseLiteral> &literalTable) {
+pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
+    const u32 num_literals = build.final_id_to_literal.size();
     auto lit_edge_map = findEdgesByLiteral(build);
 
-    for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) {
+    vector<u32> litPrograms(num_literals);
+    vector<u32> delayRebuildPrograms(num_literals);
+
+    for (u32 finalId = 0; finalId != num_literals; ++finalId) {
         const auto &lit_edges = lit_edge_map[finalId];
-        u32 offset = buildLiteralProgram(build, bc, lit_edges);
-        literalTable[finalId].programOffset = offset;
+
+        litPrograms[finalId] =
+            buildLiteralProgram(build, bc, finalId, lit_edges);
+        delayRebuildPrograms[finalId] =
+            buildDelayRebuildProgram(build, bc, finalId);
     }
+
+    u32 litProgramsOffset =
+        add_to_engine_blob(bc, begin(litPrograms), end(litPrograms));
+    u32 delayRebuildProgramsOffset = add_to_engine_blob(
+        bc, begin(delayRebuildPrograms), end(delayRebuildPrograms));
+
+    return {litProgramsOffset, delayRebuildProgramsOffset};
 }
 
 static
@@ -3604,7 +3853,14 @@ pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
         return {0, 0};
     }
 
-    return makeSparseIterProgram(bc, predProgramLists, {});
+    vector<RoseInstruction> program;
+
+    // Note: we force the use of a sparse iterator for the EOD program so we
+    // can easily guard EOD execution at runtime.
+    u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true);
+
+    assert(program.size() > 1);
+    return {writeProgram(bc, program), iter_offset};
 }
 
 static
@@ -3634,7 +3890,7 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
                     tie(g[source(b, g)].idx, g[target(b, g)].idx);
          });
 
-    return buildLiteralProgram(build, bc, edge_list);
+    return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
 }
 
 static
@@ -3780,6 +4036,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     aligned_unique_ptr<HWLM> sbtable = buildSmallBlockMatcher(*this, &sbsize);
 
     build_context bc;
+    bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this);
 
     // Build NFAs
     set<u32> no_retrigger_queues;
@@ -3805,10 +4062,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
         throw ResourceLimitError();
     }
 
-    u32 lit_benefits_size =
-        verify_u32(sizeof(lit_benefits) * nonbenefits_base_id);
-    assert(ISALIGNED_16(lit_benefits_size));
-
     vector<u32> suffixEkeyLists;
     buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists);
 
@@ -3820,9 +4073,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                        queue_count - leftfixBeginQueue, leftInfoTable,
                        &laggedRoseCount, &historyRequired);
 
-    vector<RoseLiteral> literalTable;
-    buildLiteralTable(*this, bc, literalTable);
-    buildLiteralPrograms(*this, bc, literalTable);
+    u32 litProgramOffset;
+    u32 litDelayRebuildProgramOffset;
+    tie(litProgramOffset, litDelayRebuildProgramOffset) =
+        buildLiteralPrograms(*this, bc);
 
     u32 eodProgramOffset = writeEodProgram(*this, bc);
     u32 eodIterProgramOffset;
@@ -3857,10 +4111,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     currOffset = ROUNDUP_CL(currOffset);
     DEBUG_PRINTF("currOffset %u\n", currOffset);
 
-    /* leave space for the benefits listing */
-    u32 base_lits_benefits_offset = currOffset;
-    currOffset += lit_benefits_size;
-
     if (atable) {
         currOffset = ROUNDUP_CL(currOffset);
         amatcherOffset = currOffset;
@@ -3891,10 +4141,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     u32 intReportOffset = currOffset;
     currOffset += sizeof(internal_report) * int_reports.size();
 
-    u32 literalOffset = ROUNDUP_N(currOffset, alignof(RoseLiteral));
-    u32 literalLen = sizeof(RoseLiteral) * literalTable.size();
-    currOffset = literalOffset + literalLen;
-
     u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo));
     u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size();
     currOffset = leftOffset + roseLen;
@@ -4016,8 +4262,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     fillInReportInfo(engine.get(), intReportOffset, rm, int_reports);
 
-    engine->literalOffset = literalOffset;
-    engine->literalCount = verify_u32(literalTable.size());
+    engine->literalCount = verify_u32(final_id_to_literal.size());
+    engine->litProgramOffset = litProgramOffset;
+    engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset;
     engine->runtimeImpl = pickRuntimeImpl(*this, outfixEndQueue);
     engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this);
 
@@ -4053,14 +4300,12 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     engine->lastByteHistoryIterOffset = lastByteOffset;
 
-    u32 delay_count = verify_u32(literalTable.size() - delay_base_id);
+    u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id);
     engine->delay_count = delay_count;
     engine->delay_slot_size = mmbit_size(delay_count);
     engine->delay_base_id = delay_base_id;
     engine->anchored_base_id = anchored_base_id;
     engine->anchored_count = delay_base_id - anchored_base_id;
-    engine->nonbenefits_base_id = nonbenefits_base_id;
-    engine->literalBenefitsOffsets = base_lits_benefits_offset;
 
     engine->rosePrefixCount = rosePrefixCount;
 
@@ -4094,7 +4339,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->minWidth = hasBoundaryReports(boundary) ? 0 : minWidth;
     engine->minWidthExcludingBoundaries = minWidth;
     engine->maxSafeAnchoredDROffset = findMinWidth(*this, ROSE_FLOATING);
-    engine->floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this);
+    engine->floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset;
 
     engine->maxBiAnchoredWidth = findMaxBAWidth(*this);
     engine->noFloatingRoots = hasNoFloatingRoots();
@@ -4109,7 +4354,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     fillMatcherDistances(*this, engine.get());
 
     engine->initialGroups = getInitialGroups();
-    engine->totalNumLiterals = verify_u32(literalTable.size());
+    engine->totalNumLiterals = verify_u32(literal_info.size());
     engine->asize = verify_u32(asize);
     engine->ematcherRegionSize = ematcher_region_size;
     engine->floatingStreamState = verify_u32(floatingStreamStateRequired);
@@ -4138,12 +4383,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                    &engine->scratchStateSize, &engine->nfaStateSize,
                    &engine->tStateSize);
 
-    /* do after update mask */
-    buildLitBenefits(*this, engine.get(), base_lits_benefits_offset);
-
     // Copy in other tables
     copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob);
-    copy_bytes(ptr + engine->literalOffset, literalTable);
     copy_bytes(ptr + engine->leftOffset, leftInfoTable);
 
     fillLookaroundTables(ptr + lookaroundTableOffset,
index 2a3fe5406ccd8e2ba463a370e94349a09fde96fc..6202299baaf8b0db8a90d71bd69ae93f6e32a30b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -258,7 +258,6 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
 
     set<u32> anch;
     set<u32> norm;
-    set<u32> norm_benefits;
     set<u32> delay;
 
     /* undelayed ids come first */
@@ -281,12 +280,8 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
             continue;
         }
 
-        const rose_literal_info &info = tbi.literal_info[i];
-        if (info.requires_benefits) {
-            assert(!tbi.isDelayed(i));
-            norm_benefits.insert(i);
-            DEBUG_PRINTF("%u has benefits\n", i);
-        } else if (tbi.isDelayed(i)) {
+        if (tbi.isDelayed(i)) {
+            assert(!tbi.literal_info[i].requires_benefits);
             delay.insert(i);
         } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) {
             anch.insert(i);
@@ -295,12 +290,7 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
         }
     }
 
-    /* normal lits first (with benefits confirm)*/
-    allocateFinalIdToSet(g, norm_benefits, &tbi.literal_info,
-                         &tbi.final_id_to_literal, &next_final_id);
-
-    /* other normal lits (without benefits)*/
-    tbi.nonbenefits_base_id = next_final_id;
+    /* normal lits */
     allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal,
                          &next_final_id);
 
index a7f2e2f703c67a67cdc08ed7c332bf86d4a65ceb..c6d10063f1d633207a34c403c781205023632fdc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -496,7 +496,6 @@ public:
 
     u32 anchored_base_id;
 
-    u32 nonbenefits_base_id;
     u32 ematcher_region_size; /**< number of bytes the eod table runs over */
 
     /** \brief Mapping from anchored literal ID to the original literal suffix
index 044a4208f4c8dd4c1a211a7c3403173f06f0e6ec..66b0bdd448959e9a458ad0e06d5417d5e58567e4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -78,7 +78,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in,
       group_weak_end(0),
       group_end(0),
       anchored_base_id(MO_INVALID_IDX),
-      nonbenefits_base_id(MO_INVALID_IDX),
       ematcher_region_size(0),
       floating_direct_report(false),
       eod_event_literal_id(MO_INVALID_IDX),
index fe2124a0fd9db2f6f9300be141d40633ae5ca0c6..536b031a358baf3d1ec89ae6860a2718c97e14c5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 
 namespace ue2 {
 
-// Calculate the minimum depth for the given set of vertices, ignoring those
-// with depth 1.
-template<class Cont>
-static
-u8 calcMinDepth(const std::map<RoseVertex, u32> &depths, const Cont &verts) {
-    u8 d = 255;
-    for (RoseVertex v : verts) {
-        u8 vdepth = (u8)std::min((u32)255, depths.at(v));
-        if (vdepth > 1) {
-            d = std::min(d, vdepth);
-        }
-    }
-    return d;
-}
-
 // Comparator for vertices using their index property.
 struct VertexIndexComp {
     VertexIndexComp(const RoseGraph &gg) : g(gg) {}
index 6210d10226ecceea6ae6605de722a15c1dc550b8..cd70c734cddb672586fe73a7edab9cb508ac8c0b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -116,31 +116,6 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
     return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
 }
 
-static
-u32 literalsWithDirectReports(const RoseEngine *t) {
-    return t->totalNumLiterals - t->literalCount;
-}
-
-template<typename Predicate>
-static
-size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) {
-    const RoseLiteral *tl = getLiteralTable(t);
-    const RoseLiteral *tl_end = tl + t->literalCount;
-
-    return count_if(tl, tl_end, pred);
-}
-
-static
-size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
-    rose_group mask = ~((1ULL << from) - 1);
-    if (to < 64) {
-        mask &= ((1ULL << to) - 1);
-    }
-
-    return literalsWithPredicate(
-        t, [&mask](const RoseLiteral &l) { return l.groups & mask; });
-}
-
 static
 CharReach bitvectorToReach(const u8 *reach) {
     CharReach cr;
@@ -177,6 +152,16 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
     }
 }
 
+static
+string dumpStrMask(const u8 *mask, size_t len) {
+    ostringstream oss;
+    for (size_t i = 0; i < len; i++) {
+        oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]}
+            << " ";
+    }
+    return oss.str();
+}
+
 #define PROGRAM_CASE(name)                                                     \
     case ROSE_INSTR_##name: {                                                  \
         os << "  " << std::setw(4) << std::setfill('0') << (pc - pc_base)      \
@@ -202,14 +187,26 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(CHECK_ONLY_EOD) {
-                os << "    fail_jump +" << ri->fail_jump << endl;
+            PROGRAM_CASE(CHECK_LIT_MASK) {
+                os << "    and_mask "
+                   << dumpStrMask(ri->and_mask.a8, sizeof(ri->and_mask.a8))
+                   << endl;
+                os << "    cmp_mask "
+                   << dumpStrMask(ri->cmp_mask.a8, sizeof(ri->cmp_mask.a8))
+                   << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(CHECK_BOUNDS) {
-                os << "    min_bound " << ri->min_bound << endl;
-                os << "    max_bound " << ri->max_bound << endl;
+            PROGRAM_CASE(CHECK_LIT_EARLY) {}
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_GROUPS) {
+                os << "    groups 0x" << std::hex << ri->groups << std::dec
+                   << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_ONLY_EOD) {
                 os << "    fail_jump +" << ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
@@ -236,6 +233,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(PUSH_DELAYED) {
+                os << "    delay " << u32{ri->delay} << endl;
+                os << "    index " << ri->index << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SOM_ADJUST) {
                 os << "    distance " << ri->distance << endl;
             }
@@ -301,6 +304,18 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(SQUASH_GROUPS) {
+                os << "    groups 0x" << std::hex << ri->groups << std::dec
+                   << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_STATE) {
+                os << "    index " << ri->index << endl;
+                os << "    fail_jump +" << ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SPARSE_ITER_BEGIN) {
                 os << "    iter_offset " << ri->iter_offset << endl;
                 os << "    jump_table " << ri->jump_table << endl;
@@ -334,21 +349,32 @@ static
 void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
     ofstream os(filename);
 
-    const RoseLiteral *lits = getLiteralTable(t);
-    const char *base = (const char *)t;
+    const u32 *litPrograms =
+        (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset);
+    const u32 *delayRebuildPrograms =
+        (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset);
 
     for (u32 i = 0; i < t->literalCount; i++) {
-        const RoseLiteral *lit = &lits[i];
         os << "Literal " << i << endl;
         os << "---------------" << endl;
 
-        if (lit->programOffset) {
-            os << "Program @ " << lit->programOffset << ":" << endl;
-            dumpProgram(os, t, base + lit->programOffset);
+        if (litPrograms[i]) {
+            os << "Program @ " << litPrograms[i] << ":" << endl;
+            const char *prog =
+                (const char *)loadFromByteCodeOffset(t, litPrograms[i]);
+            dumpProgram(os, t, prog);
         } else {
             os << "<No Program>" << endl;
         }
 
+        if (delayRebuildPrograms[i]) {
+            os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":"
+               << endl;
+            const char *prog = (const char *)loadFromByteCodeOffset(
+                t, delayRebuildPrograms[i]);
+            dumpProgram(os, t, prog);
+        }
+
         os << endl;
     }
 
@@ -710,8 +736,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
             etable ? hwlmSize(etable) : 0, t->ematcherRegionSize);
     fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n",
             sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance);
-    fprintf(f, " - literal table     : %zu bytes\n",
-            t->literalCount * sizeof(RoseLiteral));
     fprintf(f, " - role state table  : %zu bytes\n",
             t->rolesWithStateCount * sizeof(u32));
     fprintf(f, " - nfa info table    : %u bytes\n",
@@ -745,22 +769,9 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
     fprintf(f, "handled key count    : %u\n", t->handledKeyCount);
     fprintf(f, "\n");
 
-    fprintf(f, "number of literals   : %u\n", t->totalNumLiterals);
-    fprintf(f, " - delayed           : %u\n", t->delay_count);
-    fprintf(f, " - direct report     : %u\n",
-            literalsWithDirectReports(t));
-    fprintf(f, " - that squash group : %zu\n",
-            literalsWithPredicate(
-                t, [](const RoseLiteral &l) { return l.squashesGroup != 0; }));
-    fprintf(f, " - with benefits     : %u\n", t->nonbenefits_base_id);
-    fprintf(f, " - with program      : %zu\n",
-            literalsWithPredicate(
-                t, [](const RoseLiteral &l) { return l.programOffset != 0; }));
-    fprintf(f, " - in groups ::\n");
-    fprintf(f, "   + weak            : %zu\n",
-            literalsInGroups(t, 0, t->group_weak_end));
-    fprintf(f, "   + general         : %zu\n",
-            literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8));
+    fprintf(f, "total literal count  : %u\n", t->totalNumLiterals);
+    fprintf(f, "  prog table size    : %u\n", t->literalCount);
+    fprintf(f, "  delayed literals   : %u\n", t->delay_count);
 
     fprintf(f, "\n");
     fprintf(f, "  minWidth                    : %u\n", t->minWidth);
@@ -839,7 +850,8 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, fmatcherMaxBiAnchoredWidth);
     DUMP_U32(t, intReportOffset);
     DUMP_U32(t, intReportCount);
-    DUMP_U32(t, literalOffset);
+    DUMP_U32(t, litProgramOffset);
+    DUMP_U32(t, litDelayRebuildProgramOffset);
     DUMP_U32(t, literalCount);
     DUMP_U32(t, multidirectOffset);
     DUMP_U32(t, activeArrayCount);
@@ -876,7 +888,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, delay_base_id);
     DUMP_U32(t, anchored_count);
     DUMP_U32(t, anchored_base_id);
-    DUMP_U32(t, nonbenefits_base_id);
     DUMP_U32(t, maxFloatingDelayedMatch);
     DUMP_U32(t, delayRebuildLength);
     DUMP_U32(t, stateOffsets.history);
@@ -905,7 +916,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, rosePrefixCount);
     DUMP_U32(t, activeLeftIterOffset);
     DUMP_U32(t, ematcherRegionSize);
-    DUMP_U32(t, literalBenefitsOffsets);
     DUMP_U32(t, somRevCount);
     DUMP_U32(t, somRevOffsetOffset);
     DUMP_U32(t, group_weak_end);
index 92a67ae14835457f2e3fd3b5912738c801d5cd35..c9025600833d7f6e4bbdca631a5cc2e607667e47 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -73,43 +73,11 @@ ReportID literalToReport(u32 id) {
     return id & ~LITERAL_DR_FLAG;
 }
 
-/** \brief Structure representing a literal. */
-struct RoseLiteral {
-    /**
-     * \brief Program to run when this literal is seen.
-     *
-     * Offset is relative to RoseEngine, or zero for no program.
-     */
-    u32 programOffset;
-
-    /** \brief Bitset of groups that cause this literal to fire. */
-    rose_group groups;
-
-    /**
-     * \brief True if this literal switches off its group behind it when it
-     * sets a role.
-     */
-    u8 squashesGroup;
-
-    /**
-     * \brief Bitset which indicates that the literal inserts a delayed
-     * match at the given offset.
-     */
-    u32 delay_mask;
-
-    /** \brief Offset to array of ids to poke in the delay structure. */
-    u32 delayIdsOffset;
-};
-
 /* Allocation of Rose literal ids
  *
  * The rose literal id space is segmented:
  *
  * ---- 0
- * |  | Normal undelayed literals in the e, or f tables which require a
- * |  | manual benefits confirm on match [a table never requires benefits]
- * |  |
- * ---- nonbenefits_base_id
  * |  | 'Normal' undelayed literals in either e or f tables
  * |  |
  * |  |
@@ -127,7 +95,7 @@ struct RoseLiteral {
  * ---- LITERAL_DR_FLAG
  * |  | Direct Report literals: immediately raise an internal report with id
  * |  | given by (lit_id & ~LITERAL_DR_FLAG). Raised by a or f tables (or e??).
- * |  | No RoseLiteral structure
+ * |  | No literal programs.
  * |  |
  * |  |
  * ----
@@ -135,14 +103,15 @@ struct RoseLiteral {
 
 /* Rose Literal Sources
  *
- * Rose currently gets events (mainly roseProcessMatch calls) from 8 sources:
+ * Rose currently gets events (mainly roseProcessMatch calls) from a number of
+ * sources:
  * 1) The floating table
  * 2) The anchored table
  * 3) Delayed literals
- * 4) suffixes NFAs
- * 5) masksv2 (literals with benefits)
- * 6) End anchored table
- * 7) prefix / infix nfas
+ * 4) Suffix NFAs
+ * 5) Literal masks
+ * 6) End anchored table
+ * 7) Prefix / Infix NFAs
  *
  * Care is required to ensure that events appear to come into Rose in order
  * (or sufficiently ordered for Rose to cope). Generally the progress of the
@@ -165,7 +134,7 @@ struct RoseLiteral {
  * NFA queues are run to the current point (floating or delayed literal) as
  * appropriate.
  *
- * Maskv2:
+ * Literal Masks:
  * These are triggered from either floating literals or delayed literals and
  * inspect the data behind them. Matches are raised at the same location as the
  * trigger literal so there are no ordering issues. Masks are always pure
@@ -301,12 +270,12 @@ struct RoseStateOffsets {
 };
 
 struct RoseBoundaryReports {
-    u32 reportEodOffset; /**< 0 if no reports lits, otherwise offset of
+    u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of
                           * MO_INVALID_IDX terminated list to report at EOD */
-    u32 reportZeroOffset; /**< 0 if no reports lits, otherwise offset of
+    u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of
                            * MO_INVALID_IDX terminated list to report at offset
                            * 0 */
-    u32 reportZeroEodOffset; /**< 0 if no reports lits, otherwise offset of
+    u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of
                               * MO_INVALID_IDX terminated list to report if eod
                               * is at offset 0. Superset of other lists. */
 };
@@ -338,18 +307,20 @@ struct RoseBoundaryReports {
 #define ROSE_RUNTIME_PURE_LITERAL  1
 #define ROSE_RUNTIME_SINGLE_OUTFIX 2
 
-// Runtime structure header for Rose.
-// In memory, we follow this with:
-//   1a. anchored 'literal' matcher table
-//   1b. floating literal matcher table
-//   1c. eod-anchored literal matcher table
-//   1d. small block table
-//   2. array of RoseLiteral (literalCount entries)
-//   8. array of NFA offsets, one per queue
-//   9. array of state offsets, one per queue (+)
-//  10. array of role ids for the set of all root roles
-//  12. multi-direct report array
-/*
+/**
+ * \brief Runtime structure header for Rose.
+ *
+ * Runtime structure header for Rose.
+ * In memory, we follow this with:
+ *   -# the "engine blob"
+ *   -# anchored 'literal' matcher table
+ *   -# floating literal matcher table
+ *   -# eod-anchored literal matcher table
+ *   -# small block table
+ *   -# array of NFA offsets, one per queue
+ *   -# array of state offsets, one per queue (+)
+ *   -# multi-direct report array
+ *
  *  (+) stateOffset array note: Offsets in the array are either into the stream
  *  state (normal case) or into the tstate region of scratch (for transient rose
  *  nfas). Rose nfa info table can distinguish the cases.
@@ -407,8 +378,22 @@ struct RoseEngine {
                                      * with the anchored table. */
     u32 intReportOffset; /**< offset of array of internal_report structures */
     u32 intReportCount; /**< number of internal_report structures */
-    u32 literalOffset; // offset of RoseLiteral array (bytes)
-    u32 literalCount; // number of RoseLiteral entries [NOT number of literals]
+
+    /** \brief Offset of u32 array of program offsets for literals. */
+    u32 litProgramOffset;
+
+    /** \brief Offset of u32 array of delay rebuild program offsets for
+     * literals. */
+    u32 litDelayRebuildProgramOffset;
+
+    /**
+     * \brief Number of entries in the arrays pointed to by litProgramOffset,
+     * litDelayRebuildProgramOffset.
+     *
+     * Note: NOT the total number of literals.
+     */
+    u32 literalCount;
+
     u32 multidirectOffset; /**< offset of multi-direct report list. */
     u32 activeArrayCount; //number of nfas tracked in the active array
     u32 activeLeftCount; //number of nfas tracked in the active rose array
@@ -468,8 +453,6 @@ struct RoseEngine {
     u32 anchored_count; /* number of anchored literal ids */
     u32 anchored_base_id; /* literal id of the first literal in the A table.
                            * anchored literal ids are contiguous */
-    u32 nonbenefits_base_id; /* first literal id without benefit conf.
-                              * contiguous, blah, blah */
     u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
                                   * usefully be reported */
     u32 delayRebuildLength; /* length of the history region which needs to be
@@ -486,8 +469,6 @@ struct RoseEngine {
     u32 rosePrefixCount; /* number of rose prefixes */
     u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
     u32 ematcherRegionSize; /* max region size to pass to ematcher */
-    u32 literalBenefitsOffsets; /* offset to array of benefits indexed by lit
-                                   id */
     u32 somRevCount; /**< number of som reverse nfas */
     u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
     u32 group_weak_end; /* end of weak groups, debugging only */
@@ -496,17 +477,6 @@ struct RoseEngine {
     struct scatter_full_plan state_init;
 };
 
-struct lit_benefits {
-    union {
-        u64a a64[MAX_MASK2_WIDTH/sizeof(u64a)];
-        u8 a8[MAX_MASK2_WIDTH];
-    } and_mask;
-    union {
-        u64a e64[MAX_MASK2_WIDTH/sizeof(u64a)];
-        u8 e8[MAX_MASK2_WIDTH];
-    } expected;
-};
-
 #if defined(_WIN32)
 #pragma pack(push, 1)
 #endif
@@ -574,14 +544,6 @@ const void *getSBLiteralMatcher(const struct RoseEngine *t) {
     return matcher;
 }
 
-static really_inline
-const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) {
-    const struct RoseLiteral *tl
-        = (const struct RoseLiteral *)((const char *)t + t->literalOffset);
-    assert(ISALIGNED_N(tl, 4));
-    return tl;
-}
-
 static really_inline
 const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
     const struct LeftNfaInfo *r
@@ -601,13 +563,6 @@ const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
     return it;
 }
 
-static really_inline
-const struct lit_benefits *getLiteralBenefitsTable(
-                                              const struct RoseEngine *t) {
-    return (const struct lit_benefits *)
-        ((const char *)t + t->literalBenefitsOffsets);
-}
-
 static really_inline
 const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
     const struct NfaInfo *infos
index 3f59ba15ad6d2690e5758f20d8672152be125e1e..37017ca0a2cbe6e4f9329083984681a4e1ce4952 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 /** \brief Role program instruction opcodes. */
 enum RoseInstructionCode {
     ROSE_INSTR_ANCHORED_DELAY,    //!< Delay until after anchored matcher.
+    ROSE_INSTR_CHECK_LIT_MASK,    //!< Check and/cmp mask.
+    ROSE_INSTR_CHECK_LIT_EARLY,   //!< Skip matches before floating min offset.
+    ROSE_INSTR_CHECK_GROUPS,      //!< Check that literal groups are on.
     ROSE_INSTR_CHECK_ONLY_EOD,    //!< Role matches only at EOD.
     ROSE_INSTR_CHECK_BOUNDS,      //!< Bounds on distance from offset 0.
     ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
     ROSE_INSTR_CHECK_LOOKAROUND,  //!< Lookaround check.
     ROSE_INSTR_CHECK_LEFTFIX,     //!< Leftfix must be in accept state.
+    ROSE_INSTR_PUSH_DELAYED,      //!< Push delayed literal matches.
     ROSE_INSTR_SOM_ADJUST,        //!< Set SOM from a distance to EOM.
     ROSE_INSTR_SOM_LEFTFIX,       //!< Acquire SOM from a leftfix engine.
     ROSE_INSTR_TRIGGER_INFIX,     //!< Trigger an infix engine.
@@ -59,6 +63,8 @@ enum RoseInstructionCode {
     ROSE_INSTR_REPORT_SOM_KNOWN,  //!< Rose role knows its SOM offset.
     ROSE_INSTR_SET_STATE,         //!< Switch a state index on.
     ROSE_INSTR_SET_GROUPS,        //!< Set some literal group bits.
+    ROSE_INSTR_SQUASH_GROUPS,     //!< Conditionally turn off some groups.
+    ROSE_INSTR_CHECK_STATE,       //!< Test a single bit in the state multibit.
     ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
     ROSE_INSTR_SPARSE_ITER_NEXT,  //!< Continue running sparse iter over states.
     ROSE_INSTR_END                //!< End of program.
@@ -70,6 +76,29 @@ struct ROSE_STRUCT_ANCHORED_DELAY {
     u32 done_jump; //!< Jump forward this many bytes if successful.
 };
 
+union RoseLiteralMask {
+    u64a a64[MAX_MASK2_WIDTH / sizeof(u64a)];
+    u8 a8[MAX_MASK2_WIDTH];
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LIT_MASK {
+    u8 code; //!< From enum RoseInstructionCode.
+    union RoseLiteralMask and_mask;
+    union RoseLiteralMask cmp_mask;
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LIT_EARLY {
+    u8 code; //!< From enum RoseInstructionCode.
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_GROUPS {
+    u8 code; //!< From enum RoseInstructionCode.
+    rose_group groups; //!< Bitmask.
+};
+
 struct ROSE_STRUCT_CHECK_ONLY_EOD {
     u8 code; //!< From enum RoseInstructionCode.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
@@ -103,6 +132,12 @@ struct ROSE_STRUCT_CHECK_LEFTFIX {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
+struct ROSE_STRUCT_PUSH_DELAYED {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 delay; //!< Number of bytes to delay.
+    u32 index; //!< Delay literal index (relative to first delay lit).
+};
+
 struct ROSE_STRUCT_SOM_ADJUST {
     u8 code; //!< From enum RoseInstructionCode.
     u32 distance; //!< Distance to EOM.
@@ -164,7 +199,18 @@ struct ROSE_STRUCT_SET_STATE {
 
 struct ROSE_STRUCT_SET_GROUPS {
     u8 code; //!< From enum RoseInstructionCode.
-    rose_group groups; //!< Bitmask.
+    rose_group groups; //!< Bitmask to OR into groups.
+};
+
+struct ROSE_STRUCT_SQUASH_GROUPS {
+    u8 code; //!< From enum RoseInstructionCode.
+    rose_group groups; //!< Bitmask to AND into groups.
+};
+
+struct ROSE_STRUCT_CHECK_STATE {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 index; //!< State index in the role multibit.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
 /**