git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Rose: Move all literal operations into program
author: Justin Viiret <justin.viiret@intel.com>
        Fri, 18 Dec 2015 04:24:52 +0000 (15:24 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
        Tue, 1 Mar 2016 00:23:56 +0000 (11:23 +1100)
Replace the RoseLiteral structure with more program instructions; now,
instead of each literal ID leading to a RoseLiteral, it simply has a
program to run (and a delay rebuild program).

This commit also makes some other improvements:

 * CHECK_STATE instruction, for use instead of a sparse iterator over a
   single element.
 * Elide some checks (CHECK_LIT_EARLY, ANCHORED_DELAY, etc) when not
   needed.
 * Flatten PUSH_DELAYED behaviour to one instruction per delayed
   literal, rather than the mask/index-list approach used before.
 * Simple program cache at compile time for deduplication.

src/rose/eod.c
src/rose/match.c
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_compile.cpp
src/rose/rose_build_impl.h
src/rose/rose_build_misc.cpp
src/rose/rose_build_util.h
src/rose/rose_dump.cpp
src/rose/rose_internal.h
src/rose/rose_program.h

index ef9873882bef97a6c7c6758f1924b1fbd4c5e591..b95a952e4cf8db10c104f9d17658bf1c0e65cb10 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -114,9 +114,9 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
 
     DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
 
-    int work_done = 0;
-    if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0,
-                       &work_done) == HWLM_TERMINATE_MATCHING) {
+    const size_t match_len = 0;
+    if (roseRunProgram(t, t->eodIterProgramOffset, offset, match_len,
+                       &(scratch->tctxt), 0) == HWLM_TERMINATE_MATCHING) {
         return MO_HALT_MATCHING;
     }
 
@@ -233,9 +233,9 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
     // There should be no pending delayed literals.
     assert(!scratch->tctxt.filledDelayedSlots);
 
-    int work_done = 0;
-    if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0,
-                       &work_done) == HWLM_TERMINATE_MATCHING) {
+    const size_t match_len = 0;
+    if (roseRunProgram(t, t->eodProgramOffset, offset, match_len,
+                       &scratch->tctxt, 0) == HWLM_TERMINATE_MATCHING) {
         return MO_HALT_MATCHING;
     }
 
index 89f0674e773728b093da3bd19f6d31171ff87fb4..72f2a167c3cf35644ba81deb7ecbb38e03bf8c7f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -71,123 +71,6 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) {
 }
 #endif
 
-static rose_inline
-int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind,
-                      const u8 *and_mask, const u8 *exp_mask) {
-    DEBUG_PRINTF("am offset = %zu, em offset = %zu\n",
-                 and_mask - (const u8 *)tctxt->t,
-                 exp_mask - (const u8 *)tctxt->t);
-    const u8 *data;
-
-    // If the check works over part of the history and part of the buffer, we
-    // create a temporary copy of the data in here so it's contiguous.
-    u8 temp[MAX_MASK2_WIDTH];
-
-    struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
-    s64a buffer_offset = (s64a)end - ci->buf_offset;
-    DEBUG_PRINTF("rel offset %lld\n", buffer_offset);
-    if (buffer_offset >= mask_rewind) {
-        data = ci->buf + buffer_offset - mask_rewind;
-        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
-                     ci->buf, mask_rewind);
-    } else if (buffer_offset <= 0) {
-        data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind;
-        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
-                     ci->buf, mask_rewind);
-    } else {
-        u32 shortfall = mask_rewind - buffer_offset;
-        DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall,
-                     mask_rewind, ci->hlen);
-        data = temp;
-        memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall);
-        memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall);
-    }
-
-#ifdef DEBUG
-    DEBUG_PRINTF("DATA: ");
-    for (u32 i = 0; i < mask_rewind; i++) {
-        printf("%c", ourisprint(data[i]) ? data[i] : '?');
-    }
-    printf(" (len=%u)\n", mask_rewind);
-#endif
-
-    u32 len = mask_rewind;
-    while (len >= sizeof(u64a)) {
-        u64a a = unaligned_load_u64a(data);
-        a &= *(const u64a *)and_mask;
-        if (a != *(const u64a *)exp_mask) {
-            DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask);
-            return 0;
-        }
-        data += sizeof(u64a);
-        and_mask += sizeof(u64a);
-        exp_mask += sizeof(u64a);
-        len -= sizeof(u64a);
-    }
-
-    while (len) {
-        u8 a = *data;
-        a &= *and_mask;
-        if (a != *exp_mask) {
-            DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx  em%02hhx\n", a,
-                          *data, *and_mask, *exp_mask);
-            return 0;
-        }
-        data++;
-        and_mask++;
-        exp_mask++;
-        len--;
-    }
-
-    return 1;
-}
-
-static
-int roseCheckLiteralBenefits(u64a end, size_t mask_rewind, u32 id,
-                             struct RoseContext *tctxt) {
-    const struct RoseEngine *t = tctxt->t;
-    const struct lit_benefits *lbi = getLiteralBenefitsTable(t) + id;
-    return roseCheckBenefits(tctxt, end, mask_rewind, lbi->and_mask.a8,
-                             lbi->expected.e8);
-}
-
-static rose_inline
-void pushDelayedMatches(const struct RoseLiteral *tl, u64a offset,
-                        struct RoseContext *tctxt) {
-    u32 delay_mask = tl->delay_mask;
-    if (!delay_mask) {
-        return;
-    }
-
-    u32 delay_count = tctxt->t->delay_count;
-    u8 *delaySlotBase = getDelaySlots(tctxtToScratch(tctxt));
-    size_t delaySlotSize = tctxt->t->delay_slot_size;
-    assert(tl->delayIdsOffset != ROSE_OFFSET_INVALID);
-    const u32 *delayIds = getByOffset(tctxt->t, tl->delayIdsOffset);
-    assert(ISALIGNED(delayIds));
-
-    while (delay_mask) {
-        u32 src_slot_index = findAndClearLSB_32(&delay_mask);
-        u32 slot_index = (src_slot_index + offset) & DELAY_MASK;
-        u8 *slot = delaySlotBase + delaySlotSize * slot_index;
-
-        if (offset + src_slot_index <= tctxt->delayLastEndOffset) {
-            DEBUG_PRINTF("skip too late\n");
-            goto next;
-        }
-
-        DEBUG_PRINTF("pushing tab %u into slot %u\n", *delayIds, slot_index);
-        if (!(tctxt->filledDelayedSlots & (1U << slot_index))) {
-            tctxt->filledDelayedSlots |= 1U << slot_index;
-            mmbit_clear(slot, delay_count);
-        }
-
-        mmbit_set(slot, delay_count, *delayIds);
-    next:
-        delayIds++;
-    }
-}
-
 hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
                                      void *ctx) {
     struct hs_scratch *scratch = ctx;
@@ -211,17 +94,17 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
         return tctx->groups;
     }
 
-    if (id < t->nonbenefits_base_id
-        && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) {
-        return tctx->groups;
-    }
-
     assert(id < t->literalCount);
-    const struct RoseLiteral *tl = &getLiteralTable(t)[id];
-
-    DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups);
+    const u32 *delayRebuildPrograms =
+        getByOffset(t, t->litDelayRebuildProgramOffset);
+    const u32 programOffset = delayRebuildPrograms[id];
 
-    pushDelayedMatches(tl, real_end, tctx);
+    if (programOffset) {
+        const size_t match_len = end - start + 1;
+        UNUSED hwlmcb_rv_t rv =
+            roseRunProgram(t, programOffset, real_end, match_len, tctx, 0);
+        assert(rv != HWLM_TERMINATE_MATCHING);
+    }
 
     /* we are just repopulating the delay queue, groups should be
      * already set from the original scan. */
@@ -465,31 +348,28 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) {
     }
 
     assert(id < t->literalCount);
-    const struct RoseLiteral *tl = &getLiteralTable(t)[id];
-    assert(tl->programOffset);
-    assert(!tl->delay_mask);
+    const u32 *programs = getByOffset(t, t->litProgramOffset);
+    const u32 programOffset = programs[id];
+    assert(programOffset);
+
+    // Anchored literals are never delayed.
+    assert(!((const u32 *)getByOffset(t, t->litDelayRebuildProgramOffset))[id]);
 
-    DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups);
+    DEBUG_PRINTF("literal id=%u\n", id);
 
     if (real_end <= t->floatingMinLiteralMatchOffset) {
         roseFlushLastByteHistory(t, state, real_end, tctxt);
         tctxt->lastEndOffset = real_end;
     }
 
-    int work_done = 0;
-    if (roseRunProgram(t, tl->programOffset, real_end, tctxt, 1, &work_done) ==
+    const size_t match_len = 0;
+    if (roseRunProgram(t, programOffset, real_end, match_len, tctxt, 1) ==
         HWLM_TERMINATE_MATCHING) {
         assert(can_stop_matching(tctxtToScratch(tctxt)));
         DEBUG_PRINTF("caller requested termination\n");
         return MO_HALT_MATCHING;
     }
 
-    // If we've actually handled any roles, we might need to apply this
-    // literal's squash mask to our groups as well.
-    if (work_done && tl->squashesGroup) {
-        roseSquashGroup(tctxt, tl);
-    }
-
     DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
 
     if (real_end > t->floatingMinLiteralMatchOffset) {
@@ -502,9 +382,10 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) {
 // Rose match-processing workhorse
 /* assumes not in_anchored */
 static really_inline
-hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id,
-                               struct RoseContext *tctxt, char do_group_check,
-                               char in_delay_play, char in_anch_playback) {
+hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end,
+                               size_t match_len, u32 id,
+                               struct RoseContext *tctxt, char in_delay_play,
+                               char in_anch_playback) {
     /* assert(!tctxt->in_anchored); */
     u8 *state = tctxt->state;
 
@@ -536,63 +417,30 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id,
     }
 
     assert(id < t->literalCount);
-    const struct RoseLiteral *tl = &getLiteralTable(t)[id];
-    DEBUG_PRINTF("lit id=%u, groups=0x%016llx\n", id, tl->groups);
-
-    if (do_group_check && !(tl->groups & tctxt->groups)) {
-        DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n");
-        return HWLM_CONTINUE_MATCHING;
-    }
-
-    assert(!in_delay_play || !tl->delay_mask);
-    if (!in_delay_play) {
-        pushDelayedMatches(tl, end, tctxt);
-    }
-
-    if (end < t->floatingMinLiteralMatchOffset) {
-        DEBUG_PRINTF("too soon\n");
-        assert(!in_delay_play); /* should not have been enqueued */
-        /* continuing on may result in pushing global time back */
-        return HWLM_CONTINUE_MATCHING;
-    }
-
-    int work_done = 0;
-
-    if (tl->programOffset) {
-        DEBUG_PRINTF("running program at %u\n", tl->programOffset);
-        if (roseRunProgram(t, tl->programOffset, end, tctxt, 0, &work_done) ==
-            HWLM_TERMINATE_MATCHING) {
-            return HWLM_TERMINATE_MATCHING;
-        }
-
-    }
-
-    // If we've actually handled any roles, we might need to apply this
-    // literal's squash mask to our groups as well.
-    if (work_done && tl->squashesGroup) {
-        roseSquashGroup(tctxt, tl);
-    }
-
-    return HWLM_CONTINUE_MATCHING;
+    const u32 *programs = getByOffset(t, t->litProgramOffset);
+    return roseRunProgram(t, programs[id], end, match_len, tctxt, 0);
 }
 
-
 static never_inline
-hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end, u32 id,
-                                    struct RoseContext *tctxt) {
-    return roseProcessMatch_i(t, end, id, tctxt, 1, 1, 0);
+hwlmcb_rv_t roseProcessDelayedMatch(const struct RoseEngine *t, u64a end,
+                                    u32 id, struct RoseContext *tctxt) {
+    size_t match_len = 0;
+    return roseProcessMatch_i(t, end, match_len, id, tctxt, 1, 0);
 }
 
 static never_inline
-hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t, u64a end,
-                                            u32 id, struct RoseContext *tctxt) {
-    return roseProcessMatch_i(t, end, id, tctxt, 0, 0, 1);
+hwlmcb_rv_t roseProcessDelayedAnchoredMatch(const struct RoseEngine *t,
+                                            u64a end, u32 id,
+                                            struct RoseContext *tctxt) {
+    size_t match_len = 0;
+    return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 1);
 }
 
 static really_inline
-hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end, u32 id,
+hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end,
+                                 size_t match_len, u32 id,
                                  struct RoseContext *tctxt) {
-    return roseProcessMatch_i(t, end, id, tctxt, 1, 0, 0);
+    return roseProcessMatch_i(t, end, match_len, id, tctxt, 0, 0);
 }
 
 static rose_inline
@@ -839,11 +687,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) {
         return HWLM_TERMINATE_MATCHING;
     }
 
-    if (id < tctx->t->nonbenefits_base_id
-        && !roseCheckLiteralBenefits(real_end, end - start + 1, id, tctx)) {
-        return tctx->groups;
-    }
-
     hwlmcb_rv_t rv = flushQueuedLiterals(tctx, real_end);
     /* flushDelayed may have advanced tctx->lastEndOffset */
 
@@ -856,7 +699,8 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) {
         return HWLM_TERMINATE_MATCHING;
     }
 
-    rv = roseProcessMainMatch(tctx->t, real_end, id, tctx);
+    size_t match_len = end - start + 1;
+    rv = roseProcessMainMatch(tctx->t, real_end, match_len, id, tctx);
 
     DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups);
 
index 6ba86ca658ce465dcb0c6945ff4426768a558128..b4d4aeeed1fa2098a3958112dd1cd907798a5b24 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 #include "runtime.h"
 #include "scratch.h"
 #include "ue2common.h"
+#include "util/compare.h"
 #include "util/fatbit.h"
 #include "util/multibit.h"
 
+static rose_inline
+int roseCheckBenefits(struct RoseContext *tctxt, u64a end, u32 mask_rewind,
+                      const u8 *and_mask, const u8 *exp_mask) {
+    DEBUG_PRINTF("am offset = %zu, em offset = %zu\n",
+                 and_mask - (const u8 *)tctxt->t,
+                 exp_mask - (const u8 *)tctxt->t);
+    const u8 *data;
+
+    // If the check works over part of the history and part of the buffer, we
+    // create a temporary copy of the data in here so it's contiguous.
+    u8 temp[MAX_MASK2_WIDTH];
+
+    struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
+    s64a buffer_offset = (s64a)end - ci->buf_offset;
+    DEBUG_PRINTF("rel offset %lld\n", buffer_offset);
+    if (buffer_offset >= mask_rewind) {
+        data = ci->buf + buffer_offset - mask_rewind;
+        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
+                     ci->buf, mask_rewind);
+    } else if (buffer_offset <= 0) {
+        data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind;
+        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
+                     ci->buf, mask_rewind);
+    } else {
+        u32 shortfall = mask_rewind - buffer_offset;
+        DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall,
+                     mask_rewind, ci->hlen);
+        data = temp;
+        memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall);
+        memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall);
+    }
+
+#ifdef DEBUG
+    DEBUG_PRINTF("DATA: ");
+    for (u32 i = 0; i < mask_rewind; i++) {
+        printf("%c", ourisprint(data[i]) ? data[i] : '?');
+    }
+    printf(" (len=%u)\n", mask_rewind);
+#endif
+
+    u32 len = mask_rewind;
+    while (len >= sizeof(u64a)) {
+        u64a a = unaligned_load_u64a(data);
+        a &= *(const u64a *)and_mask;
+        if (a != *(const u64a *)exp_mask) {
+            DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask);
+            return 0;
+        }
+        data += sizeof(u64a);
+        and_mask += sizeof(u64a);
+        exp_mask += sizeof(u64a);
+        len -= sizeof(u64a);
+    }
+
+    while (len) {
+        u8 a = *data;
+        a &= *and_mask;
+        if (a != *exp_mask) {
+            DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx  em%02hhx\n", a,
+                          *data, *and_mask, *exp_mask);
+            return 0;
+        }
+        data++;
+        and_mask++;
+        exp_mask++;
+        len--;
+    }
+
+    return 1;
+}
+
+static rose_inline
+void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay,
+                          u32 delay_index, u64a offset,
+                          struct RoseContext *tctxt) {
+    assert(delay);
+
+    const u32 src_slot_index = delay;
+    u32 slot_index = (src_slot_index + offset) & DELAY_MASK;
+
+    if (offset + src_slot_index <= tctxt->delayLastEndOffset) {
+        DEBUG_PRINTF("skip too late\n");
+        return;
+    }
+
+    const u32 delay_count = t->delay_count;
+    u8 *slot = getDelaySlots(tctxtToScratch(tctxt)) +
+               (t->delay_slot_size * slot_index);
+
+    DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index);
+    if (!(tctxt->filledDelayedSlots & (1U << slot_index))) {
+        tctxt->filledDelayedSlots |= 1U << slot_index;
+        mmbit_clear(slot, delay_count);
+    }
+
+    mmbit_set(slot, delay_count, delay_index);
+}
+
 static rose_inline
 char rosePrefixCheckMiracles(const struct RoseEngine *t,
                              const struct LeftNfaInfo *left,
@@ -782,10 +881,10 @@ char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
     break;                                                                     \
     }
 
-static really_inline
+static rose_inline
 hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
-                           u64a end, struct RoseContext *tctxt,
-                           char in_anchored, int *work_done) {
+                           u64a end, size_t match_len,
+                           struct RoseContext *tctxt, char in_anchored) {
     DEBUG_PRINTF("program begins at offset %u\n", programOffset);
 
     assert(programOffset);
@@ -800,6 +899,10 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
     // and SPARSE_ITER_NEXT instructions.
     struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
 
+    // If this program has an effect, work_done will be set to one (which may
+    // allow the program to squash groups).
+    int work_done = 0;
+
     assert(*(const u8 *)pc != ROSE_INSTR_END);
 
     for (;;) {
@@ -812,7 +915,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                 if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
                     DEBUG_PRINTF("delay until playback\n");
                     tctxt->groups |= ri->groups;
-                    *work_done = 1;
+                    work_done = 1;
                     assert(ri->done_jump); // must progress
                     pc += ri->done_jump;
                     continue;
@@ -820,6 +923,35 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_LIT_MASK) {
+                assert(match_len);
+                if (!roseCheckBenefits(tctxt, end, match_len, ri->and_mask.a8,
+                                       ri->cmp_mask.a8)) {
+                    DEBUG_PRINTF("halt: failed mask check\n");
+                    return HWLM_CONTINUE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_LIT_EARLY) {
+                if (end < t->floatingMinLiteralMatchOffset) {
+                    DEBUG_PRINTF("halt: too soon, min offset=%u\n",
+                                 t->floatingMinLiteralMatchOffset);
+                    return HWLM_CONTINUE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_GROUPS) {
+                DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
+                             tctxt->groups, ri->groups);
+                if (!(ri->groups & tctxt->groups)) {
+                    DEBUG_PRINTF("halt: no groups are set\n");
+                    return HWLM_CONTINUE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_ONLY_EOD) {
                 struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
                 if (end != ci->buf_offset + ci->len) {
@@ -874,6 +1006,11 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(PUSH_DELAYED) {
+                rosePushDelayedMatch(t, ri->delay, ri->index, end, tctxt);
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SOM_ADJUST) {
                 assert(ri->distance <= end);
                 som = end - ri->distance;
@@ -890,7 +1027,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             PROGRAM_CASE(TRIGGER_INFIX) {
                 roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel,
                                  tctxt);
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -900,7 +1037,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -909,7 +1046,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                                     in_anchored) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -919,7 +1056,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -928,7 +1065,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     MO_HALT_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -937,7 +1074,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                                   in_anchored) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -947,7 +1084,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                                      in_anchored) == HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -957,7 +1094,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                     HWLM_TERMINATE_MATCHING) {
                     return HWLM_TERMINATE_MATCHING;
                 }
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -965,7 +1102,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
                 DEBUG_PRINTF("set state index %u\n", ri->index);
                 mmbit_set(getRoleState(tctxt->state), t->rolesWithStateCount,
                           ri->index);
-                *work_done = 1;
+                work_done = 1;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -976,6 +1113,28 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(SQUASH_GROUPS) {
+                assert(popcount64(ri->groups) == 63); // Squash only one group.
+                if (work_done) {
+                    tctxt->groups &= ri->groups;
+                    DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups,
+                                 tctxt->groups);
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_STATE) {
+                DEBUG_PRINTF("check state %u\n", ri->index);
+                if (!mmbit_isset(getRoleState(tctxt->state),
+                                 t->rolesWithStateCount, ri->index)) {
+                    DEBUG_PRINTF("state not on\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SPARSE_ITER_BEGIN) {
                 DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset);
                 const struct mmbit_sparse_iter *it =
@@ -1045,17 +1204,4 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
 #undef PROGRAM_CASE
 #undef PROGRAM_NEXT_INSTRUCTION
 
-static rose_inline
-void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) {
-    assert(tl->squashesGroup);
-
-    // we should be squashing a single group
-    assert(popcount64(tl->groups) == 1);
-
-    DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n",
-                 ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups);
-
-    tctxt->groups &= ~tl->groups;
-}
-
 #endif // PROGRAM_RUNTIME_H
index 5f6541919cb2cfc67f20ff74baf4987c90503ecc..9444005da78c176f8b661d5500b12c923b26b257 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -170,12 +170,16 @@ public:
 
     const void *get() const {
         switch (code()) {
+        case ROSE_INSTR_CHECK_LIT_MASK: return &u.checkLitMask;
+        case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly;
+        case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups;
         case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod;
         case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds;
         case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled;
         case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround;
         case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix;
         case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
+        case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
         case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
         case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix;
         case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix;
@@ -188,6 +192,8 @@ public:
         case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown;
         case ROSE_INSTR_SET_STATE: return &u.setState;
         case ROSE_INSTR_SET_GROUPS: return &u.setGroups;
+        case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups;
+        case ROSE_INSTR_CHECK_STATE: return &u.checkState;
         case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
         case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
         case ROSE_INSTR_END: return &u.end;
@@ -198,12 +204,16 @@ public:
 
     size_t length() const {
         switch (code()) {
+        case ROSE_INSTR_CHECK_LIT_MASK: return sizeof(u.checkLitMask);
+        case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly);
+        case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups);
         case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod);
         case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds);
         case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled);
         case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround);
         case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix);
         case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
+        case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
         case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
         case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix);
         case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix);
@@ -216,6 +226,8 @@ public:
         case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown);
         case ROSE_INSTR_SET_STATE: return sizeof(u.setState);
         case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups);
+        case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups);
+        case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState);
         case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
         case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
         case ROSE_INSTR_END: return sizeof(u.end);
@@ -224,12 +236,16 @@ public:
     }
 
     union {
+        ROSE_STRUCT_CHECK_LIT_MASK checkLitMask;
+        ROSE_STRUCT_CHECK_LIT_EARLY checkLitEarly;
+        ROSE_STRUCT_CHECK_GROUPS checkGroups;
         ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod;
         ROSE_STRUCT_CHECK_BOUNDS checkBounds;
         ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled;
         ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround;
         ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix;
         ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
+        ROSE_STRUCT_PUSH_DELAYED pushDelayed;
         ROSE_STRUCT_SOM_ADJUST somAdjust;
         ROSE_STRUCT_SOM_LEFTFIX somLeftfix;
         ROSE_STRUCT_TRIGGER_INFIX triggerInfix;
@@ -242,12 +258,25 @@ public:
         ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown;
         ROSE_STRUCT_SET_STATE setState;
         ROSE_STRUCT_SET_GROUPS setGroups;
+        ROSE_STRUCT_SQUASH_GROUPS squashGroups;
+        ROSE_STRUCT_CHECK_STATE checkState;
         ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
         ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
         ROSE_STRUCT_END end;
     } u;
 };
 
+static
+size_t hash_value(const RoseInstruction &ri) {
+    size_t val = 0;
+    const char *bytes = (const char *)ri.get();
+    const size_t len = ri.length();
+    for (size_t i = 0; i < len; i++) {
+        boost::hash_combine(val, bytes[i]);
+    }
+    return val;
+}
+
 struct build_context : boost::noncopyable {
     /** \brief information about engines to the left of a vertex */
     map<RoseVertex, left_build_info> leftfix_info;
@@ -270,6 +299,10 @@ struct build_context : boost::noncopyable {
      * up iterators in early misc. */
     map<vector<mmbit_sparse_iter>, u32> iterCache;
 
+    /** \brief Simple cache of programs written to engine blob, used for
+     * deduplication. */
+    ue2::unordered_map<vector<RoseInstruction>, u32> program_cache;
+
     /** \brief LookEntry list cache, so that we don't have to go scanning
      * through the full list to find cases we've used already. */
     ue2::unordered_map<vector<LookEntry>, size_t> lookaround_cache;
@@ -284,6 +317,9 @@ struct build_context : boost::noncopyable {
      * that have already been pushed into the engine_blob. */
     ue2::unordered_map<u32, u32> engineOffsets;
 
+    /** \brief Minimum offset of a match from the floating table. */
+    u32 floatingMinLiteralMatchOffset = 0;
+
     /** \brief Contents of the Rose bytecode immediately following the
      * RoseEngine. */
     vector<char, AlignedAllocator<char, 64>> engine_blob;
@@ -1453,31 +1489,6 @@ void updateNfaState(const build_context &bc, RoseStateOffsets *so,
     }
 }
 
-static
-void buildLitBenefits(const RoseBuildImpl &tbi, RoseEngine *engine,
-                      u32 base_lits_benefits_offset) {
-    lit_benefits *lba = (lit_benefits *)((char *)engine
-                                         + base_lits_benefits_offset);
-    DEBUG_PRINTF("base offset %u\n", base_lits_benefits_offset);
-    for (u32 i = 0; i < tbi.nonbenefits_base_id; i++) {
-        assert(contains(tbi.final_id_to_literal, i));
-        assert(tbi.final_id_to_literal.at(i).size() == 1);
-        u32 lit_id = *tbi.final_id_to_literal.at(i).begin();
-        const ue2_literal &s = tbi.literals.right.at(lit_id).s;
-        DEBUG_PRINTF("building mask for lit %u (fid %u) %s\n", lit_id, i,
-                     dumpString(s).c_str());
-        assert(s.length() <= MAX_MASK2_WIDTH);
-        u32 j = 0;
-        for (const auto &e : s) {
-            lba[i].and_mask.a8[j] = e.nocase ? 0 : CASE_BIT;
-            lba[i].expected.e8[j] = e.nocase ? 0 : (CASE_BIT & e.c);
-            DEBUG_PRINTF("a%02hhx e%02hhx\n", lba[i].and_mask.a8[j],
-                         lba[i].expected.e8[j]);
-            j++;
-        }
-    }
-}
-
 /* does not include history requirements for outfixes or literal matchers */
 u32 RoseBuildImpl::calcHistoryRequired() const {
     u32 m = cc.grey.minHistoryAvailable;
@@ -2232,11 +2243,11 @@ void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &gre
 }
 
 static
-u32 findMinFloatingLiteralMatch(const RoseBuildImpl &tbi) {
-    const RoseGraph &g = tbi.g;
+u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build) {
+    const RoseGraph &g = build.g;
     u32 minWidth = ROSE_BOUND_INF;
     for (auto v : vertices_range(g)) {
-        if (tbi.isAnchored(v) || tbi.isVirtualVertex(v)) {
+        if (build.isAnchored(v) || build.isVirtualVertex(v)) {
             DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].idx);
             continue;
         }
@@ -2656,12 +2667,21 @@ flattenProgram(const vector<vector<RoseInstruction>> &programs) {
 }
 
 static
-u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) {
+u32 writeProgram(build_context &bc, const vector<RoseInstruction> &program) {
     if (program.empty()) {
         DEBUG_PRINTF("no program\n");
         return 0;
     }
 
+    assert(program.back().code() == ROSE_INSTR_END);
+    assert(program.size() >= 1);
+
+    auto it = bc.program_cache.find(program);
+    if (it != end(bc.program_cache)) {
+        DEBUG_PRINTF("reusing cached program at %u\n", it->second);
+        return it->second;
+    }
+
     DEBUG_PRINTF("writing %zu instructions\n", program.size());
     u32 programOffset = 0;
     for (const auto &ri : program) {
@@ -2674,6 +2694,7 @@ u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) {
         }
     }
     DEBUG_PRINTF("program begins at offset %u\n", programOffset);
+    bc.program_cache.emplace(program, programOffset);
     return programOffset;
 }
 
@@ -2764,72 +2785,6 @@ bool hasBoundaryReports(const BoundaryReports &boundary) {
     return false;
 }
 
-static
-void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc,
-                        vector<RoseLiteral> &literalTable) {
-    const u32 final_id = verify_u32(literalTable.size());
-    assert(contains(tbi.final_id_to_literal, final_id));
-    const UNUSED u32 literalId = *tbi.final_id_to_literal.at(final_id).begin();
-    /* all literal ids associated with this final id should result in identical
-     * literal entry */
-    const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id);
-    const rose_literal_info &arb_lit_info = **lit_infos.begin();
-
-    literalTable.push_back(RoseLiteral());
-    RoseLiteral &tl = literalTable.back();
-    memset(&tl, 0, sizeof(tl));
-
-    tl.groups = 0;
-    for (const auto &li : lit_infos) {
-        tl.groups |= li->group_mask;
-    }
-
-    assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED
-           || tbi.literals.right.at(literalId).table == ROSE_EVENT);
-
-    // If this literal squashes its group behind it, store that data too
-    tl.squashesGroup = arb_lit_info.squash_group;
-
-    // Setup the delay stuff
-    const auto &children = arb_lit_info.delayed_ids;
-    if (children.empty()) {
-        tl.delay_mask = 0;
-        tl.delayIdsOffset = ROSE_OFFSET_INVALID;
-    } else {
-        map<u32, u32> local_delay_map; // delay -> relative child id
-        for (const auto &int_id : children) {
-            const rose_literal_id &child_literal = tbi.literals.right.at(int_id);
-            u32 child_id = tbi.literal_info[int_id].final_id;
-            u32 delay_index = child_id - tbi.delay_base_id;
-            tl.delay_mask |= 1U << child_literal.delay;
-            local_delay_map[child_literal.delay] = delay_index;
-        }
-
-        vector<u32> delayIds;
-        for (const auto &did : local_delay_map | map_values) {
-            delayIds.push_back(did);
-        }
-
-        tl.delayIdsOffset = add_to_engine_blob(bc, delayIds.begin(),
-                                               delayIds.end());
-
-    }
-
-    assert(!tbi.literals.right.at(literalId).delay || !tl.delay_mask);
-}
-
-// Construct the literal table.
-static
-void buildLiteralTable(const RoseBuildImpl &tbi, build_context &bc,
-                       vector<RoseLiteral> &literalTable) {
-    size_t numLiterals = tbi.final_id_to_literal.size();
-    literalTable.reserve(numLiterals);
-
-    for (size_t i = 0; i < numLiterals; ++i) {
-        createLiteralEntry(tbi, bc, literalTable);
-    }
-}
-
 /**
  * \brief True if the given vertex is a role that can only be switched on at
  * EOD.
@@ -2945,8 +2900,11 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc,
         return;
     }
 
-    // TODO: also limit to matches that can occur after
-    // floatingMinLiteralMatchOffset.
+    // If this match cannot occur after floatingMinLiteralMatchOffset, we do
+    // not need this check.
+    if (build.g[v].max_offset <= bc.floatingMinLiteralMatchOffset) {
+        return;
+    }
 
     auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY);
     ri.u.anchoredDelay.groups = build.g[v].groups;
@@ -3112,6 +3070,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
     const RoseGraph &g = build.g;
     const RoseVertex u = source(e, g);
 
+    // We know that we can trust the anchored table (DFA) to always deliver us
+    // literals at the correct offset.
+    if (build.isAnchored(v)) {
+        DEBUG_PRINTF("literal in anchored table, skipping bounds check\n");
+        return;
+    }
+
     // Use the minimum literal length.
     u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));
 
@@ -3347,97 +3312,171 @@ vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc,
     return program;
 }
 
-/**
- * Returns the pair (program offset, sparse iter offset).
- */
 static
-pair<u32, u32> makeSparseIterProgram(build_context &bc,
-                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                    const vector<RoseInstruction> &root_program) {
-    vector<RoseInstruction> program;
-    u32 iter_offset = 0;
-
-    if (!predProgramLists.empty()) {
-        // First, add the iterator itself.
-        vector<u32> keys;
-        for (const auto &elem : predProgramLists) {
-            keys.push_back(elem.first);
-        }
-        DEBUG_PRINTF("%zu keys: %s\n", keys.size(),
-                     as_string_list(keys).c_str());
-
-        vector<mmbit_sparse_iter> iter;
-        mmbBuildSparseIterator(iter, keys, bc.numStates);
-        assert(!iter.empty());
-        iter_offset = addIteratorToTable(bc, iter);
-
-        // Construct our program, starting with the SPARSE_ITER_BEGIN
-        // instruction, keeping track of the jump offset for each sub-program.
-        vector<u32> jump_table;
-        u32 curr_offset = 0;
-
-        program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
-        curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
-
-        for (const auto &e : predProgramLists) {
-            DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
-                         curr_offset);
-            jump_table.push_back(curr_offset);
-            auto subprog = flattenProgram(e.second);
-
-            if (e.first != keys.back()) {
-                // For all but the last subprogram, replace the END instruction
-                // with a SPARSE_ITER_NEXT.
-                assert(!subprog.empty());
-                assert(subprog.back().code() == ROSE_INSTR_END);
-                subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
-            }
+u32 addPredBlocksSingle(
+    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+    u32 curr_offset, vector<RoseInstruction> &program) {
+    assert(predProgramLists.size() == 1);
 
-            for (const auto &ri : subprog) {
-                program.push_back(ri);
-                curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
-            }
+    u32 pred_state = predProgramLists.begin()->first;
+    auto subprog = flattenProgram(predProgramLists.begin()->second);
+
+    // Check our pred state.
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE);
+    ri.u.checkState.index = pred_state;
+    program.push_back(ri);
+    curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    // Add subprogram.
+    for (const auto &ri : subprog) {
+        program.push_back(ri);
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
+
+    const u32 end_offset =
+        curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    // Fix up the instruction operands.
+    curr_offset = 0;
+    for (size_t i = 0; i < program.size(); i++) {
+        auto &ri = program[i];
+        switch (ri.code()) {
+        case ROSE_INSTR_CHECK_STATE:
+            ri.u.checkState.fail_jump = end_offset - curr_offset;
+            break;
+        default:
+            break;
         }
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
 
-        const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(),
-                                                       ROSE_INSTR_MIN_ALIGN);
+    return 0; // No iterator.
+}
 
-        // Write the jump table into the bytecode.
-        const u32 jump_table_offset =
-            add_to_engine_blob(bc, begin(jump_table), end(jump_table));
+static
+u32 addPredBlocksMulti(build_context &bc,
+                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+                    u32 curr_offset, vector<RoseInstruction> &program) {
+    assert(!predProgramLists.empty());
 
-        // Fix up the instruction operands.
-        auto keys_it = begin(keys);
-        curr_offset = 0;
-        for (size_t i = 0; i < program.size(); i++) {
-            auto &ri = program[i];
-            switch (ri.code()) {
-            case ROSE_INSTR_SPARSE_ITER_BEGIN:
-                ri.u.sparseIterBegin.iter_offset = iter_offset;
-                ri.u.sparseIterBegin.jump_table = jump_table_offset;
-                ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
-                break;
-            case ROSE_INSTR_SPARSE_ITER_NEXT:
-                ri.u.sparseIterNext.iter_offset = iter_offset;
-                ri.u.sparseIterNext.jump_table = jump_table_offset;
-                assert(keys_it != end(keys));
-                ri.u.sparseIterNext.state = *keys_it++;
-                ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
-                break;
-            default:
-                break;
-            }
+    // First, add the iterator itself.
+    vector<u32> keys;
+    for (const auto &elem : predProgramLists) {
+        keys.push_back(elem.first);
+    }
+    DEBUG_PRINTF("%zu keys: %s\n", keys.size(), as_string_list(keys).c_str());
+
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, keys, bc.numStates);
+    assert(!iter.empty());
+    u32 iter_offset = addIteratorToTable(bc, iter);
+
+    // Construct our program, starting with the SPARSE_ITER_BEGIN
+    // instruction, keeping track of the jump offset for each sub-program.
+    vector<u32> jump_table;
+
+    program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
+    curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    for (const auto &e : predProgramLists) {
+        DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
+                     curr_offset);
+        jump_table.push_back(curr_offset);
+        auto subprog = flattenProgram(e.second);
+
+        if (e.first != keys.back()) {
+            // For all but the last subprogram, replace the END instruction
+            // with a SPARSE_ITER_NEXT.
+            assert(!subprog.empty());
+            assert(subprog.back().code() == ROSE_INSTR_END);
+            subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
+        }
+
+        for (const auto &ri : subprog) {
+            program.push_back(ri);
             curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
         }
     }
 
+    const u32 end_offset =
+        curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
+
+    // Write the jump table into the bytecode.
+    const u32 jump_table_offset =
+        add_to_engine_blob(bc, begin(jump_table), end(jump_table));
+
+    // Fix up the instruction operands.
+    auto keys_it = begin(keys);
+    curr_offset = 0;
+    for (size_t i = 0; i < program.size(); i++) {
+        auto &ri = program[i];
+        switch (ri.code()) {
+        case ROSE_INSTR_SPARSE_ITER_BEGIN:
+            ri.u.sparseIterBegin.iter_offset = iter_offset;
+            ri.u.sparseIterBegin.jump_table = jump_table_offset;
+            ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
+            break;
+        case ROSE_INSTR_SPARSE_ITER_NEXT:
+            ri.u.sparseIterNext.iter_offset = iter_offset;
+            ri.u.sparseIterNext.jump_table = jump_table_offset;
+            assert(keys_it != end(keys));
+            ri.u.sparseIterNext.state = *keys_it++;
+            ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
+            break;
+        default:
+            break;
+        }
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
+
+    return iter_offset;
+}
+
+static
+u32 addPredBlocks(build_context &bc,
+                  map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+                  u32 curr_offset, vector<RoseInstruction> &program,
+                  bool force_sparse_iter) {
+    const size_t num_preds = predProgramLists.size();
+    if (num_preds == 0) {
+        program = flattenProgram({program});
+        return 0; // No iterator.
+    } else if (!force_sparse_iter && num_preds == 1) {
+        return addPredBlocksSingle(predProgramLists, curr_offset, program);
+    } else {
+        return addPredBlocksMulti(bc, predProgramLists, curr_offset, program);
+    }
+}
+
+/**
+ * Returns the pair (program offset, sparse iter offset).
+ */
+static
+pair<u32, u32> makeSparseIterProgram(build_context &bc,
+                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
+                    const vector<RoseInstruction> &root_program,
+                    const vector<RoseInstruction> &pre_program) {
+    vector<RoseInstruction> program;
+    u32 curr_offset = 0;
+
+    // Add pre-program first.
+    for (const auto &ri : pre_program) {
+        program.push_back(ri);
+        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    }
+
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks). This operation will flatten the program up to this
+    // point.
+    u32 iter_offset =
+        addPredBlocks(bc, predProgramLists, curr_offset, program, false);
+
     // If we have a root program, replace the END instruction with it. Note
     // that the root program has already been flattened.
+    assert(!program.empty());
+    assert(program.back().code() == ROSE_INSTR_END);
     if (!root_program.empty()) {
-        if (!program.empty()) {
-            assert(program.back().code() == ROSE_INSTR_END);
-            program.pop_back();
-        }
+        program.pop_back();
         program.insert(end(program), begin(root_program), end(root_program));
     }
 
@@ -3445,15 +3484,182 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
 }
 
 static
-u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
+void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id,
+                                 vector<RoseInstruction> &program) {
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+    const auto &arb_lit_info = **lit_infos.begin();
+    if (arb_lit_info.delayed_ids.empty()) {
+        return;
+    }
+
+    for (const auto &int_id : arb_lit_info.delayed_ids) {
+        const auto &child_literal = build.literals.right.at(int_id);
+        u32 child_id = build.literal_info[int_id].final_id;
+        u32 delay_index = child_id - build.delay_base_id;
+
+        DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id,
+                     child_literal.delay, child_id);
+
+        auto ri = RoseInstruction(ROSE_INSTR_PUSH_DELAYED);
+        ri.u.pushDelayed.delay = verify_u8(child_literal.delay);
+        ri.u.pushDelayed.index = delay_index;
+        program.push_back(move(ri));
+    }
+}
+
+static
+void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id,
+                               vector<RoseInstruction> &program) {
+    assert(contains(build.final_id_to_literal, final_id));
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+
+    rose_group groups = 0;
+    for (const auto &li : lit_infos) {
+        groups |= li->group_mask;
+    }
+
+    if (!groups) {
+        return;
+    }
+
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_GROUPS);
+    ri.u.checkGroups.groups = groups;
+    program.push_back(move(ri));
+}
+
+static
+void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id,
+                                 vector<RoseInstruction> &program) {
+    assert(contains(build.final_id_to_literal, final_id));
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+    assert(!lit_infos.empty());
+
+    if (!lit_infos.front()->requires_benefits) {
+        return;
+    }
+
+    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_MASK);
+
+    assert(build.final_id_to_literal.at(final_id).size() == 1);
+    u32 lit_id = *build.final_id_to_literal.at(final_id).begin();
+    const ue2_literal &s = build.literals.right.at(lit_id).s;
+    DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id,
+                 final_id, dumpString(s).c_str());
+    assert(s.length() <= MAX_MASK2_WIDTH);
+    u32 i = 0;
+    for (const auto &e : s) {
+        ri.u.checkLitMask.and_mask.a8[i] = e.nocase ? 0 : CASE_BIT;
+        ri.u.checkLitMask.cmp_mask.a8[i] = e.nocase ? 0 : (CASE_BIT & e.c);
+        i++;
+    }
+
+    program.push_back(move(ri));
+}
+
+static
+void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
+                                vector<RoseInstruction> &program) {
+    assert(contains(build.final_id_to_literal, final_id));
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+
+    if (!lit_infos.front()->squash_group) {
+        return;
+    }
+
+    rose_group groups = 0;
+    for (const auto &li : lit_infos) {
+        groups |= li->group_mask;
+    }
+
+    if (!groups) {
+        return;
+    }
+
+    DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups);
+
+    auto ri = RoseInstruction(ROSE_INSTR_SQUASH_GROUPS);
+    ri.u.squashGroups.groups = ~groups; // Negated, so we can just AND it in.
+    program.push_back(move(ri));
+}
+
+static
+void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
+                                  u32 final_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  vector<RoseInstruction> &program) {
+    if (lit_edges.empty()) {
+        return;
+    }
+
+    if (bc.floatingMinLiteralMatchOffset == 0) {
+        return;
+    }
+
+    RoseVertex v = target(lit_edges.front(), build.g);
+    if (!build.isFloating(v)) {
+        return;
+    }
+
+    const auto &lit_ids = build.final_id_to_literal.at(final_id);
+    if (lit_ids.empty()) {
+        return;
+    }
+
+    size_t min_offset = SIZE_MAX;
+    for (u32 lit_id : lit_ids) {
+        const auto &lit = build.literals.right.at(lit_id);
+        min_offset = min(min_offset, lit.elength());
+    }
+
+    DEBUG_PRINTF("%zu lits, min_offset=%zu\n", lit_ids.size(), min_offset);
+
+    // If we can't match before the min offset, we don't need the check.
+    if (min_offset >= bc.floatingMinLiteralMatchOffset) {
+        DEBUG_PRINTF("no need for check, min is %u\n",
+                      bc.floatingMinLiteralMatchOffset);
+        return;
+    }
+
+    program.push_back(RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY));
+}
+
+static
+vector<RoseInstruction> buildLitInitialProgram(RoseBuildImpl &build,
+                                    build_context &bc, u32 final_id,
+                                    const vector<RoseEdge> &lit_edges) {
+    vector<RoseInstruction> pre_program;
+
+    // No initial program for EOD.
+    if (final_id == MO_INVALID_IDX) {
+        return pre_program;
+    }
+
+    DEBUG_PRINTF("final_id %u\n", final_id);
+
+    // Check lit mask.
+    makeCheckLitMaskInstruction(build, final_id, pre_program);
+
+    // Check literal groups.
+    makeGroupCheckInstruction(build, final_id, pre_program);
+
+    // Add instructions for pushing delayed matches, if there are any.
+    makePushDelayedInstructions(build, final_id, pre_program);
+
+    // Add pre-check for early literals in the floating table.
+    makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, pre_program);
+
+    return pre_program;
+}
+
+static
+u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
                         const vector<RoseEdge> &lit_edges) {
     const auto &g = build.g;
 
-    DEBUG_PRINTF("%zu lit edges\n", lit_edges.size());
+    DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size());
 
     // pred state id -> list of programs
     map<u32, vector<vector<RoseInstruction>>> predProgramLists;
-    vector<RoseVertex> nonroot_verts;
 
     // Construct sparse iter sub-programs.
     for (const auto &e : lit_edges) {
@@ -3467,7 +3673,6 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
         u32 pred_state = bc.roleStateIndices.at(u);
         auto program = makePredProgram(build, bc, e);
         predProgramLists[pred_state].push_back(program);
-        nonroot_verts.push_back(target(e, g));
     }
 
     // Construct sub-program for handling root roles.
@@ -3485,13 +3690,39 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
         root_programs.push_back(role_prog);
     }
 
+    // Literal may squash groups.
+    if (final_id != MO_INVALID_IDX) {
+        root_programs.push_back({});
+        makeGroupSquashInstruction(build, final_id, root_programs.back());
+    }
+
     vector<RoseInstruction> root_program;
     if (!root_programs.empty()) {
         root_program = flattenProgram(root_programs);
     }
 
+    auto pre_program = buildLitInitialProgram(build, bc, final_id, lit_edges);
+
     // Put it all together.
-    return makeSparseIterProgram(bc, predProgramLists, root_program).first;
+    return makeSparseIterProgram(bc, predProgramLists, root_program,
+                                 pre_program).first;
+}
+
+static
+u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
+                             u32 final_id) {
+    const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
+    const auto &arb_lit_info = **lit_infos.begin();
+    if (arb_lit_info.delayed_ids.empty()) {
+        return 0; // No delayed IDs, no work to do.
+    }
+
+    vector<RoseInstruction> program;
+    makeCheckLitMaskInstruction(build, final_id, program);
+    makePushDelayedInstructions(build, final_id, program);
+    assert(!program.empty());
+    program = flattenProgram({program});
+    return writeProgram(bc, program);
 }
 
 static
@@ -3530,17 +3761,35 @@ map<u32, vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
     return lit_edge_map;
 }
 
-/** \brief Build the interpreter program for each literal. */
+/**
+ * \brief Build the interpreter programs for each literal.
+ *
+ * Returns the base of the literal program list and the base of the delay
+ * rebuild program list.
+ */
 static
-void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
-                     vector<RoseLiteral> &literalTable) {
+pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
+    const u32 num_literals = build.final_id_to_literal.size();
     auto lit_edge_map = findEdgesByLiteral(build);
 
-    for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) {
+    vector<u32> litPrograms(num_literals);
+    vector<u32> delayRebuildPrograms(num_literals);
+
+    for (u32 finalId = 0; finalId != num_literals; ++finalId) {
         const auto &lit_edges = lit_edge_map[finalId];
-        u32 offset = buildLiteralProgram(build, bc, lit_edges);
-        literalTable[finalId].programOffset = offset;
+
+        litPrograms[finalId] =
+            buildLiteralProgram(build, bc, finalId, lit_edges);
+        delayRebuildPrograms[finalId] =
+            buildDelayRebuildProgram(build, bc, finalId);
     }
+
+    u32 litProgramsOffset =
+        add_to_engine_blob(bc, begin(litPrograms), end(litPrograms));
+    u32 delayRebuildProgramsOffset = add_to_engine_blob(
+        bc, begin(delayRebuildPrograms), end(delayRebuildPrograms));
+
+    return {litProgramsOffset, delayRebuildProgramsOffset};
 }
 
 static
@@ -3604,7 +3853,14 @@ pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
         return {0, 0};
     }
 
-    return makeSparseIterProgram(bc, predProgramLists, {});
+    vector<RoseInstruction> program;
+
+    // Note: we force the use of a sparse iterator for the EOD program so we
+    // can easily guard EOD execution at runtime.
+    u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true);
+
+    assert(program.size() > 1);
+    return {writeProgram(bc, program), iter_offset};
 }
 
 static
@@ -3634,7 +3890,7 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
                     tie(g[source(b, g)].idx, g[target(b, g)].idx);
          });
 
-    return buildLiteralProgram(build, bc, edge_list);
+    return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
 }
 
 static
@@ -3780,6 +4036,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     aligned_unique_ptr<HWLM> sbtable = buildSmallBlockMatcher(*this, &sbsize);
 
     build_context bc;
+    bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this);
 
     // Build NFAs
     set<u32> no_retrigger_queues;
@@ -3805,10 +4062,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
         throw ResourceLimitError();
     }
 
-    u32 lit_benefits_size =
-        verify_u32(sizeof(lit_benefits) * nonbenefits_base_id);
-    assert(ISALIGNED_16(lit_benefits_size));
-
     vector<u32> suffixEkeyLists;
     buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists);
 
@@ -3820,9 +4073,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                        queue_count - leftfixBeginQueue, leftInfoTable,
                        &laggedRoseCount, &historyRequired);
 
-    vector<RoseLiteral> literalTable;
-    buildLiteralTable(*this, bc, literalTable);
-    buildLiteralPrograms(*this, bc, literalTable);
+    u32 litProgramOffset;
+    u32 litDelayRebuildProgramOffset;
+    tie(litProgramOffset, litDelayRebuildProgramOffset) =
+        buildLiteralPrograms(*this, bc);
 
     u32 eodProgramOffset = writeEodProgram(*this, bc);
     u32 eodIterProgramOffset;
@@ -3857,10 +4111,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     currOffset = ROUNDUP_CL(currOffset);
     DEBUG_PRINTF("currOffset %u\n", currOffset);
 
-    /* leave space for the benefits listing */
-    u32 base_lits_benefits_offset = currOffset;
-    currOffset += lit_benefits_size;
-
     if (atable) {
         currOffset = ROUNDUP_CL(currOffset);
         amatcherOffset = currOffset;
@@ -3891,10 +4141,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     u32 intReportOffset = currOffset;
     currOffset += sizeof(internal_report) * int_reports.size();
 
-    u32 literalOffset = ROUNDUP_N(currOffset, alignof(RoseLiteral));
-    u32 literalLen = sizeof(RoseLiteral) * literalTable.size();
-    currOffset = literalOffset + literalLen;
-
     u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo));
     u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size();
     currOffset = leftOffset + roseLen;
@@ -4016,8 +4262,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     fillInReportInfo(engine.get(), intReportOffset, rm, int_reports);
 
-    engine->literalOffset = literalOffset;
-    engine->literalCount = verify_u32(literalTable.size());
+    engine->literalCount = verify_u32(final_id_to_literal.size());
+    engine->litProgramOffset = litProgramOffset;
+    engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset;
     engine->runtimeImpl = pickRuntimeImpl(*this, outfixEndQueue);
     engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this);
 
@@ -4053,14 +4300,12 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     engine->lastByteHistoryIterOffset = lastByteOffset;
 
-    u32 delay_count = verify_u32(literalTable.size() - delay_base_id);
+    u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id);
     engine->delay_count = delay_count;
     engine->delay_slot_size = mmbit_size(delay_count);
     engine->delay_base_id = delay_base_id;
     engine->anchored_base_id = anchored_base_id;
     engine->anchored_count = delay_base_id - anchored_base_id;
-    engine->nonbenefits_base_id = nonbenefits_base_id;
-    engine->literalBenefitsOffsets = base_lits_benefits_offset;
 
     engine->rosePrefixCount = rosePrefixCount;
 
@@ -4094,7 +4339,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->minWidth = hasBoundaryReports(boundary) ? 0 : minWidth;
     engine->minWidthExcludingBoundaries = minWidth;
     engine->maxSafeAnchoredDROffset = findMinWidth(*this, ROSE_FLOATING);
-    engine->floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this);
+    engine->floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset;
 
     engine->maxBiAnchoredWidth = findMaxBAWidth(*this);
     engine->noFloatingRoots = hasNoFloatingRoots();
@@ -4109,7 +4354,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     fillMatcherDistances(*this, engine.get());
 
     engine->initialGroups = getInitialGroups();
-    engine->totalNumLiterals = verify_u32(literalTable.size());
+    engine->totalNumLiterals = verify_u32(literal_info.size());
     engine->asize = verify_u32(asize);
     engine->ematcherRegionSize = ematcher_region_size;
     engine->floatingStreamState = verify_u32(floatingStreamStateRequired);
@@ -4138,12 +4383,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                    &engine->scratchStateSize, &engine->nfaStateSize,
                    &engine->tStateSize);
 
-    /* do after update mask */
-    buildLitBenefits(*this, engine.get(), base_lits_benefits_offset);
-
     // Copy in other tables
     copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob);
-    copy_bytes(ptr + engine->literalOffset, literalTable);
     copy_bytes(ptr + engine->leftOffset, leftInfoTable);
 
     fillLookaroundTables(ptr + lookaroundTableOffset,
index 2a3fe5406ccd8e2ba463a370e94349a09fde96fc..6202299baaf8b0db8a90d71bd69ae93f6e32a30b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -258,7 +258,6 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
 
     set<u32> anch;
     set<u32> norm;
-    set<u32> norm_benefits;
     set<u32> delay;
 
     /* undelayed ids come first */
@@ -281,12 +280,8 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
             continue;
         }
 
-        const rose_literal_info &info = tbi.literal_info[i];
-        if (info.requires_benefits) {
-            assert(!tbi.isDelayed(i));
-            norm_benefits.insert(i);
-            DEBUG_PRINTF("%u has benefits\n", i);
-        } else if (tbi.isDelayed(i)) {
+        if (tbi.isDelayed(i)) {
+            assert(!tbi.literal_info[i].requires_benefits);
             delay.insert(i);
         } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) {
             anch.insert(i);
@@ -295,12 +290,7 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) {
         }
     }
 
-    /* normal lits first (with benefits confirm)*/
-    allocateFinalIdToSet(g, norm_benefits, &tbi.literal_info,
-                         &tbi.final_id_to_literal, &next_final_id);
-
-    /* other normal lits (without benefits)*/
-    tbi.nonbenefits_base_id = next_final_id;
+    /* normal lits */
     allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal,
                          &next_final_id);
 
index a7f2e2f703c67a67cdc08ed7c332bf86d4a65ceb..c6d10063f1d633207a34c403c781205023632fdc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -496,7 +496,6 @@ public:
 
     u32 anchored_base_id;
 
-    u32 nonbenefits_base_id;
     u32 ematcher_region_size; /**< number of bytes the eod table runs over */
 
     /** \brief Mapping from anchored literal ID to the original literal suffix
index 044a4208f4c8dd4c1a211a7c3403173f06f0e6ec..66b0bdd448959e9a458ad0e06d5417d5e58567e4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -78,7 +78,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in,
       group_weak_end(0),
       group_end(0),
       anchored_base_id(MO_INVALID_IDX),
-      nonbenefits_base_id(MO_INVALID_IDX),
       ematcher_region_size(0),
       floating_direct_report(false),
       eod_event_literal_id(MO_INVALID_IDX),
index fe2124a0fd9db2f6f9300be141d40633ae5ca0c6..536b031a358baf3d1ec89ae6860a2718c97e14c5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 
 namespace ue2 {
 
-// Calculate the minimum depth for the given set of vertices, ignoring those
-// with depth 1.
-template<class Cont>
-static
-u8 calcMinDepth(const std::map<RoseVertex, u32> &depths, const Cont &verts) {
-    u8 d = 255;
-    for (RoseVertex v : verts) {
-        u8 vdepth = (u8)std::min((u32)255, depths.at(v));
-        if (vdepth > 1) {
-            d = std::min(d, vdepth);
-        }
-    }
-    return d;
-}
-
 // Comparator for vertices using their index property.
 struct VertexIndexComp {
     VertexIndexComp(const RoseGraph &gg) : g(gg) {}
index 6210d10226ecceea6ae6605de722a15c1dc550b8..cd70c734cddb672586fe73a7edab9cb508ac8c0b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -116,31 +116,6 @@ const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
     return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
 }
 
-static
-u32 literalsWithDirectReports(const RoseEngine *t) {
-    return t->totalNumLiterals - t->literalCount;
-}
-
-template<typename Predicate>
-static
-size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) {
-    const RoseLiteral *tl = getLiteralTable(t);
-    const RoseLiteral *tl_end = tl + t->literalCount;
-
-    return count_if(tl, tl_end, pred);
-}
-
-static
-size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) {
-    rose_group mask = ~((1ULL << from) - 1);
-    if (to < 64) {
-        mask &= ((1ULL << to) - 1);
-    }
-
-    return literalsWithPredicate(
-        t, [&mask](const RoseLiteral &l) { return l.groups & mask; });
-}
-
 static
 CharReach bitvectorToReach(const u8 *reach) {
     CharReach cr;
@@ -177,6 +152,16 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
     }
 }
 
+static
+string dumpStrMask(const u8 *mask, size_t len) {
+    ostringstream oss;
+    for (size_t i = 0; i < len; i++) {
+        oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]}
+            << " ";
+    }
+    return oss.str();
+}
+
 #define PROGRAM_CASE(name)                                                     \
     case ROSE_INSTR_##name: {                                                  \
         os << "  " << std::setw(4) << std::setfill('0') << (pc - pc_base)      \
@@ -202,14 +187,26 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(CHECK_ONLY_EOD) {
-                os << "    fail_jump +" << ri->fail_jump << endl;
+            PROGRAM_CASE(CHECK_LIT_MASK) {
+                os << "    and_mask "
+                   << dumpStrMask(ri->and_mask.a8, sizeof(ri->and_mask.a8))
+                   << endl;
+                os << "    cmp_mask "
+                   << dumpStrMask(ri->cmp_mask.a8, sizeof(ri->cmp_mask.a8))
+                   << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
 
-            PROGRAM_CASE(CHECK_BOUNDS) {
-                os << "    min_bound " << ri->min_bound << endl;
-                os << "    max_bound " << ri->max_bound << endl;
+            PROGRAM_CASE(CHECK_LIT_EARLY) {}
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_GROUPS) {
+                os << "    groups 0x" << std::hex << ri->groups << std::dec
+                   << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_ONLY_EOD) {
                 os << "    fail_jump +" << ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
@@ -236,6 +233,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(PUSH_DELAYED) {
+                os << "    delay " << u32{ri->delay} << endl;
+                os << "    index " << ri->index << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SOM_ADJUST) {
                 os << "    distance " << ri->distance << endl;
             }
@@ -301,6 +304,18 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(SQUASH_GROUPS) {
+                os << "    groups 0x" << std::hex << ri->groups << std::dec
+                   << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_STATE) {
+                os << "    index " << ri->index << endl;
+                os << "    fail_jump +" << ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(SPARSE_ITER_BEGIN) {
                 os << "    iter_offset " << ri->iter_offset << endl;
                 os << "    jump_table " << ri->jump_table << endl;
@@ -334,21 +349,32 @@ static
 void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) {
     ofstream os(filename);
 
-    const RoseLiteral *lits = getLiteralTable(t);
-    const char *base = (const char *)t;
+    const u32 *litPrograms =
+        (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset);
+    const u32 *delayRebuildPrograms =
+        (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset);
 
     for (u32 i = 0; i < t->literalCount; i++) {
-        const RoseLiteral *lit = &lits[i];
         os << "Literal " << i << endl;
         os << "---------------" << endl;
 
-        if (lit->programOffset) {
-            os << "Program @ " << lit->programOffset << ":" << endl;
-            dumpProgram(os, t, base + lit->programOffset);
+        if (litPrograms[i]) {
+            os << "Program @ " << litPrograms[i] << ":" << endl;
+            const char *prog =
+                (const char *)loadFromByteCodeOffset(t, litPrograms[i]);
+            dumpProgram(os, t, prog);
         } else {
             os << "<No Program>" << endl;
         }
 
+        if (delayRebuildPrograms[i]) {
+            os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":"
+               << endl;
+            const char *prog = (const char *)loadFromByteCodeOffset(
+                t, delayRebuildPrograms[i]);
+            dumpProgram(os, t, prog);
+        }
+
         os << endl;
     }
 
@@ -710,8 +736,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
             etable ? hwlmSize(etable) : 0, t->ematcherRegionSize);
     fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n",
             sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance);
-    fprintf(f, " - literal table     : %zu bytes\n",
-            t->literalCount * sizeof(RoseLiteral));
     fprintf(f, " - role state table  : %zu bytes\n",
             t->rolesWithStateCount * sizeof(u32));
     fprintf(f, " - nfa info table    : %u bytes\n",
@@ -745,22 +769,9 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
     fprintf(f, "handled key count    : %u\n", t->handledKeyCount);
     fprintf(f, "\n");
 
-    fprintf(f, "number of literals   : %u\n", t->totalNumLiterals);
-    fprintf(f, " - delayed           : %u\n", t->delay_count);
-    fprintf(f, " - direct report     : %u\n",
-            literalsWithDirectReports(t));
-    fprintf(f, " - that squash group : %zu\n",
-            literalsWithPredicate(
-                t, [](const RoseLiteral &l) { return l.squashesGroup != 0; }));
-    fprintf(f, " - with benefits     : %u\n", t->nonbenefits_base_id);
-    fprintf(f, " - with program      : %zu\n",
-            literalsWithPredicate(
-                t, [](const RoseLiteral &l) { return l.programOffset != 0; }));
-    fprintf(f, " - in groups ::\n");
-    fprintf(f, "   + weak            : %zu\n",
-            literalsInGroups(t, 0, t->group_weak_end));
-    fprintf(f, "   + general         : %zu\n",
-            literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8));
+    fprintf(f, "total literal count  : %u\n", t->totalNumLiterals);
+    fprintf(f, "  prog table size    : %u\n", t->literalCount);
+    fprintf(f, "  delayed literals   : %u\n", t->delay_count);
 
     fprintf(f, "\n");
     fprintf(f, "  minWidth                    : %u\n", t->minWidth);
@@ -839,7 +850,8 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, fmatcherMaxBiAnchoredWidth);
     DUMP_U32(t, intReportOffset);
     DUMP_U32(t, intReportCount);
-    DUMP_U32(t, literalOffset);
+    DUMP_U32(t, litProgramOffset);
+    DUMP_U32(t, litDelayRebuildProgramOffset);
     DUMP_U32(t, literalCount);
     DUMP_U32(t, multidirectOffset);
     DUMP_U32(t, activeArrayCount);
@@ -876,7 +888,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, delay_base_id);
     DUMP_U32(t, anchored_count);
     DUMP_U32(t, anchored_base_id);
-    DUMP_U32(t, nonbenefits_base_id);
     DUMP_U32(t, maxFloatingDelayedMatch);
     DUMP_U32(t, delayRebuildLength);
     DUMP_U32(t, stateOffsets.history);
@@ -905,7 +916,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, rosePrefixCount);
     DUMP_U32(t, activeLeftIterOffset);
     DUMP_U32(t, ematcherRegionSize);
-    DUMP_U32(t, literalBenefitsOffsets);
     DUMP_U32(t, somRevCount);
     DUMP_U32(t, somRevOffsetOffset);
     DUMP_U32(t, group_weak_end);
index 92a67ae14835457f2e3fd3b5912738c801d5cd35..c9025600833d7f6e4bbdca631a5cc2e607667e47 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -73,43 +73,11 @@ ReportID literalToReport(u32 id) {
     return id & ~LITERAL_DR_FLAG;
 }
 
-/** \brief Structure representing a literal. */
-struct RoseLiteral {
-    /**
-     * \brief Program to run when this literal is seen.
-     *
-     * Offset is relative to RoseEngine, or zero for no program.
-     */
-    u32 programOffset;
-
-    /** \brief Bitset of groups that cause this literal to fire. */
-    rose_group groups;
-
-    /**
-     * \brief True if this literal switches off its group behind it when it
-     * sets a role.
-     */
-    u8 squashesGroup;
-
-    /**
-     * \brief Bitset which indicates that the literal inserts a delayed
-     * match at the given offset.
-     */
-    u32 delay_mask;
-
-    /** \brief Offset to array of ids to poke in the delay structure. */
-    u32 delayIdsOffset;
-};
-
 /* Allocation of Rose literal ids
  *
  * The rose literal id space is segmented:
  *
  * ---- 0
- * |  | Normal undelayed literals in the e, or f tables which require a
- * |  | manual benefits confirm on match [a table never requires benefits]
- * |  |
- * ---- nonbenefits_base_id
  * |  | 'Normal' undelayed literals in either e or f tables
  * |  |
  * |  |
@@ -127,7 +95,7 @@ struct RoseLiteral {
  * ---- LITERAL_DR_FLAG
  * |  | Direct Report literals: immediately raise an internal report with id
  * |  | given by (lit_id & ~LITERAL_DR_FLAG). Raised by a or f tables (or e??).
- * |  | No RoseLiteral structure
+ * |  | No literal programs.
  * |  |
  * |  |
  * ----
@@ -135,14 +103,15 @@ struct RoseLiteral {
 
 /* Rose Literal Sources
  *
- * Rose currently gets events (mainly roseProcessMatch calls) from 8 sources:
+ * Rose currently gets events (mainly roseProcessMatch calls) from a number of
+ * sources:
  * 1) The floating table
  * 2) The anchored table
  * 3) Delayed literals
- * 4) suffixes NFAs
- * 5) masksv2 (literals with benefits)
- * 6) End anchored table
- * 7) prefix / infix nfas
+ * 4) Suffix NFAs
+ * 5) Literal masks
+ * 6) End anchored table
+ * 7) Prefix / Infix NFAs
  *
  * Care is required to ensure that events appear to come into Rose in order
  * (or sufficiently ordered for Rose to cope). Generally the progress of the
@@ -165,7 +134,7 @@ struct RoseLiteral {
  * NFA queues are run to the current point (floating or delayed literal) as
  * appropriate.
  *
- * Maskv2:
+ * Literal Masks:
  * These are triggered from either floating literals or delayed literals and
  * inspect the data behind them. Matches are raised at the same location as the
  * trigger literal so there are no ordering issues. Masks are always pure
@@ -301,12 +270,12 @@ struct RoseStateOffsets {
 };
 
 struct RoseBoundaryReports {
-    u32 reportEodOffset; /**< 0 if no reports lits, otherwise offset of
+    u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of
                           * MO_INVALID_IDX terminated list to report at EOD */
-    u32 reportZeroOffset; /**< 0 if no reports lits, otherwise offset of
+    u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of
                            * MO_INVALID_IDX terminated list to report at offset
                            * 0 */
-    u32 reportZeroEodOffset; /**< 0 if no reports lits, otherwise offset of
+    u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of
                               * MO_INVALID_IDX terminated list to report if eod
                               * is at offset 0. Superset of other lists. */
 };
@@ -338,18 +307,20 @@ struct RoseBoundaryReports {
 #define ROSE_RUNTIME_PURE_LITERAL  1
 #define ROSE_RUNTIME_SINGLE_OUTFIX 2
 
-// Runtime structure header for Rose.
-// In memory, we follow this with:
-//   1a. anchored 'literal' matcher table
-//   1b. floating literal matcher table
-//   1c. eod-anchored literal matcher table
-//   1d. small block table
-//   2. array of RoseLiteral (literalCount entries)
-//   8. array of NFA offsets, one per queue
-//   9. array of state offsets, one per queue (+)
-//  10. array of role ids for the set of all root roles
-//  12. multi-direct report array
-/*
+/**
+ * \brief Runtime structure header for Rose.
+ *
+ * Runtime structure header for Rose.
+ * In memory, we follow this with:
+ *   -# the "engine blob"
+ *   -# anchored 'literal' matcher table
+ *   -# floating literal matcher table
+ *   -# eod-anchored literal matcher table
+ *   -# small block table
+ *   -# array of NFA offsets, one per queue
+ *   -# array of state offsets, one per queue (+)
+ *   -# multi-direct report array
+ *
  *  (+) stateOffset array note: Offsets in the array are either into the stream
  *  state (normal case) or into the tstate region of scratch (for transient rose
  *  nfas). Rose nfa info table can distinguish the cases.
@@ -407,8 +378,22 @@ struct RoseEngine {
                                      * with the anchored table. */
     u32 intReportOffset; /**< offset of array of internal_report structures */
     u32 intReportCount; /**< number of internal_report structures */
-    u32 literalOffset; // offset of RoseLiteral array (bytes)
-    u32 literalCount; // number of RoseLiteral entries [NOT number of literals]
+
+    /** \brief Offset of u32 array of program offsets for literals. */
+    u32 litProgramOffset;
+
+    /** \brief Offset of u32 array of delay rebuild program offsets for
+     * literals. */
+    u32 litDelayRebuildProgramOffset;
+
+    /**
+     * \brief Number of entries in the arrays pointed to by litProgramOffset,
+     * litDelayRebuildProgramOffset.
+     *
+     * Note: NOT the total number of literals.
+     */
+    u32 literalCount;
+
     u32 multidirectOffset; /**< offset of multi-direct report list. */
     u32 activeArrayCount; //number of nfas tracked in the active array
     u32 activeLeftCount; //number of nfas tracked in the active rose array
@@ -468,8 +453,6 @@ struct RoseEngine {
     u32 anchored_count; /* number of anchored literal ids */
     u32 anchored_base_id; /* literal id of the first literal in the A table.
                            * anchored literal ids are contiguous */
-    u32 nonbenefits_base_id; /* first literal id without benefit conf.
-                              * contiguous, blah, blah */
     u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
                                   * usefully be reported */
     u32 delayRebuildLength; /* length of the history region which needs to be
@@ -486,8 +469,6 @@ struct RoseEngine {
     u32 rosePrefixCount; /* number of rose prefixes */
     u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
     u32 ematcherRegionSize; /* max region size to pass to ematcher */
-    u32 literalBenefitsOffsets; /* offset to array of benefits indexed by lit
-                                   id */
     u32 somRevCount; /**< number of som reverse nfas */
     u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
     u32 group_weak_end; /* end of weak groups, debugging only */
@@ -496,17 +477,6 @@ struct RoseEngine {
     struct scatter_full_plan state_init;
 };
 
-struct lit_benefits {
-    union {
-        u64a a64[MAX_MASK2_WIDTH/sizeof(u64a)];
-        u8 a8[MAX_MASK2_WIDTH];
-    } and_mask;
-    union {
-        u64a e64[MAX_MASK2_WIDTH/sizeof(u64a)];
-        u8 e8[MAX_MASK2_WIDTH];
-    } expected;
-};
-
 #if defined(_WIN32)
 #pragma pack(push, 1)
 #endif
@@ -574,14 +544,6 @@ const void *getSBLiteralMatcher(const struct RoseEngine *t) {
     return matcher;
 }
 
-static really_inline
-const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) {
-    const struct RoseLiteral *tl
-        = (const struct RoseLiteral *)((const char *)t + t->literalOffset);
-    assert(ISALIGNED_N(tl, 4));
-    return tl;
-}
-
 static really_inline
 const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
     const struct LeftNfaInfo *r
@@ -601,13 +563,6 @@ const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
     return it;
 }
 
-static really_inline
-const struct lit_benefits *getLiteralBenefitsTable(
-                                              const struct RoseEngine *t) {
-    return (const struct lit_benefits *)
-        ((const char *)t + t->literalBenefitsOffsets);
-}
-
 static really_inline
 const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
     const struct NfaInfo *infos
index 3f59ba15ad6d2690e5758f20d8672152be125e1e..37017ca0a2cbe6e4f9329083984681a4e1ce4952 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 /** \brief Role program instruction opcodes. */
 enum RoseInstructionCode {
     ROSE_INSTR_ANCHORED_DELAY,    //!< Delay until after anchored matcher.
+    ROSE_INSTR_CHECK_LIT_MASK,    //!< Check and/cmp mask.
+    ROSE_INSTR_CHECK_LIT_EARLY,   //!< Skip matches before floating min offset.
+    ROSE_INSTR_CHECK_GROUPS,      //!< Check that literal groups are on.
     ROSE_INSTR_CHECK_ONLY_EOD,    //!< Role matches only at EOD.
     ROSE_INSTR_CHECK_BOUNDS,      //!< Bounds on distance from offset 0.
     ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
     ROSE_INSTR_CHECK_LOOKAROUND,  //!< Lookaround check.
     ROSE_INSTR_CHECK_LEFTFIX,     //!< Leftfix must be in accept state.
+    ROSE_INSTR_PUSH_DELAYED,      //!< Push delayed literal matches.
     ROSE_INSTR_SOM_ADJUST,        //!< Set SOM from a distance to EOM.
     ROSE_INSTR_SOM_LEFTFIX,       //!< Acquire SOM from a leftfix engine.
     ROSE_INSTR_TRIGGER_INFIX,     //!< Trigger an infix engine.
@@ -59,6 +63,8 @@ enum RoseInstructionCode {
     ROSE_INSTR_REPORT_SOM_KNOWN,  //!< Rose role knows its SOM offset.
     ROSE_INSTR_SET_STATE,         //!< Switch a state index on.
     ROSE_INSTR_SET_GROUPS,        //!< Set some literal group bits.
+    ROSE_INSTR_SQUASH_GROUPS,     //!< Conditionally turn off some groups.
+    ROSE_INSTR_CHECK_STATE,       //!< Test a single bit in the state multibit.
     ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
     ROSE_INSTR_SPARSE_ITER_NEXT,  //!< Continue running sparse iter over states.
     ROSE_INSTR_END                //!< End of program.
@@ -70,6 +76,29 @@ struct ROSE_STRUCT_ANCHORED_DELAY {
     u32 done_jump; //!< Jump forward this many bytes if successful.
 };
 
+union RoseLiteralMask {
+    u64a a64[MAX_MASK2_WIDTH / sizeof(u64a)];
+    u8 a8[MAX_MASK2_WIDTH];
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LIT_MASK {
+    u8 code; //!< From enum RoseInstructionCode.
+    union RoseLiteralMask and_mask;
+    union RoseLiteralMask cmp_mask;
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LIT_EARLY {
+    u8 code; //!< From enum RoseInstructionCode.
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_GROUPS {
+    u8 code; //!< From enum RoseInstructionCode.
+    rose_group groups; //!< Bitmask.
+};
+
 struct ROSE_STRUCT_CHECK_ONLY_EOD {
     u8 code; //!< From enum RoseInstructionCode.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
@@ -103,6 +132,12 @@ struct ROSE_STRUCT_CHECK_LEFTFIX {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
+struct ROSE_STRUCT_PUSH_DELAYED {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 delay; //!< Number of bytes to delay.
+    u32 index; //!< Delay literal index (relative to first delay lit).
+};
+
 struct ROSE_STRUCT_SOM_ADJUST {
     u8 code; //!< From enum RoseInstructionCode.
     u32 distance; //!< Distance to EOM.
@@ -164,7 +199,18 @@ struct ROSE_STRUCT_SET_STATE {
 
 struct ROSE_STRUCT_SET_GROUPS {
     u8 code; //!< From enum RoseInstructionCode.
-    rose_group groups; //!< Bitmask.
+    rose_group groups; //!< Bitmask to OR into groups.
+};
+
+struct ROSE_STRUCT_SQUASH_GROUPS {
+    u8 code; //!< From enum RoseInstructionCode.
+    rose_group groups; //!< Bitmask to AND into groups.
+};
+
+struct ROSE_STRUCT_CHECK_STATE {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 index; //!< State index in the role multibit.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
 /**