rose: shift program construction functions to rose_build_program
author Alex Coyte <a.coyte@intel.com>
Wed, 26 Apr 2017 03:45:31 +0000 (13:45 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Tue, 30 May 2017 03:58:32 +0000 (13:58 +1000)
CMakeLists.txt
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_impl.h
src/rose/rose_build_misc.cpp
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_build_resources.h [new file with mode: 0644]

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 650bcf20e551de78d90fff117b1b3777cb88be0d..bc42c659406c2bb329de76191a3b402e35711de2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -971,6 +971,7 @@ SET (hs_SRCS
     src/rose/rose_build_misc.cpp
     src/rose/rose_build_program.cpp
     src/rose/rose_build_program.h
+    src/rose/rose_build_resources.h
     src/rose/rose_build_role_aliasing.cpp
     src/rose/rose_build_scatter.cpp
     src/rose/rose_build_scatter.h
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 949275583069115f21ec877375c662d6615b7b34..636af0a6a47225fd331f8a020fd8a51f3c6da388 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
 #include "rose_build_exclusive.h"
 #include "rose_build_groups.h"
 #include "rose_build_infix.h"
-#include "rose_build_instructions.h"
 #include "rose_build_long_lit.h"
 #include "rose_build_lookaround.h"
 #include "rose_build_matchers.h"
 #include "rose_build_program.h"
+#include "rose_build_resources.h"
 #include "rose_build_scatter.h"
 #include "rose_build_util.h"
 #include "rose_build_width.h"
@@ -82,7 +82,6 @@
 #include "util/compile_context.h"
 #include "util/compile_error.h"
 #include "util/container.h"
-#include "util/dump_charclass.h"
 #include "util/fatbit_build.h"
 #include "util/graph_range.h"
 #include "util/make_unique.h"
@@ -133,56 +132,6 @@ namespace ue2 {
 
 namespace /* anon */ {
 
-static constexpr u32 INVALID_QUEUE = ~0U;
-
-struct left_build_info {
-    // Constructor for an engine implementation.
-    left_build_info(u32 q, u32 l, u32 t, rose_group sm,
-                    const std::vector<u8> &stops, u32 max_ql, u8 cm_count,
-                    const CharReach &cm_cr)
-        : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops),
-          max_queuelen(max_ql), countingMiracleCount(cm_count),
-          countingMiracleReach(cm_cr) {}
-
-    // Constructor for a lookaround implementation.
-    explicit left_build_info(const vector<vector<LookEntry>> &looks)
-        : has_lookaround(true), lookaround(looks) {}
-
-    u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */
-    u32 lag = 0;
-    u32 transient = 0;
-    rose_group squash_mask = ~rose_group{0};
-    vector<u8> stopAlphabet;
-    u32 max_queuelen = 0;
-    u8 countingMiracleCount = 0;
-    CharReach countingMiracleReach;
-    u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
-    /* leftfix can be completely implemented with lookaround */
-    bool has_lookaround = false;
-    vector<vector<LookEntry>> lookaround; // alternative implementation to the NFA
-};
-
-/**
- * \brief Structure tracking which resources are used by this Rose instance at
- * runtime.
- *
- * We use this to control how much initialisation we need to do at the
- * beginning of a stream/block at runtime.
- */
-struct RoseResources {
-    bool has_outfixes = false;
-    bool has_suffixes = false;
-    bool has_leftfixes = false;
-    bool has_literals = false;
-    bool has_states = false;
-    bool checks_groups = false;
-    bool has_lit_delay = false;
-    bool has_lit_check = false; // long literal support
-    bool has_anchored = false;
-    bool has_floating = false;
-    bool has_eod = false;
-};
-
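The RoseResources comment above explains why this struct is being moved to the new rose_build_resources.h header added by this commit: the runtime consults these flags to decide how much per-stream or per-block initialisation it can skip. A minimal sketch of that gating pattern, with hypothetical stand-in init routines (not real Hyperscan runtime entry points):

    struct Resources {
        bool has_states = false;
        bool checks_groups = false;
        bool has_anchored = false;
    };

    void initRoleState() {}     // stand-in: clear the role-state multibit
    void initGroupMask() {}     // stand-in: seed the initial group mask
    void initAnchoredState() {} // stand-in: anchored-table bookkeeping

    // Skip whichever pieces of setup this database can never use.
    void initStreamState(const Resources &res) {
        if (res.has_states)    { initRoleState(); }
        if (res.checks_groups) { initGroupMask(); }
        if (res.has_anchored)  { initAnchoredState(); }
    }
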
 struct build_context : noncopyable {
     /** \brief information about engines to the left of a vertex */
     map<RoseVertex, left_build_info> leftfix_info;
@@ -190,27 +139,15 @@ struct build_context : noncopyable {
     /** \brief mapping from suffix to queue index. */
     map<suffix_id, u32> suffixes;
 
+    /** \brief engine info by queue. */
+    map<u32, engine_info> engine_info_by_queue;
+
     /** \brief Simple cache of programs written to engine blob, used for
      * deduplication. */
     ue2::unordered_map<RoseProgram, u32, RoseProgramHash,
                        RoseProgramEquivalence> program_cache;
 
-    /** \brief LookEntry list cache, so that we can reuse the look index and
-     * reach index for the same lookaround. */
-    ue2::unordered_map<vector<vector<LookEntry>>,
-                       pair<size_t, size_t>> lookaround_cache;
-
-    /** \brief Lookaround table for Rose roles. */
-    vector<vector<vector<LookEntry>>> lookaround;
-
-    /** \brief Lookaround look table size. */
-    size_t lookTableSize = 0;
-
-    /** \brief Lookaround reach table size.
-     * Since single-path and multi-path lookarounds have different reach
-     * bitvector sizes (32 and 256), we need to maintain both the look table
-     * size and the reach table size. */
-    size_t reachTableSize = 0;
+    lookaround_info lookarounds;
 
     /** \brief State indices, for those roles that have them.
      * Each vertex present has a unique state index in the range
@@ -225,17 +162,10 @@ struct build_context : noncopyable {
      * that need hash table support. */
     vector<ue2_case_string> longLiterals;
 
-    /** \brief Long literal length threshold, used in streaming mode. */
-    size_t longLitLengthThreshold = 0;
-
     /** \brief Contents of the Rose bytecode immediately following the
      * RoseEngine. */
     RoseEngineBlob engine_blob;
 
-    /** \brief True if reports need CATCH_UP instructions to catch up suffixes,
-     * outfixes etc. */
-    bool needs_catchup;
-
     /** \brief True if this Rose engine has an MPV engine. */
     bool needs_mpv_catchup = false;
 
@@ -243,34 +173,6 @@ struct build_context : noncopyable {
     RoseResources resources;
 };
 
-/** \brief Data only used during construction of various programs (literal,
- * anchored, delay, etc). */
-struct ProgramBuild : noncopyable {
-    explicit ProgramBuild(u32 fMinLitOffset)
-        : floatingMinLiteralMatchOffset(fMinLitOffset) {
-    }
-
-    /** \brief Minimum offset of a match from the floating table. */
-    const u32 floatingMinLiteralMatchOffset;
-
-    /** \brief Mapping from vertex to key, for vertices with a
-     * CHECK_NOT_HANDLED instruction. */
-    ue2::unordered_map<RoseVertex, u32> handledKeys;
-
-    /** \brief Mapping from Rose literal ID to anchored program index. */
-    map<u32, u32> anchored_programs;
-
-    /** \brief Mapping from Rose literal ID to delayed program index. */
-    map<u32, u32> delay_programs;
-
-    /** \brief Mapping from every vertex to the groups that must be on for that
-     * vertex to be reached. */
-    ue2::unordered_map<RoseVertex, rose_group> vertex_group_map;
-
-    /** \brief Global bitmap of groups that can be squashed. */
-    rose_group squashable_groups = 0;
-};
-
 /** \brief subengine info including built engine and
 * corresponding triggering rose vertices */
 struct ExclusiveSubengine {
@@ -291,18 +193,7 @@ struct ExclusiveInfo : noncopyable {
 }
 
 static
-const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) {
-    assert(contains(bc.engineOffsets, qi));
-    u32 nfa_offset = bc.engineOffsets.at(qi);
-    assert(nfa_offset >= bc.engine_blob.base_offset);
-    const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset -
-                                 bc.engine_blob.base_offset);
-    assert(n->queueIndex == qi);
-    return n;
-}
-
-static
-const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) {
+void add_nfa_to_blob(build_context &bc, NFA &nfa) {
     u32 qi = nfa.queueIndex;
     u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length);
     DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi,
@@ -310,10 +201,6 @@ const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) {
 
     assert(!contains(bc.engineOffsets, qi));
     bc.engineOffsets.emplace(qi, nfa_offset);
-
-    const NFA *n = get_nfa_from_blob(bc, qi);
-    assert(memcmp(&nfa, n, nfa.length) == 0);
-    return n;
 }
 
 static
@@ -401,8 +288,8 @@ bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
     }
 
     if (cc.streaming && resources.has_lit_check) {
-        DEBUG_PRINTF("has long literals in streaming mode, which needs "
-                     "long literal table support\n");
+        DEBUG_PRINTF("has long literals in streaming mode, which needs long "
+                     "literal table support\n");
         return false;
     }
 
@@ -719,8 +606,7 @@ buildRepeatEngine(const CastleProto &proto,
 
 static
 bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
-                               const CompileContext &cc,
-                               const ReportManager &rm) {
+                         const CompileContext &cc, const ReportManager &rm) {
     // Unleash the Sheng!!
     auto dfa = shengCompile(rdfa, cc, rm, false);
     if (!dfa && !is_transient) {
@@ -1155,6 +1041,31 @@ left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei,
     return leftfix;
 }
 
+static
+void enforceEngineSizeLimit(const NFA *n, const Grey &grey) {
+    const size_t nfa_size = n->length;
+    // Global limit.
+    if (nfa_size > grey.limitEngineSize) {
+        throw ResourceLimitError();
+    }
+
+    // Type-specific limit checks follow.
+
+    if (isDfaType(n->type)) {
+        if (nfa_size > grey.limitDFASize) {
+            throw ResourceLimitError();
+        }
+    } else if (isNfaType(n->type)) {
+        if (nfa_size > grey.limitNFASize) {
+            throw ResourceLimitError();
+        }
+    } else if (isLbrType(n->type)) {
+        if (nfa_size > grey.limitLBRSize) {
+            throw ResourceLimitError();
+        }
+    }
+}
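With the move, the size limit is enforced at each engine construction site instead of once over the whole blob in writeNfaInfo (the old copy of this function is deleted further down). Each site in this diff pairs the check with recording an engine_info for the queue, as in buildLeftfix, prepMpv, prepOutfixes and buildSuffixes below:

    nfa->queueIndex = qi;
    enforceEngineSizeLimit(nfa.get(), cc.grey);
    bc.engine_info_by_queue.emplace(nfa->queueIndex,
                                    engine_info(nfa.get(), is_transient));

(is_transient is false at the sites where the engine can never be transient.)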
+
 static
 bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
                   const map<left_id, set<PredTopPair> > &infixTriggers,
@@ -1193,6 +1104,9 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
     setLeftNfaProperties(*nfa, leftfix);
 
     nfa->queueIndex = qi;
+    enforceEngineSizeLimit(nfa.get(), cc.grey);
+    bc.engine_info_by_queue.emplace(nfa->queueIndex,
+                                    engine_info(nfa.get(), is_transient));
 
     if (!prefix && !leftfix.haig() && leftfix.graph()
         && nfaStuckOn(*leftfix.graph())) {
@@ -1290,12 +1204,10 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo,
     for (const auto &n : tamaInfo.subengines) {
         for (const auto &v : subengines[i].vertices) {
             if (is_suffix) {
-                tamaProto.add(n, g[v].index, g[v].suffix.top,
-                              out_top_remap);
+                tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap);
             } else {
                 for (const auto &e : in_edges_range(v, g)) {
-                    tamaProto.add(n, g[v].index, g[e].rose_top,
-                                  out_top_remap);
+                    tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap);
                 }
             }
         }
@@ -1308,32 +1220,34 @@ shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g,
                                                build_context &bc,
                                                const ExclusiveInfo &info,
                                                const u32 queue,
-                                               const bool is_suffix) {
+                                               const bool is_suffix,
+                                               const Grey &grey) {
     const auto &subengines = info.subengines;
-    auto tamaInfo =
-        constructTamaInfo(g, subengines, is_suffix);
+    auto tamaInfo = constructTamaInfo(g, subengines, is_suffix);
 
     map<pair<const NFA *, u32>, u32> out_top_remap;
     auto n = buildTamarama(*tamaInfo, queue, out_top_remap);
+    enforceEngineSizeLimit(n.get(), grey);
+    bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false));
     add_nfa_to_blob(bc, *n);
 
     DEBUG_PRINTF("queue id:%u\n", queue);
     shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>();
     tamaProto->reports = info.reports;
-    updateTops(g, *tamaInfo, *tamaProto, subengines,
-               out_top_remap, is_suffix);
+    updateTops(g, *tamaInfo, *tamaProto, subengines, out_top_remap, is_suffix);
     return tamaProto;
 }
 
 static
 void buildInfixContainer(RoseGraph &g, build_context &bc,
-                         const vector<ExclusiveInfo> &exclusive_info) {
+                         const vector<ExclusiveInfo> &exclusive_info,
+                         const Grey &grey) {
     // Build tamarama engine
     for (const auto &info : exclusive_info) {
         const u32 queue = info.queue;
         const auto &subengines = info.subengines;
         auto tamaProto =
-            constructContainerEngine(g, bc, info, queue, false);
+            constructContainerEngine(g, bc, info, queue, false, grey);
 
         for (const auto &sub : subengines) {
             const auto &verts = sub.vertices;
@@ -1347,13 +1261,14 @@ void buildInfixContainer(RoseGraph &g, build_context &bc,
 
 static
 void buildSuffixContainer(RoseGraph &g, build_context &bc,
-                          const vector<ExclusiveInfo> &exclusive_info) {
+                          const vector<ExclusiveInfo> &exclusive_info,
+                          const Grey &grey) {
     // Build tamarama engine
     for (const auto &info : exclusive_info) {
         const u32 queue = info.queue;
         const auto &subengines = info.subengines;
-        auto tamaProto =
-            constructContainerEngine(g, bc, info, queue, true);
+        auto tamaProto = constructContainerEngine(g, bc, info, queue, true,
+                                                  grey);
         for (const auto &sub : subengines) {
             const auto &verts = sub.vertices;
             for (const auto &v : verts) {
@@ -1488,7 +1403,7 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
     }
     updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info,
                                    no_retrigger_queues);
-    buildInfixContainer(g, bc, exclusive_info);
+    buildInfixContainer(g, bc, exclusive_info, build.cc.grey);
 }
 
 static
@@ -1560,8 +1475,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
     findInfixTriggers(tbi, &infixTriggers);
 
     if (cc.grey.allowTamarama && cc.streaming && !do_prefix) {
-        findExclusiveInfixes(tbi, bc, qif, infixTriggers,
-                             no_retrigger_queues);
+        findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues);
     }
 
     for (auto v : vertices_range(g)) {
@@ -1769,6 +1683,9 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired,
 
     u32 qi = mpv_outfix->get_queue(tbi.qif);
     nfa->queueIndex = qi;
+    enforceEngineSizeLimit(nfa.get(), tbi.cc.grey);
+    bc.engine_info_by_queue.emplace(nfa->queueIndex,
+                                    engine_info(nfa.get(), false));
 
     DEBUG_PRINTF("built mpv\n");
 
@@ -1827,6 +1744,9 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc,
         setOutfixProperties(*n, out);
 
         n->queueIndex = out.get_queue(tbi.qif);
+        enforceEngineSizeLimit(n.get(), tbi.cc.grey);
+        bc.engine_info_by_queue.emplace(n->queueIndex,
+                                        engine_info(n.get(), false));
 
         if (!*historyRequired && requires_decompress_key(*n)) {
             *historyRequired = 1;
@@ -1924,14 +1844,14 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
     }
     updateExclusiveSuffixProperties(build, exclusive_info,
                                     no_retrigger_queues);
-    buildSuffixContainer(g, bc, exclusive_info);
+    buildSuffixContainer(g, bc, exclusive_info, build.cc.grey);
 }
 
 static
 void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
-                  QueueIndexFactory &qif,
-                  map<suffix_id, set<PredTopPair>> &suffixTriggers,
-                  set<u32> *no_retrigger_queues) {
+                           QueueIndexFactory &qif,
+                           map<suffix_id, set<PredTopPair>> &suffixTriggers,
+                           set<u32> *no_retrigger_queues) {
     const RoseGraph &g = tbi.g;
 
     map<suffix_id, u32> suffixes;
@@ -2021,6 +1941,10 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
         setSuffixProperties(*n, s, tbi.rm);
 
         n->queueIndex = queue;
+        enforceEngineSizeLimit(n.get(), tbi.cc.grey);
+        bc.engine_info_by_queue.emplace(n->queueIndex,
+                                        engine_info(n.get(), false));
+
         if (s.graph() && nfaStuckOn(*s.graph())) { /* todo: have corresponding
                                                     * haig analysis */
             assert(!s.haig());
@@ -2114,44 +2038,28 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
 }
 
 static
-void allocateStateSpace(const NFA *nfa, NfaInfo &nfa_info, bool is_transient,
+void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
                         RoseStateOffsets *so, u32 *scratchStateSize,
                         u32 *streamStateSize, u32 *transientStateSize) {
     u32 state_offset;
-    if (is_transient) {
+    if (eng_info.transient) {
         // Transient engines do not use stream state, but must have room in
         // transient state (stored in scratch).
         state_offset = *transientStateSize;
-        *transientStateSize += nfa->streamStateSize;
+        *transientStateSize += eng_info.stream_size;
     } else {
         // Pack NFA stream state on to the end of the Rose stream state.
         state_offset = so->end;
-        so->end += nfa->streamStateSize;
-        *streamStateSize += nfa->streamStateSize;
+        so->end += eng_info.stream_size;
+        *streamStateSize += eng_info.stream_size;
     }
 
     nfa_info.stateOffset = state_offset;
 
     // Uncompressed state in scratch must be aligned.
-    u32 alignReq = state_alignment(*nfa);
-    assert(alignReq);
-    *scratchStateSize = ROUNDUP_N(*scratchStateSize, alignReq);
+    *scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align);
     nfa_info.fullStateOffset = *scratchStateSize;
-    *scratchStateSize += nfa->scratchStateSize;
-}
-
-static
-set<u32>
-findTransientQueues(const map<RoseVertex, left_build_info> &leftfix_info) {
-    DEBUG_PRINTF("curating transient queues\n");
-    set<u32> out;
-    for (const auto &left : leftfix_info | map_values) {
-        if (left.transient) {
-            DEBUG_PRINTF("q %u is transient\n", left.queue);
-            out.insert(left.queue);
-        }
-    }
-    return out;
+    *scratchStateSize += eng_info.scratch_size;
 }
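A worked example of the allocation above, with made-up sizes: suppose the Rose stream state currently ends at offset 100, scratch holds 10 bytes so far, and a persistent (non-transient) engine has stream_size = 16, scratch_size = 64, scratch_align = 8.

    state_offset      = so->end = 100            // packed after Rose stream state
    so->end           = 100 + 16 = 116
    *streamStateSize += 16
    *scratchStateSize = ROUNDUP_N(10, 8) = 16    // align uncompressed state
    fullStateOffset   = 16
    *scratchStateSize = 16 + 64 = 80

Had the engine been transient, the 16 bytes of stream state would instead have been charged to *transientStateSize (stored in scratch), leaving so->end untouched.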
 
 static
@@ -2159,7 +2067,6 @@ void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
                     RoseStateOffsets *so, u32 *scratchStateSize,
                     u32 *streamStateSize, u32 *transientStateSize) {
     if (nfa_infos.empty()) {
-        assert(bc.engineOffsets.empty());
         return;
     }
 
@@ -2167,14 +2074,10 @@ void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
     *transientStateSize = 0;
     *scratchStateSize = 0;
 
-    auto transient_queues = findTransientQueues(bc.leftfix_info);
-
-    for (const auto &m : bc.engineOffsets) {
-        const NFA *nfa = get_nfa_from_blob(bc, m.first);
-        u32 qi = nfa->queueIndex;
-        bool is_transient = contains(transient_queues, qi);
+    for (u32 qi = 0; qi < nfa_infos.size(); qi++) {
         NfaInfo &nfa_info = nfa_infos[qi];
-        allocateStateSpace(nfa, nfa_info, is_transient, so, scratchStateSize,
+        const auto &eng_info = bc.engine_info_by_queue.at(qi);
+        allocateStateSpace(eng_info, nfa_info, so, scratchStateSize,
                            streamStateSize, transientStateSize);
     }
 }
@@ -2267,30 +2170,6 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) {
     return bc.engine_blob.add_iterator(iter);
 }
 
-static
-void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) {
-    // Global limit.
-    if (nfa_size > grey.limitEngineSize) {
-        throw ResourceLimitError();
-    }
-
-    // Type-specific limit checks follow.
-
-    if (isDfaType(n->type)) {
-        if (nfa_size > grey.limitDFASize) {
-            throw ResourceLimitError();
-        }
-    } else if (isNfaType(n->type)) {
-        if (nfa_size > grey.limitNFASize) {
-            throw ResourceLimitError();
-        }
-    } else if (isLbrType(n->type)) {
-        if (nfa_size > grey.limitLBRSize) {
-            throw ResourceLimitError();
-        }
-    }
-}
-
 static
 u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
                                 const vector<raw_dfa> &anchored_dfas) {
@@ -2363,8 +2242,8 @@ static
 u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) {
     vector<u32> keys;
     for (u32 qi = 0; qi < activeQueueCount; ++qi) {
-        const NFA *n = get_nfa_from_blob(bc, qi);
-        if (nfaAcceptsEod(n)) {
+        const auto &eng_info = bc.engine_info_by_queue.at(qi);
+        if (eng_info.accepts_eod) {
             DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi);
             keys.push_back(qi);
         }
@@ -2451,61 +2330,7 @@ void addSomRevNfas(build_context &bc, RoseEngine &proto,
 }
 
 static
-void applyFinalSpecialisation(RoseProgram &program) {
-    assert(!program.empty());
-    assert(program.back().code() == ROSE_INSTR_END);
-    if (program.size() < 2) {
-        return;
-    }
-
-    /* Replace the second-to-last instruction (before END) with a one-shot
-     * specialisation if available. */
-    auto it = next(program.rbegin());
-    if (auto *ri = dynamic_cast<const RoseInstrReport *>(it->get())) {
-        DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n");
-        program.replace(it, make_unique<RoseInstrFinalReport>(
-                                ri->onmatch, ri->offset_adjust));
-    }
-}
-
-static
-void recordResources(RoseResources &resources, const RoseProgram &program) {
-    for (const auto &ri : program) {
-        switch (ri->code()) {
-        case ROSE_INSTR_TRIGGER_SUFFIX:
-            resources.has_suffixes = true;
-            break;
-        case ROSE_INSTR_TRIGGER_INFIX:
-        case ROSE_INSTR_CHECK_INFIX:
-        case ROSE_INSTR_CHECK_PREFIX:
-        case ROSE_INSTR_SOM_LEFTFIX:
-            resources.has_leftfixes = true;
-            break;
-        case ROSE_INSTR_SET_STATE:
-        case ROSE_INSTR_CHECK_STATE:
-        case ROSE_INSTR_SPARSE_ITER_BEGIN:
-        case ROSE_INSTR_SPARSE_ITER_NEXT:
-            resources.has_states = true;
-            break;
-        case ROSE_INSTR_CHECK_GROUPS:
-            resources.checks_groups = true;
-            break;
-        case ROSE_INSTR_PUSH_DELAYED:
-            resources.has_lit_delay = true;
-            break;
-        case ROSE_INSTR_CHECK_LONG_LIT:
-        case ROSE_INSTR_CHECK_LONG_LIT_NOCASE:
-            resources.has_lit_check = true;
-            break;
-        default:
-            break;
-        }
-    }
-}
-
-static
-void recordResources(RoseResources &resources,
-                     const RoseBuildImpl &build,
+void recordResources(RoseResources &resources, const RoseBuildImpl &build,
                      const vector<LitFragment> &fragments) {
     if (!build.outfixes.empty()) {
         resources.has_outfixes = true;
@@ -2526,26 +2351,6 @@ void recordResources(RoseResources &resources,
     }
 }
 
-static
-void recordLongLiterals(vector<ue2_case_string> &longLiterals,
-                        const RoseProgram &program) {
-    for (const auto &ri : program) {
-        if (const auto *ri_check =
-                dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) {
-            DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n",
-                         escapeString(ri_check->literal).c_str());
-            longLiterals.emplace_back(ri_check->literal, false);
-            continue;
-        }
-        if (const auto *ri_check =
-                dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) {
-            DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n",
-                         escapeString(ri_check->literal).c_str());
-            longLiterals.emplace_back(ri_check->literal, true);
-        }
-    }
-}
-
 static
 u32 writeProgram(build_context &bc, RoseProgram &&program) {
     if (program.empty()) {
@@ -2593,41 +2398,12 @@ u32 writeActiveLeftIter(RoseEngineBlob &engine_blob,
     return engine_blob.add_iterator(iter);
 }
 
-static
-bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
-    const auto &g = build.g;
-    const auto v = target(e, g);
-
-    if (!build.g[v].eod_accept) {
-        return false;
-    }
-
-    // If there's a graph between us and EOD, we shouldn't be eager.
-    if (build.g[v].left) {
-        return false;
-    }
-
-    // Must be exactly at EOD.
-    if (g[e].minBound != 0 || g[e].maxBound != 0) {
-        return false;
-    }
-
-    // In streaming mode, we can only eagerly report EOD for literals in the
-    // EOD-anchored table, as that's the only time we actually know where EOD
-    // is. In block mode, we always have this information.
-    const auto u = source(e, g);
-    if (build.cc.streaming && !build.isInETable(u)) {
-        return false;
-    }
-
-    return true;
-}
-
 static
 bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
                    u32 outfixEndQueue) {
     for (u32 i = 0; i < outfixEndQueue; i++) {
-        if (nfaAcceptsEod(get_nfa_from_blob(bc, i))) {
+        const auto &eng_info = bc.engine_info_by_queue.at(i);
+        if (eng_info.accepts_eod) {
             DEBUG_PRINTF("outfix has eod\n");
             return true;
         }
@@ -2699,21 +2475,22 @@ void writeMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
 }
 
 static
-void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
-    vector<s8> look_table(bc.lookTableSize, 0);
-    vector<u8> reach_table(bc.reachTableSize, 0);
+void writeLookaroundTables(const lookaround_info &lookarounds,
+                           RoseEngineBlob &engine_blob, RoseEngine &proto) {
+    vector<s8> look_table(lookarounds.lookTableSize, 0);
+    vector<u8> reach_table(lookarounds.reachTableSize, 0);
     s8 *look = look_table.data();
     u8 *reach = reach_table.data();
-    for (const auto &l : bc.lookaround) {
-        if (l.size() == 1) {
-            writeLookaround(l.front(), look, reach);
+    for (const auto &la : lookarounds.table) {
+        if (la.size() == 1) {
+            writeLookaround(la.front(), look, reach);
         } else {
-            writeMultipathLookaround(l, look, reach);
+            writeMultipathLookaround(la, look, reach);
         }
     }
 
-    proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table);
-    proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table);
+    proto.lookaroundTableOffset = engine_blob.add_range(look_table);
+    proto.lookaroundReachOffset = engine_blob.add_range(reach_table);
 }
 
 static
@@ -2750,9 +2527,6 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
     memset(infos.data(), 0, sizeof(NfaInfo) * queue_count);
 
     for (u32 qi = 0; qi < queue_count; qi++) {
-        const NFA *n = get_nfa_from_blob(bc, qi);
-        enforceEngineSizeLimit(n, n->length, build.cc.grey);
-
         NfaInfo &info = infos[qi];
         info.nfaOffset = bc.engineOffsets.at(qi);
         assert(qi < ekey_lists.size());
@@ -2806,1567 +2580,99 @@ bool hasBoundaryReports(const BoundaryReports &boundary) {
     return false;
 }
 
-/**
- * \brief True if the given vertex is a role that can only be switched on at
- * EOD.
- */
 static
-bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) {
-    const RoseGraph &g = tbi.g;
-
-    // All such roles have only (0,0) edges to vertices with the eod_accept
-    // property, and no other effects (suffixes, ordinary reports, etc, etc).
-
-    if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) {
-        return false;
-    }
-
-    for (const auto &e : out_edges_range(v, g)) {
-        RoseVertex w = target(e, g);
-        if (!g[w].eod_accept) {
-            return false;
-        }
-        assert(!g[w].reports.empty());
-        assert(g[w].literals.empty());
+void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc,
+                          const BoundaryReports &boundary,
+                          const DerivedBoundaryReports &dboundary,
+                          RoseBoundaryReports &out) {
+    DEBUG_PRINTF("report ^:  %zu\n", boundary.report_at_0.size());
+    DEBUG_PRINTF("report $:  %zu\n", boundary.report_at_eod.size());
+    DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
 
-        if (g[e].minBound || g[e].maxBound) {
-            return false;
-        }
-    }
+    auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod);
+    out.reportEodOffset = writeProgram(bc, move(eod_prog));
 
-    /* There is no point enforcing this check at runtime if
-     * this role is only fired by the eod event literal */
-    if (tbi.eod_event_literal_id != MO_INVALID_IDX &&
-        g[v].literals.size() == 1 &&
-        *g[v].literals.begin() == tbi.eod_event_literal_id) {
-        return false;
-    }
+    auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0);
+    out.reportZeroOffset = writeProgram(bc, move(zero_prog));
 
-    return true;
+    auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full);
+    out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog));
 }
 
 static
-void addLookaround(build_context &bc,
-                   const vector<vector<LookEntry>> &look,
-                   u32 &look_index, u32 &reach_index) {
-    // Check the cache.
-    auto it = bc.lookaround_cache.find(look);
-    if (it != bc.lookaround_cache.end()) {
-        look_index = verify_u32(it->second.first);
-        reach_index = verify_u32(it->second.second);
-        DEBUG_PRINTF("reusing look at idx %u\n", look_index);
-        DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
-        return;
-    }
-
-    size_t look_idx = bc.lookTableSize;
-    size_t reach_idx = bc.reachTableSize;
+unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) {
+    const auto &g = build.g;
 
-    if (look.size() == 1) {
-        bc.lookTableSize += look.front().size();
-        bc.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
-    } else {
-        bc.lookTableSize += look.size();
-        bc.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
-    }
+    u32 state = 0;
+    unordered_map<RoseVertex, u32> roleStateIndices;
+    for (auto v : vertices_range(g)) {
+        // Virtual vertices (starts, EOD accept vertices) never need state
+        // indices.
+        if (build.isVirtualVertex(v)) {
+            continue;
+        }
 
-    bc.lookaround_cache.emplace(look, make_pair(look_idx, reach_idx));
-    bc.lookaround.emplace_back(look);
+        // We only need a state index if we have successors that are not
+        // eagerly-reported EOD vertices.
+        bool needs_state_index = false;
+        for (const auto &e : out_edges_range(v, g)) {
+            if (!canEagerlyReportAtEod(build, e)) {
+                needs_state_index = true;
+                break;
+            }
+        }
 
-    DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
-    DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
-    look_index =  verify_u32(look_idx);
-    reach_index = verify_u32(reach_idx);
-}
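The accounting here matches the removed build_context comment: a single-path lookaround costs one look byte plus a REACH_BITVECTOR_LEN (32) byte reach bitvector per entry, while a multi-path lookaround costs one look byte plus a MULTI_REACH_BITVECTOR_LEN (256) byte bitvector per distinct offset. For example:

    single-path look with 4 entries:         lookTableSize += 4, reachTableSize += 4 * 32
    multi-path look over 3 distinct offsets: lookTableSize += 3, reachTableSize += 3 * 256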
+        if (!needs_state_index) {
+            continue;
+        }
 
-static
-bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
-    size_t reach_size = cr.count();
-    assert(reach_size > 0);
-    // check whether reach_size is a power of 2.
-    if ((reach_size - 1) & reach_size) {
-        return false;
-    }
-    make_and_cmp_mask(cr, &andmask, &cmpmask);
-    if ((1 << popcount32((u8)(~andmask))) ^ reach_size) {
-        return false;
+        /* TODO: also don't need a state index if all edges are nfa based */
+        roleStateIndices.emplace(v, state++);
     }
-    return true;
-}
 
-static
-bool checkReachWithFlip(const CharReach &cr, u8 &andmask,
-                       u8 &cmpmask, u8 &flip) {
-    if (checkReachMask(cr, andmask, cmpmask)) {
-        flip = 0;
-        return true;
-    }
-    if (checkReachMask(~cr, andmask, cmpmask)) {
-        flip = 1;
-        return true;
-    }
-    return false;
-}
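A worked example of the mask test in checkReachMask, assuming make_and_cmp_mask produces masks such that (c & andmask) == cmpmask holds exactly for characters in the reach: take the case-insensitive class {'A' (0x41), 'a' (0x61)}, whose members differ only in bit 5.

    reach_size = 2                       // a power of two, first test passes
    andmask    = 0xdf                    // clears the one varying bit
    cmpmask    = 0x41                    // (c & 0xdf) == 0x41 only for 'A' and 'a'
    popcount32((u8)~andmask) = 1         // one free bit covers 2 characters
    (1 << 1) == 2 == reach_size          // mask represents the class exactly

checkReachWithFlip simply retries with the complemented reach and records whether a flip was needed.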
+    DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state,
+                 num_vertices(g));
 
-static
-bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) {
-    if (look.size() == 1) {
-        const auto &entry = look[0];
-        u8 andmask_u8, cmpmask_u8;
-        u8 flip;
-        if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
-            return false;
-        }
-        s32 checkbyte_offset = verify_s32(entry.offset);
-        DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset);
-        const auto *end_inst = program.end_instruction();
-        auto ri = make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip,
-                                                  checkbyte_offset, end_inst);
-        program.add_before_end(move(ri));
-        return true;
-    }
-    return false;
+    return roleStateIndices;
 }
 
 static
-bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) {
-    if (look.back().offset < look.front().offset + 8) {
-        s32 base_offset = verify_s32(look.front().offset);
-        u64a and_mask = 0;
-        u64a cmp_mask = 0;
-        u64a neg_mask = 0;
-        for (const auto &entry : look) {
-            u8 andmask_u8, cmpmask_u8, flip;
-            if (!checkReachWithFlip(entry.reach, andmask_u8,
-                                    cmpmask_u8, flip)) {
-                return false;
-            }
-            DEBUG_PRINTF("entry offset %d\n", entry.offset);
-            u32 shift = (entry.offset - base_offset) << 3;
-            and_mask |= (u64a)andmask_u8 << shift;
-            cmp_mask |= (u64a)cmpmask_u8 << shift;
-            if (flip) {
-                neg_mask |= 0xffLLU << shift;
-            }
+bool hasUsefulStops(const left_build_info &build) {
+    for (u32 i = 0; i < N_CHARS; i++) {
+        if (build.stopAlphabet[i]) {
+            return true;
         }
-        DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n",
-                     and_mask, cmp_mask);
-        const auto *end_inst = program.end_instruction();
-        auto ri = make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask,
-                                                  base_offset, end_inst);
-        program.add_before_end(move(ri));
-        return true;
     }
     return false;
 }
 
-static UNUSED
-string convertMaskstoString(u8 *p, int byte_len) {
-    string s;
-    for (int i = 0; i < byte_len; i++) {
-        u8 hi = *p >> 4;
-        u8 lo = *p & 0xf;
-        s += (char)(hi + (hi < 10 ? 48 : 87));
-        s += (char)(lo + (lo < 10 ? 48 : 87));
-        p++;
-    }
-    return s;
-}
-
 static
-bool makeRoleMask32(const vector<LookEntry> &look,
-                    RoseProgram &program) {
-    if (look.back().offset >= look.front().offset + 32) {
-        return false;
-    }
-    s32 base_offset = verify_s32(look.front().offset);
-    array<u8, 32> and_mask, cmp_mask;
-    and_mask.fill(0);
-    cmp_mask.fill(0);
-    u32 neg_mask = 0;
-    for (const auto &entry : look) {
-        u8 andmask_u8, cmpmask_u8, flip;
-        if (!checkReachWithFlip(entry.reach, andmask_u8,
-                                cmpmask_u8, flip)) {
-            return false;
-        }
-        u32 shift = entry.offset - base_offset;
-        assert(shift < 32);
-        and_mask[shift] = andmask_u8;
-        cmp_mask[shift] = cmpmask_u8;
-        if (flip) {
-            neg_mask |= 1 << shift;
-        }
-    }
+void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
+                        const set<u32> &eager_queues, u32 leftfixBeginQueue,
+                        u32 leftfixCount, vector<LeftNfaInfo> &leftTable,
+                        u32 *laggedRoseCount, size_t *history) {
+    const RoseGraph &g = tbi.g;
+    const CompileContext &cc = tbi.cc;
 
-    DEBUG_PRINTF("and_mask %s\n",
-                 convertMaskstoString(and_mask.data(), 32).c_str());
-    DEBUG_PRINTF("cmp_mask %s\n",
-                 convertMaskstoString(cmp_mask.data(), 32).c_str());
-    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
-    DEBUG_PRINTF("base_offset %d\n", base_offset);
+    ue2::unordered_set<u32> done_core;
 
-    const auto *end_inst = program.end_instruction();
-    auto ri = make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask,
-                                                base_offset, end_inst);
-    program.add_before_end(move(ri));
-    return true;
-}
+    leftTable.resize(leftfixCount);
 
-// Sorting by the size of every bucket.
-// Used in map<u32, vector<s8>, cmpNibble>.
-struct cmpNibble {
-    bool operator()(const u32 data1, const u32 data2) const{
-        u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16);
-        u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16);
-        return std::tie(size1, data1) < std::tie(size2, data2);
-    }
-};
+    u32 lagIndex = 0;
 
-// Insert all pairs of bucket and offset into buckets.
-static really_inline
-void getAllBuckets(const vector<LookEntry> &look,
-                   map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
-    s32 base_offset = verify_s32(look.front().offset);
-    for (const auto &entry : look) {
-        CharReach cr = entry.reach;
-        // Flip heavy character classes to save buckets.
-        if (cr.count() > 128 ) {
-            cr.flip();
-        } else {
-            neg_mask ^= 1ULL << (entry.offset - base_offset);
-        }
-        map <u16, u16> lo2hi;
-        // We treat the ASCII table as a 16x16 grid.
-        // Push every row in cr into lo2hi and mark the row number.
-        for (size_t i = cr.find_first(); i != CharReach::npos;) {
-            u8 it_hi = i >> 4;
-            u16 low_encode = 0;
-            while (i != CharReach::npos && (i >> 4) == it_hi) {
-                low_encode |= 1 << (i & 0xf);
-                i = cr.find_next(i);
-            }
-            lo2hi[low_encode] |= 1 << it_hi;
-        }
-        for (const auto &it : lo2hi) {
-            u32 hi_lo = (it.second << 16) | it.first;
-            buckets[hi_lo].push_back(entry.offset);
+    for (RoseVertex v : vertices_range(g)) {
+        if (!g[v].left) {
+            continue;
         }
-    }
-}
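A worked example of the 16x16 grid encoding: for the reach {0x41-0x44, 0x61-0x64} ('A'-'D' plus 'a'-'d'), rows 0x4 and 0x6 share one low-nibble pattern and collapse into a single bucket.

    row 0x4 ('A'-'D'): lo nibbles {1,2,3,4} -> low_encode = 0x001e
    row 0x6 ('a'-'d'): lo nibbles {1,2,3,4} -> low_encode = 0x001e
    lo2hi[0x001e] = (1 << 4) | (1 << 6) = 0x0050
    hi_lo = (0x0050 << 16) | 0x001e = 0x0050001e   // one bucket keyed for both rows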
-
-// Once we have a new bucket, we'll try to combine it with all old buckets.
-static really_inline
-void nibUpdate(map<u32, u16> &nib, u32 hi_lo) {
-    u16 hi = hi_lo >> 16;
-    u16 lo = hi_lo & 0xffff;
-    for (const auto pairs : nib) {
-        u32 old = pairs.first;
-        if ((old >> 16) == hi || (old & 0xffff) == lo) {
-            if (!nib[old | hi_lo]) {
-                nib[old | hi_lo] = nib[old] | nib[hi_lo];
-            }
+        assert(contains(bc.leftfix_info, v));
+        const left_build_info &lbi = bc.leftfix_info.at(v);
+        if (lbi.has_lookaround) {
+            continue;
         }
-    }
-}
 
-static really_inline
-void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
-    for (u8 index = 0; data > 0; data >>= 1, index++) {
-        if (data & 1) {
-            // 0 ~ 7 bucket in first 16 bytes,
-            // 8 ~ 15 bucket in second 16 bytes.
-            if (bit_index >= 8) {
-                mask[index + 16] |= 1 << (bit_index - 8);
-            } else {
-                mask[index] |= 1 << bit_index;
-            }
-        }
-    }
-}
-
-static
-bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
-                    array<u8, 32> &lo_mask, u8 *bucket_select_hi,
-                    u8 *bucket_select_lo, u64a &neg_mask,
-                    u8 &bit_idx, size_t len) {
-    map<u32, u16> nib; // map every bucket to its bucket number.
-    map<u32, vector<s8>, cmpNibble> bucket2offsets;
-    s32 base_offset = look.front().offset;
-
-    bit_idx = 0;
-    neg_mask = ~0ULL;
-
-    getAllBuckets(look, bucket2offsets, neg_mask);
-
-    for (const auto &it : bucket2offsets) {
-        u32 hi_lo = it.first;
-        // New bucket.
-        if (!nib[hi_lo]) {
-            if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
-                return false;
-            }
-            nib[hi_lo] = 1 << bit_idx;
-
-            nibUpdate(nib, hi_lo);
-            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
-            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
-            bit_idx++;
-        }
-
-        DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
-
-        // Update bucket_select_mask.
-        u8 nib_hi = nib[hi_lo] >> 8;
-        u8 nib_lo = nib[hi_lo] & 0xff;
-        for (const auto offset : it.second) {
-            bucket_select_hi[offset - base_offset] |= nib_hi;
-            bucket_select_lo[offset - base_offset] |= nib_lo;
-        }
-    }
-    return true;
-}
-
-static
-unique_ptr<RoseInstruction>
-makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
-                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
-                    const array<u8, 32> &bucket_select_mask,
-                    u32 neg_mask, s32 base_offset,
-                    const RoseInstruction *end_inst) {
-    if (offset_range > 16 || bucket_idx > 8) {
-        return nullptr;
-    }
-    array<u8, 32> nib_mask;
-    array<u8, 16> bucket_select_mask_16;
-    copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
-    copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
-    copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
-         bucket_select_mask_16.begin());
-    return make_unique<RoseInstrCheckShufti16x8>
-           (nib_mask, bucket_select_mask_16,
-            neg_mask & 0xffff, base_offset, end_inst);
-}
-
-static
-unique_ptr<RoseInstruction>
-makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
-                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
-                    const array<u8, 32> &bucket_select_mask,
-                    u32 neg_mask, s32 base_offset,
-                    const RoseInstruction *end_inst) {
-    if (offset_range > 32 || bucket_idx > 8) {
-        return nullptr;
-    }
-
-    array<u8, 16> hi_mask_16;
-    array<u8, 16> lo_mask_16;
-    copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
-    copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
-    return make_unique<RoseInstrCheckShufti32x8>
-           (hi_mask_16, lo_mask_16, bucket_select_mask,
-            neg_mask, base_offset, end_inst);
-}
-
-static
-unique_ptr<RoseInstruction>
-makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
-                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
-                     const array<u8, 32> &bucket_select_mask_lo,
-                     const array<u8, 32> &bucket_select_mask_hi,
-                     u32 neg_mask, s32 base_offset,
-                     const RoseInstruction *end_inst) {
-    if (offset_range > 16 || bucket_idx > 16) {
-        return nullptr;
-    }
-
-    array<u8, 32> bucket_select_mask_32;
-    copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16,
-         bucket_select_mask_32.begin());
-    copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
-         bucket_select_mask_32.begin() + 16);
-    return make_unique<RoseInstrCheckShufti16x16>
-           (hi_mask, lo_mask, bucket_select_mask_32,
-            neg_mask & 0xffff, base_offset, end_inst);
-}
-static
-unique_ptr<RoseInstruction>
-makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
-                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
-                     const array<u8, 32> &bucket_select_mask_lo,
-                     const array<u8, 32> &bucket_select_mask_hi,
-                     u32 neg_mask, s32 base_offset,
-                     const RoseInstruction *end_inst) {
-    if (offset_range > 32 || bucket_idx > 16) {
-        return nullptr;
-    }
-
-    return make_unique<RoseInstrCheckShufti32x16>
-           (hi_mask, lo_mask, bucket_select_mask_hi,
-            bucket_select_mask_lo, neg_mask, base_offset, end_inst);
-}
-
-static
-bool makeRoleShufti(const vector<LookEntry> &look,
-                    RoseProgram &program) {
-
-    s32 base_offset = verify_s32(look.front().offset);
-    if (look.back().offset >= base_offset + 32) {
-        return false;
-    }
-
-    u8 bucket_idx = 0; // number of buckets
-    u64a neg_mask_64;
-    array<u8, 32> hi_mask;
-    array<u8, 32> lo_mask;
-    array<u8, 32> bucket_select_hi;
-    array<u8, 32> bucket_select_lo;
-    hi_mask.fill(0);
-    lo_mask.fill(0);
-    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
-    bucket_select_lo.fill(0);
-
-    if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
-                        bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
-        return false;
-    }
-    u32 neg_mask = (u32)neg_mask_64;
-
-    DEBUG_PRINTF("hi_mask %s\n",
-                 convertMaskstoString(hi_mask.data(), 32).c_str());
-    DEBUG_PRINTF("lo_mask %s\n",
-                 convertMaskstoString(lo_mask.data(), 32).c_str());
-    DEBUG_PRINTF("bucket_select_hi %s\n",
-                 convertMaskstoString(bucket_select_hi.data(), 32).c_str());
-    DEBUG_PRINTF("bucket_select_lo %s\n",
-                 convertMaskstoString(bucket_select_lo.data(), 32).c_str());
-
-    const auto *end_inst = program.end_instruction();
-    s32 offset_range = look.back().offset - base_offset + 1;
-
-    auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
-                                  bucket_select_lo, neg_mask, base_offset,
-                                  end_inst);
-    if (!ri) {
-        ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
-                                 bucket_select_lo, neg_mask, base_offset,
-                                 end_inst);
-    }
-    if (!ri) {
-        ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
-                                  bucket_select_lo, bucket_select_hi,
-                                  neg_mask, base_offset, end_inst);
-    }
-    if (!ri) {
-        ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
-                                  bucket_select_lo, bucket_select_hi,
-                                  neg_mask, base_offset, end_inst);
-    }
-    assert(ri);
-    program.add_before_end(move(ri));
-
-    return true;
-}
-
-/**
- * Builds a lookaround instruction, or an appropriate specialization if one is
- * available.
- */
-static
-void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
-                               RoseProgram &program) {
-    assert(!look.empty());
-
-    if (makeRoleByte(look, program)) {
-        return;
-    }
-
-    if (look.size() == 1) {
-        s8 offset = look.begin()->offset;
-        u32 look_idx, reach_idx;
-        vector<vector<LookEntry>> lookaround;
-        lookaround.emplace_back(look);
-        addLookaround(bc, lookaround, look_idx, reach_idx);
-        // We don't need look_idx here.
-        auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach_idx,
-                                                     program.end_instruction());
-        program.add_before_end(move(ri));
-        return;
-    }
-
-    if (makeRoleMask(look, program)) {
-        return;
-    }
-
-    if (makeRoleMask32(look, program)) {
-        return;
-    }
-
-    if (makeRoleShufti(look, program)) {
-        return;
-    }
-
-    u32 look_idx, reach_idx;
-    vector<vector<LookEntry>> lookaround;
-    lookaround.emplace_back(look);
-    addLookaround(bc, lookaround, look_idx, reach_idx);
-    u32 look_count = verify_u32(look.size());
-
-    auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
-                                                    look_count,
-                                                    program.end_instruction());
-    program.add_before_end(move(ri));
-}
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-static UNUSED
-string dumpMultiLook(const vector<LookEntry> &looks) {
-    ostringstream oss;
-    for (auto it = looks.begin(); it != looks.end(); ++it) {
-        if (it != looks.begin()) {
-            oss << ", ";
-        }
-        oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}";
-    }
-    return oss.str();
-}
-#endif
-
-static
-bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
-                             RoseProgram &program) {
-    if (multi_look.empty()) {
-        return false;
-    }
-
-    // find the base offset
-    assert(!multi_look[0].empty());
-    s32 base_offset = multi_look[0].front().offset;
-    s32 last_start = base_offset;
-    s32 end_offset = multi_look[0].back().offset;
-    size_t multi_len = 0;
-
-    for (const auto &look : multi_look) {
-        assert(look.size() > 0);
-        multi_len += look.size();
-
-        LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
-        ENSURE_AT_LEAST(&last_start, look.front().offset);
-        ENSURE_AT_LEAST(&end_offset, look.back().offset);
-    }
-
-    assert(last_start < 0);
-
-    if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
-        return false;
-    }
-
-    if (multi_len <= 16) {
-        multi_len = 16;
-    } else if (multi_len <= 32) {
-        multi_len = 32;
-    } else if (multi_len <= 64) {
-        multi_len = 64;
-    } else {
-        DEBUG_PRINTF("too long for multi-path\n");
-        return false;
-    }
-
-    vector<LookEntry> linear_look;
-    array<u8, 64> data_select_mask;
-    data_select_mask.fill(0);
-    u64a hi_bits_mask = 0;
-    u64a lo_bits_mask = 0;
-
-    for (const auto &look : multi_look) {
-        assert(linear_look.size() < 64);
-        lo_bits_mask |= 1LLU << linear_look.size();
-        for (const auto &entry : look) {
-            assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
-            data_select_mask[linear_look.size()] =
-                                          verify_u8(entry.offset - base_offset);
-            linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach);
-        }
-        hi_bits_mask |= 1LLU << (linear_look.size() - 1);
-    }
-
-    u8 bit_index = 0; // number of buckets
-    u64a neg_mask;
-    array<u8, 32> hi_mask;
-    array<u8, 32> lo_mask;
-    array<u8, 64> bucket_select_hi;
-    array<u8, 64> bucket_select_lo;
-    hi_mask.fill(0);
-    lo_mask.fill(0);
-    bucket_select_hi.fill(0);
-    bucket_select_lo.fill(0);
-
-    if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
-                        bucket_select_lo.data(), neg_mask, bit_index,
-                        multi_len)) {
-        return false;
-    }
-
-    DEBUG_PRINTF("hi_mask %s\n",
-                 convertMaskstoString(hi_mask.data(), 16).c_str());
-    DEBUG_PRINTF("lo_mask %s\n",
-                 convertMaskstoString(lo_mask.data(), 16).c_str());
-    DEBUG_PRINTF("bucket_select_hi %s\n",
-                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
-    DEBUG_PRINTF("bucket_select_lo %s\n",
-                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
-    DEBUG_PRINTF("data_select_mask %s\n",
-                 convertMaskstoString(data_select_mask.data(), 64).c_str());
-    DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
-    DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
-    DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
-    DEBUG_PRINTF("base_offset %d\n", base_offset);
-    DEBUG_PRINTF("last_start %d\n", last_start);
-
-    // Since we don't have 16x16 now, just call 32x16 instead.
-    if (bit_index > 8) {
-        assert(multi_len <= 32);
-        multi_len = 32;
-    }
-
-    const auto *end_inst = program.end_instruction();
-    assert(multi_len == 16 || multi_len == 32 || multi_len == 64);
-    if (multi_len == 16) {
-        neg_mask &= 0xffff;
-        assert(!(hi_bits_mask & ~0xffffULL));
-        assert(!(lo_bits_mask & ~0xffffULL));
-        assert(bit_index <=8);
-        array<u8, 32> nib_mask;
-        copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin());
-        copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16);
-
-        auto ri = make_unique<RoseInstrCheckMultipathShufti16x8>
-                  (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask,
-                   lo_bits_mask, neg_mask, base_offset, last_start, end_inst);
-        program.add_before_end(move(ri));
-    } else if (multi_len == 32) {
-        neg_mask &= 0xffffffff;
-        assert(!(hi_bits_mask & ~0xffffffffULL));
-        assert(!(lo_bits_mask & ~0xffffffffULL));
-        if (bit_index <= 8) {
-            auto ri = make_unique<RoseInstrCheckMultipathShufti32x8>
-                      (hi_mask, lo_mask, bucket_select_lo, data_select_mask,
-                       hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
-                       last_start, end_inst);
-            program.add_before_end(move(ri));
-        } else {
-            auto ri = make_unique<RoseInstrCheckMultipathShufti32x16>
-                      (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
-                       data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
-                       base_offset, last_start, end_inst);
-            program.add_before_end(move(ri));
-        }
-    } else {
-        auto ri = make_unique<RoseInstrCheckMultipathShufti64>
-                  (hi_mask, lo_mask, bucket_select_lo, data_select_mask,
-                   hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
-                   last_start, end_inst);
-        program.add_before_end(move(ri));
-    }
-    return true;
-}
-
-static
-void makeRoleMultipathLookaround(build_context &bc,
-                                 const vector<vector<LookEntry>> &multi_look,
-                                 RoseProgram &program) {
-    assert(!multi_look.empty());
-    assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
-    vector<vector<LookEntry>> ordered_look;
-    set<s32> look_offset;
-
-    assert(!multi_look[0].empty());
-    s32 last_start = multi_look[0][0].offset;
-
-    // build offset table.
-    for (const auto &look : multi_look) {
-        assert(look.size() > 0);
-        last_start = max(last_start, (s32)look.begin()->offset);
-
-        for (const auto &t : look) {
-            look_offset.insert(t.offset);
-        }
-    }
-
-    array<u8, MULTIPATH_MAX_LEN> start_mask;
-    if (multi_look.size() < MAX_LOOKAROUND_PATHS) {
-        start_mask.fill((1 << multi_look.size()) - 1);
-    } else {
-        start_mask.fill(0xff);
-    }
-
-    u32 path_idx = 0;
-    for (const auto &look : multi_look) {
-        for (const auto &t : look) {
-            assert(t.offset >= (int)*look_offset.begin());
-            size_t update_offset = t.offset - *look_offset.begin() + 1;
-            if (update_offset < start_mask.size()) {
-                start_mask[update_offset] &= ~(1 << path_idx);
-            }
-        }
-        path_idx++;
-    }
-
-    for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) {
-        start_mask[i] &= start_mask[i - 1];
-        DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
-    }
-
-    assert(look_offset.size() <= MULTIPATH_MAX_LEN);
-
-    assert(last_start < 0);
-
-    for (const auto &offset : look_offset) {
-        vector<LookEntry> multi_entry;
-        multi_entry.resize(MAX_LOOKAROUND_PATHS);
-
-        for (size_t i = 0; i < multi_look.size(); i++) {
-            for (const auto &t : multi_look[i]) {
-                if (t.offset == offset) {
-                    multi_entry[i] = t;
-                }
-            }
-        }
-        ordered_look.emplace_back(multi_entry);
-    }
-
-    u32 look_idx, reach_idx;
-    addLookaround(bc, ordered_look, look_idx, reach_idx);
-    u32 look_count = verify_u32(ordered_look.size());
-
-    auto ri = make_unique<RoseInstrMultipathLookaround>(look_idx, reach_idx,
-                                                        look_count, last_start,
-                                                        start_mask,
-                                                    program.end_instruction());
-    program.add_before_end(move(ri));
-}
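The start_mask computed above tells the multipath instruction which paths are still inactive at each byte position. A standalone sketch of that computation with simplified types (path count and offsets are illustrative; LookEntry and CharReach are elided):

    #include <array>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        // Three lookaround paths; each entry is a (negative) offset the path
        // checks, relative to the match end. Values are illustrative.
        std::vector<std::vector<int>> paths = {{-4, -3, -2}, {-3, -2, -1},
                                               {-2, -1}};
        const int min_offset = -4; // smallest offset across all paths

        // Bit i of start_mask[j] stays set while path i has not yet begun by
        // byte position j; MULTIPATH_MAX_LEN reduced to 8 for the sketch.
        std::array<uint8_t, 8> start_mask;
        start_mask.fill(static_cast<uint8_t>((1u << paths.size()) - 1));

        for (size_t i = 0; i < paths.size(); i++) {
            for (int off : paths[i]) {
                size_t update = static_cast<size_t>(off - min_offset) + 1;
                if (update < start_mask.size()) {
                    start_mask[update] &= static_cast<uint8_t>(~(1u << i));
                }
            }
        }
        // Once a path has started it stays started: AND down the array.
        for (size_t j = 1; j < start_mask.size(); j++) {
            start_mask[j] &= start_mask[j - 1];
        }
        for (size_t j = 0; j < start_mask.size(); j++) {
            printf("start_mask[%zu] = 0x%02x\n", j, start_mask[j]);
        }
    }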
-
-static
-void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc,
-                        RoseVertex v, RoseProgram &program) {
-    if (!build.cc.grey.roseLookaroundMasks) {
-        return;
-    }
-
-    vector<vector<LookEntry>> looks;
-
-    // Lookaround from leftfix (mandatory).
-    if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) {
-        DEBUG_PRINTF("using leftfix lookaround\n");
-        looks = bc.leftfix_info.at(v).lookaround;
-    }
-
-    // We may be able to find more lookaround info (advisory) and merge it
-    // in.
-    if (looks.size() <= 1) {
-        vector<LookEntry> look;
-        vector<LookEntry> look_more;
-        if (!looks.empty()) {
-            look = move(looks.front());
-        }
-        findLookaroundMasks(build, v, look_more);
-        mergeLookaround(look, look_more);
-        if (!look.empty()) {
-            makeLookaroundInstruction(bc, look, program);
-        }
-        return;
-    }
-
-    if (!makeRoleMultipathShufti(looks, program)) {
-        assert(looks.size() <= 8);
-        makeRoleMultipathLookaround(bc, looks, program);
-    }
-}
-
-static
-void makeRoleCheckLeftfix(const RoseBuildImpl &build,
-                          const map<RoseVertex, left_build_info> &leftfix_info,
-                          RoseVertex v, RoseProgram &program) {
-    auto it = leftfix_info.find(v);
-    if (it == end(leftfix_info)) {
-        return;
-    }
-    const left_build_info &lni = it->second;
-    if (lni.has_lookaround) {
-        return; // Leftfix completely implemented by lookaround.
-    }
-
-    assert(!build.cc.streaming ||
-           build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG);
-
-    bool is_prefix = build.isRootSuccessor(v);
-    const auto *end_inst = program.end_instruction();
-
-    unique_ptr<RoseInstruction> ri;
-    if (is_prefix) {
-        ri = make_unique<RoseInstrCheckPrefix>(lni.queue, build.g[v].left.lag,
-                                               build.g[v].left.leftfix_report,
-                                               end_inst);
-    } else {
-        ri = make_unique<RoseInstrCheckInfix>(lni.queue, build.g[v].left.lag,
-                                              build.g[v].left.leftfix_report,
-                                              end_inst);
-    }
-    program.add_before_end(move(ri));
-}
-
-static
-void makeAnchoredLiteralDelay(const RoseBuildImpl &build,
-                              const ProgramBuild &prog_build, u32 lit_id,
-                              RoseProgram &program) {
-    // Only relevant for literals in the anchored table.
-    const rose_literal_id &lit = build.literals.right.at(lit_id);
-    if (lit.table != ROSE_ANCHORED) {
-        return;
-    }
-
-    // If this literal match cannot occur after floatingMinLiteralMatchOffset,
-    // we do not need this check.
-    bool all_too_early = true;
-    rose_group groups = 0;
-
-    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
-    for (RoseVertex v : lit_vertices) {
-        if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) {
-            all_too_early = false;
-        }
-        groups |= build.g[v].groups;
-    }
-
-    if (all_too_early) {
-        return;
-    }
-
-    assert(contains(prog_build.anchored_programs, lit_id));
-    u32 anch_id = prog_build.anchored_programs.at(lit_id);
-
-    const auto *end_inst = program.end_instruction();
-    auto ri = make_unique<RoseInstrAnchoredDelay>(groups, anch_id, end_inst);
-    program.add_before_end(move(ri));
-}
-
-static
-void makeDedupe(const RoseBuildImpl &build, const Report &report,
-                RoseProgram &program) {
-    const auto *end_inst = program.end_instruction();
-    auto ri =
-        make_unique<RoseInstrDedupe>(report.quashSom, build.rm.getDkey(report),
-                                     report.offsetAdjust, end_inst);
-    program.add_before_end(move(ri));
-}
-
-static
-void makeDedupeSom(const RoseBuildImpl &build, const Report &report,
-                   RoseProgram &program) {
-    const auto *end_inst = program.end_instruction();
-    auto ri = make_unique<RoseInstrDedupeSom>(report.quashSom,
-                                              build.rm.getDkey(report),
-                                              report.offsetAdjust, end_inst);
-    program.add_before_end(move(ri));
-}
-
-static
-void makeCatchup(const RoseBuildImpl &build, bool needs_catchup,
-                 const flat_set<ReportID> &reports, RoseProgram &program) {
-    if (!needs_catchup) {
-        return;
-    }
-
-    // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run
-    // before reports are triggered.
-
-    auto report_needs_catchup = [&](const ReportID &id) {
-        const Report &report = build.rm.getReport(id);
-        return report.type != INTERNAL_ROSE_CHAIN;
-    };
-
-    if (!any_of(begin(reports), end(reports), report_needs_catchup)) {
-        DEBUG_PRINTF("none of the given reports needs catchup\n");
-        return;
-    }
-
-    program.add_before_end(make_unique<RoseInstrCatchUp>());
-}
-
-static
-void makeCatchupMpv(const RoseBuildImpl &build, bool needs_mpv_catchup,
-                    ReportID id, RoseProgram &program) {
-    if (!needs_mpv_catchup) {
-        return;
-    }
-
-    const Report &report = build.rm.getReport(id);
-    if (report.type == INTERNAL_ROSE_CHAIN) {
-        return;
-    }
-
-    program.add_before_end(make_unique<RoseInstrCatchUpMpv>());
-}
-
-static
-void writeSomOperation(const Report &report, som_operation *op) {
-    assert(op);
-
-    memset(op, 0, sizeof(*op));
-
-    switch (report.type) {
-    case EXTERNAL_CALLBACK_SOM_REL:
-        op->type = SOM_EXTERNAL_CALLBACK_REL;
-        break;
-    case INTERNAL_SOM_LOC_SET:
-        op->type = SOM_INTERNAL_LOC_SET;
-        break;
-    case INTERNAL_SOM_LOC_SET_IF_UNSET:
-        op->type = SOM_INTERNAL_LOC_SET_IF_UNSET;
-        break;
-    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
-        op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE;
-        break;
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
-        op->type = SOM_INTERNAL_LOC_SET_REV_NFA;
-        break;
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
-        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET;
-        break;
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
-        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE;
-        break;
-    case INTERNAL_SOM_LOC_COPY:
-        op->type = SOM_INTERNAL_LOC_COPY;
-        break;
-    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
-        op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE;
-        break;
-    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
-        op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE;
-        break;
-    case EXTERNAL_CALLBACK_SOM_STORED:
-        op->type = SOM_EXTERNAL_CALLBACK_STORED;
-        break;
-    case EXTERNAL_CALLBACK_SOM_ABS:
-        op->type = SOM_EXTERNAL_CALLBACK_ABS;
-        break;
-    case EXTERNAL_CALLBACK_SOM_REV_NFA:
-        op->type = SOM_EXTERNAL_CALLBACK_REV_NFA;
-        break;
-    case INTERNAL_SOM_LOC_SET_FROM:
-        op->type = SOM_INTERNAL_LOC_SET_FROM;
-        break;
-    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
-        op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE;
-        break;
-    default:
-        // This report doesn't correspond to a SOM operation.
-        assert(0);
-        throw CompileError("Unable to generate bytecode.");
-    }
-
-    op->onmatch = report.onmatch;
-
-    switch (report.type) {
-    case EXTERNAL_CALLBACK_SOM_REV_NFA:
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
-        op->aux.revNfaIndex = report.revNfaIndex;
-        break;
-    default:
-        op->aux.somDistance = report.somDistance;
-        break;
-    }
-}
-
-static
-void makeReport(const RoseBuildImpl &build, const ReportID id,
-                const bool has_som, RoseProgram &program) {
-    assert(id < build.rm.numReports());
-    const Report &report = build.rm.getReport(id);
-
-    RoseProgram report_block;
-    const RoseInstruction *end_inst = report_block.end_instruction();
-
-    // Handle min/max offset checks.
-    if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) {
-        auto ri = make_unique<RoseInstrCheckBounds>(report.minOffset,
-                                                    report.maxOffset, end_inst);
-        report_block.add_before_end(move(ri));
-    }
-
-    // If this report has an exhaustion key, we can check it in the program
-    // rather than waiting until we're in the callback adaptor.
-    if (report.ekey != INVALID_EKEY) {
-        auto ri = make_unique<RoseInstrCheckExhausted>(report.ekey, end_inst);
-        report_block.add_before_end(move(ri));
-    }
-
-    // External SOM reports that aren't passthrough need their SOM value
-    // calculated.
-    if (isExternalSomReport(report) &&
-        report.type != EXTERNAL_CALLBACK_SOM_PASS) {
-        auto ri = make_unique<RoseInstrSomFromReport>();
-        writeSomOperation(report, &ri->som);
-        report_block.add_before_end(move(ri));
-    }
-
-    // Min length constraint.
-    if (report.minLength > 0) {
-        assert(build.hasSom);
-        auto ri = make_unique<RoseInstrCheckMinLength>(
-            report.offsetAdjust, report.minLength, end_inst);
-        report_block.add_before_end(move(ri));
-    }
-
-    if (report.quashSom) {
-        report_block.add_before_end(make_unique<RoseInstrSomZero>());
-    }
-
-    switch (report.type) {
-    case EXTERNAL_CALLBACK:
-        if (!has_som) {
-            // Dedupe is only necessary if this report has a dkey, or if there
-            // are SOM reports to catch up.
-            bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
-            if (report.ekey == INVALID_EKEY) {
-                if (needs_dedupe) {
-                    report_block.add_before_end(
-                        make_unique<RoseInstrDedupeAndReport>(
-                            report.quashSom, build.rm.getDkey(report),
-                            report.onmatch, report.offsetAdjust, end_inst));
-                } else {
-                    report_block.add_before_end(make_unique<RoseInstrReport>(
-                        report.onmatch, report.offsetAdjust));
-                }
-            } else {
-                if (needs_dedupe) {
-                    makeDedupe(build, report, report_block);
-                }
-                report_block.add_before_end(make_unique<RoseInstrReportExhaust>(
-                    report.onmatch, report.offsetAdjust, report.ekey));
-            }
-        } else { // has_som
-            makeDedupeSom(build, report, report_block);
-            if (report.ekey == INVALID_EKEY) {
-                report_block.add_before_end(make_unique<RoseInstrReportSom>(
-                    report.onmatch, report.offsetAdjust));
-            } else {
-                report_block.add_before_end(
-                    make_unique<RoseInstrReportSomExhaust>(
-                        report.onmatch, report.offsetAdjust, report.ekey));
-            }
-        }
-        break;
-    case INTERNAL_SOM_LOC_SET:
-    case INTERNAL_SOM_LOC_SET_IF_UNSET:
-    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
-    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
-    case INTERNAL_SOM_LOC_COPY:
-    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
-    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
-    case INTERNAL_SOM_LOC_SET_FROM:
-    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
-        if (has_som) {
-            auto ri = make_unique<RoseInstrReportSomAware>();
-            writeSomOperation(report, &ri->som);
-            report_block.add_before_end(move(ri));
-        } else {
-            auto ri = make_unique<RoseInstrReportSomInt>();
-            writeSomOperation(report, &ri->som);
-            report_block.add_before_end(move(ri));
-        }
-        break;
-    case INTERNAL_ROSE_CHAIN: {
-        report_block.add_before_end(make_unique<RoseInstrReportChain>(
-            report.onmatch, report.topSquashDistance));
-        break;
-    }
-    case EXTERNAL_CALLBACK_SOM_REL:
-    case EXTERNAL_CALLBACK_SOM_STORED:
-    case EXTERNAL_CALLBACK_SOM_ABS:
-    case EXTERNAL_CALLBACK_SOM_REV_NFA:
-        makeDedupeSom(build, report, report_block);
-        if (report.ekey == INVALID_EKEY) {
-            report_block.add_before_end(make_unique<RoseInstrReportSom>(
-                report.onmatch, report.offsetAdjust));
-        } else {
-            report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
-                report.onmatch, report.offsetAdjust, report.ekey));
-        }
-        break;
-    case EXTERNAL_CALLBACK_SOM_PASS:
-        makeDedupeSom(build, report, report_block);
-        if (report.ekey == INVALID_EKEY) {
-            report_block.add_before_end(make_unique<RoseInstrReportSom>(
-                report.onmatch, report.offsetAdjust));
-        } else {
-            report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
-                report.onmatch, report.offsetAdjust, report.ekey));
-        }
-        break;
-
-    default:
-        assert(0);
-        throw CompileError("Unable to generate bytecode.");
-    }
-
-    assert(!report_block.empty());
-    program.add_block(move(report_block));
-}
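As a worked example of the EXTERNAL_CALLBACK arm above: a non-SOM report with a dedupe key, no exhaustion key and a constrained max offset assembles into a block of roughly this shape (instruction names follow the constructors above; operands are illustrative, and every failing check jumps to END):

    CHECK_BOUNDS 0, 100
    DEDUPE_AND_REPORT quash, dkey, onmatch, offset_adjust
    END

With no dkey and no SOM reports in the pattern set, the dedupe step collapses to a bare REPORT.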
-
-static
-void makeRoleReports(const RoseBuildImpl &build, const build_context &bc,
-                     RoseVertex v, RoseProgram &program) {
-    const auto &g = build.g;
-
-    /* we are a suffix/haig - need to update the role to provide som to
-     * the suffix. */
-    bool has_som = false;
-    if (g[v].left.tracksSom()) {
-        assert(contains(bc.leftfix_info, v));
-        const left_build_info &lni = bc.leftfix_info.at(v);
-        program.add_before_end(
-            make_unique<RoseInstrSomLeftfix>(lni.queue, g[v].left.lag));
-        has_som = true;
-    } else if (g[v].som_adjust) {
-        program.add_before_end(
-            make_unique<RoseInstrSomAdjust>(g[v].som_adjust));
-        has_som = true;
-    }
-
-    const auto &reports = g[v].reports;
-    makeCatchup(build, bc.needs_catchup, reports, program);
-
-    RoseProgram report_block;
-    for (ReportID id : reports) {
-        makeReport(build, id, has_som, report_block);
-    }
-    program.add_before_end(move(report_block));
-}
-
-static
-void makeRoleSuffix(const RoseBuildImpl &build, const build_context &bc,
-                    RoseVertex v, RoseProgram &program) {
-    const auto &g = build.g;
-    if (!g[v].suffix) {
-        return;
-    }
-    assert(contains(bc.suffixes, g[v].suffix));
-    u32 qi = bc.suffixes.at(g[v].suffix);
-    assert(contains(bc.engineOffsets, qi));
-    const NFA *nfa = get_nfa_from_blob(bc, qi);
-    u32 suffixEvent;
-    if (isContainerType(nfa->type)) {
-        auto tamaProto = g[v].suffix.tamarama.get();
-        assert(tamaProto);
-        u32 top = (u32)MQE_TOP_FIRST +
-                  tamaProto->top_remap.at(make_pair(g[v].index,
-                                                    g[v].suffix.top));
-        assert(top < MQE_INVALID);
-        suffixEvent = top;
-    } else if (isMultiTopType(nfa->type)) {
-        assert(!g[v].suffix.haig);
-        u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top;
-        assert(top < MQE_INVALID);
-        suffixEvent = top;
-    } else {
-        // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP
-        // event.
-        assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph));
-        suffixEvent = MQE_TOP;
-    }
-    program.add_before_end(
-        make_unique<RoseInstrTriggerSuffix>(qi, suffixEvent));
-}
-
-static
-void makeRoleGroups(const RoseBuildImpl &build, ProgramBuild &prog_build,
-                    RoseVertex v, RoseProgram &program) {
-    const auto &g = build.g;
-    rose_group groups = g[v].groups;
-    if (!groups) {
-        return;
-    }
-
-    // The set of "already on" groups as we process this vertex is the
-    // intersection of the groups set by our predecessors.
-    assert(in_degree(v, g) > 0);
-    rose_group already_on = ~rose_group{0};
-    for (const auto &u : inv_adjacent_vertices_range(v, g)) {
-        already_on &= prog_build.vertex_group_map.at(u);
-    }
-
-    DEBUG_PRINTF("already_on=0x%llx\n", already_on);
-    DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups);
-    DEBUG_PRINTF("groups=0x%llx\n", groups);
-
-    already_on &= ~prog_build.squashable_groups;
-    DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on);
-
-    // We don't *have* to mask off the groups that we know are already on, but
-    // this will make bugs more apparent.
-    groups &= ~already_on;
-
-    if (!groups) {
-        DEBUG_PRINTF("no new groups to set, skipping\n");
-        return;
-    }
-
-    program.add_before_end(make_unique<RoseInstrSetGroups>(groups));
-}
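A standalone sketch of the already_on group arithmetic above (mask values are illustrative; rose_group is a 64-bit group bitmask):

    #include <cstdint>
    #include <cstdio>

    using rose_group = uint64_t;

    int main() {
        // Groups known to be set by each predecessor (illustrative masks).
        const rose_group preds[] = {0x7, 0x5, 0xd};
        const rose_group squashable = 0x4; // may be switched off again later

        // Only groups set by *every* predecessor are guaranteed on...
        rose_group already_on = ~rose_group{0};
        for (rose_group p : preds) {
            already_on &= p;
        }
        // ...and squashable groups cannot be relied upon at all.
        already_on &= ~squashable;

        rose_group groups = 0x3; // groups this role would like on
        groups &= ~already_on;   // don't bother re-setting guaranteed ones
        printf("SET_GROUPS 0x%llx\n", (unsigned long long)groups); // 0x2
    }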
-
-static
-void makeRoleInfixTriggers(const RoseBuildImpl &build, const build_context &bc,
-                           RoseVertex u, RoseProgram &program) {
-    const auto &g = build.g;
-
-    vector<RoseInstrTriggerInfix> infix_program;
-
-    for (const auto &e : out_edges_range(u, g)) {
-        RoseVertex v = target(e, g);
-        if (!g[v].left) {
-            continue;
-        }
-
-        assert(contains(bc.leftfix_info, v));
-        const left_build_info &lbi = bc.leftfix_info.at(v);
-        if (lbi.has_lookaround) {
-            continue;
-        }
-
-        const NFA *nfa = get_nfa_from_blob(bc, lbi.queue);
-
-        // DFAs have no TOP_N support, so they get a classic MQE_TOP event.
-        u32 top;
-        if (isContainerType(nfa->type)) {
-            auto tamaProto = g[v].left.tamarama.get();
-            assert(tamaProto);
-            top = MQE_TOP_FIRST + tamaProto->top_remap.at(
-                                      make_pair(g[v].index, g[e].rose_top));
-            assert(top < MQE_INVALID);
-        } else if (!isMultiTopType(nfa->type)) {
-            assert(num_tops(g[v].left) == 1);
-            top = MQE_TOP;
-        } else {
-            top = MQE_TOP_FIRST + g[e].rose_top;
-            assert(top < MQE_INVALID);
-        }
-
-        infix_program.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top);
-    }
-
-    if (infix_program.empty()) {
-        return;
-    }
-
-    // Order, de-dupe and add instructions to the end of program.
-    sort_and_unique(infix_program, [](const RoseInstrTriggerInfix &a,
-                                      const RoseInstrTriggerInfix &b) {
-        return tie(a.cancel, a.queue, a.event) <
-               tie(b.cancel, b.queue, b.event);
-    });
-    for (const auto &ri : infix_program) {
-        program.add_before_end(make_unique<RoseInstrTriggerInfix>(ri));
-    }
-}
-
-static
-void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices,
-                      RoseVertex v, RoseProgram &program) {
-    // We only need this instruction if a state index has been assigned to this
-    // vertex.
-    auto it = roleStateIndices.find(v);
-    if (it == end(roleStateIndices)) {
-        return;
-    }
-    program.add_before_end(make_unique<RoseInstrSetState>(it->second));
-}
-
-static
-void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
-                         const RoseEdge &e, RoseProgram &program) {
-    const RoseGraph &g = build.g;
-    const RoseVertex u = source(e, g);
-
-    // We know that we can trust the anchored table (DFA) to always deliver us
-    // literals at the correct offset.
-    if (build.isAnchored(v)) {
-        DEBUG_PRINTF("literal in anchored table, skipping bounds check\n");
-        return;
-    }
-
-    // Use the minimum literal length.
-    u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));
-
-    u64a min_bound = g[e].minBound + lit_length;
-    u64a max_bound = g[e].maxBound == ROSE_BOUND_INF
-                         ? ROSE_BOUND_INF
-                         : g[e].maxBound + lit_length;
-
-    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
-        assert(g[u].fixedOffset());
-        // Make offsets absolute.
-        min_bound += g[u].max_offset;
-        if (max_bound != ROSE_BOUND_INF) {
-            max_bound += g[u].max_offset;
-        }
-    }
-
-    assert(max_bound <= ROSE_BOUND_INF);
-    assert(min_bound <= max_bound);
-
-    // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET
-    // (max value of a u64a) to represent ROSE_BOUND_INF.
-    if (max_bound == ROSE_BOUND_INF) {
-        max_bound = MAX_OFFSET;
-    }
-
-    // This instruction should be doing _something_ -- bounds should be tighter
-    // than just {length, inf}.
-    assert(min_bound > lit_length || max_bound < MAX_OFFSET);
-
-    const auto *end_inst = program.end_instruction();
-    program.add_before_end(
-        make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst));
-}
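A worked example of the bounds arithmetic above, under the assumption (for illustration only) that ROSE_BOUND_INF and MAX_OFFSET are both all-ones sentinels:

    #include <cstdint>
    #include <cstdio>

    using u64a = uint64_t;
    // Assumed sentinels for illustration; the real constants live in the build.
    static const u64a ROSE_BOUND_INF = ~u64a{0};
    static const u64a MAX_OFFSET = ~u64a{0};

    int main() {
        // Edge bounds are relative to the end of the pred's match; adding the
        // literal's minimum length moves them to the end of this role's match.
        u64a lit_length = 3;
        u64a min_bound = 2 + lit_length;  // g[e].minBound = 2
        u64a max_bound = 10 + lit_length; // g[e].maxBound = 10 (not INF)

        // With anchored history the pred is at a fixed offset: make absolute.
        u64a pred_offset = 7; // g[u].max_offset
        min_bound += pred_offset;
        if (max_bound != ROSE_BOUND_INF) {
            max_bound += pred_offset;
        }
        if (max_bound == ROSE_BOUND_INF) {
            max_bound = MAX_OFFSET;
        }
        printf("CHECK_BOUNDS %llu..%llu\n", (unsigned long long)min_bound,
               (unsigned long long)max_bound); // 12..20
    }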
-
-static
-void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v,
-                             RoseProgram &program) {
-    u32 handled_key;
-    if (contains(prog_build.handledKeys, v)) {
-        handled_key = prog_build.handledKeys.at(v);
-    } else {
-        handled_key = verify_u32(prog_build.handledKeys.size());
-        prog_build.handledKeys.emplace(v, handled_key);
-    }
-
-    const auto *end_inst = program.end_instruction();
-    auto ri = make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst);
-    program.add_before_end(move(ri));
-}
-
-static
-void makeRoleEagerEodReports(const RoseBuildImpl &build, build_context &bc,
-                             RoseVertex v, RoseProgram &program) {
-    RoseProgram eod_program;
-
-    for (const auto &e : out_edges_range(v, build.g)) {
-        if (canEagerlyReportAtEod(build, e)) {
-            RoseProgram block;
-            makeRoleReports(build, bc, target(e, build.g), block);
-            eod_program.add_block(move(block));
-        }
-    }
-
-    if (eod_program.empty()) {
-        return;
-    }
-
-    if (!onlyAtEod(build, v)) {
-        // The rest of our program wasn't EOD anchored, so we need to guard
-        // these reports with a check.
-        const auto *end_inst = eod_program.end_instruction();
-        eod_program.insert(begin(eod_program),
-                           make_unique<RoseInstrCheckOnlyEod>(end_inst));
-    }
-
-    program.add_before_end(move(eod_program));
-}
-
-/* Makes a program for a role/vertex given a specific pred/in_edge. */
-static
-RoseProgram makeRoleProgram(const RoseBuildImpl &build, build_context &bc,
-                            ProgramBuild &prog_build, const RoseEdge &e) {
-    const RoseGraph &g = build.g;
-    auto v = target(e, g);
-
-    RoseProgram program;
-
-    // First, add program instructions that enforce preconditions without
-    // effects.
-
-    if (onlyAtEod(build, v)) {
-        DEBUG_PRINTF("only at eod\n");
-        const auto *end_inst = program.end_instruction();
-        program.add_before_end(make_unique<RoseInstrCheckOnlyEod>(end_inst));
-    }
-
-    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
-        makeRoleCheckBounds(build, v, e, program);
-    }
-
-    // This role program may be triggered by different predecessors, with
-    // different offset bounds. We must ensure we put this check/set operation
-    // after the bounds check to deal with this case.
-    if (in_degree(v, g) > 1) {
-        assert(!build.isRootSuccessor(v));
-        makeRoleCheckNotHandled(prog_build, v, program);
-    }
-
-    makeRoleLookaround(build, bc, v, program);
-    makeRoleCheckLeftfix(build, bc.leftfix_info, v, program);
-
-    // Next, we can add program instructions that have effects. This must be
-    // done as a series of blocks, as some of them (like reports) are
-    // escapable.
-
-    RoseProgram effects_block;
-
-    RoseProgram reports_block;
-    makeRoleReports(build, bc, v, reports_block);
-    effects_block.add_block(move(reports_block));
-
-    RoseProgram infix_block;
-    makeRoleInfixTriggers(build, bc, v, infix_block);
-    effects_block.add_block(move(infix_block));
-
-    // Note: SET_GROUPS instruction must be after infix triggers, as an infix
-    // going dead may switch off groups.
-    RoseProgram groups_block;
-    makeRoleGroups(build, prog_build, v, groups_block);
-    effects_block.add_block(move(groups_block));
-
-    RoseProgram suffix_block;
-    makeRoleSuffix(build, bc, v, suffix_block);
-    effects_block.add_block(move(suffix_block));
-
-    RoseProgram state_block;
-    makeRoleSetState(bc.roleStateIndices, v, state_block);
-    effects_block.add_block(move(state_block));
-
-    // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if
-    // the program doesn't have one already).
-    RoseProgram eod_block;
-    makeRoleEagerEodReports(build, bc, v, eod_block);
-    effects_block.add_block(move(eod_block));
-
-    /* a 'ghost role' may do nothing if we know that its groups are already set
-     * - in this case we can avoid producing a program at all. */
-    if (effects_block.empty()) {
-        return {};
-    }
-
-    program.add_before_end(move(effects_block));
-    return program;
-}
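Read end to end, the routine above produces a role program of roughly this shape for a typical non-root role (a sketch: each check appears only when the role needs it, and each failing check jumps to END):

    CHECK_ONLY_EOD                (EOD-anchored roles)
    CHECK_BOUNDS                  (anchored history on the in-edge)
    CHECK_NOT_HANDLED             (more than one predecessor)
    CHECK_LOOKAROUND              (or a multipath variant)
    CHECK_PREFIX / CHECK_INFIX    (leftfix engine check)
    <reports> <infix triggers> SET_GROUPS <suffix> SET_STATE <eager EOD>
    END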
-
-static
-u32 writeBoundaryProgram(const RoseBuildImpl &build, build_context &bc,
-                         const set<ReportID> &reports) {
-    if (reports.empty()) {
-        return 0;
-    }
-
-    // Note: no CATCHUP instruction is necessary in the boundary case, as we
-    // should always be caught up (and may not even have the resources in
-    // scratch to support it).
-
-    const bool has_som = false;
-    RoseProgram program;
-    for (const auto &id : reports) {
-        makeReport(build, id, has_som, program);
-    }
-    return writeProgram(bc, move(program));
-}
-
-static
-void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc,
-                          const BoundaryReports &boundary,
-                          const DerivedBoundaryReports &dboundary,
-                          RoseBoundaryReports &out) {
-    DEBUG_PRINTF("report ^:  %zu\n", boundary.report_at_0.size());
-    DEBUG_PRINTF("report $:  %zu\n", boundary.report_at_eod.size());
-    DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
-
-    out.reportEodOffset =
-        writeBoundaryProgram(build, bc, boundary.report_at_eod);
-    out.reportZeroOffset =
-        writeBoundaryProgram(build, bc, boundary.report_at_0);
-    out.reportZeroEodOffset =
-        writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full);
-}
-
-static
-unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) {
-    const auto &g = build.g;
-
-    u32 state = 0;
-    unordered_map<RoseVertex, u32> roleStateIndices;
-    for (auto v : vertices_range(g)) {
-        // Virtual vertices (starts, EOD accept vertices) never need state
-        // indices.
-        if (build.isVirtualVertex(v)) {
-            continue;
-        }
-
-        // We only need a state index if we have successors that are not
-        // eagerly-reported EOD vertices.
-        bool needs_state_index = false;
-        for (const auto &e : out_edges_range(v, g)) {
-            if (!canEagerlyReportAtEod(build, e)) {
-                needs_state_index = true;
-                break;
-            }
-        }
-
-        if (!needs_state_index) {
-            continue;
-        }
-
-        /* TODO: also don't need a state index if all edges are nfa based */
-        roleStateIndices.emplace(v, state++);
-    }
-
-    DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state,
-                 num_vertices(g));
-
-    return roleStateIndices;
-}
-
-static
-bool hasUsefulStops(const left_build_info &build) {
-    for (u32 i = 0; i < N_CHARS; i++) {
-        if (build.stopAlphabet[i]) {
-            return true;
-        }
-    }
-    return false;
-}
-
-static
-void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
-                        const set<u32> &eager_queues,
-                        u32 leftfixBeginQueue, u32 leftfixCount,
-                        vector<LeftNfaInfo> &leftTable, u32 *laggedRoseCount,
-                        size_t *history) {
-    const RoseGraph &g = tbi.g;
-    const CompileContext &cc = tbi.cc;
-
-    ue2::unordered_set<u32> done_core;
-
-    leftTable.resize(leftfixCount);
-
-    u32 lagIndex = 0;
-
-    for (RoseVertex v : vertices_range(g)) {
-        if (!g[v].left) {
-            continue;
-        }
-        assert(contains(bc.leftfix_info, v));
-        const left_build_info &lbi = bc.leftfix_info.at(v);
-        if (lbi.has_lookaround) {
-            continue;
-        }
-
-        assert(lbi.queue >= leftfixBeginQueue);
-        u32 left_index = lbi.queue - leftfixBeginQueue;
-        assert(left_index < leftfixCount);
+        assert(lbi.queue >= leftfixBeginQueue);
+        u32 left_index = lbi.queue - leftfixBeginQueue;
+        assert(left_index < leftfixCount);
 
         /* seedy hack to make miracles more effective.
          *
@@ -4430,445 +2736,6 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
     *laggedRoseCount = lagIndex;
 }
 
-static
-void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block,
-                        RoseProgram &program) {
-    // Prepend an instruction to check the pred state is on.
-    const auto *end_inst = pred_block.end_instruction();
-    assert(!pred_block.empty());
-    pred_block.insert(begin(pred_block),
-                      make_unique<RoseInstrCheckState>(pred_state, end_inst));
-    program.add_block(move(pred_block));
-}
-
-static
-void addPredBlocksAny(map<u32, RoseProgram> &pred_blocks, u32 num_states,
-                      RoseProgram &program) {
-    RoseProgram sparse_program;
-
-    vector<u32> keys;
-    for (const u32 &key : pred_blocks | map_keys) {
-        keys.push_back(key);
-    }
-
-    const RoseInstruction *end_inst = sparse_program.end_instruction();
-    auto ri = make_unique<RoseInstrSparseIterAny>(num_states, keys, end_inst);
-    sparse_program.add_before_end(move(ri));
-
-    RoseProgram &block = pred_blocks.begin()->second;
-    assert(!block.empty());
-
-    /* we no longer need the check handled instruction as all the pred-role
-     * blocks are being collapsed together */
-    stripCheckHandledInstruction(block);
-
-    sparse_program.add_before_end(move(block));
-    program.add_block(move(sparse_program));
-}
-
-static
-void addPredBlocksMulti(map<u32, RoseProgram> &pred_blocks,
-                        u32 num_states, RoseProgram &program) {
-    assert(!pred_blocks.empty());
-
-    RoseProgram sparse_program;
-    const RoseInstruction *end_inst = sparse_program.end_instruction();
-    vector<pair<u32, const RoseInstruction *>> jump_table;
-
-    // BEGIN instruction.
-    auto ri_begin = make_unique<RoseInstrSparseIterBegin>(num_states, end_inst);
-    RoseInstrSparseIterBegin *begin_inst = ri_begin.get();
-    sparse_program.add_before_end(move(ri_begin));
-
-    // NEXT instructions, one per pred program.
-    u32 prev_key = pred_blocks.begin()->first;
-    for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) {
-        auto ri = make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst,
-                                                       end_inst);
-        sparse_program.add_before_end(move(ri));
-        prev_key = it->first;
-    }
-
-    // Splice in each pred program after its BEGIN/NEXT.
-    auto out_it = begin(sparse_program);
-    for (auto &m : pred_blocks) {
-        u32 key = m.first;
-        RoseProgram &flat_prog = m.second;
-        assert(!flat_prog.empty());
-        const size_t block_len = flat_prog.size() - 1; // without INSTR_END.
-
-        assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) ||
-               dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get()));
-        out_it = sparse_program.insert(++out_it, move(flat_prog));
-
-        // Jump table target for this key is the beginning of the block we just
-        // spliced in.
-        jump_table.emplace_back(key, out_it->get());
-
-        assert(distance(begin(sparse_program), out_it) + block_len <=
-               sparse_program.size());
-        advance(out_it, block_len);
-    }
-
-    // Write the jump table back into the SPARSE_ITER_BEGIN instruction.
-    begin_inst->jump_table = move(jump_table);
-
-    program.add_block(move(sparse_program));
-}
-
-static
-void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states,
-                   RoseProgram &program) {
-    const size_t num_preds = pred_blocks.size();
-    if (num_preds == 0) {
-        return;
-    }
-
-    if (num_preds == 1) {
-        const auto head = pred_blocks.begin();
-        addPredBlockSingle(head->first, head->second, program);
-        return;
-    }
-
-    // First, see if all our blocks are equivalent, in which case we can
-    // collapse them down into one.
-    const auto &blocks = pred_blocks | map_values;
-    if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) {
-            return RoseProgramEquivalence()(*begin(blocks), block);
-        })) {
-        DEBUG_PRINTF("all blocks equiv\n");
-        addPredBlocksAny(pred_blocks, num_states, program);
-        return;
-    }
-
-    addPredBlocksMulti(pred_blocks, num_states, program);
-}
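In the multi-pred case, the program spliced together by addPredBlocksMulti above is laid out roughly as follows (a sketch; the jump table written back into SPARSE_ITER_BEGIN points each live state key at the start of its block):

    SPARSE_ITER_BEGIN num_states, jump_table
      <block for key0>
    SPARSE_ITER_NEXT key0
      <block for key1>
    SPARSE_ITER_NEXT key1
      <block for key2>
    END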
-
-static
-void makePushDelayedInstructions(const RoseBuildImpl &build,
-                                 ProgramBuild &prog_build, u32 lit_id,
-                                 RoseProgram &program) {
-    const auto &info = build.literal_info.at(lit_id);
-
-    vector<RoseInstrPushDelayed> delay_instructions;
-
-    for (const auto &delayed_lit_id : info.delayed_ids) {
-        DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id);
-        assert(contains(prog_build.delay_programs, delayed_lit_id));
-        u32 delay_id = prog_build.delay_programs.at(delayed_lit_id);
-        const auto &delay_lit = build.literals.right.at(delayed_lit_id);
-        delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id);
-    }
-
-    sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a,
-                                           const RoseInstrPushDelayed &b) {
-        return tie(a.delay, a.index) < tie(b.delay, b.index);
-    });
-
-    for (const auto &ri : delay_instructions) {
-        program.add_before_end(make_unique<RoseInstrPushDelayed>(ri));
-    }
-}
-
-static
-void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id,
-                               RoseProgram &program) {
-    const auto &info = build.literal_info.at(lit_id);
-    rose_group groups = info.group_mask;
-    if (!groups) {
-        return;
-    }
-    program.add_before_end(make_unique<RoseInstrCheckGroups>(groups));
-}
-
-static
-void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc,
-                                 u32 lit_id, RoseProgram &program) {
-    const auto &info = build.literal_info.at(lit_id);
-    if (!info.requires_benefits) {
-        return;
-    }
-
-    vector<LookEntry> look;
-
-    const ue2_literal &s = build.literals.right.at(lit_id).s;
-    DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id,
-                 dumpString(s).c_str());
-    assert(s.length() <= MAX_MASK2_WIDTH);
-    s32 i = 0 - s.length();
-    for (const auto &e : s) {
-        if (!e.nocase) {
-            look.emplace_back(verify_s8(i), e);
-        }
-        i++;
-    }
-
-    assert(!look.empty());
-    makeLookaroundInstruction(bc, look, program);
-}
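A standalone sketch of the mask construction above: only the case-sensitive positions of the literal get a check, at offsets counted back from the end of the match (the literal and nocase flags are illustrative; LookEntry is reduced to an (offset, char) pair):

    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
        std::string lit = "FooBAR";
        std::vector<bool> nocase = {false, true, true, false, false, false};

        std::vector<std::pair<int, char>> look;
        int i = -static_cast<int>(lit.length()); // offsets end at -1
        for (size_t k = 0; k < lit.size(); k++, i++) {
            if (!nocase[k]) { // only case-sensitive positions need a check
                look.emplace_back(i, lit[k]);
            }
        }
        for (const auto &e : look) {
            printf("check offset %d for '%c'\n", e.first, e.second);
        }
    }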
-
-static
-void makeGroupSquashInstruction(const RoseBuildImpl &build,
-                                u32 lit_id,
-                                RoseProgram &program) {
-    const auto &info = build.literal_info.at(lit_id);
-    if (!info.squash_group) {
-        return;
-    }
-
-    rose_group groups = info.group_mask;
-    if (!groups) {
-        return;
-    }
-
-    DEBUG_PRINTF("squashes 0x%llx\n", groups);
-    program.add_before_end(
-        make_unique<RoseInstrSquashGroups>(~groups)); // Note negated.
-}
-
-static
-u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
-    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
-    assert(!lit_vertices.empty());
-
-    u32 max_offset = 0;
-    for (const auto &v : lit_vertices) {
-        max_offset = max(max_offset, build.g[v].max_offset);
-    }
-
-    return max_offset;
-}
-
-static
-u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
-    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
-    assert(!lit_vertices.empty());
-
-    u32 min_offset = UINT32_MAX;
-    for (const auto &v : lit_vertices) {
-        min_offset = min(min_offset, build.g[v].min_offset);
-    }
-
-    return min_offset;
-}
-
-static
-void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id,
-                                  const vector<RoseEdge> &lit_edges,
-                                  u32 floatingMinLiteralMatchOffset,
-                                  RoseProgram &program) {
-    if (lit_edges.empty()) {
-        return;
-    }
-
-    if (floatingMinLiteralMatchOffset == 0) {
-        return;
-    }
-
-    RoseVertex v = target(lit_edges.front(), build.g);
-    if (!build.isFloating(v)) {
-        return;
-    }
-
-    const auto &lit = build.literals.right.at(lit_id);
-    size_t min_len = lit.elength();
-    u32 min_offset = findMinOffset(build, lit_id);
-    DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len,
-                 min_offset, floatingMinLiteralMatchOffset);
-
-    // If we can't match before the min offset, we don't need the check.
-    if (min_len >= floatingMinLiteralMatchOffset) {
-        DEBUG_PRINTF("no need for check, min is %u\n",
-                     floatingMinLiteralMatchOffset);
-        return;
-    }
-
-    assert(min_offset >= floatingMinLiteralMatchOffset);
-    assert(min_offset < UINT32_MAX);
-
-    DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
-    const auto *end_inst = program.end_instruction();
-    program.add_before_end(
-        make_unique<RoseInstrCheckLitEarly>(min_offset, end_inst));
-}
-
-static
-void makeCheckLiteralInstruction(const RoseBuildImpl &build, u32 lit_id,
-                                 size_t longLitLengthThreshold,
-                                 RoseProgram &program) {
-    assert(longLitLengthThreshold > 0);
-
-    DEBUG_PRINTF("lit_id=%u, long lit threshold %zu\n", lit_id,
-                 longLitLengthThreshold);
-
-    if (build.isDelayed(lit_id)) {
-        return;
-    }
-
-    const rose_literal_id &lit = build.literals.right.at(lit_id);
-
-    if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
-        DEBUG_PRINTF("lit short enough to not need confirm\n");
-        return;
-    }
-
-    // Check resource limits as well.
-    if (lit.s.length() > build.cc.grey.limitLiteralLength) {
-        throw ResourceLimitError();
-    }
-
-    if (lit.s.length() <= longLitLengthThreshold) {
-        DEBUG_PRINTF("is a medium-length literal\n");
-        const auto *end_inst = program.end_instruction();
-        unique_ptr<RoseInstruction> ri;
-        if (lit.s.any_nocase()) {
-            ri = make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(),
-                                                         end_inst);
-        } else {
-            ri = make_unique<RoseInstrCheckMedLit>(lit.s.get_string(),
-                                                   end_inst);
-        }
-        program.add_before_end(move(ri));
-        return;
-    }
-
-    // Long literal support should only really be used for the floating table
-    // in streaming mode.
-    assert(lit.table == ROSE_FLOATING && build.cc.streaming);
-
-    DEBUG_PRINTF("is a long literal\n");
-
-    const auto *end_inst = program.end_instruction();
-    unique_ptr<RoseInstruction> ri;
-    if (lit.s.any_nocase()) {
-        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(),
-                                                      end_inst);
-    } else {
-        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst);
-    }
-    program.add_before_end(move(ri));
-}
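The routine above selects one of three confirm regimes by literal length. A standalone sketch (both limits are assumptions for illustration; the real values come from ROSE_SHORT_LITERAL_LEN_MAX and the build's longLitLengthThreshold):

    #include <cstdio>
    #include <string>

    int main() {
        const size_t short_max = 8;       // assumed ROSE_SHORT_LITERAL_LEN_MAX
        const size_t long_threshold = 24; // assumed longLitLengthThreshold

        for (const std::string s : {"short", "a medium sized literal!",
                                    "a literal long enough for the stream "
                                    "state table"}) {
            if (s.length() <= short_max) {
                printf("%-48s -> no confirm needed\n", s.c_str());
            } else if (s.length() <= long_threshold) {
                printf("%-48s -> CHECK_MED_LIT\n", s.c_str());
            } else {
                printf("%-48s -> CHECK_LONG_LIT\n", s.c_str());
            }
        }
    }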
-
-static
-bool hasDelayedLiteral(const RoseBuildImpl &build,
-                       const vector<RoseEdge> &lit_edges) {
-    auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1);
-    for (const auto &e : lit_edges) {
-        auto v = target(e, build.g);
-        const auto &lits = build.g[v].literals;
-        if (any_of(begin(lits), end(lits), is_delayed)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-static
-RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
-                                  build_context &bc, ProgramBuild &prog_build,
-                                  u32 lit_id, const vector<RoseEdge> &lit_edges,
-                                  bool is_anchored_replay_program) {
-    RoseProgram program;
-
-    // Check long literal info.
-    makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold,
-                                program);
-
-    // Check lit mask.
-    makeCheckLitMaskInstruction(build, bc, lit_id, program);
-
-    // Check literal groups. This is an optimisation that we only perform for
-    // delayed literals, as their groups may be switched off; ordinarily, we
-    // can trust the HWLM matcher.
-    if (hasDelayedLiteral(build, lit_edges)) {
-        makeGroupCheckInstruction(build, lit_id, program);
-    }
-
-    // Add instructions for pushing delayed matches, if there are any.
-    makePushDelayedInstructions(build, prog_build, lit_id, program);
-
-    // Add pre-check for early literals in the floating table.
-    makeCheckLitEarlyInstruction(build, lit_id, lit_edges,
-                                 prog_build.floatingMinLiteralMatchOffset,
-                                 program);
-
-    /* Check if we are able to deliver matches from the anchored table now */
-    if (!is_anchored_replay_program) {
-        makeAnchoredLiteralDelay(build, prog_build, lit_id, program);
-    }
-
-    return program;
-}
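In order, then, the initial program that runs before any of a literal's role work looks roughly like this (a sketch; every entry is optional and each failing check jumps past the literal's program):

    CHECK_LONG_LIT / CHECK_MED_LIT  (confirm beyond the HWLM fragment)
    CHECK_LOOKAROUND                (benefits mask)
    CHECK_GROUPS                    (only when delayed children exist)
    PUSH_DELAYED ...                (one per delayed child)
    CHECK_LIT_EARLY                 (floating literal before the global min offset)
    ANCHORED_DELAY                  (skipped for the anchored replay program)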
-
-static
-RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
-                               ProgramBuild &prog_build, u32 lit_id,
-                               const vector<RoseEdge> &lit_edges,
-                               bool is_anchored_replay_program) {
-    const auto &g = build.g;
-
-    DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());
-
-    // Construct initial program up front, as its early checks must be able
-    // to jump to end and terminate processing for this literal.
-    auto lit_program = makeLitInitialProgram(build, bc, prog_build, lit_id,
-                                             lit_edges,
-                                             is_anchored_replay_program);
-
-    RoseProgram role_programs;
-
-    // Predecessor state id -> program block.
-    map<u32, RoseProgram> pred_blocks;
-
-    // Construct sparse iter sub-programs.
-    for (const auto &e : lit_edges) {
-        const auto &u = source(e, g);
-        if (build.isAnyStart(u)) {
-            continue; // Root roles are not handled with sparse iterator.
-        }
-        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
-                     g[target(e, g)].index);
-        assert(contains(bc.roleStateIndices, u));
-        u32 pred_state = bc.roleStateIndices.at(u);
-        auto role_prog = makeRoleProgram(build, bc, prog_build, e);
-        if (!role_prog.empty()) {
-            pred_blocks[pred_state].add_block(move(role_prog));
-        }
-    }
-
-    // Add blocks to deal with non-root edges (triggered by sparse iterator or
-    // mmbit_isset checks).
-    addPredBlocks(pred_blocks, bc.roleStateIndices.size(), role_programs);
-
-    // Add blocks to handle root roles.
-    for (const auto &e : lit_edges) {
-        const auto &u = source(e, g);
-        if (!build.isAnyStart(u)) {
-            continue;
-        }
-        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
-                     g[target(e, g)].index);
-        role_programs.add_block(makeRoleProgram(build, bc, prog_build, e));
-    }
-
-    if (lit_id == build.eod_event_literal_id) {
-        /* Note: does not require the lit initial program */
-        assert(build.eod_event_literal_id != MO_INVALID_IDX);
-        return role_programs;
-    }
-
-    /* Instructions to run even if a role program bails out */
-    RoseProgram unconditional_block;
-
-    // Literal may squash groups.
-    makeGroupSquashInstruction(build, lit_id, unconditional_block);
-
-    role_programs.add_block(move(unconditional_block));
-    lit_program.add_before_end(move(role_programs));
-
-    return lit_program;
-}
-
 static
 RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
                                ProgramBuild &prog_build, u32 lit_id,
@@ -4885,49 +2752,10 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
         edges_ptr = &no_edges;
     }
 
-    return makeLiteralProgram(build, bc, prog_build, lit_id, *edges_ptr,
-                              is_anchored_replay_program);
-}
-
-/**
- * \brief Consumes list of program blocks corresponding to different literals,
- * checks them for duplicates and then concatenates them into one program.
- *
- * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is
- * inserted to prevent the work_done flag being contaminated by early blocks.
- */
-static
-RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks) {
-    RoseProgram program;
-
-    DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size());
-
-    sort(blocks.begin(), blocks.end(),
-         [](const RoseProgram &a, const RoseProgram &b) {
-             RoseProgramHash hasher;
-             return hasher(a) < hasher(b);
-         });
-
-    blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()),
-                 blocks.end());
-
-    DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());
-
-    for (auto &block : blocks) {
-        /* If we have multiple blocks from different literals and any of them
-         * squash groups, we will have to add a CLEAR_WORK_DONE instruction to
-         * each literal program block to clear the work_done flag so that it is
-         * only set if a state has been set by that block. */
-        if (!program.empty() && reads_work_done_flag(block)) {
-            RoseProgram clear_block;
-            clear_block.add_before_end(make_unique<RoseInstrClearWorkDone>());
-            program.add_block(move(clear_block));
-        }
-
-        program.add_block(move(block));
-    }
-
-    return program;
+    return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
+                              bc.engine_info_by_queue, bc.lookarounds,
+                              bc.roleStateIndices, prog_build, lit_id,
+                              *edges_ptr, is_anchored_replay_program);
 }
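The dedupe step in assembleProgramBlocks above sorts blocks by hash so equivalent blocks become adjacent, then erases neighbouring duplicates. A standalone sketch of the same scheme with strings standing in for program blocks:

    #include <algorithm>
    #include <cstdio>
    #include <functional>
    #include <string>
    #include <vector>

    int main() {
        // Strings stand in for RoseProgram blocks; the hash plays the role of
        // RoseProgramHash and operator== the role of RoseProgramEquivalence.
        std::vector<std::string> blocks = {"B", "A", "B", "C", "A"};

        std::sort(blocks.begin(), blocks.end(),
                  [](const std::string &a, const std::string &b) {
                      return std::hash<std::string>{}(a) <
                             std::hash<std::string>{}(b);
                  });
        blocks.erase(std::unique(blocks.begin(), blocks.end()), blocks.end());

        printf("%zu unique blocks\n", blocks.size()); // 3
    }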
 
 static
@@ -4947,38 +2775,6 @@ RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc,
     return assembleProgramBlocks(move(blocks));
 }
 
-static
-u32 writeDelayRebuildProgram(const RoseBuildImpl &build, build_context &bc,
-                             ProgramBuild &prog_build,
-                             const vector<u32> &lit_ids) {
-    assert(!lit_ids.empty());
-
-    if (!build.cc.streaming) {
-        return 0; // We only do delayed rebuild in streaming mode.
-    }
-
-    vector<RoseProgram> blocks;
-
-    for (const auto &lit_id : lit_ids) {
-        DEBUG_PRINTF("lit_id=%u\n", lit_id);
-        const auto &info = build.literal_info.at(lit_id);
-        if (info.delayed_ids.empty()) {
-            continue; // No delayed IDs, no work to do.
-        }
-
-        RoseProgram prog;
-        makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold,
-                                    prog);
-        makeCheckLitMaskInstruction(build, bc, lit_id, prog);
-        makePushDelayedInstructions(build, prog_build, lit_id, prog);
-        blocks.push_back(move(prog));
-    }
-
-    auto program = assembleProgramBlocks(move(blocks));
-
-    return writeProgram(bc, move(program));
-}
-
 /**
  * \brief Returns a map from literal ID to a list of edges leading into
  * vertices with that literal ID.
@@ -5118,8 +2914,15 @@ void buildLiteralPrograms(const RoseBuildImpl &build,
                                             lit_edge_map);
         frag.lit_program_offset = writeProgram(bc, move(lit_prog));
 
-        frag.delay_program_offset
-            = writeDelayRebuildProgram(build, bc, prog_build, frag.lit_ids);
+        // We only do delayed rebuild in streaming mode.
+        if (!build.cc.streaming) {
+            continue;
+        }
+
+        auto rebuild_prog = makeDelayRebuildProgram(build,
+                                                    bc.lookarounds, prog_build,
+                                                    frag.lit_ids);
+        frag.delay_program_offset = writeProgram(bc, move(rebuild_prog));
     }
 }
 
@@ -5267,10 +3070,7 @@ pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build,
     programs.reserve(reports.size());
 
     for (ReportID id : reports) {
-        RoseProgram program;
-        const bool has_som = false;
-        makeCatchupMpv(build, bc.needs_mpv_catchup, id, program);
-        makeReport(build, id, has_som, program);
+        auto program = makeReportProgram(build, bc.needs_mpv_catchup, id);
         u32 offset = writeProgram(bc, move(program));
         programs.push_back(offset);
         build.rm.setProgramOffset(id, offset);
@@ -5283,38 +3083,6 @@ pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build,
     return {offset, count};
 }
 
-static
-RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
-                                 bool needs_catchup,
-                                 ProgramBuild &prog_build, const RoseEdge &e,
-                                 const bool multiple_preds) {
-    const RoseGraph &g = build.g;
-    const RoseVertex v = target(e, g);
-
-    RoseProgram program;
-
-    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
-        makeRoleCheckBounds(build, v, e, program);
-    }
-
-    if (multiple_preds) {
-        // Only necessary when there is more than one pred.
-        makeRoleCheckNotHandled(prog_build, v, program);
-    }
-
-    const auto &reports = g[v].reports;
-    makeCatchup(build, needs_catchup, reports, program);
-
-    const bool has_som = false;
-    RoseProgram report_block;
-    for (const auto &id : reports) {
-        makeReport(build, id, has_som, report_block);
-    }
-    program.add_before_end(move(report_block));
-
-    return program;
-}
-
 static
 bool hasEodAnchoredSuffix(const RoseBuildImpl &build) {
     const RoseGraph &g = build.g;
@@ -5366,7 +3134,8 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
                 continue;
             }
             if (canEagerlyReportAtEod(build, e)) {
-                DEBUG_PRINTF("already done report for vertex %zu\n", g[u].index);
+                DEBUG_PRINTF("already done report for vertex %zu\n",
+                             g[u].index);
                 continue;
             }
             edge_list.push_back(e);
@@ -5378,8 +3147,7 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
             assert(contains(bc.roleStateIndices, u));
             u32 pred_state = bc.roleStateIndices.at(u);
             pred_blocks[pred_state].add_block(
-                makeEodAnchorProgram(build, bc.needs_catchup, prog_build, e,
-                                     multiple_preds));
+                makeEodAnchorProgram(build, prog_build, e, multiple_preds));
         }
     }
 
@@ -5414,58 +3182,31 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
                     tie(g[source(b, g)].index, g[target(b, g)].index);
          });
 
-    auto block = makeLiteralProgram(build, bc, prog_build,
+    auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
+                                    bc.engine_info_by_queue, bc.lookarounds,
+                                    bc.roleStateIndices, prog_build,
                                     build.eod_event_literal_id, edge_list,
                                     false);
     program.add_block(move(block));
 }
 
 static
-void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) {
-    if (!eodNfaIterOffset) {
-        return;
-    }
-
-    RoseProgram block;
-    block.add_before_end(make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
-    program.add_block(move(block));
-}
-
-static
-void addSuffixesEodProgram(const RoseBuildImpl &build, RoseProgram &program) {
-    if (!hasEodAnchoredSuffix(build)) {
-        return;
-    }
-
-    RoseProgram block;
-    block.add_before_end(make_unique<RoseInstrSuffixesEod>());
-    program.add_block(move(block));
-}
-
-static
-void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) {
-    if (!hasEodMatcher(build)) {
-        return;
-    }
-
-    RoseProgram block;
-    block.add_before_end(make_unique<RoseInstrMatcherEod>());
-    program.add_block(move(block));
-}
-
-static
-u32 writeEodProgram(const RoseBuildImpl &build, build_context &bc,
-                    ProgramBuild &prog_build, u32 eodNfaIterOffset) {
+RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc,
+                           ProgramBuild &prog_build, u32 eodNfaIterOffset) {
     RoseProgram program;
 
     addEodEventProgram(build, bc, prog_build, program);
     addEnginesEodProgram(eodNfaIterOffset, program);
     addEodAnchorProgram(build, bc, prog_build, false, program);
-    addMatcherEodProgram(build, program);
+    if (hasEodMatcher(build)) {
+        addMatcherEodProgram(program);
+    }
     addEodAnchorProgram(build, bc, prog_build, true, program);
-    addSuffixesEodProgram(build, program);
+    if (hasEodAnchoredSuffix(build)) {
+        addSuffixesEodProgram(program);
+    }
 
-    return writeProgram(bc, move(program));
+    return program;
 }
 
 static
@@ -5737,8 +3478,6 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     build_context bc;
     u32 floatingMinLiteralMatchOffset
         = findMinFloatingLiteralMatch(*this, anchored_dfas);
-    bc.longLitLengthThreshold = longLitLengthThreshold;
-    bc.needs_catchup = needsCatchup(*this);
     recordResources(bc.resources, *this, fragments);
     if (!anchored_dfas.empty()) {
         bc.resources.has_anchored = true;
@@ -5791,7 +3530,8 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                        &laggedRoseCount, &historyRequired);
 
     // Information only needed for program construction.
-    ProgramBuild prog_build(floatingMinLiteralMatchOffset);
+    ProgramBuild prog_build(floatingMinLiteralMatchOffset,
+                            longLitLengthThreshold, needsCatchup(*this));
     prog_build.vertex_group_map = getVertexGroupMap(*this);
     prog_build.squashable_groups = getSquashableGroups(*this);
 
@@ -5803,13 +3543,14 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     buildLiteralPrograms(*this, fragments, bc, prog_build);
 
-    proto.eodProgramOffset =
-        writeEodProgram(*this, bc, prog_build, eodNfaIterOffset);
+    auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset);
+    proto.eodProgramOffset = writeProgram(bc, move(eod_prog));
 
     size_t longLitStreamStateRequired = 0;
-    proto.longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob,
-                bc.longLiterals, longLitLengthThreshold, &historyRequired,
-                &longLitStreamStateRequired);
+    proto.longLitTableOffset
+        = buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals,
+                                longLitLengthThreshold, &historyRequired,
+                                &longLitStreamStateRequired);
 
     proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc);
     proto.eagerIterOffset = writeEagerQueueIter(
@@ -5817,7 +3558,7 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     addSomRevNfas(bc, proto, ssm);
 
-    writeLookaroundTables(bc, proto);
+    writeLookaroundTables(bc.lookarounds, bc.engine_blob, proto);
     writeDkeyInfo(rm, bc.engine_blob, proto);
     writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
 
@@ -5829,8 +3570,8 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     // Build floating HWLM matcher.
     rose_group fgroups = 0;
-    auto ftable = buildFloatingMatcher(*this, fragments,
-                        bc.longLitLengthThreshold, &fgroups, &historyRequired);
+    auto ftable = buildFloatingMatcher(*this, fragments, longLitLengthThreshold,
+                                       &fgroups, &historyRequired);
     if (ftable) {
         proto.fmatcherOffset = bc.engine_blob.add(ftable);
         bc.resources.has_floating = true;
@@ -5838,7 +3579,7 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     // Build delay rebuild HWLM matcher.
     auto drtable = buildDelayRebuildMatcher(*this, fragments,
-                                            bc.longLitLengthThreshold);
+                                            longLitLengthThreshold);
     if (drtable) {
         proto.drmatcherOffset = bc.engine_blob.add(drtable);
     }
index 21db7a8ee3175b3970ba9e6e49a247042b9d18df..b920e922da33a1870442882cd6736f8fb1ccd5a6 100644 (file)
@@ -622,6 +622,11 @@ u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
 void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
                           std::vector<u8> &cmp);
 
+u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id);
+u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id);
+
+bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e);
+
 #ifndef NDEBUG
 bool canImplementGraphs(const RoseBuildImpl &tbi);
 #endif
index 44044cb96c067b30fa9b7980a20a27ee7f834225..51a6ea85c2678c5da7b30fe6ea7fe68a3e95c2a8 100644 (file)
@@ -909,6 +909,59 @@ u32 roseQuality(const RoseEngine *t) {
     return 1;
 }
 
+u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
+    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
+    assert(!lit_vertices.empty());
+
+    u32 min_offset = UINT32_MAX;
+    for (const auto &v : lit_vertices) {
+        min_offset = min(min_offset, build.g[v].min_offset);
+    }
+
+    return min_offset;
+}
+
+u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
+    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
+    assert(!lit_vertices.empty());
+
+    u32 max_offset = 0;
+    for (const auto &v : lit_vertices) {
+        max_offset = max(max_offset, build.g[v].max_offset);
+    }
+
+    return max_offset;
+}
+
+bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
+    const auto &g = build.g;
+    const auto v = target(e, g);
+
+    if (!build.g[v].eod_accept) {
+        return false;
+    }
+
+    // If there's a graph between us and EOD, we shouldn't be eager.
+    if (build.g[v].left) {
+        return false;
+    }
+
+    // Must be exactly at EOD.
+    if (g[e].minBound != 0 || g[e].maxBound != 0) {
+        return false;
+    }
+
+    // In streaming mode, we can only eagerly report EOD for literals in the
+    // EOD-anchored table, as that's the only time we actually know where EOD
+    // is. In block mode, we always have this information.
+    const auto u = source(e, g);
+    if (build.cc.streaming && !build.isInETable(u)) {
+        return false;
+    }
+
+    return true;
+}
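
// Example (illustrative pattern, not from this commit): for /foo$/ in block
// mode, the role for "foo" reaches an eod_accept vertex with no leftfix over
// an edge with {minBound, maxBound} == {0, 0}, so the check above passes and
// the report may fire eagerly instead of deferring to the EOD-anchored
// program.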
+
 #ifndef NDEBUG
 /** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose
  * graph are implementable. */
index c319eed287df3db0fa89247d8dc1d394c9a66913..8d0306ae75bc2321ad7c239845a32fa068effafa 100644 (file)
 #include "rose_build_program.h"
 
 #include "rose_build_instructions.h"
+#include "rose_build_lookaround.h"
+#include "rose_build_resources.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/tamaramacompile.h"
+#include "nfagraph/ng_util.h"
+#include "util/charreach_util.h"
+#include "util/container.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/dump_charclass.h"
+#include "util/report_manager.h"
+#include "util/verify_types.h"
+
+#include <boost/range/adaptor/map.hpp>
 
 #include <algorithm>
 #include <cstring>
 
 using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
 
 namespace ue2 {
 
+engine_info::engine_info(const NFA *nfa, bool trans)
+    : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)),
+      stream_size(nfa->streamStateSize),
+      scratch_size(nfa->scratchStateSize),
+      scratch_align(state_alignment(*nfa)),
+      transient(trans) {
+    assert(scratch_align);
+}
+
+left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm,
+                                 const std::vector<u8> &stops, u32 max_ql,
+                                 u8 cm_count, const CharReach &cm_cr)
+    : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops),
+      max_queuelen(max_ql), countingMiracleCount(cm_count),
+      countingMiracleReach(cm_cr) {
+}
+
+left_build_info::left_build_info(const vector<vector<LookEntry>> &looks)
+    : has_lookaround(true), lookaround(looks) {
+}
+
 using OffsetMap = RoseInstruction::OffsetMap;
 
 static
@@ -216,6 +254,8 @@ bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
     return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv);
 }
 
+/* Removes any CHECK_NOT_HANDLED instructions from the given program */
+static
 void stripCheckHandledInstruction(RoseProgram &prog) {
     for (auto it = prog.begin(); it != prog.end();) {
         auto ins = dynamic_cast<const RoseInstrCheckNotHandled *>(it->get());
@@ -238,6 +278,9 @@ void stripCheckHandledInstruction(RoseProgram &prog) {
     }
 }
 
+/** Returns true if the program may read the interpreter's work_done flag */
+static
 bool reads_work_done_flag(const RoseProgram &prog) {
     for (const auto &ri : prog) {
         if (dynamic_cast<const RoseInstrSquashGroups *>(ri.get())) {
@@ -247,4 +290,2111 @@ bool reads_work_done_flag(const RoseProgram &prog) {
     return false;
 }
 
+void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) {
+    if (!eodNfaIterOffset) {
+        return;
+    }
+
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
+    program.add_block(move(block));
+}
+
+void addSuffixesEodProgram(RoseProgram &program) {
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrSuffixesEod>());
+    program.add_block(move(block));
+}
+
+void addMatcherEodProgram(RoseProgram &program) {
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrMatcherEod>());
+    program.add_block(move(block));
+}
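
// A sketch (not part of this change) of how the three helpers above compose.
// makeEodProgram in rose_build_bytecode.cpp glues them together; each helper
// appends an independent block, so a skipped helper leaves the program as-is:
//
//     RoseProgram program;
//     addEnginesEodProgram(eodNfaIterOffset, program); // no-op if offset == 0
//     addMatcherEodProgram(program);  // caller guards with hasEodMatcher()
//     addSuffixesEodProgram(program); // caller guards hasEodAnchoredSuffix()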
+
+static
+void makeRoleCheckLeftfix(const RoseBuildImpl &build,
+                          const map<RoseVertex, left_build_info> &leftfix_info,
+                          RoseVertex v, RoseProgram &program) {
+    auto it = leftfix_info.find(v);
+    if (it == end(leftfix_info)) {
+        return;
+    }
+    const left_build_info &lni = it->second;
+    if (lni.has_lookaround) {
+        return; // Leftfix completely implemented by lookaround.
+    }
+
+    assert(!build.cc.streaming ||
+           build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG);
+
+    bool is_prefix = build.isRootSuccessor(v);
+    const auto *end_inst = program.end_instruction();
+
+    unique_ptr<RoseInstruction> ri;
+    if (is_prefix) {
+        ri = make_unique<RoseInstrCheckPrefix>(lni.queue, build.g[v].left.lag,
+                                               build.g[v].left.leftfix_report,
+                                               end_inst);
+    } else {
+        ri = make_unique<RoseInstrCheckInfix>(lni.queue, build.g[v].left.lag,
+                                              build.g[v].left.leftfix_report,
+                                              end_inst);
+    }
+    program.add_before_end(move(ri));
+}
+
+static
+void makeAnchoredLiteralDelay(const RoseBuildImpl &build,
+                              const ProgramBuild &prog_build, u32 lit_id,
+                              RoseProgram &program) {
+    // Only relevant for literals in the anchored table.
+    const rose_literal_id &lit = build.literals.right.at(lit_id);
+    if (lit.table != ROSE_ANCHORED) {
+        return;
+    }
+
+    // If this literal match cannot occur after floatingMinLiteralMatchOffset,
+    // we do not need this check.
+    bool all_too_early = true;
+    rose_group groups = 0;
+
+    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
+    for (RoseVertex v : lit_vertices) {
+        if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) {
+            all_too_early = false;
+        }
+        groups |= build.g[v].groups;
+    }
+
+    if (all_too_early) {
+        return;
+    }
+
+    assert(contains(prog_build.anchored_programs, lit_id));
+    u32 anch_id = prog_build.anchored_programs.at(lit_id);
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrAnchoredDelay>(groups, anch_id, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeDedupe(const ReportManager &rm, const Report &report,
+                RoseProgram &program) {
+    const auto *end_inst = program.end_instruction();
+    auto ri =
+        make_unique<RoseInstrDedupe>(report.quashSom, rm.getDkey(report),
+                                     report.offsetAdjust, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeDedupeSom(const ReportManager &rm, const Report &report,
+                   RoseProgram &program) {
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrDedupeSom>(report.quashSom,
+                                              rm.getDkey(report),
+                                              report.offsetAdjust, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeCatchup(const ReportManager &rm, bool needs_catchup,
+                 const flat_set<ReportID> &reports, RoseProgram &program) {
+    if (!needs_catchup) {
+        return;
+    }
+
+    // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run
+    // before reports are triggered.
+
+    auto report_needs_catchup = [&](const ReportID &id) {
+        const Report &report = rm.getReport(id);
+        return report.type != INTERNAL_ROSE_CHAIN;
+    };
+
+    if (!any_of(begin(reports), end(reports), report_needs_catchup)) {
+        DEBUG_PRINTF("none of the given reports needs catchup\n");
+        return;
+    }
+
+    program.add_before_end(make_unique<RoseInstrCatchUp>());
+}
+
+static
+void writeSomOperation(const Report &report, som_operation *op) {
+    assert(op);
+
+    memset(op, 0, sizeof(*op));
+
+    switch (report.type) {
+    case EXTERNAL_CALLBACK_SOM_REL:
+        op->type = SOM_EXTERNAL_CALLBACK_REL;
+        break;
+    case INTERNAL_SOM_LOC_SET:
+        op->type = SOM_INTERNAL_LOC_SET;
+        break;
+    case INTERNAL_SOM_LOC_SET_IF_UNSET:
+        op->type = SOM_INTERNAL_LOC_SET_IF_UNSET;
+        break;
+    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE;
+        break;
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+        op->type = SOM_INTERNAL_LOC_SET_REV_NFA;
+        break;
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET;
+        break;
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE;
+        break;
+    case INTERNAL_SOM_LOC_COPY:
+        op->type = SOM_INTERNAL_LOC_COPY;
+        break;
+    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE;
+        break;
+    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE;
+        break;
+    case EXTERNAL_CALLBACK_SOM_STORED:
+        op->type = SOM_EXTERNAL_CALLBACK_STORED;
+        break;
+    case EXTERNAL_CALLBACK_SOM_ABS:
+        op->type = SOM_EXTERNAL_CALLBACK_ABS;
+        break;
+    case EXTERNAL_CALLBACK_SOM_REV_NFA:
+        op->type = SOM_EXTERNAL_CALLBACK_REV_NFA;
+        break;
+    case INTERNAL_SOM_LOC_SET_FROM:
+        op->type = SOM_INTERNAL_LOC_SET_FROM;
+        break;
+    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
+        op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE;
+        break;
+    default:
+        // This report doesn't correspond to a SOM operation.
+        assert(0);
+        throw CompileError("Unable to generate bytecode.");
+    }
+
+    op->onmatch = report.onmatch;
+
+    switch (report.type) {
+    case EXTERNAL_CALLBACK_SOM_REV_NFA:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+        op->aux.revNfaIndex = report.revNfaIndex;
+        break;
+    default:
+        op->aux.somDistance = report.somDistance;
+        break;
+    }
+}
+
+static
+void makeReport(const RoseBuildImpl &build, const ReportID id,
+                const bool has_som, RoseProgram &program) {
+    assert(id < build.rm.numReports());
+    const Report &report = build.rm.getReport(id);
+
+    RoseProgram report_block;
+    const RoseInstruction *end_inst = report_block.end_instruction();
+
+    // Handle min/max offset checks.
+    if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) {
+        auto ri = make_unique<RoseInstrCheckBounds>(report.minOffset,
+                                                    report.maxOffset, end_inst);
+        report_block.add_before_end(move(ri));
+    }
+
+    // If this report has an exhaustion key, we can check it in the program
+    // rather than waiting until we're in the callback adaptor.
+    if (report.ekey != INVALID_EKEY) {
+        auto ri = make_unique<RoseInstrCheckExhausted>(report.ekey, end_inst);
+        report_block.add_before_end(move(ri));
+    }
+
+    // External SOM reports that aren't passthrough need their SOM value
+    // calculated.
+    if (isExternalSomReport(report) &&
+        report.type != EXTERNAL_CALLBACK_SOM_PASS) {
+        auto ri = make_unique<RoseInstrSomFromReport>();
+        writeSomOperation(report, &ri->som);
+        report_block.add_before_end(move(ri));
+    }
+
+    // Min length constraint.
+    if (report.minLength > 0) {
+        assert(build.hasSom);
+        auto ri = make_unique<RoseInstrCheckMinLength>(
+            report.offsetAdjust, report.minLength, end_inst);
+        report_block.add_before_end(move(ri));
+    }
+
+    if (report.quashSom) {
+        report_block.add_before_end(make_unique<RoseInstrSomZero>());
+    }
+
+    switch (report.type) {
+    case EXTERNAL_CALLBACK:
+        if (!has_som) {
+            // Dedupe is only necessary if this report has a dkey, or if there
+            // are SOM reports to catch up.
+            bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
+            if (report.ekey == INVALID_EKEY) {
+                if (needs_dedupe) {
+                    report_block.add_before_end(
+                        make_unique<RoseInstrDedupeAndReport>(
+                            report.quashSom, build.rm.getDkey(report),
+                            report.onmatch, report.offsetAdjust, end_inst));
+                } else {
+                    report_block.add_before_end(make_unique<RoseInstrReport>(
+                        report.onmatch, report.offsetAdjust));
+                }
+            } else {
+                if (needs_dedupe) {
+                    makeDedupe(build.rm, report, report_block);
+                }
+                report_block.add_before_end(make_unique<RoseInstrReportExhaust>(
+                    report.onmatch, report.offsetAdjust, report.ekey));
+            }
+        } else { // has_som
+            makeDedupeSom(build.rm, report, report_block);
+            if (report.ekey == INVALID_EKEY) {
+                report_block.add_before_end(make_unique<RoseInstrReportSom>(
+                    report.onmatch, report.offsetAdjust));
+            } else {
+                report_block.add_before_end(
+                    make_unique<RoseInstrReportSomExhaust>(
+                        report.onmatch, report.offsetAdjust, report.ekey));
+            }
+        }
+        break;
+    case INTERNAL_SOM_LOC_SET:
+    case INTERNAL_SOM_LOC_SET_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_COPY:
+    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
+    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
+    case INTERNAL_SOM_LOC_SET_FROM:
+    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
+        if (has_som) {
+            auto ri = make_unique<RoseInstrReportSomAware>();
+            writeSomOperation(report, &ri->som);
+            report_block.add_before_end(move(ri));
+        } else {
+            auto ri = make_unique<RoseInstrReportSomInt>();
+            writeSomOperation(report, &ri->som);
+            report_block.add_before_end(move(ri));
+        }
+        break;
+    case INTERNAL_ROSE_CHAIN: {
+        report_block.add_before_end(make_unique<RoseInstrReportChain>(
+            report.onmatch, report.topSquashDistance));
+        break;
+    }
+    case EXTERNAL_CALLBACK_SOM_REL:
+    case EXTERNAL_CALLBACK_SOM_STORED:
+    case EXTERNAL_CALLBACK_SOM_ABS:
+    case EXTERNAL_CALLBACK_SOM_REV_NFA:
+        makeDedupeSom(build.rm, report, report_block);
+        if (report.ekey == INVALID_EKEY) {
+            report_block.add_before_end(make_unique<RoseInstrReportSom>(
+                report.onmatch, report.offsetAdjust));
+        } else {
+            report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
+                report.onmatch, report.offsetAdjust, report.ekey));
+        }
+        break;
+    case EXTERNAL_CALLBACK_SOM_PASS:
+        makeDedupeSom(build.rm, report, report_block);
+        if (report.ekey == INVALID_EKEY) {
+            report_block.add_before_end(make_unique<RoseInstrReportSom>(
+                report.onmatch, report.offsetAdjust));
+        } else {
+            report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
+                report.onmatch, report.offsetAdjust, report.ekey));
+        }
+        break;
+
+    default:
+        assert(0);
+        throw CompileError("Unable to generate bytecode.");
+    }
+
+    assert(!report_block.empty());
+    program.add_block(move(report_block));
+}
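
// Shape of the block built above for a plain EXTERNAL_CALLBACK report with a
// dkey but no ekey and no SOM (a sketch; CHECK_BOUNDS appears only when the
// report carries min/max offsets, and failure jumps past the block):
//
//     CHECK_BOUNDS min, max
//     DEDUPE_AND_REPORT onmatch, offsetAdjust
//
// With no dkey and no SOM reports in the database it reduces to a bare REPORT.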
+
+static
+void makeRoleReports(const RoseBuildImpl &build,
+                     const std::map<RoseVertex, left_build_info> &leftfix_info,
+                     bool needs_catchup, RoseVertex v, RoseProgram &program) {
+    const auto &g = build.g;
+
+    bool report_som = false;
+    if (g[v].left.tracksSom()) {
+        /* We are a suffaig: the role must be updated to provide SOM to
+         * the suffix. */
+        assert(contains(leftfix_info, v));
+        const left_build_info &lni = leftfix_info.at(v);
+        program.add_before_end(
+            make_unique<RoseInstrSomLeftfix>(lni.queue, g[v].left.lag));
+        report_som = true;
+    } else if (g[v].som_adjust) {
+        program.add_before_end(
+            make_unique<RoseInstrSomAdjust>(g[v].som_adjust));
+        report_som = true;
+    }
+
+    makeCatchup(build.rm, needs_catchup, g[v].reports, program);
+
+    RoseProgram report_block;
+    for (ReportID id : g[v].reports) {
+        makeReport(build, id, report_som, report_block);
+    }
+    program.add_before_end(move(report_block));
+}
+
+static
+void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices,
+                      RoseVertex v, RoseProgram &program) {
+    // We only need this instruction if a state index has been assigned to this
+    // vertex.
+    auto it = roleStateIndices.find(v);
+    if (it == end(roleStateIndices)) {
+        return;
+    }
+    program.add_before_end(make_unique<RoseInstrSetState>(it->second));
+}
+
+static
+void makePushDelayedInstructions(const RoseLiteralMap &literals,
+                                 ProgramBuild &prog_build,
+                                 const flat_set<u32> &delayed_ids,
+                                 RoseProgram &program) {
+    vector<RoseInstrPushDelayed> delay_instructions;
+
+    for (const auto &delayed_lit_id : delayed_ids) {
+        DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id);
+        assert(contains(prog_build.delay_programs, delayed_lit_id));
+        u32 delay_id = prog_build.delay_programs.at(delayed_lit_id);
+        const auto &delay_lit = literals.right.at(delayed_lit_id);
+        delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id);
+    }
+
+    sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a,
+                                           const RoseInstrPushDelayed &b) {
+        return tie(a.delay, a.index) < tie(b.delay, b.index);
+    });
+
+    for (const auto &ri : delay_instructions) {
+        program.add_before_end(make_unique<RoseInstrPushDelayed>(ri));
+    }
+}
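
// Sorting and de-duplicating keeps the emitted PUSH_DELAYED sequence
// deterministic; it also means structurally identical programs compare equal
// under RoseProgramEquivalence and can be shared.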
+
+static
+void makeCheckLiteralInstruction(const rose_literal_id &lit,
+                                 size_t longLitLengthThreshold,
+                                 RoseProgram &program,
+                                 const CompileContext &cc) {
+    assert(longLitLengthThreshold > 0);
+
+    DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(),
+                 longLitLengthThreshold);
+
+    if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
+        DEBUG_PRINTF("lit short enough to not need confirm\n");
+        return;
+    }
+
+    // Check resource limits as well.
+    if (lit.s.length() > cc.grey.limitLiteralLength) {
+        throw ResourceLimitError();
+    }
+
+    if (lit.s.length() <= longLitLengthThreshold) {
+        DEBUG_PRINTF("is a medium-length literal\n");
+        const auto *end_inst = program.end_instruction();
+        unique_ptr<RoseInstruction> ri;
+        if (lit.s.any_nocase()) {
+            ri = make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(),
+                                                         end_inst);
+        } else {
+            ri = make_unique<RoseInstrCheckMedLit>(lit.s.get_string(),
+                                                   end_inst);
+        }
+        program.add_before_end(move(ri));
+        return;
+    }
+
+    // Long literal support should only really be used for the floating table
+    // in streaming mode.
+    assert(lit.table == ROSE_FLOATING && cc.streaming);
+
+    DEBUG_PRINTF("is a long literal\n");
+
+    const auto *end_inst = program.end_instruction();
+    unique_ptr<RoseInstruction> ri;
+    if (lit.s.any_nocase()) {
+        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(),
+                                                      end_inst);
+    } else {
+        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst);
+    }
+    program.add_before_end(move(ri));
+}
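
// The function above is a three-tier confirm strategy: literals of at most
// ROSE_SHORT_LITERAL_LEN_MAX bytes are trusted to the HWLM matcher and need
// no instruction; medium literals (up to longLitLengthThreshold) are verified
// inline with CHECK_MED_LIT; anything longer goes through the long literal
// table via CHECK_LONG_LIT.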
+
+static
+void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v,
+                             RoseProgram &program) {
+    u32 handled_key;
+    if (contains(prog_build.handledKeys, v)) {
+        handled_key = prog_build.handledKeys.at(v);
+    } else {
+        handled_key = verify_u32(prog_build.handledKeys.size());
+        prog_build.handledKeys.emplace(v, handled_key);
+    }
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst);
+    program.add_before_end(move(ri));
+}
+
+static
+void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
+                         const RoseEdge &e, RoseProgram &program) {
+    const RoseGraph &g = build.g;
+    const RoseVertex u = source(e, g);
+
+    // We know that we can trust the anchored table (DFA) to always deliver us
+    // literals at the correct offset.
+    if (build.isAnchored(v)) {
+        DEBUG_PRINTF("literal in anchored table, skipping bounds check\n");
+        return;
+    }
+
+    // Use the minimum literal length.
+    u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));
+
+    u64a min_bound = g[e].minBound + lit_length;
+    u64a max_bound = g[e].maxBound == ROSE_BOUND_INF
+                         ? ROSE_BOUND_INF
+                         : g[e].maxBound + lit_length;
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        assert(g[u].fixedOffset());
+        // Make offsets absolute.
+        min_bound += g[u].max_offset;
+        if (max_bound != ROSE_BOUND_INF) {
+            max_bound += g[u].max_offset;
+        }
+    }
+
+    assert(max_bound <= ROSE_BOUND_INF);
+    assert(min_bound <= max_bound);
+
+    // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET
+    // (max value of a u64a) to represent ROSE_BOUND_INF.
+    if (max_bound == ROSE_BOUND_INF) {
+        max_bound = MAX_OFFSET;
+    }
+
+    // This instruction should be doing _something_ -- bounds should be tighter
+    // than just {length, inf}.
+    assert(min_bound > lit_length || max_bound < MAX_OFFSET);
+
+    const auto *end_inst = program.end_instruction();
+    program.add_before_end(
+        make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst));
+}
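
// Worked example (illustrative numbers): a literal of minimum length 3 on an
// edge with {minBound, maxBound} == {2, 10} yields CHECK_BOUNDS {5, 13}
// against the match end offset. If the edge history is ROSE_ROLE_HISTORY_ANCH
// and the predecessor is fixed at offset 7, the bounds become absolute:
// {12, 20}.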
+
+static
+void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build,
+                    RoseVertex v, RoseProgram &program) {
+    rose_group groups = g[v].groups;
+    if (!groups) {
+        return;
+    }
+
+    // The set of "already on" groups as we process this vertex is the
+    // intersection of the groups set by our predecessors.
+    assert(in_degree(v, g) > 0);
+    rose_group already_on = ~rose_group{0};
+    for (const auto &u : inv_adjacent_vertices_range(v, g)) {
+        already_on &= prog_build.vertex_group_map.at(u);
+    }
+
+    DEBUG_PRINTF("already_on=0x%llx\n", already_on);
+    DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups);
+    DEBUG_PRINTF("groups=0x%llx\n", groups);
+
+    already_on &= ~prog_build.squashable_groups;
+    DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on);
+
+    // We don't *have* to mask off the groups that we know are already on, but
+    // this will make bugs more apparent.
+    groups &= ~already_on;
+
+    if (!groups) {
+        DEBUG_PRINTF("no new groups to set, skipping\n");
+        return;
+    }
+
+    program.add_before_end(make_unique<RoseInstrSetGroups>(groups));
+}
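
// Worked example (illustrative masks): if v sets groups 0x6, its two
// predecessors guarantee groups 0x7 and 0x3 (intersection 0x3), and group 0x1
// is squashable, then already_on = 0x3 & ~0x1 = 0x2, so we emit
// SET_GROUPS 0x4 rather than 0x6.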
+
+static
+void addLookaround(lookaround_info &lookarounds,
+                   const vector<vector<LookEntry>> &look,
+                   u32 &look_index, u32 &reach_index) {
+    // Check the cache.
+    auto it = lookarounds.cache.find(look);
+    if (it != lookarounds.cache.end()) {
+        look_index = verify_u32(it->second.first);
+        reach_index = verify_u32(it->second.second);
+        DEBUG_PRINTF("reusing look at idx %u\n", look_index);
+        DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
+        return;
+    }
+
+    size_t look_idx = lookarounds.lookTableSize;
+    size_t reach_idx = lookarounds.reachTableSize;
+
+    if (look.size() == 1) {
+        lookarounds.lookTableSize += look.front().size();
+        lookarounds.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
+    } else {
+        lookarounds.lookTableSize += look.size();
+        lookarounds.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
+    }
+
+    lookarounds.cache.emplace(look, make_pair(look_idx, reach_idx));
+    lookarounds.table.emplace_back(look);
+
+    DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
+    DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
+    look_index = verify_u32(look_idx);
+    reach_index = verify_u32(reach_idx);
+}
+
+static
+bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
+    size_t reach_size = cr.count();
+    assert(reach_size > 0);
+    // Check whether reach_size is a power of 2.
+    if ((reach_size - 1) & reach_size) {
+        return false;
+    }
+    make_and_cmp_mask(cr, &andmask, &cmpmask);
+    if ((1 << popcount32((u8)(~andmask))) ^ reach_size) {
+        return false;
+    }
+    return true;
+}
+
+static
+bool checkReachWithFlip(const CharReach &cr, u8 &andmask,
+                        u8 &cmpmask, u8 &flip) {
+    if (checkReachMask(cr, andmask, cmpmask)) {
+        flip = 0;
+        return true;
+    }
+    if (checkReachMask(~cr, andmask, cmpmask)) {
+        flip = 1;
+        return true;
+    }
+    return false;
+}
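
// A standalone check of the and/cmp property verified above (illustrative,
// standard library only):
//
//     #include <cassert>
//     int main() {
//         // Case-insensitive 'a': reach = {0x41, 0x61}, size 2 (power of 2).
//         const unsigned andmask = 0xDF, cmpmask = 0x41;
//         for (unsigned c = 0; c < 256; c++) {
//             bool in_reach = (c == 0x41 || c == 0x61);
//             assert(((c & andmask) == cmpmask) == in_reach);
//         }
//     }
//
// By contrast, {'a','b'} = {0x61, 0x62} also has size 2 but differs in two
// bit positions, so a single and/cmp pair would cover four characters; the
// popcount check rejects it, and its complement (254 characters) is not a
// power of two either, so flipping does not help.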
+
+static
+bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) {
+    if (look.size() == 1) {
+        const auto &entry = look[0];
+        u8 andmask_u8, cmpmask_u8;
+        u8 flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
+            return false;
+        }
+        s32 checkbyte_offset = verify_s32(entry.offset);
+        DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset);
+        const auto *end_inst = program.end_instruction();
+        auto ri = make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip,
+                                                  checkbyte_offset, end_inst);
+        program.add_before_end(move(ri));
+        return true;
+    }
+    return false;
+}
+
+static
+bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) {
+    if (look.back().offset < look.front().offset + 8) {
+        s32 base_offset = verify_s32(look.front().offset);
+        u64a and_mask = 0;
+        u64a cmp_mask = 0;
+        u64a neg_mask = 0;
+        for (const auto &entry : look) {
+            u8 andmask_u8, cmpmask_u8, flip;
+            if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                    cmpmask_u8, flip)) {
+                return false;
+            }
+            DEBUG_PRINTF("entry offset %d\n", entry.offset);
+            u32 shift = (entry.offset - base_offset) << 3;
+            and_mask |= (u64a)andmask_u8 << shift;
+            cmp_mask |= (u64a)cmpmask_u8 << shift;
+            if (flip) {
+                neg_mask |= 0xffLLU << shift;
+            }
+        }
+        DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n",
+                     and_mask, cmp_mask);
+        const auto *end_inst = program.end_instruction();
+        auto ri = make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask,
+                                                  base_offset, end_inst);
+        program.add_before_end(move(ri));
+        return true;
+    }
+    return false;
+}
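
// Worked example (illustrative): a nocase "ab" checked at offsets {-2, -1}
// gives per-byte pairs (0xDF, 0x41) and (0xDF, 0x42). With base_offset = -2
// the shifts are 0 and 8, so and_mask = 0xDFDF and cmp_mask = 0x4241; bytes
// not covered by any LookEntry keep and = cmp = 0 and always pass.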
+
+static UNUSED
+string convertMaskstoString(u8 *p, int byte_len) {
+    string s;
+    for (int i = 0; i < byte_len; i++) {
+        u8 hi = *p >> 4;
+        u8 lo = *p & 0xf;
+        s += (char)(hi + (hi < 10 ? 48 : 87)); // hex digit: '0'-'9'/'a'-'f'
+        s += (char)(lo + (lo < 10 ? 48 : 87));
+        p++;
+    }
+    return s;
+}
+
+static
+bool makeRoleMask32(const vector<LookEntry> &look,
+                    RoseProgram &program) {
+    if (look.back().offset >= look.front().offset + 32) {
+        return false;
+    }
+    s32 base_offset = verify_s32(look.front().offset);
+    array<u8, 32> and_mask, cmp_mask;
+    and_mask.fill(0);
+    cmp_mask.fill(0);
+    u32 neg_mask = 0;
+    for (const auto &entry : look) {
+        u8 andmask_u8, cmpmask_u8, flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                cmpmask_u8, flip)) {
+            return false;
+        }
+        u32 shift = entry.offset - base_offset;
+        assert(shift < 32);
+        and_mask[shift] = andmask_u8;
+        cmp_mask[shift] = cmpmask_u8;
+        if (flip) {
+            neg_mask |= 1 << shift;
+        }
+    }
+
+    DEBUG_PRINTF("and_mask %s\n",
+                 convertMaskstoString(and_mask.data(), 32).c_str());
+    DEBUG_PRINTF("cmp_mask %s\n",
+                 convertMaskstoString(cmp_mask.data(), 32).c_str());
+    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask,
+                                                base_offset, end_inst);
+    program.add_before_end(move(ri));
+    return true;
+}
+
+// Comparator ordering buckets by the number of characters they cover.
+// Used in map<u32, vector<s8>, cmpNibble>.
+struct cmpNibble {
+    bool operator()(const u32 data1, const u32 data2) const{
+        u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16);
+        u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16);
+        return std::tie(size1, data1) < std::tie(size2, data2);
+    }
+};
+
+// Insert all (bucket, offset) pairs into buckets.
+static really_inline
+void getAllBuckets(const vector<LookEntry> &look,
+                   map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
+    s32 base_offset = verify_s32(look.front().offset);
+    for (const auto &entry : look) {
+        CharReach cr = entry.reach;
+        // Flip heavy character classes to save buckets.
+        if (cr.count() > 128) {
+            cr.flip();
+        } else {
+            neg_mask ^= 1ULL << (entry.offset - base_offset);
+        }
+        map<u16, u16> lo2hi;
+        // We treat the ASCII table as a 16x16 grid.
+        // Push every row of cr into lo2hi and mark the row number.
+        for (size_t i = cr.find_first(); i != CharReach::npos;) {
+            u8 it_hi = i >> 4;
+            u16 low_encode = 0;
+            while (i != CharReach::npos && (i >> 4) == it_hi) {
+                low_encode |= 1 << (i & 0xf);
+                i = cr.find_next(i);
+            }
+            lo2hi[low_encode] |= 1 << it_hi;
+        }
+        for (const auto &it : lo2hi) {
+            u32 hi_lo = (it.second << 16) | it.first;
+            buckets[hi_lo].push_back(entry.offset);
+        }
+    }
+}
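
// Worked example (illustrative): for reach {0x41, 0x43, 0x61, 0x63}, rows 4
// and 6 of the 16x16 grid share the low-nibble pattern 0b1010 = 0x000A, so a
// single entry lo2hi[0x000A] = 0x50 covers all four characters and they fall
// into one bucket, hi_lo = 0x0050000A.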
+
+// Once we have a new bucket, we'll try to combine it with all old buckets.
+static really_inline
+void nibUpdate(map<u32, u16> &nib, u32 hi_lo) {
+    u16 hi = hi_lo >> 16;
+    u16 lo = hi_lo & 0xffff;
+    for (const auto &pairs : nib) {
+        u32 old = pairs.first;
+        if ((old >> 16) == hi || (old & 0xffff) == lo) {
+            if (!nib[old | hi_lo]) {
+                nib[old | hi_lo] = nib[old] | nib[hi_lo];
+            }
+        }
+    }
+}
+
+static really_inline
+void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
+    for (u8 index = 0; data > 0; data >>= 1, index++) {
+        if (data & 1) {
+            // 0 ~ 7 bucket in first 16 bytes,
+            // 8 ~ 15 bucket in second 16 bytes.
+            if (bit_index >= 8) {
+                mask[index + 16] |= 1 << (bit_index - 8);
+            } else {
+                mask[index] |= 1 << bit_index;
+            }
+        }
+    }
+}
+
+static
+bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
+                    array<u8, 32> &lo_mask, u8 *bucket_select_hi,
+                    u8 *bucket_select_lo, u64a &neg_mask,
+                    u8 &bit_idx, size_t len) {
+    map<u32, u16> nib; // map every bucket to its bucket number.
+    map<u32, vector<s8>, cmpNibble> bucket2offsets;
+    s32 base_offset = look.front().offset;
+
+    bit_idx = 0;
+    neg_mask = ~0ULL;
+
+    getAllBuckets(look, bucket2offsets, neg_mask);
+
+    for (const auto &it : bucket2offsets) {
+        u32 hi_lo = it.first;
+        // New bucket.
+        if (!nib[hi_lo]) {
+            if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
+                return false;
+            }
+            nib[hi_lo] = 1 << bit_idx;
+
+            nibUpdate(nib, hi_lo);
+            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
+            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
+            bit_idx++;
+        }
+
+        DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
+
+        // Update bucket_select_mask.
+        u8 nib_hi = nib[hi_lo] >> 8;
+        u8 nib_lo = nib[hi_lo] & 0xff;
+        for (const auto offset : it.second) {
+            bucket_select_hi[offset - base_offset] |= nib_hi;
+            bucket_select_lo[offset - base_offset] |= nib_lo;
+        }
+    }
+    return true;
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 8) {
+        return nullptr;
+    }
+    array<u8, 32> nib_mask;
+    array<u8, 16> bucket_select_mask_16;
+    copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
+    copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
+    copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
+         bucket_select_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti16x8>
+           (nib_mask, bucket_select_mask_16,
+            neg_mask & 0xffff, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 8) {
+        return nullptr;
+    }
+
+    array<u8, 16> hi_mask_16;
+    array<u8, 16> lo_mask_16;
+    copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
+    copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti32x8>
+           (hi_mask_16, lo_mask_16, bucket_select_mask,
+            neg_mask, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    array<u8, 32> bucket_select_mask_32;
+    copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16,
+         bucket_select_mask_32.begin());
+    copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
+         bucket_select_mask_32.begin() + 16);
+    return make_unique<RoseInstrCheckShufti16x16>
+           (hi_mask, lo_mask, bucket_select_mask_32,
+            neg_mask & 0xffff, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    return make_unique<RoseInstrCheckShufti32x16>
+           (hi_mask, lo_mask, bucket_select_mask_hi,
+            bucket_select_mask_lo, neg_mask, base_offset, end_inst);
+}
+
+static
+bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
+    s32 base_offset = verify_s32(look.front().offset);
+    if (look.back().offset >= base_offset + 32) {
+        return false;
+    }
+
+    u8 bucket_idx = 0; // number of buckets
+    u64a neg_mask_64;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 32> bucket_select_hi;
+    array<u8, 32> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
+        return false;
+    }
+    u32 neg_mask = (u32)neg_mask_64;
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 32).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 32).c_str());
+
+    const auto *end_inst = program.end_instruction();
+    s32 offset_range = look.back().offset - base_offset + 1;
+
+    auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, neg_mask, base_offset,
+                                  end_inst);
+    if (!ri) {
+        ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                 bucket_select_lo, neg_mask, base_offset,
+                                 end_inst);
+    }
+    if (!ri) {
+        ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    if (!ri) {
+        ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    assert(ri);
+    program.add_before_end(move(ri));
+
+    return true;
+}
+
+/**
+ * Builds a lookaround instruction, or an appropriate specialization if one is
+ * available.
+ */
+static
+void makeLookaroundInstruction(lookaround_info &lookarounds,
+                               const vector<LookEntry> &look,
+                               RoseProgram &program) {
+    assert(!look.empty());
+
+    if (makeRoleByte(look, program)) {
+        return;
+    }
+
+    if (look.size() == 1) {
+        s8 offset = look.begin()->offset;
+        u32 look_idx, reach_idx;
+        vector<vector<LookEntry>> lookaround;
+        lookaround.emplace_back(look);
+        addLookaround(lookarounds, lookaround, look_idx, reach_idx);
+        // We don't need look_idx here.
+        auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach_idx,
+                                                     program.end_instruction());
+        program.add_before_end(move(ri));
+        return;
+    }
+
+    if (makeRoleMask(look, program)) {
+        return;
+    }
+
+    if (makeRoleMask32(look, program)) {
+        return;
+    }
+
+    if (makeRoleShufti(look, program)) {
+        return;
+    }
+
+    u32 look_idx, reach_idx;
+    vector<vector<LookEntry>> lookaround;
+    lookaround.emplace_back(look);
+    addLookaround(lookarounds, lookaround, look_idx, reach_idx);
+    u32 look_count = verify_u32(look.size());
+
+    auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
+                                                    look_count,
+                                                    program.end_instruction());
+    program.add_before_end(move(ri));
+}
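
// The specialisations above are tried cheapest-first: a single-byte check,
// then a one-entry lookaround, an 8-byte CHECK_MASK, a 32-byte CHECK_MASK_32,
// the shufti variants, and only then the generic CHECK_LOOKAROUND table.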
+
+static
+void makeCheckLitMaskInstruction(const RoseBuildImpl &build,
+                                 lookaround_info &lookarounds, u32 lit_id,
+                                 RoseProgram &program) {
+    const auto &info = build.literal_info.at(lit_id);
+    if (!info.requires_benefits) {
+        return;
+    }
+
+    vector<LookEntry> look;
+
+    const ue2_literal &s = build.literals.right.at(lit_id).s;
+    DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id,
+                 dumpString(s).c_str());
+    assert(s.length() <= MAX_MASK2_WIDTH);
+    s32 i = 0 - s.length();
+    for (const auto &e : s) {
+        if (!e.nocase) {
+            look.emplace_back(verify_s8(i), e);
+        }
+        i++;
+    }
+
+    assert(!look.empty());
+    makeLookaroundInstruction(lookarounds, look, program);
+}
+
+static
+void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  u32 floatingMinLiteralMatchOffset,
+                                  RoseProgram &prog) {
+    if (lit_edges.empty()) {
+        return;
+    }
+
+    if (floatingMinLiteralMatchOffset == 0) {
+        return;
+    }
+
+    RoseVertex v = target(lit_edges.front(), build.g);
+    if (!build.isFloating(v)) {
+        return;
+    }
+
+    const auto &lit = build.literals.right.at(lit_id);
+    size_t min_len = lit.elength();
+    u32 min_offset = findMinOffset(build, lit_id);
+    DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len,
+                 min_offset, floatingMinLiteralMatchOffset);
+
+    // If we can't match before the min offset, we don't need the check.
+    if (min_len >= floatingMinLiteralMatchOffset) {
+        DEBUG_PRINTF("no need for check, min is %u\n",
+                     floatingMinLiteralMatchOffset);
+        return;
+    }
+
+    assert(min_offset >= floatingMinLiteralMatchOffset);
+    assert(min_offset < UINT32_MAX);
+
+    DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
+    const auto *end = prog.end_instruction();
+    prog.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset, end));
+}
+
+static
+void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id,
+                               RoseProgram &prog) {
+    const auto &info = build.literal_info.at(lit_id);
+
+    if (!info.group_mask) {
+        return;
+    }
+    prog.add_before_end(make_unique<RoseInstrCheckGroups>(info.group_mask));
+}
+
+static
+bool hasDelayedLiteral(const RoseBuildImpl &build,
+                       const vector<RoseEdge> &lit_edges) {
+    auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1);
+    for (const auto &e : lit_edges) {
+        auto v = target(e, build.g);
+        const auto &lits = build.g[v].literals;
+        if (any_of(begin(lits), end(lits), is_delayed)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static
+RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
+                                  lookaround_info &lookarounds,
+                                  ProgramBuild &prog_build, u32 lit_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  bool is_anchored_replay_program) {
+    RoseProgram program;
+
+    // Check long literal info.
+    if (!build.isDelayed(lit_id)) {
+        makeCheckLiteralInstruction(build.literals.right.at(lit_id),
+                                    prog_build.longLitLengthThreshold,
+                                    program, build.cc);
+    }
+
+    // Check lit mask.
+    makeCheckLitMaskInstruction(build, lookarounds, lit_id, program);
+
+    // Check literal groups. This is an optimisation that we only perform for
+    // delayed literals, as their groups may be switched off; ordinarily, we
+    // can trust the HWLM matcher.
+    if (hasDelayedLiteral(build, lit_edges)) {
+        makeGroupCheckInstruction(build, lit_id, program);
+    }
+
+    // Add instructions for pushing delayed matches, if there are any.
+    makePushDelayedInstructions(build.literals, prog_build,
+                                build.literal_info.at(lit_id).delayed_ids,
+                                program);
+
+    // Add pre-check for early literals in the floating table.
+    makeCheckLitEarlyInstruction(build, lit_id, lit_edges,
+                                 prog_build.floatingMinLiteralMatchOffset,
+                                 program);
+
+    /* Check if we are able to deliver matches from the anchored table now */
+    if (!is_anchored_replay_program) {
+        makeAnchoredLiteralDelay(build, prog_build, lit_id, program);
+    }
+
+    return program;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dumpMultiLook(const vector<LookEntry> &looks) {
+    ostringstream oss;
+    for (auto it = looks.begin(); it != looks.end(); ++it) {
+        if (it != looks.begin()) {
+            oss << ", ";
+        }
+        oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}";
+    }
+    return oss.str();
+}
+#endif
+
+static
+bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
+                             RoseProgram &program) {
+    if (multi_look.empty()) {
+        return false;
+    }
+
+    // find the base offset
+    assert(!multi_look[0].empty());
+    s32 base_offset = multi_look[0].front().offset;
+    s32 last_start = base_offset;
+    s32 end_offset = multi_look[0].back().offset;
+    size_t multi_len = 0;
+
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        multi_len += look.size();
+
+        LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
+        ENSURE_AT_LEAST(&last_start, look.front().offset);
+        ENSURE_AT_LEAST(&end_offset, look.back().offset);
+    }
+
+    assert(last_start < 0);
+
+    if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
+        return false;
+    }
+
+    if (multi_len <= 16) {
+        multi_len = 16;
+    } else if (multi_len <= 32) {
+        multi_len = 32;
+    } else if (multi_len <= 64) {
+        multi_len = 64;
+    } else {
+        DEBUG_PRINTF("too long for multi-path\n");
+        return false;
+    }
+
+    vector<LookEntry> linear_look;
+    array<u8, 64> data_select_mask;
+    data_select_mask.fill(0);
+    u64a hi_bits_mask = 0;
+    u64a lo_bits_mask = 0;
+
+    for (const auto &look : multi_look) {
+        assert(linear_look.size() < 64);
+        lo_bits_mask |= 1LLU << linear_look.size();
+        for (const auto &entry : look) {
+            assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
+            data_select_mask[linear_look.size()] =
+                                          verify_u8(entry.offset - base_offset);
+            linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach);
+        }
+        hi_bits_mask |= 1LLU << (linear_look.size() - 1);
+    }
+
+    u8 bit_index = 0; // number of buckets
+    u64a neg_mask;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 64> bucket_select_hi;
+    array<u8, 64> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0);
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask, bit_index,
+                        multi_len)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 16).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 16).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
+    DEBUG_PRINTF("data_select_mask %s\n",
+                 convertMaskstoString(data_select_mask.data(), 64).c_str());
+    DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
+    DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
+    DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+    DEBUG_PRINTF("last_start %d\n", last_start);
+
+    // There is no 16x16 variant yet, so fall back to 32x16 instead.
+    if (bit_index > 8) {
+        assert(multi_len <= 32);
+        multi_len = 32;
+    }
+
+    const auto *end_inst = program.end_instruction();
+    assert(multi_len == 16 || multi_len == 32 || multi_len == 64);
+    if (multi_len == 16) {
+        neg_mask &= 0xffff;
+        assert(!(hi_bits_mask & ~0xffffULL));
+        assert(!(lo_bits_mask & ~0xffffULL));
+        assert(bit_index <= 8);
+        array<u8, 32> nib_mask;
+        copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin());
+        copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16);
+
+        auto ri = make_unique<RoseInstrCheckMultipathShufti16x8>
+                  (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask,
+                   lo_bits_mask, neg_mask, base_offset, last_start, end_inst);
+        program.add_before_end(move(ri));
+    } else if (multi_len == 32) {
+        neg_mask &= 0xffffffff;
+        assert(!(hi_bits_mask & ~0xffffffffULL));
+        assert(!(lo_bits_mask & ~0xffffffffULL));
+        if (bit_index <= 8) {
+            auto ri = make_unique<RoseInstrCheckMultipathShufti32x8>
+                      (hi_mask, lo_mask, bucket_select_lo, data_select_mask,
+                       hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
+                       last_start, end_inst);
+            program.add_before_end(move(ri));
+        } else {
+            auto ri = make_unique<RoseInstrCheckMultipathShufti32x16>
+                      (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
+                       data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
+                       base_offset, last_start, end_inst);
+            program.add_before_end(move(ri));
+        }
+    } else {
+        auto ri = make_unique<RoseInstrCheckMultipathShufti64>
+                  (hi_mask, lo_mask, bucket_select_lo, data_select_mask,
+                   hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
+                   last_start, end_inst);
+        program.add_before_end(move(ri));
+    }
+    return true;
+}
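
// The linearisation above concatenates every path into one vector, recording
// in data_select_mask which input byte feeds each lane; lo_bits_mask marks
// the first lane of each path and hi_bits_mask the last, which is presumably
// how the runtime delimits one alternative from the next.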
+
+static
+void makeRoleMultipathLookaround(lookaround_info &lookarounds,
+                                 const vector<vector<LookEntry>> &multi_look,
+                                 RoseProgram &program) {
+    assert(!multi_look.empty());
+    assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
+    vector<vector<LookEntry>> ordered_look;
+    set<s32> look_offset;
+
+    assert(!multi_look[0].empty());
+    s32 last_start = multi_look[0][0].offset;
+
+    // build offset table.
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        last_start = max(last_start, (s32)look.begin()->offset);
+
+        for (const auto &t : look) {
+            look_offset.insert(t.offset);
+        }
+    }
+
+    array<u8, MULTIPATH_MAX_LEN> start_mask;
+    if (multi_look.size() < MAX_LOOKAROUND_PATHS) {
+        start_mask.fill((1 << multi_look.size()) - 1);
+    } else {
+        start_mask.fill(0xff);
+    }
+
+    u32 path_idx = 0;
+    for (const auto &look : multi_look) {
+        for (const auto &t : look) {
+            assert(t.offset >= (int)*look_offset.begin());
+            size_t update_offset = t.offset - *look_offset.begin() + 1;
+            if (update_offset < start_mask.size()) {
+                start_mask[update_offset] &= ~(1 << path_idx);
+            }
+        }
+        path_idx++;
+    }
+
+    for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) {
+        start_mask[i] &= start_mask[i - 1];
+        DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
+    }
+
+    assert(look_offset.size() <= MULTIPATH_MAX_LEN);
+
+    assert(last_start < 0);
+
+    for (const auto &offset : look_offset) {
+        vector<LookEntry> multi_entry;
+        multi_entry.resize(MAX_LOOKAROUND_PATHS);
+
+        for (size_t i = 0; i < multi_look.size(); i++) {
+            for (const auto &t : multi_look[i]) {
+                if (t.offset == offset) {
+                    multi_entry[i] = t;
+                }
+            }
+        }
+        ordered_look.emplace_back(multi_entry);
+    }
+
+    u32 look_idx, reach_idx;
+    addLookaround(lookarounds, ordered_look, look_idx, reach_idx);
+    u32 look_count = verify_u32(ordered_look.size());
+
+    auto ri = make_unique<RoseInstrMultipathLookaround>(look_idx, reach_idx,
+                                                        look_count, last_start,
+                                                        start_mask,
+                                                    program.end_instruction());
+    program.add_before_end(move(ri));
+}
+
+static
+void makeRoleLookaround(const RoseBuildImpl &build,
+                        const map<RoseVertex, left_build_info> &leftfix_info,
+                        lookaround_info &lookarounds, RoseVertex v,
+                        RoseProgram &program) {
+    if (!build.cc.grey.roseLookaroundMasks) {
+        return;
+    }
+
+    vector<vector<LookEntry>> looks;
+
+    // Lookaround from leftfix (mandatory).
+    if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) {
+        DEBUG_PRINTF("using leftfix lookaround\n");
+        looks = leftfix_info.at(v).lookaround;
+    }
+
+    // We may be able to find more lookaround info (advisory) and merge it
+    // in.
+    if (looks.size() <= 1) {
+        vector<LookEntry> look;
+        vector<LookEntry> look_more;
+        if (!looks.empty()) {
+            look = move(looks.front());
+        }
+        findLookaroundMasks(build, v, look_more);
+        mergeLookaround(look, look_more);
+        if (!look.empty()) {
+            makeLookaroundInstruction(lookarounds, look, program);
+        }
+        return;
+    }
+
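+    // Multiple paths: try the shufti-based implementation first, falling
+    // back to a generic multipath lookaround instruction if it cannot be
+    // built.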
+    if (!makeRoleMultipathShufti(looks, program)) {
+        assert(looks.size() <= 8);
+        makeRoleMultipathLookaround(lookarounds, looks, program);
+    }
+}
+
+static
+void makeRoleSuffix(const RoseBuildImpl &build,
+                    const map<suffix_id, u32> &suffixes,
+                    const map<u32, engine_info> &engine_info_by_queue,
+                    RoseVertex v, RoseProgram &prog) {
+    const auto &g = build.g;
+    if (!g[v].suffix) {
+        return;
+    }
+    assert(contains(suffixes, g[v].suffix));
+    u32 queue = suffixes.at(g[v].suffix);
+    u32 event;
+    assert(contains(engine_info_by_queue, queue));
+    const auto eng_info = engine_info_by_queue.at(queue);
+    if (isContainerType(eng_info.type)) {
+        auto tamaProto = g[v].suffix.tamarama.get();
+        assert(tamaProto);
+        event = (u32)MQE_TOP_FIRST +
+                  tamaProto->top_remap.at(make_pair(g[v].index,
+                                                    g[v].suffix.top));
+        assert(event < MQE_INVALID);
+    } else if (isMultiTopType(eng_info.type)) {
+        assert(!g[v].suffix.haig);
+        event = (u32)MQE_TOP_FIRST + g[v].suffix.top;
+        assert(event < MQE_INVALID);
+    } else {
+        // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP
+        // event.
+        assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph));
+        event = MQE_TOP;
+    }
+
+    prog.add_before_end(make_unique<RoseInstrTriggerSuffix>(queue, event));
+}
+
+static
+void addInfixTriggerInstructions(vector<TriggerInfo> triggers,
+                                 RoseProgram &prog) {
+    // Order, de-dupe and add instructions to the end of program.
+    sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) {
+        return tie(a.cancel, a.queue, a.event) <
+               tie(b.cancel, b.queue, b.event);
+    });
+    for (const auto &ti : triggers) {
+        prog.add_before_end(
+             make_unique<RoseInstrTriggerInfix>(ti.cancel, ti.queue, ti.event));
+    }
+}
+
+static
+void makeRoleInfixTriggers(const RoseBuildImpl &build,
+                           const map<RoseVertex, left_build_info> &leftfix_info,
+                           const map<u32, engine_info> &engine_info_by_queue,
+                           RoseVertex u, RoseProgram &program) {
+    const auto &g = build.g;
+
+    vector<TriggerInfo> triggers;
+
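+    // Collect a trigger for each successor leftfix engine; leftfixes
+    // implemented purely as lookaround need no trigger.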
+    for (const auto &e : out_edges_range(u, g)) {
+        RoseVertex v = target(e, g);
+        if (!g[v].left) {
+            continue;
+        }
+
+        assert(contains(leftfix_info, v));
+        const left_build_info &lbi = leftfix_info.at(v);
+        if (lbi.has_lookaround) {
+            continue;
+        }
+
+        assert(contains(engine_info_by_queue, lbi.queue));
+        const auto &eng_info = engine_info_by_queue.at(lbi.queue);
+
+        u32 top;
+        if (isContainerType(eng_info.type)) {
+            auto tamaProto = g[v].left.tamarama.get();
+            assert(tamaProto);
+            top = MQE_TOP_FIRST + tamaProto->top_remap.at(
+                                      make_pair(g[v].index, g[e].rose_top));
+            assert(top < MQE_INVALID);
+        } else if (!isMultiTopType(eng_info.type)) {
+            // DFAs have no TOP_N support, so they get a classic MQE_TOP
+            // event.
+            assert(num_tops(g[v].left) == 1);
+            top = MQE_TOP;
+        } else {
+            top = MQE_TOP_FIRST + g[e].rose_top;
+            assert(top < MQE_INVALID);
+        }
+
+        triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top);
+    }
+
+    addInfixTriggerInstructions(move(triggers), program);
+}
+
+/**
+ * \brief True if the given vertex is a role that can only be switched on at
+ * EOD.
+ */
+static
+bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) {
+    const RoseGraph &g = tbi.g;
+
+    // All such roles have only (0,0) edges to vertices with the eod_accept
+    // property, and no other effects (suffixes, ordinary reports, etc, etc).
+
+    if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) {
+        return false;
+    }
+
+    for (const auto &e : out_edges_range(v, g)) {
+        RoseVertex w = target(e, g);
+        if (!g[w].eod_accept) {
+            return false;
+        }
+        assert(!g[w].reports.empty());
+        assert(g[w].literals.empty());
+
+        if (g[e].minBound || g[e].maxBound) {
+            return false;
+        }
+    }
+
+    /* There is no point enforcing this check at runtime if this role is
+     * only fired by the eod event literal. */
+    if (tbi.eod_event_literal_id != MO_INVALID_IDX &&
+        g[v].literals.size() == 1 &&
+        *g[v].literals.begin() == tbi.eod_event_literal_id) {
+        return false;
+    }
+
+    return true;
+}
+
+static
+void addCheckOnlyEodInstruction(RoseProgram &prog) {
+    DEBUG_PRINTF("only at eod\n");
+    const auto *end_inst = prog.end_instruction();
+    prog.add_before_end(make_unique<RoseInstrCheckOnlyEod>(end_inst));
+}
+
+static
+void makeRoleEagerEodReports(const RoseBuildImpl &build,
+                         const map<RoseVertex, left_build_info> &leftfix_info,
+                         bool needs_catchup, RoseVertex v,
+                         RoseProgram &program) {
+    RoseProgram eod_program;
+
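+    // Gather reports from successor vertices whose edges allow them to be
+    // raised eagerly at EOD.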
+    for (const auto &e : out_edges_range(v, build.g)) {
+        if (canEagerlyReportAtEod(build, e)) {
+            RoseProgram block;
+            makeRoleReports(build, leftfix_info, needs_catchup,
+                            target(e, build.g), block);
+            eod_program.add_block(move(block));
+        }
+    }
+
+    if (eod_program.empty()) {
+        return;
+    }
+
+    if (!onlyAtEod(build, v)) {
+        // The rest of our program wasn't EOD anchored, so we need to guard
+        // these reports with a check.
+        addCheckOnlyEodInstruction(program);
+    }
+
+    program.add_before_end(move(eod_program));
+}
+
+/* Makes a program for a role/vertex given a specific pred/in_edge. */
+static
+RoseProgram makeRoleProgram(const RoseBuildImpl &build,
+                        const map<RoseVertex, left_build_info> &leftfix_info,
+                        const map<suffix_id, u32> &suffixes,
+                        const map<u32, engine_info> &engine_info_by_queue,
+                        lookaround_info &lookarounds,
+                        const unordered_map<RoseVertex, u32> &roleStateIndices,
+                        ProgramBuild &prog_build, const RoseEdge &e) {
+    const RoseGraph &g = build.g;
+    auto v = target(e, g);
+
+    RoseProgram program;
+
+    // First, add program instructions that enforce preconditions without
+    // effects.
+
+    if (onlyAtEod(build, v)) {
+        addCheckOnlyEodInstruction(program);
+    }
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    // This role program may be triggered by different predecessors, with
+    // different offset bounds. We must ensure we put this check/set operation
+    // after the bounds check to deal with this case.
+    if (in_degree(v, g) > 1) {
+        assert(!build.isRootSuccessor(v));
+        makeRoleCheckNotHandled(prog_build, v, program);
+    }
+
+    makeRoleLookaround(build, leftfix_info, lookarounds, v, program);
+    makeRoleCheckLeftfix(build, leftfix_info, v, program);
+
+    // Next, we can add program instructions that have effects. This must be
+    // done as a series of blocks, as some of them (like reports) are
+    // escapable.
+
+    RoseProgram effects_block;
+
+    RoseProgram reports_block;
+    makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v,
+                    reports_block);
+    effects_block.add_block(move(reports_block));
+
+    RoseProgram infix_block;
+    makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v,
+                          infix_block);
+    effects_block.add_block(move(infix_block));
+
+    // Note: SET_GROUPS instruction must be after infix triggers, as an infix
+    // going dead may switch off groups.
+    RoseProgram groups_block;
+    makeRoleGroups(build.g, prog_build, v, groups_block);
+    effects_block.add_block(move(groups_block));
+
+    RoseProgram suffix_block;
+    makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block);
+    effects_block.add_block(move(suffix_block));
+
+    RoseProgram state_block;
+    makeRoleSetState(roleStateIndices, v, state_block);
+    effects_block.add_block(move(state_block));
+
+    // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if
+    // the program doesn't have one already).
+    RoseProgram eod_block;
+    makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v,
+                            eod_block);
+    effects_block.add_block(move(eod_block));
+
+    /* a 'ghost role' may do nothing if we know that its groups are already set
+     * - in this case we can avoid producing a program at all. */
+    if (effects_block.empty()) {
+        return {};
+    }
+
+    program.add_before_end(move(effects_block));
+    return program;
+}
+
+static
+void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id,
+                                RoseProgram &prog) {
+    const auto &info = build.literal_info.at(lit_id);
+    if (!info.squash_group) {
+        return;
+    }
+
+    DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask);
+    assert(info.group_mask);
+    /* Note: group_mask is negated. */
+    prog.add_before_end(make_unique<RoseInstrSquashGroups>(~info.group_mask));
+}
+
+RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks) {
+    DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size());
+
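+    /* Sort by program hash so that duplicate blocks (equivalent programs
+     * hash equally) become adjacent, allowing unique() below to remove
+     * them. */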
+    sort(blocks.begin(), blocks.end(),
+         [](const RoseProgram &a, const RoseProgram &b) {
+             RoseProgramHash hasher;
+             return hasher(a) < hasher(b);
+         });
+
+    blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()),
+                 blocks.end());
+
+    DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());
+
+    RoseProgram prog;
+    for (auto &block : blocks) {
+        /* If we have multiple blocks from different literals and any of them
+         * squash groups, we will have to add a CLEAR_WORK_DONE instruction to
+         * each literal program block to clear the work_done flag, so that it
+         * is only set if work has been done by the current block. */
+        if (!prog.empty() && reads_work_done_flag(block)) {
+            RoseProgram clear_block;
+            clear_block.add_before_end(make_unique<RoseInstrClearWorkDone>());
+            prog.add_block(move(clear_block));
+        }
+
+        prog.add_block(move(block));
+    }
+
+    return prog;
+}
+
+RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
+                          const map<RoseVertex, left_build_info> &leftfix_info,
+                          const map<suffix_id, u32> &suffixes,
+                          const map<u32, engine_info> &engine_info_by_queue,
+                          lookaround_info &lookarounds,
+                          const unordered_map<RoseVertex, u32> &roleStateIndices,
+                          ProgramBuild &prog_build, u32 lit_id,
+                          const vector<RoseEdge> &lit_edges,
+                          bool is_anchored_replay_program) {
+    const auto &g = build.g;
+
+    DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());
+
+    // Construct initial program up front, as its early checks must be able
+    // to jump to end and terminate processing for this literal.
+    auto lit_program = makeLitInitialProgram(build, lookarounds, prog_build,
+                                             lit_id, lit_edges,
+                                             is_anchored_replay_program);
+
+    RoseProgram role_programs;
+
+    // Predecessor state id -> program block.
+    map<u32, RoseProgram> pred_blocks;
+
+    // Construct sparse iter sub-programs.
+    for (const auto &e : lit_edges) {
+        const auto &u = source(e, g);
+        if (build.isAnyStart(u)) {
+            continue; // Root roles are not handled with sparse iterator.
+        }
+        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        assert(contains(roleStateIndices, u));
+        u32 pred_state = roleStateIndices.at(u);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, lookarounds,
+                                         roleStateIndices, prog_build, e);
+        if (!role_prog.empty()) {
+            pred_blocks[pred_state].add_block(move(role_prog));
+        }
+    }
+
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks).
+    addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs);
+
+    // Add blocks to handle root roles.
+    for (const auto &e : lit_edges) {
+        const auto &u = source(e, g);
+        if (!build.isAnyStart(u)) {
+            continue;
+        }
+        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, lookarounds,
+                                         roleStateIndices, prog_build, e);
+        role_programs.add_block(move(role_prog));
+    }
+
+    if (lit_id == build.eod_event_literal_id) {
+        /* Note: does not require the lit initial program */
+        assert(build.eod_event_literal_id != MO_INVALID_IDX);
+        return role_programs;
+    }
+
+    /* Instructions to run even if a role program bails out */
+    RoseProgram unconditional_block;
+
+    // Literal may squash groups.
+    makeGroupSquashInstruction(build, lit_id, unconditional_block);
+
+    role_programs.add_block(move(unconditional_block));
+    lit_program.add_before_end(move(role_programs));
+
+    return lit_program;
+}
+
+RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
+                                    lookaround_info &lookarounds,
+                                    ProgramBuild &prog_build,
+                                    const vector<u32> &lit_ids) {
+    assert(!lit_ids.empty());
+    assert(build.cc.streaming);
+
+    vector<RoseProgram> blocks;
+
+    for (const auto &lit_id : lit_ids) {
+        DEBUG_PRINTF("lit_id=%u\n", lit_id);
+        const auto &info = build.literal_info.at(lit_id);
+        if (info.delayed_ids.empty()) {
+            continue; // No delayed IDs, no work to do.
+        }
+
+        RoseProgram prog;
+        if (!build.isDelayed(lit_id)) {
+            makeCheckLiteralInstruction(build.literals.right.at(lit_id),
+                                        prog_build.longLitLengthThreshold, prog,
+                                        build.cc);
+        }
+
+        makeCheckLitMaskInstruction(build, lookarounds, lit_id, prog);
+        makePushDelayedInstructions(build.literals, prog_build,
+                                    build.literal_info.at(lit_id).delayed_ids,
+                                    prog);
+        blocks.push_back(move(prog));
+    }
+
+    return assembleProgramBlocks(move(blocks));
+}
+
+RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
+                                 ProgramBuild &prog_build, const RoseEdge &e,
+                                 const bool multiple_preds) {
+    const RoseGraph &g = build.g;
+    const RoseVertex v = target(e, g);
+
+    RoseProgram program;
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    if (multiple_preds) {
+        // Only necessary when there is more than one pred.
+        makeRoleCheckNotHandled(prog_build, v, program);
+    }
+
+    makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program);
+
+    const bool has_som = false;
+    RoseProgram report_block;
+    for (const auto &id : g[v].reports) {
+        makeReport(build, id, has_som, report_block);
+    }
+    program.add_before_end(move(report_block));
+
+    return program;
+}
+
+static
+void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup,
+                    ReportID id, RoseProgram &program) {
+    if (!needs_mpv_catchup) {
+        return;
+    }
+
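+    // Chain reports just feed the MPV itself, so they do not need an MPV
+    // catch-up instruction.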
+    const Report &report = rm.getReport(id);
+    if (report.type == INTERNAL_ROSE_CHAIN) {
+        return;
+    }
+
+    program.add_before_end(make_unique<RoseInstrCatchUpMpv>());
+}
+
+RoseProgram makeReportProgram(const RoseBuildImpl &build,
+                              bool needs_mpv_catchup, ReportID id) {
+    RoseProgram prog;
+
+    makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog);
+
+    const bool has_som = false;
+    makeReport(build, id, has_som, prog);
+
+    return prog;
+}
+
+RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
+                                const set<ReportID> &reports) {
+    // Note: no CATCHUP instruction is necessary in the boundary case, as we
+    // should always be caught up (and may not even have the resources in
+    // scratch to support it).
+
+    const bool has_som = false;
+    RoseProgram prog;
+    for (const auto &id : reports) {
+        makeReport(build, id, has_som, prog);
+    }
+
+    return prog;
+}
+
+static
+void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block,
+                        RoseProgram &program) {
+    // Prepend an instruction to check the pred state is on.
+    const auto *end_inst = pred_block.end_instruction();
+    pred_block.insert(begin(pred_block),
+                      make_unique<RoseInstrCheckState>(pred_state, end_inst));
+    program.add_block(move(pred_block));
+}
+
+static
+void addPredBlocksAny(map<u32, RoseProgram> &pred_blocks, u32 num_states,
+                      RoseProgram &program) {
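+    /* All pred blocks are equivalent here, so we add a single copy of the
+     * block, guarded by a sparse iterator that fires if ANY of the pred
+     * states is on. */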
+    RoseProgram sparse_program;
+
+    vector<u32> keys;
+    for (const u32 &key : pred_blocks | map_keys) {
+        keys.push_back(key);
+    }
+
+    const RoseInstruction *end_inst = sparse_program.end_instruction();
+    auto ri = make_unique<RoseInstrSparseIterAny>(num_states, keys, end_inst);
+    sparse_program.add_before_end(move(ri));
+
+    RoseProgram &block = pred_blocks.begin()->second;
+
+    /* we no longer need the check handled instruction as all the pred-role
+     * blocks are being collapsed together */
+    stripCheckHandledInstruction(block);
+
+    sparse_program.add_before_end(move(block));
+    program.add_block(move(sparse_program));
+}
+
+static
+void addPredBlocksMulti(map<u32, RoseProgram> &pred_blocks,
+                        u32 num_states, RoseProgram &program) {
+    assert(!pred_blocks.empty());
+
+    RoseProgram sparse_program;
+    const RoseInstruction *end_inst = sparse_program.end_instruction();
+    vector<pair<u32, const RoseInstruction *>> jump_table;
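+    // jump_table holds (key, target) pairs for the SPARSE_ITER_BEGIN
+    // instruction; it is filled in as each pred block is spliced in below.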
+
+    // BEGIN instruction.
+    auto ri_begin = make_unique<RoseInstrSparseIterBegin>(num_states, end_inst);
+    RoseInstrSparseIterBegin *begin_inst = ri_begin.get();
+    sparse_program.add_before_end(move(ri_begin));
+
+    // NEXT instructions, one per pred program.
+    u32 prev_key = pred_blocks.begin()->first;
+    for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) {
+        auto ri = make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst,
+                                                       end_inst);
+        sparse_program.add_before_end(move(ri));
+        prev_key = it->first;
+    }
+
+    // Splice in each pred program after its BEGIN/NEXT.
+    auto out_it = begin(sparse_program);
+    for (auto &m : pred_blocks) {
+        u32 key = m.first;
+        RoseProgram &flat_prog = m.second;
+        assert(!flat_prog.empty());
+        const size_t block_len = flat_prog.size() - 1; // without INSTR_END.
+
+        assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) ||
+               dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get()));
+        out_it = sparse_program.insert(++out_it, move(flat_prog));
+
+        // Jump table target for this key is the beginning of the block we just
+        // spliced in.
+        jump_table.emplace_back(key, out_it->get());
+
+        assert(distance(begin(sparse_program), out_it) + block_len <=
+               sparse_program.size());
+        advance(out_it, block_len);
+    }
+
+    // Write the jump table back into the SPARSE_ITER_BEGIN instruction.
+    begin_inst->jump_table = move(jump_table);
+
+    program.add_block(move(sparse_program));
+}
+
+void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states,
+                   RoseProgram &program) {
+    // Trim empty blocks, if any exist.
+    for (auto it = pred_blocks.begin(); it != pred_blocks.end();) {
+        if (it->second.empty()) {
+            it = pred_blocks.erase(it);
+        } else {
+            ++it;
+        }
+    }
+
+    const size_t num_preds = pred_blocks.size();
+    if (num_preds == 0) {
+        return;
+    }
+
+    if (num_preds == 1) {
+        const auto head = pred_blocks.begin();
+        addPredBlockSingle(head->first, head->second, program);
+        return;
+    }
+
+    // First, see if all our blocks are equivalent, in which case we can
+    // collapse them down into one.
+    const auto &blocks = pred_blocks | map_values;
+    if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) {
+            return RoseProgramEquivalence()(*begin(blocks), block);
+        })) {
+        DEBUG_PRINTF("all blocks equiv\n");
+        addPredBlocksAny(pred_blocks, num_states, program);
+        return;
+    }
+
+    addPredBlocksMulti(pred_blocks, num_states, program);
+}
+
+void applyFinalSpecialisation(RoseProgram &program) {
+    assert(!program.empty());
+    assert(program.back().code() == ROSE_INSTR_END);
+    if (program.size() < 2) {
+        return;
+    }
+
+    /* Replace the second-to-last instruction (before END) with a one-shot
+     * specialisation if available. */
+    auto it = next(program.rbegin());
+    if (auto *ri = dynamic_cast<const RoseInstrReport *>(it->get())) {
+        DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n");
+        program.replace(it, make_unique<RoseInstrFinalReport>(
+                                ri->onmatch, ri->offset_adjust));
+    }
+}
+
+void recordLongLiterals(vector<ue2_case_string> &longLiterals,
+                        const RoseProgram &program) {
+    for (const auto &ri : program) {
+        if (const auto *ri_check =
+                dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) {
+            DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n",
+                         escapeString(ri_check->literal).c_str());
+            longLiterals.emplace_back(ri_check->literal, false);
+            continue;
+        }
+        if (const auto *ri_check =
+                dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) {
+            DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n",
+                         escapeString(ri_check->literal).c_str());
+            longLiterals.emplace_back(ri_check->literal, true);
+        }
+    }
+}
+
+void recordResources(RoseResources &resources, const RoseProgram &program) {
+    for (const auto &ri : program) {
+        switch (ri->code()) {
+        case ROSE_INSTR_TRIGGER_SUFFIX:
+            resources.has_suffixes = true;
+            break;
+        case ROSE_INSTR_TRIGGER_INFIX:
+        case ROSE_INSTR_CHECK_INFIX:
+        case ROSE_INSTR_CHECK_PREFIX:
+        case ROSE_INSTR_SOM_LEFTFIX:
+            resources.has_leftfixes = true;
+            break;
+        case ROSE_INSTR_SET_STATE:
+        case ROSE_INSTR_CHECK_STATE:
+        case ROSE_INSTR_SPARSE_ITER_BEGIN:
+        case ROSE_INSTR_SPARSE_ITER_NEXT:
+            resources.has_states = true;
+            break;
+        case ROSE_INSTR_CHECK_GROUPS:
+            resources.checks_groups = true;
+            break;
+        case ROSE_INSTR_PUSH_DELAYED:
+            resources.has_lit_delay = true;
+            break;
+        case ROSE_INSTR_CHECK_LONG_LIT:
+        case ROSE_INSTR_CHECK_LONG_LIT_NOCASE:
+            resources.has_lit_check = true;
+            break;
+        default:
+            break;
+        }
+    }
+}
+
 } // namespace ue2
index c25aab61acd1fe46b707aa139ec2b244924a2029..d6a9e21896b90706362c357cf16db73138cf83c7 100644 (file)
 
 namespace ue2 {
 
+struct LookEntry;
 class RoseEngineBlob;
 class RoseInstruction;
+struct RoseResources;
 
 /**
  * \brief Container for a list of program instructions.
@@ -145,11 +147,161 @@ public:
     bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
 };
 
-/* Removes any CHECK_HANDLED instructions from the given program */
-void stripCheckHandledInstruction(RoseProgram &prog);
+/** \brief Data only used during construction of various programs (literal,
+ * anchored, delay, etc). */
+struct ProgramBuild : noncopyable {
+    explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh,
+                          bool catchup)
+        : floatingMinLiteralMatchOffset(fMinLitOffset),
+        longLitLengthThreshold(longLitThresh), needs_catchup(catchup) {
+    }
+
+    /** \brief Minimum offset of a match from the floating table. */
+    const u32 floatingMinLiteralMatchOffset;
+
+    /** \brief Long literal length threshold, used in streaming mode. */
+    const size_t longLitLengthThreshold;
+
+    /** \brief True if reports need CATCH_UP instructions to catch up suffixes,
+     * outfixes etc. */
+    const bool needs_catchup;
+
+    /** \brief Mapping from vertex to key, for vertices with a
+     * CHECK_NOT_HANDLED instruction. */
+    ue2::unordered_map<RoseVertex, u32> handledKeys;
+
+    /** \brief Mapping from Rose literal ID to anchored program index. */
+    std::map<u32, u32> anchored_programs;
+
+    /** \brief Mapping from Rose literal ID to delayed program index. */
+    std::map<u32, u32> delay_programs;
+
+    /** \brief Mapping from every vertex to the groups that must be on for that
+     * vertex to be reached. */
+    ue2::unordered_map<RoseVertex, rose_group> vertex_group_map;
+
+    /** \brief Global bitmap of groups that can be squashed. */
+    rose_group squashable_groups = 0;
+};
+
+void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
+void addSuffixesEodProgram(RoseProgram &program);
+void addMatcherEodProgram(RoseProgram &program);
+
+static constexpr u32 INVALID_QUEUE = ~0U;
+
+struct left_build_info {
+    // Constructor for an engine implementation.
+    left_build_info(u32 q, u32 l, u32 t, rose_group sm,
+                    const std::vector<u8> &stops, u32 max_ql, u8 cm_count,
+                    const CharReach &cm_cr);
+
+    // Constructor for a lookaround implementation.
+    explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks);
+
+    u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */
+    u32 lag = 0;
+    u32 transient = 0;
+    rose_group squash_mask = ~rose_group{0};
+    std::vector<u8> stopAlphabet;
+    u32 max_queuelen = 0;
+    u8 countingMiracleCount = 0;
+    CharReach countingMiracleReach;
+    u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
+    bool has_lookaround = false;
+
+    // alternative implementation to the NFA
+    std::vector<std::vector<LookEntry>> lookaround;
+};
+
+struct lookaround_info : noncopyable {
+    /** \brief LookEntry list cache, so that we can reuse the look index and
+     * reach index for the same lookaround. */
+    ue2::unordered_map<std::vector<std::vector<LookEntry>>,
+        std::pair<size_t, size_t>> cache;
+
+    /** \brief Lookaround table for Rose roles. */
+    std::vector<std::vector<std::vector<LookEntry>>> table;
+
+    /** \brief Lookaround look table size. */
+    size_t lookTableSize = 0;
+
+    /** \brief Lookaround reach table size.
+     *
+     * Since single-path and multi-path lookarounds use different bitvector
+     * ranges (32 and 256 respectively), we need to maintain both the look
+     * table size and the reach table size. */
+};
+
+/**
+ * \brief Provides a brief summary of properties of an NFA that has already been
+ * finalised and stored in the blob.
+ */
+struct engine_info {
+    engine_info(const NFA *nfa, bool trans);
+
+    enum NFAEngineType type;
+    bool accepts_eod;
+    u32 stream_size;
+    u32 scratch_size;
+    u32 scratch_align;
+    bool transient;
+};
+
+/**
+ * \brief Consumes list of program blocks corresponding to different literals,
+ * checks them for duplicates and then concatenates them into one program.
+ *
+ * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is
+ * inserted to prevent the work_done flag being contaminated by early blocks.
+ */
+RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks);
+
+RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
+                      const std::map<RoseVertex, left_build_info> &leftfix_info,
+                      const std::map<suffix_id, u32> &suffixes,
+                      const std::map<u32, engine_info> &engine_info_by_queue,
+                      lookaround_info &lookarounds,
+                      const unordered_map<RoseVertex, u32> &roleStateIndices,
+                      ProgramBuild &prog_build, u32 lit_id,
+                      const std::vector<RoseEdge> &lit_edges,
+                      bool is_anchored_replay_program);
+
+RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
+                                    lookaround_info &lookarounds,
+                                    ProgramBuild &prog_build,
+                                    const std::vector<u32> &lit_ids);
+
+RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
+                                 ProgramBuild &prog_build, const RoseEdge &e,
+                                 const bool multiple_preds);
+
+RoseProgram makeReportProgram(const RoseBuildImpl &build,
+                              bool needs_mpv_catchup, ReportID id);
+
+RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
+                                const std::set<ReportID> &reports);
+
+struct TriggerInfo {
+    TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {}
+    bool cancel;
+    u32 queue;
+    u32 event;
+
+    bool operator==(const TriggerInfo &b) const {
+        return cancel == b.cancel && queue == b.queue && event == b.event;
+    }
+};
+
+void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states,
+                   RoseProgram &program);
+
+void applyFinalSpecialisation(RoseProgram &program);
+
+void recordLongLiterals(std::vector<ue2_case_string> &longLiterals,
+                        const RoseProgram &program);
 
-/** Returns true if the program may read the the interpreter's work_done flag */
-bool reads_work_done_flag(const RoseProgram &prog);
+void recordResources(RoseResources &resources, const RoseProgram &program);
 
 } // namespace ue2
 
diff --git a/src/rose/rose_build_resources.h b/src/rose/rose_build_resources.h
new file mode 100644 (file)
index 0000000..3edb81b
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_RESOURCES_H
+#define ROSE_BUILD_RESOURCES_H
+
+namespace ue2 {
+
+/**
+ * \brief Structure tracking which resources are used by this Rose instance at
+ * runtime.
+ *
+ * We use this to control how much initialisation we need to do at the
+ * beginning of a stream/block at runtime.
+ */
+struct RoseResources {
+    bool has_outfixes = false;
+    bool has_suffixes = false;
+    bool has_leftfixes = false;
+    bool has_literals = false;
+    bool has_states = false;
+    bool checks_groups = false;
+    bool has_lit_delay = false;
+    bool has_lit_check = false; // long literal support
+    bool has_anchored = false;
+    bool has_floating = false;
+    bool has_eod = false;
+};
+
+} // namespace ue2
+
+#endif