]> git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
FDR: Squash buckets of included literals in FDR confirm
author: Wang, Xiang W <xiang.w.wang@intel.com>
Thu, 22 Jun 2017 08:50:45 +0000 (04:50 -0400)
committer: Matthew Barr <matthew.barr@intel.com>
Mon, 21 Aug 2017 01:12:36 +0000 (11:12 +1000)
 - Change the compile of literal matchers to two passes.
 - Reverse the bucket assignment in FDR so that buckets with longer literals
   have smaller bucket ids.
 - Squash the buckets of included literals and jump to the program of
   included literals directly from the parent literal program without going
   through FDR confirm for included literals.

26 files changed:
src/fdr/fdr.c
src/fdr/fdr_compile.cpp
src/fdr/fdr_compile.h
src/fdr/fdr_compile_internal.h
src/fdr/fdr_confirm_compile.cpp
src/fdr/fdr_confirm_runtime.h
src/fdr/teddy_compile.cpp
src/fdr/teddy_compile.h
src/fdr/teddy_runtime_common.h
src/hwlm/hwlm_build.cpp
src/hwlm/hwlm_build.h
src/hwlm/hwlm_literal.h
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_dump.cpp
src/rose/rose_build_instructions.cpp
src/rose/rose_build_instructions.h
src/rose/rose_build_matchers.cpp
src/rose/rose_build_matchers.h
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_program.h
src/scratch.c
src/scratch.h
unit/internal/fdr.cpp
unit/internal/fdr_flood.cpp

index 8d072ea2cab2b73bc93b2c32bcd54d28234c11f7..f7da6981bdf54666a944ba549ed49ef669fd6d7a 100644 (file)
@@ -359,7 +359,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
         }
         u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1);
         confWithBit(fdrc, a, ptr_main - a->buf + byte, control,
-                    last_match_id, confVal);
+                    last_match_id, confVal, conf, bit);
     } while (unlikely(!!*conf));
 }
 
index 6f2de3d908031090e911944054fbd9c84d02ee6d..181f95126d69e6319734f64c88583de1ac4a27c0 100644 (file)
 #include "ue2common.h"
 #include "hwlm/hwlm_build.h"
 #include "util/compare.h"
+#include "util/container.h"
 #include "util/dump_mask.h"
+#include "util/make_unique.h"
 #include "util/math.h"
 #include "util/noncopyable.h"
 #include "util/target_info.h"
+#include "util/ue2_containers.h"
 #include "util/ue2string.h"
 #include "util/verify_types.h"
 
@@ -81,7 +84,6 @@ private:
     bool make_small;
 
     u8 *tabIndexToMask(u32 indexInTable);
-    void assignStringsToBuckets();
 #ifdef DEBUG
     void dumpMasks(const u8 *defaultMask);
 #endif
@@ -90,10 +92,13 @@ private:
     void createInitialState(FDR *fdr);
 
 public:
-    FDRCompiler(vector<hwlmLiteral> lits_in, const FDREngineDescription &eng_in,
+    FDRCompiler(vector<hwlmLiteral> lits_in,
+                map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
+                const FDREngineDescription &eng_in,
                 bool make_small_in, const Grey &grey_in)
         : eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()),
-          lits(move(lits_in)), make_small(make_small_in) {}
+          lits(move(lits_in)), bucketToLits(move(bucketToLits_in)),
+          make_small(make_small_in) {}
 
     bytecode_ptr<FDR> build();
 };
@@ -309,7 +314,10 @@ next_literal:
     return chunks;
 }
 
-void FDRCompiler::assignStringsToBuckets() {
+static
+map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
+                                    vector<hwlmLiteral> &lits,
+                                    const FDREngineDescription &eng) {
     const double MAX_SCORE = numeric_limits<double>::max();
 
     assert(!lits.empty()); // Shouldn't be called with no literals.
@@ -393,6 +401,7 @@ void FDRCompiler::assignStringsToBuckets() {
 
     // our best score is in t[0][N_BUCKETS-1] and we can follow the links
     // to find where our buckets should start and what goes into them
+    vector<vector<LiteralIndex>> buckets;
     for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
         u32 j = t[i][n - 1].second;
         if (j == 0) {
@@ -403,21 +412,33 @@ void FDRCompiler::assignStringsToBuckets() {
         u32 first_id = chunks[i].first_id;
         u32 last_id = chunks[j].first_id;
         assert(first_id < last_id);
-        u32 bucket = numBuckets - n;
         UNUSED const auto &first_lit = lits[first_id];
         UNUSED const auto &last_lit = lits[last_id - 1];
-        DEBUG_PRINTF("placing [%u-%u) in bucket %u (%u lits, len %zu-%zu, "
+        DEBUG_PRINTF("placing [%u-%u) in one bucket (%u lits, len %zu-%zu, "
                       "score %0.4f)\n",
-                      first_id, last_id, bucket, last_id - first_id,
+                      first_id, last_id, last_id - first_id,
                       first_lit.s.length(), last_lit.s.length(),
                       getScoreUtil(first_lit.s.length(), last_id - first_id));
 
-        auto &bucket_lits = bucketToLits[bucket];
-        for (u32 k = first_id; k < last_id; k++) {
-            bucket_lits.push_back(k);
+        vector<LiteralIndex> litIds;
+        u32 cnt = last_id - first_id;
+        // long literals first for included literals checking
+        for (u32 k = 0; k < cnt; k++) {
+            litIds.push_back(last_id - k - 1);
         }
+
         i = j;
+        buckets.push_back(litIds);
+    }
+
+    // reverse bucket id, longer literals come first
+    map<BucketIndex, vector<LiteralIndex>> bucketToLits;
+    size_t bucketCnt = buckets.size();
+    for (size_t i = 0; i < bucketCnt; i++) {
+        bucketToLits.emplace(bucketCnt - i - 1, move(buckets[i]));
     }
+
+    return bucketToLits;
 }
 
 #ifdef DEBUG
@@ -541,24 +562,216 @@ void FDRCompiler::setupTab() {
 }
 
 bytecode_ptr<FDR> FDRCompiler::build() {
-    assignStringsToBuckets();
     setupTab();
     return setupFDR();
 }
 
+static
+bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
+    auto s1 = lit1.s;
+    auto s2 = lit2.s;
+    if (lit1.nocase || lit2.nocase) {
+        upperString(s1);
+        upperString(s2);
+    }
+    size_t len1 = s1.length();
+    size_t len2 = s2.length();
+    assert(len1 >= len2);
+    return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2);
+}
+
+/*
+ * if lit2 is a suffix of lit1 but the case sensitivity, groups or mask info
+ * of lit2 is a subset of lit1, then lit1 can't squash lit2 and lit2 can
+ * possibly match when lit1 matches. In this case, we can't do bucket
+ * squashing. e.g. AAA(no case) in bucket 0, AA(no case) and aa in bucket 1,
+ * we can't squash bucket 1 if we have input like "aaa" as aa can also match.
+ */
+static
+bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
+    /* lit1 is caseless and lit2 is case sensitive */
+    if ((lit1.nocase && !lit2.nocase)) {
+        return true;
+    }
+
+    /* lit2's group is a subset of lit1 */
+    if (lit1.groups != lit2.groups &&
+        (lit2.groups == (lit1.groups & lit2.groups))) {
+        return true;
+    }
+
+    /* TODO: narrow down cases for mask check */
+    if (lit1.cmp != lit2.cmp || lit1.msk != lit2.msk) {
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * if lit2 is an included literal of both lit1 and lit0, and lit1 is an
+ * exceptional literal of lit0 - lit1 sometimes matches when lit0 matches,
+ * then we give up squashing for lit1. e.g. lit0:AAA(no case), lit1:aa,
+ * lit2:A(no case). We can have duplicate matches for input "aaa" if lit0
+ * and lit1 both squash lit2.
+ */
+static
+bool checkParentLit(
+            u32 pos1, const unordered_set<u32> &parent_map,
+            const unordered_map<u32, unordered_set<u32>> &exception_map) {
+    for (const auto pos2 : parent_map) {
+        if (contains(exception_map, pos2)) {
+            const auto &exception_pos = exception_map.at(pos2);
+            if (contains(exception_pos, pos1)) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+static
+void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
+                     size_t start, const vector<pair<u32, u32>> &group,
+                     unordered_map<u32, unordered_set<u32>> &parent_map,
+                     unordered_map<u32, unordered_set<u32>> &exception_map) {
+    auto &lit1 = lits[id1];
+    DEBUG_PRINTF("b:%u len:%zu\n", bucket1, lit1.s.length());
+
+    size_t cnt = group.size();
+    bool included = false;
+    bool exception = false;
+    u32 child_id = ~0U;
+    for (size_t i = start; i < cnt; i++) {
+        u32 bucket2 = group[i].first;
+        assert(bucket2 >= bucket1);
+
+        u32 id2 = group[i].second;
+        auto &lit2 = lits[id2];
+        // check if lit2 is a suffix of lit1
+        if (isSuffix(lit1, lit2)) {
+            /* if we have an included literal in the same bucket,
+             * quit and let the included literal do any possible squashing
+             */
+            if (bucket1 == bucket2) {
+                DEBUG_PRINTF("same bucket\n");
+                return;
+            }
+            /*
+             * if lit2 is a suffix but doesn't pass included checks for
+             * extra info, we give up squashing
+             */
+            if (includedCheck(lit1, lit2)) {
+                DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id);
+                exception_map[id1].insert(id2);
+                exception = true;
+            } else if (checkParentLit(id1, parent_map[id2], exception_map)) {
+                if (lit1.included_id == INVALID_LIT_ID) {
+                    DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n",
+                                 lit1.id, lit2.id);
+                    lit1.included_id = lit2.id;
+                } else {
+                    /*
+                     * if we have multiple included literals in one bucket,
+                     * give up squashing.
+                     */
+                    DEBUG_PRINTF("multiple included literals\n");
+                    lit1.included_id = INVALID_LIT_ID;
+                    return;
+                }
+                child_id = id2;
+                included = true;
+            }
+        }
+
+        size_t next = i + 1;
+        u32 nextBucket = next < cnt ? group[next].first : ~0U;
+        if (bucket2 != nextBucket) {
+            if (included) {
+                if (exception) {
+                    /*
+                     * give up if we have exception literals
+                     * in the same bucket as the included literal
+                     */
+                    lit1.included_id = INVALID_LIT_ID;
+                } else {
+                    parent_map[child_id].insert(id1);
+
+                    lit1.squash |= 1U << bucket2;
+                    DEBUG_PRINTF("build squash mask %2x for %u\n",
+                                 lit1.squash, lit1.id);
+                }
+                return;
+            }
+            exception = false;
+        }
+    }
+}
+
+static constexpr u32 INCLUDED_LIMIT = 1000;
+
+static
+void findIncludedLits(vector<hwlmLiteral> &lits,
+                      const vector<vector<pair<u32, u32>>> &lastCharMap) {
+    /** Map from the position of an included literal in the FDR hwlm literal
+     * vector to the positions of the literals that include it.
+     */
+    unordered_map<u32, unordered_set<u32>> parent_map;
+
+    /** Map from a literal's position in the FDR hwlm literal vector to the
+     * positions of its exception literals, which may match when it matches.
+     */
+    unordered_map<u32, unordered_set<u32>> exception_map;
+    for (const auto &group : lastCharMap) {
+        size_t cnt = group.size();
+        if (cnt > INCLUDED_LIMIT) {
+            continue;
+        }
+        for (size_t i = 0; i < cnt; i++) {
+            u32 bucket1 = group[i].first;
+            u32 id1 = group[i].second;
+            buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
+                            exception_map);
+        }
+    }
+}
+
+static
+void addIncludedInfo(
+               vector<hwlmLiteral> &lits, u32 nBuckets,
+               map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
+    vector<vector<pair<u32, u32>>> lastCharMap(256);
+
+    for (BucketIndex b = 0; b < nBuckets; b++) {
+        if (!bucketToLits[b].empty()) {
+            for (const LiteralIndex &lit_idx : bucketToLits[b]) {
+                const auto &lit = lits[lit_idx];
+                u8 c = mytoupper(lit.s.back());
+                lastCharMap[c].emplace_back(b, lit_idx);
+            }
+        }
+    }
+
+    findIncludedLits(lits, lastCharMap);
+}
+
 } // namespace
 
 static
-bytecode_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
-                                        bool make_small, const target_t &target,
-                                        const Grey &grey, u32 hint) {
+unique_ptr<HWLMProto> fdrBuildProtoInternal(u8 engType,
+                                            vector<hwlmLiteral> &lits,
+                                            bool make_small,
+                                            const target_t &target,
+                                            const Grey &grey, u32 hint) {
     DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
 
     if (grey.fdrAllowTeddy) {
-        auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, grey);
-        if (fdr) {
+        auto proto = teddyBuildProtoHinted(engType, lits, make_small, hint,
+                                           target);
+        if (proto) {
             DEBUG_PRINTF("build with teddy succeeded\n");
-            return fdr;
+            return proto;
         } else {
             DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
         }
@@ -576,23 +789,47 @@ bytecode_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
         des->stride = 1;
     }
 
-    FDRCompiler fc(lits, *des, make_small, grey);
+    auto bucketToLits = assignStringsToBuckets(lits, *des);
+    addIncludedInfo(lits, des->getNumBuckets(), bucketToLits);
+    auto proto =
+        ue2::make_unique<HWLMProto>(engType, move(des), lits, bucketToLits,
+                                    make_small);
+    return proto;
+}
+
+unique_ptr<HWLMProto> fdrBuildProto(u8 engType, vector<hwlmLiteral> lits,
+                                    bool make_small, const target_t &target,
+                                    const Grey &grey) {
+    return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
+                                 HINT_INVALID);
+}
+
+static
+bytecode_ptr<FDR> fdrBuildTableInternal(const HWLMProto &proto,
+                                        const Grey &grey) {
+
+    if (proto.teddyEng) {
+        return teddyBuildTable(proto, grey);
+    }
+
+    FDRCompiler fc(proto.lits, proto.bucketToLits, *(proto.fdrEng),
+                   proto.make_small, grey);
     return fc.build();
 }
 
-bytecode_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
-                                bool make_small, const target_t &target,
-                                const Grey &grey) {
-    return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID);
+bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey) {
+    return fdrBuildTableInternal(proto, grey);
 }
 
 #if !defined(RELEASE_BUILD)
 
-bytecode_ptr<FDR> fdrBuildTableHinted(const vector<hwlmLiteral> &lits,
-                                      bool make_small, u32 hint,
-                                      const target_t &target,
-                                      const Grey &grey) {
-    return fdrBuildTableInternal(lits, make_small, target, grey, hint);
+unique_ptr<HWLMProto> fdrBuildProtoHinted(u8 engType,
+                                          vector<hwlmLiteral> lits,
+                                          bool make_small, u32 hint,
+                                          const target_t &target,
+                                          const Grey &grey) {
+    return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
+                                 hint);
 }
 
 #endif
index 58047600f03157f5c3e1ad1e87b807cfb3b6b7c9..f0ce49256a9cd75191867b7d8325d4c204fcd24b 100644 (file)
@@ -34,6 +34,7 @@
 #define FDR_COMPILE_H
 
 #include "ue2common.h"
+#include "hwlm/hwlm_build.h"
 #include "util/bytecode_ptr.h"
 
 #include <vector>
@@ -46,18 +47,23 @@ struct hwlmLiteral;
 struct Grey;
 struct target_t;
 
-bytecode_ptr<FDR> fdrBuildTable(const std::vector<hwlmLiteral> &lits,
-                                bool make_small, const target_t &target,
-                                const Grey &grey);
+bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey);
 
 #if !defined(RELEASE_BUILD)
-
-bytecode_ptr<FDR> fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits,
-                                      bool make_small, u32 hint,
-                                      const target_t &target, const Grey &grey);
-
+std::unique_ptr<HWLMProto> fdrBuildProtoHinted(
+                                          u8 engType,
+                                          std::vector<hwlmLiteral> lits,
+                                          bool make_small, u32 hint,
+                                          const target_t &target,
+                                          const Grey &grey);
 #endif
 
+std::unique_ptr<HWLMProto> fdrBuildProto(
+                                     u8 engType,
+                                     std::vector<hwlmLiteral> lits,
+                                     bool make_small, const target_t &target,
+                                     const Grey &grey);
+
 /** \brief Returns size in bytes of the given FDR engine. */
 size_t fdrSize(const struct FDR *fdr);
 
index 756fe8e70afde7a0da2123279376bbbddc5ac203..3879960a29773640ec5d8961224d201b712cdec1 100644 (file)
@@ -57,10 +57,11 @@ class FDREngineDescription;
 struct hwlmStreamingControl;
 struct Grey;
 
-bytecode_ptr<u8> setupFullConfs(const std::vector<hwlmLiteral> &lits,
-               const EngineDescription &eng,
-               std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
-               bool make_small);
+bytecode_ptr<u8> setupFullConfs(
+      const std::vector<hwlmLiteral> &lits,
+      const EngineDescription &eng,
+      const std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
+      bool make_small);
 
 // all suffixes include an implicit max_bucket_width suffix to ensure that
 // we always read a full-scale flood "behind" us in terms of what's in our
index a6eee4cfa21727648a457a9b45bec0349dfa9159..c75f8d17f60a3627d16de5a7fa8c63ff4c6d06f9 100644 (file)
@@ -292,7 +292,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
 bytecode_ptr<u8>
 setupFullConfs(const vector<hwlmLiteral> &lits,
                const EngineDescription &eng,
-               map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
+               const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
                bool make_small) {
     unique_ptr<TeddyEngineDescription> teddyDescr =
         getTeddyDescription(eng.getID());
@@ -300,9 +300,9 @@ setupFullConfs(const vector<hwlmLiteral> &lits,
     BC2CONF bc2Conf;
     u32 totalConfirmSize = 0;
     for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
-        if (!bucketToLits[b].empty()) {
+        if (contains(bucketToLits, b)) {
             vector<hwlmLiteral> vl;
-            for (const LiteralIndex &lit_idx : bucketToLits[b]) {
+            for (const LiteralIndex &lit_idx : bucketToLits.at(b)) {
                 vl.push_back(lits[lit_idx]);
             }
 
index 86a3bfa4b336a1d8c3286003f2eb60da186c4c3b..067e50e2c4bbabcc7d8f442466c59d4b8dc4eca0 100644 (file)
@@ -29,6 +29,7 @@
 #ifndef FDR_CONFIRM_RUNTIME_H
 #define FDR_CONFIRM_RUNTIME_H
 
+#include "scratch.h"
 #include "fdr_internal.h"
 #include "fdr_loadval.h"
 #include "hwlm/hwlm.h"
@@ -41,7 +42,7 @@
 static really_inline
 void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
                  size_t i, hwlmcb_rv_t *control, u32 *last_match,
-                 u64a conf_key) {
+                 u64a conf_key, u64a *conf, u8 bit) {
     assert(i < a->len);
     assert(i >= a->start_offset);
     assert(ISALIGNED(fdrc));
@@ -57,6 +58,10 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
     const struct LitInfo *li
         = (const struct LitInfo *)((const u8 *)fdrc + start);
 
+    struct hs_scratch *scratch = a->scratch;
+    assert(!scratch->fdr_conf);
+    scratch->fdr_conf = conf;
+    scratch->fdr_conf_offset = bit;
     u8 oldNext; // initialized in loop
     do {
         assert(ISALIGNED(li));
@@ -88,11 +93,12 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
         }
 
         *last_match = li->id;
-        *control = a->cb(i, li->id, a->scratch);
+        *control = a->cb(i, li->id, scratch);
     out:
         oldNext = li->next; // oldNext is either 0 or an 'adjust' value
         li++;
     } while (oldNext);
+    scratch->fdr_conf = NULL;
 }
 
 #endif
index a35e59000b38a6fa989d63126324f21745a569c1..bb02f7598629c874f5b5bdc740e400a21cab3275 100644 (file)
 #include "teddy_engine_description.h"
 #include "grey.h"
 #include "ue2common.h"
+#include "hwlm/hwlm_build.h"
 #include "util/alloc.h"
 #include "util/compare.h"
 #include "util/container.h"
+#include "util/make_unique.h"
 #include "util/noncopyable.h"
 #include "util/popcount.h"
 #include "util/target_info.h"
@@ -77,17 +79,18 @@ class TeddyCompiler : noncopyable {
     const TeddyEngineDescription &eng;
     const Grey &grey;
     const vector<hwlmLiteral> &lits;
+    map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
     bool make_small;
 
 public:
     TeddyCompiler(const vector<hwlmLiteral> &lits_in,
+                  map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
                   const TeddyEngineDescription &eng_in, bool make_small_in,
                   const Grey &grey_in)
-        : eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) {
-    }
+        : eng(eng_in), grey(grey_in), lits(lits_in),
+          bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
 
     bytecode_ptr<FDR> build();
-    bool pack(map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits);
 };
 
 class TeddySet {
@@ -216,8 +219,10 @@ public:
     }
 };
 
-bool TeddyCompiler::pack(map<BucketIndex,
-                             std::vector<LiteralIndex>> &bucketToLits) {
+static
+bool pack(const vector<hwlmLiteral> &lits,
+          const TeddyEngineDescription &eng,
+          map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits) {
     set<TeddySet> sts;
 
     for (u32 i = 0; i < lits.size(); i++) {
@@ -473,30 +478,6 @@ void fillReinforcedTable(const map<BucketIndex,
 }
 
 bytecode_ptr<FDR> TeddyCompiler::build() {
-    assert(eng.numMasks <= MAX_NUM_MASKS);
-
-    if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
-        DEBUG_PRINTF("too many literals: %zu\n", lits.size());
-        return nullptr;
-    }
-
-#ifdef TEDDY_DEBUG
-    for (size_t i = 0; i < lits.size(); i++) {
-        printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
-               lits[i].nocase ? "caseless" : "caseful");
-        for (size_t j = 0; j < lits[i].s.size(); j++) {
-            printf("%02x", ((u32)lits[i].s[j])&0xff);
-        }
-        printf("\n");
-    }
-#endif
-
-    map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
-    if (!pack(bucketToLits)) {
-        DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
-                     lits.size(), eng.getNumBuckets());
-        return nullptr;
-    }
     u32 maskWidth = eng.getNumBuckets() / 8;
 
     size_t headerSize = sizeof(Teddy);
@@ -565,12 +546,49 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
     return fdr;
 }
 
+
+static
+bool assignStringsToBuckets(
+                const vector<hwlmLiteral> &lits,
+                TeddyEngineDescription &eng,
+                map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
+    assert(eng.numMasks <= MAX_NUM_MASKS);
+    if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
+        DEBUG_PRINTF("too many literals: %zu\n", lits.size());
+        return false;
+    }
+
+#ifdef TEDDY_DEBUG
+    for (size_t i = 0; i < lits.size(); i++) {
+        printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
+               lits[i].nocase ? "caseless" : "caseful");
+        for (size_t j = 0; j < lits[i].s.size(); j++) {
+            printf("%02x", ((u32)lits[i].s[j])&0xff);
+        }
+        printf("\n");
+    }
+#endif
+
+    if (!pack(lits, eng, bucketToLits)) {
+        DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
+                     lits.size(), eng.getNumBuckets());
+        return false;
+    }
+    return true;
+}
+
 } // namespace
 
-bytecode_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
-                                        bool make_small, u32 hint,
-                                        const target_t &target,
-                                        const Grey &grey) {
+bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
+    TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng),
+                     proto.make_small, grey);
+    return tc.build();
+}
+
+
+unique_ptr<HWLMProto> teddyBuildProtoHinted(
+                        u8 engType, const vector<hwlmLiteral> &lits,
+                        bool make_small, u32 hint, const target_t &target) {
     unique_ptr<TeddyEngineDescription> des;
     if (hint == HINT_INVALID) {
         des = chooseTeddyEngine(target, lits);
@@ -580,8 +598,14 @@ bytecode_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
     if (!des) {
         return nullptr;
     }
-    TeddyCompiler tc(lits, *des, make_small, grey);
-    return tc.build();
+
+    map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
+    if (!assignStringsToBuckets(lits, *des, bucketToLits)) {
+        return nullptr;
+    }
+
+    return ue2::make_unique<HWLMProto>(engType, move(des), lits,
+                                       bucketToLits, make_small);
 }
 
 } // namespace ue2
index 5ff4d83948ad10801b2cb9b1850cc02cbb144bdd..ec251310263e177f519847d1f19d8d31a803bb03 100644 (file)
@@ -35,6 +35,7 @@
 #define TEDDY_COMPILE_H
 
 #include "ue2common.h"
+#include "hwlm/hwlm_build.h"
 #include "util/bytecode_ptr.h"
 
 #include <vector>
@@ -46,12 +47,13 @@ namespace ue2 {
 struct Grey;
 struct hwlmLiteral;
 struct target_t;
+struct TeddyEngineDescription;
 
-bytecode_ptr<FDR> teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits,
-                                        bool make_small, u32 hint,
-                                        const target_t &target,
-                                        const Grey &grey);
+bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey);
 
+std::unique_ptr<HWLMProto> teddyBuildProtoHinted(
+                          u8 engType, const std::vector<hwlmLiteral> &lits,
+                          bool make_small, u32 hint, const target_t &target);
 } // namespace ue2
 
 #endif // TEDDY_COMPILE_H
index 6b809cce78d36932bb6b7c36ad676bf3e2f88dfe..5332423e810794d12f40059310a0241822ef20e8 100644 (file)
@@ -419,9 +419,10 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
         if (!(fdrc->groups & *control)) {
             continue;
         }
+        u64a tmp = 0;
         u64a confVal = getConfVal(a, ptr, byte, reason);
         confWithBit(fdrc, a, ptr - a->buf + byte, control,
-                    last_match, confVal);
+                    last_match, confVal, &tmp, 0);
     } while (unlikely(*conf));
 }
 
index c2db5480d20859e7cd15f89a0d2711bb279ffb02..1b332815290d77d31629a54d92878c95f23e07e5 100644 (file)
 #include "scratch.h"
 #include "ue2common.h"
 #include "fdr/fdr_compile.h"
+#include "fdr/fdr_compile_internal.h"
+#include "fdr/fdr_engine_description.h"
+#include "fdr/teddy_engine_description.h"
 #include "util/compile_context.h"
 #include "util/compile_error.h"
+#include "util/make_unique.h"
 #include "util/ue2string.h"
 
 #include <cassert>
@@ -53,6 +57,28 @@ using namespace std;
 
 namespace ue2 {
 
+HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in)
+    : engType(engType_in), lits(move(lits_in)) {}
+
+HWLMProto::HWLMProto(u8 engType_in,
+                     unique_ptr<FDREngineDescription> eng_in,
+                     vector<hwlmLiteral> lits_in,
+                     map<u32, vector<u32>> bucketToLits_in,
+                     bool make_small_in)
+    : engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)),
+      bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
+
+HWLMProto::HWLMProto(u8 engType_in,
+                     unique_ptr<TeddyEngineDescription> eng_in,
+                     vector<hwlmLiteral> lits_in,
+                     map<u32, vector<u32>> bucketToLits_in,
+                     bool make_small_in)
+    : engType(engType_in), teddyEng(move(eng_in)),
+      lits(move(lits_in)),
+      bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
+
+HWLMProto::~HWLMProto() {}
+
 static
 void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
 #ifdef DEBUG
@@ -92,9 +118,52 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
     return true;
 }
 
-bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
-                             const CompileContext &cc,
+bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
                              UNUSED hwlm_group_t expected_groups) {
+    size_t engSize = 0;
+    shared_ptr<void> eng;
+
+    const auto &lits = proto.lits;
+    DEBUG_PRINTF("building table with %zu strings\n", lits.size());
+
+    if (proto.engType == HWLM_ENGINE_NOOD) {
+        DEBUG_PRINTF("build noodle table\n");
+        const hwlmLiteral &lit = lits.front();
+        auto noodle = noodBuildTable(lit);
+        if (noodle) {
+            engSize = noodle.size();
+        }
+        eng = move(noodle);
+    } else {
+        DEBUG_PRINTF("building a new deal\n");
+        auto fdr = fdrBuildTable(proto, cc.grey);
+        if (fdr) {
+            engSize = fdr.size();
+        }
+        eng = move(fdr);
+    }
+
+    if (!eng) {
+        return nullptr;
+    }
+
+    assert(engSize);
+    if (engSize > cc.grey.limitLiteralMatcherSize) {
+        throw ResourceLimitError();
+    }
+
+    const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
+    auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
+
+    h->type = proto.engType;
+    memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
+
+    return h;
+}
+
+unique_ptr<HWLMProto>
+hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small,
+               const CompileContext &cc) {
     assert(!lits.empty());
     dumpLits(lits);
 
@@ -124,9 +193,7 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
         }
     }
 
-    u8 engType = 0;
-    size_t engSize = 0;
-    shared_ptr<void> eng;
+    unique_ptr<HWLMProto> proto;
 
     DEBUG_PRINTF("building table with %zu strings\n", lits.size());
 
@@ -134,39 +201,17 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
 
     if (isNoodleable(lits, cc)) {
         DEBUG_PRINTF("build noodle table\n");
-        engType = HWLM_ENGINE_NOOD;
-        const hwlmLiteral &lit = lits.front();
-        auto noodle = noodBuildTable(lit);
-        if (noodle) {
-            engSize = noodle.size();
-        }
-        eng = move(noodle);
+        proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits);
     } else {
         DEBUG_PRINTF("building a new deal\n");
-        engType = HWLM_ENGINE_FDR;
-        auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey);
-        if (fdr) {
-            engSize = fdr.size();
+        proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small,
+                              cc.target_info, cc.grey);
+        if (!proto) {
+            return nullptr;
         }
-        eng = move(fdr);
-    }
-
-    if (!eng) {
-        return nullptr;
-    }
-
-    assert(engSize);
-    if (engSize > cc.grey.limitLiteralMatcherSize) {
-        throw ResourceLimitError();
     }
 
-    const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
-    auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
-
-    h->type = engType;
-    memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
-
-    return h;
+    return proto;
 }
 
 size_t hwlmSize(const HWLM *h) {
index f2691496eee4dc8e0e0e67f8e7fe325ecb5d86b9..4aefc3640d3addc295554e91328ae243b9fe449d 100644 (file)
 #define HWLM_BUILD_H
 
 #include "hwlm.h"
+#include "hwlm_literal.h"
 #include "ue2common.h"
 #include "util/bytecode_ptr.h"
 
+#include <map>
 #include <memory>
 #include <vector>
 
@@ -44,15 +46,62 @@ struct HWLM;
 
 namespace ue2 {
 
+class FDREngineDescription;
+class TeddyEngineDescription;
 struct CompileContext;
 struct Grey;
-struct hwlmLiteral;
+
+/** \brief Class representing a literal matcher prototype. */
+struct HWLMProto {
+    /**
+     * \brief Engine type to distinguish noodle from FDR and Teddy.
+     */
+    u8 engType;
+
+    /**
+     * \brief FDR engine description.
+     */
+    std::unique_ptr<FDREngineDescription> fdrEng;
+
+    /**
+     * \brief Teddy engine description.
+     */
+    std::unique_ptr<TeddyEngineDescription> teddyEng;
+
+    /**
+     * \brief HWLM literals passed from Rose.
+     */
+    std::vector<hwlmLiteral> lits;
+
+    /**
+     * \brief Bucket assignment info in FDR and Teddy
+     */
+    std::map<u32, std::vector<u32>> bucketToLits;
+
+    /**
+     * \brief Flag to optimise matcher for small size from Rose.
+     *
+     * Defaulted so that the flag holds a defined value even when the
+     * prototype is created through the constructor that takes no
+     * make_small argument.
+     */
+    bool make_small = false;
+
+    /** \brief Construct from an engine type and literals only. */
+    HWLMProto(u8 engType_in, std::vector<hwlmLiteral> lits_in);
+
+    /** \brief Construct with an FDR engine description. */
+    HWLMProto(u8 engType_in, std::unique_ptr<FDREngineDescription> eng_in,
+              std::vector<hwlmLiteral> lits_in,
+              std::map<u32, std::vector<u32>> bucketToLits_in,
+              bool make_small_in);
+
+    /** \brief Construct with a Teddy engine description. */
+    HWLMProto(u8 engType_in, std::unique_ptr<TeddyEngineDescription> eng_in,
+              std::vector<hwlmLiteral> lits_in,
+              std::map<u32, std::vector<u32>> bucketToLits_in,
+              bool make_small_in);
+
+    ~HWLMProto();
+};
 
 /** \brief Build an \ref HWLM literal matcher runtime structure for a group of
  * literals.
  *
- * \param lits The group of literals.
- * \param make_small Optimise matcher for small size.
+ * \param proto Literal matcher prototype.
  * \param cc Compile context.
  * \param expected_groups FIXME: document me!
  *
@@ -60,10 +109,13 @@ struct hwlmLiteral;
  * may result in a nullptr return value, or a std::bad_alloc exception being
  * thrown.
  */
-bytecode_ptr<HWLM> hwlmBuild(const std::vector<hwlmLiteral> &lits,
-                             bool make_small, const CompileContext &cc,
+bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
                              hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
 
+std::unique_ptr<HWLMProto>
+hwlmBuildProto(std::vector<hwlmLiteral> &lits, bool make_small,
+               const CompileContext &cc);
+
 /**
  * Returns an estimate of the number of repeated characters on the end of a
  * literal that will make a literal set of size \a numLiterals suffer
index 9ae7744de7f145e8ac874310a0085fd33a4265ab..08510fb0f487298812d1ab16ad69b3e90a27fa88 100644 (file)
@@ -45,6 +45,8 @@ namespace ue2 {
 /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
 #define HWLM_MASKLEN 8
 
+#define INVALID_LIT_ID ~0U
+
 /** \brief Class representing a literal, fed to \ref hwlmBuild. */
 struct hwlmLiteral {
     std::string s; //!< \brief The literal itself.
@@ -64,6 +66,21 @@ struct hwlmLiteral {
      * can be quashed by the literal matcher. */
     bool noruns;
 
+    /** \brief included literal id. */
+    u32 included_id = INVALID_LIT_ID;
+
+    /** \brief Squash mask for FDR's confirm mask for included literals.
+     *
+     * In FDR confirm, if we have included literal in another bucket,
+     * we can use this mask to squash the bit for the bucket in FDR confirm
+     * mask and then run programs of included literal directly and avoid
+     * confirm work.
+     *
+     * This value is calculated in FDR compile code once bucket assignment is
+     * completed
+     */
+    u8 squash = 0;
+
     /** \brief Set of groups that literal belongs to.
      *
      * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
index 83a34a396d9e7d5e253bc520d86b1466a7f95ca8..ab0934de55239d1bc6791b7c2cddbc651e4a0d31 100644 (file)
@@ -2570,6 +2570,23 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
                 }
             }
             PROGRAM_NEXT_INSTRUCTION
+
+            // INCLUDED_JUMP: if an FDR confirm word is registered in
+            // scratch, mask it with the instruction's squash byte and carry
+            // on at the included literal's program (child_offset). With no
+            // confirm word registered the case body does nothing.
+            PROGRAM_CASE(INCLUDED_JUMP) {
+                if (scratch->fdr_conf) {
+                    // squash the bucket of included literal
+                    // shift is fdr_conf_offset rounded down to a multiple of
+                    // 8. The mask is the complement of squash moved up by
+                    // that amount, so the left shift also zeroes every
+                    // confirm bit below that position.
+                    u8 shift = scratch->fdr_conf_offset & ~7U;
+                    u64a mask = ((~(u64a)ri->squash) << shift);
+                    *(scratch->fdr_conf) &= mask;
+
+                    // Re-base the interpreter on the child program: pc and
+                    // pc_base both point at it and programOffset is
+                    // recomputed relative to the engine pointer t.
+                    pc = getByOffset(t, ri->child_offset);
+                    pc_base = pc;
+                    programOffset = (const u8 *)pc_base -(const u8 *)t;
+                    DEBUG_PRINTF("pc_base %p pc %p child_offset %u\n",
+                                 pc_base, pc, ri->child_offset);
+                    // NOTE(review): presumably this resumes the enclosing
+                    // dispatch loop without advancing pc past the child's
+                    // first instruction - confirm against the macro bodies.
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
         }
     }
 
index 4d0793bfeab6d4d45e4419ec0ae5a7317d81553d..a41f03229855a05b944880372e419ce906d5408f 100644 (file)
@@ -49,6 +49,7 @@
 #include "rose_internal.h"
 #include "rose_program.h"
 #include "hwlm/hwlm.h" /* engine types */
+#include "hwlm/hwlm_build.h"
 #include "hwlm/hwlm_literal.h"
 #include "nfa/castlecompile.h"
 #include "nfa/goughcompile.h"
@@ -2803,7 +2804,7 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
         auto groups = info.group_mask;
 
         if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
-            fragments.emplace_back(frag_id, groups, lit_id);
+            fragments.emplace_back(frag_id, lit.s, groups, lit_id);
             frag_id++;
             continue;
         }
@@ -2816,10 +2817,11 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
     }
 
     for (auto &m : frag_info) {
+        auto &lit = m.first;
         auto &fi = m.second;
         DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(),
                      as_string_list(fi.lit_ids).c_str());
-        fragments.emplace_back(frag_id, fi.groups, move(fi.lit_ids));
+        fragments.emplace_back(frag_id, lit.s, fi.groups, move(fi.lit_ids));
         frag_id++;
         assert(frag_id == fragments.size());
     }
@@ -2827,23 +2829,100 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
     return fragments;
 }
 
-/**
- * \brief Build the interpreter programs for each literal.
- */
 static
-void buildLiteralPrograms(const RoseBuildImpl &build,
-                          vector<LitFragment> &fragments, build_context &bc,
-                          ProgramBuild &prog_build) {
-    DEBUG_PRINTF("%zu fragments\n", fragments.size());
-    auto lit_edge_map = findEdgesByLiteral(build);
+void buildIncludedIdMap(unordered_map<u32, pair<u32, u8>> &includedIdMap,
+                        const LitProto *litProto) {
+    // Collects, for every prototype literal that names an included literal,
+    // an entry id -> (included id, squash mask). A null prototype adds
+    // nothing.
+    if (litProto == nullptr) {
+        return;
+    }
+
+    for (const auto &lit : litProto->hwlmProto->lits) {
+        if (lit.included_id == INVALID_LIT_ID) {
+            continue;
+        }
+        // Assignment (not insertion): an existing entry for the same id is
+        // overwritten.
+        includedIdMap[lit.id] = make_pair(lit.included_id, lit.squash);
+    }
+}
 
-    for (auto &frag : fragments) {
-        DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id,
-                     as_string_list(frag.lit_ids).c_str());
+/**
+ * \brief Record on each fragment which fragment it includes, and the squash
+ * mask to use, as reported by the literal matcher prototypes.
+ *
+ * Inclusion info from the floating, EOD-anchored and small-block prototypes
+ * is gathered in one map; info from the delay rebuild prototype is kept
+ * apart because it feeds the delay-specific fragment fields. The literal ids
+ * stored in the prototypes are used directly as indices into \a fragments.
+ */
+static
+void findInclusionGroups(vector<LitFragment> &fragments,
+                         LitProto *fproto, LitProto *drproto,
+                         LitProto *eproto, LitProto *sbproto) {
+    unordered_map<u32, pair<u32, u8>> includedIdMap;
+    unordered_map<u32, pair<u32, u8>> includedDelayIdMap;
+    buildIncludedIdMap(includedIdMap, fproto);
+    buildIncludedIdMap(includedDelayIdMap, drproto);
+    buildIncludedIdMap(includedIdMap, eproto);
+    buildIncludedIdMap(includedIdMap, sbproto);
 
-        auto lit_prog = makeFragmentProgram(build, bc, prog_build, frag.lit_ids,
-                                            lit_edge_map);
-        frag.lit_program_offset = writeProgram(bc, move(lit_prog));
+    // Single pass over the fragments with one lookup per map; no
+    // intermediate candidate list is needed.
+    const size_t fragNum = fragments.size();
+    for (size_t j = 0; j < fragNum; j++) {
+        // j is a size_t, so it must be printed with %zu.
+        DEBUG_PRINTF("frag id %zu\n", j);
+        auto &frag = fragments[j];
+        const u32 id = j;
+
+        const auto child = includedIdMap.find(id);
+        if (child != includedIdMap.end()) {
+            frag.included_frag_id = child->second.first;
+            frag.squash = child->second.second;
+            DEBUG_PRINTF("frag id %u child frag id %u\n", id,
+                         frag.included_frag_id);
+        }
+
+        const auto delayChild = includedDelayIdMap.find(id);
+        if (delayChild != includedDelayIdMap.end()) {
+            frag.included_delay_frag_id = delayChild->second.first;
+            frag.delay_squash = delayChild->second.second;
+            DEBUG_PRINTF("delay frag id %u child frag id %u\n", id,
+                         frag.included_delay_frag_id);
+        }
+    }
+}
+
+static
+void buildFragmentPrograms(const RoseBuildImpl &build,
+                           vector<LitFragment> &fragments,
+                           build_context &bc, ProgramBuild &prog_build,
+                           const map<u32, vector<RoseEdge>> &lit_edge_map) {
+    // Sort fragments based on literal length and case info to build
+    // included literal programs before their parent programs.
+    vector<LitFragment> ordered_fragments(fragments);
+    stable_sort(begin(ordered_fragments), end(ordered_fragments),
+         [](const LitFragment &a, const LitFragment &b) {
+             auto len1 = a.s.length();
+             auto caseful1 = !a.s.any_nocase();
+             auto len2 = b.s.length();
+             auto caseful2 = !b.s.any_nocase();
+             return tie(len1, caseful1) < tie(len2, caseful2);
+         });
+
+    for (auto &frag : ordered_fragments) {
+        auto &pfrag = fragments[frag.fragment_id];
+        DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", pfrag.fragment_id,
+                     as_string_list(pfrag.lit_ids).c_str());
+
+        auto lit_prog = makeFragmentProgram(build, bc, prog_build,
+                                            pfrag.lit_ids, lit_edge_map);
+        if (pfrag.included_frag_id != INVALID_FRAG_ID &&
+            !lit_prog.empty()) {
+            auto &cfrag = fragments[pfrag.included_frag_id];
+            assert(pfrag.s.length() >= cfrag.s.length() &&
+                   !pfrag.s.any_nocase() >= !cfrag.s.any_nocase());
+            u32 child_offset = cfrag.lit_program_offset;
+            DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id,
+                         child_offset);
+            addIncludedJumpProgram(lit_prog, child_offset, pfrag.squash);
+        }
+        pfrag.lit_program_offset = writeProgram(bc, move(lit_prog));
 
         // We only do delayed rebuild in streaming mode.
         if (!build.cc.streaming) {
@@ -2851,11 +2930,82 @@ void buildLiteralPrograms(const RoseBuildImpl &build,
         }
 
         auto rebuild_prog = makeDelayRebuildProgram(build, prog_build,
-                                                    frag.lit_ids);
-        frag.delay_program_offset = writeProgram(bc, move(rebuild_prog));
+                                                    pfrag.lit_ids);
+        if (pfrag.included_delay_frag_id != INVALID_FRAG_ID &&
+            !rebuild_prog.empty()) {
+            auto &cfrag = fragments[pfrag.included_delay_frag_id];
+            assert(pfrag.s.length() >= cfrag.s.length() &&
+                   !pfrag.s.any_nocase() >= !cfrag.s.any_nocase());
+            u32 child_offset = cfrag.delay_program_offset;
+            DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id,
+                         child_offset);
+            addIncludedJumpProgram(rebuild_prog, child_offset,
+                                   pfrag.delay_squash);
+        }
+        pfrag.delay_program_offset = writeProgram(bc, move(rebuild_prog));
+    }
+}
+
+/**
+ * \brief Replace the id of every literal in the prototype with a program
+ * offset taken from the fragment that id indexes: the delay rebuild program
+ * offset when \a delay is set, the literal program offset otherwise.
+ */
+static
+void updateLitProtoProgramOffset(vector<LitFragment> &fragments,
+                                 LitProto &litProto, bool delay) {
+    for (auto &lit : litProto.hwlmProto->lits) {
+        // On entry lit.id is used as an index into fragments.
+        const LitFragment &owner = fragments[lit.id];
+        if (!delay) {
+            DEBUG_PRINTF("lit_program_offset:%u\n",
+                         owner.lit_program_offset);
+            lit.id = owner.lit_program_offset;
+            continue;
+        }
+        DEBUG_PRINTF("delay_program_offset:%u\n",
+                     owner.delay_program_offset);
+        lit.id = owner.delay_program_offset;
     }
 }
 
+/**
+ * \brief Update program offsets in each literal matcher prototype that was
+ * built; null prototypes are skipped. Only the delay rebuild prototype uses
+ * the delay rebuild program offsets.
+ */
+static
+void updateLitProgramOffset(vector<LitFragment> &fragments,
+                            LitProto *fproto, LitProto *drproto,
+                            LitProto *eproto, LitProto *sbproto) {
+    // (prototype, use delay rebuild offsets), visited in this order.
+    const pair<LitProto *, bool> protos[] = {
+        {fproto, false},
+        {drproto, true},
+        {eproto, false},
+        {sbproto, false},
+    };
+
+    for (const auto &entry : protos) {
+        if (entry.first) {
+            updateLitProtoProgramOffset(fragments, *entry.first, entry.second);
+        }
+    }
+}
+
+/**
+ * \brief Build the interpreter programs for each literal fragment and point
+ * the literal matcher prototypes at them.
+ *
+ * Any of the prototype pointers may be null.
+ */
+static
+void buildLiteralPrograms(const RoseBuildImpl &build,
+                          vector<LitFragment> &fragments, build_context &bc,
+                          ProgramBuild &prog_build, LitProto *fproto,
+                          LitProto *drproto, LitProto *eproto,
+                          LitProto *sbproto) {
+    DEBUG_PRINTF("%zu fragments\n", fragments.size());
+    const auto edges_by_lit = findEdgesByLiteral(build);
+
+    // 1. Note on each fragment which fragment it includes.
+    findInclusionGroups(fragments, fproto, drproto, eproto, sbproto);
+
+    // 2. Write the fragment programs.
+    buildFragmentPrograms(build, fragments, bc, prog_build, edges_by_lit);
+
+    // 3. Swap the ids held by the prototypes for program offsets.
+    updateLitProgramOffset(fragments, fproto, drproto, eproto, sbproto);
+}
+
 /**
  * \brief Write delay replay programs to the bytecode.
  *
@@ -3470,7 +3620,24 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     tie(proto.delayProgramOffset, proto.delay_count) =
         writeDelayPrograms(*this, fragments, bc, prog_build);
 
-    buildLiteralPrograms(*this, fragments, bc, prog_build);
+    // Build floating HWLM matcher prototype.
+    rose_group fgroups = 0;
+    auto fproto = buildFloatingMatcherProto(*this, fragments,
+                                            longLitLengthThreshold,
+                                            &fgroups, &historyRequired);
+
+    // Build delay rebuild HWLM matcher prototype.
+    auto drproto = buildDelayRebuildMatcherProto(*this, fragments,
+                                                 longLitLengthThreshold);
+
+    // Build EOD-anchored HWLM matcher prototype.
+    auto eproto = buildEodAnchoredMatcherProto(*this, fragments);
+
+    // Build small-block HWLM matcher prototype.
+    auto sbproto = buildSmallBlockMatcherProto(*this, fragments);
+
+    buildLiteralPrograms(*this, fragments, bc, prog_build, fproto.get(),
+                         drproto.get(), eproto.get(), sbproto.get());
 
     auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset);
     proto.eodProgramOffset = writeProgram(bc, move(eod_prog));
@@ -3497,29 +3664,26 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     }
 
     // Build floating HWLM matcher.
-    rose_group fgroups = 0;
-    auto ftable = buildFloatingMatcher(*this, fragments, longLitLengthThreshold,
-                                       &fgroups, &historyRequired);
+    auto ftable = buildHWLMMatcher(*this, fproto.get());
     if (ftable) {
         proto.fmatcherOffset = bc.engine_blob.add(ftable);
         bc.resources.has_floating = true;
     }
 
     // Build delay rebuild HWLM matcher.
-    auto drtable = buildDelayRebuildMatcher(*this, fragments,
-                                            longLitLengthThreshold);
+    auto drtable = buildHWLMMatcher(*this, drproto.get());
     if (drtable) {
         proto.drmatcherOffset = bc.engine_blob.add(drtable);
     }
 
     // Build EOD-anchored HWLM matcher.
-    auto etable = buildEodAnchoredMatcher(*this, fragments);
+    auto etable = buildHWLMMatcher(*this, eproto.get());
     if (etable) {
         proto.ematcherOffset = bc.engine_blob.add(etable);
     }
 
     // Build small-block HWLM matcher.
-    auto sbtable = buildSmallBlockMatcher(*this, fragments);
+    auto sbtable = buildHWLMMatcher(*this, sbproto.get());
     if (sbtable) {
         proto.sbmatcherOffset = bc.engine_blob.add(sbtable);
     }
index 5e9f95f268af8d247b0eecdf5a45a0bec9a873db..e98308acfc87b146b60f7bffd48f0509ffa542aa 100644 (file)
@@ -1463,6 +1463,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(INCLUDED_JUMP) {
+                os << "    child_offset " << ri->child_offset << endl;
+                // squash is a u8; widen it so the stream prints the mask as
+                // a number instead of treating it as a character.
+                os << "    squash " << u32{ri->squash} << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
         default:
             os << "  UNKNOWN (code " << int{code} << ")" << endl;
             os << "  <stopping>" << endl;
index b00c36be685b537541d7bc1a5ccf28fafe1af1c1..8af08298451f17232148c9bab92b34ebe8c2e276 100644 (file)
@@ -636,4 +636,12 @@ void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob,
     inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
+void RoseInstrIncludedJump::write(void *dest, RoseEngineBlob &blob,
+                                  const OffsetMap &offset_map) const {
+    // The base-class write runs first; afterwards this instruction's own
+    // two fields are stored into the destination structure.
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto &out = *static_cast<impl_type *>(dest);
+    out.squash = squash;
+    out.child_offset = child_offset;
+}
+
 }
index 025f6a671861959625c8090c7f42a667e79f6588..3bc3266b45812238ff60809e8e3b1968728fb41b 100644 (file)
@@ -2121,6 +2121,34 @@ public:
     }
 };
 
+/**
+ * \brief Build-time form of the INCLUDED_JUMP instruction: carries the
+ * program offset of the included literal and the FDR confirm squash mask.
+ */
+class RoseInstrIncludedJump
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_INCLUDED_JUMP,
+                                    ROSE_STRUCT_INCLUDED_JUMP,
+                                    RoseInstrIncludedJump> {
+public:
+    u32 child_offset;
+    u8 squash;
+
+    RoseInstrIncludedJump(u32 child_offset_in, u8 squash_in)
+        : child_offset(child_offset_in), squash(squash_in) {}
+
+    /** \brief Equal when both the child offset and squash mask match. */
+    bool operator==(const RoseInstrIncludedJump &other) const {
+        return squash == other.squash && child_offset == other.child_offset;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), child_offset, squash);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    /** \brief Same comparison as operator==; the offset maps are unused. */
+    bool equiv_to(const RoseInstrIncludedJump &other, const OffsetMap &,
+                  const OffsetMap &) const {
+        return *this == other;
+    }
+};
+
 class RoseInstrEnd
     : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
                                   RoseInstrEnd> {
index 57269747a132553bcda89686ee0b4d85c0abdfae..2c302a859acf1aa713fcd3734c3d6a46980d54ef 100644 (file)
@@ -46,6 +46,7 @@
 #include "util/compile_context.h"
 #include "util/compile_error.h"
 #include "util/dump_charclass.h"
+#include "util/make_unique.h"
 #include "util/report.h"
 #include "util/report_manager.h"
 #include "util/verify_types.h"
@@ -699,8 +700,7 @@ struct MatcherProto {
 
 static
 void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
-                        const LitFragment &f, u32 id, bool delay_rebuild,
-                        size_t max_len) {
+                        const LitFragment &f, u32 id, size_t max_len) {
     const rose_literal_id &lit = build.literals.at(id);
 
     DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(lit.s).c_str(),
@@ -737,12 +737,10 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
         return;
     }
 
-    u32 prog_offset =
-        delay_rebuild ? f.delay_program_offset : f.lit_program_offset;
     const auto &groups = f.groups;
 
-    mp.lits.emplace_back(move(s_final), nocase, noruns, prog_offset, groups,
-                         msk, cmp);
+    mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
+                         groups, msk, cmp);
 }
 
 static
@@ -837,8 +835,7 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
         }
 
         // Build our fragment (for the HWLM matcher) from the first literal.
-        addFragmentLiteral(build, mp, f, used_lit_ids.front(), delay_rebuild,
-                           max_len);
+        addFragmentLiteral(build, mp, f, used_lit_ids.front(), max_len);
 
         for (u32 id : used_lit_ids) {
             const rose_literal_id &lit = build.literals.at(id);
@@ -876,8 +873,8 @@ void MatcherProto::insert(const MatcherProto &a) {
 }
 
 static
-void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp,
-                HWLM &hwlm) {
+void buildAccel(const RoseBuildImpl &build,
+                const vector<AccelString> &accel_lits, HWLM &hwlm) {
     if (!build.cc.grey.hamsterAccelForward) {
         return;
     }
@@ -886,49 +883,68 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp,
         return;
     }
 
-    buildForwardAccel(&hwlm, mp.accel_lits, build.getInitialGroups());
+    buildForwardAccel(&hwlm, accel_lits, build.getInitialGroups());
 }
 
-bytecode_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
-                                        const vector<LitFragment> &fragments,
-                                        size_t longLitLengthThreshold,
-                                        rose_group *fgroups,
-                                        size_t *historyRequired) {
-    *fgroups = 0;
-
-    auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false,
-                               longLitLengthThreshold);
-    if (mp.lits.empty()) {
-        DEBUG_PRINTF("empty floating matcher\n");
+/**
+ * \brief Build HWLM bytecode from a literal matcher prototype.
+ *
+ * Returns nullptr when \a litProto is null. Throws CompileError if
+ * hwlmBuild() yields no bytecode. On success buildAccel() is run over the
+ * prototype's accel literals before the table is returned.
+ */
+bytecode_ptr<HWLM>
+buildHWLMMatcher(const RoseBuildImpl &build, LitProto *litProto) {
+    if (!litProto) {
         return nullptr;
     }
-    dumpMatcherLiterals(mp.lits, "floating", build.cc.grey);
-
-    for (const hwlmLiteral &lit : mp.lits) {
-        *fgroups |= lit.groups;
-    }
-
-    auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups());
+    auto hwlm = hwlmBuild(*litProto->hwlmProto, build.cc,
+                          build.getInitialGroups());
     if (!hwlm) {
         throw CompileError("Unable to generate bytecode.");
     }
 
-    buildAccel(build, mp, *hwlm);
-
-    if (build.cc.streaming) {
-        DEBUG_PRINTF("history_required=%zu\n", mp.history_required);
-        assert(mp.history_required <= build.cc.grey.maxHistoryAvailable);
-        *historyRequired = max(*historyRequired, mp.history_required);
-    }
+    buildAccel(build, litProto->accel_lits, *hwlm);
 
-    DEBUG_PRINTF("built floating literal table size %zu bytes\n", hwlm.size());
+    // This function serves every matcher kind, so the message must not name
+    // a particular one.
+    DEBUG_PRINTF("built literal table size %zu bytes\n", hwlm.size());
     return hwlm;
 }
 
-bytecode_ptr<HWLM>
-buildDelayRebuildMatcher(const RoseBuildImpl &build,
-                         const vector<LitFragment> &fragments,
-                         size_t longLitLengthThreshold) {
+/**
+ * \brief Build the prototype for the floating literal matcher.
+ *
+ * \a fgroups receives the union of the groups of all floating literals (it
+ * is zeroed first). In streaming mode \a historyRequired is raised to the
+ * matcher's history requirement if that is larger. Returns nullptr when
+ * there are no floating literals; throws CompileError if no prototype can be
+ * produced.
+ */
+unique_ptr<LitProto>
+buildFloatingMatcherProto(const RoseBuildImpl &build,
+                          const vector<LitFragment> &fragments,
+                          size_t longLitLengthThreshold,
+                          rose_group *fgroups,
+                          size_t *historyRequired) {
+    DEBUG_PRINTF("Floating literal matcher\n");
+    *fgroups = 0;
+
+    auto floating = makeMatcherProto(build, fragments, ROSE_FLOATING, false,
+                                     longLitLengthThreshold);
+    if (floating.lits.empty()) {
+        DEBUG_PRINTF("empty floating matcher\n");
+        return nullptr;
+    }
+    dumpMatcherLiterals(floating.lits, "floating", build.cc.grey);
+
+    for (const hwlmLiteral &lit : floating.lits) {
+        *fgroups |= lit.groups;
+    }
+
+    if (build.cc.streaming) {
+        DEBUG_PRINTF("history_required=%zu\n", floating.history_required);
+        assert(floating.history_required <=
+               build.cc.grey.maxHistoryAvailable);
+        if (*historyRequired < floating.history_required) {
+            *historyRequired = floating.history_required;
+        }
+    }
+
+    auto proto = hwlmBuildProto(floating.lits, false, build.cc);
+    if (!proto) {
+        throw CompileError("Unable to generate literal matcher proto.");
+    }
+
+    return ue2::make_unique<LitProto>(move(proto), floating.accel_lits);
+}
+
+unique_ptr<LitProto>
+buildDelayRebuildMatcherProto(const RoseBuildImpl &build,
+                              const vector<LitFragment> &fragments,
+                              size_t longLitLengthThreshold) {
+    DEBUG_PRINTF("Delay literal matcher\n");
     if (!build.cc.streaming) {
         DEBUG_PRINTF("not streaming\n");
         return nullptr;
@@ -942,20 +958,20 @@ buildDelayRebuildMatcher(const RoseBuildImpl &build,
     }
     dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey);
 
-    auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups());
-    if (!hwlm) {
-        throw CompileError("Unable to generate bytecode.");
-    }
 
-    buildAccel(build, mp, *hwlm);
+    auto proto = hwlmBuildProto(mp.lits, false, build.cc);
 
-    DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", hwlm.size());
-    return hwlm;
+    if (!proto) {
+        throw CompileError("Unable to generate literal matcher proto.");
+    }
+
+    return ue2::make_unique<LitProto>(move(proto), mp.accel_lits);
 }
 
-bytecode_ptr<HWLM>
-buildSmallBlockMatcher(const RoseBuildImpl &build,
-                       const vector<LitFragment> &fragments) {
+unique_ptr<LitProto>
+buildSmallBlockMatcherProto(const RoseBuildImpl &build,
+                            const vector<LitFragment> &fragments) {
+    DEBUG_PRINTF("Small block literal matcher\n");
     if (build.cc.streaming) {
         DEBUG_PRINTF("streaming mode\n");
         return nullptr;
@@ -1000,21 +1016,19 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
         return nullptr;
     }
 
-    auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups());
-    if (!hwlm) {
-        throw CompileError("Unable to generate bytecode.");
-    }
+    auto proto = hwlmBuildProto(mp.lits, false, build.cc);
 
-    buildAccel(build, mp, *hwlm);
+    if (!proto) {
+        throw CompileError("Unable to generate literal matcher proto.");
+    }
 
-    DEBUG_PRINTF("built small block literal table size %zu bytes\n",
-                 hwlm.size());
-    return hwlm;
+    return ue2::make_unique<LitProto>(move(proto), mp.accel_lits);
 }
 
-bytecode_ptr<HWLM>
-buildEodAnchoredMatcher(const RoseBuildImpl &build,
-                        const vector<LitFragment> &fragments) {
+unique_ptr<LitProto>
+buildEodAnchoredMatcherProto(const RoseBuildImpl &build,
+                             const vector<LitFragment> &fragments) {
+    DEBUG_PRINTF("Eod anchored literal matcher\n");
     auto mp = makeMatcherProto(build, fragments, ROSE_EOD_ANCHORED, false,
                                build.ematcher_region_size);
 
@@ -1027,16 +1041,13 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build,
 
     assert(build.ematcher_region_size);
 
-    auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups());
-    if (!hwlm) {
-        throw CompileError("Unable to generate bytecode.");
-    }
+    auto proto = hwlmBuildProto(mp.lits, false, build.cc);
 
-    buildAccel(build, mp, *hwlm);
+    if (!proto) {
+        throw CompileError("Unable to generate literal matcher proto.");
+    }
 
-    DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n",
-                 hwlm.size());
-    return hwlm;
+    return ue2::make_unique<LitProto>(move(proto), mp.accel_lits);
 }
 
 } // namespace ue2
index 2b1afc8c6bd934df5a3da49fe55dd140cb0ac4a5..9668ebc93233b231fe9a531a39e5b3c826322592 100644 (file)
 #define ROSE_BUILD_MATCHERS_H
 
 #include "rose_build_impl.h"
+#include "rose_build_lit_accel.h"
+#include "hwlm/hwlm_build.h"
 #include "util/bytecode_ptr.h"
+#include "util/ue2string.h"
 
 #include <vector>
 
@@ -44,38 +47,80 @@ struct HWLM;
 
 namespace ue2 {
 
+static constexpr u32 INVALID_FRAG_ID = ~0U;
+
 struct LitFragment {
-    LitFragment(u32 fragment_id_in, rose_group groups_in, u32 lit_id)
-    : fragment_id(fragment_id_in), groups(groups_in), lit_ids({lit_id}) {}
-    LitFragment(u32 fragment_id_in, rose_group groups_in,
-                std::vector<u32> lit_ids_in)
-    : fragment_id(fragment_id_in), groups(groups_in),
-        lit_ids(std::move(lit_ids_in)) {}
+    LitFragment(u32 fragment_id_in, ue2_literal s_in,
+                rose_group groups_in, u32 lit_id)
+    : fragment_id(fragment_id_in), s(s_in), groups(groups_in),
+      lit_ids({lit_id}) {}
+    LitFragment(u32 fragment_id_in, ue2_literal s_in,
+                rose_group groups_in, std::vector<u32> lit_ids_in)
+    : fragment_id(fragment_id_in), s(s_in), groups(groups_in),
+      lit_ids(std::move(lit_ids_in)) {}
     u32 fragment_id;
+
+    /**
+     * \brief literal fragment.
+     */
+    ue2_literal s;
+
+    /**
+     * \brief FDR confirm squash mask for included literals.
+     */
+    u8 squash;
+
+    /**
+     * \brief FDR confirm squash mask for included literals (Delayed
+     * literals only).
+     */
+    u8 delay_squash;
+
+    /**
+     * \brief Fragment id of included literal.
+     */
+    u32 included_frag_id = INVALID_FRAG_ID;
+
+    /**
+     * \brief Fragment Id of included literal (Delayed literals only).
+     */
+    u32 included_delay_frag_id = INVALID_FRAG_ID;
     rose_group groups;
     std::vector<u32> lit_ids;
     u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET;
     u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET;
 };
 
-bytecode_ptr<HWLM>
-buildFloatingMatcher(const RoseBuildImpl &build,
-                     const std::vector<LitFragment> &fragments,
-                     size_t longLitLengthThreshold, rose_group *fgroups,
-                     size_t *historyRequired);
+/**
+ * \brief A literal matcher prototype together with the accel literals that
+ * are handed to buildAccel() once the matcher itself has been built.
+ */
+struct LitProto {
+    // accel_lits_in is only read (it is copied into the member), so it is
+    // taken by const reference; const vectors and temporaries are accepted.
+    LitProto(std::unique_ptr<HWLMProto> hwlmProto_in,
+             const std::vector<AccelString> &accel_lits_in)
+    : hwlmProto(std::move(hwlmProto_in)), accel_lits(accel_lits_in) {}
 
-bytecode_ptr<HWLM>
-buildDelayRebuildMatcher(const RoseBuildImpl &build,
-                         const std::vector<LitFragment> &fragments,
-                         size_t longLitLengthThreshold);
+    std::unique_ptr<HWLMProto> hwlmProto;
+    std::vector<AccelString> accel_lits;
+};
 
 bytecode_ptr<HWLM>
-buildSmallBlockMatcher(const RoseBuildImpl &build,
-                       const std::vector<LitFragment> &fragments);
+buildHWLMMatcher(const RoseBuildImpl &build, LitProto *proto);
 
-bytecode_ptr<HWLM>
-buildEodAnchoredMatcher(const RoseBuildImpl &build,
-                        const std::vector<LitFragment> &fragments);
+std::unique_ptr<LitProto>
+buildFloatingMatcherProto(const RoseBuildImpl &build,
+                          const std::vector<LitFragment> &fragments,
+                          size_t longLitLengthThreshold,
+                          rose_group *fgroups,
+                          size_t *historyRequired);
+
+std::unique_ptr<LitProto>
+buildDelayRebuildMatcherProto(const RoseBuildImpl &build,
+                              const std::vector<LitFragment> &fragments,
+                              size_t longLitLengthThreshold);
+std::unique_ptr<LitProto>
+buildSmallBlockMatcherProto(const RoseBuildImpl &build,
+                            const std::vector<LitFragment> &fragments);
+
+std::unique_ptr<LitProto>
+buildEodAnchoredMatcherProto(const RoseBuildImpl &build,
+                             const std::vector<LitFragment> &fragments);
 
 void findMoreLiteralMasks(RoseBuildImpl &build);
 
index 562ddb20917eed361701ea3171d826fd898ee47c..01bd7c546bc6793b7d6c4af1d21a1a3039c82907 100644 (file)
@@ -2164,6 +2164,14 @@ RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
     return prog;
 }
 
+/**
+ * \brief Append a block to \p program that holds a single
+ * RoseInstrIncludedJump built from \p child_offset and \p squash.
+ */
+void addIncludedJumpProgram(RoseProgram &program, u32 child_offset,
+                            u8 squash) {
+    RoseProgram jump_block;
+    auto jump = make_unique<RoseInstrIncludedJump>(child_offset, squash);
+    jump_block.add_before_end(move(jump));
+    program.add_block(move(jump_block));
+}
+
 static
 void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block,
                         RoseProgram &program) {
index 8758ef64a02f38dd3ae2c65c40cc64baec5c53c2..afbaa36e551f262a01ef4e990f0b19ec1cb53627 100644 (file)
@@ -282,6 +282,7 @@ void recordLongLiterals(std::vector<ue2_case_string> &longLiterals,
 
 void recordResources(RoseResources &resources, const RoseProgram &program);
 
+void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash);
 } // namespace ue2
 
 #endif // ROSE_BUILD_PROGRAM_H
index 78b123d5c30f2230f76a94d75fafbef92bafeeb9..eeebfed1c7e82ab91a033bbfd8c0cf20ef0ea25b 100644 (file)
@@ -178,7 +178,12 @@ enum RoseInstructionCode {
      */
     ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
 
-    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64 //!< Sentinel.
+    /**
+     * \brief Jump to the program of included literal.
+     */
+    ROSE_INSTR_INCLUDED_JUMP,
+
+    LAST_ROSE_INSTRUCTION = ROSE_INSTR_INCLUDED_JUMP //!< Sentinel.
 };
 
 struct ROSE_STRUCT_END {
@@ -625,4 +630,10 @@ struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 {
     s32 last_start; //!< The latest start offset among 8 paths.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
+
+/** \brief Operand layout for ROSE_INSTR_INCLUDED_JUMP (jump to the program
+ * of an included literal). */
+struct ROSE_STRUCT_INCLUDED_JUMP {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 squash; //!< FDR confirm squash mask for included literal.
+    u32 child_offset; //!< Program offset of included literal.
+};
 #endif // ROSE_ROSE_PROGRAM_H
index 84d23cedd5218e31e082e6f92d3548d8962c7a7b..8e082c772cf42027fd691431d31593fa512b6150 100644 (file)
@@ -136,6 +136,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
     s->in_use = 0;
     s->scratchSize = alloc_size;
     s->scratch_alloc = (char *)s_tmp;
+    s->fdr_conf = NULL;
 
     // each of these is at an offset from the previous
     char *current = (char *)s + sizeof(*s);
index 1d4b849ed7ef41be4f1a86e4767d0b0f715e161b..fa998e8490459baaee5c111e6f96f8480e981d2d 100644 (file)
@@ -200,6 +200,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */
     u32 scratchSize;
     char *scratch_alloc; /* user allocated scratch object */
+    u64a *fdr_conf; /**< FDR confirm value */
+    u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
+                         * in buffer */
 };
 
 /* array of fatbit ptr; TODO: why not an array of fatbits? */
index 399147e2bd03fb2a471797bc7ac114cadccf552a..87ab097475ef2b8f0bffdb42d179290132a76f9d 100644 (file)
@@ -36,6 +36,7 @@
 #include "fdr/fdr_engine_description.h"
 #include "fdr/teddy_compile.h"
 #include "fdr/teddy_engine_description.h"
+#include "hwlm/hwlm_internal.h"
 #include "util/alloc.h"
 
 #include "database.h"
@@ -135,6 +136,31 @@ vector<u32> getValidFdrEngines() {
     return ret;
 }
 
+
+// Test helper for the two-pass build: create an FDR prototype using the
+// given engine hint, then build the table from it. Returns nullptr when no
+// prototype was produced.
+static
+bytecode_ptr<FDR> buildFDREngineHinted(std::vector<hwlmLiteral> &lits,
+                                       bool make_small, u32 hint,
+                                       const target_t &target,
+                                       const Grey &grey) {
+    auto fdrProto = fdrBuildProtoHinted(HWLM_ENGINE_FDR, lits, make_small,
+                                        hint, target, grey);
+    if (fdrProto) {
+        return fdrBuildTable(*fdrProto, grey);
+    }
+    return nullptr;
+}
+
+// Test helper for the two-pass build without an engine hint: create an FDR
+// prototype, then build the table from it. Returns nullptr when no prototype
+// was produced.
+static
+bytecode_ptr<FDR> buildFDREngine(std::vector<hwlmLiteral> &lits,
+                                 bool make_small, const target_t &target,
+                                 const Grey &grey) {
+    auto fdrProto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small, target,
+                                  grey);
+    if (fdrProto) {
+        return fdrBuildTable(*fdrProto, grey);
+    }
+    return nullptr;
+}
+
 class FDRp : public TestWithParam<u32> {
 };
 
@@ -147,10 +173,12 @@ TEST_P(FDRp, Simple) {
     vector<hwlmLiteral> lits;
     lits.push_back(hwlmLiteral("mnopqr", 0, 0));
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     fdrExec(fdr.get(), (const u8 *)data, sizeof(data), 0, decentCallback,
             &scratch, HWLM_ALL_GROUPS);
 
@@ -170,10 +198,12 @@ TEST_P(FDRp, SimpleSingle) {
     vector<hwlmLiteral> lits;
     lits.push_back(hwlmLiteral("m", 0, 0));
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0,
             decentCallback, &scratch, HWLM_ALL_GROUPS);
 
@@ -192,7 +222,8 @@ TEST_P(FDRp, MultiLocation) {
     vector<hwlmLiteral> lits;
     lits.push_back(hwlmLiteral("abc", 0, 1));
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     const u32 testSize = 128;
@@ -200,6 +231,7 @@ TEST_P(FDRp, MultiLocation) {
     vector<u8> data(testSize, 0);
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     for (u32 i = 0; i < testSize - 3; i++) {
         memcpy(data.data() + i, "abc", 3);
         fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &scratch,
@@ -220,10 +252,12 @@ TEST_P(FDRp, NoRepeat1) {
     vector<hwlmLiteral> lits
         = { hwlmLiteral("m", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}) };
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0,
             decentCallback, &scratch, HWLM_ALL_GROUPS);
 
@@ -242,10 +276,12 @@ TEST_P(FDRp, NoRepeat2) {
         = { hwlmLiteral("m", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}),
             hwlmLiteral("A", 0, 42) };
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0,
             decentCallback, &scratch, HWLM_ALL_GROUPS);
 
@@ -265,10 +301,12 @@ TEST_P(FDRp, NoRepeat3) {
         = { hwlmLiteral("90m", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}),
             hwlmLiteral("zA", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}) };
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0,
             decentCallback, &scratch, HWLM_ALL_GROUPS);
 
@@ -293,6 +331,7 @@ hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen,
         hbuf = new_hbuf;
     }
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, &scratch,
                             groups);
 }
@@ -304,7 +343,8 @@ TEST_P(FDRp, SmallStreaming) {
     vector<hwlmLiteral> lits = {hwlmLiteral("a", 1, 1),
                                 hwlmLiteral("aardvark", 0, 10)};
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     vector<match> expected;
@@ -342,7 +382,8 @@ TEST_P(FDRp, SmallStreaming2) {
                                 hwlmLiteral("kk", 1, 2),
                                 hwlmLiteral("aardvark", 0, 10)};
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     vector<match> expected;
@@ -373,7 +414,8 @@ TEST_P(FDRp, moveByteStream) {
     vector<hwlmLiteral> lits;
     lits.push_back(hwlmLiteral("mnopqr", 0, 0));
 
-    auto fdrTable0 = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdrTable0 = buildFDREngineHinted(lits, false, hint,
+                                          get_current_target(), Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdrTable0, hint);
 
     size_t size = fdrSize(fdrTable0.get());
@@ -390,6 +432,7 @@ TEST_P(FDRp, moveByteStream) {
 
     // check matches
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
 
     hwlm_error_t fdrStatus = fdrExec(fdrTable.get(), (const u8 *)data,
                                      data_len, 0, decentCallback, &scratch,
@@ -414,7 +457,8 @@ TEST_P(FDRp, Stream1) {
     lits.push_back(hwlmLiteral("f", 0, 0));
     lits.push_back(hwlmLiteral("literal", 0, 1));
 
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
+    auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                    Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
     // check matches
@@ -470,12 +514,13 @@ TEST_P(FDRpp, AlignAndTooEarly) {
 
     vector<hwlmLiteral> lits;
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     for (size_t litLen = 1; litLen <= patLen; litLen++) {
 
         // building literal from pattern substring of variable length 1-patLen
         lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0));
-        auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(),
-                                       Grey());
+        auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                        Grey());
         CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
         // check with buffer offset from aligned start from 0 to 31
@@ -592,6 +637,7 @@ TEST_P(FDRpa, ShortWritings) {
 
     // run the literal matching through all generated literals
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     for (size_t patIdx = 0; patIdx < pats.size();) {
         // group them in the sets of 32
         vector<hwlmLiteral> testSigs;
@@ -599,8 +645,8 @@ TEST_P(FDRpa, ShortWritings) {
             testSigs.push_back(hwlmLiteral(pats[patIdx], false, patIdx));
         }
 
-        auto fdr = fdrBuildTableHinted(testSigs, false, hint,
-                                       get_current_target(), Grey());
+        auto fdr = buildFDREngineHinted(testSigs, false, hint,
+                                        get_current_target(), Grey());
 
         CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
@@ -659,7 +705,7 @@ TEST(FDR, FDRTermS) {
     lits.push_back(hwlmLiteral("f", 0, 0));
     lits.push_back(hwlmLiteral("ff", 0, 1));
 
-    auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey());
+    auto fdr = buildFDREngine(lits, false, get_current_target(), Grey());
     ASSERT_TRUE(fdr != nullptr);
 
     // check matches
@@ -682,11 +728,12 @@ TEST(FDR, FDRTermB) {
     lits.push_back(hwlmLiteral("f", 0, 0));
     lits.push_back(hwlmLiteral("ff", 0, 1));
 
-    auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey());
+    auto fdr = buildFDREngine(lits, false, get_current_target(), Grey());
     ASSERT_TRUE(fdr != nullptr);
 
     // check matches
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
 
     fdrStatus = fdrExec(fdr.get(), (const u8 *)data1, data_len1,
                         0, decentCallbackT, &scratch, HWLM_ALL_GROUPS);
index 8bdd076312219d767ba327da1734231685b50ab1..81afbeaa11ee5ad49a06a7a2f73b18aee0f0ef80 100644 (file)
@@ -36,6 +36,7 @@
 #include "fdr/fdr_engine_description.h"
 #include "fdr/teddy_compile.h"
 #include "fdr/teddy_engine_description.h"
+#include "hwlm/hwlm_internal.h"
 #include "scratch.h"
 #include "util/alloc.h"
 #include "util/bitutils.h"
@@ -131,6 +132,16 @@ static vector<u32> getValidFdrEngines() {
     return ret;
 }
 
+// Test helper for the two-pass build: create an FDR prototype using the
+// given engine hint, then build the table from it.
+static
+bytecode_ptr<FDR> buildFDREngineHinted(std::vector<hwlmLiteral> &lits,
+                                       bool make_small, u32 hint,
+                                       const target_t &target,
+                                       const Grey &grey) {
+    auto proto = fdrBuildProtoHinted(HWLM_ENGINE_FDR, lits, make_small, hint,
+                                     target, grey);
+    // The prototype may be empty (the same helper in unit/internal/fdr.cpp
+    // guards against it); hand back a null engine instead of dereferencing
+    // it, so CHECK_WITH_TEDDY_OK_TO_FAIL in the callers can handle it.
+    if (!proto) {
+        return nullptr;
+    }
+    return fdrBuildTable(*proto, grey);
+}
+
 class FDRFloodp : public TestWithParam<u32> {
 };
 
@@ -142,6 +153,7 @@ TEST_P(FDRFloodp, NoMask) {
     u8 c = 0;
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     while (1) {
         SCOPED_TRACE((unsigned int)c);
         u8 bit = 1 << (c & 0x7);
@@ -169,8 +181,8 @@ TEST_P(FDRFloodp, NoMask) {
             lits.push_back(hwlmLiteral(sAlt, false, i * 8 + 7));
         }
 
-        auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(),
-                                       Grey());
+        auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                        Grey());
         CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
         hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize,
@@ -235,6 +247,7 @@ TEST_P(FDRFloodp, WithMask) {
     u8 c = '\0';
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     while (1) {
         u8 bit = 1 << (c & 0x7);
         u8 cAlt = c ^ bit;
@@ -305,8 +318,8 @@ TEST_P(FDRFloodp, WithMask) {
                                                     HWLM_ALL_GROUPS, msk, cmp));
             }
         }
-        auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(),
-                                       Grey());
+        auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                        Grey());
         CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
         hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize,
@@ -400,6 +413,7 @@ TEST_P(FDRFloodp, StreamingMask) {
     u8 c = '\0';
 
     struct hs_scratch scratch;
+    scratch.fdr_conf = NULL;
     while (1) {
         u8 bit = 1 << (c & 0x7);
         u8 cAlt = c ^ bit;
@@ -470,8 +484,8 @@ TEST_P(FDRFloodp, StreamingMask) {
                                                     HWLM_ALL_GROUPS, msk, cmp));
             }
         }
-        auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(),
-                                       Grey());
+        auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(),
+                                        Grey());
         CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
         hwlm_error_t fdrStatus;