git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
rose/hwlm: limit literals to eight bytes
author Justin Viiret <justin.viiret@intel.com>
Mon, 28 Nov 2016 05:46:03 +0000 (16:46 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 26 Apr 2017 04:41:29 +0000 (14:41 +1000)
Rework HWLM to work over literals of eight bytes ("medium length"),
doing confirm in the Rose interpreter.

19 files changed:
src/fdr/fdr_compile.cpp
src/fdr/fdr_compile.h
src/hwlm/hwlm_build.cpp
src/hwlm/hwlm_build.h
src/hwlm/hwlm_literal.h
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_compile.cpp
src/rose/rose_build_dump.cpp
src/rose/rose_build_impl.h
src/rose/rose_build_matchers.cpp
src/rose/rose_build_matchers.h
src/rose/rose_build_misc.cpp
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_dump.cpp
src/rose/rose_program.h
unit/internal/fdr.cpp
unit/internal/fdr_flood.cpp

index f7451492ba09f53671276d84590f3a44bb7d775a..c9d6cbcb8e100477905a50fd00e0a2cf34477ee5 100644 (file)
@@ -545,35 +545,12 @@ FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
 } // namespace
 
 static
-size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
-    size_t rv = 0;
-    for (const auto &lit : lits) {
-        rv = max(rv, lit.msk.size());
-    }
-    return rv;
-}
-
-static
-void setHistoryRequired(hwlmStreamingControl &stream_ctl,
-                        const vector<hwlmLiteral> &lits) {
-    size_t max_mask_len = maxMaskLen(lits);
-
-    // we want enough history to manage the longest literal and the longest
-    // mask.
-    stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1;
-}
-
-static
-aligned_unique_ptr<FDR>
-fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
-                      const target_t &target, const Grey &grey, u32 hint,
-                      hwlmStreamingControl *stream_control) {
+aligned_unique_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
+                                              bool make_small,
+                                              const target_t &target,
+                                              const Grey &grey, u32 hint) {
     pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
 
-    if (stream_control) {
-        setHistoryRequired(*stream_control, lits);
-    }
-
     DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
 
     if (grey.fdrAllowTeddy) {
@@ -606,21 +583,18 @@ fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
 
 aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
                                       bool make_small, const target_t &target,
-                                      const Grey &grey,
-                                      hwlmStreamingControl *stream_control) {
-    return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID,
-                                 stream_control);
+                                      const Grey &grey) {
+    return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID);
 }
 
 #if !defined(RELEASE_BUILD)
 
-aligned_unique_ptr<FDR>
-fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,
-                    const target_t &target, const Grey &grey,
-                    hwlmStreamingControl *stream_control) {
+aligned_unique_ptr<FDR> fdrBuildTableHinted(const vector<hwlmLiteral> &lits,
+                                            bool make_small, u32 hint,
+                                            const target_t &target,
+                                            const Grey &grey) {
     pair<u8 *, size_t> link(nullptr, 0);
-    return fdrBuildTableInternal(lits, make_small, target, grey, hint,
-                                 stream_control);
+    return fdrBuildTableInternal(lits, make_small, target, grey, hint);
 }
 
 #endif
index c12e00714cf8e17c13c869e7cb4d95cf9714a005..a135a6e17d71bddeed725bcc89008ebe4f7ec7ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -43,21 +43,18 @@ struct FDR;
 namespace ue2 {
 
 struct hwlmLiteral;
-struct hwlmStreamingControl;
 struct Grey;
 struct target_t;
 
 ue2::aligned_unique_ptr<FDR>
 fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small,
-              const target_t &target, const Grey &grey,
-              hwlmStreamingControl *stream_control = nullptr);
+              const target_t &target, const Grey &grey);
 
 #if !defined(RELEASE_BUILD)
 
 ue2::aligned_unique_ptr<FDR>
 fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
-                    u32 hint, const target_t &target, const Grey &grey,
-                    hwlmStreamingControl *stream_control = nullptr);
+                    u32 hint, const target_t &target, const Grey &grey);
 
 #endif
 
index fa6335c9443ede6b67122330a451e7729259350d..29e71293946925778b837cac9c6348f132f41962 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
 #include "hwlm.h"
 #include "hwlm_build.h"
 #include "hwlm_internal.h"
+#include "hwlm_literal.h"
 #include "noodle_engine.h"
 #include "noodle_build.h"
 #include "scratch.h"
@@ -512,7 +513,6 @@ bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
 
 static
 bool isNoodleable(const vector<hwlmLiteral> &lits,
-                  const hwlmStreamingControl *stream_control,
                   const CompileContext &cc) {
     if (!cc.grey.allowNoodle) {
         return false;
@@ -523,19 +523,6 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
         return false;
     }
 
-    if (stream_control) { // nullptr if in block mode
-        if (lits.front().s.length() > stream_control->history_max + 1) {
-            DEBUG_PRINTF("length of %zu too long for history max %zu\n",
-                         lits.front().s.length(),
-                         stream_control->history_max);
-            return false;
-        }
-        if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) {
-            assert(0);
-            return false;
-        }
-    }
-
     if (!lits.front().msk.empty()) {
         DEBUG_PRINTF("noodle can't handle supplementary masks\n");
         return false;
@@ -545,22 +532,11 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
 }
 
 aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
-                                   hwlmStreamingControl *stream_control,
                                    bool make_small, const CompileContext &cc,
                                    hwlm_group_t expected_groups) {
     assert(!lits.empty());
     dumpLits(lits);
 
-    if (stream_control) {
-        assert(stream_control->history_min <= stream_control->history_max);
-
-        // We should not have been passed any literals that are too long to
-        // match with a maximally-sized history buffer.
-        assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) {
-            return lit.s.length() <= stream_control->history_max + 1;
-        }));
-    }
-
     // Check that we haven't exceeded the maximum number of literals.
     if (lits.size() > cc.grey.limitLiteralCount) {
         throw ResourceLimitError();
@@ -595,7 +571,7 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
 
     assert(everyoneHasGroups(lits));
 
-    if (isNoodleable(lits, stream_control, cc)) {
+    if (isNoodleable(lits, cc)) {
         DEBUG_PRINTF("build noodle table\n");
         engType = HWLM_ENGINE_NOOD;
         const hwlmLiteral &lit = lits.front();
@@ -603,19 +579,11 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
         if (noodle) {
             engSize = noodSize(noodle.get());
         }
-        if (stream_control) {
-            // For now, a single literal still goes to noodle and asks
-            // for a great big history
-            stream_control->literal_history_required = lit.s.length() - 1;
-            assert(stream_control->literal_history_required
-                   <= stream_control->history_max);
-        }
         eng = move(noodle);
     } else {
         DEBUG_PRINTF("building a new deal\n");
         engType = HWLM_ENGINE_FDR;
-        auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey,
-                            stream_control);
+        auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey);
         if (fdr) {
             engSize = fdrSize(fdr.get());
         }
@@ -640,14 +608,6 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
         buildForwardAccel(h.get(), lits, expected_groups);
     }
 
-    if (stream_control) {
-        DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
-                     stream_control->literal_history_required,
-                     stream_control->history_max);
-        assert(stream_control->literal_history_required
-                    <= stream_control->history_max);
-    }
-
     return h;
 }
 
index fbf359e60389d1103f4d0f574ad98582771c17d0..5dd7dbc9065f32bf2b471263b6e56a537e415c76 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,7 +34,6 @@
 #define HWLM_BUILD_H
 
 #include "hwlm.h"
-#include "hwlm_literal.h"
 #include "ue2common.h"
 #include "util/alloc.h"
 
@@ -47,30 +46,12 @@ namespace ue2 {
 
 struct CompileContext;
 struct Grey;
-struct target_t;
-
-/** \brief Structure gathering together the input/output parameters related to
- * streaming mode operation. */
-struct hwlmStreamingControl {
-    /** \brief IN parameter: Upper limit on the amount of history that can be
-     * requested. */
-    size_t history_max;
-
-    /** \brief IN parameter: History already known to be used before literal
-     * analysis. */
-    size_t history_min;
-
-    /** \brief OUT parameter: History required by the literal matcher to
-     * correctly match all literals. */
-    size_t literal_history_required;
-};
+struct hwlmLiteral;
 
 /** \brief Build an \ref HWLM literal matcher runtime structure for a group of
  * literals.
  *
  * \param lits The group of literals.
- * \param stream_control Streaming control parameters. If the matcher will
- *        operate in non-streaming (block) mode, this pointer should be NULL.
  * \param make_small Optimise matcher for small size.
  * \param cc Compile context.
  * \param expected_groups FIXME: document me!
@@ -80,8 +61,7 @@ struct hwlmStreamingControl {
  * thrown.
  */
 aligned_unique_ptr<HWLM>
-hwlmBuild(const std::vector<hwlmLiteral> &lits,
-          hwlmStreamingControl *stream_control, bool make_small,
+hwlmBuild(const std::vector<hwlmLiteral> &lits, bool make_small,
           const CompileContext &cc,
           hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
 
index b7af99d329ab470d828032f84b6147da389b3b1a..a08b2ff69eca5c4a182584b037b44a38f71df9a9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -42,7 +42,7 @@
 namespace ue2 {
 
 /** \brief Max length of the literal passed to HWLM. */
-#define HWLM_LITERAL_MAX_LEN 255
+#define HWLM_LITERAL_MAX_LEN 8
 
 /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
 #define HWLM_MASKLEN 8
index e883c239e9b8173245050e73770a923122a4140e..1a5f25e970356d4c89f6c33fe885dd2e86d264fc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -1409,6 +1409,68 @@ int roseCheckLongLiteral(const struct RoseEngine *t,
     return 1;
 }
 
+static rose_inline // confirm that the lit_length-byte literal fetched via getByOffset(t, lit_offset) ends at offset 'end'; returns 1 on confirm, 0 otherwise
+int roseCheckMediumLiteral(const struct RoseEngine *t,
+                           const struct hs_scratch *scratch, u64a end,
+                           u32 lit_offset, u32 lit_length, char nocase) {
+    const struct core_info *ci = &scratch->core_info;
+    const u8 *lit = getByOffset(t, lit_offset);
+
+    DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length);
+    DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset);
+
+    if (end < lit_length) { // fewer than lit_length bytes precede 'end', so the literal cannot fit
+        DEBUG_PRINTF("too short!\n");
+        return 0;
+    }
+
+    // If any portion of the literal matched in the current buffer, check it.
+    if (end > ci->buf_offset) {
+        u32 scan_len = MIN(end - ci->buf_offset, lit_length); // number of trailing literal bytes that lie inside ci->buf
+        u64a scan_start = end - ci->buf_offset - scan_len; // index into ci->buf where that tail begins
+        DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len,
+                     scan_start, end);
+        if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len,
+                       scan_len, nocase)) { // a nonzero result is treated as a mismatch
+            DEBUG_PRINTF("cmp of suffix failed\n");
+            return 0;
+        }
+    }
+
+    // If the entirety of the literal was in the current block, we are done.
+    if (end - lit_length >= ci->buf_offset) {
+        DEBUG_PRINTF("literal confirmed in current block\n");
+        return 1;
+    }
+
+    // We still have a prefix which we must test against the history buffer.
+    assert(t->mode != HS_MODE_BLOCK);
+
+    u64a lit_start_offset = end - lit_length; // below ci->buf_offset here, given the early return above
+    u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); // literal bytes that precede ci->buf
+    u32 hist_rewind = ci->buf_offset - lit_start_offset; // distance back from the end of ci->hbuf to the literal's first byte
+    DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind);
+
+    // History length check required for confirm in the EOD and delayed
+    // rebuild paths.
+    if (hist_rewind > ci->hlen) {
+        DEBUG_PRINTF("not enough history\n");
+        return 0;
+    }
+
+    DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n",
+                 prefix_len, ci->hlen, hist_rewind);
+    assert(hist_rewind <= ci->hlen);
+    if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len,
+                   nocase)) { // compare the literal's first prefix_len bytes against history
+        DEBUG_PRINTF("cmp of prefix failed\n");
+        return 0;
+    }
+
+    DEBUG_PRINTF("cmp succeeded\n");
+    return 1;
+}
+
 static
 void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
                     const char from_mpv) {
@@ -2060,8 +2122,10 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
                 const char nocase = 0;
                 if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
                                           ri->lit_length, nocase)) {
-                    DEBUG_PRINTF("halt: failed long lit check\n");
-                    return HWLM_CONTINUE_MATCHING;
+                    DEBUG_PRINTF("failed long lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
                 }
             }
             PROGRAM_NEXT_INSTRUCTION
@@ -2070,8 +2134,34 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
                 const char nocase = 1;
                 if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
                                           ri->lit_length, nocase)) {
-                    DEBUG_PRINTF("halt: failed nocase long lit check\n");
-                    return HWLM_CONTINUE_MATCHING;
+                    DEBUG_PRINTF("failed nocase long lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MED_LIT) {
+                const char nocase = 0;
+                if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
+                                            ri->lit_length, nocase)) {
+                    DEBUG_PRINTF("failed lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
+                const char nocase = 1;
+                if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
+                                            ri->lit_length, nocase)) {
+                    DEBUG_PRINTF("failed long lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
                 }
             }
             PROGRAM_NEXT_INSTRUCTION
index edf3e5e908e63d534dc4fa7f786d4affdbbd7772..9f978134f48f17aa0da44a6d8d925bfdc41fa046 100644 (file)
@@ -4353,13 +4353,18 @@ static
 void makeCheckLiteralInstruction(const RoseBuildImpl &build,
                                  const build_context &bc, u32 final_id,
                                  RoseProgram &program) {
+    assert(bc.longLitLengthThreshold > 0);
+
+    DEBUG_PRINTF("final_id %u, long lit threshold %zu\n", final_id,
+                 bc.longLitLengthThreshold);
+
     const auto &lits = build.final_id_to_literal.at(final_id);
     if (lits.size() != 1) {
-        // Long literals should not share a final_id.
+        // final_id sharing is only allowed for literals that are short enough
+        // to not require any additional confirm work.
         assert(all_of(begin(lits), end(lits), [&](u32 lit_id) {
             const rose_literal_id &lit = build.literals.right.at(lit_id);
-            return lit.table != ROSE_FLOATING ||
-                   lit.s.length() <= bc.longLitLengthThreshold;
+            return lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX;
         }));
         return;
     }
@@ -4370,11 +4375,9 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build,
     }
 
     const rose_literal_id &lit = build.literals.right.at(lit_id);
-    if (lit.table != ROSE_FLOATING) {
-        return;
-    }
-    assert(bc.longLitLengthThreshold > 0);
-    if (lit.s.length() <= bc.longLitLengthThreshold) {
+
+    if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
+        DEBUG_PRINTF("lit short enough to not need confirm\n");
         return;
     }
 
@@ -4383,11 +4386,34 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build,
         throw ResourceLimitError();
     }
 
+    if (lit.s.length() <= bc.longLitLengthThreshold) {
+        DEBUG_PRINTF("is a medium-length literal\n");
+        const auto *end_inst = program.end_instruction();
+        unique_ptr<RoseInstruction> ri;
+        if (lit.s.any_nocase()) {
+            ri = make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(),
+                                                         end_inst);
+        } else {
+            ri = make_unique<RoseInstrCheckMedLit>(lit.s.get_string(),
+                                                   end_inst);
+        }
+        program.add_before_end(move(ri));
+        return;
+    }
+
+    // Long literal support should only really be used for the floating table
+    // in streaming mode.
+    assert(lit.table == ROSE_FLOATING && build.cc.streaming);
+
+    DEBUG_PRINTF("is a long literal\n");
+
+    const auto *end_inst = program.end_instruction();
     unique_ptr<RoseInstruction> ri;
     if (lit.s.any_nocase()) {
-        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string());
+        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(),
+                                                      end_inst);
     } else {
-        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string());
+        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst);
     }
     program.add_before_end(move(ri));
 }
@@ -4522,6 +4548,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
     }
 
     RoseProgram program;
+    makeCheckLiteralInstruction(build, bc, final_id, program);
     makeCheckLitMaskInstruction(build, bc, final_id, program);
     makePushDelayedInstructions(build, final_id, program);
     assert(!program.empty());
@@ -4951,7 +4978,7 @@ u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
 
 static
 void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
-                          size_t longLitLengthThreshold, u32 *next_final_id) {
+                          u32 *next_final_id) {
     const auto &g = build.g;
     auto &literal_info = build.literal_info;
     auto &final_id_to_literal = build.final_id_to_literal;
@@ -4961,8 +4988,6 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
      * ids and squash the same roles and have the same group squashing
      * behaviour. Benefits literals cannot be merged. */
 
-    assert(longLitLengthThreshold > 0);
-
     for (u32 int_id : lits) {
         rose_literal_info &curr_info = literal_info[int_id];
         const rose_literal_id &lit = build.literals.right.at(int_id);
@@ -4974,10 +4999,10 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
             goto assign_new_id;
         }
 
-        // Long literals (that require CHECK_LONG_LIT instructions) cannot be
-        // merged.
-        if (lit.s.length() > longLitLengthThreshold) {
-            DEBUG_PRINTF("id %u is a long literal\n", int_id);
+        // Literals that need confirmation with CHECK_LONG_LIT or CHECK_MED_LIT
+        // cannot be merged.
+        if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
+            DEBUG_PRINTF("id %u needs lit confirm\n", int_id);
             goto assign_new_id;
         }
 
@@ -5001,7 +5026,7 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
                 const auto &cand_info = literal_info[cand_id];
                 const auto &cand_lit = build.literals.right.at(cand_id);
 
-                if (cand_lit.s.length() > longLitLengthThreshold) {
+                if (cand_lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
                     continue;
                 }
 
@@ -5071,8 +5096,7 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
 
 /** \brief Allocate final literal IDs for all literals.  */
 static
-void allocateFinalLiteralId(RoseBuildImpl &build,
-                            size_t longLitLengthThreshold) {
+void allocateFinalLiteralId(RoseBuildImpl &build) {
     set<u32> anch;
     set<u32> norm;
     set<u32> delay;
@@ -5106,15 +5130,15 @@ void allocateFinalLiteralId(RoseBuildImpl &build,
     }
 
     /* normal lits */
-    allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id);
+    allocateFinalIdToSet(build, norm, &next_final_id);
 
     /* next anchored stuff */
     build.anchored_base_id = next_final_id;
-    allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id);
+    allocateFinalIdToSet(build, anch, &next_final_id);
 
     /* delayed ids come last */
     build.delay_base_id = next_final_id;
-    allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id);
+    allocateFinalIdToSet(build, delay, &next_final_id);
 }
 
 static
@@ -5188,10 +5212,11 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build,
                             const size_t historyRequired) {
     const auto &cc = build.cc;
 
-    // In block mode, we should only use the long literal support for literals
-    // that cannot be handled by HWLM.
+    // In block mode, we don't have history, so we don't need long literal
+    // support and can just use "medium-length" literal confirm. TODO: we could
+    // specialize further and have a block mode literal confirm instruction.
     if (!cc.streaming) {
-        return HWLM_LITERAL_MAX_LEN;
+        return SIZE_MAX;
     }
 
     size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN;
@@ -5227,7 +5252,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                                                          historyRequired);
     DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
 
-    allocateFinalLiteralId(*this, longLitLengthThreshold);
+    allocateFinalLiteralId(*this);
 
     auto anchored_dfas = buildAnchoredDfas(*this);
 
index 9b8ea7f75ba5d2bd6d34cff4ff0c6a91c9c0cb05..0505a71eb282a147fa18318cdc74ff497b9526c9 100644 (file)
@@ -123,7 +123,7 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
         // with a CHECK_LONG_LIT instruction and need unique final_ids.
         // TODO: we could allow explosion for literals where the prefixes
         // covered by CHECK_LONG_LIT are identical.
-        if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
+        if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX &&
             limited_explosion(lit.s)) {
             DEBUG_PRINTF("need to explode existing string '%s'\n",
                          dumpString(lit.s).c_str());
index 677403121a01150c3f9fb95b9d0e10e83dacfb35..e7cef1009a2944354ac4b4ff34d62d20aef3732e 100644 (file)
@@ -35,7 +35,7 @@
 #include "rose/rose_dump.h"
 #include "rose_internal.h"
 #include "ue2common.h"
-#include "hwlm/hwlm_build.h"
+#include "hwlm/hwlm_literal.h"
 #include "nfa/castlecompile.h"
 #include "nfa/nfa_internal.h"
 #include "nfagraph/ng_dump.h"
@@ -505,24 +505,22 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
     size_t longLitLengthThreshold =
         calcLongLitThreshold(build, historyRequired);
 
-    auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED,
-                                       longLitLengthThreshold);
-    dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);
+    auto mp = makeMatcherProto(build, ROSE_ANCHORED, longLitLengthThreshold);
+    dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits);
 
-    lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold);
-    dumpTestLiterals(base + "rose_float_test_literals.txt", lits);
+    mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold);
+    dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits);
 
-    lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
-                                  build.ematcher_region_size);
-    dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);
+    mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size);
+    dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits);
 
     if (!build.cc.streaming) {
-        lits = fillHamsterLiteralList(build, ROSE_FLOATING,
+        mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN,
+                              ROSE_SMALL_BLOCK_LEN);
+        auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK,
                                     ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
-        auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
-                                    ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
-        lits.insert(end(lits), begin(lits2), end(lits2));
-        dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
+        mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits));
+        dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits);
     }
 }
 
index 6b326d34bfb1605b2ffcad23e464cafd0f401d17..02c5a3896a7127d83ddd49048a6d3f68bce19d2f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -58,6 +58,17 @@ namespace ue2 {
 
 #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
 
+/**
+ * \brief The largest allowable "short" literal fragment which can be given to
+ * a literal matcher directly.
+ *
+ * Literals longer than this will be truncated to their suffix and confirmed in
+ * the Rose interpreter, either as "medium length" literals which can be
+ * confirmed from history, or "long literals" which make use of the streaming
+ * table support.
+ */
+#define ROSE_SHORT_LITERAL_LEN_MAX 8
+
 struct BoundaryReports;
 struct CastleProto;
 struct CompileContext;
index 01633c06c965e1dbada77068d2f84e2a1a9827e0..f7c237a774aada2d87ac5a6a2e6f46a34ce82e4b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -491,8 +491,14 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
         return false;
     }
 
-    if (build.literals.right.at(id).s.length() > max_len) {
-        DEBUG_PRINTF("requires literal check\n");
+    size_t len = build.literals.right.at(id).s.length();
+    if (len > max_len) {
+        DEBUG_PRINTF("long literal, requires confirm\n");
+        return false;
+    }
+
+    if (len > ROSE_SHORT_LITERAL_LEN_MAX) {
+        DEBUG_PRINTF("medium-length literal, requires confirm\n");
         return false;
     }
 
@@ -626,10 +632,10 @@ u64a literalMinReportOffset(const RoseBuildImpl &build,
     return lit_min_offset;
 }
 
-vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
-                                           rose_literal_table table,
-                                           size_t max_len, u32 max_offset) {
-    vector<hwlmLiteral> lits;
+MatcherProto makeMatcherProto(const RoseBuildImpl &build,
+                              rose_literal_table table, size_t max_len,
+                              u32 max_offset) {
+    MatcherProto mp;
 
     for (const auto &e : build.literals.right) {
         const u32 id = e.first;
@@ -652,7 +658,8 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
         /* Note: requires_benefits are handled in the literal entries */
         const ue2_literal &lit = e.second.s;
 
-        DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str());
+        DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(),
+                     lit.length());
 
         if (max_offset != ROSE_BOUND_INF) {
             u64a min_report = literalMinReportOffset(build, e.second, info);
@@ -665,14 +672,22 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
 
         const vector<u8> &msk = e.second.msk;
         const vector<u8> &cmp = e.second.cmp;
-
         bool noruns = isNoRunsLiteral(build, id, info, max_len);
 
+        size_t lit_hist_len = 0;
+        if (build.cc.streaming) {
+            lit_hist_len = max(msk.size(), min(lit.length(), max_len));
+            lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0;
+        }
+        DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len);
+        assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable);
+
         if (info.requires_explode) {
             DEBUG_PRINTF("exploding lit\n");
 
-            // We do not require_explode for long literals.
-            assert(lit.length() <= max_len);
+            // We do not require_explode for literals that need confirm
+            // (long/medium length literals).
+            assert(lit.length() <= ROSE_SHORT_LITERAL_LEN_MAX);
 
             case_iter cit = caseIterateBegin(lit);
             case_iter cite = caseIterateEnd();
@@ -690,8 +705,9 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
                     continue;
                 }
 
-                lits.emplace_back(move(s), nocase, noruns, final_id, groups,
-                                  msk, cmp);
+                mp.history_required = max(mp.history_required, lit_hist_len);
+                mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups,
+                                     msk, cmp);
             }
         } else {
             string s = lit.get_string();
@@ -702,11 +718,13 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
                          final_id, escapeString(s).c_str(), (int)nocase, noruns,
                          dumpMask(msk).c_str(), dumpMask(cmp).c_str());
 
-            if (s.length() > max_len) {
-                DEBUG_PRINTF("truncating to tail of length %zu\n", max_len);
-                s.erase(0, s.length() - max_len);
+            if (s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
+                DEBUG_PRINTF("truncating to tail of length %zu\n",
+                             size_t{ROSE_SHORT_LITERAL_LEN_MAX});
+                s.erase(0, s.length() - ROSE_SHORT_LITERAL_LEN_MAX);
                 // We shouldn't have set a threshold below 8 chars.
-                assert(msk.size() <= max_len);
+                assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX);
+                assert(!noruns);
             }
 
             if (!maskIsConsistent(s, nocase, msk, cmp)) {
@@ -714,12 +732,13 @@ vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
                 continue;
             }
 
-            lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk,
-                              cmp);
+            mp.history_required = max(mp.history_required, lit_hist_len);
+            mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk,
+                                 cmp);
         }
     }
 
-    return lits;
+    return mp;
 }
 
 aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
@@ -730,49 +749,31 @@ aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
     *fsize = 0;
     *fgroups = 0;
 
-    auto fl = fillHamsterLiteralList(build, ROSE_FLOATING,
-                                     longLitLengthThreshold);
-    if (fl.empty()) {
+    auto mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold);
+    if (mp.lits.empty()) {
         DEBUG_PRINTF("empty floating matcher\n");
         return nullptr;
     }
 
-    for (const hwlmLiteral &hlit : fl) {
-        *fgroups |= hlit.groups;
+    for (const hwlmLiteral &lit : mp.lits) {
+        *fgroups |= lit.groups;
     }
 
-    hwlmStreamingControl ctl;
-    hwlmStreamingControl *ctlp;
-    if (build.cc.streaming) {
-        ctl.history_max = build.cc.grey.maxHistoryAvailable;
-        ctl.history_min = MAX(*historyRequired,
-                              build.cc.grey.minHistoryAvailable);
-        DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n",
-                     ctl.history_max, ctl.history_min);
-        ctlp = &ctl;
-    } else {
-        ctlp = nullptr; // Null for non-streaming.
-    }
-
-    aligned_unique_ptr<HWLM> ftable =
-        hwlmBuild(fl, ctlp, false, build.cc, build.getInitialGroups());
-    if (!ftable) {
+    auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups());
+    if (!hwlm) {
         throw CompileError("Unable to generate bytecode.");
     }
 
     if (build.cc.streaming) {
-        DEBUG_PRINTF("literal_history_required=%zu\n",
-                ctl.literal_history_required);
-        assert(ctl.literal_history_required <=
-               build.cc.grey.maxHistoryAvailable);
-        *historyRequired = max(*historyRequired,
-                ctl.literal_history_required);
+        DEBUG_PRINTF("history_required=%zu\n", mp.history_required);
+        assert(mp.history_required <= build.cc.grey.maxHistoryAvailable);
+        *historyRequired = max(*historyRequired, mp.history_required);
     }
 
-    *fsize = hwlmSize(ftable.get());
+    *fsize = hwlmSize(hwlm.get());
     assert(*fsize);
     DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize);
-    return ftable;
+    return hwlm;
 }
 
 aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
@@ -791,38 +792,38 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
         return nullptr;
     }
 
-    auto lits = fillHamsterLiteralList(
-        build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
-    if (lits.empty()) {
+    auto mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN,
+                               ROSE_SMALL_BLOCK_LEN);
+    if (mp.lits.empty()) {
         DEBUG_PRINTF("no floating table\n");
         return nullptr;
-    } else if (lits.size() == 1) {
+    } else if (mp.lits.size() == 1) {
         DEBUG_PRINTF("single floating literal, noodle will be fast enough\n");
         return nullptr;
     }
 
-    auto anchored_lits =
-        fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
-                               ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
-    if (anchored_lits.empty()) {
+    auto mp_anchored =
+        makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN,
+                         ROSE_SMALL_BLOCK_LEN);
+    if (mp_anchored.lits.empty()) {
         DEBUG_PRINTF("no small-block anchored literals\n");
         return nullptr;
     }
 
-    lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end());
+    mp.lits.insert(mp.lits.end(), mp_anchored.lits.begin(),
+                   mp_anchored.lits.end());
 
     // None of our literals should be longer than the small block limit.
-    assert(all_of(begin(lits), end(lits), [](const hwlmLiteral &lit) {
+    assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) {
         return lit.s.length() <= ROSE_SMALL_BLOCK_LEN;
     }));
 
-    if (lits.empty()) {
+    if (mp.lits.empty()) {
         DEBUG_PRINTF("no literals shorter than small block len\n");
         return nullptr;
     }
 
-    aligned_unique_ptr<HWLM> hwlm =
-        hwlmBuild(lits, nullptr, true, build.cc, build.getInitialGroups());
+    auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups());
     if (!hwlm) {
         throw CompileError("Unable to generate bytecode.");
     }
@@ -837,10 +838,10 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
                                                  size_t *esize) {
     *esize = 0;
 
-    auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
-                                     build.ematcher_region_size);
+    auto mp =
+        makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size);
 
-    if (el.empty()) {
+    if (mp.lits.empty()) {
         DEBUG_PRINTF("no eod anchored literals\n");
         assert(!build.ematcher_region_size);
         return nullptr;
@@ -848,17 +849,15 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
 
     assert(build.ematcher_region_size);
 
-    hwlmStreamingControl *ctlp = nullptr; // not a streaming case
-    aligned_unique_ptr<HWLM> etable =
-        hwlmBuild(el, ctlp, true, build.cc, build.getInitialGroups());
-    if (!etable) {
+    auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups());
+    if (!hwlm) {
         throw CompileError("Unable to generate bytecode.");
     }
 
-    *esize = hwlmSize(etable.get());
+    *esize = hwlmSize(hwlm.get());
     assert(*esize);
     DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize);
-    return etable;
+    return hwlm;
 }
 
 } // namespace ue2
index a25dbca39fbd201b23148fa57f6b186b87b6ad70..15ccf278375c1dbc7d12a4a029a473e0e2a19e6a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -44,15 +44,21 @@ namespace ue2 {
 
 struct hwlmLiteral;
 
+struct MatcherProto {
+    std::vector<hwlmLiteral> lits;
+    size_t history_required = 0;
+};
+
 /**
- * \brief Build up a vector of literals for the given table.
+ * \brief Build up a vector of literals (and associated other data) for the
+ * given table.
  *
  * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
  * only lead to a pattern match after max_offset may be excluded.
  */
-std::vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
-                    rose_literal_table table, size_t max_len,
-                    u32 max_offset = ROSE_BOUND_INF);
+MatcherProto makeMatcherProto(const RoseBuildImpl &build,
+                              rose_literal_table table, size_t max_len,
+                              u32 max_offset = ROSE_BOUND_INF);
 
 aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
                                               size_t longLitLengthThreshold,
index 28b885bd52582eb235cd99e898c932ef39cce5be..97579111dea04332454adc7487e0249c3115dd29 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -28,7 +28,7 @@
 
 #include "rose_build_impl.h"
 
-#include "hwlm/hwlm_build.h"
+#include "hwlm/hwlm_literal.h"
 #include "nfa/castlecompile.h"
 #include "nfa/goughcompile.h"
 #include "nfa/mcclellancompile_util.h"
index ee237639bd15dc87a02483ffb121d30f24da55c9..5f7ab0bfe0a22d05eefb3fba01b36bdc1fc41ab7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -502,6 +502,7 @@ void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob,
     assert(!literal.empty());
     inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
     inst->lit_length = verify_u32(literal.size());
+    inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
 void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
@@ -511,6 +512,27 @@ void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
     assert(!literal.empty());
     inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
     inst->lit_length = verify_u32(literal.size());
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob,
+                                 const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    assert(!literal.empty());
+    inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
+    inst->lit_length = verify_u32(literal.size());
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
+void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob,
+                                       const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    assert(!literal.empty());
+    inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
+    inst->lit_length = verify_u32(literal.size());
+    inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
 static
index 39e2e23c480df1f9dde949d6716cc89dc7b16395..440bf4e1a1f094e74170c58b403654efe5be3724 100644 (file)
@@ -1723,17 +1723,19 @@ public:
 };
 
 class RoseInstrCheckLongLit
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT,
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT,
                                     ROSE_STRUCT_CHECK_LONG_LIT,
                                     RoseInstrCheckLongLit> {
 public:
     std::string literal;
+    const RoseInstruction *target;
 
-    explicit RoseInstrCheckLongLit(std::string literal_in)
-        : literal(std::move(literal_in)) {}
+    RoseInstrCheckLongLit(std::string literal_in,
+                          const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {}
 
     bool operator==(const RoseInstrCheckLongLit &ri) const {
-        return literal == ri.literal;
+        return literal == ri.literal && target == ri.target;
     }
 
     size_t hash() const override {
@@ -1743,26 +1745,29 @@ public:
     void write(void *dest, RoseEngineBlob &blob,
                const OffsetMap &offset_map) const override;
 
-    bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return literal == ri.literal;
+    bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
     }
 };
 
 class RoseInstrCheckLongLitNocase
-    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
                                     ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
                                     RoseInstrCheckLongLitNocase> {
 public:
     std::string literal;
+    const RoseInstruction *target;
 
-    explicit RoseInstrCheckLongLitNocase(std::string literal_in)
-        : literal(std::move(literal_in)) {
+    RoseInstrCheckLongLitNocase(std::string literal_in,
+                                const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {
         upperString(literal);
     }
 
     bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
-        return literal == ri.literal;
+        return literal == ri.literal && target == ri.target;
     }
 
     size_t hash() const override {
@@ -1772,9 +1777,74 @@ public:
     void write(void *dest, RoseEngineBlob &blob,
                const OffsetMap &offset_map) const override;
 
-    bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return literal == ri.literal;
+    bool equiv_to(const RoseInstrCheckLongLitNocase &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMedLit
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT,
+                                    ROSE_STRUCT_CHECK_MED_LIT,
+                                    RoseInstrCheckMedLit> {
+public:
+    std::string literal;
+    const RoseInstruction *target;
+
+    explicit RoseInstrCheckMedLit(std::string literal_in,
+                                  const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMedLit &ri) const {
+        return literal == ri.literal && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMedLitNocase
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT_NOCASE,
+                                    ROSE_STRUCT_CHECK_MED_LIT_NOCASE,
+                                    RoseInstrCheckMedLitNocase> {
+public:
+    std::string literal;
+    const RoseInstruction *target;
+
+    explicit RoseInstrCheckMedLitNocase(std::string literal_in,
+                                        const RoseInstruction *target_in)
+        : literal(std::move(literal_in)), target(target_in) {
+        upperString(literal);
+    }
+
+    bool operator==(const RoseInstrCheckMedLitNocase &ri) const {
+        return literal == ri.literal && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMedLitNocase &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return literal == ri.literal &&
+               offsets.at(target) == other_offsets.at(ri.target);
     }
 };
 
index 1867be507cbd5f15047f00c73f84c681446d6c20..5d79da2eb8ed3029e5603b4056e27fdb2e0f01eb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -617,6 +617,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
                 const char *lit = (const char *)t + ri->lit_offset;
                 os << "    literal: \""
                    << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
 
@@ -626,6 +627,27 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
                 const char *lit = (const char *)t + ri->lit_offset;
                 os << "    literal: \""
                    << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MED_LIT) {
+                os << "    lit_offset " << ri->lit_offset << endl;
+                os << "    lit_length " << ri->lit_length << endl;
+                const char *lit = (const char *)t + ri->lit_offset;
+                os << "    literal: \""
+                   << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
+            PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
+                os << "    lit_offset " << ri->lit_offset << endl;
+                os << "    lit_length " << ri->lit_length << endl;
+                const char *lit = (const char *)t + ri->lit_offset;
+                os << "    literal: \""
+                   << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
 
index ed913316261646fe919b9358517ae45bcfdc433c..c5ddc94287c80e76d4c6f31eeed285831e3e00e5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -129,7 +129,19 @@ enum RoseInstructionCode {
      */
     ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
 
-    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel.
+    /**
+     * \brief Confirm a case-sensitive "medium length" literal at the current
+     * offset. In streaming mode, this will check history if needed.
+     */
+    ROSE_INSTR_CHECK_MED_LIT,
+
+    /**
+     * \brief Confirm a case-insensitive "medium length" literal at the current
+     * offset. In streaming mode, this will check history if needed.
+     */
+    ROSE_INSTR_CHECK_MED_LIT_NOCASE,
+
+    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MED_LIT_NOCASE //!< Sentinel.
 };
 
 struct ROSE_STRUCT_END {
@@ -477,18 +489,32 @@ struct ROSE_STRUCT_MATCHER_EOD {
     u8 code; //!< From enum RoseInstructionCode.
 };
 
-/** Note: check failure will halt program. */
 struct ROSE_STRUCT_CHECK_LONG_LIT {
     u8 code; //!< From enum RoseInstructionCode.
     u32 lit_offset; //!< Offset of literal string.
     u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
-/** Note: check failure will halt program. */
 struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE {
     u8 code; //!< From enum RoseInstructionCode.
     u32 lit_offset; //!< Offset of literal string.
     u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MED_LIT {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 lit_offset; //!< Offset of literal string.
+    u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 lit_offset; //!< Offset of literal string.
+    u32 lit_length; //!< Length of literal string.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
 #endif // ROSE_ROSE_PROGRAM_H
index 6116bfdb61b4ec0dc047ce9ab1af88fee39333c5..8ec725985ec823c7a019da1dde90c3a4465b47bd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -96,15 +96,6 @@ struct match {
 };
 
 extern "C" {
-static
-hwlmcb_rv_t countCallback(UNUSED size_t start, UNUSED size_t end, u32,
-                          void *ctxt) {
-    if (ctxt) {
-        ++*(u32 *)ctxt;
-    }
-
-    return HWLM_CONTINUE_MATCHING;
-}
 
 static
 hwlmcb_rv_t decentCallback(size_t start, size_t end, u32 id, void *ctxt) {
@@ -231,42 +222,6 @@ TEST_P(FDRp, MultiLocation) {
     }
 }
 
-TEST_P(FDRp, Flood) {
-    const u32 hint = GetParam();
-    SCOPED_TRACE(hint);
-
-    vector<hwlmLiteral> lits;
-    lits.push_back(hwlmLiteral("aaaa", 0, 1));
-    lits.push_back(hwlmLiteral("aaaaaaaa", 0, 2));
-    lits.push_back(hwlmLiteral("baaaaaaaa", 0, 3));
-    lits.push_back(hwlmLiteral("aaaaaaaab", 0, 4));
-
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
-    CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
-
-    const u32 testSize = 1024;
-    vector<u8> data(testSize, 'a');
-
-    vector<match> matches;
-    fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches,
-            HWLM_ALL_GROUPS);
-    ASSERT_EQ(testSize - 3 + testSize - 7, matches.size());
-    EXPECT_EQ(match(0, 3, 1), matches[0]);
-    EXPECT_EQ(match(1, 4, 1), matches[1]);
-    EXPECT_EQ(match(2, 5, 1), matches[2]);
-    EXPECT_EQ(match(3, 6, 1), matches[3]);
-
-    u32 currentMatch = 4;
-    for (u32 i = 7; i < testSize; i++, currentMatch += 2) {
-        EXPECT_TRUE(
-          (match(i - 3, i, 1) == matches[currentMatch] &&
-           match(i - 7, i, 2) == matches[currentMatch+1]) ||
-          (match(i - 7, i, 2) == matches[currentMatch+1] &&
-           match(i - 3, i, 1) == matches[currentMatch])
-        );
-    }
-}
-
 TEST_P(FDRp, NoRepeat1) {
     const u32 hint = GetParam();
     SCOPED_TRACE(hint);
@@ -414,36 +369,6 @@ TEST_P(FDRp, SmallStreaming2) {
     ASSERT_EQ(expected.size(), matches.size());
 }
 
-TEST_P(FDRp, LongLiteral) {
-    const u32 hint = GetParam();
-    SCOPED_TRACE(hint);
-    size_t sz;
-    const u8 *data;
-    vector<hwlmLiteral> lits;
-
-    string alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-    string alpha4 = alpha+alpha+alpha+alpha;
-    lits.push_back(hwlmLiteral(alpha4.c_str(), 0,10));
-
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
-    CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
-
-    u32 count = 0;
-
-    data = (const u8 *)alpha4.c_str();
-    sz = alpha4.size();
-
-    fdrExec(fdr.get(), data, sz, 0, countCallback, &count, HWLM_ALL_GROUPS);
-    EXPECT_EQ(1U, count);
-    count = 0;
-    fdrExec(fdr.get(), data, sz - 1, 0, countCallback, &count, HWLM_ALL_GROUPS);
-    EXPECT_EQ(0U, count);
-    count = 0;
-    fdrExec(fdr.get(), data + 1, sz - 1, 0, countCallback, &count,
-            HWLM_ALL_GROUPS);
-    EXPECT_EQ(0U, count);
-}
-
 TEST_P(FDRp, moveByteStream) {
     const u32 hint = GetParam();
     SCOPED_TRACE(hint);
@@ -491,7 +416,7 @@ TEST_P(FDRp, Stream1) {
 
     vector<hwlmLiteral> lits;
     lits.push_back(hwlmLiteral("f", 0, 0));
-    lits.push_back(hwlmLiteral("longsigislong", 0, 1));
+    lits.push_back(hwlmLiteral("literal", 0, 1));
 
     auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
     CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
@@ -514,7 +439,7 @@ INSTANTIATE_TEST_CASE_P(FDR, FDRp, ValuesIn(getValidFdrEngines()));
 
 typedef struct {
     string pattern;
-    unsigned char alien;
+    unsigned char alien; // character not present in pattern
 } pattern_alien_t;
 
 // gtest helper
@@ -529,7 +454,6 @@ class FDRpp : public TestWithParam<tuple<u32, pattern_alien_t>> {};
 // not happen if literal is partially (from 1 character up to full literal
 // length) is out of searched buffer - "too early" and "too late" conditions
 TEST_P(FDRpp, AlignAndTooEarly) {
-
     const size_t buf_alignment = 32;
     // Buffer should be big enough to hold two instances of matching literals
     // (up to 64 bytes each) and room for offset (up to 32 bytes)
@@ -538,7 +462,7 @@ TEST_P(FDRpp, AlignAndTooEarly) {
     const u32 hint = get<0>(GetParam());
     SCOPED_TRACE(hint);
 
-    // pattern which is used to generate literals of variable size - from 1 to 64
+    // pattern which is used to generate literals of variable size - from 1 to 8
     const string &pattern = get<1>(GetParam()).pattern;
     const size_t patLen = pattern.size();
     const unsigned char alien = get<1>(GetParam()).alien;
@@ -551,7 +475,7 @@ TEST_P(FDRpp, AlignAndTooEarly) {
     vector<hwlmLiteral> lits;
     for (size_t litLen = 1; litLen <= patLen; litLen++) {
 
-        // building literal from pattern substring of variable length 1-64
+        // building literal from pattern substring of variable length 1-patLen
         lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0));
         auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(),
                                        Grey());
@@ -596,9 +520,9 @@ TEST_P(FDRpp, AlignAndTooEarly) {
 }
 
 static const pattern_alien_t test_pattern[] = {
-        {"abaabaaabaaabbaaaaabaaaaabbaaaaaaabaabbaaaabaaaaaaaabbbbaaaaaaab", 'x'},
-        {"zzzyyzyzyyyyzyyyyyzzzzyyyyyyyyzyyyyyyyzzzzzyzzzzzzzzzyzzyzzzzzzz", (unsigned char)'\x99'},
-        {"abcdef lafjk askldfjklf alfqwei9rui 'gldgkjnooiuswfs138746453583", '\0'}
+        {"abaabaaa", 'x'},
+        {"zzzyyzyz", (unsigned char)'\x99'},
+        {"abcdef l", '\0'}
 };
 
 INSTANTIATE_TEST_CASE_P(FDR, FDRpp, Combine(ValuesIn(getValidFdrEngines()),
index 7b00ac4c889c04a4436bf3e81ca27011cf891f3e..952fffc199fa2fac8ad3d3639ecbe1bc93df1331 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -161,8 +161,8 @@ TEST_P(FDRFloodp, NoMask) {
         vector<hwlmLiteral> lits;
 
         // build literals of type "aaaa", "aaab", "baaa"
-        // of lengths 1, 2, 4, 8, 16, 32, both case-less and case-sensitive
-        for (int i = 0; i < 6; i++) {
+        // of lengths 1, 2, 4, 8, both case-less and case-sensitive
+        for (int i = 0; i < 4; i++) {
             string s(1 << i, c);
             lits.push_back(hwlmLiteral(s, false, i * 8 + 0));
             s[0] = cAlt;
@@ -183,13 +183,13 @@ TEST_P(FDRFloodp, NoMask) {
                                        Grey());
         CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
 
-        map <u32, int> matchesCounts;
+        map<u32, int> matchesCounts;
 
         hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize,
                     0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS);
         ASSERT_EQ(0, fdrStatus);
 
-        for (u8 i = 0; i < 6; i++) {
+        for (u8 i = 0; i < 4; i++) {
             u32 cnt = dataSize - (1 << i) + 1;
             ASSERT_EQ(cnt, matchesCounts[i * 8 + 0]);
             ASSERT_EQ(0, matchesCounts[i * 8 + 1]);
@@ -214,7 +214,7 @@ TEST_P(FDRFloodp, NoMask) {
                     0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS);
         ASSERT_EQ(0, fdrStatus);
 
-        for (u8 i = 0; i < 6; i++) {
+        for (u8 i = 0; i < 4; i++) {
             u32 cnt = dataSize - (1 << i) + 1;
             ASSERT_EQ(0, matchesCounts[i * 8 + 0]);
             ASSERT_EQ(i == 0 ? cnt : 0, matchesCounts[i * 8 + 1]);