git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
literal matching: separate path for pure literal patterns
author: Hong, Yang A <yang.a.hong@intel.com>
Tue, 8 Jan 2019 22:00:55 +0000 (06:00 +0800)
committer: Chang, Harry <harry.chang@intel.com>
Mon, 21 Jan 2019 01:59:22 +0000 (09:59 +0800)
17 files changed:
src/fdr/fdr_compile.cpp
src/fdr/fdr_confirm.h
src/fdr/fdr_confirm_compile.cpp
src/fdr/fdr_confirm_runtime.h
src/hwlm/hwlm_literal.cpp
src/hwlm/hwlm_literal.h
src/parser/shortcut_literal.cpp
src/rose/match.c
src/rose/program_runtime.c
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_impl.h
src/rose/rose_build_matchers.cpp
src/runtime.c
src/scratch.c
src/scratch.h
src/util/ue2string.h

index 5e3c6a4ebeb59bdbc5febc46449726a1cc42608e..65c5020ef5710ee92f04f07414c75d4fc84f2bf2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -807,6 +807,9 @@ void findIncludedLits(vector<hwlmLiteral> &lits,
         for (size_t i = 0; i < cnt; i++) {
             u32 bucket1 = group[i].first;
             u32 id1 = group[i].second;
+            if (lits[id1].pure) {
+                continue;
+            }
             buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
                             exception_map);
         }
index d975747e61d95af53e6c6a5d83f0b8a034737e52..9490df43f39ccfdc722c4defebffe5699d1c69e7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -62,6 +62,7 @@ struct LitInfo {
     u8 size;
     u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
     u8 next;
+    u8 pure; //!< The pass-on of pure flag from hwlmLiteral.
 };
 
 #define FDRC_FLAG_NO_CONFIRM 1
index c75f8d17f60a3627d16de5a7fa8c63ff4c6d06f9..3eab21b2096691e8fcdb42bb1ecabca31bcf441c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -87,6 +87,7 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
         info.flags = flags;
         info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
         info.groups = lit.groups;
+        info.pure = lit.pure;
 
         // these are built up assuming a LE machine
         CONF_TYPE msk = all_ones;
index 067e50e2c4bbabcc7d8f442466c59d4b8dc4eca0..67e0d692ec36b4823ab9b51423ecbb936e9456c1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -65,6 +65,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
     u8 oldNext; // initialized in loop
     do {
         assert(ISALIGNED(li));
+        scratch->pure = li->pure;
 
         if (unlikely((conf_key & li->msk) != li->v)) {
             goto out;
@@ -99,6 +100,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
         li++;
     } while (oldNext);
     scratch->fdr_conf = NULL;
+    scratch->pure = 0;
 }
 
 #endif
index b0968d79bb004effbbfb3d8fe93ea5e0987e819b..b257dfb07fd91fca2ef311c74e9109a65aa47efc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -83,9 +83,10 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
  * \ref HWLM_MASKLEN. */
 hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
                          bool noruns_in, u32 id_in, hwlm_group_t groups_in,
-                         const vector<u8> &msk_in, const vector<u8> &cmp_in)
+                         const vector<u8> &msk_in, const vector<u8> &cmp_in,
+                         bool pure_in)
     : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
-      groups(groups_in), msk(msk_in), cmp(cmp_in) {
+      groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) {
     assert(s.size() <= HWLM_LITERAL_MAX_LEN);
     assert(msk.size() <= HWLM_MASKLEN);
     assert(msk.size() == cmp.size());
index 08510fb0f487298812d1ab16ad69b3e90a27fa88..72a57f9445fbf89e5edff3b6bf2ebf6df33f1a94 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -113,15 +113,20 @@ struct hwlmLiteral {
      */
     std::vector<u8> cmp;
 
+    bool pure; //!< \brief The pass-on of pure flag from LitFragment.
+
     /** \brief Complete constructor, takes group information and msk/cmp.
      *
      * This constructor takes a msk/cmp pair. Both must be vectors of length <=
      * \ref HWLM_MASKLEN. */
     hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
                 u32 id_in, hwlm_group_t groups_in,
-                const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
+                const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
+                bool pure_in = false);
 
-    /** \brief Simple constructor: no group information, no msk/cmp. */
+    /** \brief Simple constructor: no group information, no msk/cmp.
+     *
+     * This constructor is only used in internal unit tests. */
     hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
         : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
 };
index 7a7ab6eeae6a8fba7f894b24c27f4c7e0511ea9a..d08bab3c0d7f43ddc8c44e0940f8f05b1629c352 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -185,6 +185,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
         return false;
     }
 
+    vis.lit.set_pure();
     const ue2_literal &lit = vis.lit;
 
     if (lit.empty()) {
index 8ad58b15f2c3e2202ea2dbad897e36fd50e5857f..192b4709dcf70fb87033aea4a4b59ff34c935674 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -238,7 +238,11 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
     assert(id && id < t->size); // id is an offset into bytecode
     const u64a som = 0;
     const u8 flags = 0;
-    return roseRunProgram(t, scratch, id, som, end, flags);
+    if (!scratch->pure) {
+        return roseRunProgram(t, scratch, id, som, end, flags);
+    } else {
+        return roseRunProgram_l(t, scratch, id, som, end, flags);
+    }
 }
 
 static rose_inline
index 3350e167afa3a0fd9843c016cf60c13b62b76802..5a7f786ed81ffb2a53eb5c85e95fe5a731fc323f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -2778,6 +2778,293 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
     return HWLM_CONTINUE_MATCHING;
 }
 
+/*
+ * Helper macros for roseRunProgram_l() below.
+ *
+ * L_PROGRAM_CASE(name) opens the case label for ROSE_INSTR_<name>, opens a
+ * brace scope and binds 'ri' to the instruction located at 'pc', viewed as a
+ * ROSE_STRUCT_<name>. The scope is closed by L_PROGRAM_NEXT_INSTRUCTION.
+ */
+#define L_PROGRAM_CASE(name)                                                   \
+    case ROSE_INSTR_##name: {                                                  \
+        DEBUG_PRINTF("l_instruction: " #name " (pc=%u)\n",                     \
+                     programOffset + (u32)(pc - pc_base));                     \
+        const struct ROSE_STRUCT_##name *ri =                                  \
+            (const struct ROSE_STRUCT_##name *)pc;
+
+/*
+ * Steps 'pc' over the current instruction (its size rounded up to
+ * ROSE_INSTR_MIN_ALIGN), leaves the switch and closes the scope opened by
+ * L_PROGRAM_CASE.
+ */
+#define L_PROGRAM_NEXT_INSTRUCTION                                             \
+    pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN);                        \
+    break;                                                                     \
+    }
+
+/*
+ * Used after 'pc' has already been moved by a fail_jump: restarts the
+ * enclosing for-loop without applying the normal size-based increment.
+ */
+#define L_PROGRAM_NEXT_INSTRUCTION_JUMP continue;
+
+/**
+ * \brief Run a Rose program using a reduced instruction set.
+ *
+ * Only the instructions listed in the switch below are interpreted. Within
+ * this change, roseProcessMatchInline() selects this interpreter instead of
+ * roseRunProgram() when scratch->pure is set.
+ *
+ * \param t             Rose engine; the program lives inside its bytecode.
+ * \param scratch       Scratch space; supplies the Rose context (tctxt) and
+ *                      the exhaustion/logical/combination vectors.
+ * \param programOffset Offset of the program from the start of the engine;
+ *                      asserted to lie in [sizeof(struct RoseEngine), t->size).
+ * \param som           Start-of-match offset; may be overwritten by a
+ *                      SOM_FROM_REPORT instruction.
+ * \param end           End-of-match offset.
+ * \param prog_flags    Program flags; only ROSE_PROG_FLAG_FROM_MPV is read.
+ *
+ * \return HWLM_CONTINUE_MATCHING once END or FINAL_REPORT has been executed
+ *         successfully; HWLM_TERMINATE_MATCHING if a callee asks to stop or
+ *         if an instruction outside the supported subset is encountered.
+ */
+hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
+                             struct hs_scratch *scratch, u32 programOffset,
+                             u64a som, u64a end, u8 prog_flags) {
+    DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset,
+                 som, end, prog_flags);
+
+    assert(programOffset != ROSE_INVALID_PROG_OFFSET);
+    assert(programOffset >= sizeof(struct RoseEngine));
+    assert(programOffset < t->size);
+
+    const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
+
+    const char *pc_base = getByOffset(t, programOffset);
+    const char *pc = pc_base;
+
+    struct RoseContext *tctxt = &scratch->tctxt;
+
+    assert(*(const u8 *)pc != ROSE_INSTR_END);
+
+    for (;;) {
+        assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
+        assert(pc >= pc_base);
+        assert((size_t)(pc - pc_base) < t->size);
+        const u8 code = *(const u8 *)pc;
+        assert(code <= LAST_ROSE_INSTRUCTION);
+
+        switch ((enum RoseInstructionCode)code) {
+            L_PROGRAM_CASE(END) {
+                DEBUG_PRINTF("finished\n");
+                return HWLM_CONTINUE_MATCHING;
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(CATCH_UP) {
+                if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(SOM_FROM_REPORT) {
+                som = handleSomExternal(scratch, &ri->som, end);
+                DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
+                             som);
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(DEDUPE) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                const char do_som = t->hasSom; // TODO: constant propagate
+                const char is_external_report = 1;
+                enum DedupeResult rv =
+                    dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
+                                  ri->dkey, ri->offset_adjust,
+                                  is_external_report, ri->quash_som, do_som);
+                switch (rv) {
+                case DEDUPE_HALT:
+                    return HWLM_TERMINATE_MATCHING;
+                case DEDUPE_SKIP:
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                case DEDUPE_CONTINUE:
+                    break;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(DEDUPE_SOM) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                const char is_external_report = 0;
+                const char do_som = 1;
+                enum DedupeResult rv =
+                    dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
+                                  ri->dkey, ri->offset_adjust,
+                                  is_external_report, ri->quash_som, do_som);
+                switch (rv) {
+                case DEDUPE_HALT:
+                    return HWLM_TERMINATE_MATCHING;
+                case DEDUPE_SKIP:
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                case DEDUPE_CONTINUE:
+                    break;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(REPORT) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
+                               INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(REPORT_EXHAUST) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
+                               ri->ekey) == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(REPORT_SOM) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                if (roseReportSom(t, scratch, som, end, ri->onmatch,
+                                  ri->offset_adjust,
+                                  INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(DEDUPE_AND_REPORT) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                const char do_som = t->hasSom; // TODO: constant propagate
+                const char is_external_report = 1;
+                enum DedupeResult rv =
+                    dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
+                                  ri->dkey, ri->offset_adjust,
+                                  is_external_report, ri->quash_som, do_som);
+                switch (rv) {
+                case DEDUPE_HALT:
+                    return HWLM_TERMINATE_MATCHING;
+                case DEDUPE_SKIP:
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                case DEDUPE_CONTINUE:
+                    break;
+                }
+
+                const u32 ekey = INVALID_EKEY;
+                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
+                               ekey) == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(FINAL_REPORT) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
+                               INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+                /* One-shot specialisation: this instruction always terminates
+                 * execution of the program. */
+                return HWLM_CONTINUE_MATCHING;
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(CHECK_EXHAUSTED) {
+                DEBUG_PRINTF("check ekey %u\n", ri->ekey);
+                assert(ri->ekey != INVALID_EKEY);
+                assert(ri->ekey < t->ekeyCount);
+                const char *evec = scratch->core_info.exhaustionVector;
+                if (isExhausted(t, evec, ri->ekey)) {
+                    DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
+                                 ri->ekey);
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(CHECK_LONG_LIT) {
+                const char nocase = 0;
+                if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
+                                          ri->lit_length, nocase)) {
+                    DEBUG_PRINTF("failed long lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
+                const char nocase = 1;
+                if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
+                                          ri->lit_length, nocase)) {
+                    DEBUG_PRINTF("failed nocase long lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(CHECK_MED_LIT) {
+                const char nocase = 0;
+                if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
+                                            ri->lit_length, nocase)) {
+                    DEBUG_PRINTF("failed lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
+                const char nocase = 1;
+                if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
+                                            ri->lit_length, nocase)) {
+                    // Message mirrors the "failed lit check" text of the
+                    // case-sensitive medium-literal check above.
+                    DEBUG_PRINTF("failed nocase lit check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    L_PROGRAM_NEXT_INSTRUCTION_JUMP
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(SET_LOGICAL) {
+                DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
+                             ri->lkey, ri->offset_adjust);
+                assert(ri->lkey != INVALID_LKEY);
+                assert(ri->lkey < t->lkeyCount);
+                char *lvec = scratch->core_info.logicalVector;
+                setLogicalVal(t, lvec, ri->lkey, 1);
+                updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(SET_COMBINATION) {
+                DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
+                assert(ri->ckey != INVALID_CKEY);
+                assert(ri->ckey < t->ckeyCount);
+                char *cvec = scratch->core_info.combVector;
+                setCombinationActive(t, cvec, ri->ckey);
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(FLUSH_COMBINATION) {
+                assert(end >= tctxt->lastCombMatchOffset);
+                if (end > tctxt->lastCombMatchOffset) {
+                    if (flushActiveCombinations(t, scratch)
+                            == HWLM_TERMINATE_MATCHING) {
+                        return HWLM_TERMINATE_MATCHING;
+                    }
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            L_PROGRAM_CASE(SET_EXHAUST) {
+                updateSeqPoint(tctxt, end, from_mpv);
+                if (roseSetExhaust(t, scratch, ri->ekey)
+                        == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
+            default: {
+                assert(0); // unreachable
+                /* When asserts are compiled out, an instruction outside the
+                 * supported subset would otherwise leave pc unchanged and
+                 * spin in this loop forever; stop matching instead. */
+                return HWLM_TERMINATE_MATCHING;
+            }
+        }
+    }
+
+    assert(0); // unreachable
+    return HWLM_CONTINUE_MATCHING;
+}
+
+#undef L_PROGRAM_CASE
+#undef L_PROGRAM_NEXT_INSTRUCTION
+#undef L_PROGRAM_NEXT_INSTRUCTION_JUMP
+
 #undef PROGRAM_CASE
 #undef PROGRAM_NEXT_INSTRUCTION
 #undef PROGRAM_NEXT_INSTRUCTION_JUMP
index 5b16118eaf37f09c880ee3fb8b4576caa64ed591..50bf202c6f8ef5407adb4c400773085729e518de 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -54,4 +54,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
                            struct hs_scratch *scratch, u32 programOffset,
                            u64a som, u64a end, u8 prog_flags);
 
+/**
+ * \brief Variant of roseRunProgram() taking the same arguments.
+ *
+ * NOTE(review): within this change it is called when scratch->pure is set
+ * (pure-literal matches); confirm no other callers rely on it.
+ */
+hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
+                             struct hs_scratch *scratch, u32 programOffset,
+                             u64a som, u64a end, u8 prog_flags);
+
 #endif // PROGRAM_RUNTIME_H
index 2c0a9b28605637063b41ffa446420b99655eb267..0ef20f2130e20d5d36be0995e93d0d6a5945e441 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -2843,9 +2843,34 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
 
         DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
                      dumpString(lit.s).c_str());
-        auto &fi = frag_info[getFragment(lit)];
-        fi.lit_ids.push_back(lit_id);
-        fi.groups |= groups;
+
+        /**   0:/xxabcdefgh/      */
+        /**   1:/yyabcdefgh/      */
+        /**   2:/yyabcdefgh.+/    */
+        // The 3 patterns above are first converted into a RoseLiteralMap with
+        // 2 elements ("xxabcdefgh" and "yyabcdefgh"), and then into a single
+        // LitFragment ("abcdefgh"). Special care must be taken to handle the
+        // 'pure' flag during these conversions.
+
+        rose_literal_id lit_frag = getFragment(lit);
+        auto it = frag_info.find(lit_frag);
+        if (it != frag_info.end()) {
+            if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
+                struct FragmentInfo f_info = it->second;
+                f_info.lit_ids.push_back(lit_id);
+                f_info.groups |= groups;
+                frag_info.erase(it->first);
+                frag_info.emplace(lit_frag, f_info);
+            } else {
+                it->second.lit_ids.push_back(lit_id);
+                it->second.groups |= groups;
+            }
+        } else {
+            struct FragmentInfo f_info;
+            f_info.lit_ids.push_back(lit_id);
+            f_info.groups |= groups;
+            frag_info.emplace(lit_frag, f_info);
+        }
     }
 
     for (auto &m : frag_info) {
index 900aee6cd33a726ddc8737504beed8394ec65d3c..fe48da4c08bb63a98502fcb7796da4e7a6a75202 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -340,7 +340,14 @@ public:
     std::pair<u32, bool> insert(const rose_literal_id &lit) {
         auto it = lits_index.find(lit);
         if (it != lits_index.end()) {
-            return {it->second, false};
+            u32 idx = it->second;
+            auto &l = lits.at(idx);
+            if (!lit.s.get_pure() && l.s.get_pure()) {
+                lits_index.erase(l);
+                l.s.unset_pure();
+                lits_index.emplace(l, idx);
+            }
+            return {idx, false};
         }
         u32 id = verify_u32(lits.size());
         lits.push_back(lit);
index 2c302a859acf1aa713fcd3734c3d6a46980d54ef..8c532cabd36e418fd9bcb28e3159b1832baa243c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -727,6 +727,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
 
     const auto &s_final = lit_final.get_string();
     bool nocase = lit_final.any_nocase();
+    bool pure = f.s.get_pure();
 
     DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
                  f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
@@ -740,7 +741,7 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp,
     const auto &groups = f.groups;
 
     mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
-                         groups, msk, cmp);
+                         groups, msk, cmp, pure);
 }
 
 static
index 64a04fd7d3790cafb9b4a900766a36ef334b111f..68f1f8a752fc0d203008bf0792be2b3018f1a2b5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -141,6 +141,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
     s->deduper.current_report_offset = ~0ULL;
     s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
     s->fdr_conf = NULL;
+    s->pure = 0;
 
     // Rose program execution (used for some report paths) depends on these
     // values being initialised.
index 8e082c772cf42027fd691431d31593fa512b6150..c23b5b3c3ae9e1edddaa1277de5286530e5fef11 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -137,6 +137,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
     s->scratchSize = alloc_size;
     s->scratch_alloc = (char *)s_tmp;
     s->fdr_conf = NULL;
+    s->pure = 0;
 
     // each of these is at an offset from the previous
     char *current = (char *)s + sizeof(*s);
index 59aa02c69fc49520b12979ff74d6ef92e8b1c74c..dab7bab76d7c556f01416781742ee12172d63cb7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -208,6 +208,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     u64a *fdr_conf; /**< FDR confirm value */
     u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
                          * in buffer */
+    u8 pure; /**< indicator of pure-literal or cutting-literal */
 };
 
 /* array of fatbit ptr; TODO: why not an array of fatbits? */
index 0fa76c3a5baf98f7a15be67eb17abb75112691d2..1ce51b2f1f48eafaed41a4e36425e0df51ddfa30 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -211,10 +211,17 @@ public:
 
     size_t hash() const;
 
+    void set_pure() { pure = true; }
+    void unset_pure() { pure = false; }
+    bool get_pure() const { return pure; }
+
+    /* TODO: consider existing member functions possibly related to pure. */
+
 private:
     friend const_iterator;
     std::string s;
     boost::dynamic_bitset<> nocase;
+    bool pure = false; /**< born from cutting or not (pure literal). */
 };
 
 /// Return a reversed copy of this literal.