]> git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Logical combination: support EOD match from purely negative case.
authorChang, Harry <harry.chang@intel.com>
Wed, 27 Mar 2019 07:19:14 +0000 (15:19 +0800)
committerChang, Harry <harry.chang@intel.com>
Tue, 13 Aug 2019 06:50:07 +0000 (14:50 +0800)
18 files changed:
src/parser/logical_combination.cpp
src/report.h
src/rose/match.c
src/rose/program_runtime.c
src/rose/rose.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_dump.cpp
src/rose/rose_build_instructions.cpp
src/rose/rose_build_instructions.h
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_internal.h
src/rose/rose_program.h
src/runtime.c
tools/hscollider/GraphTruth.cpp
tools/hscollider/GroundTruth.cpp
unit/hyperscan/bad_patterns.txt
unit/hyperscan/logical_combination.cpp

index b78390b07ca79f4e6f190d9e8eb9ed84fdaa5933..49e060c9813f60166585ea795c6f5a9af52b3cc8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Intel Corporation
+ * Copyright (c) 2018-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -254,44 +254,6 @@ void popOperator(vector<LogicalOperator> &op_stack, vector<u32> &subid_stack,
     op_stack.pop_back();
 }
 
-static
-char getValue(const vector<char> &lv, u32 ckey) {
-    if (ckey & LOGICAL_OP_BIT) {
-        return lv[ckey & ~LOGICAL_OP_BIT];
-    } else {
-        return 0;
-    }
-}
-
-static
-bool hasMatchFromPurelyNegative(const vector<LogicalOp> &tree,
-                                u32 start, u32 result) {
-    vector<char> lv(tree.size());
-    assert(start <= result);
-    for (u32 i = start; i <= result; i++) {
-        assert(i & LOGICAL_OP_BIT);
-        const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT];
-        assert(i == op.id);
-        switch (op.op) {
-        case LOGICAL_OP_NOT:
-            lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro);
-            break;
-        case LOGICAL_OP_AND:
-            lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) &
-                                          getValue(lv, op.ro);
-            break;
-        case LOGICAL_OP_OR:
-            lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) |
-                                          getValue(lv, op.ro);
-            break;
-        default:
-            assert(0);
-            break;
-        }
-    }
-    return lv[result & ~LOGICAL_OP_BIT];
-}
-
 void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
                                             u32 ekey, u64a min_offset,
                                             u64a max_offset) {
@@ -366,9 +328,6 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
     if (lkey_start == INVALID_LKEY) {
         throw CompileError("No logical operation.");
     }
-    if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) {
-        throw CompileError("Has match from purely negative sub-expressions.");
-    }
     combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
                        min_offset, max_offset);
 }
index a2e2d0f3d3b01ae54a521676b7dfa26a20f2bbe9..b35f4c052d21dc8723f44b1c237a1c5996c6dbfd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -222,6 +222,58 @@ char isLogicalCombination(const struct RoseEngine *rose, char *lvec,
     return getLogicalVal(rose, lvec, result);
 }
 
+/**
+ * \brief Returns 1 if combination matches when no sub-expression matches.
+ *
+ * Evaluates the logical operations with ids \p start through \p result
+ * (inclusive) in ascending order, storing each operation's value into
+ * \p lvec with setLogicalVal(). Operation ids are asserted to lie in
+ * [rose->lkeyCount, rose->lkeyCount + rose->lopCount); an operand whose id is
+ * below rose->lkeyCount is therefore not an operation. As soon as such an
+ * operand is found with its logical value already set, evaluation stops and
+ * 0 is returned. Otherwise the logical value of \p result is returned.
+ *
+ * Note that \p lvec is modified for every operation evaluated before the
+ * function returns, including on the early-return path.
+ */
+static really_inline
+char isPurelyNegativeMatch(const struct RoseEngine *rose, char *lvec,
+                           u32 start, u32 result) {
+    const struct LogicalOp *logicalTree = (const struct LogicalOp *)
+        ((const char *)rose + rose->logicalTreeOffset);
+    assert(start >= rose->lkeyCount);
+    assert(start <= result);
+    assert(result < rose->lkeyCount + rose->lopCount);
+    for (u32 i = start; i <= result; i++) {
+        const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount);
+        assert(i == op->id);
+        assert(op->op <= LAST_LOGICAL_OP);
+        switch ((enum LogicalOpType)op->op) {
+        case LOGICAL_OP_NOT:
+            if ((op->ro < rose->lkeyCount) &&
+                getLogicalVal(rose, lvec, op->ro)) {
+                // sub-expression not negative
+                return 0;
+            }
+            setLogicalVal(rose, lvec, op->id,
+                          !getLogicalVal(rose, lvec, op->ro));
+            break;
+        case LOGICAL_OP_AND:
+            if (((op->lo < rose->lkeyCount) &&
+                 getLogicalVal(rose, lvec, op->lo)) ||
+                ((op->ro < rose->lkeyCount) &&
+                 getLogicalVal(rose, lvec, op->ro))) {
+                // sub-expression not negative
+                return 0;
+            }
+            setLogicalVal(rose, lvec, op->id,
+                          getLogicalVal(rose, lvec, op->lo) &
+                          getLogicalVal(rose, lvec, op->ro)); // &&
+            break;
+        case LOGICAL_OP_OR:
+            if (((op->lo < rose->lkeyCount) &&
+                 getLogicalVal(rose, lvec, op->lo)) ||
+                ((op->ro < rose->lkeyCount) &&
+                 getLogicalVal(rose, lvec, op->ro))) {
+                // sub-expression not negative
+                return 0;
+            }
+            setLogicalVal(rose, lvec, op->id,
+                          getLogicalVal(rose, lvec, op->lo) |
+                          getLogicalVal(rose, lvec, op->ro)); // ||
+            break;
+        }
+    }
+    return getLogicalVal(rose, lvec, result);
+}
+
 /** \brief Clear all keys in the logical vector. */
 static really_inline
 void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) {
index 192b4709dcf70fb87033aea4a4b59ff34c935674..c91b2a504dc306dff05fc5738d2a6af49a673ffc 100644 (file)
@@ -591,6 +591,23 @@ int roseRunFlushCombProgram(const struct RoseEngine *rose,
     return MO_CONTINUE_MATCHING;
 }
 
+/**
+ * \brief Execute last flush combination program.
+ *
+ * Runs the program located at rose->lastFlushCombProgramOffset through
+ * roseRunProgram(), passing \p end on to it, and translates the HWLM return
+ * code into an MO_* code.
+ *
+ * Returns MO_HALT_MATCHING if the stream is exhausted or the user has
+ * instructed us to halt, or MO_CONTINUE_MATCHING otherwise.
+ */
+int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
+                                struct hs_scratch *scratch, u64a end) {
+    hwlmcb_rv_t rv = roseRunProgram(rose, scratch,
+                                    rose->lastFlushCombProgramOffset,
+                                    0, end, 0);
+    // Only HWLM_TERMINATE_MATCHING maps to a halt; anything else continues.
+    if (rv == HWLM_TERMINATE_MATCHING) {
+        return MO_HALT_MATCHING;
+    }
+    return MO_CONTINUE_MATCHING;
+}
+
 int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
     struct hs_scratch *scratch = context;
     assert(scratch && scratch->magic == SCRATCH_MAGIC);
index 7f5150e039fa2d88d47de13eb50b371d19825083..4c487062eb2ad688ec0f503abb7f035ecba1d394 100644 (file)
@@ -1875,6 +1875,49 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
     return HWLM_CONTINUE_MATCHING;
 }
 
+/**
+ * \brief Report combinations that are satisfied although none of their
+ * sub-expressions matched.
+ *
+ * Walks all t->ckeyCount CombInfo entries. An entry is skipped when \p end
+ * lies outside its [min_offset, max_offset] window (a min_offset of 0 or a
+ * max_offset of MAX_OFFSET disables the respective bound), when its ekey is
+ * valid and already exhausted, or when isPurelyNegativeMatch() returns 0 for
+ * it. Every remaining entry is passed to roseReport() with offset \p end.
+ *
+ * Returns HWLM_TERMINATE_MATCHING as soon as roseReport() does, and
+ * HWLM_CONTINUE_MATCHING once all entries have been visited.
+ */
+static rose_inline
+hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t,
+                                 struct hs_scratch *scratch, u64a end) {
+    for (u32 i = 0; i < t->ckeyCount; i++) {
+        const struct CombInfo *combInfoMap = (const struct CombInfo *)
+            ((const char *)t + t->combInfoMapOffset);
+        const struct CombInfo *ci = combInfoMap + i;
+        // Offset window checks. The debug text says "halt", but the code only
+        // skips this combination and moves on to the next one.
+        if ((ci->min_offset != 0) && (end < ci->min_offset)) {
+            DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset);
+            continue;
+        }
+        if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) {
+            DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset);
+            continue;
+        }
+
+        DEBUG_PRINTF("check ekey %u\n", ci->ekey);
+        if (ci->ekey != INVALID_EKEY) {
+            assert(ci->ekey < t->ekeyCount);
+            const char *evec = scratch->core_info.exhaustionVector;
+            if (isExhausted(t, evec, ci->ekey)) {
+                DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
+                             ci->ekey);
+                continue;
+            }
+        }
+
+        DEBUG_PRINTF("check ckey %u purely negative\n", i);
+        char *lvec = scratch->core_info.logicalVector;
+        if (!isPurelyNegativeMatch(t, lvec, ci->start, ci->result)) {
+            DEBUG_PRINTF("Logical Combination from purely negative Failed!\n");
+            continue;
+        }
+
+        DEBUG_PRINTF("Logical Combination from purely negative Passed!\n");
+        if (roseReport(t, scratch, end, ci->id, 0,
+                       ci->ekey) == HWLM_TERMINATE_MATCHING) {
+            return HWLM_TERMINATE_MATCHING;
+        }
+    }
+    return HWLM_CONTINUE_MATCHING;
+}
+
 #if !defined(_WIN32)
 #define PROGRAM_CASE(name)                                                     \
     case ROSE_INSTR_##name: {                                                  \
@@ -2004,7 +2047,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
         &&LABEL_ROSE_INSTR_SET_LOGICAL,
         &&LABEL_ROSE_INSTR_SET_COMBINATION,
         &&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
-        &&LABEL_ROSE_INSTR_SET_EXHAUST
+        &&LABEL_ROSE_INSTR_SET_EXHAUST,
+        &&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
     };
 #endif
 
@@ -2772,6 +2816,19 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(LAST_FLUSH_COMBINATION) {
+                assert(end >= tctxt->lastCombMatchOffset);
+                if (flushActiveCombinations(t, scratch)
+                        == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+                if (checkPurelyNegatives(t, scratch, end)
+                        == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             default: {
                 assert(0); // unreachable
                 scratch->core_info.status |= STATUS_ERROR;
@@ -3082,6 +3139,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
             }
             L_PROGRAM_NEXT_INSTRUCTION
 
+            L_PROGRAM_CASE(LAST_FLUSH_COMBINATION) {
+                assert(end >= tctxt->lastCombMatchOffset);
+                if (flushActiveCombinations(t, scratch)
+                        == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+                if (checkPurelyNegatives(t, scratch, end)
+                        == HWLM_TERMINATE_MATCHING) {
+                    return HWLM_TERMINATE_MATCHING;
+                }
+            }
+            L_PROGRAM_NEXT_INSTRUCTION
+
             default: {
                 assert(0); // unreachable
                 scratch->core_info.status |= STATUS_ERROR;
index c2b682f6ba1f47a6fbe1eb9a023ab8e2dceb2301..409b70028fc6d71becbbd8b08b926ee6d68461dc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -56,4 +56,7 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
 int roseRunFlushCombProgram(const struct RoseEngine *rose,
                             struct hs_scratch *scratch, u64a end);
 
+int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
+                                struct hs_scratch *scratch, u64a end);
+
 #endif // ROSE_H
index 0ef20f2130e20d5d36be0995e93d0d6a5945e441..908d13c136ccdb143561aceb8d691899101d388b 100644 (file)
@@ -3370,6 +3370,15 @@ RoseProgram makeFlushCombProgram(const RoseEngine &t) {
     return program;
 }
 
+/**
+ * \brief Build the last-flush-combination program for engine \p t.
+ *
+ * The instruction is added through addLastFlushCombinationProgram() only when
+ * t.ckeyCount is non-zero; otherwise the default-constructed program is
+ * returned unchanged.
+ */
+static
+RoseProgram makeLastFlushCombProgram(const RoseEngine &t) {
+    RoseProgram program;
+    if (t.ckeyCount) {
+        addLastFlushCombinationProgram(program);
+    }
+    return program;
+}
+
 static
 u32 history_required(const rose_literal_id &key) {
     if (key.msk.size() < key.s.length()) {
@@ -3740,6 +3749,10 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     auto flushComb_prog = makeFlushCombProgram(proto);
     proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
 
+    auto lastFlushComb_prog = makeLastFlushCombProgram(proto);
+    proto.lastFlushCombProgramOffset =
+        writeProgram(bc, move(lastFlushComb_prog));
+
     // Build anchored matcher.
     auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
     if (atable) {
index 2eb7bb51b1947d5fab543521d5cb93b173643281..8999daef25d80b7cf4fbfd982e3d6268bc3802fa 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -1486,6 +1486,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(LAST_FLUSH_COMBINATION) {}
+            PROGRAM_NEXT_INSTRUCTION
+
         default:
             os << "  UNKNOWN (code " << int{code} << ")" << endl;
             os << "  <stopping>" << endl;
@@ -1557,6 +1560,25 @@ void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
     os.close();
 }
 
+/**
+ * \brief Write the last-flush-combination program listing to \p filename.
+ *
+ * If t->lastFlushCombProgramOffset is non-zero, the program at that byte
+ * offset from the start of the engine is dumped via dumpProgram(); otherwise
+ * a single placeholder line is written.
+ */
+static
+void dumpRoseLastFlushCombPrograms(const RoseEngine *t,
+                                   const string &filename) {
+    ofstream os(filename);
+    const char *base = (const char *)t;
+
+    if (t->lastFlushCombProgramOffset) {
+        os << "Last Flush Combination Program @ "
+           << t->lastFlushCombProgramOffset
+           << ":" << endl;
+        dumpProgram(os, t, base + t->lastFlushCombProgramOffset);
+        os << endl;
+    } else {
+        os << "<No Last Flush Combination Program>" << endl;
+    }
+
+    os.close();
+}
+
 static
 void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
     ofstream os(filename);
@@ -2249,6 +2271,8 @@ void roseDumpPrograms(const vector<LitFragment> &fragments, const RoseEngine *t,
     dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
     dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
     dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
+    dumpRoseLastFlushCombPrograms(t,
+            base + "/rose_last_flush_comb_programs.txt");
     dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
     dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
     dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
index 2fe53455908b579e9b8ad4d78b4e04741d1c323e..c503f7311a5e6b63d7fca3bd313ca1bb7d47a709 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, Intel Corporation
+ * Copyright (c) 2017-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -48,6 +48,7 @@ RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
 RoseInstrEnd::~RoseInstrEnd() = default;
 RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default;
 RoseInstrFlushCombination::~RoseInstrFlushCombination() = default;
+RoseInstrLastFlushCombination::~RoseInstrLastFlushCombination() = default;
 
 using OffsetMap = RoseInstruction::OffsetMap;
 
index 61e6d7a655f30aaa8e013581d288ceaafa9c83db..306a4166cb0e02ab3cc16eff22e63c04443051f8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, Intel Corporation
+ * Copyright (c) 2017-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -2206,6 +2206,14 @@ public:
     ~RoseInstrFlushCombination() override;
 };
 
+/**
+ * \brief Builder-side representation of ROSE_INSTR_LAST_FLUSH_COMBINATION.
+ *
+ * The class adds no members of its own beyond the destructor override;
+ * everything else is inherited from RoseInstrBaseTrivial, instantiated with
+ * the opcode and ROSE_STRUCT_LAST_FLUSH_COMBINATION.
+ */
+class RoseInstrLastFlushCombination
+    : public RoseInstrBaseTrivial<ROSE_INSTR_LAST_FLUSH_COMBINATION,
+                                  ROSE_STRUCT_LAST_FLUSH_COMBINATION,
+                                  RoseInstrLastFlushCombination> {
+public:
+    ~RoseInstrLastFlushCombination() override;
+};
+
+
 class RoseInstrSetExhaust
     : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_EXHAUST,
                                     ROSE_STRUCT_SET_EXHAUST,
index 2a6581e9771a37e40a1166e5cbfdffbdf15e4921..501932c5ca9001206dc24515608d3fe5bdc1635b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -317,6 +317,10 @@ void addFlushCombinationProgram(RoseProgram &program) {
     program.add_before_end(make_unique<RoseInstrFlushCombination>());
 }
 
+/** \brief Insert a RoseInstrLastFlushCombination via add_before_end(). */
+void addLastFlushCombinationProgram(RoseProgram &program) {
+    program.add_before_end(make_unique<RoseInstrLastFlushCombination>());
+}
+
 static
 void makeRoleCheckLeftfix(const RoseBuildImpl &build,
                           const map<RoseVertex, left_build_info> &leftfix_info,
index 8c8c37ed942127768495a6c3523c489c71d0d294..7d781f3191edc83405723d23b900aadd1374f07d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -188,6 +188,7 @@ void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
 void addSuffixesEodProgram(RoseProgram &program);
 void addMatcherEodProgram(RoseProgram &program);
 void addFlushCombinationProgram(RoseProgram &program);
+void addLastFlushCombinationProgram(RoseProgram &program);
 
 static constexpr u32 INVALID_QUEUE = ~0U;
 
index 386b035ca2e0f8cff3047a9540e672b851809ef0..ff24a9cc81b641d4593efa3a0a92574d23ac3676 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -426,6 +426,8 @@ struct RoseEngine {
 
     u32 eodProgramOffset; //!< EOD program, otherwise 0.
     u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
+    u32 lastFlushCombProgramOffset; /**< LastFlushCombination program,
+                                     * otherwise 0 */
 
     u32 lastByteHistoryIterOffset; // if non-zero
 
index 7feee04fe6a0c8c8e459cf98177eb8a337589e0d..e5485476b7fb0f9d2071bad6197a8e39cb006a99 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -201,7 +201,14 @@ enum RoseInstructionCode {
     /** \brief Mark as exhausted instead of report while quiet. */
     ROSE_INSTR_SET_EXHAUST,
 
-    LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel.
+    /**
+     * \brief Calculate any combination's logical value if none of its
+     * sub-expressions has matched by EOD, then check whether it complies
+     * with any logical constraints.
+     */
+    ROSE_INSTR_LAST_FLUSH_COMBINATION,
+
+    LAST_ROSE_INSTRUCTION = ROSE_INSTR_LAST_FLUSH_COMBINATION //!< Sentinel.
 };
 
 struct ROSE_STRUCT_END {
@@ -674,4 +681,8 @@ struct ROSE_STRUCT_SET_EXHAUST {
     u8 code; //!< From enum RoseInstructionCode.
     u32 ekey; //!< Exhaustion key.
 };
+
+/**
+ * \brief Instruction layout for ROSE_INSTR_LAST_FLUSH_COMBINATION; it carries
+ * no operands, only the opcode.
+ */
+struct ROSE_STRUCT_LAST_FLUSH_COMBINATION {
+    u8 code; //!< From enum RoseInstructionCode.
+};
 #endif // ROSE_ROSE_PROGRAM_H
index cfcd0f7c88b1729b675d73ab19f408836a0ca4b6..43cdab094d9008e68800632601f0c698bf2a778f 100644 (file)
@@ -455,8 +455,9 @@ set_retval:
         return HS_UNKNOWN_ERROR;
     }
 
-    if (rose->flushCombProgramOffset) {
-        if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
+    if (rose->lastFlushCombProgramOffset) {
+        if (roseRunLastFlushCombProgram(rose, scratch, length)
+            == MO_HALT_MATCHING) {
             if (unlikely(internal_matching_error(scratch))) {
                 unmarkScratchInUse(scratch);
                 return HS_UNKNOWN_ERROR;
@@ -698,8 +699,9 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
         }
     }
 
-    if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
-        if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
+    if (rose->lastFlushCombProgramOffset && !told_to_stop_matching(scratch)) {
+        if (roseRunLastFlushCombProgram(rose, scratch, id->offset)
+            == MO_HALT_MATCHING) {
             DEBUG_PRINTF("told to stop matching\n");
             scratch->core_info.status |= STATUS_TERMINATED;
         }
@@ -1000,31 +1002,22 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
 
     if (onEvent) {
         if (!scratch || !validScratch(id->rose, scratch)) {
+            hs_stream_free(id);
             return HS_INVALID;
         }
         if (unlikely(markScratchInUse(scratch))) {
+            hs_stream_free(id);
             return HS_SCRATCH_IN_USE;
         }
         report_eod_matches(id, scratch, onEvent, context);
         if (unlikely(internal_matching_error(scratch))) {
             unmarkScratchInUse(scratch);
+            hs_stream_free(id);
             return HS_UNKNOWN_ERROR;
         }
         unmarkScratchInUse(scratch);
     }
 
-    if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
-        if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
-            == MO_HALT_MATCHING) {
-            scratch->core_info.status |= STATUS_TERMINATED;
-            if (unlikely(internal_matching_error(scratch))) {
-                unmarkScratchInUse(scratch);
-                return HS_UNKNOWN_ERROR;
-            }
-            unmarkScratchInUse(scratch);
-        }
-    }
-
     hs_stream_free(id);
 
     return HS_SUCCESS;
@@ -1054,18 +1047,6 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
         unmarkScratchInUse(scratch);
     }
 
-    if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
-        if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
-            == MO_HALT_MATCHING) {
-            scratch->core_info.status |= STATUS_TERMINATED;
-            if (unlikely(internal_matching_error(scratch))) {
-                unmarkScratchInUse(scratch);
-                return HS_UNKNOWN_ERROR;
-            }
-            unmarkScratchInUse(scratch);
-        }
-    }
-
     // history already initialised
     init_stream(id, id->rose, 0);
 
index b4b3f809bde4e1fdab03e27d8a4ce099169f31f6..0b67b11c5fb9b7f874b2cd0178a88dd893c70ee0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -299,6 +299,46 @@ char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
     return lv[result];
 }
 
+/**
+ * \brief Returns 1 if combination matches when no sub-expression matches.
+ *
+ * Evaluates operations \p start through \p result (inclusive) from \p comb,
+ * which is indexed by (operation id - \p lkeyCount), and writes each result
+ * into \p lv at the operation's id. Returns 0 as soon as an operand whose id
+ * is below \p lkeyCount is already set in \p lv; otherwise returns
+ * lv[result]. An unrecognised operation type hits the assert in the default
+ * case.
+ */
+static
+char isPurelyNegativeMatch(vector<char> &lv, const vector<LogicalOp> &comb,
+                           size_t lkeyCount, unsigned start, unsigned result) {
+    assert(start <= result);
+    for (unsigned i = start; i <= result; i++) {
+        const LogicalOp &op = comb[i - lkeyCount];
+        assert(i == op.id);
+        switch (op.op) {
+        case LOGICAL_OP_NOT:
+            if ((op.ro < lkeyCount) && lv[op.ro]) {
+                // sub-expression not negative
+                return 0;
+            }
+            lv[op.id] = !lv[op.ro];
+            break;
+        case LOGICAL_OP_AND:
+            if (((op.lo < lkeyCount) && lv[op.lo]) ||
+                ((op.ro < lkeyCount) && lv[op.ro])) {
+                // sub-expression not negative
+                return 0;
+            }
+            lv[op.id] = lv[op.lo] & lv[op.ro]; // &&
+            break;
+        case LOGICAL_OP_OR:
+            if (((op.lo < lkeyCount) && lv[op.lo]) ||
+                ((op.ro < lkeyCount) && lv[op.ro])) {
+                // sub-expression not negative
+                return 0;
+            }
+            lv[op.id] = lv[op.lo] | lv[op.ro]; // ||
+            break;
+        default:
+            assert(0);
+            break;
+        }
+    }
+    return lv[result];
+}
+
 bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
                      const string &buffer, ResultSet &rs, string &error) {
     if (cngi.quiet) {
@@ -359,6 +399,13 @@ bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
                 }
             }
         }
+        if (isPurelyNegativeMatch(lv, comb, m_lkey.size(),
+                                  li.start, li.result)) {
+            u64a to = buffer.length();
+            if ((to >= cngi.min_offset) && (to <= cngi.max_offset)) {
+                rs.addMatch(0, to);
+            }
+        }
         return true;
     }
 
index fe038c818c266f4f38fceec39574fa1e07cb8896..5a4bdc004a46b66501ce833f535d9c90b32c1795 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -557,6 +557,46 @@ char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
     return lv[result];
 }
 
+/**
+ * \brief Returns 1 if combination matches when no sub-expression matches.
+ *
+ * Evaluates operations \p start through \p result (inclusive) from \p comb,
+ * which is indexed by (operation id - \p lkeyCount), and writes each result
+ * into \p lv at the operation's id. Returns 0 as soon as an operand whose id
+ * is below \p lkeyCount is already set in \p lv; otherwise returns
+ * lv[result]. An unrecognised operation type hits the assert in the default
+ * case.
+ */
+static
+char isPurelyNegativeMatch(vector<char> &lv, const vector<LogicalOp> &comb,
+                           size_t lkeyCount, unsigned start, unsigned result) {
+    assert(start <= result);
+    for (unsigned i = start; i <= result; i++) {
+        const LogicalOp &op = comb[i - lkeyCount];
+        assert(i == op.id);
+        switch (op.op) {
+        case LOGICAL_OP_NOT:
+            if ((op.ro < lkeyCount) && lv[op.ro]) {
+                // sub-expression not negative
+                return 0;
+            }
+            lv[op.id] = !lv[op.ro];
+            break;
+        case LOGICAL_OP_AND:
+            if (((op.lo < lkeyCount) && lv[op.lo]) ||
+                ((op.ro < lkeyCount) && lv[op.ro])) {
+                // sub-expression not negative
+                return 0;
+            }
+            lv[op.id] = lv[op.lo] & lv[op.ro]; // &&
+            break;
+        case LOGICAL_OP_OR:
+            if (((op.lo < lkeyCount) && lv[op.lo]) ||
+                ((op.ro < lkeyCount) && lv[op.ro])) {
+                // sub-expression not negative
+                return 0;
+            }
+            lv[op.id] = lv[op.lo] | lv[op.ro]; // ||
+            break;
+        default:
+            assert(0);
+            break;
+        }
+    }
+    return lv[result];
+}
+
 bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
                       const string &buffer, ResultSet &rs, string &error) {
     if (compiled.quiet) {
@@ -616,6 +656,13 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
                 }
             }
         }
+        if (isPurelyNegativeMatch(lv, comb, m_lkey.size(),
+                                  li.start, li.result)) {
+            u64a to = buffer.length();
+            if ((to >= compiled.min_offset) && (to <= compiled.max_offset)) {
+                rs.addMatch(0, to);
+            }
+        }
         return true;
     }
 
index 6d4283dac6ed3f039d2d3454379dfe0899ffeeef..c4a9f13c4afbea3deb5aaf78ed817b2e721af0ea 100644 (file)
 158:/141 & (142|!143) )| 144/C #Not enough left parentheses at index 17.
 159:/1234567890 & (142|!143 )/C #Expression id too large at index 10.
 160:/141 & (142|!143 )|/C #Not enough operand at index 18.
-161:/!141/C #Has match from purely negative sub-expressions.
-162:/!141 | 142 | 143/C #Has match from purely negative sub-expressions.
-163:/!141 & !142 & !143/C #Has match from purely negative sub-expressions.
-164:/(141 | !142 & !143)/C #Has match from purely negative sub-expressions.
-165:/!(141 | 142 | 143)/C #Has match from purely negative sub-expressions.
-166:/141/C #No logical operation.
-167:/119 & 121/C #Unknown sub-expression id.
-168:/166 & 167/C #Unknown sub-expression id.
+161:/141/C #No logical operation.
+162:/119 & 121/C #Unknown sub-expression id.
+163:/166 & 167/C #Unknown sub-expression id.
index 169de333be90d4c3b64af3d11aa336833b743d3e..5b1c1ec2843cb109999c97005cc42e3d9b10917e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Intel Corporation
+ * Copyright (c) 2018-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -694,3 +694,113 @@ TEST(LogicalCombination, MultiCombQuietUniSub5) {
     err = hs_free_scratch(scratch);
     ASSERT_EQ(HS_SUCCESS, err);
 }
+
+/*
+ * Purely negative combination, single combination, HS_MODE_NOSTREAM.
+ *
+ * The scanned data consists only of 'x' characters, so none of the
+ * sub-expressions 201-205 can match. The combination (id 1002) is expected to
+ * be reported exactly once, as MatchRecord(53, 1002).
+ */
+TEST(LogicalCombination, SingleCombPurelyNegative6) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    CallBackContext c;
+    string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
+                          "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"};
+    unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION};
+    unsigned ids[] = {201, 202, 203, 204, 205, 1002};
+    hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM,
+                                      nullptr, &db, &compile_err);
+
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(scratch != nullptr);
+
+    c.halt = 0;
+    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
+                  (void *)&c);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_EQ(1U, c.matches.size());
+    ASSERT_EQ(MatchRecord(53, 1002), c.matches[0]);
+
+    hs_free_database(db);
+    err = hs_free_scratch(scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+}
+
+/*
+ * Same patterns and data as the non-quiet purely negative case, but the
+ * combination is additionally flagged HS_FLAG_QUIET. The scan must succeed
+ * and the callback context must record no matches at all.
+ */
+TEST(LogicalCombination, SingleCombQuietPurelyNegative6) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    CallBackContext c;
+    string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
+                          "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"};
+    unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET};
+    unsigned ids[] = {201, 202, 203, 204, 205, 1002};
+    hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM,
+                                      nullptr, &db, &compile_err);
+
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(scratch != nullptr);
+
+    c.halt = 0;
+    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
+                  (void *)&c);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_EQ(0U, c.matches.size());
+
+    hs_free_database(db);
+    err = hs_free_scratch(scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+}
+
+/*
+ * Three combinations (1001, 1002, 1003) over disjoint sub-expression sets,
+ * HS_MODE_NOSTREAM. The only sub-expression text present in the data is
+ * "fed" (id 202).
+ *
+ * Expected records, in order:
+ *   - (106, 202):  the plain sub-expression match;
+ *   - (106, 1002): combination 1002 reported at the same offset;
+ *   - (300, 1001): combination 1001, none of whose sub-expressions appear in
+ *                  the data.
+ * Combination 1003 is expected to produce no record, and 1002 is expected
+ * not to be recorded a second time.
+ */
+TEST(LogicalCombination, MultiCombPurelyNegativeUniSub6) {
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    CallBackContext c;
+    string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+                  "-----------------------------------------------"
+                  "xxxfedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+                  "-----------------------------------------------"
+                  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+                  "------------------------------------------";
+    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
+                          "ijkl[mMn]", "cba", "fed", "google.*cn",
+                          "haystacks{4,8}", "ijkl[oOp]", "cab", "fee",
+                          "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]",
+                          "(101 & 102 & 103) | (!104 & !105)",
+                          "(!201 | 202 & 203) & (!204 | 205)",
+                          "((301 | 302) & 303) & (304 | 305)"};
+    unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                        HS_FLAG_COMBINATION, HS_FLAG_COMBINATION,
+                        HS_FLAG_COMBINATION};
+    unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301,
+                      302, 303, 304, 305, 1001, 1002, 1003};
+    hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM,
+                                      nullptr, &db, &compile_err);
+
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(scratch != nullptr);
+
+    c.halt = 0;
+    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
+                  (void *)&c);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_EQ(3U, c.matches.size());
+    ASSERT_EQ(MatchRecord(106, 202), c.matches[0]);
+    ASSERT_EQ(MatchRecord(106, 1002), c.matches[1]);
+    ASSERT_EQ(MatchRecord(300, 1001), c.matches[2]);
+
+    hs_free_database(db);
+    err = hs_free_scratch(scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+}