src/parser/check_refs.h
src/parser/control_verbs.cpp
src/parser/control_verbs.h
+ src/parser/logical_combination.cpp
+ src/parser/logical_combination.h
src/parser/parse_error.cpp
src/parser/parse_error.h
src/parser/parser_util.cpp
Approximate matching is always disabled by default, and can be enabled on a
per-pattern basis by using an extended parameter described in :ref:`extparam`.
+
+.. _logical_combinations:
+
+********************
+Logical Combinations
+********************
+
+For situations when a user requires behaviour that depends on the presence or
+absence of matches from groups of patterns, Hyperscan provides support for the
+logical combination of patterns in a given pattern set, with three operators:
+``NOT``, ``AND`` and ``OR``.
+
+The logical value of such a combination is based on each expression's matching
+status at a given offset. The matching status of any expression has a boolean
+value: *false* if the expression has not yet matched or *true* if the expression
+has already matched. In particular, the value of a ``NOT`` operation at a given
+offset is *true* if the expression it refers to is *false* at this offset.
+
+For example, ``NOT 101`` means that expression 101 has not yet matched at this
+offset.
+
+A logical combination is passed to Hyperscan at compile time as an expression.
+This combination expression will raise matches at every offset where one of its
+sub-expressions matches and the logical value of the whole expression is *true*.
+
+To illustrate, here is an example combination expression: ::
+
+ ((301 OR 302) AND 303) AND (304 OR NOT 305)
+
+If expression 301 matches at offset 10, the logical value of 301 is *true*
+while the other patterns' values are *false*. Hence, the whole combination's value is
+*false*.
+
+Then expression 303 matches at offset 20. Now the values of 301 and 303 are
+*true* while the other patterns' values are still *false*. In this case, the
+combination's value is *true*, so the combination expression raises a match at
+offset 20.
+
+Finally, expression 305 matches at offset 30. Now the values of 301, 303 and 305
+are *true* while the other patterns' values are still *false*. In this case, the
+combination's value is *false* and no match is raised.
+
+**Using Logical Combinations**
+
+In logical combination syntax, an expression is written in infix notation; it
+consists of operands, operators and parentheses. The operands are expression
+IDs, and operators are ``!`` (NOT), ``&`` (AND) or ``|`` (OR). For example, the
+combination described in the previous section would be written as: ::
+
+ ((301 | 302) & 303) & (304 | !305)
+
+In a logical combination expression:
+
+ * The operator priority is ``!`` > ``&`` > ``|``. For example:
+ - ``A&B|C`` is treated as ``(A&B)|C``,
+ - ``A|B&C`` is treated as ``A|(B&C)``,
+ - ``A&!B`` is treated as ``A&(!B)``.
+ * Extra parentheses are allowed. For example:
+ - ``(A)&!(B)`` is the same as ``A&!B``,
+ - ``(A&B)|C`` is the same as ``A&B|C``.
+ * Whitespace is ignored.
+
+To use a logical combination expression, it must be passed to one of the
+Hyperscan compile functions (:c:func:`hs_compile_multi`,
+:c:func:`hs_compile_ext_multi`) along with the :c:member:`HS_FLAG_COMBINATION` flag,
+which identifies the pattern as a logical combination expression. The patterns
+referred to in the logical combination expression must be compiled together in
+the same pattern set as the combination expression.
+
+When an expression has the :c:member:`HS_FLAG_COMBINATION` flag set, the only
+other flags it may be combined with are :c:member:`HS_FLAG_SINGLEMATCH` and
+:c:member:`HS_FLAG_QUIET`; setting any other flag is rejected at compile time.
+
+Hyperscan will reject logical combination expressions at compile time that
+evaluate to *true* when no patterns have matched; for example: ::
+
+ !101
+ !101|102
+ !101&!102
+ !(101&102)
+
+Patterns that are referred to as operands within a logical combination (for
+example, 301 through 305 in the examples above) may also use the
+:c:member:`HS_FLAG_QUIET` flag to silence the reporting of individual matches
+for those patterns. In the absence of this flag, all matches (for
+both individual patterns and their logical combinations) will be reported.
+
+When an expression has both the :c:member:`HS_FLAG_COMBINATION` flag and the
+:c:member:`HS_FLAG_QUIET` flag set, no matches for this logical combination
+will be reported.
``W`` :c:member:`HS_FLAG_UCP` Unicode property support
``P`` :c:member:`HS_FLAG_PREFILTER` Prefiltering mode
``L`` :c:member:`HS_FLAG_SOM_LEFTMOST` Leftmost start of match reporting
+``C`` :c:member:`HS_FLAG_COMBINATION` Logical combination of patterns
+``Q`` :c:member:`HS_FLAG_QUIET` Quiet at matching
========= ================================= ===========
In addition to the set of flags above, :ref:`extparam` can be supplied
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "parser/buildstate.h"
#include "parser/dump.h"
#include "parser/Component.h"
+#include "parser/logical_combination.h"
#include "parser/parse_error.h"
#include "parser/Parser.h" // for flags
#include "parser/position.h"
const hs_expr_ext *ext)
: expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
- 0, 0, 0) {
+ 0, 0, 0, flags & HS_FLAG_QUIET) {
+ // We disallow SOM + Quiet.
+ if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) {
+ throw CompileError("HS_FLAG_QUIET is not supported in "
+ "combination with HS_FLAG_SOM_LEFTMOST.");
+ }
+ flags &= ~HS_FLAG_QUIET;
ParseMode mode(flags);
component = parse(expression, mode);
DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
expression);
+ if (flags & HS_FLAG_COMBINATION) {
+ if (flags & ~(HS_FLAG_COMBINATION | HS_FLAG_QUIET |
+ HS_FLAG_SINGLEMATCH)) {
+ throw CompileError("only HS_FLAG_QUIET and HS_FLAG_SINGLEMATCH "
+ "are supported in combination "
+ "with HS_FLAG_COMBINATION.");
+ }
+ if (flags & HS_FLAG_QUIET) {
+ DEBUG_PRINTF("skip QUIET logical combination expression %u\n", id);
+ } else {
+ u32 ekey = INVALID_EKEY;
+ u64a min_offset = 0;
+ u64a max_offset = MAX_OFFSET;
+ if (flags & HS_FLAG_SINGLEMATCH) {
+ ekey = ng.rm.getExhaustibleKey(id);
+ }
+ if (ext) {
+ validateExt(*ext);
+ if (ext->flags & ~(HS_EXT_FLAG_MIN_OFFSET |
+ HS_EXT_FLAG_MAX_OFFSET)) {
+ throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and "
+ "HS_EXT_FLAG_MAX_OFFSET extra flags "
+ "are supported in combination "
+ "with HS_FLAG_COMBINATION.");
+ }
+ if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
+ min_offset = ext->min_offset;
+ }
+ if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
+ max_offset = ext->max_offset;
+ }
+ }
+ ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset,
+ max_offset);
+ DEBUG_PRINTF("parsed logical combination expression %u\n", id);
+ }
+ return;
+ }
+
// Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) {
throw CompileError("Pattern length exceeds limit.");
/*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
bool highlander_in, bool utf8_in, bool prefilter_in,
som_type som_in, ReportID report_in, u64a min_offset_in,
u64a max_offset_in, u64a min_length_in, u32 edit_distance_in,
- u32 hamm_distance_in)
+ u32 hamm_distance_in, bool quiet_in)
: index(index_in), report(report_in), allow_vacuous(allow_vacuous_in),
highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in),
som(som_in), min_offset(min_offset_in), max_offset(max_offset_in),
min_length(min_length_in), edit_distance(edit_distance_in),
- hamm_distance(hamm_distance_in) {}
+ hamm_distance(hamm_distance_in), quiet(quiet_in) {}
/**
* \brief Index of the expression represented by this graph.
*/
u32 edit_distance;
u32 hamm_distance;
+
+ /** \brief Quiet on match. */
+ bool quiet;
};
}
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
}
+ // Check sub-expression ids
+ ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
+ // Renumber and assign lkey to reports
+ ng.rm.logicalKeyRenumber();
+
unsigned length = 0;
struct hs_database *out = build(ng, &length);
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*/
#define HS_FLAG_SOM_LEFTMOST 256
+/**
+ * Compile flag: Logical combination.
+ *
+ * This flag instructs Hyperscan to parse this expression as logical
+ * combination syntax.
+ * Logical constraints consist of operands, operators and parentheses.
+ * The operands are expression IDs, and operators can be
+ * '!'(NOT), '&'(AND) or '|'(OR).
+ * For example:
+ * (101&102&103)|(104&!105)
+ * ((301|302)&303)&(304|305)
+ */
+#define HS_FLAG_COMBINATION 512
+
+/**
+ * Compile flag: Don't do any match reporting.
+ *
+ * This flag instructs Hyperscan to ignore match reporting for this expression.
+ * It is designed to be used on the sub-expressions in logical combinations.
+ */
+#define HS_FLAG_QUIET 1024
+
/** @} */
/**
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
- u32 external_report, bool highlander, som_type som) {
+ u32 external_report, bool highlander, som_type som,
+ bool quiet) {
assert(!literal.empty());
if (!cc.grey.shortcutLiterals) {
} else {
u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
: INVALID_EKEY;
- Report r = makeECallback(external_report, 0, ekey);
+ Report r = makeECallback(external_report, 0, ekey, quiet);
id = rm.getInternalId(r);
}
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead
* of using \ref addGraph) */
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
- bool highlander, som_type som);
+ bool highlander, som_type som, bool quiet);
/** \brief Maximum history in bytes available for use by SOM reverse NFAs,
* a hack for pattern support (see UE-1903). This is always set to the max
--- /dev/null
+/*
+ * Copyright (c) 2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Parse and build ParsedLogical::logicalTree and combInfoMap.
+ */
+#include "logical_combination.h"
+#include "parser/parse_error.h"
+#include "util/container.h"
+#include "hs_compile.h"
+
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+/**
+ * \brief Return the logical key for expression id \a a.
+ *
+ * The first time an id is seen it is assigned the next sequential key, i.e.
+ * the number of ids already present in toLogicalKeyMap.
+ */
+u32 ParsedLogical::getLogicalKey(u32 a) {
+    auto found = toLogicalKeyMap.find(a);
+    if (found != toLogicalKeyMap.end()) {
+        DEBUG_PRINTF("%u -> lkey %u\n", found->first, found->second);
+        return found->second;
+    }
+    // Capture the size before inserting so the new key is well defined.
+    const u32 next_key = toLogicalKeyMap.size();
+    const auto res = toLogicalKeyMap.emplace(a, next_key);
+    assert(res.second);
+    DEBUG_PRINTF("%u -> lkey %u\n", res.first->first, res.first->second);
+    return res.first->second;
+}
+
+/**
+ * \brief Return the combination key for combination expression id \a a.
+ *
+ * The first time an id is seen it is assigned the next sequential key, i.e.
+ * the number of ids already present in toCombKeyMap.
+ */
+u32 ParsedLogical::getCombKey(u32 a) {
+    auto found = toCombKeyMap.find(a);
+    if (found != toCombKeyMap.end()) {
+        DEBUG_PRINTF("%u -> ckey %u\n", found->first, found->second);
+        return found->second;
+    }
+    const u32 next_key = toCombKeyMap.size();
+    const auto res = toCombKeyMap.emplace(a, next_key);
+    assert(res.second);
+    DEBUG_PRINTF("%u -> ckey %u\n", res.first->first, res.first->second);
+    return res.first->second;
+}
+
+/**
+ * \brief Record that logical key \a lkey is used by combination \a ckey.
+ */
+void ParsedLogical::addRelateCKey(u32 lkey, u32 ckey) {
+    // operator[] creates the (empty) set the first time this lkey is seen.
+    lkey2ckeys[lkey].insert(ckey);
+    DEBUG_PRINTF("lkey %u belongs to combination key %u\n",
+                 lkey, ckey);
+}
+
+/*
+ * If \a ckey is a temporary operation key (LOGICAL_OP_BIT set), rewrite it in
+ * place as its operation index offset by the number of entries in
+ * toLogicalKeyMap. Keys without the bit are left untouched.
+ *
+ * The argument must be a modifiable lvalue and is evaluated more than once,
+ * so it must not have side effects. It is parenthesized so that any lvalue
+ * expression expands correctly.
+ */
+#define TRY_RENUM_OP(ckey)                                                  \
+do {                                                                        \
+    if ((ckey) & LOGICAL_OP_BIT) {                                          \
+        (ckey) = ((ckey) & ~LOGICAL_OP_BIT) + toLogicalKeyMap.size();       \
+    }                                                                       \
+} while(0)
+
+/**
+ * \brief Append one logical operation to logicalTree.
+ *
+ * \param op    Operation type.
+ * \param left  Left operand key.
+ * \param right Right operand key.
+ * \return The temporary id of the new operation: its index in logicalTree
+ *         with LOGICAL_OP_BIT set.
+ */
+u32 ParsedLogical::logicalTreeAdd(u32 op, u32 left, u32 right) {
+    const u32 slot = (u32)logicalTree.size();
+    // The index must not collide with the marker bit.
+    assert((LOGICAL_OP_BIT & slot) == 0);
+    LogicalOp node;
+    node.id = LOGICAL_OP_BIT | slot;
+    node.op = op;
+    node.lo = left;
+    node.ro = right;
+    logicalTree.push_back(node);
+    return node.id;
+}
+
+/**
+ * \brief Append the combination info for \a ckey to combInfoMap.
+ *
+ * \a ckey must equal the current size of combInfoMap (checked by assertion),
+ * so that the vector index of the new entry is the combination key.
+ */
+void ParsedLogical::combinationInfoAdd(UNUSED u32 ckey, u32 id, u32 ekey,
+                                       u32 lkey_start, u32 lkey_result,
+                                       u64a min_offset, u64a max_offset) {
+    assert(ckey == combInfoMap.size());
+
+    CombInfo entry;
+    entry.id = id;
+    entry.ekey = ekey;
+    entry.start = lkey_start;
+    entry.result = lkey_result;
+    entry.min_offset = min_offset;
+    entry.max_offset = max_offset;
+    combInfoMap.push_back(entry);
+
+    DEBUG_PRINTF("ckey %u (id %u) -> lkey %u..%u, ekey=0x%x\n", ckey, entry.id,
+                 entry.start, entry.result, entry.ekey);
+}
+
+/**
+ * \brief Validate every sub-expression referenced by a logical combination.
+ *
+ * For each expression id that was assigned a logical key, check that:
+ *  - the id exists in \a ids (a NULL \a ids array is treated as all zeroes);
+ *  - the id is not itself a combination expression;
+ *  - the sub-expression does not use HS_FLAG_SOM_LEFTMOST or
+ *    HS_FLAG_PREFILTER;
+ *  - hs_expression_info() succeeds for it and reports ordered matches.
+ *
+ * \param ids         Expression ids, or NULL.
+ * \param expressions Expression strings, indexed like \a ids.
+ * \param flags       Per-expression flags, or NULL (treated as all zeroes).
+ * \param elements    Number of entries in the arrays.
+ *
+ * Throws CompileError on the first violation found.
+ */
+void ParsedLogical::validateSubIDs(const unsigned *ids,
+                                   const char *const *expressions,
+                                   const unsigned *flags,
+                                   unsigned elements) {
+    for (const auto &it : toLogicalKeyMap) {
+        bool unknown = true;
+        u32 i = 0;
+        for (i = 0; i < elements; i++) {
+            if ((ids ? ids[i] : 0) == it.first) {
+                unknown = false;
+                break;
+            }
+        }
+        if (unknown) {
+            throw CompileError("Unknown sub-expression id.");
+        }
+        if (contains(toCombKeyMap, it.first)) {
+            throw CompileError("Have combination of combination.");
+        }
+        // A NULL flags array means "no flags"; never index through it.
+        const unsigned sub_flags = flags ? flags[i] : 0;
+        if (sub_flags & HS_FLAG_SOM_LEFTMOST) {
+            throw CompileError("Have SOM flag in sub-expression.");
+        }
+        if (sub_flags & HS_FLAG_PREFILTER) {
+            throw CompileError("Have PREFILTER flag in sub-expression.");
+        }
+        hs_compile_error_t *compile_err = NULL;
+        hs_expr_info_t *info = NULL;
+        hs_error_t err = hs_expression_info(expressions[i], sub_flags, &info,
+                                            &compile_err);
+        if (err != HS_SUCCESS) {
+            hs_free_compile_error(compile_err);
+            throw CompileError("Run hs_expression_info() failed.");
+        }
+        if (!info) {
+            throw CompileError("Get hs_expr_info_t failed.");
+        }
+        // Read the field and release the info before throwing, so the
+        // allocation is not leaked on the error path.
+        const bool unordered = info->unordered_matches != 0;
+        free(info);
+        if (unordered) {
+            throw CompileError("Have unordered match in sub-expressions.");
+        }
+    }
+}
+
+/**
+ * \brief Convert temporary operation keys into final logical keys.
+ *
+ * Applies TRY_RENUM_OP to the id and both operands of every operation in
+ * logicalTree, and to the start/result keys of every combination in
+ * combInfoMap.
+ */
+void ParsedLogical::logicalKeyRenumber() {
+    // Operation vector: each node's own id and both operands.
+    for (LogicalOp &node : logicalTree) {
+        TRY_RENUM_OP(node.id);
+        TRY_RENUM_OP(node.lo);
+        TRY_RENUM_OP(node.ro);
+    }
+    // Combination info: first and last operation of each combination.
+    for (CombInfo &comb : combInfoMap) {
+        TRY_RENUM_OP(comb.start);
+        TRY_RENUM_OP(comb.result);
+    }
+}
+
+/** \brief An operator on the parser's operator stack. */
+struct LogicalOperator {
+    /** \brief Operator type, as returned by toOperator(). */
+    u32 op;
+    /** \brief Parenthesis nesting depth at which the operator was read. */
+    u32 paren;
+
+    LogicalOperator(u32 op_in, u32 paren_in) : op(op_in), paren(paren_in) {}
+};
+
+/**
+ * \brief Map an operator character to its logical operation type.
+ *
+ * Returns UNKNOWN_OP for any character other than '!', '&' or '|'.
+ */
+static
+u32 toOperator(char c) {
+    if (c == '!') {
+        return LOGICAL_OP_NOT;
+    }
+    if (c == '&') {
+        return LOGICAL_OP_AND;
+    }
+    if (c == '|') {
+        return LOGICAL_OP_OR;
+    }
+    return UNKNOWN_OP;
+}
+
+/**
+ * \brief Decide whether stacked operator \a op1 must be applied before the
+ * incoming operator \a op2 is pushed.
+ *
+ * A deeper parenthesis level always wins. At the same level, \a op1 wins
+ * when its op value is less than or equal to that of \a op2.
+ */
+static
+bool cmpOperator(const LogicalOperator &op1, const LogicalOperator &op2) {
+    if (op1.paren != op2.paren) {
+        return op1.paren > op2.paren;
+    }
+    return op1.op <= op2.op;
+}
+
+/**
+ * \brief Convert the pending run of digits logical[digit, end) to a number.
+ *
+ * \param logical Combination expression text.
+ * \param digit   Start offset of the pending digit run, or (u32)-1 if there
+ *                is none; reset to (u32)-1 once the run has been consumed.
+ * \param end     Offset one past the last digit.
+ * \return The parsed id, or (u32)-1 if no digit run was pending.
+ *
+ * Throws LocatedParseError if the run is longer than nine digits.
+ */
+static
+u32 fetchSubID(const char *logical, u32 &digit, u32 end) {
+    const u32 none = (u32)-1;
+    if (digit == none) { // no digit parsing in progress
+        return none;
+    }
+    assert(end > digit);
+    if (end - digit > 9) {
+        throw LocatedParseError("Expression id too large");
+    }
+    // Accumulate most-significant digit first.
+    u32 value = 0;
+    for (u32 pos = digit; pos < end; pos++) {
+        assert(isdigit(logical[pos]));
+        value = value * 10 + (logical[pos] - '0');
+    }
+    digit = none;
+    return value;
+}
+
+/**
+ * \brief Apply the operator on top of \a op_stack to its operands.
+ *
+ * Pops one operand for NOT, or two for any other operator, from
+ * \a subid_stack, adds the operation to \a pl, and pushes the id of the new
+ * operation back as an operand. \a op_stack must not be empty.
+ *
+ * Throws LocatedParseError if there are too few operands.
+ */
+static
+void popOperator(vector<LogicalOperator> &op_stack, vector<u32> &subid_stack,
+                 ParsedLogical &pl) {
+    if (subid_stack.empty()) {
+        throw LocatedParseError("Not enough operand");
+    }
+    const u32 rhs = subid_stack.back();
+    subid_stack.pop_back();
+
+    const u32 kind = op_stack.back().op;
+    u32 lhs = 0; // unused by the unary NOT
+    if (kind != LOGICAL_OP_NOT) {
+        if (subid_stack.empty()) {
+            throw LocatedParseError("Not enough operand");
+        }
+        lhs = subid_stack.back();
+        subid_stack.pop_back();
+    }
+
+    const u32 result = pl.logicalTreeAdd(kind, lhs, rhs);
+    subid_stack.push_back(result);
+    op_stack.pop_back();
+}
+
+/**
+ * \brief Look up the value of key \a ckey during the purely-negative check.
+ *
+ * Operation keys (LOGICAL_OP_BIT set) read their computed value from \a lv;
+ * every other key evaluates to 0.
+ */
+static
+char getValue(const vector<char> &lv, u32 ckey) {
+    return (ckey & LOGICAL_OP_BIT) ? lv[ckey & ~LOGICAL_OP_BIT] : 0;
+}
+
+/**
+ * \brief Evaluate operations \a start..\a result of \a tree with every
+ * non-operation operand taken as 0, and return the value of \a result.
+ *
+ * A non-zero return means the combination is true while none of its
+ * sub-expressions has a value of 1.
+ */
+static
+bool hasMatchFromPurelyNegative(const vector<LogicalOp> &tree,
+                                u32 start, u32 result) {
+    vector<char> values(tree.size());
+    assert(start <= result);
+    for (u32 key = start; key <= result; key++) {
+        assert(key & LOGICAL_OP_BIT);
+        const LogicalOp &node = tree[key & ~LOGICAL_OP_BIT];
+        assert(key == node.id);
+        char &out = values[node.id & ~LOGICAL_OP_BIT];
+        if (node.op == LOGICAL_OP_NOT) {
+            out = !getValue(values, node.ro);
+        } else if (node.op == LOGICAL_OP_AND) {
+            out = getValue(values, node.lo) & getValue(values, node.ro);
+        } else if (node.op == LOGICAL_OP_OR) {
+            out = getValue(values, node.lo) | getValue(values, node.ro);
+        } else {
+            assert(0);
+        }
+    }
+    return values[result & ~LOGICAL_OP_BIT];
+}
+
+/**
+ * \brief Parse one logical combination expression and register it.
+ *
+ * The infix text \a logical is converted into a sequence of operations
+ * appended to logicalTree, using an operator stack and an operand stack.
+ * Operands are decimal expression ids; operators are '!', '&' and '|';
+ * parentheses raise the priority of the operators they enclose. Only the
+ * space character (' ') is skipped as whitespace.
+ *
+ * \param id         Expression id of the combination itself.
+ * \param logical    NUL-terminated combination expression text.
+ * \param ekey       Exhaustion key stored in the combination info.
+ * \param min_offset Minimum offset stored in the combination info.
+ * \param max_offset Maximum offset stored in the combination info.
+ *
+ * Throws LocatedParseError (located at the offending offset) on syntax
+ * errors, and CompileError if the expression contains no operation or is
+ * true when every sub-expression is false.
+ */
+void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical,
+                                            u32 ekey, u64a min_offset,
+                                            u64a max_offset) {
+    u32 ckey = getCombKey(id);
+    vector<LogicalOperator> op_stack;
+    vector<u32> subid_stack;
+    u32 lkey_start = INVALID_LKEY; // logical operation's lkey
+    u32 paren = 0; // parentheses
+    u32 digit = (u32)-1; // digit start offset, invalid offset is -1
+    u32 subid = (u32)-1;
+    u32 i;
+    try {
+        for (i = 0; logical[i]; i++) {
+            // isdigit() is undefined for negative values, and plain char may
+            // be signed, so bytes >= 0x80 must go through unsigned char.
+            if (isdigit((unsigned char)logical[i])) {
+                if (digit == (u32)-1) { // new digit start
+                    digit = i;
+                }
+            } else {
+                // A non-digit ends any pending operand.
+                if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
+                    subid_stack.push_back(getLogicalKey(subid));
+                    addRelateCKey(subid_stack.back(), ckey);
+                }
+                if (logical[i] == ' ') { // skip whitespace
+                    continue;
+                }
+                if (logical[i] == '(') {
+                    paren += 1;
+                } else if (logical[i] == ')') {
+                    if (paren == 0) {
+                        throw LocatedParseError("Not enough left parentheses");
+                    }
+                    paren -= 1;
+                } else {
+                    u32 prio = toOperator(logical[i]);
+                    if (prio != UNKNOWN_OP) {
+                        LogicalOperator op(prio, paren);
+                        // Apply every stacked operator that takes priority
+                        // over the incoming one before pushing it.
+                        while (!op_stack.empty()
+                               && cmpOperator(op_stack.back(), op)) {
+                            popOperator(op_stack, subid_stack, *this);
+                            if (lkey_start == INVALID_LKEY) {
+                                lkey_start = subid_stack.back();
+                            }
+                        }
+                        op_stack.push_back(op);
+                    } else {
+                        throw LocatedParseError("Unknown character");
+                    }
+                }
+            }
+        }
+        if (paren != 0) {
+            throw LocatedParseError("Not enough right parentheses");
+        }
+        // The expression may end in an operand that is still pending.
+        if ((subid = fetchSubID(logical, digit, i)) != (u32)-1) {
+            subid_stack.push_back(getLogicalKey(subid));
+            addRelateCKey(subid_stack.back(), ckey);
+        }
+        while (!op_stack.empty()) {
+            popOperator(op_stack, subid_stack, *this);
+            if (lkey_start == INVALID_LKEY) {
+                lkey_start = subid_stack.back();
+            }
+        }
+        if (subid_stack.size() != 1) {
+            throw LocatedParseError("Not enough operator");
+        }
+    } catch (LocatedParseError &error) {
+        error.locate(i);
+        throw;
+    }
+    u32 lkey_result = subid_stack.back(); // logical operation's lkey
+    if (lkey_start == INVALID_LKEY) {
+        throw CompileError("No logical operation.");
+    }
+    if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) {
+        throw CompileError("Has match from purely negative sub-expressions.");
+    }
+    combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result,
+                       min_offset, max_offset);
+}
+
+} // namespace ue2
--- /dev/null
+/*
+ * Copyright (c) 2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Parse and build ParsedLogical::logicalTree and combInfoMap.
+ */
+
+#ifndef LOGICAL_COMBINATION_H
+#define LOGICAL_COMBINATION_H
+
+#include "util/logical.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace ue2 {
+
+/** \brief Parses logical combination expressions and holds the resulting
+ * key mappings, combination info and logical operation tree. */
+class ParsedLogical {
+ friend class ReportManager;
+public:
+ /** \brief Parse 1 logical expression \a logical, assign temporary ckey. */
+ void parseLogicalCombination(unsigned id, const char *logical, u32 ekey,
+ u64a min_offset, u64a max_offset);
+
+ /** \brief Check if all sub-expression id in combinations are valid. */
+ void validateSubIDs(const unsigned *ids, const char *const *expressions,
+ const unsigned *flags, unsigned elements);
+
+ /** \brief Renumber and assign final lkey for each logical operation
+ * after parsed all logical expressions. */
+ void logicalKeyRenumber();
+
+ /** \brief Fetch the lkey associated with the given expression id,
+ * assigning one if necessary. */
+ u32 getLogicalKey(u32 expressionId);
+
+ /** \brief Fetch the ckey associated with the given expression id,
+ * assigning one if necessary. */
+ u32 getCombKey(u32 expressionId);
+
+ /** \brief Add lkey's corresponding combination id. */
+ void addRelateCKey(u32 lkey, u32 ckey);
+
+ /** \brief Add one Logical Operation. */
+ u32 logicalTreeAdd(u32 op, u32 left, u32 right);
+
+ /** \brief Assign the combination info associated with the given ckey. */
+ void combinationInfoAdd(u32 ckey, u32 id, u32 ekey, u32 lkey_start,
+ u32 lkey_result, u64a min_offset, u64a max_offset);
+
+ /** \brief Read-only access to the expression id to lkey mapping. */
+ const std::map<u32, u32> &getLkeyMap() const {
+ return toLogicalKeyMap;
+ }
+
+ /** \brief Read-only access to the vector of logical operations. */
+ const std::vector<LogicalOp> &getLogicalTree() const {
+ return logicalTree;
+ }
+
+ /** \brief Fetch a copy of the combination info for combination expression
+ * \a id. Uses std::map::at, which throws std::out_of_range if \a id has
+ * no ckey. */
+ CombInfo getCombInfoById(u32 id) const {
+ u32 ckey = toCombKeyMap.at(id);
+ assert(ckey < combInfoMap.size());
+ return combInfoMap.at(ckey);
+ }
+
+private:
+ /** \brief Mapping from ckey to combination info. */
+ std::vector<CombInfo> combInfoMap;
+
+ /** \brief Mapping from combination expression id to combination key,
+ * combination key is used in combination bit-vector cache. */
+ std::map<u32, u32> toCombKeyMap;
+
+ /** \brief Mapping from expression id to logical key, logical key is used
+ * as index in LogicalOp array. */
+ std::map<u32, u32> toLogicalKeyMap;
+
+ /** \brief Mapping from logical key to related combination keys. */
+ std::map<u32, std::set<u32>> lkey2ckeys;
+
+ /** \brief Logical constraints, each operation from postfix notation. */
+ std::vector<LogicalOp> logicalTree;
+};
+
+} // namespace ue2
+
+#endif
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
return ng.addLiteral(lit, expr.index, expr.report, expr.highlander,
- expr.som);
+ expr.som, expr.quiet);
}
} // namespace ue2
/*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "rose/runtime.h"
#include "som/som_runtime.h"
#include "util/exhaust.h"
+#include "util/logical.h"
#include "util/fatbit.h"
enum DedupeResult {
mmbit_clear((u8 *)evec, rose->ekeyCount);
}
+/** \brief Return whether key \a lkey is set in the logical vector \a lvec.
+ * The vector holds lkeyCount + lopCount keys. */
+static really_inline
+char getLogicalVal(const struct RoseEngine *rose, const char *lvec, u32 lkey) {
+    const u32 total_keys = rose->lkeyCount + rose->lopCount;
+    DEBUG_PRINTF("checking lkey matching %p %u\n", lvec, lkey);
+    assert(lkey != INVALID_LKEY);
+    assert(lkey < total_keys);
+    return mmbit_isset((const u8 *)lvec, total_keys, lkey);
+}
+
+/** \brief Store \a val for key \a lkey in the logical vector \a lvec: the
+ * key is cleared when \a val is 0 and set for any other value. */
+static really_inline
+void setLogicalVal(const struct RoseEngine *rose, char *lvec, u32 lkey,
+                   char val) {
+    const u32 total_keys = rose->lkeyCount + rose->lopCount;
+    DEBUG_PRINTF("marking as matched logical key %u\n", lkey);
+    assert(lkey != INVALID_LKEY);
+    assert(lkey < total_keys);
+    if (val == 0) {
+        mmbit_unset((u8 *)lvec, total_keys, lkey);
+    } else {
+        mmbit_set((u8 *)lvec, total_keys, lkey);
+    }
+}
+
+/** \brief Set key \a ckey in the combination vector \a cvec. */
+static really_inline
+void setCombinationActive(const struct RoseEngine *rose, char *cvec, u32 ckey) {
+    const u32 total_keys = rose->ckeyCount;
+    DEBUG_PRINTF("marking as active combination key %u\n", ckey);
+    assert(ckey != INVALID_CKEY);
+    assert(ckey < total_keys);
+    mmbit_set((u8 *)cvec, total_keys, ckey);
+}
+
+/** \brief Evaluate logical operations \a start..\a result in order, storing
+ * each operation's value in \a lvec, and return the value of \a result. */
+static really_inline
+char isLogicalCombination(const struct RoseEngine *rose, char *lvec,
+                          u32 start, u32 result) {
+    const struct LogicalOp *ops = (const struct LogicalOp *)
+        ((const char *)rose + rose->logicalTreeOffset);
+    assert(start >= rose->lkeyCount);
+    assert(start <= result);
+    assert(result < rose->lkeyCount + rose->lopCount);
+    for (u32 key = start; key <= result; key++) {
+        // Operation keys follow the lkeyCount sub-expression keys.
+        const struct LogicalOp *node = &ops[key - rose->lkeyCount];
+        assert(key == node->id);
+        assert(node->op <= LAST_LOGICAL_OP);
+        switch ((enum LogicalOpType)node->op) {
+        case LOGICAL_OP_NOT:
+            setLogicalVal(rose, lvec, node->id,
+                          !getLogicalVal(rose, lvec, node->ro));
+            break;
+        case LOGICAL_OP_AND:
+            setLogicalVal(rose, lvec, node->id,
+                          getLogicalVal(rose, lvec, node->lo) &
+                          getLogicalVal(rose, lvec, node->ro)); // &&
+            break;
+        case LOGICAL_OP_OR:
+            setLogicalVal(rose, lvec, node->id,
+                          getLogicalVal(rose, lvec, node->lo) |
+                          getLogicalVal(rose, lvec, node->ro)); // ||
+            break;
+        }
+    }
+    return getLogicalVal(rose, lvec, result);
+}
+
+/** \brief Clear all keys in both the logical vector \a lvec and the
+ * combination vector \a cvec. */
+static really_inline
+void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) {
+    const u32 logical_keys = rose->lkeyCount + rose->lopCount;
+    DEBUG_PRINTF("clearing lvec %p %u\n", lvec, logical_keys);
+    DEBUG_PRINTF("clearing cvec %p %u\n", cvec, rose->ckeyCount);
+    mmbit_clear((u8 *)lvec, logical_keys);
+    mmbit_clear((u8 *)cvec, rose->ckeyCount);
+}
+
+/** \brief Clear every key in the combination vector \a cvec. */
+static really_inline
+void clearCvec(const struct RoseEngine *rose, char *cvec) {
+    const u32 total_keys = rose->ckeyCount;
+    DEBUG_PRINTF("clearing cvec %p %u\n", cvec, total_keys);
+    mmbit_clear((u8 *)cvec, total_keys);
+}
+
/**
* \brief Deliver the given report to the user callback.
*
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
tctxt->lastEndOffset = 0;
tctxt->filledDelayedSlots = 0;
tctxt->lastMatchOffset = 0;
+ tctxt->lastCombMatchOffset = 0;
tctxt->minMatchOffset = 0;
tctxt->minNonMpvMatchOffset = 0;
tctxt->next_mpv_offset = 0;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
done:
+ if (t->flushCombProgramOffset) {
+ if (roseRunFlushCombProgram(t, scratch, mpv_exec_end)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end);
scratch->tctxt.next_mpv_offset
= MAX(next_pos_match_loc + scratch->core_info.buf_offset,
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "hwlm/hwlm.h"
#include "runtime.h"
#include "scratch.h"
+#include "rose.h"
#include "rose_common.h"
#include "rose_internal.h"
#include "ue2common.h"
assert(!can_stop_matching(scratch));
if (canSkipCatchUpMPV(t, scratch, cur_offset)) {
+ if (t->flushCombProgramOffset) {
+ if (roseRunFlushCombProgram(t, scratch, cur_offset)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
return HWLM_CONTINUE_MATCHING;
}
hwlmcb_rv_t rv;
if (!t->activeArrayCount
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
+ if (t->flushCombProgramOffset) {
+ if (roseRunFlushCombProgram(t, scratch, end)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
updateMinMatchOffset(&scratch->tctxt, end);
rv = HWLM_CONTINUE_MATCHING;
} else {
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
return MO_CONTINUE_MATCHING;
}
+/**
+ * \brief Run the engine's flush-combination program at offset \a end.
+ *
+ * The program's HWLM result is translated for the caller: MO_HALT_MATCHING
+ * when the program returned HWLM_TERMINATE_MATCHING, MO_CONTINUE_MATCHING
+ * otherwise.
+ */
+int roseRunFlushCombProgram(const struct RoseEngine *rose,
+                            struct hs_scratch *scratch, u64a end) {
+    const hwlmcb_rv_t status =
+        roseRunProgram(rose, scratch, rose->flushCombProgramOffset, 0, end, 0);
+    return (status == HWLM_TERMINATE_MATCHING) ? MO_HALT_MATCHING
+                                               : MO_CONTINUE_MATCHING;
+}
+
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
setSomFromSomAware(scratch, sr, start, end);
}
+/**
+ * \brief Mark \p ekey in the exhaustion vector without raising a report.
+ *
+ * The key must not already be exhausted (asserted). Returns the result of
+ * roseHaltIfExhausted().
+ */
+static rose_inline
+hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t,
+                           struct hs_scratch *scratch, u32 ekey) {
+    assert(scratch);
+    assert(scratch->magic == SCRATCH_MAGIC);
+    assert(!can_stop_matching(scratch));
+
+    struct core_info *info = &scratch->core_info;
+    char *evec = info->exhaustionVector;
+
+    assert(!isExhausted(info->rose, evec, ekey));
+    markAsMatched(info->rose, evec, ekey);
+
+    return roseHaltIfExhausted(t, scratch);
+}
+
/** \brief Test bit \p c of the bitmap \p reach; yields 1 if set, 0 if not. */
static really_inline
int reachHasBit(const u8 *reach, u8 c) {
    const u8 mask = (u8)(1U << (c % 8U));
    return (reach[c / 8U] & mask) != 0;
}
}
+/**
+ * \brief Evaluate every combination currently marked active and report those
+ * whose logical value is true.
+ *
+ * Reports are raised at tctxt.lastCombMatchOffset. A combination is skipped
+ * (not reported) when that offset lies outside its min_offset/max_offset
+ * bounds, when its exhaustion key is already set, or when
+ * isLogicalCombination() evaluates to false. After a complete pass the
+ * combination vector is cleared.
+ *
+ * Returns HWLM_TERMINATE_MATCHING as soon as roseReport() requests it (the
+ * combination vector is not cleared on that path), otherwise
+ * HWLM_CONTINUE_MATCHING.
+ */
+static rose_inline
+hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
+                                    struct hs_scratch *scratch) {
+    u8 *cvec = (u8 *)scratch->core_info.combVector;
+    if (!mmbit_any(cvec, t->ckeyCount)) {
+        return HWLM_CONTINUE_MATCHING;
+    }
+
+    /* Loop-invariant lookups, resolved once rather than per active ckey. */
+    const struct CombInfo *combInfoMap = (const struct CombInfo *)
+        ((const char *)t + t->combInfoMapOffset);
+    const char *evec = scratch->core_info.exhaustionVector;
+    char *lvec = scratch->core_info.logicalVector;
+    u64a end = scratch->tctxt.lastCombMatchOffset;
+
+    for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID);
+         i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) {
+        const struct CombInfo *ci = combInfoMap + i;
+
+        /* Skip (do not halt) when the offset is below the lower bound. */
+        if ((ci->min_offset != 0) && (end < ci->min_offset)) {
+            DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset);
+            continue;
+        }
+        /* Skip (do not halt) when the offset is above the upper bound. */
+        if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) {
+            DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset);
+            continue;
+        }
+
+        DEBUG_PRINTF("check ekey %u\n", ci->ekey);
+        if (ci->ekey != INVALID_EKEY) {
+            assert(ci->ekey < t->ekeyCount);
+            if (isExhausted(t, evec, ci->ekey)) {
+                DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
+                             ci->ekey);
+                continue;
+            }
+        }
+
+        DEBUG_PRINTF("check ckey %u\n", i);
+        if (!isLogicalCombination(t, lvec, ci->start, ci->result)) {
+            DEBUG_PRINTF("Logical Combination Failed!\n");
+            continue;
+        }
+
+        DEBUG_PRINTF("Logical Combination Passed!\n");
+        if (roseReport(t, scratch, end, ci->id, 0,
+                       ci->ekey) == HWLM_TERMINATE_MATCHING) {
+            return HWLM_TERMINATE_MATCHING;
+        }
+    }
+
+    /* Every active combination has been evaluated; reset the pending set. */
+    clearCvec(t, (char *)cvec);
+    return HWLM_CONTINUE_MATCHING;
+}
+
#define PROGRAM_CASE(name) \
case ROSE_INSTR_##name: { \
DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \
}
}
PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_LOGICAL) {
+ DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
+ ri->lkey, ri->offset_adjust);
+ assert(ri->lkey != INVALID_LKEY);
+ assert(ri->lkey < t->lkeyCount);
+ char *lvec = scratch->core_info.logicalVector;
+ setLogicalVal(t, lvec, ri->lkey, 1);
+ updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_COMBINATION) {
+ DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
+ assert(ri->ckey != INVALID_CKEY);
+ assert(ri->ckey < t->ckeyCount);
+ char *cvec = scratch->core_info.combVector;
+ setCombinationActive(t, cvec, ri->ckey);
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(FLUSH_COMBINATION) {
+ assert(end >= tctxt->lastCombMatchOffset);
+ if (end > tctxt->lastCombMatchOffset) {
+ if (flushActiveCombinations(t, scratch)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_EXHAUST) {
+ updateSeqPoint(tctxt, end, from_mpv);
+ if (roseSetExhaust(t, scratch, ri->ekey)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ work_done = 1;
+ }
+ PROGRAM_NEXT_INSTRUCTION
}
}
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch);
+int roseRunFlushCombProgram(const struct RoseEngine *rose,
+ struct hs_scratch *scratch, u64a end);
+
#endif // ROSE_H
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
curr_offset += mmbit_size(build.rm.numEkeys());
so->exhausted_size = mmbit_size(build.rm.numEkeys());
+ // Logical multibit.
+ so->logicalVec = curr_offset;
+ so->logicalVec_size = mmbit_size(build.rm.numLogicalKeys() +
+ build.rm.numLogicalOps());
+ curr_offset += so->logicalVec_size;
+
+ // Combination multibit.
+ so->combVec = curr_offset;
+ so->combVec_size = mmbit_size(build.rm.numCkeys());
+ curr_offset += so->combVec_size;
+
// SOM locations and valid/writeable multibit structures.
if (build.ssm.numSomSlots()) {
const u32 somWidth = build.ssm.somPrecision();
proto.rosePrefixCount = countRosePrefixes(leftInfoTable);
}
+/**
+ * \brief Serialise logical-combination data into the engine.
+ *
+ * Adds the logical tree and then the combination info map to the blob,
+ * storing the returned offsets in the prototype, and copies the lkey, lop
+ * and ckey counts from the ReportManager.
+ */
+static
+void writeLogicalInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
+                      RoseEngine &proto) {
+    // Blob order matters for layout: tree first, combination map second.
+    proto.logicalTreeOffset = engine_blob.add_range(rm.getLogicalTree());
+    proto.combInfoMapOffset = engine_blob.add_range(rm.getCombInfoMap());
+
+    proto.lkeyCount = rm.numLogicalKeys();
+    proto.lopCount = rm.numLogicalOps();
+    proto.ckeyCount = rm.numCkeys();
+}
+
static
void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
RoseEngine &proto, const set<u32> &no_retrigger_queues) {
return program;
}
+/**
+ * \brief Build the stand-alone flush combination program.
+ *
+ * A FLUSH_COMBINATION instruction is added only when the engine has at least
+ * one combination key.
+ */
+static
+RoseProgram makeFlushCombProgram(const RoseEngine &t) {
+    RoseProgram flush_prog;
+    if (t.ckeyCount != 0) {
+        addFlushCombinationProgram(flush_prog);
+    }
+    return flush_prog;
+}
+
static
u32 history_required(const rose_literal_id &key) {
if (key.msk.size() < key.s.length()) {
writeDkeyInfo(rm, bc.engine_blob, proto);
writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
+ writeLogicalInfo(rm, bc.engine_blob, proto);
+
+ auto flushComb_prog = makeFlushCombProgram(proto);
+ proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
// Build anchored matcher.
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
PROGRAM_NEXT_INSTRUCTION
+ PROGRAM_CASE(SET_LOGICAL) {
+ os << " lkey " << ri->lkey << endl;
+ os << " offset_adjust " << ri->offset_adjust << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_COMBINATION) {
+ os << " ckey " << ri->ckey << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(FLUSH_COMBINATION) {}
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(SET_EXHAUST) {
+ os << " ekey " << ri->ekey << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
default:
os << " UNKNOWN (code " << int{code} << ")" << endl;
os << " <stopping>" << endl;
os.close();
}
+/**
+ * \brief Write the flush combination program, or a placeholder line when the
+ * engine has none, to \p filename.
+ */
+static
+void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) {
+    ofstream os(filename);
+    const u32 prog_offset = t->flushCombProgramOffset;
+
+    if (!prog_offset) {
+        os << "<No Flush Combination Program>" << endl;
+    } else {
+        const char *prog = (const char *)t + prog_offset;
+        os << "Flush Combination Program @ " << prog_offset
+           << ":" << endl;
+        dumpProgram(os, t, prog);
+        os << endl;
+    }
+
+    os.close();
+}
+
static
void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
ofstream os(filename);
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
fprintf(f, " - exhaustion vector : %u bytes\n",
t->stateOffsets.exhausted_size);
+ fprintf(f, " - logical vector : %u bytes\n",
+ t->stateOffsets.logicalVec_size);
+ fprintf(f, " - combination vector: %u bytes\n",
+ t->stateOffsets.combVec_size);
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
fprintf(f, " - active array : %u bytes\n",
DUMP_U32(t, mode);
DUMP_U32(t, historyRequired);
DUMP_U32(t, ekeyCount);
+ DUMP_U32(t, lkeyCount);
+ DUMP_U32(t, lopCount);
+ DUMP_U32(t, ckeyCount);
+ DUMP_U32(t, logicalTreeOffset);
+ DUMP_U32(t, combInfoMapOffset);
DUMP_U32(t, dkeyCount);
DUMP_U32(t, dkeyLogSize);
DUMP_U32(t, invDkeyOffset);
DUMP_U32(t, leftOffset);
DUMP_U32(t, roseCount);
DUMP_U32(t, eodProgramOffset);
+ DUMP_U32(t, flushCombProgramOffset);
DUMP_U32(t, lastByteHistoryIterOffset);
DUMP_U32(t, minWidth);
DUMP_U32(t, minWidthExcludingBoundaries);
DUMP_U32(t, stateOffsets.history);
DUMP_U32(t, stateOffsets.exhausted);
DUMP_U32(t, stateOffsets.exhausted_size);
+ DUMP_U32(t, stateOffsets.logicalVec);
+ DUMP_U32(t, stateOffsets.logicalVec_size);
+ DUMP_U32(t, stateOffsets.combVec);
+ DUMP_U32(t, stateOffsets.combVec_size);
DUMP_U32(t, stateOffsets.activeLeafArray);
DUMP_U32(t, stateOffsets.activeLeafArray_size);
DUMP_U32(t, stateOffsets.activeLeftArray);
const string &base) {
dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt");
dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
+ dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt");
dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
/*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
RoseInstrEnd::~RoseInstrEnd() = default;
RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default;
+RoseInstrFlushCombination::~RoseInstrFlushCombination() = default;
using OffsetMap = RoseInstruction::OffsetMap;
inst->squash = squash;
}
+/** \brief Run the base-class write, then store lkey and offset_adjust. */
+void RoseInstrSetLogical::write(void *dest, RoseEngineBlob &blob,
+                                const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto &out = *static_cast<impl_type *>(dest);
+    out.lkey = lkey;
+    out.offset_adjust = offset_adjust;
+}
+
+/** \brief Run the base-class write, then store ckey. */
+void RoseInstrSetCombination::write(void *dest, RoseEngineBlob &blob,
+                                    const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto &out = *static_cast<impl_type *>(dest);
+    out.ckey = ckey;
+}
+
+/** \brief Run the base-class write, then store ekey. */
+void RoseInstrSetExhaust::write(void *dest, RoseEngineBlob &blob,
+                                const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto &out = *static_cast<impl_type *>(dest);
+    out.ekey = ekey;
+}
+
}
/*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
};
+/**
+ * \brief Build-time form of ROSE_INSTR_SET_LOGICAL: carries the logical key
+ * to set and the offset adjustment supplied by the caller.
+ */
+class RoseInstrSetLogical
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_LOGICAL,
+                                    ROSE_STRUCT_SET_LOGICAL,
+                                    RoseInstrSetLogical> {
+public:
+    u32 lkey;          //!< Logical key to set.
+    s32 offset_adjust; //!< Offset adjustment written into the instruction.
+
+    RoseInstrSetLogical(u32 lkey_in, s32 offset_adjust_in)
+        : lkey(lkey_in), offset_adjust(offset_adjust_in) {}
+
+    /** \brief Equal when both lkey and offset_adjust agree. */
+    bool operator==(const RoseInstrSetLogical &ri) const {
+        if (lkey != ri.lkey) {
+            return false;
+        }
+        return offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        return hash_all(opcode, lkey, offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    /** \brief Same comparison as operator==; the offset maps are unused. */
+    bool equiv_to(const RoseInstrSetLogical &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return operator==(ri);
+    }
+};
+
+/**
+ * \brief Build-time form of ROSE_INSTR_SET_COMBINATION: carries the
+ * combination key to mark active.
+ */
+class RoseInstrSetCombination
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_COMBINATION,
+                                    ROSE_STRUCT_SET_COMBINATION,
+                                    RoseInstrSetCombination> {
+public:
+    u32 ckey; //!< Combination key to set.
+
+    // explicit: a bare u32 must never silently convert into an instruction.
+    explicit RoseInstrSetCombination(u32 ckey_in) : ckey(ckey_in) {}
+
+    /** \brief Equal when the ckeys agree. */
+    bool operator==(const RoseInstrSetCombination &ri) const {
+        return ckey == ri.ckey;
+    }
+
+    size_t hash() const override {
+        return hash_all(opcode, ckey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    /** \brief Same comparison as operator==; the offset maps are unused. */
+    bool equiv_to(const RoseInstrSetCombination &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return ckey == ri.ckey;
+    }
+};
+
+class RoseInstrFlushCombination // build-time FLUSH_COMBINATION instruction; declares no operand fields
+ : public RoseInstrBaseTrivial<ROSE_INSTR_FLUSH_COMBINATION,
+ ROSE_STRUCT_FLUSH_COMBINATION,
+ RoseInstrFlushCombination> {
+public:
+ ~RoseInstrFlushCombination() override; // declared here; defaulted out of line in the implementation file
+};
+
+/**
+ * \brief Build-time form of ROSE_INSTR_SET_EXHAUST: carries the exhaustion
+ * key to mark (used in place of a report for quiet reports).
+ */
+class RoseInstrSetExhaust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_EXHAUST,
+                                    ROSE_STRUCT_SET_EXHAUST,
+                                    RoseInstrSetExhaust> {
+public:
+    u32 ekey; //!< Exhaustion key.
+
+    // explicit: a bare u32 must never silently convert into an instruction.
+    explicit RoseInstrSetExhaust(u32 ekey_in) : ekey(ekey_in) {}
+
+    /** \brief Equal when the ekeys agree. */
+    bool operator==(const RoseInstrSetExhaust &ri) const {
+        return ekey == ri.ekey;
+    }
+
+    size_t hash() const override {
+        return hash_all(opcode, ekey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    /** \brief Same comparison as operator==; the offset maps are unused. */
+    bool equiv_to(const RoseInstrSetExhaust &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return ekey == ri.ekey;
+    }
+};
+
class RoseInstrEnd
: public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
RoseInstrEnd> {
program.add_block(move(block));
}
+/** \brief Insert a FLUSH_COMBINATION instruction before the end of \p program. */
+void addFlushCombinationProgram(RoseProgram &program) {
+    auto flush_instr = make_unique<RoseInstrFlushCombination>();
+    program.add_before_end(move(flush_instr));
+}
+
static
void makeRoleCheckLeftfix(const RoseBuildImpl &build,
const map<RoseVertex, left_build_info> &leftfix_info,
}
}
+/**
+ * \brief Emit the logical-combination bookkeeping for \p report.
+ *
+ * Nothing is added when the report has no logical key. Otherwise a
+ * SET_LOGICAL instruction for the report's lkey is inserted, followed by one
+ * SET_COMBINATION per ckey that the ReportManager relates to that lkey.
+ */
+static
+void addLogicalSetRequired(const Report &report, ReportManager &rm,
+                           RoseProgram &program) {
+    const u32 lkey = report.lkey;
+    if (lkey == INVALID_LKEY) {
+        return;
+    }
+
+    // Record the matching status of this lkey.
+    program.add_before_end(
+        make_unique<RoseInstrSetLogical>(lkey, report.offsetAdjust));
+
+    // Mark every combination related to this lkey as active, pending a check.
+    const auto &related = rm.getRelateCKeys(lkey);
+    for (const u32 ckey : related) {
+        program.add_before_end(make_unique<RoseInstrSetCombination>(ckey));
+    }
+}
+
static
void makeReport(const RoseBuildImpl &build, const ReportID id,
const bool has_som, RoseProgram &program) {
switch (report.type) {
case EXTERNAL_CALLBACK:
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
if (!has_som) {
// Dedupe is only necessary if this report has a dkey, or if there
// are SOM reports to catch up.
bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
if (report.ekey == INVALID_EKEY) {
if (needs_dedupe) {
- report_block.add_before_end(
- make_unique<RoseInstrDedupeAndReport>(
- report.quashSom, build.rm.getDkey(report),
- report.onmatch, report.offsetAdjust, end_inst));
+ if (!report.quiet) {
+ report_block.add_before_end(
+ make_unique<RoseInstrDedupeAndReport>(
+ report.quashSom, build.rm.getDkey(report),
+ report.onmatch, report.offsetAdjust, end_inst));
+ } else {
+ makeDedupe(build.rm, report, report_block);
+ }
} else {
- report_block.add_before_end(make_unique<RoseInstrReport>(
- report.onmatch, report.offsetAdjust));
+ if (!report.quiet) {
+ report_block.add_before_end(
+ make_unique<RoseInstrReport>(
+ report.onmatch, report.offsetAdjust));
+ }
}
} else {
if (needs_dedupe) {
makeDedupe(build.rm, report, report_block);
}
- report_block.add_before_end(make_unique<RoseInstrReportExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
+ if (!report.quiet) {
+ report_block.add_before_end(
+ make_unique<RoseInstrReportExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
} else { // has_som
makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) {
- report_block.add_before_end(make_unique<RoseInstrReportSom>(
- report.onmatch, report.offsetAdjust));
+ if (!report.quiet) {
+ report_block.add_before_end(make_unique<RoseInstrReportSom>(
+ report.onmatch, report.offsetAdjust));
+ }
} else {
- report_block.add_before_end(
- make_unique<RoseInstrReportSomExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
+ if (!report.quiet) {
+ report_block.add_before_end(
+ make_unique<RoseInstrReportSomExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
}
+ addLogicalSetRequired(report, build.rm, report_block);
break;
case INTERNAL_SOM_LOC_SET:
case INTERNAL_SOM_LOC_SET_IF_UNSET:
case INTERNAL_SOM_LOC_MAKE_WRITABLE:
case INTERNAL_SOM_LOC_SET_FROM:
case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
if (has_som) {
auto ri = make_unique<RoseInstrReportSomAware>();
writeSomOperation(report, &ri->som);
case EXTERNAL_CALLBACK_SOM_STORED:
case EXTERNAL_CALLBACK_SOM_ABS:
case EXTERNAL_CALLBACK_SOM_REV_NFA:
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) {
- report_block.add_before_end(make_unique<RoseInstrReportSom>(
- report.onmatch, report.offsetAdjust));
+ if (!report.quiet) {
+ report_block.add_before_end(make_unique<RoseInstrReportSom>(
+ report.onmatch, report.offsetAdjust));
+ }
} else {
- report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
+ if (!report.quiet) {
+ report_block.add_before_end(
+ make_unique<RoseInstrReportSomExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
+ addLogicalSetRequired(report, build.rm, report_block);
break;
case EXTERNAL_CALLBACK_SOM_PASS:
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) {
- report_block.add_before_end(make_unique<RoseInstrReportSom>(
- report.onmatch, report.offsetAdjust));
+ if (!report.quiet) {
+ report_block.add_before_end(make_unique<RoseInstrReportSom>(
+ report.onmatch, report.offsetAdjust));
+ }
} else {
- report_block.add_before_end(make_unique<RoseInstrReportSomExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
+ if (!report.quiet) {
+ report_block.add_before_end(
+ make_unique<RoseInstrReportSomExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
+ addLogicalSetRequired(report, build.rm, report_block);
break;
default:
throw CompileError("Unable to generate bytecode.");
}
- assert(!report_block.empty());
program.add_block(move(report_block));
}
/*
- * Copyright (c) 2016-2017, Intel Corporation
+ * Copyright (c) 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
void addSuffixesEodProgram(RoseProgram &program);
void addMatcherEodProgram(RoseProgram &program);
+void addFlushCombinationProgram(RoseProgram &program);
static constexpr u32 INVALID_QUEUE = ~0U;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* reports with that ekey should not be delivered to the user. */
u32 exhausted;
- /** size of exhausted multibit */
+ /** size in bytes of exhausted multibit */
u32 exhausted_size;
+ /** Logical multibit.
+ *
+ * entry per logical key(operand/operator) (used by Logical Combination). */
+ u32 logicalVec;
+
+ /** size in bytes of logical multibit */
+ u32 logicalVec_size;
+
+ /** Combination multibit.
+ *
+ * entry per combination key (used by Logical Combination). */
+ u32 combVec;
+
+ /** size in bytes of combination multibit */
+ u32 combVec_size;
+
/** Multibit for active suffix/outfix engines. */
u32 activeLeafArray;
u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
u32 historyRequired; /**< max amount of history required for streaming */
u32 ekeyCount; /**< number of exhaustion keys */
+ u32 lkeyCount; /**< number of logical keys */
+ u32 lopCount; /**< number of logical ops */
+ u32 ckeyCount; /**< number of combination keys */
+ u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */
+ u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */
u32 dkeyCount; /**< number of dedupe keys */
u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */
u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
u32 roseCount;
u32 eodProgramOffset; //!< EOD program, otherwise 0.
+ u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
u32 lastByteHistoryIterOffset; // if non-zero
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*/
ROSE_INSTR_INCLUDED_JUMP,
- LAST_ROSE_INSTRUCTION = ROSE_INSTR_INCLUDED_JUMP //!< Sentinel.
+ /**
+ * \brief Set matching status of a sub-expression.
+ */
+ ROSE_INSTR_SET_LOGICAL,
+
+ /**
+ * \brief Set combination status pending checking.
+ */
+ ROSE_INSTR_SET_COMBINATION,
+
+ /**
+ * \brief Check if compliant with any logical constraints.
+ */
+ ROSE_INSTR_FLUSH_COMBINATION,
+
+ /** \brief Mark as exhausted instead of report while quiet. */
+ ROSE_INSTR_SET_EXHAUST,
+
+ LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel.
};
struct ROSE_STRUCT_END {
u8 squash; //!< FDR confirm squash mask for included literal.
u32 child_offset; //!< Program offset of included literal.
};
+
+struct ROSE_STRUCT_SET_LOGICAL {
+ u8 code; //!< From enum RoseInstructionCode.
+ u32 lkey; //!< Logical key whose value is set to 1 in the logical vector.
+ s32 offset_adjust; //!< Added to the match offset when recording the last combination match offset (built from the Report's offsetAdjust).
+};
+
+struct ROSE_STRUCT_SET_COMBINATION {
+ u8 code; //!< From enum RoseInstructionCode.
+ u32 ckey; //!< Combination key to mark active in the combination vector.
+};
+
+struct ROSE_STRUCT_FLUSH_COMBINATION {
+ u8 code; //!< From enum RoseInstructionCode; this instruction has no operands.
+};
+
+struct ROSE_STRUCT_SET_EXHAUST {
+ u8 code; //!< From enum RoseInstructionCode.
+ u32 ekey; //!< Exhaustion key to mark; no report is raised by this instruction.
+};
#endif // ROSE_ROSE_PROGRAM_H
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
tctxt->lastMatchOffset = offset;
}
+/**
+ * \brief Record \p offset as the last combination match offset.
+ *
+ * The new offset is asserted to be no smaller than the stored one.
+ */
+static really_inline
+void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) {
+    DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
+                 tctxt->lastCombMatchOffset);
+    assert(tctxt->lastCombMatchOffset <= offset);
+
+    tctxt->lastCombMatchOffset = offset;
+}
+
static really_inline
void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
tctxt->lastEndOffset = offset;
tctxt->filledDelayedSlots = 0;
tctxt->lastMatchOffset = 0;
+ tctxt->lastCombMatchOffset = offset;
tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset;
tctxt->next_mpv_offset = 0;
tctxt->lastEndOffset = offset;
tctxt->filledDelayedSlots = 0;
tctxt->lastMatchOffset = 0;
+ tctxt->lastCombMatchOffset = offset; /* DO NOT set 0 here! */
tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset;
tctxt->next_mpv_offset = offset;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
length, NULL, 0, 0, 0, flags);
clearEvec(rose, scratch->core_info.exhaustionVector);
+ if (rose->ckeyCount) {
+ scratch->core_info.logicalVector = scratch->bstate +
+ rose->stateOffsets.logicalVec;
+ scratch->core_info.combVector = scratch->bstate +
+ rose->stateOffsets.combVec;
+ scratch->tctxt.lastCombMatchOffset = 0;
+ clearLvec(rose, scratch->core_info.logicalVector,
+ scratch->core_info.combVector);
+ }
if (!length) {
if (rose->boundary.reportZeroEodOffset) {
scratch);
}
+ if (rose->flushCombProgramOffset) {
+ if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
+ unmarkScratchInUse(scratch);
+ return HS_SCAN_TERMINATED;
+ }
+ }
+
set_retval:
DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
told_to_stop_matching(scratch));
roseInitState(rose, state);
clearEvec(rose, state + rose->stateOffsets.exhausted);
+ if (rose->ckeyCount) {
+ clearLvec(rose, state + rose->stateOffsets.logicalVec,
+ state + rose->stateOffsets.combVec);
+ }
// SOM state multibit structures.
initSomState(rose, state);
getHistory(state, rose, id->offset),
getHistoryAmount(rose, id->offset), id->offset, status, 0);
+ if (rose->ckeyCount) {
+ scratch->core_info.logicalVector = state +
+ rose->stateOffsets.logicalVec;
+ scratch->core_info.combVector = state + rose->stateOffsets.combVec;
+ scratch->tctxt.lastCombMatchOffset = id->offset;
+ }
+
if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset);
}
scratch->core_info.status |= STATUS_TERMINATED;
}
}
+
+ if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
+ if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
+ DEBUG_PRINTF("told to stop matching\n");
+ scratch->core_info.status |= STATUS_TERMINATED;
+ }
+ }
}
HS_PUBLIC_API
populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
getHistory(state, rose, id->offset), historyAmount,
id->offset, status, flags);
+ if (rose->ckeyCount) {
+ scratch->core_info.logicalVector = state +
+ rose->stateOffsets.logicalVec;
+ scratch->core_info.combVector = state + rose->stateOffsets.combVec;
+ scratch->tctxt.lastCombMatchOffset = id->offset;
+ }
assert(scratch->core_info.hlen <= id->offset
&& scratch->core_info.hlen <= rose->historyRequired);
}
}
+ if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
+ if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
+ scratch->core_info.status |= STATUS_TERMINATED;
+ }
+ }
+
setStreamStatus(state, scratch->core_info.status);
if (likely(!can_stop_matching(scratch))) {
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
const struct RoseEngine *rose;
char *state; /**< full stream state */
char *exhaustionVector; /**< pointer to evec for this stream */
+ char *logicalVector; /**< pointer to lvec for this stream */
+ char *combVector; /**< pointer to cvec for this stream */
const u8 *buf; /**< main scan buffer */
size_t len; /**< length of main scan buffer in bytes */
const u8 *hbuf; /**< history buffer */
* stream */
u64a lastMatchOffset; /**< last match offset report up out of rose;
* used _only_ for debugging, asserts */
+ u64a lastCombMatchOffset; /**< last match offset of active combinations */
u64a minMatchOffset; /**< the earliest offset that we are still allowed to
* report */
u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are
/*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
/* copy the exhaustion multibit */
COPY_MULTIBIT(stream_body + so->exhausted, rose->ekeyCount);
+ /* copy the logical multibit */
+ COPY_MULTIBIT(stream_body + so->logicalVec,
+ rose->lkeyCount + rose->lopCount);
+
+ /* copy the combination multibit */
+ COPY_MULTIBIT(stream_body + so->combVec, rose->ckeyCount);
+
/* copy nfa stream state for endfixes */
/* Note: in the expand case the active array has already been copied into
* the stream. */
--- /dev/null
+/*
+ * Copyright (c) 2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Inline functions for manipulating logical combinations.
+ */
+
+#ifndef LOGICAL_H
+#define LOGICAL_H
+
+#include "ue2common.h"
+
+/** Index meaning a given logical key is invalid. */
+#define INVALID_LKEY (~(u32)0)
+#define INVALID_CKEY INVALID_LKEY
+
+/** Logical operation types, listed from highest to lowest priority. */
+enum LogicalOpType {
+ LOGICAL_OP_NOT,
+ LOGICAL_OP_AND,
+ LOGICAL_OP_OR,
+ LAST_LOGICAL_OP = LOGICAL_OP_OR //!< Sentinel.
+};
+
+#define UNKNOWN_OP (~(u32)0)
+
+/** A logical operation consists of four parts. */
+struct LogicalOp {
+ u32 id; //!< logical operator/operation id
+ u32 op; //!< operator, a value of enum LogicalOpType
+ u32 lo; //!< left operand
+ u32 ro; //!< right operand
+};
+
+/** Per-combination information:
+ * - it occupies a region in the LogicalOp vector;
+ * - it has an exhaustion key for single-match mode.
+ *
+ * NOTE(review): start/result are documented as "ckey" below, but the runtime
+ * passes them to isLogicalCombination() together with the logical vector, so
+ * they presumably index logical operations (lkey space) -- confirm. */
+struct CombInfo {
+ u32 id; //!< id handed to roseReport() when the combination is reported
+ u32 ekey; //!< exhaustion key (INVALID_EKEY when the combination has none)
+ u32 start; //!< ckey of logical operation to start calculating (see NOTE above)
+ u32 result; //!< ckey of logical operation to give final result (see NOTE above)
+ u64a min_offset; //!< flush skips the combination below this offset; 0 disables the check
+ u64a max_offset; //!< flush skips the combination above this offset; MAX_OFFSET disables the check
+};
+
+/** Temporarily used to separate operations' ids from reports' lkeys
+ * while building the logicalTree with the shunting-yard algorithm;
+ * operations' ids are finally renumbered to follow the reports' lkeys. */
+#define LOGICAL_OP_BIT 0x80000000UL
+
+#endif
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "ue2common.h"
#include "util/exhaust.h" // for INVALID_EKEY
+#include "util/logical.h" // for INVALID_LKEY
#include "util/hash.h"
#include "util/order_check.h"
* exhaustible, this will be INVALID_EKEY. */
u32 ekey = INVALID_EKEY;
+ /** \brief Logical Combination key in each combination.
+ *
+ * If in a Logical Combination, this is the lkey to check before reporting
+ * a match; additionally, the lkey will be set before checking. If not in
+ * a Logical Combination, this will be INVALID_LKEY. */
+ u32 lkey = INVALID_LKEY;
+
+ /** \brief Quiet flag for expressions in any logical combination. */
+ bool quiet = false;
+
/** \brief Adjustment to add to the match offset when we report a match.
*
* This is usually used for reports attached to states that form part of a
}
static inline
-Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) {
+Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey, bool quiet) {
Report ir(EXTERNAL_CALLBACK, report);
ir.offsetAdjust = offsetAdjust;
ir.ekey = ekey;
+ ir.quiet = (u8)quiet;
return ir;
}
+/** \brief Build an external callback report with INVALID_EKEY as its
+ * exhaustion key and the quiet flag off. */
static inline
Report makeCallback(u32 report, s32 offsetAdjust) {
- return makeECallback(report, offsetAdjust, INVALID_EKEY);
+ return makeECallback(report, offsetAdjust, INVALID_EKEY, false);
}
static inline
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
return it->second;
}
+/** \brief Return the set of ckeys stored for \p lkey in the parsed logical
+ * structure. The lkey is asserted to be present in the lkey-to-ckeys map. */
+const set<u32> &ReportManager::getRelateCKeys(u32 lkey) {
+    const auto found = pl.lkey2ckeys.find(lkey);
+    assert(found != pl.lkey2ckeys.end());
+    const set<u32> &ckeys = found->second;
+    return ckeys;
+}
+
+/** \brief Renumber the logical keys, then store each report's lkey on the
+ * report itself. Reports whose onmatch id has no entry in the logical key map
+ * are left untouched. */
+void ReportManager::logicalKeyRenumber() {
+    pl.logicalKeyRenumber();
+
+    // Propagate the renumbered keys to the corresponding reports.
+    const auto &lkeys = pl.toLogicalKeyMap;
+    for (Report &ir : reportIds) {
+        const auto found = lkeys.find(ir.onmatch);
+        if (found != lkeys.end()) {
+            ir.lkey = found->second;
+        }
+    }
+}
+
+/** \brief Read-only access to the logical operation vector held by the
+ * parsed logical structure. */
+const vector<LogicalOp> &ReportManager::getLogicalTree() const {
+ return pl.logicalTree;
+}
+
+/** \brief Read-only access to the per-combination info vector held by the
+ * parsed logical structure. */
+const vector<CombInfo> &ReportManager::getCombInfoMap() const {
+ return pl.combInfoMap;
+}
+
u32 ReportManager::getUnassociatedExhaustibleKey(void) {
u32 rv = toExhaustibleKeyMap.size();
bool inserted;
return (u32) toExhaustibleKeyMap.size();
}
+/** \brief Number of entries in the logical key map. */
+u32 ReportManager::numLogicalKeys() const {
+ return (u32) pl.toLogicalKeyMap.size();
+}
+
+/** \brief Number of logical operations stored in the logical tree. */
+u32 ReportManager::numLogicalOps() const {
+ return (u32) pl.logicalTree.size();
+}
+
+/** \brief Number of entries in the combination key map. */
+u32 ReportManager::numCkeys() const {
+ return (u32) pl.toCombKeyMap.size();
+}
+
bool ReportManager::patternSetCanExhaust() const {
return global_exhaust && !toExhaustibleKeyMap.empty();
}
ekey = getExhaustibleKey(expr.report);
}
- return makeECallback(expr.report, adj, ekey);
+ return makeECallback(expr.report, adj, ekey, expr.quiet);
}
void ReportManager::setProgramOffset(ReportID id, u32 programOffset) {
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "util/compile_error.h"
#include "util/noncopyable.h"
#include "util/report.h"
+#include "parser/logical_combination.h"
#include <map>
#include <set>
/** \brief Total number of exhaustion keys. */
u32 numEkeys() const;
+ /** \brief Total number of logical keys. */
+ u32 numLogicalKeys() const;
+
+ /** \brief Total number of logical operators. */
+ u32 numLogicalOps() const;
+
+ /** \brief Total number of combination keys. */
+ u32 numCkeys() const;
+
/** \brief True if the pattern set can exhaust (i.e. all patterns are
* highlander). */
bool patternSetCanExhaust() const;
* assigning one if necessary. */
u32 getExhaustibleKey(u32 expressionIndex);
+ /** \brief Get lkey's corresponding ckeys. */
+ const std::set<u32> &getRelateCKeys(u32 lkey);
+
+ /** \brief Renumber the lkeys of logical operations once all logical
+ * expressions have been parsed. */
+ void logicalKeyRenumber();
+
+ /** \brief Used in Rose for writing bytecode. */
+ const std::vector<LogicalOp> &getLogicalTree() const;
+
+ /** \brief Used in Rose for writing bytecode. */
+ const std::vector<CombInfo> &getCombInfoMap() const;
+
/** \brief Fetch the dedupe key associated with the given report. Returns
* ~0U if no dkey is needed. */
u32 getDkey(const Report &r) const;
* set. */
u32 getProgramOffset(ReportID id) const;
+ /** \brief Parsed logical combination structure. */
+ ParsedLogical pl;
+
private:
/** \brief Grey box ref, for checking resource limits. */
const Grey &grey;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
bool g_allSignatures = false;
bool g_forceEditDistance = false;
bool build_sigs = false;
+bool check_logical = false;
unsigned int g_signature;
unsigned int g_editDistance;
unsigned int globalFlags = 0;
unsigned int num_of_threads = 1;
unsigned int countFailures = 0;
+/** \brief A pattern cached for the logical combination check: the
+ * expression text together with its flags and extended parameters.
+ *
+ * No destructor is declared, so the compiler-generated copy and move
+ * operations remain available when entries are placed into the maps. */
+class ParsedExpr {
+public:
+    ParsedExpr(const string &regex_in, unsigned int flags_in,
+               const hs_expr_ext &ext_in)
+        : regex(regex_in), flags(flags_in), ext(ext_in) {}
+    string regex;       //!< expression text
+    unsigned int flags; //!< flags given with the expression
+    hs_expr_ext ext;    //!< extended parameters given with the expression
+};
+
+typedef map<unsigned int, ParsedExpr> ExprExtMap;
+ExprExtMap g_combs;
+ExprExtMap g_validSubs;
+
+// Iterator pointing to next logical expression to process.
+ExprExtMap::const_iterator comb_read_it;
+
// Global greybox structure, used in non-release builds.
unique_ptr<Grey> g_grey;
// Mutex serialising access to output map and stdout.
std::mutex lk_output;
+// Mutex guarding access to write g_combs.
+std::mutex lk_write_comb;
+
+// Mutex guarding access to write g_validSubs.
+std::mutex lk_write_sub;
+
// Possible values for pattern check results.
enum ExprStatus {NOT_PROCESSED, SUCCESS, FAILURE};
}
}
+/** \brief Fetch the next cached combination expression to process.
+ *
+ * Holds lk_read while reading and advancing the shared cursor comb_read_it.
+ *
+ * \param it receives the iterator to the entry to process.
+ * \return false once the cursor has reached the end of g_combs. */
+static
+bool getNextLogicalExpression(ExprExtMap::const_iterator &it) {
+    lock_guard<mutex> lock(lk_read);
+    if (comb_read_it == g_combs.end()) {
+        return false;
+    }
+    it = comb_read_it++;
+    return true;
+}
+
+/** \brief Cache a combination expression under its id in g_combs.
+ *
+ * Holds lk_write_comb while inserting. An id that is already cached keeps
+ * its existing entry, since emplace() does not overwrite. */
+static
+void cacheCombExpr(unsigned id, const string &regex, unsigned int flags,
+                   const hs_expr_ext &ext) {
+    lock_guard<mutex> lock(lk_write_comb);
+    g_combs.emplace(id, ParsedExpr(regex, flags, ext));
+}
+
+/** \brief Cache a sub-expression under its id in g_validSubs.
+ *
+ * Holds lk_write_sub while inserting. An id that is already cached keeps
+ * its existing entry, since emplace() does not overwrite. */
+static
+void cacheSubExpr(unsigned id, const string &regex, unsigned int flags,
+                  const hs_expr_ext &ext) {
+    lock_guard<mutex> lock(lk_write_sub);
+    g_validSubs.emplace(id, ParsedExpr(regex, flags, ext));
+}
+
// This function prints the Pattern IDs order
// It creates the output for build sigs
// Caller is required to hold lk_output when calling this function
ext.flags |= HS_EXT_FLAG_EDIT_DISTANCE;
}
+ if (flags & HS_FLAG_COMBINATION) {
+ if (check_logical) {
+ cacheCombExpr(it->first, regex, flags, ext);
+ } else {
+ recordFailure(g_exprMap, it->first, "Unsupported flag used.");
+ }
+ continue;
+ }
+
// Try and compile a database.
const char *regexp = regex.c_str();
const hs_expr_ext *extp = &ext;
nullptr, &db, &compile_err);
#endif
+ if (err == HS_SUCCESS) {
+ assert(db);
+ recordSuccess(g_exprMap, it->first);
+ hs_free_database(db);
+ if (check_logical) {
+ cacheSubExpr(it->first, regex, flags, ext);
+ }
+ } else {
+ assert(!db);
+ assert(compile_err);
+ recordFailure(g_exprMap, it->first, compile_err->message);
+ hs_free_compile_error(compile_err);
+ }
+ }
+}
+
+/** \brief Collect every decimal number that appears in a combination
+ * expression.
+ *
+ * The string is scanned from its last character to its first, so the ids
+ * are appended to \p ids in reverse order of appearance.
+ *
+ * \param logical NUL-terminated combination expression.
+ * \param ids receives the numbers found; entries appended before a failure
+ *        are left in place.
+ * \return false if a number has more than nine digits, true otherwise. */
+static
+bool fetchSubIds(const char *logical, vector<unsigned> &ids) {
+    unsigned mult = 1; // weight of the next digit; > 1 while inside a number
+    unsigned id = 0;
+    for (size_t i = strlen(logical); i-- > 0;) {
+        // Passing a negative char to isdigit() is undefined, so convert to
+        // unsigned char first.
+        const unsigned char ch = (unsigned char)logical[i];
+        if (isdigit(ch)) {
+            if (mult > 100000000) {
+                return false;
+            }
+            id += (unsigned)(ch - '0') * mult;
+            mult *= 10;
+        } else if (mult > 1) {
+            ids.push_back(id);
+            mult = 1;
+            id = 0;
+        }
+    }
+    if (mult > 1) {
+        ids.push_back(id);
+    }
+    return true;
+}
+
+static
+void checkLogicalExpression(UNUSED void *threadarg) {
+ unsigned int mode = g_streaming ? HS_MODE_STREAM
+ : g_vectored ? HS_MODE_VECTORED
+ : HS_MODE_BLOCK;
+ if (g_streaming) {
+ // Use SOM mode, for permissiveness' sake.
+ mode |= HS_MODE_SOM_HORIZON_LARGE;
+ }
+
+ ExprExtMap::const_iterator it;
+ while (getNextLogicalExpression(it)) {
+ const ParsedExpr &comb = it->second;
+
+ vector<unsigned> subIds;
+ if (!fetchSubIds(comb.regex.c_str(), subIds)) {
+ recordFailure(g_exprMap, it->first, "Sub-expression id too large.");
+ continue;
+ }
+
+ vector<const char *> regexv;
+ vector<unsigned> flagsv;
+ vector<unsigned> idv;
+ vector<const hs_expr_ext *> extv;
+ bool valid = true;
+
+ for (const auto i : subIds) {
+ ExprExtMap::const_iterator jt = g_validSubs.find(i);
+ if (jt != g_validSubs.end()) {
+ const ParsedExpr &sub = jt->second;
+ regexv.push_back(sub.regex.c_str());
+ flagsv.push_back(sub.flags);
+ idv.push_back(i);
+ extv.push_back(&sub.ext);
+ } else {
+ valid = false;
+ break;
+ }
+ }
+
+ if (valid) {
+ regexv.push_back(comb.regex.c_str());
+ flagsv.push_back(comb.flags);
+ idv.push_back(it->first);
+ extv.push_back(&comb.ext);
+ } else {
+ recordFailure(g_exprMap, it->first, "Sub-expression id not valid.");
+ continue;
+ }
+
+ // Try and compile a database.
+ hs_error_t err;
+ hs_compile_error_t *compile_err;
+ hs_database_t *db = nullptr;
+
+#if !defined(RELEASE_BUILD)
+ // This variant is available in non-release builds and allows us to
+ // modify greybox settings.
+ err = hs_compile_multi_int(regexv.data(), flagsv.data(), idv.data(),
+ extv.data(), regexv.size(), mode,
+ nullptr, &db, &compile_err, *g_grey);
+#else
+ err = hs_compile_ext_multi(regexv.data(), flagsv.data(), idv.data(),
+ extv.data(), regexv.size(), mode,
+ nullptr, &db, &compile_err);
+#endif
+
if (err == HS_SUCCESS) {
assert(db);
recordSuccess(g_exprMap, it->first);
<< " -T NUM Run with NUM threads." << endl
<< " -h Display this help." << endl
<< " -B Build signature set." << endl
+ << " -C Check logical combinations (default: off)." << endl
<< endl;
}
static
void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
- const char options[] = "e:E:s:z:hLNV8G:T:B";
+ const char options[] = "e:E:s:z:hLNV8G:T:BC";
bool signatureSet = false;
for (;;) {
case 'B':
build_sigs = true;
break;
+ case 'C':
+ check_logical = true;
+ break;
default:
usage();
exit(1);
threads[i].join();
}
+ if (check_logical) {
+ comb_read_it = g_combs.begin();
+
+ for (unsigned int i = 0; i < num_of_threads; i++) {
+ threads[i] = thread(checkLogicalExpression, nullptr);
+ }
+
+ for (unsigned int i = 0; i < num_of_threads; i++) {
+ threads[i].join();
+ }
+ }
+
if (!g_exprMap.empty() && !build_sigs) {
cout << "SUMMARY: " << countFailures << " of "
<< g_exprMap.size() << " failed." << endl;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "nfagraph/ng_util.h"
#include "parser/Parser.h"
#include "parser/unsupported.h"
+#include "parser/logical_combination.h"
#include "util/compile_context.h"
#include "util/make_unique.h"
#include "util/report_manager.h"
CompiledNG(unique_ptr<NGHolder> g_in,
unique_ptr<ReportManager> rm_in)
: g(std::move(g_in)), rm(std::move(rm_in)) {}
+ CompiledNG(unique_ptr<ParsedLogical> pl_in)
+ : pl(std::move(pl_in)) {}
unique_ptr<ue2::NGHolder> g;
unique_ptr<ue2::ReportManager> rm;
+ unique_ptr<ue2::ParsedLogical> pl;
};
static
}
try {
+ if (combination) {
+ auto pl = ue2::make_unique<ParsedLogical>();
+ pl->parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL);
+ pl->logicalKeyRenumber();
+ cng = make_unique<CompiledNG>(move(pl));
+ return;
+ }
+
bool isStreaming = colliderMode == MODE_STREAMING;
bool isVectored = colliderMode == MODE_VECTORED;
CompileContext cc(isStreaming, isVectored, get_current_target(),
bool highlander = false;
bool prefilter = false;
bool som = false;
+ bool combination = false;
+ bool quiet = false;
auto i = m_expr.find(id);
if (i == m_expr.end()) {
throw NGCompileFailure("Cannot parse expression flags.");
}
// read PCRE flags
- if (!getPcreFlags(hs_flags, &flags, &highlander, &prefilter, &som)) {
+ if (!getPcreFlags(hs_flags, &flags, &highlander, &prefilter, &som,
+ &combination, &quiet)) {
throw NGCompileFailure("Cannot get PCRE flags.");
}
if (force_utf8) {
cngi->highlander = highlander;
cngi->prefilter = prefilter;
cngi->som = som;
+ cngi->combination = combination;
+ cngi->quiet = quiet;
cngi->min_offset = ext.min_offset;
cngi->max_offset = ext.max_offset;
cngi->min_length = ext.min_length;
return cngi;
}
+/** \brief Evaluate the logical operations numbered \p start through
+ * \p result (inclusive) and return the value of the last one.
+ *
+ * \param lv value table indexed by lkey/operation id; updated in place.
+ * \param comb logical operations; operation id i is stored at index
+ *        i - lkeyCount.
+ * \param lkeyCount offset subtracted from an operation id to index \p comb.
+ * \return lv[result] after evaluation. */
+static
+char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
+                          size_t lkeyCount, unsigned start, unsigned result) {
+    assert(start <= result);
+    for (unsigned ckey = start; ckey <= result; ++ckey) {
+        const LogicalOp &node = comb[ckey - lkeyCount];
+        assert(node.id == ckey);
+        if (node.op == LOGICAL_OP_NOT) {
+            lv[node.id] = !lv[node.ro];
+        } else if (node.op == LOGICAL_OP_AND) {
+            lv[node.id] = lv[node.lo] & lv[node.ro]; // &&
+        } else if (node.op == LOGICAL_OP_OR) {
+            lv[node.id] = lv[node.lo] | lv[node.ro]; // ||
+        } else {
+            assert(0);
+        }
+    }
+    return lv[result];
+}
+
bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
- const string &buffer, ResultSet &rs, string &) {
+ const string &buffer, ResultSet &rs, string &error) {
+ if (cngi.quiet) {
+ return true;
+ }
+
+ if (cngi.combination) {
+ // Compile and run sub-expressions, store match results.
+ map<unsigned long long, set<MatchResult>> offset_to_matches;
+ map<unsigned long long, set<unsigned>> offset_to_lkeys;
+ set<unsigned> sub_exps;
+ const auto &m_lkey = cng.pl->getLkeyMap();
+ for (const auto &it_lkey : m_lkey) {
+ if (sub_exps.find(it_lkey.first) == sub_exps.end()) {
+ sub_exps.emplace(it_lkey.first);
+ ResultSet sub_rs(RESULT_FROM_PCRE);
+ shared_ptr<CNGInfo> sub_cngi = preprocess(it_lkey.first);
+ const CompiledNG *sub_cng;
+ try {
+ sub_cng = sub_cngi->get();
+ }
+ catch (const NGCompileFailure &err) {
+ return false;
+ }
+ catch (const NGUnsupportedFailure &err) {
+ return false;
+ }
+ sub_cngi->quiet = false; // force not quiet in sub-exp.
+ if (!run(it_lkey.first, *sub_cng, *sub_cngi, buffer, sub_rs, error)) {
+ rs.clear();
+ return false;
+ }
+ for (const auto &it_mr : sub_rs.matches) {
+ offset_to_matches[it_mr.to].emplace(it_mr);
+ offset_to_lkeys[it_mr.to].emplace(it_lkey.second);
+ if (sub_cngi->highlander) {
+ break;
+ }
+ }
+ }
+ }
+ // Calculate rs for combination expression.
+ vector<char> lv;
+ const auto &comb = cng.pl->getLogicalTree();
+ lv.resize(m_lkey.size() + comb.size());
+ const auto &li = cng.pl->getCombInfoById(cngi.id);
+ for (const auto &it : offset_to_lkeys) {
+ for (auto report : it.second) {
+ lv[report] = 1;
+ }
+ if (isLogicalCombination(lv, comb, m_lkey.size(),
+ li.start, li.result)) {
+ for (const auto &mr : offset_to_matches.at(it.first)) {
+ if ((mr.to >= cngi.min_offset) &&
+ (mr.to <= cngi.max_offset)) {
+ rs.addMatch(mr.from, mr.to);
+ }
+ }
+ }
+ }
+ return true;
+ }
+
set<pair<size_t, size_t>> matches;
if (g_streamOffset) {
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
bool highlander = false;
bool prefilter = false;
bool som = false;
+ bool combination = false;
+ bool quiet = false;
+
+ unsigned id;
private:
void compile();
// If NFA graph scan failed for some reason, we mark it as bad and skip
std::unique_ptr<CompiledNG> cng; // compiled NFA graph
std::mutex cng_mutex; // serialised accesses to NFA graph
- unsigned id;
-
// Our expression map
const ExpressionMap &m_expr;
};
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
static
bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander,
- bool *prefilter, bool *som, hs_expr_ext *ext) {
+ bool *prefilter, bool *som, bool *combination,
+ bool *quiet, hs_expr_ext *ext) {
string regex;
unsigned int hs_flags = 0;
if (!readExpression(expr, regex, &hs_flags, ext)) {
expr.swap(regex);
- if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som)) {
+ if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som,
+ combination, quiet)) {
return false;
}
bool highlander = false;
bool prefilter = false;
bool som = false;
+ bool combination = false;
+ bool quiet = false;
// we can still match approximate matching patterns with PCRE if edit
// distance 0 is requested
hs_expr_ext ext;
// Decode the flags
- if (!decodeExprPcre(re, &flags, &highlander, &prefilter, &som, &ext)) {
+ if (!decodeExprPcre(re, &flags, &highlander, &prefilter, &som,
+ &combination, &quiet, &ext)) {
throw PcreCompileFailure("Unable to decode flags.");
}
som |= !!somFlags;
// For traditional Hyperscan, add global callout to pattern.
- if (!no_callouts) {
+ if (!combination && !no_callouts) {
addCallout(re);
}
compiled->highlander = highlander;
compiled->prefilter = prefilter;
compiled->som = som;
+ compiled->combination = combination;
+ compiled->quiet = quiet;
compiled->min_offset = ext.min_offset;
compiled->max_offset = ext.max_offset;
compiled->min_length = ext.min_length;
compiled->expression = i->second; // original PCRE
flags |= PCRE_NO_AUTO_POSSESS;
+ if (compiled->combination) {
+ compiled->pl.parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL);
+ compiled->pl.logicalKeyRenumber();
+ compiled->report = id;
+ return compiled;
+ }
+
+
compiled->bytecode =
pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr);
return ret;
}
+/** \brief Evaluate the logical operations numbered \p start through
+ * \p result (inclusive) and return the value of the last one.
+ *
+ * \param lv value table indexed by lkey/operation id; updated in place.
+ * \param comb logical operations; operation id i is stored at index
+ *        i - lkeyCount.
+ * \param lkeyCount offset subtracted from an operation id to index \p comb.
+ * \return lv[result] after evaluation. */
+static
+char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
+                          size_t lkeyCount, unsigned start, unsigned result) {
+    assert(start <= result);
+    for (unsigned ckey = start; ckey <= result; ++ckey) {
+        const LogicalOp &node = comb[ckey - lkeyCount];
+        assert(node.id == ckey);
+        if (node.op == LOGICAL_OP_NOT) {
+            lv[node.id] = !lv[node.ro];
+        } else if (node.op == LOGICAL_OP_AND) {
+            lv[node.id] = lv[node.lo] & lv[node.ro]; // &&
+        } else if (node.op == LOGICAL_OP_OR) {
+            lv[node.id] = lv[node.lo] | lv[node.ro]; // ||
+        } else {
+            assert(0);
+        }
+    }
+    return lv[result];
+}
+
bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
const string &buffer, ResultSet &rs, string &error) {
+ if (compiled.quiet) {
+ return true;
+ }
+
+ if (compiled.combination) {
+ // Compile and run sub-expressions, store match results.
+ map<unsigned long long, set<MatchResult>> offset_to_matches;
+ map<unsigned long long, set<unsigned>> offset_to_lkeys;
+ set<unsigned> sub_exps;
+ const auto &m_lkey = compiled.pl.getLkeyMap();
+ for (const auto &it_lkey : m_lkey) {
+ if (sub_exps.find(it_lkey.first) == sub_exps.end()) {
+ sub_exps.emplace(it_lkey.first);
+ ResultSet sub_rs(RESULT_FROM_PCRE);
+ shared_ptr<CompiledPcre> sub_pcre;
+ try {
+ sub_pcre = compile(it_lkey.first);
+ }
+ catch (const SoftPcreCompileFailure &err) {
+ return false;
+ }
+ catch (const PcreCompileFailure &err) {
+ return false;
+ }
+ sub_pcre->quiet = false; // force not quiet in sub-exp.
+ if (!run(it_lkey.first, *sub_pcre, buffer, sub_rs, error)) {
+ rs.clear();
+ return false;
+ }
+ for (const auto &it_mr : sub_rs.matches) {
+ offset_to_matches[it_mr.to].emplace(it_mr);
+ offset_to_lkeys[it_mr.to].emplace(it_lkey.second);
+ if (sub_pcre->highlander) {
+ break;
+ }
+ }
+ }
+ }
+ // Calculate rs for combination expression.
+ vector<char> lv;
+ const auto &comb = compiled.pl.getLogicalTree();
+ lv.resize(m_lkey.size() + comb.size());
+ const auto &li = compiled.pl.getCombInfoById(compiled.report);
+ for (const auto &it : offset_to_lkeys) {
+ for (auto report : it.second) {
+ lv[report] = 1;
+ }
+ if (isLogicalCombination(lv, comb, m_lkey.size(),
+ li.start, li.result)) {
+ for (const auto &mr : offset_to_matches.at(it.first)) {
+ if ((mr.to >= compiled.min_offset) &&
+ (mr.to <= compiled.max_offset)) {
+ rs.addMatch(mr.from, mr.to);
+ }
+ }
+ }
+ }
+ return true;
+ }
+
CalloutContext ctx(out);
pcre_extra extra;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "expressions.h"
#include "ResultSet.h"
+#include "parser/logical_combination.h"
#include <memory>
#include <mutex>
bool highlander = false;
bool prefilter = false;
bool som = false;
+ bool combination = false;
+ bool quiet = false;
+
+ // Parsed logical combinations.
+ ue2::ParsedLogical pl;
+
+ // Combination expression report id.
+ unsigned report;
private:
// If a PCRE has hit its match recursion limit when scanning a corpus, we
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
throw CorpusFailure("Expression could not be read: " + i->second);
}
+ // A combination's corpus consists of its sub-expressions' corpora.
+ if (hs_flags & HS_FLAG_COMBINATION) {
+ ParsedLogical pl;
+ pl.parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL);
+ pl.logicalKeyRenumber();
+ const auto &m_lkey = pl.getLkeyMap();
+ assert(!m_lkey.empty());
+ u32 a_subid; // arbitrary sub id
+ unordered_map<u32, vector<Corpus>> m_data;
+ for (const auto &it : m_lkey) {
+ a_subid = it.first;
+ vector<Corpus> sub_data;
+ generate(a_subid, sub_data);
+ m_data.emplace(a_subid, move(sub_data));
+ }
+ assert(!m_data.empty());
+ size_t num_corpus = m_data[a_subid].size();
+ data.reserve(data.size() + num_corpus);
+ while (num_corpus) {
+ string cc; // 1 combination corpus
+ for (const auto &it : m_lkey) {
+ assert(!m_data[it.first].empty());
+ cc += m_data[it.first].back().data;
+ if (m_data[it.first].size() > 1) {
+ m_data[it.first].pop_back();
+ }
+ }
+ data.push_back(Corpus(cc));
+ num_corpus--;
+ }
+ return;
+ }
+
if (force_utf8_mode) {
hs_flags |= HS_FLAG_UTF8;
}
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
}
+ // Clear all recorded matches: empties matches, dupe_matches and
+ // matches_by_block.
+ void clear() {
+ matches.clear();
+ dupe_matches.clear();
+ matches_by_block.clear();
+ }
+
// Unexpected out of order match seen.
bool uoom = false;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include <pcre.h> /* for pcre flags */
bool getPcreFlags(unsigned int hs_flags, unsigned int *flags,
- bool *highlander, bool *prefilter, bool *som) {
+ bool *highlander, bool *prefilter, bool *som,
+ bool *combination, bool *quiet) {
assert(flags);
assert(highlander);
assert(prefilter);
*som = true;
hs_flags &= ~HS_FLAG_SOM_LEFTMOST;
}
+    if (hs_flags & HS_FLAG_COMBINATION) {
+        // combination is optional: the prototype defaults it to nullptr, so
+        // only write through it when the caller supplied one.
+        if (combination) {
+            *combination = true;
+        }
+        hs_flags &= ~HS_FLAG_COMBINATION;
+    }
+    if (hs_flags & HS_FLAG_QUIET) {
+        // quiet is optional in the same way.
+        if (quiet) {
+            *quiet = true;
+        }
+        hs_flags &= ~HS_FLAG_QUIET;
+    }
// Flags that are irrelevant to PCRE.
hs_flags &= ~HS_FLAG_ALLOWEMPTY;
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* Returns false if an unknown hyperscan flag is encountered.
*/
bool getPcreFlags(unsigned int hs_flags, unsigned int *pcre_flags,
- bool *highlander, bool *prefilter, bool *som);
+ bool *highlander, bool *prefilter, bool *som,
+ bool *combination = nullptr, bool *quiet = nullptr);
#endif /* PCRE_UTIL_H */
hyperscan/extparam.cpp
hyperscan/identical.cpp
hyperscan/literals.cpp
+ hyperscan/logical_combination.cpp
hyperscan/main.cpp
hyperscan/multi.cpp
hyperscan/order.cpp
nullptr, &db, &compile_err);
EXPECT_EQ(HS_COMPILER_ERROR, err);
EXPECT_TRUE(compile_err != nullptr);
- EXPECT_STREQ("Unrecognised flag.", compile_err->message);
+ EXPECT_STREQ("only HS_FLAG_QUIET and HS_FLAG_SINGLEMATCH "
+ "are supported in combination "
+ "with HS_FLAG_COMBINATION.", compile_err->message);
hs_free_compile_error(compile_err);
}
148:/\QÀ\Eaaaa/8 #Expression is not valid UTF-8.
149:/[\QÀ\Eaaaa]/8 #Expression is not valid UTF-8.
150:/abcd/{edit_distance=1,hamming_distance=1} #In hs_expr_ext, cannot have both edit distance and Hamming distance.
+151:/141 | abc/C #Unknown character at index 6.
+152:/141 & | 142/C #Not enough operand at index 6.
+153:/141 142 & 143/C #Not enough operator at index 13.
+154:/141 !142/C #Not enough operator at index 8.
+155:/141 & 142 |/C #Not enough operand at index 11.
+156:/)141 & 142 /C #Not enough left parentheses at index 0.
+157:/(141 & (142|!143) |144/C #Not enough right parentheses at index 22.
+158:/141 & (142|!143) )| 144/C #Not enough left parentheses at index 17.
+159:/1234567890 & (142|!143 )/C #Expression id too large at index 10.
+160:/141 & (142|!143 )|/C #Not enough operand at index 18.
+161:/!141/C #Has match from purely negative sub-expressions.
+162:/!141 | 142 | 143/C #Has match from purely negative sub-expressions.
+163:/!141 & !142 & !143/C #Has match from purely negative sub-expressions.
+164:/(141 | !142 & !143)/C #Has match from purely negative sub-expressions.
+165:/!(141 | 142 | 143)/C #Has match from purely negative sub-expressions.
+166:/141/C #No logical operation.
+167:/119 & 121/C #Unknown sub-expression id.
+168:/166 & 167/C #Unknown sub-expression id.
--- /dev/null
+/*
+ * Copyright (c) 2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <algorithm>
+#include <array>
+#include <iostream>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "hs.h"
+#include "config.h"
+#include "test_util.h"
+
+using namespace std;
+
+/* One combination (1001) over five sub-expressions, none of them quiet:
+ * every sub-expression match is reported, and 1001 is reported at each
+ * offset where a sub-expression matches while the combination is true. */
+TEST(LogicalCombination, SingleComb1) {
+    const string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
+                          "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)"};
+    unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION};
+    unsigned ids[] = {101, 102, 103, 104, 105, 1001};
+
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM,
+                                      nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(scratch != nullptr);
+
+    CallBackContext c;
+    c.halt = 0;
+    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
+                  (void *)&c);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    // Expected (offset, id) pairs, in the order they must be reported.
+    const MatchRecord expected[] = {
+        MatchRecord(3, 101),   MatchRecord(6, 102),
+        MatchRecord(18, 103),  MatchRecord(18, 1001),
+        MatchRecord(21, 101),  MatchRecord(21, 1001),
+        MatchRecord(25, 102),  MatchRecord(25, 1001),
+        MatchRecord(38, 104),  MatchRecord(38, 1001),
+        MatchRecord(39, 104),  MatchRecord(39, 1001),
+        MatchRecord(48, 105),  MatchRecord(48, 1001),
+        MatchRecord(53, 102),  MatchRecord(53, 1001),
+    };
+    const size_t num_expected = sizeof(expected) / sizeof(expected[0]);
+    ASSERT_EQ(num_expected, c.matches.size());
+    for (size_t i = 0; i < num_expected; i++) {
+        ASSERT_EQ(expected[i], c.matches[i]);
+    }
+
+    hs_free_database(db);
+    err = hs_free_scratch(scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+}
+
+/* Same combination as SingleComb1, but sub-expressions 101-104 carry
+ * HS_FLAG_QUIET: only the non-quiet sub-expression 105 and combination
+ * 1001 are expected in the reported matches. */
+TEST(LogicalCombination, SingleCombQuietSub1) {
+    const string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
+                          "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)"};
+    unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET,
+                        HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION};
+    unsigned ids[] = {101, 102, 103, 104, 105, 1001};
+
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM,
+                                      nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(scratch != nullptr);
+
+    CallBackContext c;
+    c.halt = 0;
+    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
+                  (void *)&c);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    // Expected (offset, id) pairs, in the order they must be reported.
+    const MatchRecord expected[] = {
+        MatchRecord(18, 1001), MatchRecord(21, 1001),
+        MatchRecord(25, 1001), MatchRecord(38, 1001),
+        MatchRecord(39, 1001), MatchRecord(48, 105),
+        MatchRecord(48, 1001), MatchRecord(53, 1001),
+    };
+    const size_t num_expected = sizeof(expected) / sizeof(expected[0]);
+    ASSERT_EQ(num_expected, c.matches.size());
+    for (size_t i = 0; i < num_expected; i++) {
+        ASSERT_EQ(expected[i], c.matches[i]);
+    }
+
+    hs_free_database(db);
+    err = hs_free_scratch(scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+}
+
+/* Four combinations (1001-1004) sharing the same sub-expressions, with
+ * 101-104 flagged HS_FLAG_QUIET. Checks the exact sequence of reported
+ * matches across all combinations. */
+TEST(LogicalCombination, MultiCombQuietSub1) {
+    const string data = "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}",
+                          "ijkl[mMn]", "(101 & 102 & 103) | (104 & !105)",
+                          "!101 & 102", "!(!101 | 102)", "101 & !102"};
+    unsigned flags[] = {HS_FLAG_QUIET, HS_FLAG_QUIET, HS_FLAG_QUIET,
+                        HS_FLAG_QUIET, 0, HS_FLAG_COMBINATION,
+                        HS_FLAG_COMBINATION, HS_FLAG_COMBINATION,
+                        HS_FLAG_COMBINATION};
+    unsigned ids[] = {101, 102, 103, 104, 105, 1001, 1002, 1003, 1004};
+
+    hs_database_t *db = nullptr;
+    hs_compile_error_t *compile_err = nullptr;
+    hs_error_t err = hs_compile_multi(expr, flags, ids, 9, HS_MODE_NOSTREAM,
+                                      nullptr, &db, &compile_err);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(db != nullptr);
+
+    hs_scratch_t *scratch = nullptr;
+    err = hs_alloc_scratch(db, &scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+    ASSERT_TRUE(scratch != nullptr);
+
+    CallBackContext c;
+    c.halt = 0;
+    err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb,
+                  (void *)&c);
+    ASSERT_EQ(HS_SUCCESS, err);
+
+    // Expected (offset, id) pairs, in the order they must be reported.
+    const MatchRecord expected[] = {
+        MatchRecord(3, 1003),  MatchRecord(3, 1004),
+        MatchRecord(18, 1001), MatchRecord(21, 1001),
+        MatchRecord(25, 1001), MatchRecord(38, 1001),
+        MatchRecord(39, 1001), MatchRecord(48, 105),
+        MatchRecord(48, 1001), MatchRecord(53, 1001),
+    };
+    const size_t num_expected = sizeof(expected) / sizeof(expected[0]);
+    ASSERT_EQ(num_expected, c.matches.size());
+    for (size_t i = 0; i < num_expected; i++) {
+        ASSERT_EQ(expected[i], c.matches[i]);
+    }
+
+    hs_free_database(db);
+    err = hs_free_scratch(scratch);
+    ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(LogicalCombination, MultiHighlanderCombQuietSub1) {
+    // Same patterns and input as the quiet-sub-expression case, but
+    // combinations 1001, 1003 and 1004 also carry HS_FLAG_SINGLEMATCH; each of
+    // them appears exactly once in the expected records.
+    const string corpus =
+        "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "(101 & 102 & 103) | (104 & !105)",
+                              "!101 & 102",
+                              "!(!101 | 102)",
+                              "101 & !102"};
+    unsigned pattern_flags[] = {HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH,
+                                HS_FLAG_COMBINATION | HS_FLAG_SINGLEMATCH};
+    unsigned pattern_ids[] = {101,  102,  103,  104, 105,
+                              1001, 1002, 1003, 1004};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 9,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(3, 1003),
+        MatchRecord(3, 1004),
+        MatchRecord(18, 1001),
+        MatchRecord(48, 105),
+    };
+    ASSERT_EQ(4U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, MultiQuietCombQuietSub1) {
+    // Sub-expressions 101-104 and combinations 1001 and 1004 all carry
+    // HS_FLAG_QUIET; the expected records contain only 1003 and 105.
+    const string corpus =
+        "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "(101 & 102 & 103) | (104 & !105)",
+                              "!101 & 102",
+                              "!(!101 | 102)",
+                              "101 & !102"};
+    unsigned pattern_flags[] = {HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_COMBINATION | HS_FLAG_QUIET,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION | HS_FLAG_QUIET};
+    unsigned pattern_ids[] = {101,  102,  103,  104, 105,
+                              1001, 1002, 1003, 1004};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 9,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(3, 1003),
+        MatchRecord(48, 105),
+    };
+    ASSERT_EQ(2U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, SingleComb2) {
+    // One combination (1002) over sub-expressions 201-205, none of them
+    // quiet: the expected records interleave sub-expression ids with 1002.
+    const string corpus =
+        "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "(201 | 202 & 203) & (!204 | 205)"};
+    unsigned pattern_flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {201, 202, 203, 204, 205, 1002};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 6,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(6, 202),
+        MatchRecord(18, 203),
+        MatchRecord(18, 1002),
+        MatchRecord(21, 201),
+        MatchRecord(21, 1002),
+        MatchRecord(25, 202),
+        MatchRecord(25, 1002),
+        MatchRecord(38, 204),
+        MatchRecord(39, 204),
+        MatchRecord(48, 205),
+        MatchRecord(48, 1002),
+        MatchRecord(53, 202),
+        MatchRecord(53, 1002),
+    };
+    ASSERT_EQ(13U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, SingleCombQuietSub2) {
+    // One combination (1002) over sub-expressions 201-205, where 202, 203 and
+    // 205 carry HS_FLAG_QUIET: only 201, 204 and 1002 appear in the expected
+    // records.
+    const string corpus =
+        "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "(201 | 202 & 203) & (!204 | 205)"};
+    unsigned pattern_flags[] = {0,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {201, 202, 203, 204, 205, 1002};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 6,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(18, 1002),
+        MatchRecord(21, 201),
+        MatchRecord(21, 1002),
+        MatchRecord(25, 1002),
+        MatchRecord(38, 204),
+        MatchRecord(39, 204),
+        MatchRecord(48, 1002),
+        MatchRecord(53, 1002),
+    };
+    ASSERT_EQ(8U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, SingleComb3) {
+    // One combination (1003) over sub-expressions 301-305, none of them
+    // quiet: the expected records interleave sub-expression ids with 1003.
+    const string corpus =
+        "abcijklndefxxfoobarrrghabcxdefxteakettleeeeexxxxijklnxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "((301 | 302) & 303) & (304 | 305)"};
+    unsigned pattern_flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {301, 302, 303, 304, 305, 1003};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 6,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(3, 301),
+        MatchRecord(8, 305),
+        MatchRecord(11, 302),
+        MatchRecord(23, 303),
+        MatchRecord(23, 1003),
+        MatchRecord(26, 301),
+        MatchRecord(26, 1003),
+        MatchRecord(30, 302),
+        MatchRecord(30, 1003),
+        MatchRecord(43, 304),
+        MatchRecord(43, 1003),
+        MatchRecord(44, 304),
+        MatchRecord(44, 1003),
+        MatchRecord(53, 305),
+        MatchRecord(53, 1003),
+        MatchRecord(58, 302),
+        MatchRecord(58, 1003),
+    };
+    ASSERT_EQ(17U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, SingleCombQuietSub3) {
+    // One combination (1003) over sub-expressions 301-305, where every
+    // sub-expression except 303 carries HS_FLAG_QUIET: only 303 and 1003
+    // appear in the expected records.
+    const string corpus =
+        "abcijklndefxxfoobarrrghabcxdefxteakettleeeeexxxxijklnxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "((301 | 302) & 303) & (304 | 305)"};
+    unsigned pattern_flags[] = {HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {301, 302, 303, 304, 305, 1003};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 6,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(23, 303),
+        MatchRecord(23, 1003),
+        MatchRecord(26, 1003),
+        MatchRecord(30, 1003),
+        MatchRecord(43, 1003),
+        MatchRecord(44, 1003),
+        MatchRecord(53, 1003),
+        MatchRecord(58, 1003),
+    };
+    ASSERT_EQ(8U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, MultiCombDupSub4) {
+    // Three combinations (1001-1003) that all refer to the same five
+    // sub-expressions 201-205; nothing is quiet, so sub-expression and
+    // combination ids both appear in the expected records.
+    const string corpus =
+        "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "(201 & 202 & 203) | (204 & !205)",
+                              "(201 | 202 & 203) & (!204 | 205)",
+                              "((201 | 202) & 203) & (204 | 205)"};
+    unsigned pattern_flags[] = {0,
+                                0,
+                                0,
+                                0,
+                                0,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 8,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(6, 202),
+        MatchRecord(18, 203),
+        MatchRecord(18, 1002),
+        MatchRecord(21, 201),
+        MatchRecord(21, 1001),
+        MatchRecord(21, 1002),
+        MatchRecord(25, 202),
+        MatchRecord(25, 1001),
+        MatchRecord(25, 1002),
+        MatchRecord(38, 204),
+        MatchRecord(38, 1001),
+        MatchRecord(38, 1003),
+        MatchRecord(39, 204),
+        MatchRecord(39, 1001),
+        MatchRecord(39, 1003),
+        MatchRecord(48, 205),
+        MatchRecord(48, 1001),
+        MatchRecord(48, 1002),
+        MatchRecord(48, 1003),
+        MatchRecord(53, 202),
+        MatchRecord(53, 1001),
+        MatchRecord(53, 1002),
+        MatchRecord(53, 1003),
+    };
+    ASSERT_EQ(23U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, MultiCombQuietDupSub4) {
+    // Three combinations (1001-1003) sharing sub-expressions 201-205, where
+    // every sub-expression except 204 carries HS_FLAG_QUIET: only 204 and the
+    // combination ids appear in the expected records.
+    const string corpus =
+        "abbdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "(201 & 202 & 203) | (204 & !205)",
+                              "(201 | 202 & 203) & (!204 | 205)",
+                              "((201 | 202) & 203) & (204 | 205)"};
+    unsigned pattern_flags[] = {HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {201, 202, 203, 204, 205, 1001, 1002, 1003};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 8,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(18, 1002),
+        MatchRecord(21, 1001),
+        MatchRecord(21, 1002),
+        MatchRecord(25, 1001),
+        MatchRecord(25, 1002),
+        MatchRecord(38, 204),
+        MatchRecord(38, 1001),
+        MatchRecord(38, 1003),
+        MatchRecord(39, 204),
+        MatchRecord(39, 1001),
+        MatchRecord(39, 1003),
+        MatchRecord(48, 1001),
+        MatchRecord(48, 1002),
+        MatchRecord(48, 1003),
+        MatchRecord(53, 1001),
+        MatchRecord(53, 1002),
+        MatchRecord(53, 1003),
+    };
+    ASSERT_EQ(17U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, MultiCombUniSub5) {
+    // Three combinations (1001-1003), each over its own group of five
+    // sub-expressions (101-105, 201-205, 301-305). Nothing is quiet, so the
+    // expected records hold both sub-expression and combination ids.
+    const string corpus =
+        "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"
+        "-----------------------------------------------"
+        "cbbfedxxgoogleeecncbaxfedxhaystacksssssxxxxijkloxxfed"
+        "-----------------------------------------------"
+        "cabijklRfeexxgoobarrrjpcabxfeexshockwaveeeeexxxxijklsxxfee"
+        "------------------------------------------";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "cba",
+                              "fed",
+                              "google.*cn",
+                              "haystacks{4,8}",
+                              "ijkl[oOp]",
+                              "cab",
+                              "fee",
+                              "goobar.*jp",
+                              "shockwave{4,6}",
+                              "ijkl[rRs]",
+                              "(101 & 102 & 103) | (104 & !105)",
+                              "(201 | 202 & 203) & (!204 | 205)",
+                              "((301 | 302) & 303) & (304 | 305)"};
+    unsigned pattern_flags[] = {0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {101, 102, 103, 104, 105,
+                              201, 202, 203, 204, 205,
+                              301, 302, 303, 304, 305,
+                              1001, 1002, 1003};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 18,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(3, 101),
+        MatchRecord(6, 102),
+        MatchRecord(18, 103),
+        MatchRecord(18, 1001),
+        MatchRecord(21, 101),
+        MatchRecord(21, 1001),
+        MatchRecord(25, 102),
+        MatchRecord(25, 1001),
+        MatchRecord(38, 104),
+        MatchRecord(38, 1001),
+        MatchRecord(39, 104),
+        MatchRecord(39, 1001),
+        MatchRecord(48, 105),
+        MatchRecord(48, 1001),
+        MatchRecord(53, 102),
+        MatchRecord(53, 1001),
+        MatchRecord(106, 202),
+        MatchRecord(118, 203),
+        MatchRecord(118, 1002),
+        MatchRecord(121, 201),
+        MatchRecord(121, 1002),
+        MatchRecord(125, 202),
+        MatchRecord(125, 1002),
+        MatchRecord(138, 204),
+        MatchRecord(139, 204),
+        MatchRecord(148, 205),
+        MatchRecord(148, 1002),
+        MatchRecord(153, 202),
+        MatchRecord(153, 1002),
+        MatchRecord(203, 301),
+        MatchRecord(208, 305),
+        MatchRecord(211, 302),
+        MatchRecord(223, 303),
+        MatchRecord(223, 1003),
+        MatchRecord(226, 301),
+        MatchRecord(226, 1003),
+        MatchRecord(230, 302),
+        MatchRecord(230, 1003),
+        MatchRecord(243, 304),
+        MatchRecord(243, 1003),
+        MatchRecord(244, 304),
+        MatchRecord(244, 1003),
+        MatchRecord(253, 305),
+        MatchRecord(253, 1003),
+        MatchRecord(258, 302),
+        MatchRecord(258, 1003),
+    };
+    ASSERT_EQ(46U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
+
+TEST(LogicalCombination, MultiCombQuietUniSub5) {
+    // Three combinations (1001-1003), each over its own group of five
+    // sub-expressions. Sub-expressions 101, 105, 202, 204, 303 and 305 are
+    // left non-quiet; the rest carry HS_FLAG_QUIET and are absent from the
+    // expected records.
+    const string corpus =
+        "abcdefxxfoobarrrghabcxdefxteakettleeeeexxxxijklmxxdef"
+        "-----------------------------------------------"
+        "cbbfedxxgoogleeecncbaxfedxhaystacksssssxxxxijkloxxfed"
+        "-----------------------------------------------"
+        "cabijklRfeexxgoobarrrjpcabxfeexshockwaveeeeexxxxijklsxxfee"
+        "------------------------------------------";
+    const char *patterns[] = {"abc",
+                              "def",
+                              "foobar.*gh",
+                              "teakettle{4,10}",
+                              "ijkl[mMn]",
+                              "cba",
+                              "fed",
+                              "google.*cn",
+                              "haystacks{4,8}",
+                              "ijkl[oOp]",
+                              "cab",
+                              "fee",
+                              "goobar.*jp",
+                              "shockwave{4,6}",
+                              "ijkl[rRs]",
+                              "(101 & 102 & 103) | (104 & !105)",
+                              "(201 | 202 & 203) & (!204 | 205)",
+                              "((301 | 302) & 303) & (304 | 305)"};
+    unsigned pattern_flags[] = {0,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_QUIET,
+                                0,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION,
+                                HS_FLAG_COMBINATION};
+    unsigned pattern_ids[] = {101, 102, 103, 104, 105,
+                              201, 202, 203, 204, 205,
+                              301, 302, 303, 304, 305,
+                              1001, 1002, 1003};
+
+    hs_database_t *database = nullptr;
+    hs_compile_error_t *compile_error = nullptr;
+    hs_error_t status =
+        hs_compile_multi(patterns, pattern_flags, pattern_ids, 18,
+                         HS_MODE_NOSTREAM, nullptr, &database, &compile_error);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(database != nullptr);
+
+    hs_scratch_t *scratch_space = nullptr;
+    status = hs_alloc_scratch(database, &scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+    ASSERT_TRUE(scratch_space != nullptr);
+
+    CallBackContext ctx;
+    ctx.halt = 0;
+    status = hs_scan(database, corpus.c_str(), corpus.size(), 0, scratch_space,
+                     record_cb, static_cast<void *>(&ctx));
+    ASSERT_EQ(HS_SUCCESS, status);
+
+    // Records expected from the callback, in delivery order.
+    const MatchRecord expected[] = {
+        MatchRecord(3, 101),
+        MatchRecord(18, 1001),
+        MatchRecord(21, 101),
+        MatchRecord(21, 1001),
+        MatchRecord(25, 1001),
+        MatchRecord(38, 1001),
+        MatchRecord(39, 1001),
+        MatchRecord(48, 105),
+        MatchRecord(48, 1001),
+        MatchRecord(53, 1001),
+        MatchRecord(106, 202),
+        MatchRecord(118, 1002),
+        MatchRecord(121, 1002),
+        MatchRecord(125, 202),
+        MatchRecord(125, 1002),
+        MatchRecord(138, 204),
+        MatchRecord(139, 204),
+        MatchRecord(148, 1002),
+        MatchRecord(153, 202),
+        MatchRecord(153, 1002),
+        MatchRecord(208, 305),
+        MatchRecord(223, 303),
+        MatchRecord(223, 1003),
+        MatchRecord(226, 1003),
+        MatchRecord(230, 1003),
+        MatchRecord(243, 1003),
+        MatchRecord(244, 1003),
+        MatchRecord(253, 305),
+        MatchRecord(253, 1003),
+        MatchRecord(258, 1003),
+    };
+    ASSERT_EQ(30U, ctx.matches.size());
+    for (size_t i = 0; i < ctx.matches.size(); ++i) {
+        ASSERT_EQ(expected[i], ctx.matches[i]);
+    }
+
+    hs_free_database(database);
+    status = hs_free_scratch(scratch_space);
+    ASSERT_EQ(HS_SUCCESS, status);
+}
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
case '8': *flags |= HS_FLAG_UTF8; break;
case 'P': *flags |= HS_FLAG_PREFILTER; break;
case 'L': *flags |= HS_FLAG_SOM_LEFTMOST; break;
+ case 'C': *flags |= HS_FLAG_COMBINATION; break;
+ case 'Q': *flags |= HS_FLAG_QUIET; break;
default: fbreak;
}
}
enum ParamKey key = PARAM_NONE;
%%{
- single_flag = [ismW8HPLVO];
+ single_flag = [ismW8HPLVOCQ];
param = ('min_offset' @{ key = PARAM_MIN_OFFSET; } |
'max_offset' @{ key = PARAM_MAX_OFFSET; } |
'min_length' @{ key = PARAM_MIN_LENGTH; } |