/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "rose/rose_build.h"
+#include "rose/rose_internal.h"
#include "som/slot_manager_dump.h"
#include "util/bytecode_ptr.h"
#include "util/compile_error.h"
#include "util/target_info.h"
#include "util/verify_types.h"
+#include "util/ue2string.h"
#include <algorithm>
#include <cassert>
}
+/**
+ * \brief Append the first \p len characters of \p expression to \ref lit.
+ *
+ * The text is consumed by count, one character per iteration, and every
+ * character is pushed with the same \p nocase setting.
+ */
+void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
+                                       bool nocase) {
+    for (size_t pos = 0; pos != len; ++pos) {
+        lit.push_back(expression[pos], nocase);
+    }
+}
+
+/**
+ * \brief Build a literal expression directly from raw pattern text.
+ *
+ * Checks \p flags, sets the SOM type when HS_FLAG_SOM_LEFTMOST is given and
+ * then copies \p expLength characters of \p expression into \ref lit.
+ *
+ * Throws CompileError when \p flags has bits outside HS_FLAG_ALL, or when
+ * HS_FLAG_SINGLEMATCH is combined with HS_FLAG_SOM_LEFTMOST.
+ */
+ParsedLitExpression::ParsedLitExpression(unsigned index_in,
+                                         const char *expression,
+                                         size_t expLength, unsigned flags,
+                                         ReportID report)
+    : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
+           SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
+    // The following 'HS_FLAG_'s have no use for a pure literal expression:
+    // DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
+
+    const bool has_unknown_bits = (flags & ~HS_FLAG_ALL) != 0;
+    if (has_unknown_bits) {
+        DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
+        throw CompileError("Unrecognised flag.");
+    }
+
+    const bool single_match = (flags & HS_FLAG_SINGLEMATCH) != 0;
+    const bool som_leftmost = (flags & HS_FLAG_SOM_LEFTMOST) != 0;
+
+    // FIXME: we disallow highlander + SOM, see UE-1850.
+    if (single_match && som_leftmost) {
+        throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
+                           "combination with HS_FLAG_SOM_LEFTMOST.");
+    }
+
+    // Leftmost start-of-match was requested for this pattern.
+    if (som_leftmost) {
+        expr.som = SOM_LEFT;
+    }
+
+    // Transfer expression text into ue2_literal; caselessness applies to the
+    // whole pattern.
+    const bool caseless = (flags & HS_FLAG_CASELESS) != 0;
+    parseLiteral(expression, expLength, caseless);
+}
+
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID report,
const hs_expr_ext *ext)
}
}
+/**
+ * \brief Add one pure literal pattern to the compiler.
+ *
+ * \param ng          Compiler state that receives the literal.
+ * \param index       Index of the pattern in the caller's arrays.
+ * \param expression  Pattern text; must not be null.
+ * \param flags       HS_FLAG_* bits for this pattern.
+ * \param ext         Extended parameters; rejected when any flag is set.
+ * \param id          Report ID for matches of this pattern.
+ * \param expLength   Number of characters of \p expression to use.
+ *
+ * Throws CompileError when extended parameters are supplied, when
+ * \p expLength exceeds the configured pattern length limit, or when \p flags
+ * contains a flag the literal API does not support.
+ */
+void addLitExpression(NG &ng, unsigned index, const char *expression,
+                      unsigned flags, const hs_expr_ext *ext, ReportID id,
+                      size_t expLength) {
+    assert(expression);
+    const CompileContext &cc = ng.cc;
+    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index,
+                 id, flags, expression, expLength);
+
+    // Extended parameters are not supported for pure literal patterns.
+    if (ext && ext->flags != 0LLU) {
+        throw CompileError("Extended parameters are not supported for pure "
+                           "literal matching API.");
+    }
+
+    // Ensure that our pattern isn't too long (in characters). The length is
+    // taken from expLength, not strlen(): a pure literal may contain embedded
+    // '\0' characters, which would make strlen() under-count and let an
+    // over-long pattern slip past the limit.
+    if (expLength > cc.grey.limitPatternLength) {
+        throw CompileError("Pattern length exceeds limit.");
+    }
+
+    // filter out flags not supported by pure literal API.
+    u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
+                         HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
+                         HS_FLAG_QUIET;
+
+    if (flags & not_supported) {
+        throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
+                           "HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
+                           "supported in literal API.");
+    }
+
+    // This expression must be a pure literal, we can build ue2_literal
+    // directly based on expression text.
+    ParsedLitExpression ple(index, expression, expLength, flags, id);
+
+    // Feed the ue2_literal into Rose.
+    // NOTE(review): when addLiteral() returns false nothing else is done with
+    // the pattern here -- confirm that this cannot drop a literal silently.
+    const auto &expr = ple.expr;
+    if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander,
+                      expr.som, expr.quiet)) {
+        DEBUG_PRINTF("took pure literal\n");
+        return;
+    }
+}
+
static
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
const u32 minWidth =
}
-struct hs_database *build(NG &ng, unsigned int *length) {
+struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
assert(length);
auto rose = generateRoseEngine(ng);
+ struct RoseEngine *roseHead = rose.get();
+ roseHead->pureLiteral = pureFlag;
+
if (!rose) {
throw CompileError("Unable to generate bytecode.");
}
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "compiler/expression_info.h"
#include "parser/Component.h"
#include "util/noncopyable.h"
+#include "util/ue2string.h"
#include <memory>
std::unique_ptr<Component> component;
};
+
+/** \brief Class gathering together the pieces of a parsed lit-expression.
+ *
+ * Holds the expression information and the literal built for one pattern
+ * that is supplied as pure literal text. The constructor checks the flags
+ * and fills in both members; parseLiteral() appends \p len characters of
+ * \p expression to \ref lit, all with the same \p nocase setting.
+ */
+class ParsedLitExpression : noncopyable {
+public:
+ ParsedLitExpression(unsigned index, const char *expression,
+ size_t expLength, unsigned flags, ReportID report);
+
+ void parseLiteral(const char *expression, size_t len, bool nocase);
+
+ /** \brief Expression information (from flags, extparam etc) */
+ ExpressionInfo expr;
+
+ /** \brief The lit-expression text converted into the Hyperscan literal
+ * type. */
+ ue2_literal lit;
+};
+
/**
* \brief Class gathering together the pieces of an expression that has been
* built into an NFA graph.
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID report);
+void addLitExpression(NG &ng, unsigned index, const char *expression,
+ unsigned flags, const hs_expr_ext *ext, ReportID id,
+ size_t expLength);
+
/**
* Build a Hyperscan database out of the expressions we've been given. A
* fatal error will result in an exception being thrown.
* The global NG object.
* @param[out] length
* The number of bytes occupied by the compiled structure.
+ * @param pureFlag
+ * Indicates whether the build was invoked from the pure literal API
+ * (non-zero) or from the regular expression API (zero).
* @return
* The compiled structure. Should be deallocated with the
* hs_database_free() function.
*/
-struct hs_database *build(NG &ng, unsigned int *length);
+struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag);
/**
* Constructs an NFA graph from the given expression tree.
for (size_t i = 0; i < cnt; i++) {
u32 bucket1 = group[i].first;
u32 id1 = group[i].second;
- if (lits[id1].pure) {
- continue;
- }
buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
exception_map);
}
u8 size;
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
u8 next;
- u8 pure; //!< The pass-on of pure flag from hwlmLiteral.
};
#define FDRC_FLAG_NO_CONFIRM 1
info.flags = flags;
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
info.groups = lit.groups;
- info.pure = lit.pure;
// these are built up assuming a LE machine
CONF_TYPE msk = all_ones;
u8 oldNext; // initialized in loop
do {
assert(ISALIGNED(li));
- scratch->pure = li->pure;
if (unlikely((conf_key & li->msk) != li->v)) {
goto out;
li++;
} while (oldNext);
scratch->fdr_conf = NULL;
- scratch->pure = 0;
}
#endif
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
ng.rm.logicalKeyRenumber();
unsigned length = 0;
- struct hs_database *out = build(ng, &length);
+ struct hs_database *out = build(ng, &length, 0);
assert(out); // should have thrown exception on error
assert(length);
}
}
+/**
+ * \brief Compile a set of pure literal patterns into a database.
+ *
+ * \p flags, \p ids and \p ext may be null; a missing array contributes 0
+ * (flags, ids) or nullptr (ext) for every pattern. \p expressions, \p lens,
+ * \p db and \p comp_error must be non-null and \p elements must be non-zero.
+ *
+ * On success returns HS_SUCCESS with \p *db set and \p *comp_error cleared.
+ * On failure \p *db is cleared (when \p db is usable), \p *comp_error
+ * describes the problem (when \p comp_error is usable) and HS_COMPILER_ERROR
+ * is returned, or HS_ARCH_ERROR when the FAT_RUNTIME architecture check
+ * fails.
+ */
+hs_error_t
+hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags,
+                         const unsigned *ids, const hs_expr_ext *const *ext,
+                         const size_t *lens, unsigned elements, unsigned mode,
+                         const hs_platform_info_t *platform, hs_database_t **db,
+                         hs_compile_error_t **comp_error, const Grey &g) {
+    // Check the args: note that it's OK for flags, ids or ext to be null.
+    if (!comp_error) {
+        // nowhere to write the string, but we can still report an error code
+        if (db) {
+            *db = nullptr;
+        }
+        return HS_COMPILER_ERROR;
+    }
+    if (!db) {
+        *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
+        return HS_COMPILER_ERROR;
+    }
+
+    // Both output pointers are usable from here on. Every remaining argument
+    // failure clears the database and records a message without an index.
+    const auto reject = [db, comp_error](const char *reason) -> hs_error_t {
+        *db = nullptr;
+        *comp_error = generateCompileError(reason, -1);
+        return HS_COMPILER_ERROR;
+    };
+
+    if (!expressions) {
+        return reject("Invalid parameter: expressions is NULL");
+    }
+    if (!lens) {
+        return reject("Invalid parameter: len is NULL");
+    }
+    if (elements == 0) {
+        return reject("Invalid parameter: elements is zero");
+    }
+
+#if defined(FAT_RUNTIME)
+    if (!check_ssse3()) {
+        *db = nullptr;
+        *comp_error = generateCompileError("Unsupported architecture", -1);
+        return HS_ARCH_ERROR;
+    }
+#endif
+
+    // The mode is validated first; the platform is only looked at when the
+    // mode is acceptable. Whichever check fails has set *comp_error.
+    if (!checkMode(mode, comp_error) ||
+        !checkPlatform(platform, comp_error)) {
+        *db = nullptr;
+        assert(*comp_error);
+        return HS_COMPILER_ERROR;
+    }
+
+    if (elements > g.limitPatternCount) {
+        return reject("Number of patterns too large");
+    }
+
+    // This function is simply a wrapper around both the parser and compiler
+    const bool streaming = (mode & (HS_MODE_STREAM | HS_MODE_VECTORED)) != 0;
+    const bool vectored = (mode & HS_MODE_VECTORED) != 0;
+    const unsigned som_precision = getSomPrecision(mode);
+
+    target_t target = platform ? target_t(*platform) : get_current_target();
+
+    try {
+        CompileContext cc(streaming, vectored, target, g);
+        NG ng(cc, elements, som_precision);
+
+        for (unsigned int idx = 0; idx < elements; idx++) {
+            // Optional arrays fall back to neutral values when absent.
+            const unsigned pat_flags = flags ? flags[idx] : 0;
+            const hs_expr_ext *pat_ext = ext ? ext[idx] : nullptr;
+            const unsigned pat_id = ids ? ids[idx] : 0;
+
+            // Add this expression to the compiler
+            try {
+                addLitExpression(ng, idx, expressions[idx], pat_flags,
+                                 pat_ext, pat_id, lens[idx]);
+            } catch (CompileError &e) {
+                /* Tag the error with the index of the offending pattern and
+                 * rethrow the same object so that it is not sliced. */
+                e.setExpressionIndex(idx);
+                throw;
+            }
+        }
+
+        // Check sub-expression ids
+        ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
+        // Renumber and assign lkey to reports
+        ng.rm.logicalKeyRenumber();
+
+        unsigned length = 0;
+        struct hs_database *out = build(ng, &length, 1);
+
+        assert(out); // should have thrown exception on error
+        assert(length);
+
+        *db = out;
+        *comp_error = nullptr;
+
+        return HS_SUCCESS;
+    } catch (const CompileError &e) {
+        // Compiler error occurred
+        *db = nullptr;
+        *comp_error = generateCompileError(e.reason,
+                                           e.hasIndex ? (int)e.index : -1);
+        return HS_COMPILER_ERROR;
+    } catch (const std::bad_alloc &) {
+        *db = nullptr;
+        *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
+        return HS_COMPILER_ERROR;
+    } catch (...) {
+        assert(!"Internal errror, unexpected exception");
+        *db = nullptr;
+        *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
+        return HS_COMPILER_ERROR;
+    }
+}
+
} // namespace ue2
extern "C" HS_PUBLIC_API
platform, db, error, Grey());
}
+/**
+ * Compile a single pure literal pattern of \p len characters.
+ *
+ * The pattern is compiled with ID zero and without extended parameters by
+ * forwarding to hs_compile_lit_multi_int() with one element. A null
+ * \p expression is rejected here with HS_COMPILER_ERROR.
+ */
+extern "C" HS_PUBLIC_API
+hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
+                                   const size_t len, unsigned mode,
+                                   const hs_platform_info_t *platform,
+                                   hs_database_t **db,
+                                   hs_compile_error_t **error) {
+    if (expression == nullptr) {
+        // hs_compile_lit_multi_int() tolerates a null db or error pointer, so
+        // this early exit must not write through either of them unchecked.
+        if (db) {
+            *db = nullptr;
+        }
+        if (error) {
+            *error = generateCompileError(
+                "Invalid parameter: expression is NULL", -1);
+        }
+        return HS_COMPILER_ERROR;
+    }
+
+    unsigned id = 0; // single expressions get zero as an ID
+    const hs_expr_ext * const *ext = nullptr; // unused for this call.
+
+    return hs_compile_lit_multi_int(&expression, &flags, &id, ext, &len, 1,
+                                    mode, platform, db, error, Grey());
+}
+
+/**
+ * Compile a set of pure literal patterns by forwarding every argument to
+ * hs_compile_lit_multi_int() with a default-constructed Grey.
+ */
+extern "C" HS_PUBLIC_API
+hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
+                                         const unsigned *flags,
+                                         const unsigned *ids,
+                                         const size_t *lens,
+                                         unsigned elements, unsigned mode,
+                                         const hs_platform_info_t *platform,
+                                         hs_database_t **db,
+                                         hs_compile_error_t **error) {
+    // Extended parameters are unused for this call, so a null array is
+    // handed to the internal entry point.
+    return hs_compile_lit_multi_int(expressions, flags, ids,
+                                    /* ext */ nullptr, lens, elements, mode,
+                                    platform, db, error, Grey());
+}
+
static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
const hs_expr_ext_t *ext, unsigned int mode,
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
+ * - HS_FLAG_COMBINATION - Parse the expression in logical combination
+ * syntax.
+ * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
+ * the sub-expressions in logical combinations.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
+ * - HS_FLAG_COMBINATION - Parse the expression in logical combination
+ * syntax.
+ * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
+ * the sub-expressions in logical combinations.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
+ * - HS_FLAG_COMBINATION - Parse the expression in logical combination
+ * syntax.
+ * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
+ * the sub-expressions in logical combinations.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
+/**
+ * The basic pure literal expression compiler.
+ *
+ * This is the function call with which a pure literal expression (not a
+ * common regular expression) is compiled into a Hyperscan database which
+ * can be passed to the runtime functions (such as @ref hs_scan(),
+ * @ref hs_open_stream(), etc.)
+ *
+ * @param expression
+ * The NULL-terminated expression to parse. Note that this string must
+ * represent ONLY the pattern to be matched, with no delimiters or flags;
+ * any global flags should be specified with the @p flags argument. For
+ * example, the expression `/abc?def/i` should be compiled by providing
+ * `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
+ * flags. Meanwhile, the string content shall be fully parsed in a literal
+ * sense without any regular grammars. For example, the @p expression
+ * `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?`
+ * here doesn't mean 0 or 1 quantifier under regular semantics.
+ *
+ * @param flags
+ * Flags which modify the behaviour of the expression. Multiple flags may
+ * be used by ORing them together. Compared to @ref hs_compile(), fewer
+ * valid values are provided:
+ * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
+ * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
+ * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
+ * expression per stream.
+ * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
+ * when a match is found.
+ *
+ * @param len
+ * The length of the text content of the pure literal expression. The
+ * text indicated by @p expression is processed one character at a time,
+ * so the character `\0` may appear inside the expression and is not
+ * treated as a string terminator. The end of a pure literal expression
+ * is therefore determined by this length rather than by the first `\0`.
+ *
+ * @param mode
+ * Compiler mode flags that affect the database as a whole. One of @ref
+ * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
+ * supplied, to select between the generation of a streaming, block or
+ * vectored database. In addition, other flags (beginning with HS_MODE_)
+ * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
+ * more details.
+ *
+ * @param platform
+ * If not NULL, the platform structure is used to determine the target
+ * platform for the database. If NULL, a database suitable for running
+ * on the current host platform is produced.
+ *
+ * @param db
+ * On success, a pointer to the generated database will be returned in
+ * this parameter, or NULL on failure. The caller is responsible for
+ * deallocating the buffer using the @ref hs_free_database() function.
+ *
+ * @param error
+ * If the compile fails, a pointer to a @ref hs_compile_error_t will be
+ * returned, providing details of the error condition. The caller is
+ * responsible for deallocating the buffer using the @ref
+ * hs_free_compile_error() function.
+ *
+ * @return
+ * @ref HS_SUCCESS is returned on successful compilation; @ref
+ * HS_COMPILER_ERROR on failure, with details provided in the error
+ * parameter.
+ */
+hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
+ const size_t len, unsigned mode,
+ const hs_platform_info_t *platform,
+ hs_database_t **db,
+ hs_compile_error_t **error);
+/**
+ * The multiple pure literal expression compiler.
+ *
+ * This is the function call with which a set of pure literal expressions is
+ * compiled into a database which can be passed to the runtime functions (such
+ * as @ref hs_scan(), @ref hs_open_stream(), etc.) Each expression can be
+ * labelled with a unique integer which is passed into the match callback to
+ * identify the pattern that has matched.
+ *
+ * @param expressions
+ * Array of NULL-terminated expressions to parse. Note that each string
+ * must represent ONLY the pattern to be matched, with no delimiters or
+ * flags; any global flags should be specified with the @p flags
+ * argument. For example, the expression `/abc?def/i` should be compiled
+ * by providing `abc?def` as an entry of @p expressions, and @ref
+ * HS_FLAG_CASELESS as the corresponding entry of @p flags. Meanwhile,
+ * each string shall be fully parsed in a literal sense without any
+ * regular grammars. For example, the expression `abc?` simply means a
+ * char sequence of `a`, `b`, `c`, and `?`. The `?` here doesn't mean
+ * 0 or 1 quantifier under regular semantics.
+ *
+ * @param flags
+ * Array of flags which modify the behaviour of each expression. Multiple
+ * flags may be used by ORing them together. Specifying the NULL pointer
+ * in place of an array will set the flags value for all patterns to zero.
+ * Compared to @ref hs_compile_multi(), fewer valid values are provided:
+ * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
+ * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
+ * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
+ * expression per stream.
+ * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
+ * when a match is found.
+ *
+ * @param ids
+ * An array of integers specifying the ID number to be associated with the
+ * corresponding pattern in the expressions array. Specifying the NULL
+ * pointer in place of an array will set the ID value for all patterns to
+ * zero.
+ *
+ * @param lens
+ * Array of lengths of the text content of each pure literal expression.
+ * The text of each entry of @p expressions is processed one character at
+ * a time, so the character `\0` may appear inside an expression and is
+ * not treated as a string terminator. The end of each pure literal
+ * expression is therefore determined by its length rather than by the
+ * first `\0`.
+ *
+ * @param elements
+ * The number of elements in the input arrays.
+ *
+ * @param mode
+ * Compiler mode flags that affect the database as a whole. One of @ref
+ * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
+ * supplied, to select between the generation of a streaming, block or
+ * vectored database. In addition, other flags (beginning with HS_MODE_)
+ * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
+ * more details.
+ *
+ * @param platform
+ * If not NULL, the platform structure is used to determine the target
+ * platform for the database. If NULL, a database suitable for running
+ * on the current host platform is produced.
+ *
+ * @param db
+ * On success, a pointer to the generated database will be returned in
+ * this parameter, or NULL on failure. The caller is responsible for
+ * deallocating the buffer using the @ref hs_free_database() function.
+ *
+ * @param error
+ * If the compile fails, a pointer to a @ref hs_compile_error_t will be
+ * returned, providing details of the error condition. The caller is
+ * responsible for deallocating the buffer using the @ref
+ * hs_free_compile_error() function.
+ *
+ * @return
+ * @ref HS_SUCCESS is returned on successful compilation; @ref
+ * HS_COMPILER_ERROR on failure, with details provided in the error
+ * parameter.
+ */
+hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
+ const unsigned *flags,
+ const unsigned *ids,
+ const size_t *lens,
+ unsigned elements, unsigned mode,
+ const hs_platform_info_t *platform,
+ hs_database_t **db,
+ hs_compile_error_t **error);
+
/**
* Free an error structure generated by @ref hs_compile(), @ref
* hs_compile_multi() or @ref hs_compile_ext_multi().
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
+ * - HS_FLAG_COMBINATION - Parse the expression in logical combination
+ * syntax.
+ * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
+ * the sub-expressions in logical combinations.
*
* @param info
* On success, a pointer to the pattern information will be returned in
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
+ * - HS_FLAG_COMBINATION - Parse the expression in logical combination
+ * syntax.
+ * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
+ * the sub-expressions in logical combinations.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
hs_database_t **db,
hs_compile_error_t **comp_error, const Grey &g);
+/** \brief Internal use only: takes a Grey argument so that we can use it in
+ * tools. */
+hs_error_t hs_compile_lit_multi_int(const char *const *expressions,
+ const unsigned *flags, const unsigned *ids,
+ const hs_expr_ext *const *ext,
+ const size_t *lens, unsigned elements,
+ unsigned mode,
+ const hs_platform_info_t *platform,
+ hs_database_t **db,
+ hs_compile_error_t **comp_error,
+ const Grey &g);
} // namespace ue2
extern "C"
* \ref HWLM_MASKLEN. */
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
bool noruns_in, u32 id_in, hwlm_group_t groups_in,
- const vector<u8> &msk_in, const vector<u8> &cmp_in,
- bool pure_in)
+ const vector<u8> &msk_in, const vector<u8> &cmp_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
- groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) {
+ groups(groups_in), msk(msk_in), cmp(cmp_in) {
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
assert(msk.size() <= HWLM_MASKLEN);
assert(msk.size() == cmp.size());
*/
std::vector<u8> cmp;
- bool pure; //!< \brief The pass-on of pure flag from LitFragment.
-
/** \brief Complete constructor, takes group information and msk/cmp.
*
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
* \ref HWLM_MASKLEN. */
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
u32 id_in, hwlm_group_t groups_in,
- const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in,
- bool pure_in = false);
+ const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
/** \brief Simple constructor: no group information, no msk/cmp.
*
return false;
}
- vis.lit.set_pure();
const ue2_literal &lit = vis.lit;
if (lit.empty()) {
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
assert(id && id < t->size); // id is an offset into bytecode
const u64a som = 0;
const u8 flags = 0;
- if (!scratch->pure) {
- return roseRunProgram(t, scratch, id, som, end, flags);
- } else {
+ if (t->pureLiteral) {
return roseRunProgram_l(t, scratch, id, som, end, flags);
+ } else {
+ return roseRunProgram(t, scratch, id, som, end, flags);
}
}
// Our match ID is the program offset.
const u32 program = id;
const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
- hwlmcb_rv_t rv =
- roseRunProgram(rose, scratch, program, start, end, flags);
+ hwlmcb_rv_t rv;
+ if (rose->pureLiteral) {
+ rv = roseRunProgram_l(rose, scratch, program, start, end, flags);
+ } else {
+ rv = roseRunProgram(rose, scratch, program, start, end, flags);
+ }
if (rv == HWLM_TERMINATE_MATCHING) {
return MO_HALT_MATCHING;
}
assert(programOffset >= sizeof(struct RoseEngine));
assert(programOffset < t->size);
+ const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
const char *pc_base = getByOffset(t, programOffset);
}
L_PROGRAM_NEXT_INSTRUCTION
+ L_PROGRAM_CASE(CHECK_GROUPS) {
+ DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
+ tctxt->groups, ri->groups);
+ if (!(ri->groups & tctxt->groups)) {
+ DEBUG_PRINTF("halt: no groups are set\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+ }
+ L_PROGRAM_NEXT_INSTRUCTION
+
+ L_PROGRAM_CASE(CHECK_MASK) {
+ struct core_info *ci = &scratch->core_info;
+ if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
+ ri->neg_mask, ri->offset, end)) {
+ DEBUG_PRINTF("failed mask check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ L_PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ L_PROGRAM_NEXT_INSTRUCTION
+
+ L_PROGRAM_CASE(CHECK_MASK_32) {
+ struct core_info *ci = &scratch->core_info;
+ if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
+ ri->neg_mask, ri->offset, end)) {
+ assert(ri->fail_jump);
+ pc += ri->fail_jump;
+ L_PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ L_PROGRAM_NEXT_INSTRUCTION
+
+ L_PROGRAM_CASE(CHECK_BYTE) {
+ const struct core_info *ci = &scratch->core_info;
+ if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
+ ri->negation, ri->offset, end)) {
+ DEBUG_PRINTF("failed byte check\n");
+ assert(ri->fail_jump); // must progress
+ pc += ri->fail_jump;
+ L_PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ L_PROGRAM_NEXT_INSTRUCTION
+
+ L_PROGRAM_CASE(PUSH_DELAYED) {
+ rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end);
+ }
+ L_PROGRAM_NEXT_INSTRUCTION
+
L_PROGRAM_CASE(CATCH_UP) {
if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
L_PROGRAM_NEXT_INSTRUCTION
+ L_PROGRAM_CASE(REPORT_CHAIN) {
+ // Note: sequence points updated inside this function.
+ if (roseCatchUpAndHandleChainMatch(
+ t, scratch, ri->event, ri->top_squash_distance, end,
+ in_catchup) == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ work_done = 1;
+ }
+ L_PROGRAM_NEXT_INSTRUCTION
+
L_PROGRAM_CASE(REPORT) {
updateSeqPoint(tctxt, end, from_mpv);
if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
}
L_PROGRAM_NEXT_INSTRUCTION
+ L_PROGRAM_CASE(INCLUDED_JUMP) {
+ if (scratch->fdr_conf) {
+ // squash the bucket of included literal
+ u8 shift = scratch->fdr_conf_offset & ~7U;
+ u64a mask = ((~(u64a)ri->squash) << shift);
+ *(scratch->fdr_conf) &= mask;
+
+ pc = getByOffset(t, ri->child_offset);
+ pc_base = pc;
+ programOffset = (const u8 *)pc_base -(const u8 *)t;
+ DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
+ pc_base, pc, ri->child_offset, ri->squash);
+ work_done = 0;
+ L_PROGRAM_NEXT_INSTRUCTION_JUMP
+ }
+ }
+ L_PROGRAM_NEXT_INSTRUCTION
+
L_PROGRAM_CASE(SET_LOGICAL) {
DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
ri->lkey, ri->offset_adjust);
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
dumpString(lit.s).c_str());
-
- /** 0:/xxabcdefgh/ */
- /** 1:/yyabcdefgh/ */
- /** 2:/yyabcdefgh.+/ */
- // Above 3 patterns should firstly convert into RoseLiteralMap with
- // 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into
- // LitFragment with 1 element ("abcdefgh"). Special care should be
- // taken to handle the 'pure' flag during the conversion.
-
- rose_literal_id lit_frag = getFragment(lit);
- auto it = frag_info.find(lit_frag);
- if (it != frag_info.end()) {
- if (!lit_frag.s.get_pure() && it->first.s.get_pure()) {
- struct FragmentInfo f_info = it->second;
- f_info.lit_ids.push_back(lit_id);
- f_info.groups |= groups;
- frag_info.erase(it->first);
- frag_info.emplace(lit_frag, f_info);
- } else {
- it->second.lit_ids.push_back(lit_id);
- it->second.groups |= groups;
- }
- } else {
- struct FragmentInfo f_info;
- f_info.lit_ids.push_back(lit_id);
- f_info.groups |= groups;
- frag_info.emplace(lit_frag, f_info);
- }
+ auto &fi = frag_info[getFragment(lit)];
+ fi.lit_ids.push_back(lit_id);
+ fi.groups |= groups;
}
for (auto &m : frag_info) {
std::pair<u32, bool> insert(const rose_literal_id &lit) {
auto it = lits_index.find(lit);
if (it != lits_index.end()) {
- u32 idx = it->second;
- auto &l = lits.at(idx);
- if (!lit.s.get_pure() && l.s.get_pure()) {
- lits_index.erase(l);
- l.s.unset_pure();
- lits_index.emplace(l, idx);
- }
- return {idx, false};
+ return {it->second, false};
}
u32 id = verify_u32(lits.size());
lits.push_back(lit);
const auto &s_final = lit_final.get_string();
bool nocase = lit_final.any_nocase();
- bool pure = f.s.get_pure();
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n",
f.fragment_id, escapeString(s_final).c_str(), (int)nocase,
const auto &groups = f.groups;
mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id,
- groups, msk, cmp, pure);
+ groups, msk, cmp);
}
static
* nfas). Rose nfa info table can distinguish the cases.
*/
struct RoseEngine {
+ u8 pureLiteral; /* Indicator of pure literal API */
u8 noFloatingRoots; /* only need to run the anchored table if something
* matched in the anchored table */
u8 requiresEodCheck; /* stuff happens at eod time */
s->deduper.current_report_offset = ~0ULL;
s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
s->fdr_conf = NULL;
- s->pure = 0;
// Rose program execution (used for some report paths) depends on these
// values being initialised.
s->scratchSize = alloc_size;
s->scratch_alloc = (char *)s_tmp;
s->fdr_conf = NULL;
- s->pure = 0;
// each of these is at an offset from the previous
char *current = (char *)s + sizeof(*s);
u64a *fdr_conf; /**< FDR confirm value */
u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
* in buffer */
- u8 pure; /**< indicator of pure-literal or cutting-literal */
};
/* array of fatbit ptr; TODO: why not an array of fatbits? */
/*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
void ue2_literal::push_back(char c, bool nc) {
- assert(!nc || ourisalpha(c));
if (nc) {
c = mytoupper(c);
}
size_t hash() const;
- void set_pure() { pure = true; }
- void unset_pure() { pure = false; }
- bool get_pure() const { return pure; }
-
- /* TODO: consider existing member functions possibly related with pure. */
-
private:
friend const_iterator;
std::string s;
boost::dynamic_bitset<> nocase;
- bool pure = false; /**< born from cutting or not (pure literal). */
};
/// Return a reversed copy of this literal.
/*
- * Copyright (c) 2016-2018, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
extern bool forceEditDistance;
extern unsigned editDistance;
extern bool printCompressSize;
+extern bool useLiteralApi;
/** Structure for the result of a single complete scan. */
struct ResultEntry {
/*
- * Copyright (c) 2016-2018, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
ext_ptr[i] = &ext[i];
}
- Timer timer;
- timer.start();
-
hs_compile_error_t *compile_err;
+ Timer timer;
-#ifndef RELEASE_BUILD
- err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
- ext_ptr.data(), count, full_mode, nullptr,
- &db, &compile_err, grey);
-#else
- err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(),
- ext_ptr.data(), count, full_mode, nullptr,
- &db, &compile_err);
-#endif
+ if (useLiteralApi) {
+ // Pattern length computation should be done before timer start.
+ vector<size_t> lens(count);
+ for (unsigned int i = 0; i < count; i++) {
+ lens[i] = strlen(patterns[i]);
+ }
+ timer.start();
+ err = hs_compile_lit_multi_int(patterns.data(), flags.data(),
+ ids.data(), ext_ptr.data(),
+ lens.data(), count, full_mode,
+ nullptr, &db, &compile_err, grey);
+ timer.complete();
+ } else {
+ timer.start();
+ err = hs_compile_multi_int(patterns.data(), flags.data(),
+ ids.data(), ext_ptr.data(), count,
+ full_mode, nullptr, &db, &compile_err,
+ grey);
+ timer.complete();
+ }
- timer.complete();
compileSecs = timer.seconds();
peakMemorySize = getPeakHeap();
/*
- * Copyright (c) 2016-2018, Intel Corporation
+ * Copyright (c) 2016-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
bool forceEditDistance = false;
unsigned editDistance = 0;
bool printCompressSize = false;
+bool useLiteralApi = false;
// Globals local to this file.
static bool compressStream = false;
printf(" --per-scan Display per-scan Mbit/sec results.\n");
printf(" --echo-matches Display all matches that occur during scan.\n");
printf(" --sql-out FILE Output sqlite db.\n");
+ printf(" --literal-on Use Hyperscan pure literal matching.\n");
printf(" -S NAME Signature set name (for sqlite db).\n");
printf("\n\n");
int do_echo_matches = 0;
int do_sql_output = 0;
int option_index = 0;
+ int literalFlag = 0;
vector<string> sigFiles;
static struct option longopts[] = {
{"echo-matches", no_argument, &do_echo_matches, 1},
{"compress-stream", no_argument, &do_compress, 1},
{"sql-out", required_argument, &do_sql_output, 1},
+ {"literal-on", no_argument, &literalFlag, 1},
{nullptr, 0, nullptr, 0}
};
loadSignatureList(file, sigs);
sigSets.emplace_back(file, move(sigs));
}
+
+ useLiteralApi = (bool)literalFlag;
}
/** Start the global timer. */
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
bool g_forceEditDistance = false;
bool build_sigs = false;
bool check_logical = false;
+bool use_literal_api = false;
unsigned int g_signature;
unsigned int g_editDistance;
unsigned int globalFlags = 0;
#if !defined(RELEASE_BUILD)
// This variant is available in non-release builds and allows us to
// modify greybox settings.
- err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1, mode,
- nullptr, &db, &compile_err, *g_grey);
+ if (use_literal_api) {
+ // The literal compile call is given the pattern length explicitly.
+ size_t len = strlen(regexp);
+ err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp,
+ &len, 1, mode, nullptr, &db,
+ &compile_err, *g_grey);
+ } else {
+ err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1,
+ mode, nullptr, &db, &compile_err,
+ *g_grey);
+ }
#else
- err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1, mode,
- nullptr, &db, &compile_err);
+ if (use_literal_api) {
+ // NOTE(review): this release-build path still calls the _int variant
+ // with *g_grey, which the comment above describes as non-release
+ // only -- confirm this is intended.
+ size_t len = strlen(regexp);
+ err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp,
+ &len, 1, mode, nullptr, &db,
+ &compile_err, *g_grey);
+ } else {
+ err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1,
+ mode, nullptr, &db, &compile_err);
+ }
#endif
if (err == HS_SUCCESS) {
ExprExtMap::const_iterator it;
while (getNextLogicalExpression(it)) {
+ if (use_literal_api) {
+ recordSuccess(g_exprMap, it->first);
+ continue;
+ }
+
const ParsedExpr &comb = it->second;
vector<unsigned> subIds;
<< " -h Display this help." << endl
<< " -B Build signature set." << endl
<< " -C Check logical combinations (default: off)." << endl
+ << " --literal-on Processing pure literals, no need to check." << endl
<< endl;
}
void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
const char options[] = "e:E:s:z:hHLNV8G:T:BC";
bool signatureSet = false;
+ int literalFlag = 0;
+
+ static struct option longopts[] = {
+ {"literal-on", no_argument, &literalFlag, 1},
+ {nullptr, 0, nullptr, 0}
+ };
for (;;) {
- int c = getopt_long(argc, argv, options, nullptr, nullptr);
+ int c = getopt_long(argc, argv, options, longopts, nullptr);
if (c < 0) {
break;
}
case 'C':
check_logical = true;
break;
+ case 0:
+ case 1:
+ break;
default:
usage();
exit(1);
usage();
exit(1);
}
+
+ use_literal_api = (bool)literalFlag;
}
static
#include "parser/Parser.h"
#include "parser/parse_error.h"
#include "util/make_unique.h"
+#include "util/string_util.h"
#include "util/unicode_def.h"
#include "util/unordered.h"
return false;
}
+ if (use_literal_api) {
+ // filter out flags not supported by pure literal API.
+ u32 not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
+ HS_FLAG_UCP | HS_FLAG_PREFILTER;
+ hs_flags &= ~not_supported;
+ force_utf8 = false;
+ force_prefilter = false;
+ }
+
expr.swap(regex);
if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som,
throw PcreCompileFailure("Unable to decode flags.");
}
+    // When the Hyperscan literal API is in use, rewrite the pattern text as a
+    // hex-escaped string via makeHex(). Combination expressions are left
+    // untouched.
+    if (use_literal_api && !combination) {
+        // makeHex() takes a const pointer, so constness need not be cast away.
+        const unsigned char *pat =
+            reinterpret_cast<const unsigned char *>(re.c_str());
+        char *str = makeHex(pat, re.length());
+        if (!str) {
+            throw PcreCompileFailure("makeHex() malloc failure.");
+        }
+        re.assign(str);
+        free(str); // release the buffer returned by makeHex()
+    }
+
// filter out flags not supported by PCRE
u64a supported = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET |
HS_EXT_FLAG_MIN_LENGTH;
+ if (use_literal_api) {
+ ext.flags &= 0ULL;
+ ext.min_offset = 0;
+ ext.max_offset = MAX_OFFSET;
+ ext.min_length = 0;
+ ext.edit_distance = 0;
+ ext.hamming_distance = 0;
+ }
if (ext.flags & ~supported) {
// edit distance is a known unsupported flag, so just throw a soft error
if (ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) {
return compiled;
}
-
compiled->bytecode =
pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr);
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "ng_corpus_generator.h"
#include "NfaGeneratedCorpora.h"
#include "ExpressionParser.h"
+#include "common.h"
#include "grey.h"
#include "hs_compile.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/report_manager.h"
+#include "util/string_util.h"
#include "util/target_info.h"
#include <string>
throw CorpusFailure("Expression could not be read: " + i->second);
}
+    // When the Hyperscan literal API is in use, rewrite the pattern text as a
+    // hex-escaped string via makeHex(). Expressions flagged with
+    // HS_FLAG_COMBINATION are left untouched.
+    if (use_literal_api && !(hs_flags & HS_FLAG_COMBINATION)) {
+        // makeHex() takes a const pointer, so constness need not be cast away.
+        const unsigned char *pat =
+            reinterpret_cast<const unsigned char *>(re.c_str());
+        char *str = makeHex(pat, re.length());
+        if (!str) {
+            throw CorpusFailure("makeHex() malloc failure.");
+        }
+        re.assign(str);
+        free(str); // release the buffer returned by makeHex()
+    }
+
// Combination's corpus is consist of sub-expressions' corpuses.
if (hs_flags & HS_FLAG_COMBINATION) {
ParsedLogical pl;
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
const unsigned count = patterns.size();
hs_database_t *db = nullptr;
hs_compile_error_t *compile_err;
+ hs_error_t err;
- hs_error_t err = hs_compile_multi_int(&patterns[0], &flags[0],
- &idsvec[0], ext.c_array(), count,
- mode, platform, &db,
- &compile_err, grey);
+ if (use_literal_api) {
+ // Compute length of each pattern.
+ vector<size_t> lens(count);
+ for (unsigned int i = 0; i < count; i++) {
+ lens[i] = strlen(patterns[i]);
+ }
+ err = hs_compile_lit_multi_int(&patterns[0], &flags[0], &idsvec[0],
+ ext.c_array(), &lens[0], count, mode,
+ platform, &db, &compile_err, grey);
+ } else {
+ err = hs_compile_multi_int(&patterns[0], &flags[0], &idsvec[0],
+ ext.c_array(), count, mode, platform, &db,
+ &compile_err, grey);
+ }
if (err != HS_SUCCESS) {
error = compile_err->message;
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
printf(" --abort-on-fail Abort, rather than exit, on failure.\n");
printf(" --no-signal-handler Do not handle handle signals (to generate "
"backtraces).\n");
+ printf(" --literal-on Use Hyperscan pure literal matching.\n");
printf("\n");
printf("Memory and resource control options:\n");
printf("\n");
int mangleScratch = 0;
int compressFlag = 0;
int compressResetFlag = 0;
+ int literalFlag = 0;
static const struct option longopts[] = {
{"copy-scratch", 0, ©Scratch, 1},
{"copy-stream", 0, ©Stream, 1},
{"compress-expand", 0, &compressFlag, 1},
{"compress-reset-expand", 0, &compressResetFlag, 1},
{"no-groups", 0, &no_groups, 1},
+ {"literal-on", 0, &literalFlag, 1},
{nullptr, 0, nullptr, 0}};
for (;;) {
use_mangle_scratch = (bool) mangleScratch;
use_compress_expand = (bool)compressFlag;
use_compress_reset_expand = (bool)compressResetFlag;
+ use_literal_api = (bool)literalFlag;
}
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
extern bool use_mangle_scratch;
extern bool use_compress_expand;
extern bool use_compress_reset_expand;
+extern bool use_literal_api;
extern int abort_on_failure;
extern int no_signal_handler;
extern bool force_edit_distance;
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
bool use_mangle_scratch = false;
bool use_compress_expand = false;
bool use_compress_reset_expand = false;
+bool use_literal_api = false;
int abort_on_failure = 0;
int no_signal_handler = 0;
size_t max_scan_queue_len = 25000;
/*
- * Copyright (c) 2015-2018, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
bool force_edit_distance = false;
u32 edit_distance = 0;
+int use_literal_api = 0;
+
} // namespace
// Usage statement.
printf(" -8 Force UTF8 mode on all patterns.\n");
printf(" -L Apply HS_FLAG_SOM_LEFTMOST to all patterns.\n");
printf(" --prefilter Apply HS_FLAG_PREFILTER to all patterns.\n");
+ printf(" --literal-on Use Hyperscan pure literal matching API.\n");
printf("\n");
printf("Example:\n");
printf("$ %s -e pattern.file -s sigfile\n", name);
{"utf8", no_argument, nullptr, '8'},
{"prefilter", no_argument, &force_prefilter, 1},
{"som-width", required_argument, nullptr, 'd'},
+ {"literal-on", no_argument, &use_literal_api, 1},
{nullptr, 0, nullptr, 0}
};
hs_database_t *db = nullptr;
hs_compile_error_t *compile_err;
- hs_error_t err = hs_compile_multi_int(
- patterns.data(), flags.data(), ids.data(), ext.c_array(),
- patterns.size(), mode, plat_info.get(), &db, &compile_err, grey);
+ hs_error_t err;
+ const size_t count = patterns.size();
+ if (use_literal_api) {
+ // Compute length of each pattern.
+ vector<size_t> lens(count);
+ for (unsigned int i = 0; i < count; i++) {
+ lens[i] = strlen(patterns[i]);
+ }
+ err = hs_compile_lit_multi_int(patterns.data(), flags.data(),
+ ids.data(), ext.c_array(), lens.data(),
+ count, mode, plat_info.get(), &db,
+ &compile_err, grey);
+ } else {
+ err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
+ ext.c_array(), count, mode, plat_info.get(),
+ &db, &compile_err, grey);
+ }
if (err != HS_SUCCESS) {
if (compile_err && compile_err->message) {
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2019, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
}
}
+// Convert the given byte string into a hex-escaped pattern: every input byte
+// becomes the four characters "\xNN" (two lowercase hex digits).
+//
+// pat:    bytes to convert; patlen bytes are read.
+// patlen: number of bytes in pat.
+//
+// Returns a NUL-terminated buffer obtained from malloc(), or nullptr if the
+// allocation fails. The caller is responsible for free()ing the result.
+static really_inline
+char *makeHex(const unsigned char *pat, unsigned patlen) {
+    // Widen before multiplying so the length cannot wrap in 32-bit arithmetic.
+    size_t hexlen = (size_t)patlen * 4;
+    char *hexbuf = (char *)malloc(hexlen + 1);
+    if (!hexbuf) {
+        // Report the failure to the caller instead of writing through null.
+        return nullptr;
+    }
+    char *buf = hexbuf;
+    for (unsigned i = 0; i < patlen; i++, buf += 4) {
+        // Four characters plus the terminator snprintf() always appends.
+        snprintf(buf, 5, "\\x%02x", (unsigned char)pat[i]);
+    }
+    hexbuf[hexlen] = '\0';
+    return hexbuf;
+}
+
#endif // STRING_UTIL_H