src/compiler/compiler.h
src/compiler/error.cpp
src/compiler/error.h
+ src/compiler/expression_info.h
src/fdr/engine_description.cpp
src/fdr/engine_description.h
src/fdr/fdr_compile.cpp
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* word-to-word and word-to-nonword) are dropped.
*/
#include "asserts.h"
+
+#include "compiler/compiler.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_prune.h"
#include "nfagraph/ng_redundancy.h"
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
static
-void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
- u32 &assert_edge_count) {
+void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr,
+ edge_cache_t &edge_cache, u32 &assert_edge_count) {
DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index);
const u32 flags = g[t].assert_flags;
edge_cache.emplace(cache_key, e);
g[e].assert_flags = flags;
if (++assert_edge_count > MAX_ASSERT_EDGES) {
- throw CompileError(g.expressionIndex,
- "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
} else {
NFAEdge e = ecit->second;
}
static
-void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
+void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+ NFAVertex v, s32 adj) {
// Don't try and set the report ID of a special vertex.
assert(!is_special(v, g));
// There should be no reports set already.
assert(g[v].reports.empty());
- Report r = rm.getBasicInternalReport(g, adj);
+ Report r = rm.getBasicInternalReport(expr, adj);
g[v].reports.insert(rm.getInternalId(r));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
}
static
-void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
+void checkForMultilineStart(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr) {
vector<NFAEdge> dead;
for (auto v : adjacent_vertices_range(g.start, g)) {
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
for (const auto &e : dead) {
NFAVertex dummy = add_vertex(g);
g[dummy].char_reach.setall();
- setReportId(rm, g, dummy, -1);
+ setReportId(rm, g, expr, dummy, -1);
add_edge(source(e, g), dummy, g[e], g);
add_edge(dummy, g.accept, g);
}
* Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence. */
-void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
+void removeAssertVertices(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr) {
size_t num = 0;
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
for (auto v : vertices_range(g)) {
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
- replaceAssertVertex(g, v, edge_cache, assert_edge_count);
+ replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count);
num++;
}
}
- checkForMultilineStart(rm, g);
+ checkForMultilineStart(rm, g, expr);
if (num) {
DEBUG_PRINTF("resolved %zu assert vertices\n", num);
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
namespace ue2 {
+class ExpressionInfo;
class ReportManager;
-class NGWrapper;
+class NGHolder;
/** \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
* Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence. */
-void removeAssertVertices(ReportManager &rm, NGWrapper &g);
+void removeAssertVertices(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr);
} // namespace ue2
namespace ue2 {
-
static
void validateExt(const hs_expr_ext &ext) {
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
}
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
- unsigned flags, ReportID actionId,
+ unsigned flags, ReportID report,
const hs_expr_ext *ext)
- : utf8(false),
- allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
- highlander(flags & HS_FLAG_SINGLEMATCH),
- prefilter(flags & HS_FLAG_PREFILTER),
- som(SOM_NONE),
- index(index_in),
- id(actionId),
- min_offset(0),
- max_offset(MAX_OFFSET),
- min_length(0),
- edit_distance(0) {
+ : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
+ false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
+ 0, 0) {
ParseMode mode(flags);
component = parse(expression, mode);
- utf8 = mode.utf8; /* utf8 may be set by parse() */
+ expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
- if (utf8 && !isValidUtf8(expression)) {
+ if (expr.utf8 && !isValidUtf8(expression)) {
throw ParseError("Expression is not valid UTF-8.");
}
// Set SOM type.
if (flags & HS_FLAG_SOM_LEFTMOST) {
- som = SOM_LEFT;
+ expr.som = SOM_LEFT;
}
// Set extended parameters, if we have them.
validateExt(*ext);
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
- min_offset = ext->min_offset;
+ expr.min_offset = ext->min_offset;
}
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
- max_offset = ext->max_offset;
+ expr.max_offset = ext->max_offset;
}
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
- min_length = ext->min_length;
+ expr.min_length = ext->min_length;
}
if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
- edit_distance = ext->edit_distance;
+ expr.edit_distance = ext->edit_distance;
}
}
// These are validated in validateExt, so an error will already have been
// thrown if these conditions don't hold.
- assert(max_offset >= min_offset);
- assert(max_offset >= min_length);
+ assert(expr.max_offset >= expr.min_offset);
+ assert(expr.max_offset >= expr.min_length);
// Since prefiltering and SOM aren't supported together, we must squash any
// min_length constraint as well.
- if (flags & HS_FLAG_PREFILTER && min_length) {
+ if (flags & HS_FLAG_PREFILTER && expr.min_length) {
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
- min_length = 0;
+ expr.min_length = 0;
}
}
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
* mode.
*/
-void dumpExpression(UNUSED const ParsedExpression &expr,
+void dumpExpression(UNUSED const ParsedExpression &pe,
UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG)
- DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
- expr.index);
+ DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
+ pe.expr.report, pe.expr.index);
ostringstream debug_tree;
- dumpTree(debug_tree, expr.component.get());
+ dumpTree(debug_tree, pe.component.get());
printf("%s\n", debug_tree.str().c_str());
#endif // DEBUG
#if defined(DUMP_SUPPORT)
if (grey.dumpFlags & Grey::DUMP_PARSE) {
stringstream ss;
- ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_"
+ ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
<< stage << ".txt";
ofstream out(ss.str().c_str());
- out << "Component Tree for " << expr.id << endl;
- dumpTree(out, expr.component.get());
- if (expr.utf8) {
+ out << "Component Tree for " << pe.expr.report << endl;
+ dumpTree(out, pe.component.get());
+ if (pe.expr.utf8) {
out << "UTF8 mode" << endl;
}
}
-/** \brief Run Component tree optimisations on \a expr. */
+/** \brief Run Component tree optimisations on \a pe. */
static
-void optimise(ParsedExpression &expr) {
- if (expr.min_length || expr.som) {
+void optimise(ParsedExpression &pe) {
+ if (pe.expr.min_length || pe.expr.som) {
return;
}
DEBUG_PRINTF("optimising\n");
- expr.component->optimise(true /* root is connected to sds */);
+ pe.component->optimise(true /* root is connected to sds */);
}
void addExpression(NG &ng, unsigned index, const char *expression,
// Do per-expression processing: errors here will result in an exception
// being thrown up to our caller
- ParsedExpression expr(index, expression, flags, id, ext);
- dumpExpression(expr, "orig", cc.grey);
+ ParsedExpression pe(index, expression, flags, id, ext);
+ dumpExpression(pe, "orig", cc.grey);
// Apply prefiltering transformations if desired.
- if (expr.prefilter) {
- prefilterTree(expr.component, ParseMode(flags));
- dumpExpression(expr, "prefiltered", cc.grey);
+ if (pe.expr.prefilter) {
+ prefilterTree(pe.component, ParseMode(flags));
+ dumpExpression(pe, "prefiltered", cc.grey);
}
// Expressions containing zero-width assertions and other extended pcre
// types aren't supported yet. This call will throw a ParseError exception
// if the component tree contains such a construct.
- checkUnsupported(*expr.component);
+ checkUnsupported(*pe.component);
- expr.component->checkEmbeddedStartAnchor(true);
- expr.component->checkEmbeddedEndAnchor(true);
+ pe.component->checkEmbeddedStartAnchor(true);
+ pe.component->checkEmbeddedEndAnchor(true);
if (cc.grey.optimiseComponentTree) {
- optimise(expr);
- dumpExpression(expr, "opt", cc.grey);
+ optimise(pe);
+ dumpExpression(pe, "opt", cc.grey);
}
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
- expr.component.get(), expr.index, expr.id);
+ pe.component.get(), pe.expr.index, pe.expr.report);
// You can only use the SOM flags if you've also specified an SOM
// precision mode.
- if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
+ if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
throw CompileError("To use a SOM expression flag in streaming mode, "
"an SOM precision mode (e.g. "
"HS_MODE_SOM_HORIZON_LARGE) must be specified.");
// If this expression is a literal, we can feed it directly to Rose rather
// than building the NFA graph.
- if (shortcutLiteral(ng, expr)) {
+ if (shortcutLiteral(ng, pe)) {
DEBUG_PRINTF("took literal short cut\n");
return;
}
- unique_ptr<NGWrapper> g = buildWrapper(ng.rm, cc, expr);
-
- if (!g) {
+ auto built_expr = buildGraph(ng.rm, cc, pe);
+ if (!built_expr.g) {
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
- "thrown.\n", expr.id);
+ "thrown.\n", pe.expr.report);
throw CompileError("Internal error.");
}
- if (!expr.allow_vacuous && matches_everywhere(*g)) {
+ auto &g = *built_expr.g;
+ if (!pe.expr.allow_vacuous && matches_everywhere(g)) {
throw CompileError("Pattern matches empty buffer; use "
"HS_FLAG_ALLOWEMPTY to enable support.");
}
- if (!ng.addGraph(*g)) {
- DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id);
+ if (!ng.addGraph(built_expr.expr, g)) {
+ DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
throw CompileError("Error compiling expression.");
}
}
}
#endif
-unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
- const ParsedExpression &expr) {
- assert(isSupported(*expr.component));
+BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
+ const ParsedExpression &pe) {
+ assert(isSupported(*pe.component));
- const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr);
+ const auto builder = makeNFABuilder(rm, cc, pe);
assert(builder);
// Set up START and ACCEPT states; retrieve the special states
- const auto bs = makeGlushkovBuildState(*builder, expr.prefilter);
+ const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
// Map position IDs to characters/components
- expr.component->notePositions(*bs);
+ pe.component->notePositions(*bs);
// Wire the start dotstar state to the firsts
- connectInitialStates(*bs, expr);
+ connectInitialStates(*bs, pe);
DEBUG_PRINTF("wire up body of expr\n");
// Build the rest of the FOLLOW set
vector<PositionInfo> initials = {builder->getStartDotStar(),
builder->getStart()};
- expr.component->buildFollowSet(*bs, initials);
+ pe.component->buildFollowSet(*bs, initials);
// Wire the lasts to the accept state
- connectFinalStates(*bs, expr);
+ connectFinalStates(*bs, pe);
// Create our edges
bs->buildEdges();
- auto g = builder->getGraph();
- assert(g);
+ BuiltExpression built_expr = builder->getGraph();
+ assert(built_expr.g);
- dumpDotWrapper(*g, "00_before_asserts", cc.grey);
- removeAssertVertices(rm, *g);
+ dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
+ cc.grey);
+ removeAssertVertices(rm, *built_expr.g, built_expr.expr);
- return g;
+ return built_expr;
}
} // namespace ue2
#include "ue2common.h"
#include "database.h"
+#include "compiler/expression_info.h"
#include "parser/Component.h"
-#include "som/som.h"
#include <memory>
#include <boost/core/noncopyable.hpp>
struct Grey;
struct target_t;
class NG;
+class NGHolder;
class ReportManager;
-class NGWrapper;
-/** Class gathering together the pieces of a parsed expression.
- * Note: Owns the provided component.
- */
+/**
+ * \brief Class gathering together the pieces of a parsed expression.
+ *
+ * Holds the expression's properties (\ref expr) and owns the root node of its
+ * parsed component tree (\ref component) through a std::unique_ptr. The class
+ * derives from boost::noncopyable, so instances cannot be copied.
+ */
class ParsedExpression : boost::noncopyable {
public:
ParsedExpression(unsigned index, const char *expression, unsigned flags,
- ReportID actionId, const hs_expr_ext *ext = nullptr);
-
- bool utf8; //!< UTF-8 mode flag specified
+ ReportID report, const hs_expr_ext *ext = nullptr);
- /** \brief root node of parsed component tree. */
- std::unique_ptr<ue2::Component> component;
+ /** \brief Expression information (from flags, extparam etc) */
+ ExpressionInfo expr;
- const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
- const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
- const bool prefilter; //!< HS_FLAG_PREFILTER specified
- som_type som; //!< chosen SOM mode, or SOM_NONE
+ /** \brief Root node of parsed component tree. */
+ std::unique_ptr<Component> component;
+};
- /** \brief index in expressions array passed to \ref hs_compile_multi */
- const unsigned index;
+/**
+ * \brief Class gathering together the pieces of an expression that has been
+ * built into an NFA graph.
+ *
+ * Pairs the expression's properties (\ref expr) with the graph built from it
+ * (\ref g). This is the type returned by value from buildGraph(). Because the
+ * graph is held in a std::unique_ptr, a BuiltExpression can be moved but not
+ * copied.
+ */
+struct BuiltExpression {
+ /** \brief Expression information (from flags, extparam etc) */
+ ExpressionInfo expr;
- const ReportID id; //!< user-specified pattern ID
- u64a min_offset; //!< 0 if not used
- u64a max_offset; //!< MAX_OFFSET if not used
- u64a min_length; //!< 0 if not used
- u32 edit_distance; //!< 0 if not used
+ /** \brief Built Glushkov NFA graph. */
+ std::unique_ptr<NGHolder> g;
};
/**
* @param ext
* Struct containing extra parameters for this expression, or NULL if
* none.
- * @param actionId
+ * @param report
* The identifier to associate with the expression; returned by engine on
* match.
*/
void addExpression(NG &ng, unsigned index, const char *expression,
- unsigned flags, const hs_expr_ext *ext, ReportID actionId);
+ unsigned flags, const hs_expr_ext *ext, ReportID report);
/**
* Build a Hyperscan database out of the expressions we've been given. A
* @return
* nullptr on error.
*/
-std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
- const CompileContext &cc,
- const ParsedExpression &expr);
+BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
+ const ParsedExpression &expr);
/**
* Build a platform_t out of a target_t.
--- /dev/null
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief ExpressionInfo class for storing the properties of an expression.
+ */
+
+#ifndef COMPILER_EXPRESSION_INFO_H
+#define COMPILER_EXPRESSION_INFO_H
+
+#include "ue2common.h"
+#include "som/som.h"
+
+namespace ue2 {
+
+/**
+ * \brief Properties of an expression.
+ *
+ * A plain bundle of public, non-const fields: the expression's index and
+ * user-specified report ID, the flag-derived booleans, the SOM type and the
+ * extended parameters.
+ *
+ * Note that the constructor does not take its arguments in member declaration
+ * order: \a report_in is the seventh argument (after \a som_in), whereas
+ * \a report is the second member.
+ */
+class ExpressionInfo {
+public:
+ ExpressionInfo(unsigned int index_in, bool allow_vacuous_in,
+ bool highlander_in, bool utf8_in, bool prefilter_in,
+ som_type som_in, ReportID report_in, u64a min_offset_in,
+ u64a max_offset_in, u64a min_length_in, u32 edit_distance_in)
+ : index(index_in), report(report_in), allow_vacuous(allow_vacuous_in),
+ highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in),
+ som(som_in), min_offset(min_offset_in), max_offset(max_offset_in),
+ min_length(min_length_in), edit_distance(edit_distance_in) {}
+
+ /**
+ * \brief Index of the expression in the array passed to
+ * \ref hs_compile_multi.
+ *
+ * Used:
+ * - down the track in error handling;
+ * - for identifying parts of an expression in highlander mode.
+ */
+ unsigned int index;
+
+ /** \brief Report ID specified by the user. */
+ ReportID report;
+
+ /** \brief Vacuous pattern is allowed. (HS_FLAG_ALLOWEMPTY) */
+ bool allow_vacuous;
+
+ /** \brief "Highlander" (single match) pattern. (HS_FLAG_SINGLEMATCH) */
+ bool highlander;
+
+ /** \brief UTF-8 pattern. (HS_FLAG_UTF8) */
+ bool utf8;
+
+ /** \brief Prefiltering pattern. (HS_FLAG_PREFILTER) */
+ bool prefilter;
+
+ /** \brief Start-of-match type requested, or SOM_NONE. */
+ som_type som;
+
+ /** \brief Minimum match offset extended parameter. 0 if not used. */
+ u64a min_offset;
+
+ /**
+ * \brief Maximum match offset extended parameter.
+ * MAX_OFFSET if not used.
+ */
+ u64a max_offset;
+
+ /** \brief Minimum match length extended parameter. 0 if not used. */
+ u64a min_length;
+
+ /**
+ * \brief Approximate matching edit distance extended parameter.
+ * 0 if not used.
+ */
+ u32 edit_distance;
+};
+
+} // namespace ue2
+
+#endif // COMPILER_EXPRESSION_INFO_H
assert(pe.component);
// Apply prefiltering transformations if desired.
- if (pe.prefilter) {
+ if (pe.expr.prefilter) {
prefilterTree(pe.component, ParseMode(flags));
}
- unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);
+ auto built_expr = buildGraph(rm, cc, pe);
+ unique_ptr<NGHolder> &g = built_expr.g;
+ ExpressionInfo &expr = built_expr.expr;
if (!g) {
DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
}
// validate graph's suitability for fuzzing
- validate_fuzzy_compile(*g, g->edit_distance, g->utf8, cc.grey);
+ validate_fuzzy_compile(*g, expr.edit_distance, expr.utf8, cc.grey);
// fuzz graph - this must happen before any transformations are made
- make_fuzzy(*g, g->edit_distance, cc.grey);
+ make_fuzzy(*g, expr.edit_distance, cc.grey);
- handleExtendedParams(rm, *g, cc);
- fillExpressionInfo(rm, *g, &local_info);
+ handleExtendedParams(rm, *g, expr, cc);
+ fillExpressionInfo(rm, *g, expr, &local_info);
}
catch (const CompileError &e) {
// Compiler error occurred
*/
/** \file
- * \brief NG, NGHolder, NGWrapper and graph handling.
+ * \brief NG and graph handling.
*/
-#include "grey.h"
#include "ng.h"
+
+#include "grey.h"
#include "ng_anchored_acyclic.h"
#include "ng_anchored_dots.h"
#include "ng_asserts.h"
#include "ng_util.h"
#include "ng_width.h"
#include "ue2common.h"
+#include "compiler/compiler.h"
#include "nfa/goughcompile.h"
#include "rose/rose_build.h"
#include "smallwrite/smallwrite_build.h"
* \throw CompileError if SOM cannot be supported for the component.
*/
static
-bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
+bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const som_type som, const u32 comp_id) {
DEBUG_PRINTF("doing som\n");
- dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey);
+ dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
assert(hasCorrectlyNumberedVertices(g));
- assert(allMatchStatesHaveReports(w));
+ assert(allMatchStatesHaveReports(g));
// First, we try the "SOM chain" support in ng_som.cpp.
- sombe_rv rv = doSom(ng, g, w, comp_id, som);
+ sombe_rv rv = doSom(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) {
return false;
} else if (rv == SOMBE_HANDLED_ALL) {
assert(rv == SOMBE_FAIL);
/* Next, Sombe style approaches */
- rv = doSomWithHaig(ng, g, w, comp_id, som);
+ rv = doSomWithHaig(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) {
return false;
} else if (rv == SOMBE_HANDLED_ALL) {
vector<vector<CharReach> > triggers; /* empty for outfix */
assert(g.kind == NFA_OUTFIX);
- dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey);
+ dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
makeReportsSomPass(ng.rm, g);
auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
ng.cc.grey);
/* Our various strategies for supporting SOM for this pattern have failed.
* Provide a generic pattern not supported/too large return value as it is
* unclear what the meaning of a specific SOM error would be */
- throw CompileError(w.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
assert(0); // unreachable
return false;
}
static
-bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
- const u32 comp_id) {
+bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
+ const som_type som, const u32 comp_id) {
const CompileContext &cc = ng.cc;
assert(hasCorrectlyNumberedVertices(g));
DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
- w.expressionIndex, comp_id, num_vertices(g), num_edges(g));
+ expr.index, comp_id, num_vertices(g), num_edges(g));
- dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey);
+ dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
- assert(allMatchStatesHaveReports(w));
+ assert(allMatchStatesHaveReports(g));
- reduceGraph(g, som, w.utf8, cc);
+ reduceGraph(g, som, expr.utf8, cc);
- dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey);
+ dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
// There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) {
// Start Of Match handling.
if (som) {
- if (addComponentSom(ng, g, w, som, comp_id)) {
+ if (addComponentSom(ng, g, expr, som, comp_id)) {
return true;
}
}
- assert(allMatchStatesHaveReports(w));
+ assert(allMatchStatesHaveReports(g));
if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
return true;
return true;
}
- if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) {
+ if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
return true;
}
- if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) {
+ if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
return true;
}
return true;
}
- if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) {
+ if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
return true;
}
// Returns true if all components have been added.
static
-bool processComponents(NG &ng, NGWrapper &w,
+bool processComponents(NG &ng, ExpressionInfo &expr,
deque<unique_ptr<NGHolder>> &g_comp,
const som_type som) {
const u32 num_components = g_comp.size();
if (!g_comp[i]) {
continue;
}
- if (addComponent(ng, *g_comp[i], w, som, i)) {
+ if (addComponent(ng, *g_comp[i], expr, som, i)) {
g_comp[i].reset();
continue;
}
return false;
}
-bool NG::addGraph(NGWrapper &w) {
+bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) {
// remove reports that aren't on vertices connected to accept.
- clearReports(w);
+ clearReports(g);
- som_type som = w.som;
- if (som && isVacuous(w)) {
- throw CompileError(w.expressionIndex, "Start of match is not "
+ som_type som = expr.som;
+ if (som && isVacuous(g)) {
+ throw CompileError(expr.index, "Start of match is not "
"currently supported for patterns which match an "
"empty buffer.");
}
- dumpDotWrapper(w, "01_initial", cc.grey);
- assert(allMatchStatesHaveReports(w));
+ dumpDotWrapper(g, expr, "01_initial", cc.grey);
+ assert(allMatchStatesHaveReports(g));
/* ensure utf8 starts at cp boundary */
- ensureCodePointStart(rm, w);
+ ensureCodePointStart(rm, g, expr);
- if (can_never_match(w)) {
- throw CompileError(w.expressionIndex, "Pattern can never match.");
+ if (can_never_match(g)) {
+ throw CompileError(expr.index, "Pattern can never match.");
}
// validate graph's suitability for fuzzing before resolving asserts
- validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey);
+ validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey);
- resolveAsserts(rm, w);
- dumpDotWrapper(w, "02_post_assert_resolve", cc.grey);
- assert(allMatchStatesHaveReports(w));
+ resolveAsserts(rm, g, expr);
+ dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
+ assert(allMatchStatesHaveReports(g));
- make_fuzzy(w, w.edit_distance, cc.grey);
- dumpDotWrapper(w, "02a_post_fuzz", cc.grey);
+ make_fuzzy(g, expr.edit_distance, cc.grey);
+ dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
- pruneUseless(w);
- pruneEmptyVertices(w);
+ pruneUseless(g);
+ pruneEmptyVertices(g);
- if (can_never_match(w)) {
- throw CompileError(w.expressionIndex, "Pattern can never match.");
+ if (can_never_match(g)) {
+ throw CompileError(expr.index, "Pattern can never match.");
}
- optimiseVirtualStarts(w); /* good for som */
+ optimiseVirtualStarts(g); /* good for som */
- handleExtendedParams(rm, w, cc);
- if (w.min_length) {
+ handleExtendedParams(rm, g, expr, cc);
+ if (expr.min_length) {
// We have a minimum length constraint, which we currently use SOM to
// satisfy.
som = SOM_LEFT;
// first, we can perform graph work that can be done on an individual
// expression basis.
- if (w.utf8) {
- relaxForbiddenUtf8(w);
+ if (expr.utf8) {
+ relaxForbiddenUtf8(g, expr);
}
- if (w.highlander && !w.min_length && !w.min_offset) {
+ if (expr.highlander && !expr.min_length && !expr.min_offset) {
// In highlander mode: if we don't have constraints on our reports that
// may prevent us accepting our first match (i.e. extended params) we
// can prune the other out-edges of all vertices connected to accept.
- pruneHighlanderAccepts(w, rm);
+ pruneHighlanderAccepts(g, rm);
}
- dumpDotWrapper(w, "02b_fairly_early", cc.grey);
+ dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
// If we're a vacuous pattern, we can handle this early.
- if (splitOffVacuous(boundary, rm, w)) {
+ if (splitOffVacuous(boundary, rm, g, expr)) {
DEBUG_PRINTF("split off vacuous\n");
}
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
- if (num_vertices(w) == N_SPECIALS) {
+ if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
return true;
}
// Now that vacuous edges have been removed, update the min width exclusive
// of boundary reports.
- minWidth = min(minWidth, findMinWidth(w));
+ minWidth = min(minWidth, findMinWidth(g));
// Add the pattern to the small write builder.
- smwr->add(w);
+ smwr->add(g, expr);
if (!som) {
- removeSiblingsOfStartDotStar(w);
+ removeSiblingsOfStartDotStar(g);
}
- dumpDotWrapper(w, "03_early", cc.grey);
+ dumpDotWrapper(g, expr, "03_early", cc.grey);
// Perform a reduction pass to merge sibling character classes together.
if (cc.grey.performGraphSimplification) {
- removeRedundancy(w, som);
- prunePathsRedundantWithSuccessorOfCyclics(w, som);
+ removeRedundancy(g, som);
+ prunePathsRedundantWithSuccessorOfCyclics(g, som);
}
- dumpDotWrapper(w, "04_reduced", cc.grey);
+ dumpDotWrapper(g, expr, "04_reduced", cc.grey);
// If we've got some literals that span the graph from start to accept, we
// can split them off into Rose from here.
if (!som) {
- if (splitOffLiterals(*this, w)) {
+ if (splitOffLiterals(*this, g)) {
DEBUG_PRINTF("some vertices claimed by literals\n");
}
}
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
- if (num_vertices(w) == N_SPECIALS) {
+ if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed before calc components\n");
return true;
}
// Split the graph into a set of connected components.
- deque<unique_ptr<NGHolder>> g_comp = calcComponents(w);
+ deque<unique_ptr<NGHolder>> g_comp = calcComponents(g);
assert(!g_comp.empty());
if (!som) {
recalcComponents(g_comp);
}
- if (processComponents(*this, w, g_comp, som)) {
+ if (processComponents(*this, expr, g_comp, som)) {
return true;
}
// If we're in prefiltering mode, we can run the prefilter reductions and
// have another shot at accepting the graph.
- if (cc.grey.prefilterReductions && w.prefilter) {
+ if (cc.grey.prefilterReductions && expr.prefilter) {
for (u32 i = 0; i < g_comp.size(); i++) {
if (!g_comp[i]) {
continue;
prefilterReductions(*g_comp[i], cc);
}
- if (processComponents(*this, w, g_comp, som)) {
+ if (processComponents(*this, expr, g_comp, som)) {
return true;
}
}
if (g_comp[i]) {
DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
i, num_vertices(*g_comp[i]));
- throw CompileError(w.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
}
}
/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
-bool NG::addHolder(NGHolder &w) {
- DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w));
- assert(allMatchStatesHaveReports(w));
- assert(hasCorrectlyNumberedVertices(w));
+bool NG::addHolder(NGHolder &g) {
+ DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
+ assert(allMatchStatesHaveReports(g));
+ assert(hasCorrectlyNumberedVertices(g));
/* We don't update the global minWidth here as we care about the min width
- * of the whole pattern - not a just a prefix of it. */
+ * of the whole pattern - not just a prefix of it. */
bool prefilter = false;
- //dumpDotComp(comp, w, *this, 20, "prefix_init");
+ //dumpDotComp(comp, g, *this, 20, "prefix_init");
som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
themselves track som */
bool utf8 = false; // handling done earlier
- reduceGraph(w, som, utf8, cc);
+ reduceGraph(g, som, utf8, cc);
// There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) {
- removeRegionRedundancy(w, som);
+ removeRegionRedundancy(g, som);
}
// "Short Exhaustible Passthrough" patterns always become outfixes.
- if (isSEP(w, rm, cc.grey)) {
+ if (isSEP(g, rm, cc.grey)) {
DEBUG_PRINTF("graph is SEP\n");
- if (rose->addOutfix(w)) {
+ if (rose->addOutfix(g)) {
return true;
}
}
- if (splitOffAnchoredAcyclic(*rose, w, cc)) {
+ if (splitOffAnchoredAcyclic(*rose, g, cc)) {
return true;
}
- if (handleSmallLiteralSets(*rose, w, cc)
- || handleFixedWidth(*rose, w, cc.grey)) {
+ if (handleSmallLiteralSets(*rose, g, cc)
+ || handleFixedWidth(*rose, g, cc.grey)) {
return true;
}
- if (handleDecoratedLiterals(*rose, w, cc)) {
+ if (handleDecoratedLiterals(*rose, g, cc)) {
return true;
}
- if (doViolet(*rose, w, prefilter, false, rm, cc)) {
+ if (doViolet(*rose, g, prefilter, false, rm, cc)) {
return true;
}
- if (splitOffPuffs(*rose, rm, w, prefilter, cc)) {
+ if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
return true;
}
- if (doViolet(*rose, w, prefilter, true, rm, cc)) {
+ if (doViolet(*rose, g, prefilter, true, rm, cc)) {
return true;
}
DEBUG_PRINTF("trying for outfix\n");
- if (rose->addOutfix(w)) {
+ if (rose->addOutfix(g)) {
DEBUG_PRINTF("ok\n");
return true;
}
return true;
}
-NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in,
- bool prefilter_in, som_type som_in, ReportID r,
- u64a min_offset_in, u64a max_offset_in, u64a min_length_in,
- u32 edit_distance_in)
- : expressionIndex(ei), reportId(r), highlander(highlander_in),
- utf8(utf8_in), prefilter(prefilter_in), som(som_in),
- min_offset(min_offset_in), max_offset(max_offset_in),
- min_length(min_length_in), edit_distance(edit_distance_in) {
- // All special nodes/edges are added in NGHolder's constructor.
- DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s "
- "min_offset=%llu max_offset=%llu min_length=%llu "
- "edit_distance=%u\n",
- this, expressionIndex, reportId,
- highlander ? " highlander" : "",
- utf8 ? " utf8" : "",
- prefilter ? " prefilter" : "",
- (som != SOM_NONE) ? " som" : "",
- min_offset, max_offset, min_length, edit_distance);
-}
-
-NGWrapper::~NGWrapper() {}
-
} // namespace ue2
*/
/** \file
- * \brief NG, NGHolder, NGWrapper declarations.
+ * \brief NG declaration.
*/
#ifndef NG_H
struct CompileContext;
struct ue2_literal;
-class NGWrapper : public NGHolder {
-public:
- NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8,
- bool prefilter, const som_type som, ReportID rid, u64a min_offset,
- u64a max_offset, u64a min_length, u32 edit_distance);
-
- ~NGWrapper() override;
-
- /** index of the expression represented by this graph, used
- * - down the track in error handling
- * - identifying parts of an expression in highlander mode
- */
- const unsigned int expressionIndex;
-
- const ReportID reportId; /**< user-visible report id */
- const bool highlander; /**< user-specified single match only */
- const bool utf8; /**< UTF-8 mode */
- const bool prefilter; /**< prefiltering mode */
- const som_type som; /**< SOM type requested */
- u64a min_offset; /**< extparam min_offset value */
- u64a max_offset; /**< extparam max_offset value */
- u64a min_length; /**< extparam min_length value */
- u32 edit_distance; /**< extparam edit_distance value */
-};
-
+class ExpressionInfo;
class RoseBuild;
class SmallWriteBuild;
/** \brief Consumes a pattern, returns false or throws a CompileError
* exception if the graph cannot be consumed. */
- bool addGraph(NGWrapper &w);
+ bool addGraph(ExpressionInfo &expr, NGHolder &h);
/** \brief Consumes a graph, cut-down version of addGraph for use by SOM
* processing. */
bool addHolder(NGHolder &h);
- /** \brief Adds a literal to Rose, used by literal shortcut passes (instead of
- * using \ref addGraph) */
+ /** \brief Adds a literal to Rose, used by literal shortcut passes (instead
+ * of using \ref addGraph) */
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
bool highlander, som_type som);
*
* Shared with the small write compiler.
*/
-void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc);
+void reduceGraph(NGHolder &g, som_type som, bool utf8,
+ const CompileContext &cc);
} // namespace ue2
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "ng_prune.h"
#include "ng_redundancy.h"
#include "ng_util.h"
+#include "compiler/compiler.h"
#include "parser/position.h" // for POS flags
#include "util/bitutils.h" // for findAndClearLSB_32
#include "util/boundary_reports.h"
}
static
-void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
+void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+ NFAVertex v, s32 adj) {
// Don't try and set the report ID of a special vertex.
assert(!is_special(v, g));
// If there's a report set already, we're replacing it.
g[v].reports.clear();
- Report ir = rm.getBasicInternalReport(g, adj);
+ Report ir = rm.getBasicInternalReport(expr, adj);
g[v].reports.insert(rm.getInternalId(ir));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
}
static
-NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v,
- const CharReach &cr_mask) {
+NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+ NFAVertex v, const CharReach &cr_mask) {
NFAVertex clone = clone_vertex(g, v);
g[clone].char_reach &= cr_mask;
clone_out_edges(g, v, clone);
clone_in_edges(g, v, clone);
if (v == g.startDs) {
- if (g.utf8) {
+ if (expr.utf8) {
g[clone].char_reach &= ~UTF_START_CR;
}
DEBUG_PRINTF("marked as virt\n");
g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
- setReportId(rm, g, clone, 0);
+ setReportId(rm, g, expr, clone, 0);
}
return clone;
}
static
-void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
+void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+ NFAVertex v, bool ucp) {
assert(v != g.start);
assert(v != g.accept);
assert(v != g.acceptEod);
auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
// Split v into word/nonword vertices with only asserting out-edges.
- NFAVertex w_out = makeClone(rm, g, v, cr_word);
- NFAVertex nw_out = makeClone(rm, g, v, cr_nonword);
+ NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
+ NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
remove_out_edge_if(w_out, has_no_assert, g);
remove_out_edge_if(nw_out, has_no_assert, g);
// Split v into word/nonword vertices with only asserting in-edges.
- NFAVertex w_in = makeClone(rm, g, v, cr_word);
- NFAVertex nw_in = makeClone(rm, g, v, cr_nonword);
+ NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
+ NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
remove_in_edge_if(w_in, has_no_assert, g);
remove_in_edge_if(nw_in, has_no_assert, g);
}
static
-void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
+void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+ set<NFAEdge> *dead) {
for (const auto &e : edges_range(g)) {
u32 flags = g[e].assert_flags;
if (!flags) {
} else if (v_w) {
/* need to add a word byte */
NFAVertex vv = add_vertex(g);
- setReportId(rm, g, vv, -1);
+ setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_WORD;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
} else {
/* need to add a non word byte or see eod */
NFAVertex vv = add_vertex(g);
- setReportId(rm, g, vv, -1);
+ setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_NONWORD;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
} else if (v_w) {
/* need to add a word byte */
NFAVertex vv = add_vertex(g);
- setReportId(rm, g, vv, -1);
+ setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
} else {
/* need to add a non word byte or see eod */
NFAVertex vv = add_vertex(g);
- setReportId(rm, g, vv, -1);
+ setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
}
}
-void resolveAsserts(ReportManager &rm, NGWrapper &g) {
+void resolveAsserts(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr) {
vector<NFAEdge> asserts = getAsserts(g);
if (asserts.empty()) {
return;
map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
findSplitters(g, asserts, &to_split, &to_split_ucp);
if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
- throw CompileError(g.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
for (const auto &m : to_split) {
assert(!contains(to_split_ucp, m.first));
- splitVertex(rm, g, m.second, false);
+ splitVertex(rm, g, expr, m.second, false);
}
for (const auto &m : to_split_ucp) {
- splitVertex(rm, g, m.second, true);
+ splitVertex(rm, g, expr, m.second, true);
}
set<NFAEdge> dead;
- resolveEdges(rm, g, &dead);
+ resolveEdges(rm, g, expr, &dead);
remove_edges(dead, g);
renumber_vertices(g);
clearReports(g);
}
-void ensureCodePointStart(ReportManager &rm, NGWrapper &g) {
+void ensureCodePointStart(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr) {
/* In utf8 mode there is an implicit assertion that we start at codepoint
* boundaries. Assert resolution handles the badness coming from asserts.
* The only other source of trouble is startDs->accept connections.
*/
NFAEdge orig = edge(g.startDs, g.accept, g);
- if (g.utf8 && orig) {
- DEBUG_PRINTF("rectifying %u\n", g.reportId);
- Report ir = rm.getBasicInternalReport(g);
+ if (expr.utf8 && orig) {
+ DEBUG_PRINTF("rectifying %u\n", expr.report);
+ Report ir = rm.getBasicInternalReport(expr);
ReportID rep = rm.getInternalId(ir);
NFAVertex v_a = add_vertex(g);
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
namespace ue2 {
struct BoundaryReports;
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
class ReportManager;
-void resolveAsserts(ReportManager &rm, NGWrapper &g);
+void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr);
-void ensureCodePointStart(ReportManager &rm, NGWrapper &g);
+void ensureCodePointStart(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr);
} // namespace ue2
/** \file
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
- * NGWrapper from a parsed expression.
+ * NGHolder from a parsed expression.
*/
+
+#include "ng_builder.h"
+
#include "grey.h"
#include "ng.h"
-#include "ng_builder.h"
#include "ng_util.h"
#include "ue2common.h"
#include "compiler/compiler.h" // for ParsedExpression
void cloneRegion(Position first, Position last,
unsigned posOffset) override;
- unique_ptr<NGWrapper> getGraph() override;
+ BuiltExpression getGraph() override;
private:
/** fetch a vertex given its Position ID. */
/** \brief Greybox: used for resource limits. */
const Grey &grey;
- /** \brief Underlying NGWrapper graph. */
- unique_ptr<NGWrapper> graph;
+ /** \brief Underlying graph. */
+ unique_ptr<NGHolder> graph;
+
+ /** \brief Underlying expression info. */
+ ExpressionInfo expr;
/** \brief mapping from position to vertex. Use \ref getVertex for access.
* */
} // namespace
NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
- const ParsedExpression &expr)
- : rm(rm_in), grey(grey_in),
- graph(ue2::make_unique<NGWrapper>(
- expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som,
- expr.id, expr.min_offset, expr.max_offset, expr.min_length,
- expr.edit_distance)),
- vertIdx(N_SPECIALS) {
+ const ParsedExpression &parsed)
+ : rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()),
+ expr(parsed.expr), vertIdx(N_SPECIALS) {
// Reserve space for a reasonably-sized NFA
id2vertex.reserve(64);
(*graph)[v].index = pos;
}
-unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
+BuiltExpression NFABuilderImpl::getGraph() {
DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
num_vertices(*graph), num_edges(*graph));
throw CompileError("Pattern too large.");
}
- return move(graph);
+ return { expr, move(graph) };
}
void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
- Report ir = rm.getBasicInternalReport(*graph, offsetAdjust);
+ Report ir = rm.getBasicInternalReport(expr, offsetAdjust);
DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
- pos, graph->reportId, offsetAdjust, ir.ekey);
+ pos, expr.report, offsetAdjust, ir.ekey);
NFAVertex v = getVertex(pos);
auto &reports = (*graph)[v].reports;
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
/** \file
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
- * NGWrapper from a parsed expression.
+ * NGHolder from a parsed expression.
*/
#ifndef NG_BUILDER_H
namespace ue2 {
class CharReach;
-class NGWrapper;
class ReportManager;
+struct BuiltExpression;
struct CompileContext;
class ParsedExpression;
unsigned posOffset) = 0;
/**
- * \brief Returns the built NGWrapper graph.
+ * \brief Returns the built NGHolder graph and ExpressionInfo.
* Note that this builder cannot be used after this call.
*/
- virtual std::unique_ptr<NGWrapper> getGraph() = 0;
+ virtual BuiltExpression getGraph() = 0;
};
/** Construct a usable NFABuilder. */
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "config.h"
-#include "ng_dump.h"
+#include "nfagraph/ng_dump.h"
-#include "hwlm/hwlm_build.h"
-#include "ng.h"
-#include "ng_util.h"
-#include "parser/position.h"
+#include "hs_compile.h" /* for HS_MODE_* flags */
#include "ue2common.h"
+#include "compiler/compiler.h"
+#include "hwlm/hwlm_build.h"
#include "nfa/accel.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
-#include "smallwrite/smallwrite_dump.h"
+#include "nfagraph/ng.h"
+#include "nfagraph/ng_util.h"
+#include "parser/position.h"
#include "rose/rose_build.h"
#include "rose/rose_internal.h"
+#include "smallwrite/smallwrite_dump.h"
#include "util/bitutils.h"
#include "util/dump_charclass.h"
#include "util/report.h"
#include "util/report_manager.h"
#include "util/ue2string.h"
-#include "hs_compile.h" /* for HS_MODE_* flags */
#include <cmath>
#include <fstream>
// manual instantiation of templated dumpGraph above.
template void dumpGraphImpl(const char *, const NGHolder &);
-void dumpDotWrapperImpl(const NGWrapper &nw, const char *name,
- const Grey &grey) {
+void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
+ const char *name, const Grey &grey) {
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
stringstream ss;
- ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot";
+ ss << grey.dumpPath << "Expr_" << expr.index << "_" << name << ".dot";
DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
- dumpGraphImpl(ss.str().c_str(), nw);
+ dumpGraphImpl(ss.str().c_str(), g);
}
}
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
class NGHolder;
class NG;
-class NGWrapper;
+class ExpressionInfo;
class ReportManager;
// Implementations for stubs below -- all have the suffix "Impl".
template <typename GraphT>
void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
-void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey);
+void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
+ const char *name, const Grey &grey);
void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
const Grey &grey);
// Stubs which call through to dump code if compiled in.
UNUSED static inline
-void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name,
- UNUSED const Grey &grey) {
+void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr,
+ UNUSED const char *name, UNUSED const Grey &grey) {
#ifdef DUMP_SUPPORT
- dumpDotWrapperImpl(w, name, grey);
+ dumpDotWrapperImpl(g, expr, name, grey);
#endif
}
*/
/** \file
- * \brief Code for discovering properties of an NGWrapper used by
- * hs_expression_info.
+ * \brief Code for discovering properties of an NFA graph used by
+ * hs_expression_info().
*/
#include "ng_expr_info.h"
/* get rid of leading \b and multiline ^ vertices */
static
-void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) {
+void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
vector<NFAVertex> victims;
- for (auto v : adjacent_vertices_range(root, w)) {
- if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) {
+ for (auto v : adjacent_vertices_range(root, g)) {
+ if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
victims.push_back(v);
}
}
for (auto u : victims) {
- for (auto v : adjacent_vertices_range(u, w)) {
- add_edge_if_not_present(root, v, w);
+ for (auto v : adjacent_vertices_range(u, g)) {
+ add_edge_if_not_present(root, v, g);
}
}
- remove_vertices(victims, w);
+ remove_vertices(victims, g);
}
static
-void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
+void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
const vector<DepthMinMax> &depths, DepthMinMax &info) {
- if (is_any_accept(v, w)) {
+ if (is_any_accept(v, g)) {
return;
}
- if (is_any_start(v, w)) {
+ if (is_any_start(v, g)) {
info.min = 0;
info.max = max(info.max, depth(0));
return;
}
- u32 idx = w[v].index;
+ u32 idx = g[v].index;
assert(idx < depths.size());
const DepthMinMax &d = depths.at(idx);
- for (ReportID report_id : w[v].reports) {
+ for (ReportID report_id : g[v].reports) {
const Report &report = rm.getReport(report_id);
assert(report.type == EXTERNAL_CALLBACK);
rd.max = min(rd.max, max_offset);
}
- DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id,
+ DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id,
rd.str().c_str());
info = unionDepthMinMax(info, rd);
}
static
-bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
- for (const auto &report_id : all_reports(w)) {
+bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
+ for (const auto &report_id : all_reports(g)) {
if (rm.getReport(report_id).offsetAdjust) {
return true;
}
return false;
}
-void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
+void fillExpressionInfo(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr, hs_expr_info *info) {
assert(info);
/* ensure utf8 starts at cp boundary */
- ensureCodePointStart(rm, w);
- resolveAsserts(rm, w);
- optimiseVirtualStarts(w);
+ ensureCodePointStart(rm, g, expr);
+ resolveAsserts(rm, g, expr);
+ optimiseVirtualStarts(g);
- removeLeadingVirtualVerticesFromRoot(w, w.start);
- removeLeadingVirtualVerticesFromRoot(w, w.startDs);
+ removeLeadingVirtualVerticesFromRoot(g, g.start);
+ removeLeadingVirtualVerticesFromRoot(g, g.startDs);
vector<DepthMinMax> depths;
- calcDepthsFrom(w, w.start, depths);
+ calcDepthsFrom(g, g.start, depths);
DepthMinMax d;
- for (auto u : inv_adjacent_vertices_range(w.accept, w)) {
- checkVertex(rm, w, u, depths, d);
+ for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
+ checkVertex(rm, g, u, depths, d);
}
- for (auto u : inv_adjacent_vertices_range(w.acceptEod, w)) {
- checkVertex(rm, w, u, depths, d);
+ for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ checkVertex(rm, g, u, depths, d);
}
if (d.max.is_finite()) {
info->min_width = UINT_MAX;
}
- info->unordered_matches = hasOffsetAdjust(rm, w);
- info->matches_at_eod = can_match_at_eod(w);
- info->matches_only_at_eod = can_only_match_at_eod(w);
+ info->unordered_matches = hasOffsetAdjust(rm, g);
+ info->matches_at_eod = can_match_at_eod(g);
+ info->matches_only_at_eod = can_only_match_at_eod(g);
}
} // namespace ue2
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*/
/** \file
- * \brief Code for discovering properties of an NGWrapper used by
+ * \brief Code for discovering properties of an expression used by
* hs_expression_info.
*/
struct hs_expr_info;
-#include "ue2common.h"
-
namespace ue2 {
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
class ReportManager;
-void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info);
+void fillExpressionInfo(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr, hs_expr_info *info);
} // namespace ue2
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* match given these constraints, or transform the graph in order to make a
* constraint implicit.
*/
+
+#include "ng_extparam.h"
+
#include "ng.h"
#include "ng_depth.h"
#include "ng_dump.h"
-#include "ng_extparam.h"
#include "ng_prune.h"
#include "ng_reports.h"
#include "ng_som_util.h"
#include "ng_width.h"
#include "ng_util.h"
#include "ue2common.h"
+#include "compiler/compiler.h"
#include "parser/position.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
/** \brief Replace the graph's reports with new reports that specify bounds. */
static
-void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
+void updateReportBounds(ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr, NFAVertex accept,
set<NFAVertex> &done) {
for (auto v : inv_adjacent_vertices_range(accept, g)) {
// Don't operate on g.accept itself.
// Note that we need to cope with offset adjustment here.
- ir.minOffset = g.min_offset - ir.offsetAdjust;
- if (g.max_offset == MAX_OFFSET) {
+ ir.minOffset = expr.min_offset - ir.offsetAdjust;
+ if (expr.max_offset == MAX_OFFSET) {
ir.maxOffset = MAX_OFFSET;
} else {
- ir.maxOffset = g.max_offset - ir.offsetAdjust;
+ ir.maxOffset = expr.max_offset - ir.offsetAdjust;
}
assert(ir.maxOffset >= ir.minOffset);
- ir.minLength = g.min_length;
- if (g.min_length && !g.som) {
+ ir.minLength = expr.min_length;
+ if (expr.min_length && !expr.som) {
ir.quashSom = true;
}
* anchored and unanchored paths, but it's too tricky for the moment.
*/
static
-bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
+bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr,
+ const depth &minWidth,
const depth &maxWidth) {
- assert(!g.som);
- assert(g.max_offset != MAX_OFFSET);
+ assert(!expr.som);
+ assert(expr.max_offset != MAX_OFFSET);
assert(minWidth <= maxWidth);
assert(maxWidth.is_reachable());
DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
- minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset,
- g.max_offset);
+ minWidth.str().c_str(), maxWidth.str().c_str(),
+ expr.min_offset, expr.max_offset);
- if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
+ if (expr.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
return false;
}
- if (g.max_offset < minWidth) {
+ if (expr.max_offset < minWidth) {
assert(0);
return false;
}
u32 min_bound, max_bound;
if (maxWidth.is_infinite()) {
min_bound = 0;
- max_bound = g.max_offset - minWidth;
+ max_bound = expr.max_offset - minWidth;
} else {
- min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0;
- max_bound = g.max_offset - minWidth;
+ min_bound = expr.min_offset > maxWidth ? expr.min_offset - maxWidth : 0;
+ max_bound = expr.max_offset - minWidth;
}
DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
}
static
-bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g,
+bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
int *adjust) {
const auto &reports = all_reports(g);
if (reports.empty()) {
* /foo.*bar/{min_length=100} --> /foo.{94,}bar/
*/
static
-bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
- assert(g.min_length);
+bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g,
+ ExpressionInfo &expr) {
+ assert(expr.min_length);
- if (g.min_length > MAX_MINLENGTH_TO_CONVERT) {
+ if (expr.min_length > MAX_MINLENGTH_TO_CONVERT) {
return false;
}
DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
g[cyclic].index);
- if (width >= g.min_length) {
+ if (width >= expr.min_length) {
DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
- g.min_length, width);
- g.min_length = 0;
+ expr.min_length, width);
+ expr.min_length = 0;
return true;
}
const CharReach &cr = g[cyclic].char_reach;
- for (u32 i = 0; i < g.min_length - width - 1; ++i) {
+ for (u32 i = 0; i < expr.min_length - width - 1; ++i) {
v = add_vertex(g);
g[v].char_reach = cr;
renumber_edges(g);
clearReports(g);
- g.min_length = 0;
+ expr.min_length = 0;
return true;
}
static
-bool hasExtParams(const NGWrapper &g) {
- if (g.min_length != 0) {
+bool hasExtParams(const ExpressionInfo &expr) {
+ if (expr.min_length != 0) {
return true;
}
- if (g.min_offset != 0) {
+ if (expr.min_offset != 0) {
return true;
}
- if (g.max_offset != MAX_OFFSET) {
+ if (expr.max_offset != MAX_OFFSET) {
return true;
}
return false;
}
static
-bool isEdgePrunable(const NGWrapper &g,
+bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr,
const vector<NFAVertexBidiDepth> &depths,
const NFAEdge &e) {
const NFAVertex u = source(e, g);
const NFAVertexBidiDepth &du = depths.at(u_idx);
const NFAVertexBidiDepth &dv = depths.at(v_idx);
- if (g.min_offset) {
+ if (expr.min_offset) {
depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv);
- if (max_offset.is_finite() && max_offset < g.min_offset) {
+ if (max_offset.is_finite() && max_offset < expr.min_offset) {
DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
return true;
}
}
- if (g.max_offset != MAX_OFFSET) {
+ if (expr.max_offset != MAX_OFFSET) {
depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
assert(min_offset.is_finite());
- if (min_offset > g.max_offset) {
+ if (min_offset > expr.max_offset) {
DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
return true;
}
}
- if (g.min_length && is_any_accept(v, g)) {
+ if (expr.min_length && is_any_accept(v, g)) {
// Simple take on min_length. If we're an edge to accept and our max
// dist from start is too small, we can be pruned.
const depth &width = du.fromStart.max;
- if (width.is_finite() && width < g.min_length) {
+ if (width.is_finite() && width < expr.min_length) {
DEBUG_PRINTF("max width %s from start too small for min_length\n",
width.str().c_str());
return true;
}
static
-void pruneExtUnreachable(NGWrapper &g) {
+void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) {
vector<NFAVertexBidiDepth> depths;
calcDepths(g, depths);
vector<NFAEdge> dead;
for (const auto &e : edges_range(g)) {
- if (isEdgePrunable(g, depths, e)) {
+ if (isEdgePrunable(g, expr, depths, e)) {
DEBUG_PRINTF("pruning\n");
dead.push_back(e);
}
/** Remove vacuous edges in graphs where the min_offset or min_length
* constraints dictate that they can never produce a match. */
static
-void pruneVacuousEdges(NGWrapper &g) {
- if (!g.min_length && !g.min_offset) {
+void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) {
+ if (!expr.min_length && !expr.min_offset) {
return;
}
// Special case: Crudely remove vacuous edges from start in graphs with a
// min_offset.
- if (g.min_offset && u == g.start && is_any_accept(v, g)) {
+ if (expr.min_offset && u == g.start && is_any_accept(v, g)) {
DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
dead.push_back(e);
continue;
}
// If a min_length is set, vacuous edges can be removed.
- if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
+ if (expr.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
dead.push_back(e);
continue;
}
static
-void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
+void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
+ const vector<DepthMinMax> &depths,
const ReportManager &rm, NFAVertex accept) {
vector<NFAEdge> dead;
d.min += adj.first;
d.max += adj.second;
- if (d.max.is_finite() && d.max < g.min_length) {
+ if (d.max.is_finite() && d.max < expr.min_length) {
DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
- d.max.str().c_str(), g.min_length);
+ d.max.str().c_str(), expr.min_length);
dead.push_back(e);
continue;
}
- if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) {
+ if (expr.max_offset != MAX_OFFSET && d.min > expr.max_offset) {
DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
- d.min.str().c_str(), g.max_offset);
+ d.min.str().c_str(), expr.max_offset);
dead.push_back(e);
continue;
}
/** Remove edges to accepts that can never produce a match long enough to
* satisfy our min_length and max_offset constraints. */
static
-void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) {
- if (!g.min_length) {
+void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
+ const ReportManager &rm) {
+ if (!expr.min_length) {
return;
}
vector<DepthMinMax> depths = getDistancesFromSOM(g);
- pruneUnmatchable(g, depths, rm, g.accept);
- pruneUnmatchable(g, depths, rm, g.acceptEod);
+ pruneUnmatchable(g, expr, depths, rm, g.accept);
+ pruneUnmatchable(g, expr, depths, rm, g.acceptEod);
pruneUseless(g);
}
return false;
}
-void handleExtendedParams(ReportManager &rm, NGWrapper &g,
+void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
UNUSED const CompileContext &cc) {
- if (!hasExtParams(g)) {
+ if (!hasExtParams(expr)) {
return;
}
DepthMinMax match_depths = findMatchLengths(rm, g);
DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
- if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) {
+ if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
ostringstream oss;
oss << "Expression is anchored and cannot satisfy min_offset="
- << g.min_offset << " as it can only produce matches of length "
+ << expr.min_offset << " as it can only produce matches of length "
<< maxWidth << " bytes at most.";
- throw CompileError(g.expressionIndex, oss.str());
+ throw CompileError(expr.index, oss.str());
}
- if (minWidth > g.max_offset) {
+ if (minWidth > expr.max_offset) {
ostringstream oss;
- oss << "Expression has max_offset=" << g.max_offset << " but requires "
- << minWidth << " bytes to match.";
- throw CompileError(g.expressionIndex, oss.str());
+ oss << "Expression has max_offset=" << expr.max_offset
+ << " but requires " << minWidth << " bytes to match.";
+ throw CompileError(expr.index, oss.str());
}
- if (maxWidth.is_finite() && match_depths.max < g.min_length) {
+ if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
ostringstream oss;
- oss << "Expression has min_length=" << g.min_length << " but can "
+ oss << "Expression has min_length=" << expr.min_length << " but can "
"only produce matches of length " << match_depths.max <<
" bytes at most.";
- throw CompileError(g.expressionIndex, oss.str());
+ throw CompileError(expr.index, oss.str());
}
- if (g.min_length && g.min_length <= match_depths.min) {
+ if (expr.min_length && expr.min_length <= match_depths.min) {
DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
- g.min_length);
- g.min_length = 0;
+ expr.min_length);
+ expr.min_length = 0;
}
- if (!hasExtParams(g)) {
+ if (!hasExtParams(expr)) {
return;
}
- pruneVacuousEdges(g);
- pruneUnmatchable(g, rm);
+ pruneVacuousEdges(g, expr);
+ pruneUnmatchable(g, expr, rm);
if (!has_offset_adj) {
- pruneExtUnreachable(g);
+ pruneExtUnreachable(g, expr);
}
// We may have removed all the edges to accept, in which case this
// expression cannot match.
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
- throw CompileError(g.expressionIndex, "Extended parameter "
+ throw CompileError(expr.index, "Extended parameter "
"constraints can not be satisfied for any match from "
"this expression.");
}
// If the pattern is completely anchored and has a min_length set, this can
// be converted to a min_offset.
- if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) {
- DEBUG_PRINTF("converting min_length to min_offset=%llu for "
- "anchored case\n", g.min_length);
- g.min_offset = g.min_length;
- g.min_length = 0;
+ if (expr.min_length && (expr.min_offset <= expr.min_length) &&
+ is_anchored) {
+ DEBUG_PRINTF("converting min_length to min_offset=%llu for "
+ "anchored case\n", expr.min_length);
+ expr.min_offset = expr.min_length;
+ expr.min_length = 0;
}
- if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) {
+ if (expr.min_offset && expr.min_offset <= minWidth && !has_offset_adj) {
DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n",
- g.min_offset);
- g.min_offset = 0;
+ expr.min_offset);
+ expr.min_offset = 0;
}
- if (!hasExtParams(g)) {
+ if (!hasExtParams(expr)) {
return;
}
// If the pattern has a min_length and is of "ratchet" form with one
// unbounded repeat, that repeat can become a bounded repeat.
// e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
- if (g.min_length && transformMinLengthToRepeat(rm, g)) {
+ if (expr.min_length && transformMinLengthToRepeat(rm, g, expr)) {
DEBUG_PRINTF("converted min_length to bounded repeat\n");
// recalc
minWidth = findMinWidth(g);
// Note that it is possible to handle graphs that have a combination of
// anchored and unanchored paths, but it's too tricky for the moment.
- if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length &&
- !has_offset_adj && isUnanchored(g)) {
- if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) {
+ if (expr.max_offset != MAX_OFFSET && !expr.som && !expr.min_length &&
+ !has_offset_adj && isUnanchored(g)) {
+ if (anchorPatternWithBoundedRepeat(g, expr, minWidth, maxWidth)) {
DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(),
maxWidth.str().c_str());
if (minWidth == maxWidth) {
// For a fixed width pattern, we can retire the offsets as they
// are implicit in the graph now.
- g.min_offset = 0;
- g.max_offset = MAX_OFFSET;
+ expr.min_offset = 0;
+ expr.max_offset = MAX_OFFSET;
}
}
}
//dumpGraph("final.dot", g);
- if (!hasExtParams(g)) {
+ if (!hasExtParams(expr)) {
return;
}
set<NFAVertex> done;
- updateReportBounds(rm, g, g.accept, done);
- updateReportBounds(rm, g, g.acceptEod, done);
+ updateReportBounds(rm, g, expr, g.accept, done);
+ updateReportBounds(rm, g, expr, g.acceptEod, done);
}
} // namespace ue2
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
namespace ue2 {
struct CompileContext;
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
class ReportManager;
-void handleExtendedParams(ReportManager &rm, NGWrapper &g,
+void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
const CompileContext &cc);
} // namespace ue2
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* \brief Literal Component Splitting. Identifies literals that span the
* graph and moves them into Rose.
*/
+
+#include "ng_literal_component.h"
+
#include "grey.h"
#include "ng.h"
-#include "ng_literal_component.h"
#include "ng_prune.h"
#include "ng_util.h"
#include "ue2common.h"
+#include "compiler/compiler.h"
#include "rose/rose_build.h"
#include "util/container.h"
#include "util/graph.h"
namespace ue2 {
static
-bool isLiteralChar(const NGWrapper &g, NFAVertex v,
- bool &nocase, bool &casefixed) {
+bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase,
+ bool &casefixed) {
const CharReach &cr = g[v].char_reach;
const size_t num = cr.count();
if (num > 2) {
}
static
-bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
+bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored,
set<NFAVertex> &dead) {
DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
bool nocase = false, casefixed = false;
}
/** \brief Split off literals. True if any changes were made to the graph. */
-bool splitOffLiterals(NG &ng, NGWrapper &g) {
+bool splitOffLiterals(NG &ng, NGHolder &g) {
if (!ng.cc.grey.allowLiteral) {
return false;
}
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
namespace ue2 {
class NG;
-class NGWrapper;
+class NGHolder;
/** \brief Split off literals. True if any changes were made to the graph. */
-bool splitOffLiterals(NG &ng, NGWrapper &graph);
+bool splitOffLiterals(NG &ng, NGHolder &g);
} // namespace ue2
/** \file
* \brief SOM ("Start of Match") analysis.
*/
+
+#include "ng_som.h"
+
#include "ng.h"
#include "ng_dump.h"
#include "ng_equivalence.h"
#include "ng_redundancy.h"
#include "ng_region.h"
#include "ng_reports.h"
-#include "ng_som.h"
#include "ng_som_add_redundancy.h"
#include "ng_som_util.h"
#include "ng_split.h"
#include "ng_width.h"
#include "grey.h"
#include "ue2common.h"
+#include "compiler/compiler.h"
#include "nfa/goughcompile.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "parser/position.h"
* implement the full pattern.
*/
static
-void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
- vector<som_plan> &plan, const u32 first_som_slot) {
+void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
+ NGHolder &g, vector<som_plan> &plan,
+ const u32 first_som_slot) {
ReportManager &rm = ng.rm;
SomSlotManager &ssm = ng.ssm;
// Root plan, which already has a SOM slot assigned (first_som_slot).
dumpSomPlan(g, plan.front(), 0);
- dumpSomSubComponent(*plan.front().prefix, "04_som", w.expressionIndex,
- comp_id, 0, ng.cc.grey);
+ dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
+ ng.cc.grey);
assert(plan.front().prefix);
if (plan.front().escapes.any() && !plan.front().is_reset) {
/* setup escaper for first som location */
if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
first_som_slot)) {
- throw CompileError(w.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
}
for (++it; it != plan.end(); ++it) {
const u32 plan_num = it - plan.begin();
dumpSomPlan(g, *it, plan_num);
- dumpSomSubComponent(*it->prefix, "04_som", w.expressionIndex, comp_id,
+ dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
plan_num, ng.cc.grey);
assert(it->parent < plan_num);
assert(!it->no_implement);
if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
- throw CompileError(w.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
renumber_vertices(*plan.front().prefix);
assert(plan.front().prefix->kind == NFA_OUTFIX);
if (!ng.addHolder(*plan.front().prefix)) {
- throw CompileError(w.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
}
}
}
static
-u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g,
+u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
const CompileContext &cc) {
depth maxWidth = findMaxWidth(g);
auto nfa = makeBareSomRevNfa(g, cc);
if (!nfa) {
- throw CompileError(w.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
if (ng.cc.streaming) {
}
static
-sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
- som_type som,
+sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
+ u32 comp_id, som_type som,
const ue2::unordered_map<NFAVertex, u32> &regions,
const map<u32, region_info> &info,
map<u32, region_info>::const_iterator lower_bound) {
// This is an optimisation: if we can't build a Haig from a portion of
// the graph, then we won't be able to manage it as an outfix either
// when we fall back.
- throw CompileError(w.expressionIndex, "Pattern is too large.");
+ throw CompileError(expr.index, "Pattern is too large.");
}
while (1) {
goto next_try;
}
- implementSomPlan(ng, w, comp_id, g, plan, som_loc);
+ implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
Report ir = makeCallback(0U, 0);
assert(!plan.empty());
return prefix;
}
-sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
+sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
som_type som) {
assert(som);
DEBUG_PRINTF("som hello\n");
/* create prefix to set the som_loc */
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
if (prefix_by_rev) {
- u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc);
+ u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
}
renumber_vertices(*prefix);
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
}
if (prefix_by_rev && !plan.front().no_implement) {
- u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc);
+ u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
}
- implementSomPlan(ng, w, comp_id, g, plan, som_loc);
+ implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
DEBUG_PRINTF("success\n");
return SOMBE_HANDLED_INTERNAL;
}
-sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
- som_type som) {
+sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
+ u32 comp_id, som_type som) {
assert(som);
DEBUG_PRINTF("som+haig hello\n");
buildRegionMapping(g, regions, info, true);
sombe_rv rv =
- doHaigLitSom(ng, g, w, comp_id, som, regions, info, info.begin());
+ doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
if (rv == SOMBE_FAIL) {
clear_graph(g);
cloneHolder(g, g_pristine);
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#define NG_SOM_H
#include "som/som.h"
+#include "ue2common.h"
namespace ue2 {
+class ExpressionInfo;
class NG;
class NGHolder;
-class NGWrapper;
+class ReportManager;
struct Grey;
enum sombe_rv {
* May throw a "Pattern too large" exception if prefixes of the
* pattern are too large to compile.
*/
-sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
+sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id,
som_type som);
/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
* May also throw pattern too large if prefixes of the pattern are too large to
* compile. */
-sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
- som_type som);
+sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr,
+ u32 comp_id, som_type som);
void makeReportsSomPass(ReportManager &rm, NGHolder &g);
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "ng.h"
#include "ng_prune.h"
#include "ng_util.h"
+#include "compiler/compiler.h"
#include "util/graph_range.h"
#include "util/unicode_def.h"
namespace ue2 {
static
-void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) {
- if (in_degree(v, w) != 1) {
+void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) {
+ if (in_degree(v, g) != 1) {
DEBUG_PRINTF("unexpected pred\n");
assert(0); /* should be true due to the early stage of this analysis */
return;
}
- CharReach &cr = w[v].char_reach;
+ CharReach &cr = g[v].char_reach;
if (pred_char == 0xe0) {
assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
if (cr == CharReach(0xa0, 0xbf)) {
* above \\x{10ffff} or they represent overlong encodings. As we require valid
* UTF-8 input, we have no defined behaviour in these cases, as a result we can
* accept them if it simplifies the graph. */
-void relaxForbiddenUtf8(NGWrapper &w) {
- if (!w.utf8) {
+void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
+ if (!expr.utf8) {
return;
}
const CharReach f0(0xf0);
const CharReach f4(0xf4);
- for (auto v : vertices_range(w)) {
- const CharReach &cr = w[v].char_reach;
+ for (auto v : vertices_range(g)) {
+ const CharReach &cr = g[v].char_reach;
if (cr == e0 || cr == f0 || cr == f4) {
u8 pred_char = cr.find_first();
- for (auto t : adjacent_vertices_range(v, w)) {
- allowIllegal(w, t, pred_char);
+ for (auto t : adjacent_vertices_range(v, g)) {
+ allowIllegal(g, t, pred_char);
}
}
}
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
namespace ue2 {
-class NGWrapper;
+class ExpressionInfo;
class NGHolder;
/** \brief Relax forbidden UTF-8 sequences.
* above \\x{10ffff} or they represent overlong encodings. As we require valid
* UTF-8 input, we have no defined behaviour in these cases, as a result we can
* accept them if it simplifies the graph. */
-void relaxForbiddenUtf8(NGWrapper &w);
+void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr);
/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
* where possible, based on the assumption that we will always be matching
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "grey.h"
#include "ng.h"
#include "ng_util.h"
+#include "compiler/compiler.h"
using namespace std;
namespace ue2 {
static
-ReportID getInternalId(ReportManager &rm, const NGWrapper &graph) {
- Report ir = rm.getBasicInternalReport(graph);
+ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) {
+ Report ir = rm.getBasicInternalReport(expr);
// Apply any extended params.
- if (graph.min_offset || graph.max_offset != MAX_OFFSET) {
- ir.minOffset = graph.min_offset;
- ir.maxOffset = graph.max_offset;
+ if (expr.min_offset || expr.max_offset != MAX_OFFSET) {
+ ir.minOffset = expr.min_offset;
+ ir.maxOffset = expr.max_offset;
}
- assert(!graph.min_length); // should be handled elsewhere.
+ assert(!expr.min_length); // should be handled elsewhere.
return rm.getInternalId(ir);
}
static
-void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) {
- const ReportID r = getInternalId(rm, g);
+void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g,
+ const ExpressionInfo &expr) {
+ const ReportID r = getInternalId(rm, expr);
boundary.report_at_0_eod.insert(r);
boundary.report_at_0.insert(r);
static
void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
- NGWrapper &g) {
- boundary.report_at_0.insert(getInternalId(rm, g));
+ NGHolder &g, const ExpressionInfo &expr) {
+ boundary.report_at_0.insert(getInternalId(rm, expr));
remove_edge(g.start, g.accept, g);
remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear();
static
void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
- NGWrapper &g) {
- boundary.report_at_eod.insert(getInternalId(rm, g));
+ NGHolder &g, const ExpressionInfo &expr) {
+ boundary.report_at_eod.insert(getInternalId(rm, expr));
remove_edge(g.startDs, g.acceptEod, g);
remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear();
static
void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
- NGWrapper &g) {
- boundary.report_at_0_eod.insert(getInternalId(rm, g));
+ NGHolder &g, const ExpressionInfo &expr) {
+ boundary.report_at_0_eod.insert(getInternalId(rm, expr));
remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear();
}
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
- NGWrapper &g) {
+ NGHolder &g, const ExpressionInfo &expr) {
if (edge(g.startDs, g.accept, g).second) {
// e.g. '.*'; match "between" every byte
DEBUG_PRINTF("graph is firehose\n");
- makeFirehose(boundary, rm, g);
+ makeFirehose(boundary, rm, g, expr);
return true;
}
if (edge(g.start, g.accept, g).second) {
DEBUG_PRINTF("creating anchored acceptor\n");
- makeAnchoredAcceptor(boundary, rm, g);
+ makeAnchoredAcceptor(boundary, rm, g, expr);
work_done = true;
}
if (edge(g.startDs, g.acceptEod, g).second) {
DEBUG_PRINTF("creating end-anchored acceptor\n");
- makeEndAnchoredAcceptor(boundary, rm, g);
+ makeEndAnchoredAcceptor(boundary, rm, g, expr);
work_done = true;
}
if (edge(g.start, g.acceptEod, g).second) {
DEBUG_PRINTF("creating nothing acceptor\n");
- makeNothingAcceptor(boundary, rm, g);
+ makeNothingAcceptor(boundary, rm, g, expr);
work_done = true;
}
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
namespace ue2 {
struct BoundaryReports;
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
class ReportManager;
// Returns true if a "vacuous" reporter was created.
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
- NGWrapper &graph);
+ NGHolder &g, const ExpressionInfo &expr);
} // namespace ue2
ConstructLiteralVisitor::~ConstructLiteralVisitor() {}
/** \brief True if the literal expression \a expr could be added to Rose. */
-bool shortcutLiteral(NG &ng, const ParsedExpression &expr) {
- assert(expr.component);
+bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
+ assert(pe.component);
if (!ng.cc.grey.allowLiteral) {
return false;
}
+ const auto &expr = pe.expr;
+
// XXX: don't shortcut literals with extended params (yet)
if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length ||
expr.edit_distance) {
ConstructLiteralVisitor vis;
try {
- assert(expr.component);
- expr.component->accept(vis);
+ assert(pe.component);
+ pe.component->accept(vis);
assert(vis.repeat_stack.empty());
} catch (const ConstructLiteralVisitor::NotLiteral&) {
DEBUG_PRINTF("not a literal\n");
}
DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
- return ng.addLiteral(lit, expr.index, expr.id, expr.highlander, expr.som);
+ return ng.addLiteral(lit, expr.index, expr.report, expr.highlander,
+ expr.som);
}
} // namespace ue2
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "grey.h"
#include "ue2common.h"
+#include "compiler/compiler.h"
#include "nfa/dfa_min.h"
#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
// Construct a runtime implementation.
aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) override;
- void add(const NGWrapper &w) override;
+ void add(const NGHolder &g, const ExpressionInfo &expr) override;
void add(const ue2_literal &literal, ReportID r) override;
set<ReportID> all_reports() const override;
return modified;
}
-void SmallWriteBuildImpl::add(const NGWrapper &w) {
+void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
// we don't even try.
if (poisoned) {
return;
}
- if (w.som || w.min_length || isVacuous(w)) { /* cannot support in smwr */
- poisoned = true;
+ if (expr.som || expr.min_length || isVacuous(g)) {
+ poisoned = true; /* cannot support in smwr */
return;
}
- DEBUG_PRINTF("w=%p\n", &w);
+ DEBUG_PRINTF("g=%p\n", &g);
// make a copy of the graph so that we can modify it for our purposes
- unique_ptr<NGHolder> h = cloneHolder(w);
+ unique_ptr<NGHolder> h = cloneHolder(g);
pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm);
- reduceGraph(*h, SOM_NONE, w.utf8, cc);
+ reduceGraph(*h, SOM_NONE, expr.utf8, cc);
if (can_never_match(*h)) {
DEBUG_PRINTF("graph can never match in small block\n");
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
struct CompileContext;
struct ue2_literal;
-class NGWrapper;
-class ReportManager;
+class ExpressionInfo;
+class NGHolder;
+class ReportManager;
// Abstract interface intended for callers from elsewhere in the tree, real
// underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h.
// Construct a runtime implementation.
virtual ue2::aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) = 0;
- virtual void add(const NGWrapper &w) = 0;
+ virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0;
virtual void add(const ue2_literal &literal, ReportID r) = 0;
virtual std::set<ReportID> all_reports() const = 0;
};
// Construct a usable SmallWrite builder.
-std::unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
- const ReportManager &rm,
- const CompileContext &cc);
+std::unique_ptr<SmallWriteBuild>
+makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm,
+ const CompileContext &cc);
size_t smwrSize(const SmallWriteEngine *t);
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
/** \file
* \brief ReportManager: tracks Report structures, exhaustion and dedupe keys.
*/
-#include "grey.h"
+
#include "report_manager.h"
+
+#include "grey.h"
#include "ue2common.h"
+#include "compiler/compiler.h"
#include "nfagraph/ng.h"
#include "rose/rose_build.h"
#include "util/compile_error.h"
}
}
-Report ReportManager::getBasicInternalReport(const NGWrapper &g, s32 adj) {
+Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr,
+ s32 adj) {
/* validate that we are not violating highlander constraints, this will
* throw a CompileError if so. */
- registerExtReport(g.reportId,
- external_report_info(g.highlander, g.expressionIndex));
+ registerExtReport(expr.report,
+ external_report_info(expr.highlander, expr.index));
/* create the internal report */
u32 ekey = INVALID_EKEY;
- if (g.highlander) {
+ if (expr.highlander) {
/* all patterns with the same report id share an ekey */
- ekey = getExhaustibleKey(g.reportId);
+ ekey = getExhaustibleKey(expr.report);
}
- return makeECallback(g.reportId, adj, ekey);
+ return makeECallback(expr.report, adj, ekey);
}
void ReportManager::setProgramOffset(ReportID id, u32 programOffset) {
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
struct Grey;
class RoseBuild;
-class NGWrapper;
+class ExpressionInfo;
struct external_report_info {
external_report_info(bool h, u32 fpi)
const std::vector<Report> &reports() const { return reportIds; }
/**
- * Get a simple internal report corresponding to the wrapper. An ekey will
- * be setup as required.
+ * Get a simple internal report corresponding to the expression. An ekey
+ * will be set up if required.
*
* Note: this function may throw a CompileError if constraints on external
* match id are violated (mixed highlander status for example).
*/
- Report getBasicInternalReport(const NGWrapper &g, s32 adj = 0);
+ Report getBasicInternalReport(const ExpressionInfo &expr, s32 adj = 0);
/** \brief Register an external report and validate that we are not
* violating highlander constraints (which will cause an exception to be
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
const CompileContext cc(true, false, target, grey);
ReportManager rm(cc.grey);
ParsedExpression parsed(0, pattern.c_str(), flags, 0);
- unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
+ auto built_expr = buildGraph(rm, cc, parsed);
+ const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
CompileContext cc(false, false, target, Grey());
ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0);
- unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
+ auto built_expr = buildGraph(rm, cc, parsed);
+ const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);
CompileContext cc(false, false, get_current_target(), Grey());
ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0);
- unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
+ auto built_expr = buildGraph(rm, cc, parsed);
+ const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);
CompileContext cc(true, false, get_current_target(), Grey());
ParsedExpression parsed(0, expr.c_str(), flags, 0);
ReportManager rm(cc.grey);
- unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
+ auto built_expr = buildGraph(rm, cc, parsed);
+ const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
// Helper function: construct a graph from an expression, flags and context.
inline
-std::unique_ptr<NGWrapper> constructGraphWithCC(const std::string &expr,
- CompileContext &cc,
- unsigned flags) {
+std::unique_ptr<NGHolder> constructGraphWithCC(const std::string &expr,
+ CompileContext &cc,
+ unsigned flags) {
ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0);
- return buildWrapper(rm, cc, parsed);
+ auto built_expr = buildGraph(rm, cc, parsed);
+ return std::move(built_expr.g);
}
// Helper function: construct a graph from an expression and its flags.
inline
-std::unique_ptr<NGWrapper> constructGraph(const std::string &expr,
- unsigned flags) {
+std::unique_ptr<NGHolder> constructGraph(const std::string &expr,
+ unsigned flags) {
CompileContext cc(false, false, get_current_target(), Grey());
return constructGraphWithCC(expr, cc, flags);
}
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
// The graph should be merged into: a(b|c)
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("(ab|ac)", cc, 0));
+ auto graph(constructGraphWithCC("(ab|ac)", cc, 0));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
// The graph should be merged into: (b|c)a
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("(ba|ca)", cc, 0));
+ auto graph(constructGraphWithCC("(ba|ca)", cc, 0));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
// The graph should be merged into: a(..)+(X|Y)
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc,
- HS_FLAG_DOTALL));
+ auto graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
// The graph should be merged into: (X|Y)(..)+a
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc,
- HS_FLAG_DOTALL));
+ auto graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
// The graph should be merged into: [^\x00]*[\x00]
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]",
- cc, 0));
+ auto graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", cc, 0));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_PREFIX;
TEST(NFAGraph, RemoveEquivalence6) {
// Build a small graph with two redundant vertices: ^(.*|.*)a
// The graph should be merged into: a
- unique_ptr<NGWrapper> graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL));
+ auto graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
TEST(NFAGraph, RemoveEquivalence7) {
// Build a small graph with no redundant vertices: ^.+a
// Make sure we don't merge anything
- unique_ptr<NGWrapper> graph(constructGraph("^.+a", HS_FLAG_DOTALL));
+ auto graph(constructGraph("^.+a", HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
CompileContext cc(false, false, get_current_target(), Grey());
ReportManager rm(cc.grey);
ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0);
- auto g = buildWrapper(rm, cc, parsed);
+ auto built_expr = buildGraph(rm, cc, parsed);
+ const auto &g = built_expr.g;
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
set<pair<size_t, size_t>> matches;
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
// The character reachability should be merged into: [ab]c
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("(a|b)c", cc, 0));
+ auto graph(constructGraphWithCC("(a|b)c", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
NGHolder &g = *graph;
// Build a small graph with a redundant vertex: a.*b?c
// The dot-star should swallow the 'b?', leaving a.*c
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("a.*b?c", cc,
- HS_FLAG_DOTALL));
+ auto graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL));
ASSERT_TRUE(graph.get() != nullptr);
NGHolder &g = *graph;
TEST(NFAGraph, RemoveRedundancy3) {
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("foobar.*(a|b)?teakettle",
- cc, 0));
+ auto graph(constructGraphWithCC("foobar.*(a|b)?teakettle", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph);
TEST(NFAGraph, RemoveRedundancy4) {
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0));
+ auto graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph);
TEST(NFAGraph, RemoveRedundancy5) {
CompileContext cc(false, false, get_current_target(), Grey());
- unique_ptr<NGWrapper> graph(constructGraphWithCC("[0-9]?badgerbrush",
- cc, 0));
+ auto graph(constructGraphWithCC("[0-9]?badgerbrush", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph);
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
TEST_P(NFAWidthTest, Check) {
const WidthTest &t = GetParam();
SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern);
- unique_ptr<NGWrapper> w(constructGraph(t.pattern, 0));
+ auto g = constructGraph(t.pattern, 0);
- ASSERT_EQ(t.minWidth, findMinWidth(*w));
- ASSERT_EQ(t.maxWidth, findMaxWidth(*w));
+ ASSERT_EQ(t.minWidth, findMinWidth(*g));
+ ASSERT_EQ(t.maxWidth, findMaxWidth(*g));
}
// for google test
#include "ng_corpus_generator.h"
#include "ng_corpus_editor.h"
+#include "compiler/compiler.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_util.h"
#include "ue2common.h"
/** \brief Concrete implementation */
class CorpusGeneratorImpl : public CorpusGenerator {
public:
- CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props);
- ~CorpusGeneratorImpl() {}
+ CorpusGeneratorImpl(const NGHolder &graph_in, const ExpressionInfo &expr_in,
+ CorpusProperties &props);
+ ~CorpusGeneratorImpl() = default;
void generateCorpus(vector<string> &data);
* bytes in length. */
void addRandom(const min_max &mm, string *out);
+ /** \brief Info about this expression. */
+ const ExpressionInfo &expr;
+
/** \brief The NFA graph we operate over. */
const NGHolder &graph;
CorpusProperties &cProps;
};
-CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in,
+CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in,
+ const ExpressionInfo &expr_in,
CorpusProperties &props)
- : graph(graph_in), cProps(props) {
+ : expr(expr_in), graph(graph_in), cProps(props) {
// if this pattern is to be matched approximately
- if (graph_in.edit_distance && !props.editDistance) {
- props.editDistance = props.rand(0, graph_in.edit_distance + 1);
+ if (expr.edit_distance && !props.editDistance) {
+ props.editDistance = props.rand(0, expr.edit_distance + 1);
}
}
/** \brief Concrete implementation for UTF-8 */
class CorpusGeneratorUtf8 : public CorpusGenerator {
public:
- CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props);
- ~CorpusGeneratorUtf8() {}
+ CorpusGeneratorUtf8(const NGHolder &graph_in, const ExpressionInfo &expr_in,
+ CorpusProperties &props);
+ ~CorpusGeneratorUtf8() = default;
void generateCorpus(vector<string> &data);
* length. */
void addRandom(const min_max &mm, vector<unichar> *out);
+ /** \brief Info about this expression. */
+ const ExpressionInfo &expr;
+
/** \brief The NFA graph we operate over. */
- const NGWrapper &graph;
+ const NGHolder &graph;
/** \brief Reference to our corpus generator properties object (stores some
* state) */
CorpusProperties &cProps;
};
-CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in,
+CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
+ const ExpressionInfo &expr_in,
CorpusProperties &props)
- : graph(graph_in), cProps(props) {
+ : expr(expr_in), graph(graph_in), cProps(props) {
// we do not support Utf8 for approximate matching
- if (graph.edit_distance) {
+ if (expr.edit_distance) {
throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
"supported.");
}
// External entry point
-unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGWrapper &graph,
+unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGHolder &graph,
+ const ExpressionInfo &expr,
CorpusProperties &props) {
- if (graph.utf8) {
- return ue2::make_unique<CorpusGeneratorUtf8>(graph, props);
+ if (expr.utf8) {
+ return ue2::make_unique<CorpusGeneratorUtf8>(graph, expr, props);
} else {
- return ue2::make_unique<CorpusGeneratorImpl>(graph, props);
+ return ue2::make_unique<CorpusGeneratorImpl>(graph, expr, props);
}
}
namespace ue2 {
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
} // namespace ue2
/** \brief Build a concrete impl conforming to the \ref CorpusGenerator
* interface. */
std::unique_ptr<CorpusGenerator>
-makeCorpusGenerator(const ue2::NGWrapper &graph, CorpusProperties &props);
+makeCorpusGenerator(const ue2::NGHolder &g, const ue2::ExpressionInfo &expr,
+ CorpusProperties &props);
#endif