src/nfa/mpv_internal.h
src/nfa/mpvcompile.cpp
src/nfa/mpvcompile.h
+ src/nfa/multiaccel_compilehelper.cpp
+ src/nfa/multiaccel_compilehelper.h
src/nfa/nfa_api.h
src/nfa/nfa_api_queue.h
src/nfa/nfa_api_util.h
aux->accel_type = ACCEL_NONE;
}
+/**
+ * \brief Try to build a multibyte acceleration scheme into \a aux.
+ *
+ * Nothing is built if \a info carries no multibyte scheme (MAT_NONE) or if
+ * every byte is a stop character. Otherwise the engines are attempted in
+ * order: vermicelli (a single stop character, or a caseless pair), then
+ * shufti, then truffle (only when there are at most ACCEL_MAX_STOP_CHAR stop
+ * characters). If no engine can be built, aux->accel_type is not assigned, so
+ * the caller can detect the failure by checking for ACCEL_NONE.
+ *
+ * Each case writes through the union member that matches the accel type being
+ * built (mverm/mshufti/mtruffle for single-run schemes, mdverm/mdshufti/
+ * mdtruffle for double-shift schemes).
+ *
+ * \param info multibyte scheme description (type, stops, offset, lengths).
+ * \param aux  output structure; must have accel_type == ACCEL_NONE on entry.
+ */
+static
+void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
+    if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
+        DEBUG_PRINTF("no multimatch for us :(\n");
+        return;
+    }
+
+    u32 offset = info.multiaccel_offset;
+    const CharReach &stops = info.multiaccel_stops;
+
+    assert(aux->accel_type == ACCEL_NONE);
+    if (stops.all()) {
+        // every byte stops us: nothing to accelerate over
+        return;
+    }
+
+    size_t outs = stops.count();
+    DEBUG_PRINTF("%zu outs\n", outs);
+    assert(outs && outs < 256);
+
+    // First choice: vermicelli, usable for one stop char or a caseless pair.
+    switch (info.ma_type) {
+    case MultibyteAccelInfo::MAT_LONG:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MLVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MLVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_LONGGRAB:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MLGVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MLGVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_SHIFT:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MSVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MSVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_SHIFTGRAB:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MSGVERM;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first();
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MSGVERM_NOCASE;
+            aux->mverm.offset = offset;
+            aux->mverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mverm.len = info.ma_len1;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_DSHIFT:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MDSVERM;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first();
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n",
+                         aux->mdverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MDSVERM_NOCASE;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mdverm.c);
+            return;
+        }
+        break;
+    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+        if (outs == 1) {
+            aux->accel_type = ACCEL_MDSGVERM;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first();
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n",
+                         aux->mdverm.c);
+            return;
+        }
+        if (outs == 2 && stops.isCaselessChar()) {
+            aux->accel_type = ACCEL_MDSGVERM_NOCASE;
+            aux->mdverm.offset = offset;
+            aux->mdverm.c = stops.find_first() & CASE_CLEAR;
+            aux->mdverm.len1 = info.ma_len1;
+            aux->mdverm.len2 = info.ma_len2;
+            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+                         aux->mdverm.c);
+            return;
+        }
+        break;
+    default:
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+
+    DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
+
+    // Second choice: shufti. A return of -1 from shuftiBuildMasks means the
+    // masks could not be built, in which case we drop through to truffle.
+    switch (info.ma_type) {
+    case MultibyteAccelInfo::MAT_LONG:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MLSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_LONGGRAB:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MLGSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_SHIFT:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MSSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_SHIFTGRAB:
+        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
+                             &aux->mshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MSGSHUFTI;
+        aux->mshufti.offset = offset;
+        aux->mshufti.len = info.ma_len1;
+        return;
+    case MultibyteAccelInfo::MAT_DSHIFT:
+        if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
+                             &aux->mdshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MDSSHUFTI;
+        aux->mdshufti.offset = offset;
+        aux->mdshufti.len1 = info.ma_len1;
+        aux->mdshufti.len2 = info.ma_len2;
+        return;
+    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+        if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
+                             &aux->mdshufti.hi) == -1) {
+            break;
+        }
+        aux->accel_type = ACCEL_MDSGSHUFTI;
+        aux->mdshufti.offset = offset;
+        aux->mdshufti.len1 = info.ma_len1;
+        aux->mdshufti.len2 = info.ma_len2;
+        return;
+    default:
+        // shouldn't happen
+        assert(0);
+        return;
+    }
+    DEBUG_PRINTF("shufti build failed, falling through\n");
+
+    // Last choice: truffle, only attempted for a bounded number of stops.
+    if (outs <= ACCEL_MAX_STOP_CHAR) {
+        DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
+        switch (info.ma_type) {
+        case MultibyteAccelInfo::MAT_LONG:
+            aux->accel_type = ACCEL_MLTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_LONGGRAB:
+            aux->accel_type = ACCEL_MLGTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_SHIFT:
+            aux->accel_type = ACCEL_MSTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_SHIFTGRAB:
+            aux->accel_type = ACCEL_MSGTRUFFLE;
+            aux->mtruffle.offset = offset;
+            aux->mtruffle.len = info.ma_len1;
+            truffleBuildMasks(stops, &aux->mtruffle.mask1,
+                              &aux->mtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_DSHIFT:
+            aux->accel_type = ACCEL_MDSTRUFFLE;
+            aux->mdtruffle.offset = offset;
+            aux->mdtruffle.len1 = info.ma_len1;
+            aux->mdtruffle.len2 = info.ma_len2;
+            truffleBuildMasks(stops, &aux->mdtruffle.mask1,
+                              &aux->mdtruffle.mask2);
+            break;
+        case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+            aux->accel_type = ACCEL_MDSGTRUFFLE;
+            aux->mdtruffle.offset = offset;
+            aux->mdtruffle.len1 = info.ma_len1;
+            aux->mdtruffle.len2 = info.ma_len2;
+            truffleBuildMasks(stops, &aux->mdtruffle.mask1,
+                              &aux->mdtruffle.mask2);
+            break;
+        default:
+            // shouldn't happen
+            assert(0);
+            return;
+        }
+        return;
+    }
+
+    DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
+}
+
+
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
assert(aux->accel_type == ACCEL_NONE);
if (info.single_stops.none()) {
DEBUG_PRINTF("picked red tape\n");
aux->accel_type = ACCEL_RED_TAPE;
aux->generic.offset = info.single_offset;
- } else {
+ }
+ if (aux->accel_type == ACCEL_NONE) {
+ buildAccelMulti(info, aux);
+ }
+ if (aux->accel_type == ACCEL_NONE) {
buildAccelDouble(info, aux);
}
if (aux->accel_type == ACCEL_NONE) {
#include "ue2common.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
+#include "nfagraph/ng_limex_accel.h"
union AccelAux;
struct AccelInfo {
AccelInfo() : single_offset(0U), double_offset(0U),
- single_stops(CharReach::dot()) {}
+ single_stops(CharReach::dot()),
+ multiaccel_offset(0), ma_len1(0), ma_len2(0),
+ ma_type(MultibyteAccelInfo::MAT_NONE) {}
u32 single_offset; /**< offset correction to apply to single schemes */
u32 double_offset; /**< offset correction to apply to double schemes */
CharReach double_stop1; /**< single-byte accel stop literals for double
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
* literals */
CharReach single_stops; /**< escapes for single byte acceleration */
+ u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
+ CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
+ u32 ma_len1; /**< multiaccel len1 */
+ u32 ma_len2; /**< multiaccel len2 */
+ MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
};
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
CharReach double_cr;
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
u32 double_offset;
+
+ MultibyteAccelInfo ma_info;
};
-struct meteor_accel_info {
+struct limex_accel_info {
ue2::unordered_set<NFAVertex> accelerable;
map<NFAStateSet, precalcAccel> precalc;
ue2::unordered_map<NFAVertex, flat_set<NFAVertex> > friends;
bool stateCompression;
const CompileContext &cc;
u32 num_states;
- meteor_accel_info accel;
+ limex_accel_info accel;
};
// Constants for scoring mechanism
}
struct AccelBuild {
- AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0) {}
+ AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0),
+ ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
NFAVertex v;
u32 state;
u32 offset; // offset correction to apply
CharReach stop1; // single-byte accel stop literals
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
+ u32 ma_len1; // multiaccel len1
+ u32 ma_len2; // multiaccel len2
+ MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
};
static
build.stop1 = CharReach::dot();
} else {
const precalcAccel &precalc = bi.accel.precalc.at(ss);
- if (precalc.double_lits.empty()) {
+ unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2;
+ if (ma_len >= MULTIACCEL_MIN_LEN) {
+ build.ma_len1 = precalc.ma_info.len1;
+ build.stop1 = precalc.ma_info.cr;
+ build.offset = precalc.ma_info.offset;
+ } else if (precalc.double_lits.empty()) {
build.stop1 = precalc.single_cr;
build.offset = precalc.single_offset;
} else {
}
static
-bool containsBadSubset(const meteor_accel_info &accel,
+bool containsBadSubset(const limex_accel_info &accel,
const NFAStateSet &state_set, const u32 effective_sds) {
NFAStateSet subset(state_set.size());
for (size_t j = state_set.find_first(); j != state_set.npos;
ue2::unordered_map<NFAVertex, AccelScheme> &accel_map,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- const u32 num_states, meteor_accel_info *accel) {
+ const u32 num_states, limex_accel_info *accel,
+ const CompileContext &cc) {
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
vector<NFAVertex> astates;
DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset);
+ // try multibyte acceleration first
+ MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
+
precalcAccel &pa = accel->precalc[state_set];
+ useful |= state_set;
+
+ // if we successfully built a multibyte accel scheme, use that
+ if (mai.type != MultibyteAccelInfo::MAT_NONE) {
+ pa.ma_info = mai;
+
+ DEBUG_PRINTF("multibyte acceleration!\n");
+ continue;
+ }
+
pa.single_offset = as.offset;
pa.single_cr = as.cr;
- useful |= state_set;
if (states.size() == 1) {
DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front());
filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map);
assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES);
doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic,
- bi.num_states, &bi.accel);
+ bi.num_states, &bi.accel, bi.cc);
}
/** The AccelAux structure has large alignment specified, and this makes some
void buildAccel(const build_info &args, NFAStateSet &accelMask,
NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec,
vector<u8> &accelTable) {
- const meteor_accel_info &accel = args.accel;
+ const limex_accel_info &accel = args.accel;
// Init, all zeroes.
accelMask.resize(args.num_states);
if (contains(accel.precalc, states)) {
const precalcAccel &precalc = accel.precalc.at(states);
- ainfo.single_offset = precalc.single_offset;
- ainfo.single_stops = precalc.single_cr;
+ if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
+ ainfo.ma_len1 = precalc.ma_info.len1;
+ ainfo.ma_len2 = precalc.ma_info.len2;
+ ainfo.multiaccel_offset = precalc.ma_info.offset;
+ ainfo.multiaccel_stops = precalc.ma_info.cr;
+ ainfo.ma_type = precalc.ma_info.type;
+ } else {
+ ainfo.single_offset = precalc.single_offset;
+ ainfo.single_stops = precalc.single_cr;
+ }
}
buildAccelAux(ainfo, &aux);
--- /dev/null
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "multiaccel_compilehelper.h"
+
+using namespace std;
+using namespace ue2;
+
+#ifdef DEBUG
+/* Printable names for accel_scheme_state, indexed by the enum's value. */
+static const char* state_to_str[] = {
+    "FIRST_RUN", "SECOND_RUN", "WAITING_FOR_GRAB", "FIRST_TAIL",
+    "SECOND_TAIL", "STOPPED", "INVALID"
+};
+/* Printable names for MultibyteAccelInfo::multiaccel_type, indexed by the
+ * enum's value. */
+static const char* type_to_str[] = {
+    "SHIFT", "SHIFTGRAB", "DOUBLESHIFT", "DOUBLESHIFTGRAB",
+    "LONG", "LONGGRAB", "NONE"
+};
+
+/* Debug helper: print the type, state and run/tail lengths of one scheme. */
+static
+void dumpMultiaccelState(const accel_data &d) {
+    const char *type_name = type_to_str[(unsigned) d.type];
+    const char *state_name = state_to_str[(unsigned) d.state];
+    DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
+                 type_name, state_name, d.len1, d.tlen1, d.len2, d.tlen2);
+}
+#endif
+
+/*
+ * Finish matching for one scheme because no further vertices will be fed to
+ * it. Depending on how far the scheme got, it ends up either STATE_STOPPED
+ * (usable) or STATE_INVALID. Schemes already in one of those two states are
+ * left untouched.
+ */
+static
+void stop(accel_data &d) {
+    if (d.state == STATE_STOPPED || d.state == STATE_INVALID) {
+        // already finished
+        return;
+    }
+
+    if (d.state == STATE_SECOND_TAIL) {
+        d.state = STATE_STOPPED;
+        return;
+    }
+
+    if (d.state == STATE_FIRST_TAIL || d.state == STATE_SECOND_RUN) {
+        /*
+         * Shift matchers have "tails": once a shift matcher reaches a
+         * mid/endpoint it keeps looking for further matches that would
+         * extend the run.
+         *
+         * Take /a{5}ba{3}/. Normally the long-grab matcher wins here (a run
+         * of a's followed by a not-a), since a doubleshift matcher would be
+         * confused by the consecutive a's, read the pattern as a.{0}a.{0}a
+         * (two shifts by 1) and discard the rest.
+         *
+         * With tails, the end of a run is deferred until the matching
+         * characters actually run out, so doubleshift now reads the same
+         * pattern as /a.{3}a.{3}a/ (two shifts by 4).
+         *
+         * Consequently, when a shift matcher is stopped in the middle of its
+         * first tail or second run, the second run cannot be completed; what
+         * we can do is try to split the first tail into a valid second run.
+         */
+        const bool is_double =
+            d.type == MultibyteAccelInfo::MAT_DSHIFT ||
+            d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB;
+        if (is_double && d.tlen1 == 0) {
+            // there is no tail to split
+            d.state = STATE_INVALID;
+            return;
+        }
+        d.len2 = 0;
+        d.state = STATE_STOPPED;
+        return;
+    }
+
+    if (d.state == STATE_WAITING_FOR_GRAB || d.state == STATE_FIRST_RUN) {
+        // only the long matcher is usable when cut off during its first run
+        d.state = (d.type == MultibyteAccelInfo::MAT_LONG) ? STATE_STOPPED
+                                                           : STATE_INVALID;
+    }
+}
+
+/*
+ * Fit the run and tail lengths collected for one scheme into the length
+ * budget, then decide whether the scheme is still usable.
+ *
+ * Tails are folded into their runs where the total stays strictly below
+ * max_len; otherwise they are truncated or dropped (cases 1-4 below). The
+ * scheme is marked STATE_INVALID if a doubleshift scheme ends up without a
+ * second run, if the total length is >= max_len, or if it is shorter than
+ * MULTIACCEL_MIN_LEN.
+ *
+ * d:       scheme to adjust in place.
+ * max_len: exclusive upper bound on len1 + tlen1 + len2 + tlen2.
+ */
+static
+void validate(accel_data &d, unsigned max_len) {
+ // try and fit in all our tails
+ if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) {
+ // case 1: everything fits in
+ d.len1 += d.tlen1;
+ d.len2 += d.tlen2;
+ d.tlen1 = 0;
+ d.tlen2 = 0;
+ } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) {
+ // case 2: everything but the second tail fits in
+ d.len1 += d.tlen1;
+ d.tlen1 = 0;
+ // try going for a partial tail
+ if (d.tlen2 != 0) {
+ // len1 + len2 < max_len was established above, so this is >= 0
+ int new_tlen2 = max_len - 1 - d.len1 - d.len2;
+ if (new_tlen2 > 0) {
+ d.len2 += new_tlen2;
+ }
+ d.tlen2 = 0;
+ }
+ } else if (d.len1 + d.tlen1 < max_len) {
+ // case 3: first run and its tail fits in
+ if (d.type == MultibyteAccelInfo::MAT_DSHIFT ||
+ d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
+ // split the tail into a second run
+ d.len2 = d.tlen1;
+ } else {
+ d.len1 += d.tlen1;
+ d.len2 = 0;
+ }
+ d.tlen1 = 0;
+ d.tlen2 = 0;
+ } else if (d.len1 < max_len) {
+ // case 4: nothing but the first run fits in
+ // try going for a partial tail
+ if (d.tlen1 != 0) {
+ // len1 < max_len was established above, so this is >= 0
+ int new_tlen1 = max_len - 1 - d.len1;
+ if (new_tlen1 > 0) {
+ d.len1 += new_tlen1;
+ }
+ d.tlen1 = 0;
+ }
+ d.len2 = 0;
+ d.tlen2 = 0;
+ }
+ // if we removed our second run, doubleshift matchers are no longer valid
+ if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
+ d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) {
+ d.state = STATE_INVALID;
+ } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) {
+ // long matchers can just stop whenever they want to
+ d.len1 = max_len - 1;
+ }
+
+ // now, general sanity checks
+ if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) {
+ d.state = STATE_INVALID;
+ }
+ if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) {
+ d.state = STATE_INVALID;
+ }
+}
+
+/*
+ * Feed the reach of the next vertex into the state machine of one scheme.
+ *
+ * d:      scheme state to update in place; the caller (advance) only passes
+ *         schemes that are neither STATE_STOPPED nor STATE_INVALID.
+ * ref_cr: reference reach the scheme is built around.
+ * cur_cr: reach of the vertex currently being consumed.
+ *
+ * The grab variants (SHIFTGRAB, DSHIFTGRAB) handle their initial
+ * STATE_WAITING_FOR_GRAB step themselves and then deliberately fall through
+ * into the corresponding non-grab case.
+ */
+static
+void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
+ switch (d.type) {
+ case MultibyteAccelInfo::MAT_LONG:
+ {
+ /*
+ * For long matcher, we want lots of consecutive same-or-subset
+ * char-reaches
+ */
+ if ((ref_cr & cur_cr) == cur_cr) {
+ d.len1++;
+ } else {
+ d.state = STATE_STOPPED;
+ }
+ }
+ break;
+
+ case MultibyteAccelInfo::MAT_LONGGRAB:
+ {
+ /*
+ * For long-grab matcher, we want lots of consecutive same-or-subset
+ * char-reaches with a negative match in the end.
+ */
+ if ((ref_cr & cur_cr) == cur_cr) {
+ d.len1++;
+ } else if (!(ref_cr & cur_cr).any()) {
+ /* we grabbed, stop immediately */
+ d.state = STATE_STOPPED;
+ } else {
+ /* our run-n-grab was interrupted; mark as invalid */
+ d.state = STATE_INVALID;
+ }
+ }
+ break;
+
+ case MultibyteAccelInfo::MAT_SHIFTGRAB:
+ {
+ /*
+ * For shift-grab matcher, we want two matches separated by anything;
+ * however the second vertex *must* be a negative (non-overlapping) match.
+ *
+ * Shiftgrab matcher is identical to shift except for presence of grab.
+ */
+ if (d.state == STATE_WAITING_FOR_GRAB) {
+ if ((ref_cr & cur_cr).any()) {
+ d.state = STATE_INVALID;
+ } else {
+ d.state = STATE_FIRST_RUN;
+ d.len1++;
+ }
+ return;
+ }
+ }
+ /* no break, falling through */
+ case MultibyteAccelInfo::MAT_SHIFT:
+ {
+ /*
+ * For shift-matcher, we want two matches separated by anything.
+ */
+ if (ref_cr == cur_cr) {
+ // keep matching tail
+ switch (d.state) {
+ case STATE_FIRST_RUN:
+ d.state = STATE_FIRST_TAIL;
+ break;
+ case STATE_FIRST_TAIL:
+ d.tlen1++;
+ break;
+ default:
+ // shouldn't happen
+ assert(0);
+ }
+ } else {
+ switch (d.state) {
+ case STATE_FIRST_RUN:
+ // simply advance
+ d.len1++;
+ break;
+ case STATE_FIRST_TAIL:
+ // we found a non-matching char after tail, so stop
+ d.state = STATE_STOPPED;
+ break;
+ default:
+ // shouldn't happen
+ assert(0);
+ }
+ }
+ }
+ break;
+
+ case MultibyteAccelInfo::MAT_DSHIFTGRAB:
+ {
+ /*
+ * For double shift-grab matcher, we want two matches separated by
+ * either negative matches or dots; however the second vertex *must*
+ * be a negative match.
+ *
+ * Doubleshiftgrab matcher is identical to doubleshift except for
+ * presence of grab.
+ */
+ if (d.state == STATE_WAITING_FOR_GRAB) {
+ if ((ref_cr & cur_cr).any()) {
+ d.state = STATE_INVALID;
+ } else {
+ d.state = STATE_FIRST_RUN;
+ d.len1++;
+ }
+ return;
+ }
+ }
+ /* no break, falling through */
+ case MultibyteAccelInfo::MAT_DSHIFT:
+ {
+ /*
+ * For double shift matcher, we want three matches, each separated
+ * by a lot of anything.
+ *
+ * Doubleshift matcher is complicated by presence of tails.
+ */
+ if (ref_cr == cur_cr) {
+ // decide if we are activating second shift or matching tails
+ switch (d.state) {
+ case STATE_FIRST_RUN:
+ d.state = STATE_FIRST_TAIL;
+ d.len2 = 1; // we're now ready for our second run
+ break;
+ case STATE_FIRST_TAIL:
+ d.tlen1++;
+ break;
+ case STATE_SECOND_RUN:
+ d.state = STATE_SECOND_TAIL;
+ break;
+ case STATE_SECOND_TAIL:
+ d.tlen2++;
+ break;
+ default:
+ // shouldn't happen
+ assert(0);
+ }
+ } else {
+ switch (d.state) {
+ case STATE_FIRST_RUN:
+ d.len1++;
+ break;
+ case STATE_FIRST_TAIL:
+ // start second run
+ d.state = STATE_SECOND_RUN;
+ d.len2++;
+ break;
+ case STATE_SECOND_RUN:
+ d.len2++;
+ break;
+ case STATE_SECOND_TAIL:
+ // stop
+ d.state = STATE_STOPPED;
+ break;
+ default:
+ // shouldn't happen
+ assert(0);
+ }
+ }
+ }
+ break;
+
+ default:
+ // shouldn't happen
+ assert(0);
+ break;
+ }
+}
+
+/*
+ * Set up one candidate scheme per multiaccel type, each starting with a first
+ * run of length 1. Grab variants of the shift schemes begin by waiting for
+ * their grab vertex; every other scheme begins in its first run.
+ */
+MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off,
+                                                 unsigned max_len) :
+    cr(ref_cr), offset(off), max_len(max_len) {
+    const int num_schemes = static_cast<int>(MultibyteAccelInfo::MAT_MAX);
+    accels.resize(num_schemes);
+
+    for (int idx = 0; idx < num_schemes; idx++) {
+        accel_data &scheme = accels[idx];
+        scheme.type = static_cast<MultibyteAccelInfo::multiaccel_type>(idx);
+        scheme.len1 = 1;
+
+        const bool needs_grab =
+            scheme.type == MultibyteAccelInfo::MAT_SHIFTGRAB ||
+            scheme.type == MultibyteAccelInfo::MAT_DSHIFTGRAB;
+        scheme.state = needs_grab ? STATE_WAITING_FOR_GRAB : STATE_FIRST_RUN;
+    }
+}
+
+/* Returns true while at least one scheme is neither stopped nor invalid. */
+bool MultiaccelCompileHelper::canAdvance() {
+    for (const accel_data &scheme : accels) {
+        const bool finished = scheme.state == STATE_STOPPED ||
+                              scheme.state == STATE_INVALID;
+        if (!finished) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Feed the reach of the next vertex to every scheme that is still running. */
+void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
+    for (accel_data &scheme : accels) {
+        const bool live = scheme.state != STATE_STOPPED &&
+                          scheme.state != STATE_INVALID;
+        if (live) {
+            match(scheme, cr, cur_cr);
+#ifdef DEBUG
+            dumpMultiaccelState(scheme);
+#endif
+        }
+    }
+}
+
+/*
+ * Stop and validate every candidate scheme, then return the valid one with
+ * the greatest len1 + len2. Ties go to the scheme visited later. If no scheme
+ * is valid, a default-constructed MultibyteAccelInfo is returned.
+ */
+MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
+    accel_data winner; // default-constructed: state is STATE_INVALID
+    int winner_len = 0;
+
+    DEBUG_PRINTF("Stopping multiaccel compile\n");
+
+    for (accel_data &scheme : accels) {
+        // finish off the scheme and fit it into the length budget
+        stop(scheme);
+        validate(scheme, max_len);
+
+#ifdef DEBUG
+        dumpMultiaccelState(scheme);
+#endif
+
+        if (scheme.state != STATE_INVALID) {
+            DEBUG_PRINTF("Marking as viable\n");
+
+            // TODO: relative strengths of accel schemes? maybe e.g. a shorter
+            // long match would in some cases be preferable to a longer
+            // double shift match (for example, depending on length)?
+            int scheme_len = scheme.len1 + scheme.len2;
+            if (scheme_len >= winner_len) {
+                DEBUG_PRINTF("Marking as best\n");
+                winner_len = scheme_len;
+                winner = scheme;
+            }
+        }
+    }
+
+    // nothing viable was found
+    if (winner.state == STATE_INVALID) {
+        return MultibyteAccelInfo();
+    }
+
+#ifdef DEBUG
+    DEBUG_PRINTF("Picked best multiaccel state:\n");
+    dumpMultiaccelState(winner);
+#endif
+    MultibyteAccelInfo info;
+    info.cr = cr;
+    info.offset = offset;
+    info.len1 = winner.len1;
+    info.len2 = winner.len2;
+    info.type = winner.type;
+    return info;
+}
--- /dev/null
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MULTIACCELCOMPILE_H_
+#define MULTIACCELCOMPILE_H_
+
+#include "ue2common.h"
+
+#include "nfagraph/ng_limex_accel.h"
+
+#include <vector>
+
+namespace ue2 {
+
+/*
+ * accel scheme state machine
+ *
+ * The enumerator order is relied on by the debug name table (state_to_str)
+ * in multiaccel_compilehelper.cpp, which is indexed by these values.
+ */
+enum accel_scheme_state {
+ STATE_FIRST_RUN, /* extending the first run (len1) */
+ STATE_SECOND_RUN, /* extending the second run (len2); doubleshift only */
+ STATE_WAITING_FOR_GRAB, /* initial state of the shift-grab schemes */
+ STATE_FIRST_TAIL, /* counting the tail after the first run (tlen1) */
+ STATE_SECOND_TAIL, /* counting the tail after the second run (tlen2) */
+ STATE_STOPPED, /* matching finished; no further input is consumed */
+ STATE_INVALID /* scheme cannot be used */
+};
+
+/*
+ * Working state for one candidate multibyte scheme while the compile helper
+ * walks the graph. A default-constructed value has no scheme type and is in
+ * STATE_INVALID.
+ */
+struct accel_data {
+ MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE;
+ accel_scheme_state state = STATE_INVALID;
+ unsigned len1 = 0; /* length of first run */
+ unsigned len2 = 0; /* length of second run, if present */
+ unsigned tlen1 = 0; /* first tail length */
+ unsigned tlen2 = 0; /* second tail length */
+};
+
+/*
+ * Runs every multibyte scheme's state machine in parallel over a sequence of
+ * vertex reaches and reports the best scheme that survives.
+ *
+ * Usage: construct with the reference reach, offset and length bound; call
+ * advance() once per vertex while canAdvance() is true; then call
+ * getBestScheme(), which finalises the schemes and picks one.
+ */
+class MultiaccelCompileHelper {
+private:
+    // Held by value so the helper never refers to a caller-owned (possibly
+    // temporary) CharReach after construction.
+    CharReach cr;
+    u32 offset;
+    std::vector<accel_data> accels; // one entry per multiaccel type
+    unsigned max_len;
+public:
+    MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);
+    bool canAdvance();
+    MultibyteAccelInfo getBestScheme();
+    void advance(const ue2::CharReach &cr);
+};
+
+}; // namespace
+
+#endif /* MULTIACCELCOMPILE_H_ */
#include "ue2common.h"
#include "nfa/accel.h"
+#include "nfa/multiaccel_compilehelper.h"
#include "util/bitutils.h" // for CASE_CLEAR
#include "util/charreach.h"
+#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
+#include "util/target_info.h"
#include <algorithm>
#include <map>
return g.startDs;
}
+/* Returns the first successor of \a v other than \a v itself, or the null
+ * vertex if there is none. */
+static
+NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
+    for (NFAVertex succ : adjacent_vertices_range(v, g)) {
+        if (succ != v) {
+            return succ;
+        }
+    }
+    return NFAGraph::null_vertex();
+}
+
+
+/**
+ * \brief Check whether \a states can use a multibyte acceleration scheme (for
+ * a limex NFA).
+ *
+ * Only a single state is supported: a self-looping dot vertex with exactly
+ * one other successor. Starting at that successor, leading dot vertices are
+ * counted as the offset; the first non-dot reach becomes the reference reach,
+ * and the vertices that follow it are fed to a MultiaccelCompileHelper until
+ * the chain ends or every scheme has finished.
+ *
+ * \return the chosen scheme, or a default-constructed MultibyteAccelInfo
+ * (type MAT_NONE) if the states cannot be accelerated this way.
+ */
+MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
+                                      const vector<NFAVertex> &states,
+                                      const CompileContext &cc) {
+    // For a set of states to be accelerable, we basically have to have only
+    // one state to accelerate.
+    if (states.size() != 1) {
+        DEBUG_PRINTF("can't accelerate multiple states\n");
+        return MultibyteAccelInfo();
+    }
+
+    // Get our base vertex
+    NFAVertex v = states[0];
+
+    // We need the base vertex to be a self-looping dotall leading to exactly
+    // one vertex.
+    if (!hasSelfLoop(v, g)) {
+        DEBUG_PRINTF("base vertex has no self-loop\n");
+        return MultibyteAccelInfo();
+    }
+
+    if (!g[v].char_reach.all()) {
+        DEBUG_PRINTF("can't accelerate anything but dot\n");
+        return MultibyteAccelInfo();
+    }
+
+    if (proper_out_degree(v, g) != 1) {
+        DEBUG_PRINTF("can't accelerate states with multiple successors\n");
+        return MultibyteAccelInfo();
+    }
+
+    // find our start vertex
+    NFAVertex cur = find_next(v, g);
+    if (cur == NFAGraph::null_vertex()) {
+        DEBUG_PRINTF("invalid start vertex\n");
+        return MultibyteAccelInfo();
+    }
+
+    bool has_offset = false;
+    u32 offset = 0;
+    CharReach cr = g[cur].char_reach;
+
+    // if we start with a dot, we have an offset, so defer figuring out the
+    // real CharReach for this accel scheme
+    if (cr == CharReach::dot()) {
+        has_offset = true;
+        offset = 1;
+    }
+
+    // figure out our offset: skip over the chain of leading dot vertices
+    while (has_offset) {
+        // vertices have to have no self loops
+        if (hasSelfLoop(cur, g)) {
+            DEBUG_PRINTF("can't have self-loops\n");
+            return MultibyteAccelInfo();
+        }
+
+        // we have to have exactly 1 successor to have this acceleration scheme
+        if (out_degree(cur, g) != 1) {
+            DEBUG_PRINTF("can't have multiple successors\n");
+            return MultibyteAccelInfo();
+        }
+
+        cur = *adjacent_vertices(cur, g).first;
+
+        // if we met a special vertex, bail out
+        if (is_special(cur, g)) {
+            DEBUG_PRINTF("can't have special vertices\n");
+            return MultibyteAccelInfo();
+        }
+
+        // now, get the real char reach
+        if (g[cur].char_reach != CharReach::dot()) {
+            cr = g[cur].char_reach;
+            has_offset = false;
+        } else {
+            offset++;
+        }
+    }
+
+    // now, fire up the compilation machinery
+    target_t ti = cc.target_info;
+    unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
+    MultiaccelCompileHelper mac(cr, offset, max_len);
+
+    // Unlike the offset scan above, running out of usable vertices here is
+    // not a failure: we just stop feeding and let the helper pick a scheme.
+    while (mac.canAdvance()) {
+        // vertices have to have no self loops
+        if (hasSelfLoop(cur, g)) {
+            break;
+        }
+
+        // we have to have exactly 1 successor to have this acceleration scheme
+        if (out_degree(cur, g) != 1) {
+            break;
+        }
+
+        cur = *adjacent_vertices(cur, g).first;
+
+        // if we met a special vertex, bail out
+        if (is_special(cur, g)) {
+            break;
+        }
+
+        mac.advance(g[cur].char_reach);
+    }
+    MultibyteAccelInfo mai = mac.getBestScheme();
+#ifdef DEBUG
+    DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
+                 mai.type, mai.offset, mai.len1, mai.len2);
+    for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
+        DEBUG_PRINTF("multibyte accel char: %zu\n", c);
+    }
+#endif
+    return mai;
+}
+
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr,
#define MAX_MERGED_ACCEL_STOPS 200
#define ACCEL_MAX_STOP_CHAR 24
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
+#define MULTIACCEL_MIN_LEN 3
+#define MULTIACCEL_MAX_LEN_SSE 15
+#define MULTIACCEL_MAX_LEN_AVX2 31
+
+// forward-declaration of CompileContext
+struct CompileContext;
void findAccelFriends(const NGHolder &g, NFAVertex v,
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v);
+/** \brief Description of a multibyte acceleration scheme chosen for a state.
+ * A default-constructed value has type MAT_NONE, i.e. no scheme. */
+struct MultibyteAccelInfo {
+ /* multibyte accel schemes, ordered by strength */
+ enum multiaccel_type {
+ MAT_SHIFT,
+ MAT_SHIFTGRAB,
+ MAT_DSHIFT,
+ MAT_DSHIFTGRAB,
+ MAT_LONG,
+ MAT_LONGGRAB,
+ MAT_MAX, /* number of real schemes above */
+ MAT_NONE = MAT_MAX /* "no scheme"; shares its value with MAT_MAX */
+ };
+ CharReach cr; /* reference reach; used as the scheme's stop characters */
+ u32 offset = 0; /* offset correction to apply to the scheme */
+ u32 len1 = 0; /* length of first run */
+ u32 len2 = 0; /* length of second run (doubleshift schemes) */
+ multiaccel_type type = MAT_NONE;
+};
+
struct AccelScheme {
AccelScheme(const CharReach &cr_in, u32 offset_in)
: cr(cr_in), offset(offset_in) {
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
AccelScheme *as, bool allow_wide);
+/** \brief Check if the states in \a verts can use a multibyte acceleration
+ * scheme (for a limex NFA). The returned info has type MAT_NONE if they
+ * cannot. */
+MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
+ const std::vector<NFAVertex> &verts,
+ const CompileContext &cc);
+
} // namespace ue2
#endif