src/nfa/vermicelli.h
src/nfa/vermicelli_run.h
src/nfa/vermicelli_sse.h
- src/sidecar/sidecar.c
- src/sidecar/sidecar.h
- src/sidecar/sidecar_generic.h
- src/sidecar/sidecar_internal.h
- src/sidecar/sidecar_shufti.c
- src/sidecar/sidecar_shufti.h
src/som/som.h
src/som/som_runtime.h
src/som/som_runtime.c
src/rose/match.c
src/rose/miracle.h
src/rose/runtime.h
- src/rose/rose_sidecar_runtime.h
src/rose/rose.h
src/rose/rose_internal.h
src/rose/rose_types.h
src/parser/unsupported.h
src/parser/utf8_validate.h
src/parser/utf8_validate.cpp
- src/sidecar/sidecar_compile.cpp
- src/sidecar/sidecar_compile.h
src/smallwrite/smallwrite_build.cpp
src/smallwrite/smallwrite_build.h
src/smallwrite/smallwrite_internal.h
src/parser/dump.cpp
src/parser/dump.h
src/parser/position_dump.h
- src/sidecar/sidecar_dump.cpp
- src/sidecar/sidecar_dump.h
src/smallwrite/smallwrite_dump.cpp
src/smallwrite/smallwrite_dump.h
src/som/slot_manager_dump.cpp
allowRose(true),
allowExtendedNFA(true), /* bounded repeats of course */
allowLimExNFA(true),
- allowSidecar(false),
allowAnchoredAcyclic(true),
allowSmallLiteralSet(true),
allowCastle(true),
G_UPDATE(allowRose);
G_UPDATE(allowExtendedNFA);
G_UPDATE(allowLimExNFA);
- G_UPDATE(allowSidecar);
G_UPDATE(allowAnchoredAcyclic);
G_UPDATE(allowSmallLiteralSet);
G_UPDATE(allowCastle);
bool allowRose;
bool allowExtendedNFA;
bool allowLimExNFA;
- bool allowSidecar;
bool allowAnchoredAcyclic;
bool allowSmallLiteralSet;
bool allowCastle;
bool improveGraph(NGHolder &g, som_type som);
/** Sometimes the reach of a vertex is greater than it needs to be to reduce
- * stop chars for the benefit of the rest of our code base (accel, sidecar,
- * etc). In these circumstances, we can treat the reach as the smaller one as
+ * stop chars for the benefit of the rest of our code base (accel, etc). In
+ * these circumstances, we can treat the reach as the smaller one, as
* the graphs are equivalent. */
CharReach reduced_cr(NFAVertex v, const NGHolder &g,
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
}
static
-bool isStarCliche(const NGHolder &g, const ue2_literal &succ_lit,
- const Grey &grey, CharReach *escapes_out) {
+bool isStarCliche(const NGHolder &g) {
DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g));
bool nonspecials_seen = false;
- CharReach escapes;
-
- // Escapes are only available if we have the Sidecar engine available to
- // implement them.
- const u32 max_escapes = grey.allowSidecar ? MAX_ESCAPE_CHARS : 0;
for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        nonspecials_seen = true;
-        escapes = ~g[v].char_reach;
-        if (escapes.count() > max_escapes) {
+        if (!g[v].char_reach.all()) {
            return false;
        }
        if (!edge(v, g.accept, g).second) {
            return false;
        }
    }
    if (!nonspecials_seen) {
        return false;
    }
    if (!edge(g.startDs, g.accept, g).second) {
        return false;
    }
- /* we need to check that succ lit does not intersect with the escapes. */
- for (const auto &c : succ_lit) {
- if ((escapes & c).any()) {
- return false;
- }
- }
-
- *escapes_out = escapes;
return true;
}
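For illustration, here is a minimal self-contained sketch (using a toy graph type, not the real NGHolder API) of what the simplified check is looking for: with escapes gone, a prefix graph qualifies as a "star cliche" only if every ordinary vertex matches any byte, i.e. the graph is equivalent to /.*/ and can be dropped (graph.reset(), graph_lag = 0) rather than compiled into an engine.

// Toy stand-in for the real check; ToyVertex and toyIsStarCliche are
// hypothetical names used only for this sketch.
#include <bitset>
#include <vector>

struct ToyVertex {
    bool is_special = false;     // analogue of start/startDs/accept vertices
    std::bitset<256> char_reach; // set of byte values this vertex matches
};

static bool toyIsStarCliche(const std::vector<ToyVertex> &g) {
    bool nonspecials_seen = false;
    for (const ToyVertex &v : g) {
        if (v.is_special) {
            continue; // special vertices carry no reach of their own
        }
        nonspecials_seen = true;
        if (!v.char_reach.all()) {
            return false; // some byte is excluded, so this is not plain .*
        }
    }
    return nonspecials_seen; // an empty graph is not a star cliche
}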
if (delay != max_allowed_delay) {
restoreTrailingLiteralStates(*h_new, lit2, delay);
- delay = removeTrailingLiteralStates(*h_new, lit2,
- max_allowed_delay);
+ delay = removeTrailingLiteralStates(*h_new, lit2, max_allowed_delay);
}
- CharReach escapes;
- if (isStarCliche(*h_new, lit2, cc.grey, &escapes)) {
+ if (isStarCliche(*h_new)) {
DEBUG_PRINTF("is a X star!\n");
ig[e].graph.reset();
ig[e].graph_lag = 0;
- ig[e].escapes = escapes;
} else {
ig[e].graph = move(h_new);
ig[e].graph_lag = delay;
void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) {
for (const auto &e : in_edges_range(v, g)) {
const RoseInVertex u = source(e, g);
- CharReach &escapes = g[e].escapes;
if (!g[e].graph) {
g[e].graph = make_shared<NGHolder>(whatRoseIsThis(g, e));
assert(!g[e].maxBound || g[e].maxBound == ROSE_BOUND_INF);
if (g[u].type == RIV_START) {
- assert(escapes.none());
add_edge(h.startDs, h.accept, h);
h[h.startDs].reports.insert(0);
} else if (g[e].maxBound == ROSE_BOUND_INF) {
add_edge(h.start, h.accept, h);
NFAVertex ds = add_vertex(h);
- // Cyclic vertex which takes over handling the escapes inside
- // the prefix graph.
- h[ds].char_reach = ~escapes;
- escapes.clear();
+ h[ds].char_reach = CharReach::dot();
add_edge(h.start, ds, h);
add_edge(ds, ds, h);
h[h.start].reports.insert(0);
h[ds].reports.insert(0);
} else {
- assert(escapes.none());
add_edge(h.start, h.accept, h);
h[h.start].reports.insert(0);
}
#include "nfa/nfa_rev_api.h"
#include "nfa/mcclellan.h"
#include "util/fatbit.h"
-#include "rose_sidecar_runtime.h"
#include "rose.h"
#include "rose_common.h"
} while (1);
}
-static really_inline
-void init_sidecar(const struct RoseEngine *t, struct hs_scratch *scratch) {
- if (!t->smatcherOffset) {
- return;
- }
-
- DEBUG_PRINTF("welcome to the sidecar\n");
- assert(t->initSideEnableOffset);
- // We have to enable some sidecar literals
- const char *template = (const char *)t + t->initSideEnableOffset;
-
- memcpy(&scratch->side_enabled, template, t->stateOffsets.sidecar_size);
-}
-
static really_inline
void init_state_for_block(const struct RoseEngine *t, u8 *state) {
assert(t);
tctxt->next_mpv_offset = 0;
tctxt->curr_anchored_loc = MMB_INVALID;
tctxt->curr_row_offset = 0;
- tctxt->side_curr = 0;
scratch->am_log_sum = 0; /* clear the anchored logs */
scratch->al_log_sum = 0;
fatbit_clear(scratch->aqa);
- init_sidecar(t, scratch); /* Init the sidecar enabled state */
-
scratch->catchup_pq.qm_size = 0;
init_outfixes_for_block(t, scratch, state, is_small_block);
#include "catchup.h"
#include "match.h"
-#include "rose_sidecar_runtime.h"
#include "rose.h"
#include "util/fatbit.h"
DEBUG_PRINTF("eod offset=%llu, eod length=%zu\n", offset, eod_len);
struct RoseContext *tctxt = &scratch->tctxt;
-
- /* update side_curr for eod_len */
- tctxt->side_curr = offset - eod_len;
-
- /* no need to enable any sidecar groups as they are for .*A.* constructs
- * not allowed in the eod table */
-
const struct HWLM *etable = getELiteralMatcher(t);
hwlmExec(etable, eod_data, eod_len, adj, roseCallback, tctxt, tctxt->groups);
// Flush history to make sure it's consistent.
roseFlushLastByteHistory(t, state, offset, tctxt);
-
- // Catch up the sidecar to cope with matches raised in the etable.
- catchup_sidecar(tctxt, offset);
}
static rose_inline
// Unset the reports we just fired so we don't fire them again below.
mmbit_clear(getRoleState(state), t->rolesWithStateCount);
mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount);
- sidecar_enabled_populate(t, scratch, state);
hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming);
if (rv == HWLM_TERMINATE_MATCHING) {
struct RoseContext *tctxt) {
roseFlushLastByteHistory(t, state, length, tctxt);
tctxt->lastEndOffset = length;
- if (t->requiresEodSideCatchup) {
- catchup_sidecar(tctxt, length);
- }
}
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
#include "nfa/mcclellan.h"
#include "nfa/nfa_api_util.h"
#include "nfa/nfa_internal.h"
-#include "sidecar/sidecar.h"
-#include "sidecar/sidecar_internal.h"
#include "util/multibit.h"
#include <string.h>
rstate->broken = NOT_BROKEN;
}
-static really_inline
-void init_sidecar(const struct RoseEngine *t, u8 *state) {
- assert(getSLiteralMatcher(t));
-
- struct sidecar_enabled *enabled_state
- = (struct sidecar_enabled *)(state + t->stateOffsets.sidecar);
-
- DEBUG_PRINTF("welcome to the sidecar\n");
- assert(t->initSideEnableOffset);
- // We have to enable some sidecar literals
- const char *template = (const char *)t + t->initSideEnableOffset;
-
- memcpy(enabled_state, template, t->stateOffsets.sidecar_size);
-}
-
static really_inline
void init_outfixes(const struct RoseEngine *t, u8 *state) {
/* The active leaf array has been init'ed by the scatter with outfix
init_rstate(t, state);
- // Init the sidecar state
- if (t->smatcherOffset) {
- init_sidecar(t, state);
- }
-
init_state(t, state);
init_outfixes(t, state);
#include "infix.h"
#include "match.h"
#include "miracle.h"
-#include "rose_sidecar_runtime.h"
#include "rose.h"
#include "som/som_runtime.h"
#include "util/bitutils.h"
return tctx->groups;
}
-/* Note: uses the stashed sparse iter state; cannot be called from
- * anybody else who is using it
- */
-static never_inline
-void roseSquashStates(const struct RoseEngine *t, const struct RoseSide *tsb,
- struct RoseContext *tctxt) {
- DEBUG_PRINTF("attempting to squash states\n");
-
- struct mmbit_sparse_state *s = tctxtToScratch(tctxt)->sparse_iter_state;
- u8 *state = tctxt->state;
- void *role_state = getRoleState(state);
- u32 role_count = t->rolesWithStateCount;
- const struct mmbit_sparse_iter *it = getByOffset(t, tsb->squashIterOffset);
- assert(ISALIGNED(it));
-
- /* we can squash willy-nilly */
- DEBUG_PRINTF("squashing iter off = %u\n", tsb->squashIterOffset);
- mmbit_sparse_iter_unset(role_state, role_count, it, s);
- DEBUG_PRINTF("squashing groups with = %016llx\n", tsb->squashGroupMask);
- tctxt->groups &= tsb->squashGroupMask;
-}
-
static really_inline
hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 qi, s64a loc,
// offset-tracking role.
if (alreadySet) {
DEBUG_PRINTF("role already set\n");
- if (tr->sidecarEnableOffset) {
- enable_sidecar(tctxt, tr);
- }
return;
}
// Switch on this role's groups
tctxt->groups |= tr->groups;
-
- if (tr->sidecarEnableOffset) {
- // We have to enable some sidecar literals
- enable_sidecar(tctxt, tr);
- }
}
static rose_inline
}
}
-void roseSidecarCallback(UNUSED u64a offset, u32 side_id, void *context) {
- struct RoseContext *tctxt = context;
- const struct RoseEngine *t = tctxt->t;
-
- DEBUG_PRINTF("SIDE MATCH side_id=%u offset=[%llu, %llu]\n", side_id,
- offset, offset + 1);
- assert(side_id < t->sideCount);
-
- const struct RoseSide *side = &getSideEntryTable(t)[side_id];
- roseSquashStates(t, side, tctxt);
-
- DEBUG_PRINTF("done with sc\n");
-}
-
/* handles catchup, som, cb, etc */
static really_inline
hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, u8 *state,
tctxt->lastEndOffset = real_end;
}
- if (tl->requires_side
- && real_end <= t->floatingMinLiteralMatchOffset) {
- /* Catch up the sidecar to the literal location. This means that all
- * squashing events are delivered before any 'side involved' literal
- * matches at a given location. */
-
- catchup_sidecar(tctxt, real_end);
- }
-
/* anchored literals are root only */
if (!roseWalkRootRoles(t, tl, real_end, tctxt, 1, 0)) {
rv = HWLM_TERMINATE_MATCHING;
return HWLM_CONTINUE_MATCHING;
}
- // If the current literal requires sidecar support, run to current
- // location.
- if (tl->requires_side) {
- /* Catch up the sidecar to the literal location. This means that all
- * squashing events are delivered before any 'side involved' literal
- * matches at a given location. */
-
- if (tl->rootRoleCount || tl->minDepth <= tctxt->depth) {
- catchup_sidecar(tctxt, end);
- }
- }
-
if (tl->minDepth > tctxt->depth) {
DEBUG_PRINTF("IGNORE: minDepth=%u > %u\n", tl->minDepth, tctxt->depth);
goto root_roles;
DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth,
tctxt->groups);
- /* delayed literals can't safely set groups, squashing may from side.
+ /* delayed literals can't safely set groups.
* However we may be setting groups that successors already have
* worked out that we don't need to match the group */
DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth,
tctxt->groups);
- /* anchored literals can't safely set groups, squashing may from
- * side. However we may be setting groups that successors already
+ /* anchored literals can't safely set groups.
+ * However we may be setting groups that successors already
* have worked out that we don't need to match the group */
DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
tctxt->groups);
g[v].idx = build->vertexIndex++;
g[v].min_offset = min_offset;
g[v].max_offset = max_offset;
- /* no escapes */
DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].idx,
literalId);
namespace {
struct created_key {
explicit created_key(const RoseInEdgeProps &trep)
- : prefix(trep.graph.get()), lag(trep.graph_lag), escapes(trep.escapes) {
- assert(escapes.none() || !prefix);
+ : prefix(trep.graph.get()), lag(trep.graph_lag) {
}
bool operator<(const created_key &b) const {
const created_key &a = *this;
ORDER_CHECK(prefix);
ORDER_CHECK(lag);
- ORDER_CHECK(escapes);
return false;
}
NGHolder *prefix;
u32 lag;
- CharReach escapes;
};
}
}
NFAVertex p = pv.first;
- if (isLeafNode(p, g)) {
- DEBUG_PRINTF("setting escapes (reach %s) on parent\n",
- describeClass(key.escapes, 20, CC_OUT_TEXT).c_str());
- g[p].escapes = key.escapes;
- } else if (key.escapes != g[p].escapes) {
- DEBUG_PRINTF("creating differently escaped version of parent\n");
- p = duplicate(tbi, p);
- g[p].escapes = key.escapes;
- }
RoseEdge e;
bool added;
DEBUG_PRINTF("edge bounds\n");
return true;
}
- if (ig[e].escapes.any()) {
- DEBUG_PRINTF("escapes\n");
- return true;
- }
RoseInVertex u = source(e, ig);
if (ig[u].type == RIV_START) {
#include "nfagraph/ng_stop.h"
#include "nfagraph/ng_util.h"
#include "nfagraph/ng_width.h"
-#include "sidecar/sidecar.h"
-#include "sidecar/sidecar_compile.h"
#include "som/slot_manager.h"
#include "util/alloc.h"
#include "util/bitutils.h"
}
static
-void fillStateOffsets(const RoseBuildImpl &tbi, const sidecar *side,
- u32 rolesWithStateCount, u32 anchorStateSize,
- u32 activeArrayCount, u32 activeLeftCount,
- u32 laggedRoseCount, u32 floatingStreamStateRequired,
- u32 historyRequired, RoseStateOffsets *so) {
+void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount,
+ u32 anchorStateSize, u32 activeArrayCount,
+ u32 activeLeftCount, u32 laggedRoseCount,
+ u32 floatingStreamStateRequired, u32 historyRequired,
+ RoseStateOffsets *so) {
/* runtime state (including role state) first and needs to be u32-aligned */
u32 curr_offset = sizeof(RoseRuntimeState)
+ mmbit_size(rolesWithStateCount);
- so->sidecar = curr_offset;
- if (side) {
- so->sidecar_size = sidecarEnabledSize(side);
- curr_offset += so->sidecar_size;
- }
-
so->activeLeafArray = curr_offset; /* TODO: limit size of array */
curr_offset += mmbit_size(activeArrayCount);
return m ? m - 1 : 0;
}
-static
-u32 sizeSideSuccMasks(const sidecar *stable,
- const map<set<u32>, set<RoseVertex> > &side_succ_map) {
- if (!stable) {
- return 0;
- }
-
- return verify_u32((side_succ_map.size() + 1 /* for init */)
- * sidecarEnabledSize(stable));
-}
-
-static
-void populateSideSuccLists(const RoseBuildImpl &tbi, build_context &bc,
- const sidecar *stable, RoseEngine *engine, u32 base_offset,
- const map<set<u32>, set<RoseVertex> > &sidecar_succ_map) {
- const RoseGraph &g = tbi.g;
-
- if (!stable) {
- return;
- }
-
- u32 enabled_size = sidecarEnabledSize(stable);
- char *curr = (char *)engine + base_offset;
-
- for (const auto &e : sidecar_succ_map) {
- u32 offset = verify_u32(curr - (char *)engine);
-
- memset(curr, 0, enabled_size);
- /* populate the list */
- for (u32 side_id : e.first) {
- sidecarEnabledAdd(stable, (sidecar_enabled *)curr, side_id);
- }
-
- curr += enabled_size;
-
- /* update the role entries */
- for (RoseVertex v : e.second) {
- if (v == tbi.root) {
- DEBUG_PRINTF("setting root emask\n");
- engine->initSideEnableOffset = offset;
- } else {
- DEBUG_PRINTF("setting boring emask\n");
- assert(g[v].role < bc.roleTable.size());
- bc.roleTable[g[v].role].sidecarEnableOffset = offset;
- }
- }
- }
-
- if (!engine->initSideEnableOffset) {
- DEBUG_PRINTF("add a blank enabled for root\n");
- engine->initSideEnableOffset = verify_u32(curr - (char *)engine);
- memset(curr, 0, enabled_size);
- curr += enabled_size;
- }
-}
-
-/* Also creates a map of sidecar id set to the roles which enables that set
- */
-static
-void markSideEnablers(RoseBuildImpl &build,
- map<set<u32>, set<RoseVertex> > *scmap) {
- map<RoseVertex, set<u32> > enablers;
- u32 side_id = 0;
- for (const auto &e : build.side_squash_roles) {
- for (RoseVertex v : e.second) {
- enablers[v].insert(side_id);
- }
-
- side_id++;
- }
-
- for (const auto &e : enablers) {
- (*scmap)[e.second].insert(e.first);
- }
-}
-
#ifdef DEBUG
static UNUSED
string dumpMask(const vector<u8> &v) {
return false;
}
- if (g[u].escapes.any()) {
- DEBUG_PRINTF("u=%zu has escapes\n", g[u].idx);
- return false;
- }
-
/* TODO: handle non-root roles as well. It can't be that difficult... */
if (!in_degree_equal_to(u, g, 1)) {
return etable;
}
-static
-aligned_unique_ptr<sidecar> buildSideMatcher(const RoseBuildImpl &tbi,
- size_t *ssize) {
- *ssize = 0;
-
- if (tbi.side_squash_roles.empty()) {
- DEBUG_PRINTF("no sidecar\n");
- return nullptr;
- }
- assert(tbi.cc.grey.allowSidecar);
-
- vector<CharReach> sl;
-
- /* TODO: ensure useful sidecar entries only */
- for (const CharReach &cr : tbi.side_squash_roles | map_keys) {
- sl.push_back(cr);
- }
-
- aligned_unique_ptr<sidecar> stable = sidecarCompile(sl);
- if (!stable) {
- throw CompileError("Unable to generate bytecode.");
- }
-
- *ssize = sidecarSize(stable.get());
- assert(*ssize);
- DEBUG_PRINTF("built sidecar literal table size %zu bytes\n", *ssize);
- return stable;
-}
-
// Adds a sparse iterator to the end of the iterator table, returning its
// offset.
static
return addPredSparseIter(bc, predStates);
}
-static
-void buildSideEntriesAndIters(const RoseBuildImpl &tbi, build_context &bc,
- const set<RoseVertex> &squash_roles,
- vector<RoseSide> &sideTable) {
- const RoseGraph &g = tbi.g;
-
- sideTable.push_back(RoseSide()); /* index in array gives an implicit id */
- RoseSide &tsb = sideTable.back();
- memset(&tsb, 0, sizeof(tsb));
-
- if (squash_roles.empty()) {
- return;
- }
-
- set<RoseVertex> squashed_succ;
-
- // Build a vector of the roles' state IDs
- vector<u32> states;
- for (RoseVertex v : squash_roles) {
- assert(g[v].role < bc.roleTable.size());
- const RoseRole &tr = bc.roleTable[g[v].role];
- DEBUG_PRINTF("considering role %u, state index %u\n", g[v].role,
- tr.stateIndex);
- assert(tr.stateIndex != MMB_INVALID);
-
- states.push_back(tr.stateIndex);
- DEBUG_PRINTF("side %zu squashes state index %u/role %u\n",
- sideTable.size() - 1, tr.stateIndex, g[v].role);
-
- /* we cannot allow groups to be squashed if the source vertex is in an
- * anchored table due to ordering issue mean that a literals cannot
- * set groups */
- if (tbi.isAnchored(v) && g[v].max_offset != 1) {
- DEBUG_PRINTF("%u has anchored table pred no squashy\n", g[v].role);
- continue;
- }
-
- DEBUG_PRINTF("role %u is fine to g squash\n", g[v].role);
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (in_degree(w, g) == 1) { /* TODO: improve: check that each pred
- * is in id's squash role */
- squashed_succ.insert(w);
- }
- }
- }
-
- // Build sparse iterators and add to table.
- assert(!states.empty());
-
- vector<mmbit_sparse_iter> iter;
- mmbBuildSparseIterator(iter, states, bc.numStates);
- assert(!iter.empty());
- tsb.squashIterOffset = addIteratorToTable(bc, iter);
-
- // Build a mask of groups.
- rose_group squash_groups = 0;
- for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) {
- if (!contains(tbi.group_to_literal, i)) {
- continue;
- }
-
- DEBUG_PRINTF("checking group %u for %zu's squash mask\n", i,
- sideTable.size() - 1);
-
- const set<u32> &group_lits = tbi.group_to_literal.find(i)->second;
-
- /* check for each literal in this group if it is squashed by this
- * sidecar escape */
- for (u32 lit : group_lits) {
- DEBUG_PRINTF("inspecting lit %u\n", lit);
- const rose_literal_info &this_info = tbi.literal_info.at(lit);
-
- /* check that all roles belonging to this literal are squashed */
- for (RoseVertex v : this_info.vertices) {
- DEBUG_PRINTF("checking if role is squashed %u...\n", g[v].role);
- if (squashed_succ.find(v) != squashed_succ.end()) {
- continue;
- }
-
- DEBUG_PRINTF("...role not taken %u\n", g[v].role);
-
- /* if the literal is length 1 and anchored (0,0) when can ignore
- * it as any matching must have happened before the side lit
- * arrived */
- if (g[v].max_offset == 1) {
- DEBUG_PRINTF("we can ignore this role as 1st byte only\n");
- continue;
- }
-
- goto fail_group;
- }
- }
-
- continue;
-
- fail_group:
- DEBUG_PRINTF("group %u is not squashed\n", i);
- /* we need to keep this group active */
- squash_groups |= 1ULL << i;
- }
-
- DEBUG_PRINTF("%zu group squash mask: %016llx\n", sideTable.size() - 1,
- squash_groups);
- tsb.squashGroupMask = squash_groups;
-}
-
-// Construct sparse iterators for squashes
-static
-void buildSideTable(const RoseBuildImpl &build, build_context &bc,
- vector<RoseSide> &sideTable) {
- for (const auto &e : build.side_squash_roles) {
- buildSideEntriesAndIters(build, bc, e.second, sideTable);
- }
-}
-
static
void fillLookaroundTables(char *look_base, char *reach_base,
const vector<LookEntry> &look_vec) {
return false;
}
-static
-bool needsSidecarCatchup(const RoseBuildImpl &build, u32 id) {
- const RoseGraph &g = build.g;
-
- for (RoseVertex v : build.literal_info.at(id).vertices) {
- if (g[v].escapes.any()) {
- return true;
- }
-
- for (RoseVertex u : inv_adjacent_vertices_range(v, g)) {
- if (g[u].escapes.any()) {
- return true;
- }
- }
- }
-
- return false;
-}
-
static
void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc,
vector<RoseLiteral> &literalTable) {
const u32 final_id = verify_u32(literalTable.size());
assert(contains(tbi.final_id_to_literal, final_id));
- const u32 literalId = *tbi.final_id_to_literal.at(final_id).begin();
+ const UNUSED u32 literalId = *tbi.final_id_to_literal.at(final_id).begin();
/* all literal ids associated with this final id should result in identical
* literal entry */
const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id);
}
assert(!tbi.literals.right.at(literalId).delay || !tl.delay_mask);
-
- tl.requires_side = needsSidecarCatchup(tbi, literalId);
}
// Construct the literal table.
if (!engine->anchoredDistance) {
return;
}
-
- /* could be improved, if we have any side squash stuff and an anchored table
- * set the min float distance to 0 */
- if (!build.side_squash_roles.empty()) {
- engine->floatingMinDistance = 0;
- }
}
aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
DerivedBoundaryReports dboundary(boundary);
// Build literal matchers
- size_t asize = 0, fsize = 0, ssize = 0, esize = 0, sbsize = 0;
+ size_t asize = 0, fsize = 0, esize = 0, sbsize = 0;
size_t floatingStreamStateRequired = 0;
size_t historyRequired = calcHistoryRequired(); // Updated by HWLM.
buildAnchoredAutomataMatcher(*this, &asize);
aligned_unique_ptr<HWLM> ftable = buildFloatingMatcher(
*this, &fsize, &historyRequired, &floatingStreamStateRequired);
- aligned_unique_ptr<sidecar> stable = buildSideMatcher(*this, &ssize);
aligned_unique_ptr<HWLM> etable = buildEodAnchoredMatcher(*this, &esize);
aligned_unique_ptr<HWLM> sbtable = buildSmallBlockMatcher(*this, &sbsize);
tie(eodIterMapOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc,
predTable);
- vector<RoseSide> sideTable;
- buildSideTable(*this, bc, sideTable);
-
vector<mmbit_sparse_iter> activeLeftIter;
buildActiveLeftIter(leftInfoTable, activeLeftIter);
u32 amatcherOffset = 0;
u32 fmatcherOffset = 0;
- u32 smatcherOffset = 0;
u32 ematcherOffset = 0;
u32 sbmatcherOffset = 0;
currOffset += (u32)fsize;
}
- if (stable) {
- currOffset = ROUNDUP_CL(currOffset);
- smatcherOffset = currOffset;
- currOffset += (u32)ssize;
- }
-
if (etable) {
currOffset = ROUNDUP_CL(currOffset);
ematcherOffset = currOffset;
u32 literalLen = sizeof(RoseLiteral) * literalTable.size();
currOffset = literalOffset + literalLen;
- u32 sideOffset = ROUNDUP_N(currOffset, alignof(RoseSide));
- currOffset = sideOffset + byte_length(sideTable);
-
u32 roleOffset = ROUNDUP_N(currOffset, alignof(RoseRole));
u32 roleLen = sizeof(RoseRole) * bc.roleTable.size();
currOffset = roleOffset + roleLen;
u32 anchoredReportInverseMapOffset = currOffset;
currOffset += arit.size() * sizeof(u32);
- /* sidecar may contain sse in silly cases */
- currOffset = ROUNDUP_N(currOffset, 16);
- u32 sideSuccListOffset = currOffset;
- map<set<u32>, set<RoseVertex> > sidecar_succ_map;
- markSideEnablers(*this, &sidecar_succ_map);
- currOffset += sizeSideSuccMasks(stable.get(), sidecar_succ_map);
-
currOffset = ROUNDUP_N(currOffset, alignof(ReportID));
u32 multidirectOffset = currOffset;
currOffset += mdr_reports.size() * sizeof(ReportID);
RoseStateOffsets stateOffsets;
memset(&stateOffsets, 0, sizeof(stateOffsets));
- fillStateOffsets(*this, stable.get(), bc.numStates, anchorStateSize,
+ fillStateOffsets(*this, bc.numStates, anchorStateSize,
activeArrayCount, activeLeftCount, laggedRoseCount,
floatingStreamStateRequired, historyRequired,
&stateOffsets);
assert(fmatcherOffset >= base_nfa_offset);
memcpy(ptr + fmatcherOffset, ftable.get(), fsize);
}
- if (stable) {
- assert(smatcherOffset);
- assert(smatcherOffset >= base_nfa_offset);
- memcpy(ptr + smatcherOffset, stable.get(), ssize);
- }
if (etable) {
assert(ematcherOffset);
assert(ematcherOffset >= base_nfa_offset);
engine->runtimeImpl = pickRuntimeImpl(*this, outfixEndQueue);
engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this);
- engine->sideOffset = sideOffset;
- engine->sideCount = verify_u32(sideTable.size());
-
engine->activeArrayCount = activeArrayCount;
engine->activeLeftCount = activeLeftCount;
engine->queueCount = queue_count;
engine->nonbenefits_base_id = nonbenefits_base_id;
engine->literalBenefitsOffsets = base_lits_benefits_offset;
- populateSideSuccLists(*this, bc, stable.get(), engine.get(),
- sideSuccListOffset, sidecar_succ_map);
engine->rosePrefixCount = rosePrefixCount;
engine->activeLeftIterOffset
engine->ematcherOffset = ematcherOffset;
engine->sbmatcherOffset = sbmatcherOffset;
engine->fmatcherOffset = fmatcherOffset;
- engine->smatcherOffset = smatcherOffset;
engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED);
engine->hasFloatingDirectReports = floating_direct_report;
engine->requiresEodCheck = hasEodAnchors(*this, built_nfas,
outfixEndQueue);
- engine->requiresEodSideCatchup = hasEodSideLink();
engine->hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes);
engine->canExhaust = rm.patternSetCanExhaust();
engine->hasSom = hasSom;
copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit);
copy_bytes(ptr + engine->multidirectOffset, mdr_reports);
copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter);
- copy_bytes(ptr + engine->sideOffset, sideTable);
DEBUG_PRINTF("rose done %p\n", engine.get());
return engine;
return trailer;
}
-/* note: last byte cannot conflict as escapes are processed after other
- * lits at same offset */
-static
-bool conflictsWithEscape(const rose_literal_id &litv, const CharReach &cr) {
- if (cr.none()) {
- return false;
- }
-
- if (litv.delay) {
- return true;
- }
-
- return contains(litv.s, cr);
-}
-
static
RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) {
const RoseGraph &g = tbi.g;
if (g[v].reports.empty() ||
g[v].eod_accept || // no accept EOD
- g[v].escapes.any() ||
!g[v].isBoring() ||
!isLeafNode(v, g) || // Must have no out-edges
in_degree(v, g) != 1) { // Role must have exactly one in-edge
return literal_info.at(id).final_id != MO_INVALID_IDX;
}
-static
-void doSidecarLiterals(RoseBuildImpl &tbi) {
- map<CharReach, set<RoseVertex> > escapes;
- const RoseGraph &g = tbi.g;
-
- /* find escapes */
- for (auto v : vertices_range(g)) {
- const CharReach &cr = g[v].escapes;
- if (cr.none()) {
- continue;
- }
-
- DEBUG_PRINTF("vertex %zu has %zu escapes\n", g[v].idx, cr.count());
-
- // We only have an implementation for these escapes if the Sidecar is
- // available for use.
- assert(tbi.cc.grey.allowSidecar);
-
- assert(!isLeafNode(v, g));
-
- /* Verify that all the successors are floating */
- for (UNUSED auto w : adjacent_vertices_range(v, g)) {
- assert(!tbi.isAnchored(w));
- }
-
- escapes[cr].insert(v);
- }
-
- if (escapes.size() > 32) {
- /* ensure that a most one sparse iterator is triggered per char */
- escapes = make_disjoint(escapes);
- }
-
- /* create the squash/escape sidecar entries for the vertices and associate
- * with appropriate roles */
- for (const auto &e : escapes) {
- const CharReach &cr = e.first;
- insert(&tbi.side_squash_roles[cr], e.second);
- }
-}
-
static
bool eligibleForAlwaysOnGroup(const RoseBuildImpl &tbi, u32 id) {
/* returns true if it or any of its delay versions have root role */
return true;
}
-static
-bool escapesAllPreds(const RoseGraph &g, RoseVertex v, const CharReach &cr) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if ((~g[u].escapes & cr).any()) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-bool mayNotSeeSubsequentPredsInOrder(const RoseBuildImpl &tbi, RoseVertex v) {
- const RoseGraph &g = tbi.g;
-
- if (in_degree(v, g) == 1) {
- /* if the pred can only match once, there are no subsequent preds */
- RoseVertex u = source(*in_edges(v, g).first, g);
- if (g[u].max_offset == g[u].min_offset) {
- return false;
- }
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- for (u32 lit_id : g[u].literals) {
- const rose_literal_id &lit = tbi.literals.right.at(lit_id);
- if (lit.table == ROSE_ANCHORED) {
- return true;
- }
- }
-
- }
-
- return false;
-}
-
static
bool isGroupSquasher(const RoseBuildImpl &tbi, const u32 id /* literal id */,
rose_group forbidden_squash_group) {
return false;
}
- /* Can only squash cases with escapes if all preds have the same escapes
- * and none of the literals overlap with the escape
- *
- * Additionally, if we may not see one of the preds in time to turn on
- * the group again we have problems.
- *
- * ARGHHHH
- */
- if (g[v].escapes.any()) {
- if (!escapesAllPreds(g, v, g[v].escapes)
- || mayNotSeeSubsequentPredsInOrder(tbi, v)) {
- return false;
- }
-
- if (g[v].literals.size() == 1) {
- if (conflictsWithEscape(tbi.literals.right.at(id),
- g[v].escapes)) {
- return false;
- }
- } else {
- for (const auto &lit_id : g[v].literals) {
- const rose_literal_id &lit = tbi.literals.right.at(lit_id);
- if (lit.delay || contains(lit.s, g[v].escapes)) {
- return false;
- }
- }
- }
- }
-
// Out-edges must have inf max bound, + no other shenanigans */
for (const auto &e : out_edges_range(v, g)) {
if (g[e].maxBound != ROSE_BOUND_INF) {
for (auto v : lit_info.vertices) {
assert(!tbi.isAnyStart(v));
- // Can't squash cases with accepts or escapes
- if (!g[v].reports.empty()
- || (g[v].escapes.any() && !escapesAllPreds(g, v, g[v].escapes))) {
+ // Can't squash cases with accepts
+ if (!g[v].reports.empty()) {
return false;
}
// Clone vertex with the new literal ID.
RoseVertex v = add_vertex(g[lit_v], g);
g[v].idx = tbi.vertexIndex++;
- g[v].escapes.clear();
g[v].literals.clear();
g[v].literals.insert(lit_id);
g[v].min_offset = sai.min_bound + sai.literal.length();
assignGroupsToRoles();
findGroupSquashers(*this);
- // Collect squash literals for the sidecar
- doSidecarLiterals(*this);
-
/* final prep work */
remapCastleTops(*this);
allocateFinalLiteralId(*this);
unique_ptr<NGHolder> convertLeafToHolder(const RoseGraph &g,
const RoseEdge &t_e,
const RoseLiteralMap &literals) {
- RoseVertex t_u = source(t_e, g);
RoseVertex t_v = target(t_e, g); // leaf vertex for demolition.
- const CharReach escape_cr(~g[t_u].escapes);
u32 minBound = g[t_e].minBound;
u32 maxBound = g[t_e].maxBound;
+ const CharReach dot = CharReach::dot();
+
assert(!g[t_v].left);
auto out = ue2::make_unique<NGHolder>(NFA_SUFFIX);
u32 i = 1;
NFAVertex last = out->start;
for (; i <= minBound; i++) {
- NFAVertex v = addHolderVertex(escape_cr, *out);
+ NFAVertex v = addHolderVertex(dot, *out);
add_edge(last, v, *out);
last = v;
}
NFAVertex last_mand = last;
if (maxBound != ROSE_BOUND_INF) {
for (; i <= maxBound; i++) {
- NFAVertex v = addHolderVertex(escape_cr, *out);
+ NFAVertex v = addHolderVertex(dot, *out);
add_edge(last_mand, v, *out);
if (last != last_mand) {
add_edge(last, v, *out);
if (minBound) {
add_edge(last_mand, last_mand, *out);
} else {
- NFAVertex v = addHolderVertex(escape_cr, *out);
+ NFAVertex v = addHolderVertex(dot, *out);
add_edge(last_mand, v, *out);
add_edge(v, v, *out);
last = v;
return true;
}
- /* more arbitrary magic numbers as riskier transform */
if (g[e].maxBound == ROSE_BOUND_INF) {
- if (!tbi.cc.grey.roseConvertInfBadLeaves) {
- return true;
- }
-
- if (g[e].minBound > 20) {
- DEBUG_PRINTF("fail minbound (%u)\n", maxbound);
- return true;
- }
-
- if (max_lit_len > 2) {
- DEBUG_PRINTF("fail length\n");
- return true;
- }
-
- if (g[u].escapes.none()) {
- /* slightly risky as nfa won't die and we don't avoid running the
- sidecar */
- DEBUG_PRINTF("fail: .*\n");
- return true;
- }
+ /* slightly risky as nfa won't die */
+ DEBUG_PRINTF("fail: .*\n");
+ return true;
}
return false;
RoseVertex u = source(e, g);
assert(!g[u].suffix);
- g[u].escapes = CharReach();
g[u].suffix.graph = h;
DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].idx, h.get());
}
}
- if (g[v].escapes.any()) {
- os << "\\nescapes=";
- describeClass(os, g[v].escapes, 5, CC_OUT_DOT);
- }
if (ghost.find(v) != ghost.end()) {
os << "\\nGHOST";
}
std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;
- bool hasEodSideLink() const;
-
// Find the maximum bound on the edges to this vertex's successors.
u32 calcSuccMaxBound(RoseVertex u) const;
u32 group_weak_end;
u32 group_end;
- std::map<CharReach, std::set<RoseVertex> > side_squash_roles;
-
u32 anchored_base_id;
u32 nonbenefits_base_id;
return true;
}
-bool RoseBuildImpl::hasEodSideLink(void) const {
- for (auto v : vertices_range(g)) {
- if (!g[v].eod_accept) {
- continue;
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (g[u].escapes.any()) {
- return true;
- }
- }
- }
-
- return false;
-}
-
size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const {
const auto &lit_ids = g[v].literals;
assert(!lit_ids.empty());
const RoseGraph &g = build.g;
const RoseVertexProps &aprops = g[a], &bprops = g[b];
- if (aprops.eod_accept != bprops.eod_accept
- || aprops.escapes != bprops.escapes) {
+ if (aprops.eod_accept != bprops.eod_accept) {
return false;
}
static
void removeVertexFromMaps(RoseVertex v, RoseBuildImpl &build, revRoseMap &rrm) {
- // Remove vertex 'a' from literal squash roles. Only sidecar literals can
- // squash vertices, so they're the only ones we have to check.
- for (auto &roles : build.side_squash_roles | map_values) {
- roles.erase(v);
- }
-
if (build.g[v].left) {
const left_id left(build.g[v].left);
assert(contains(rrm[left], v));
// Merge role properties.
assert(g[a].eod_accept == g[b].eod_accept);
- assert(g[a].escapes == g[b].escapes);
assert(g[a].left == g[b].left);
insert(&g[b].reports, g[a].reports);
// Merge role properties. For a diamond merge, most properties are already
// the same (with the notable exception of the literal set).
assert(g[a].eod_accept == g[b].eod_accept);
- assert(g[a].escapes == g[b].escapes);
assert(g[a].left == g[b].left);
assert(g[a].reports == g[b].reports);
assert(g[a].suffix == g[b].suffix);
#include "nfa/nfa_build_util.h"
#include "nfa/nfa_dump_api.h"
#include "nfa/nfa_internal.h"
-#include "sidecar/sidecar.h"
-#include "sidecar/sidecar_compile.h"
-#include "sidecar/sidecar_dump.h"
#include "util/multibit_internal.h"
#include <algorithm>
return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset);
}
-static
-const sidecar *getSidecarMatcher(const RoseEngine *t) {
- return (const sidecar *)loadFromByteCodeOffset(t, t->smatcherOffset);
-}
-
static
const HWLM *getEodMatcher(const RoseEngine *t) {
return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset);
const void *atable = getAnchoredMatcher(t);
const HWLM *ftable = getFloatingMatcher(t);
- const sidecar *stable = getSidecarMatcher(t);
const HWLM *etable = getEodMatcher(t);
const HWLM *sbtable = getSmallBlockMatcher(t);
} else {
fprintf(f, "\n");
}
- fprintf(f, " - sidecar matcher : %u bytes\n",
- stable ? sidecarSize(stable) : 0);
fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n",
etable ? hwlmSize(etable) : 0, t->ematcherRegionSize);
fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n",
sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance);
fprintf(f, " - literal table : %zu bytes\n",
t->literalCount * sizeof(RoseLiteral));
- fprintf(f, " - side table : %zu bytes\n",
- t->sideCount * sizeof(RoseSide));
fprintf(f, " - role table : %zu bytes\n",
t->roleCount * sizeof(RoseRole));
fprintf(f, " - pred table : %zu bytes\n",
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
fprintf(f, " - runtime state : %zu bytes\n", sizeof(RoseRuntimeState));
fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState);
- fprintf(f, " - sidecar : %u bytes\n",
- stable ? sidecarEnabledSize(stable) : 0U);
fprintf(f, " - active array : %u bytes\n",
mmbit_size(t->activeArrayCount));
fprintf(f, " - active rose : %u bytes\n",
literalsWithDirectReports(t));
fprintf(f, " - that squash group : %u\n",
literalsWithProp(t, &RoseLiteral::squashesGroup));
- fprintf(f, " - need side catchup : %u\n",
- literalsWithProp(t, &RoseLiteral::requires_side));
fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id);
u32 group_weak_end = t->group_weak_end;
hwlmPrintStats(ftable, f);
}
- if (stable) {
- fprintf(f, "\nSidecar literal matcher stats:\n\n");
- fprintf(f, " Side Entries : %u\n", t->sideCount);
- sidecarDump(stable, f);
- }
-
if (etable) {
fprintf(f, "\nEOD-anchored literal matcher stats:\n\n");
hwlmPrintStats(etable, f);
DUMP_U8(t, hasFloatingDirectReports);
DUMP_U8(t, noFloatingRoots);
DUMP_U8(t, requiresEodCheck);
- DUMP_U8(t, requiresEodSideCatchup);
DUMP_U8(t, hasEodEventLiteral);
DUMP_U8(t, hasOutfixesInSmallBlock);
DUMP_U8(t, runtimeImpl);
DUMP_U32(t, amatcherOffset);
DUMP_U32(t, ematcherOffset);
DUMP_U32(t, fmatcherOffset);
- DUMP_U32(t, smatcherOffset);
DUMP_U32(t, sbmatcherOffset);
DUMP_U32(t, amatcherMinWidth);
DUMP_U32(t, fmatcherMinWidth);
DUMP_U32(t, intReportCount);
DUMP_U32(t, literalOffset);
DUMP_U32(t, literalCount);
- DUMP_U32(t, sideOffset);
- DUMP_U32(t, sideCount);
DUMP_U32(t, multidirectOffset);
DUMP_U32(t, activeArrayCount);
DUMP_U32(t, activeLeftCount);
DUMP_U32(t, delayRebuildLength);
DUMP_U32(t, stateOffsets.history);
DUMP_U32(t, stateOffsets.exhausted);
- DUMP_U32(t, stateOffsets.sidecar);
- DUMP_U32(t, stateOffsets.sidecar_size);
DUMP_U32(t, stateOffsets.activeLeafArray);
DUMP_U32(t, stateOffsets.activeLeftArray);
DUMP_U32(t, stateOffsets.activeLeftArray_size);
DUMP_U32(t, boundary.reportZeroEodOffset);
DUMP_U32(t, totalNumLiterals);
DUMP_U32(t, asize);
- DUMP_U32(t, initSideEnableOffset);
DUMP_U32(t, outfixBeginQueue);
DUMP_U32(t, outfixEndQueue);
DUMP_U32(t, leftfixBeginQueue);
DUMP_U32(p, leftfixReport);
DUMP_U32(p, leftfixLag);
DUMP_U32(p, leftfixQueue);
- DUMP_U32(p, sidecarEnableOffset);
DUMP_U32(p, somAdjust);
DUMP_U32(p, lookaroundIndex);
DUMP_U32(p, lookaroundCount);
const void *atable = getAnchoredMatcher(t);
const HWLM *ftable = getFloatingMatcher(t);
- const sidecar *stable = getSidecarMatcher(t);
const HWLM *etable = getEodMatcher(t);
if (atable) {
}
}
- if (stable) {
- FILE *f = fopen((base + "/sidecar.raw").c_str(), "w");
- if (f) {
- fwrite(stable, 1, sidecarSize(stable), f);
- fclose(f);
- }
- }
-
if (etable) {
FILE *f = fopen((base + "/eod.raw").c_str(), "w");
if (f) {
/** \brief Bitmask of groups that this role sets. */
rose_group groups = 0;
- /** \brief Characters that escape and squash this role. */
- CharReach escapes;
-
/** \brief Minimum role (end of literal) offset depth in bytes. */
u32 min_offset = ~u32{0};
std::shared_ptr<raw_som_dfa> haig;
u32 graph_lag;
-
- /** \brief Escape characters, can be used instead of graph.
- *
- * currently must not intersect with succ literal and must be a literal -
- * literal edge, TODO: handle */
- CharReach escapes;
};
typedef boost::adjacency_list<boost::listS, boost::listS, boost::bidirectionalS,
u8 minDepth; // the minimum of this literal's roles' depths (for depths > 1)
u8 squashesGroup; /**< literal switches off its group behind it if it sets a
* role */
- u8 requires_side; // need to catch up sidecar for this literal
u32 delay_mask; /**< bit set indicates that the literal inserts a delayed
* match at the given offset */
u32 delayIdsOffset; // offset to array of ids to poke in the delay structure
};
-/* properties for sidecar entries, yay */
-struct RoseSide {
- u32 squashIterOffset; // offset of the squash sparse iterator, rose relative
- rose_group squashGroupMask; // squash literal squash masks
-};
-
/* Allocation of Rose literal ids
*
* The rose literal id space is segmented:
* | |
* | |
* ----
- *
- * Note: sidecar 'literals' are in a complete separate space
*/
/* Rose Literal Sources
* 1) The floating table
* 2) The anchored table
* 3) Delayed literals
- * 4) Sidecar literal matcher
- * 5) suffixes NFAs
- * 6) masksv2 (literals with benefits)
- * 7) End anchored table
- * 8) prefix / infix nfas
+ * 4) suffixes NFAs
+ * 5) masksv2 (literals with benefits)
+ * 6) End anchored table
+ * 7) prefix / infix nfas
*
* Care is required to ensure that events appear to come into Rose in order
* (or sufficiently ordered for Rose to cope). Generally the progress of the
* Delayed literal ordering is handled by delivering any pending delayed
* literals before processing any floating match.
*
- * Sidecar:
- * The sidecar matcher is unique in that it does not return match
- * location information. Sidecar literals are escapes between two normal
- * roles. The sidecar matcher is caught up to the floating matcher
- * before any possible predecessor role, any possible successor role, and
- * at stream boundaries^3.
- *
* Suffix:
* Suffixes are always pure terminal roles. Prior to raising a match^2, pending
* NFA queues are run to the current point (floating or delayed literal) as
* leftfix engine status */
u32 leftfixQueue; /**< queue index of the prefix/infix before role */
u32 infixTriggerOffset; /* offset to list of infix roses to trigger */
- u32 sidecarEnableOffset; /**< offset to list of sidecar literals to enable
- */
u32 somAdjust; /**< som for the role is offset from end match offset */
u32 lookaroundIndex; /**< index of lookaround offset/reach in table, or
* reports with that ekey should not be delivered to the user. */
u32 exhausted;
- /** Sidecar state. */
- u32 sidecar;
-
- /** Size of sidecar state, in bytes. */
- u32 sidecar_size;
-
/** Multibit for active suffix/outfix engines. */
u32 activeLeafArray;
// In memory, we follow this with:
// 1a. anchored 'literal' matcher table
// 1b. floating literal matcher table
-// 1c. sidecar 'literal' matcher table
-// 1d. eod-anchored literal matcher table
-// 1e. small block table
+// 1c. eod-anchored literal matcher table
+// 1d. small block table
// 2. array of RoseLiteral (literalCount entries)
// 3. array of RoseRole (roleCount entries)
// 4. array of RosePred (predCount entries)
u8 noFloatingRoots; /* only need to run the anchored table if something
* matched in the anchored table */
u8 requiresEodCheck; /* stuff happens at eod time */
- u8 requiresEodSideCatchup; /* we need to do a sidecar catchup before eod
- * checks */
u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time.
u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
in small block scans. */
u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
- u32 smatcherOffset; // offset of the sidecar literal matcher (bytes)
u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
* involved with the anchored table to produce a full
u32 intReportCount; /**< number of internal_report structures */
u32 literalOffset; // offset of RoseLiteral array (bytes)
u32 literalCount; // number of RoseLiteral entries [NOT number of literals]
- u32 sideOffset; /**< offset of RoseSide array (bytes), indexed by
- *sidecar ids */
- u32 sideCount; /**< number of RoseSide entries */
u32 multidirectOffset; /**< offset of multi-direct report list. */
u32 activeArrayCount; //number of nfas tracked in the active array
u32 activeLeftCount; //number of nfas tracked in the active rose array
struct RoseBoundaryReports boundary;
u32 totalNumLiterals; /* total number of literals including dr */
u32 asize; /* size of the atable */
- u32 initSideEnableOffset; /* sidecar literals enabled initially */
u32 outfixBeginQueue; /* first outfix queue */
u32 outfixEndQueue; /* one past the last outfix queue */
u32 leftfixBeginQueue; /* first prefix/infix queue */
return (const struct HWLM *)lt;
}
-static really_inline
-const void *getSLiteralMatcher(const struct RoseEngine *t) {
- if (!t->smatcherOffset) {
- return NULL;
- }
-
- const char *st = (const char *)t + t->smatcherOffset;
- assert(ISALIGNED_N(st, 8));
- return st;
-}
-
static really_inline
const void *getELiteralMatcher(const struct RoseEngine *t) {
if (!t->ematcherOffset) {
return tl;
}
-static really_inline
-const struct RoseSide *getSideEntryTable(const struct RoseEngine *t) {
- const struct RoseSide *rs
- = (const struct RoseSide *)((const char *)t + t->sideOffset);
- assert(ISALIGNED(rs));
- return rs;
-}
-
static really_inline
const struct RoseRole *getRoleTable(const struct RoseEngine *t) {
const struct RoseRole *r
--- a/src/rose/rose_sidecar_runtime.h
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
-#define ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
-
-#include "hwlm/hwlm.h"
-#include "scratch.h"
-#include "sidecar/sidecar.h"
-#include "rose_common.h"
-#include "ue2common.h"
-
-// Callback defined in match.c
-void roseSidecarCallback(u64a offset, u32 side_id, void *context);
-
-static really_inline
-void catchup_sidecar(struct RoseContext *tctxt, u64a end) {
- DEBUG_PRINTF("catching up the sidecar from %llu to %llu\n",
- tctxt->side_curr, end);
- const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
- struct core_info *ci = &scratch->core_info;
-
- if (!sidecar || tctxt->side_curr == end) {
- return;
- }
-
- const u8 *start;
- if (tctxt->side_curr >= ci->buf_offset) {
- start = ci->buf + tctxt->side_curr - ci->buf_offset;
- assert(end <= ci->buf_offset + ci->len);
- } else {
- /* at eod time we are called running over the histroy */
- start = ci->hbuf + tctxt->side_curr - ci->buf_offset + ci->hlen;
- assert(end <= ci->buf_offset);
- }
- size_t len = end - tctxt->side_curr;
-
- DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb);
- sidecarExec(sidecar, start, len, &scratch->side_enabled.arb,
- scratch->side_scratch, tctxt->side_curr, roseSidecarCallback,
- tctxt);
- tctxt->side_curr = end;
-
- DEBUG_PRINTF("finished catching up the sidecar to %llu\n", end);
-}
-
-static rose_inline
-void enable_sidecar(struct RoseContext *tctxt, const struct RoseRole *tr) {
- assert(tr->sidecarEnableOffset);
- const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
- assert(sidecar);
- struct hs_scratch *scratch = tctxtToScratch(tctxt);
- DEBUG_PRINTF("welcome to the sidecar\n");
- sidecarEnabledUnion(sidecar, &scratch->side_enabled.arb,
- (const void *)((const char *)tctxt->t + tr->sidecarEnableOffset));
-}
-
-static really_inline
-void sidecar_enabled_populate(const struct RoseEngine *t,
- struct hs_scratch *scratch, const u8 *state) {
- DEBUG_PRINTF("enabled-->%02hhx\n", *(state + t->stateOffsets.sidecar));
- memcpy(&scratch->side_enabled, state + t->stateOffsets.sidecar,
- t->stateOffsets.sidecar_size);
- DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb);
-}
-
-static really_inline
-void sidecar_enabled_preserve(const struct RoseEngine *t,
- const struct hs_scratch *scratch, u8 *state) {
- memcpy(state + t->stateOffsets.sidecar, &scratch->side_enabled,
- t->stateOffsets.sidecar_size);
-}
-
-
-#endif /* ROSE_SIDECAR_RUNTIME_H_1F746F6F237176 */
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_internal.h"
#include "util/fatbit.h"
-#include "rose_sidecar_runtime.h"
#include "rose.h"
static rose_inline
roseCatchUpLeftfixes(t, state, scratch);
roseFlushLastByteHistory(t, state, offset + length, tctxt);
tctxt->lastEndOffset = offset + length;
- catchup_sidecar(tctxt, offset + length);
- sidecar_enabled_preserve(t, scratch, state);
storeGroups(t, state, tctxt->groups);
struct RoseRuntimeState *rstate = getRuntimeState(state);
rstate->stored_depth = tctxt->depth;
tctxt->next_mpv_offset = 0;
tctxt->curr_anchored_loc = MMB_INVALID;
tctxt->curr_row_offset = 0;
- tctxt->side_curr = offset;
-
DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu\n",
scratch->core_info.hlen, scratch->core_info.len);
streamInitSufPQ(t, state, scratch);
}
- sidecar_enabled_populate(t, scratch, state);
-
u8 delay_rb_status = rstate->flags;
u32 alen = t->anchoredDistance > offset ?
#include "database.h"
#include "nfa/limex_context.h" // for NFAContext128 etc
#include "nfa/nfa_api_queue.h"
-#include "sidecar/sidecar.h"
#include "rose/rose_internal.h"
#include "util/fatbit.h"
#include "util/multibit.h"
+ som_store_size
+ som_now_size
+ som_attempted_size
- + som_attempted_store_size
- + proto->sideScratchSize + 15;
+ + som_attempted_store_size + 15;
/* the struct plus the allocated stuff plus padding for cacheline
* alignment */
s->som_attempted_set = (struct fatbit *)current;
current += som_attempted_size;
- current = ROUNDUP_PTR(current, 16);
- s->side_scratch = (void *)current;
- current += proto->sideScratchSize;
-
current = ROUNDUP_PTR(current, 64);
assert(ISALIGNED_CL(current));
s->fullState = (char *)current;
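The hunk above only removes the sidecar region from the scratch carve-out; the surrounding pattern stays the same: one allocation is over-sized by a few bytes of padding and each sub-buffer pointer is then rounded up to its required alignment. Below is a standalone sketch of that pattern, with a hypothetical roundup_ptr helper standing in for the real ROUNDUP_PTR macro.

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Round p up to the next multiple of align (align must be a power of two).
static inline char *roundup_ptr(char *p, std::size_t align) {
    std::uintptr_t u = (std::uintptr_t)p;
    return (char *)((u + align - 1) & ~(std::uintptr_t)(align - 1));
}

int main() {
    const std::size_t a_size = 100, b_size = 40;
    // Over-allocate by 63 bytes so a later 64-byte round-up cannot overrun.
    char *base = (char *)std::malloc(a_size + b_size + 63);
    assert(base != nullptr);
    char *current = base;

    char *region_a = current;           // first region needs no extra alignment
    current += a_size;

    current = roundup_ptr(current, 64); // cacheline-align the next region
    char *region_b = current;
    current += b_size;

    assert(region_a == base);
    assert(((std::uintptr_t)region_b & 63) == 0);
    std::free(base);
    return 0;
}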
proto->tStateSize = rose->tStateSize;
}
- const struct sidecar *side = getSLiteralMatcher(rose);
- if (side && sidecarScratchSize(side) > proto->sideScratchSize) {
- resize = 1;
- proto->sideScratchSize = sidecarScratchSize(side);
- }
-
u32 som_store_count = rose->somLocationCount;
if (som_store_count > proto->som_store_count) {
resize = 1;
#include "ue2common.h"
#include "util/multibit_internal.h"
-#include "sidecar/sidecar_internal.h"
#include "rose/rose_types.h"
#ifdef __cplusplus
u32 filledDelayedSlots;
u32 curr_anchored_loc; /**< last read/written row */
u32 curr_row_offset; /**< last read/written entry */
- u64a side_curr; /**< current location of the sidecar scan (abs offset) */
u32 curr_qi; /**< currently executing main queue index during
* \ref nfaQueueExec */
};
u32 anchored_literal_count;
u32 delay_count;
u32 scratchSize;
- u32 sideScratchSize;
u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE];
u32 roleCount;
struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already
u64a som_set_now_offset; /**< offset at which som_set_now represents */
u32 som_store_count;
struct mmbit_sparse_state sparse_iter_state[MAX_SPARSE_ITER_STATES];
- union sidecar_enabled_any ALIGN_CL_DIRECTIVE side_enabled;
- struct sidecar_scratch *side_scratch;
};
static really_inline
--- a/src/sidecar/sidecar.c
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sidecar.h"
-#include "sidecar_internal.h"
-#include "sidecar_shufti.h"
-#include "ue2common.h"
-#include "nfa/vermicelli.h"
-#include "util/bitutils.h"
-#include "util/uniform_ops.h"
-
-static really_inline
-u32 findAndClearLSB_8(u8 *v) {
- u32 t = *v;
- u32 rv = findAndClearLSB_32(&t);
- *v = t;
- return rv;
-}
-
-static really_inline
-u32 findAndClearLSB_128(m128 *v) {
- union {
- u32 words[sizeof(m128)/sizeof(u32)];
- m128 simd;
- } s;
- s.simd = *v;
- u32 rv = 0;
- for (u32 i = 0; i < ARRAY_LENGTH(s.words); i++) {
- u32 *w = &s.words[i];
- if (*w) {
- rv = findAndClearLSB_32(w) + 32 * i;
- break;
- }
- }
-
- *v = s.simd;
- return rv;
-}
-
-static never_inline
-u32 findAndClearLSB_256(m256 *v) {
- union {
- u32 words[sizeof(m256)/sizeof(u32)];
- m256 simd;
- } s;
- s.simd = *v;
- u32 rv = 0;
- for (u32 i = 0; i < ARRAY_LENGTH(s.words); i++) {
- u32 *w = &s.words[i];
- if (*w) {
- rv = findAndClearLSB_32(w) + 32 * i;
- break;
- }
- }
-
- *v = s.simd;
- return rv;
-}
-
-#define DO_DEAD_CHECK 1
-
-#define TAG 8
-#define STATE_T u8
-#include "sidecar_generic.h"
-
-#define TAG 32
-#define STATE_T u32
-#include "sidecar_generic.h"
-
-#define TAG 64
-#define STATE_T u64a
-#include "sidecar_generic.h"
-
-#define TAG 128
-#define STATE_T m128
-#include "sidecar_generic.h"
-
-#define TAG 256
-#define STATE_T m256
-#include "sidecar_generic.h"
-
-
-static never_inline
-void sidecarExec_N(const struct sidecar_N *n, const u8 *b, size_t len,
- struct sidecar_enabled_N *enabled,
- UNUSED struct sidecar_scratch *scratch,
- u64a base_offset, SidecarCallback cb, void *context) {
- DEBUG_PRINTF("N: %hhu %hhu nc %hhu\n", n->c, b[0], n->nocase);
- if (!enabled->bits) {
- return;
- }
-
- const u8 *loc = vermicelliExec(n->c, n->nocase, b, b + len);
-
- if (loc == b + len) {
- return;
- }
-
- enabled->bits = 0;
- for (u32 i = 0; i < n->report_count; i++) {
- cb(loc - b + base_offset, n->reports[i], context);
- }
-}
-
-static really_inline
-void sidecarEnabledInit_N(struct sidecar_enabled *enabled) {
- struct sidecar_enabled_N *e = (void *)enabled;
- e->bits = 0;
-}
-
-static really_inline
-void sidecarExec_i_S(UNUSED const struct sidecar_S *n,
- UNUSED const u8 *b, UNUSED size_t len,
- UNUSED struct sidecar_enabled_S *enabled,
- UNUSED u64a base_offset, UNUSED SidecarCallback cb,
- UNUSED void *context) {
- if (!enabled->bits) {
- DEBUG_PRINTF("bail early, bail often\n");
- return;
- }
-
- u8 state;
- if (len >= 16) {
- state = sidecarExec_S_int(n, b, len, enabled->bits);
- } else {
- const u8 *lo = (const u8 *)&n->lo;
- const u8 *hi = (const u8 *)&n->hi;
- state = enabled->bits;
- for (u32 i = 0; i < len; i++) {
- u8 c = b[i];
- state &= lo[c & 0xf] | hi[c >> 4];
- }
- }
-
- state = ~state & enabled->bits;
- if (!state) {
- DEBUG_PRINTF("bail\n");
- return;
- }
-
- enabled->bits &= ~state;
- DEBUG_PRINTF("s = %02hhx e = %02hhx\n", state, enabled->bits);
- u8 unshared = n->unshared_mask;
- const u8 *masks = sidecar_ids_to_mask_const(n);
- const struct sidecar_id_offset *id_map = n->id_list;
- while (state) {
- u32 bit = findAndClearLSB_8(&state);
- DEBUG_PRINTF("found bit %u\n", bit);
- const u32 *id_base = (const u32 *)((const char *)n
- + id_map[bit].first_offset);
- u32 count = id_map[bit].count;
- for (u32 i = 0; i < count; ++i) {
- DEBUG_PRINTF("firing %u\n", id_base[i]);
- cb(base_offset, id_base[i], context);
- enabled->bits &= ~(masks[id_base[i]] & unshared);
- }
- }
- DEBUG_PRINTF("s = %02hhx e = %02hhx\n", state, enabled->bits);
-}
-
-static really_inline
-void sidecarEnabledInit_S(struct sidecar_enabled *enabled) {
- struct sidecar_enabled_S *e = (void *)enabled;
- e->bits = 0;
-}
-
-static never_inline
-void sidecarExec_S(const struct sidecar_S *n, const u8 *b, size_t len,
- struct sidecar_enabled_S *enabled,
- UNUSED struct sidecar_scratch *scratch,
- u64a base_offset, SidecarCallback cb, void *context) {
- if (len > 1) {
- sidecarExec_i_S(n, b + 1, len - 1, enabled, base_offset + 1, cb,
- context);
- }
-
- u8 bits = enabled->bits; /* first byte doesn't change enabled */
- sidecarExec_i_S(n, b, 1, enabled, base_offset, cb, context);
- enabled->bits = bits;
-}
-
-void sidecarExec(const struct sidecar *n, const u8 *buffer, size_t len,
- struct sidecar_enabled *enabled,
- UNUSED struct sidecar_scratch *scratch, u64a base_offset,
- SidecarCallback cb, void *ctxt) {
- assert(n);
- assert(enabled);
- assert(len);
-
- assert(ISALIGNED_N(n, 16));
- assert(ISALIGNED_N(scratch, 16));
-
- if (!len) {
- return;
- }
-
-#define EXEC_CASE(tag) \
- case SIDECAR_##tag: \
- sidecarExec_##tag((const struct sidecar_##tag *)n, buffer, len, \
- (struct sidecar_enabled_##tag *)enabled, scratch, \
- base_offset, cb, ctxt); \
- break;
-
- switch(n->type) {
- EXEC_CASE(8)
- EXEC_CASE(32)
- EXEC_CASE(64)
- EXEC_CASE(128)
- EXEC_CASE(256)
- EXEC_CASE(N)
- EXEC_CASE(S)
- default:
- assert(0);
- }
-
-#undef EXEC_CASE
-}
-
-void sidecarEnabledInit(const struct sidecar *n,
- struct sidecar_enabled *enabled) {
- switch(n->type) {
- case SIDECAR_8:
- sidecarEnabledInit_8(enabled);
- break;
- case SIDECAR_32:
- sidecarEnabledInit_32(enabled);
- break;
- case SIDECAR_64:
- sidecarEnabledInit_64(enabled);
- break;
- case SIDECAR_128:
- sidecarEnabledInit_128(enabled);
- break;
- case SIDECAR_256:
- sidecarEnabledInit_256(enabled);
- break;
- case SIDECAR_N:
- sidecarEnabledInit_N(enabled);
- break;
- case SIDECAR_S:
- sidecarEnabledInit_S(enabled);
- break;
- default:
- assert(0);
- }
-}
-
-u32 sidecarScratchSize(const struct sidecar *n) {
- u32 width;
-
- switch(n->type) {
- case SIDECAR_8:
- width = sizeof(struct sidecar_mr_8);
- break;
- case SIDECAR_32:
- width = sizeof(struct sidecar_mr_32);
- break;
- case SIDECAR_64:
- width = sizeof(struct sidecar_mr_64);
- break;
- case SIDECAR_128:
- width = sizeof(struct sidecar_mr_128);
- break;
- case SIDECAR_256:
- width = sizeof(struct sidecar_mr_256);
- break;
- case SIDECAR_N:
- return 0; /* no scratch required for N */
- case SIDECAR_S:
- width = sizeof(struct sidecar_mr_8);
- break;
- default:
- assert(0);
- return 0;
- }
-
- /* + 1, for first byte offset */
- return width * (n->mask_bit_count + 1);
-}
-
-static really_inline
-void sidecarEnabledUnion_N(struct sidecar_enabled *dest,
- const struct sidecar_enabled *src) {
- struct sidecar_enabled_N *d = (void *)dest;
- const struct sidecar_enabled_N *s = (const void *)src;
- d->bits |= s->bits;
-}
-
-static really_inline
-void sidecarEnabledUnion_S(struct sidecar_enabled *dest,
- const struct sidecar_enabled *src) {
- struct sidecar_enabled_S *d = (void *)dest;
- const struct sidecar_enabled_S *s = (const void *)src;
- d->bits |= s->bits;
-}
-
-void sidecarEnabledUnion(const struct sidecar *n, struct sidecar_enabled *dest,
- const struct sidecar_enabled *src) {
- switch(n->type) {
- case SIDECAR_8:
- sidecarEnabledUnion_8(dest, src);
- break;
- case SIDECAR_32:
- sidecarEnabledUnion_32(dest, src);
- break;
- case SIDECAR_64:
- sidecarEnabledUnion_64(dest, src);
- break;
- case SIDECAR_128:
- sidecarEnabledUnion_128(dest, src);
- break;
- case SIDECAR_256:
- sidecarEnabledUnion_256(dest, src);
- break;
- case SIDECAR_N:
- sidecarEnabledUnion_N(dest, src);
- break;
- case SIDECAR_S:
- sidecarEnabledUnion_S(dest, src);
- break;
- default:
- assert(0);
- }
-}
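
The runtime above is, at heart, a shift-and style class matcher: each input byte indexes a 256-entry reach table, the table entry is ANDed into the set of enabled class bits, and any bit that drops out of that set is reported and stays disabled. A minimal scalar sketch of that technique, with a hand-built 8-class table and hypothetical names (an illustration of the idea, not the removed engine):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // reach[c] has bit i *cleared* iff byte c belongs to class i; the removed
    // code stores the table inverted like this so a plain AND both detects a
    // match and disables the class.
    static uint8_t reach[256];

    static void add_class(unsigned bit, const char *chars) {
        for (const char *p = chars; *p; ++p) {
            reach[(uint8_t)*p] &= (uint8_t)~(1u << bit);
        }
    }

    // Returns the set of enabled classes whose character was seen; each class
    // fires at most once per call, mirroring the enable/disable behaviour of
    // the engine above.
    static uint8_t scan(const uint8_t *buf, size_t len, uint8_t enabled) {
        uint8_t matched = 0;
        for (size_t i = 0; i < len && enabled; i++) {
            uint8_t next = (uint8_t)(enabled & reach[buf[i]]);
            matched |= (uint8_t)(enabled & ~next); // bits that just dropped out
            enabled = next;
        }
        return matched;
    }

    int main() {
        memset(reach, 0xff, sizeof(reach));
        add_class(0, "f");
        add_class(1, "\r\n");
        const char *text = "lines\r\nof fluff";
        uint8_t hits = scan((const uint8_t *)text, strlen(text), 0x03);
        printf("matched classes: %02x\n", hits); // prints "matched classes: 03"
        return 0;
    }
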
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SIDECAR_H
-#define SIDECAR_H
-
-#include "ue2common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct sidecar;
-struct sidecar_enabled;
-struct sidecar_scratch;
-
-/*
- * The sidecar is guaranteed to report the first match of a given id. However,
- * in various cases later matches may also be reported, as may matches for
- * disabled ids.
- */
-typedef void (*SidecarCallback)(u64a offset, u32 id, void *context);
-
-void sidecarExec(const struct sidecar *n, const u8 *buffer, size_t len,
- struct sidecar_enabled *enabled,
- struct sidecar_scratch *sidecar_scratch,
- u64a base_offset, SidecarCallback cb, void *context);
-
-u32 sidecarScratchSize(const struct sidecar *n);
-
-void sidecarEnabledInit(const struct sidecar *n,
- struct sidecar_enabled *enabled);
-
-/* Note: sidecar literals are disabled when they match and must be explicitly
- * re-enabled. This is purely because that behaviour is handy for rose: in
- * rose, a literal always sets its roles when fired (it never has to postpone
- * due to history), and if a literal is cleared its preds are also cleared, so
- * a pred would have to match again before we need to care about the literal
- * again.
- */
-void sidecarEnabledUnion(const struct sidecar *n, struct sidecar_enabled *dest,
- const struct sidecar_enabled *src);
-
-#define ID_TERMINATOR (~0U)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
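
For context, the call sequence the API above expects is: compile a vector of classes, allocate enabled/scratch blocks of the sizes the engine reports, arm individual ids, then run with a callback. The sketch below mirrors the removed unit tests; after this change there is of course nothing left for it to build against.

    #include "sidecar/sidecar.h"
    #include "sidecar/sidecar_compile.h"
    #include "util/alloc.h"
    #include "util/charreach.h"

    #include <vector>

    static void on_match(UNUSED u64a offset, u32 id, void *ctxt) {
        *(u32 *)ctxt |= 1U << id; // record which classes fired
    }

    static u32 runSidecarOnce(const u8 *data, size_t len) {
        std::vector<ue2::CharReach> classes(1);
        classes[0].set('f'); // a single one-byte class

        auto side = ue2::sidecarCompile(classes); // no hint: smallest model
        if (!side) {
            return 0;
        }

        auto enabled = ue2::aligned_zmalloc_unique<sidecar_enabled>(
            ue2::sidecarEnabledSize(side.get()));
        auto scratch = ue2::aligned_zmalloc_unique<sidecar_scratch>(
            sidecarScratchSize(side.get()));

        sidecarEnabledInit(side.get(), enabled.get());
        ue2::sidecarEnabledAdd(side.get(), enabled.get(), 0); // arm class 0

        u32 seen = 0;
        sidecarExec(side.get(), data, len, enabled.get(), scratch.get(),
                    0 /* base offset */, on_match, &seen);
        return seen;
    }
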
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sidecar_compile.h"
-#include "sidecar_internal.h"
-#include "ue2common.h"
-#include "nfa/shufticompile.h"
-#include "util/alloc.h"
-#include "util/charreach.h"
-#include "util/simd_utils.h"
-#include "util/verify_types.h"
-
-#include <array>
-#include <map>
-#include <set>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-static
-void prune(array<set<u32>, N_CHARS> &by_char, u32 p,
- map<CharReach, set<u32>> *impl_classes) {
- CharReach cr;
- assert(!by_char[p].empty());
-
- for (u32 i = 0; i < N_CHARS; i++) {
- if (by_char[i] == by_char[p]) {
- cr.set(i);
- }
- }
-
- assert(impl_classes->find(cr) == impl_classes->end());
- (*impl_classes)[cr] = by_char[p];
-
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- by_char[i].clear();
- }
-}
-
-static really_inline
-void set_bit(u8 *a, size_t i) {
- assert(i < 8);
- *a |= 1U << i;
-}
-
-static really_inline
-void set_bit(u32 *a, size_t i) {
- assert(i < 32);
- *a |= 1U << i;
-}
-
-static really_inline
-void set_bit(u64a *a, size_t i) {
- assert(i < 64);
- *a |= 1ULL << i;
-}
-
-static really_inline
-void set_bit(m128 *a, size_t i) {
- setbit128(a, i);
-}
-
-static really_inline
-void set_bit(m256 *a, size_t i) {
- setbit256(a, i);
-}
-
-template<typename s>
-static really_inline
-void flip(s *v) {
- *v = ~*v;
-}
-
-static really_inline
-void flip(m128 *v) {
- *v = not128(*v);
-}
-
-static really_inline
-void flip(m256 *v) {
- *v = not256(*v);
-}
-
-template<typename s>
-static really_inline
-void or_into_mask(s *a, const s b) {
- *a |= b;
-}
-
-static really_inline
-void or_into_mask(m128 *a, const m128 b) {
- *a = or128(*a, b);
-}
-
-static really_inline
-void or_into_mask(m256 *a, const m256 b) {
- *a = or256(*a, b);
-}
-
-template<u8 s_type> struct sidecar_traits { };
-#define MAKE_TRAITS(type_id, base_type_in, mask_bits) \
- template<> struct sidecar_traits<type_id> { \
- typedef base_type_in base_type; \
- static const u32 bits = mask_bits; \
- typedef sidecar_##mask_bits impl_type; \
- typedef sidecar_enabled_##mask_bits enabled_type; \
- };
-
-MAKE_TRAITS(SIDECAR_8, u8, 8)
-MAKE_TRAITS(SIDECAR_32, u32, 32)
-MAKE_TRAITS(SIDECAR_64, u64a, 64)
-MAKE_TRAITS(SIDECAR_128, m128, 128)
-MAKE_TRAITS(SIDECAR_256, m256, 256)
-
-template<> struct sidecar_traits<SIDECAR_N> {
- typedef sidecar_N impl_type;
-};
-
-template<> struct sidecar_traits<SIDECAR_S> {
- typedef u8 base_type;
- typedef sidecar_S impl_type;
-};
-
-/* builds the main char reach table */
-template <u8 s_type>
-static
-void populateTable(const map<CharReach, set<u32>> &impl_classes,
- typename sidecar_traits<s_type>::impl_type *ns) {
- assert(impl_classes.size()
- <= sizeof(typename sidecar_traits<s_type>::base_type) * 8);
-
- u32 b = 0;
- for (const CharReach &cr : impl_classes | map_keys) {
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- set_bit(&ns->reach[i], b);
- }
- b++;
- }
-
- for (u32 i = 0; i < N_CHARS; i++) {
- flip(&ns->reach[i]);
- }
-}
-
-/* builds the table controlling which bits in the mask to turn on for each
- * external id */
-template <u8 s_type>
-static
-void populateIdMasks(const map<CharReach, set<u32>> &impl_classes,
- typename sidecar_traits<s_type>::impl_type *ns) {
- typedef typename sidecar_traits<s_type>::base_type base;
- base *table = (base *)((char *)ns + sizeof(*ns));
- u32 b = 0;
- for (const set<u32> &id_list : impl_classes | map_values) {
- for (const u32 id : id_list) {
- set_bit(&table[id], b);
- }
- if (id_list.size() == 1) {
- set_bit(&ns->unshared_mask, b);
- }
- b++;
- }
-}
-
-/* builds the lists of ids to report for each set bit */
-template <u8 s_type>
-static
-void populateMaskInfo(const map<CharReach, set<u32>> &impl_classes,
- u32 num_ext_classes,
- typename sidecar_traits<s_type>::impl_type *ns,
- sidecar_id_offset *mask_info) {
- typedef typename sidecar_traits<s_type>::base_type base;
-
- u32 *curr_ptr = (u32 *)((char *)ns + sizeof(*ns)
- + sizeof(base) * num_ext_classes);
- curr_ptr = ROUNDUP_PTR(curr_ptr, sizeof(u32));
-
- u32 b = 0;
- for (const set<u32> &id_list : impl_classes | map_values) {
- mask_info[b].first_offset = verify_u32((char *)curr_ptr - (char *)ns);
- mask_info[b].count = verify_u32(id_list.size());
- for (const u32 id : id_list) {
- *curr_ptr = id;
- curr_ptr++;
- }
- b++;
- }
-}
-
-static
-size_t calcIdListSize(const map<CharReach, set<u32>> &impl_classes) {
- size_t id_count = 0;
- for (const auto &id_list : impl_classes | map_values) {
- id_count += id_list.size();
- }
-
- return id_count * sizeof(u32);
-}
-
-template<u8 s_type>
-static
-aligned_unique_ptr<sidecar> construct(const vector<CharReach> &ext_classes,
- const map<CharReach, set<u32> > &impl_classes_in,
- bool allow_collapse) {
- if (impl_classes_in.size() > sidecar_traits<s_type>::bits) {
- return nullptr;
- }
-
- map<CharReach, set<u32>> impl_classes_loc;
- const map<CharReach, set<u32>> *impl_classes;
-
- if (ext_classes.size() <= sidecar_traits<s_type>::bits) {
- /* we can directly map internal bits to external ids; no need for
- * indirection */
- for (u32 i = 0; i < ext_classes.size(); i++) {
- impl_classes_loc[ext_classes[i]].insert(i);
- }
-
- impl_classes = &impl_classes_loc;
- } else {
- /* TODO: spread classes out if possible */
- if (!allow_collapse) {
- return nullptr;
- }
- impl_classes = &impl_classes_in;
- }
-
- typedef typename sidecar_traits<s_type>::base_type base;
- typedef typename sidecar_traits<s_type>::impl_type impl;
-
- u32 id_count = verify_u32(ext_classes.size());
- size_t total_id_list_size = calcIdListSize(*impl_classes);
- size_t size = sizeof(impl) + id_count * sizeof(base); /* ids -> masks */
- size = ROUNDUP_N(size, sizeof(u32));
- size += total_id_list_size;
- DEBUG_PRINTF("allocated %zu\n", size);
-
- auto s = aligned_zmalloc_unique<sidecar>(size);
- assert(s); // otherwise we would have thrown std::bad_alloc
- impl *ns = (impl *)(s.get());
-
- ns->header.type = s_type;
- ns->header.size = size;
- ns->header.id_count = id_count;
- ns->header.mask_bit_count = verify_u32(impl_classes->size());
-
- populateTable<s_type>(*impl_classes, ns);
- populateIdMasks<s_type>(*impl_classes, ns);
- populateMaskInfo<s_type>(*impl_classes, id_count, ns, ns->id_list);
-
- return s;
-}
-
-static
-bool isNoodable(const CharReach &cr) {
- return cr.count() == 1 || (cr.count() == 2 && cr.isBit5Insensitive());
-}
-
-template <>
-aligned_unique_ptr<sidecar>
-construct<SIDECAR_N>(const vector<CharReach> &ext_classes,
- const map<CharReach, set<u32>> &impl_classes,
- bool) {
- if (impl_classes.size() != 1 || !isNoodable(impl_classes.begin()->first)) {
- return nullptr;
- }
-
- const CharReach &cr = impl_classes.begin()->first;
- const set<u32> &reports = impl_classes.begin()->second;
-
- u32 id_count = verify_u32(ext_classes.size());
- size_t size = sizeof(sidecar_N) + sizeof(u32) * reports.size();
- DEBUG_PRINTF("allocated %zu\n", size);
-
- auto s = aligned_zmalloc_unique<sidecar>(size);
- assert(s); // otherwise we would have thrown std::bad_alloc
- sidecar_N *ns = (sidecar_N *)(s.get());
-
- ns->header.type = SIDECAR_N;
- ns->header.size = size;
- ns->header.id_count = id_count;
- ns->header.mask_bit_count = verify_u32(impl_classes.size());
-
- ns->c = cr.find_first();
- ns->nocase = cr.isBit5Insensitive();
-
- ns->report_count = verify_u32(reports.size());
- u32 *p = ns->reports;
- for (u32 report : reports) {
-        *p++ = report;
- }
-
- return s;
-}
-
-static
-void flipShuftiMask(m128 *a) {
- *a = not128(*a);
-}
-
-template <>
-aligned_unique_ptr<sidecar>
-construct<SIDECAR_S>(const vector<CharReach> &ext_classes,
- const map<CharReach, set<u32>> &impl_classes,
- bool) {
- u32 id_count = verify_u32(ext_classes.size());
- size_t total_id_list_size = calcIdListSize(impl_classes);
- size_t size = sizeof(sidecar_S)
- + id_count * sizeof(u8); /* ids -> masks */
- size = ROUNDUP_N(size, sizeof(u32));
- size += total_id_list_size;
- DEBUG_PRINTF("allocated %zu\n", size);
-
- auto s = aligned_zmalloc_unique<sidecar>(size);
- assert(s); // otherwise we would have thrown std::bad_alloc
- sidecar_S *ns = (sidecar_S *)(s.get());
-
- ns->header.type = SIDECAR_S;
- ns->header.size = size;
- ns->header.id_count = id_count;
-
- vector<const CharReach *> shuf_bit_to_impl;
-
- /* populate the shufti masks */
- u32 used_bits = 0;
- for (const CharReach &cr : impl_classes | map_keys) {
- m128 lo, hi;
- int bits = shuftiBuildMasks(cr, &lo, &hi);
-
- if (bits < 0 || used_bits + bits > 8) {
- return nullptr;
- }
-
- mergeShuftiMask(&ns->lo, lo, used_bits);
- mergeShuftiMask(&ns->hi, hi, used_bits);
- for (u32 i = used_bits; i < used_bits + bits; i++) {
- shuf_bit_to_impl.push_back(&cr);
- }
- used_bits += bits;
- }
-
- flipShuftiMask(&ns->lo); /* we are shift-or around here */
- flipShuftiMask(&ns->hi);
- ns->header.mask_bit_count = used_bits;
-
- /* populate the enable masks */
- u8 *table = (u8 *)((char *)ns + sizeof(*ns));
- u32 b = 0;
- for (const CharReach *cr : shuf_bit_to_impl) {
- const set<u32> &rep_set = impl_classes.find(*cr)->second;
- for (u32 report : rep_set) {
- set_bit(&table[report], b);
- }
- if (rep_set.size() == 1) {
- set_bit(&ns->unshared_mask, b);
- }
- b++;
- }
-
- /* populate the report id masks */
- sidecar_id_offset temp_id_list[8];
- populateMaskInfo<SIDECAR_S>(impl_classes, id_count, ns, temp_id_list);
-
- u32 i = 0, j = 0;
- auto iit = impl_classes.begin();
- while (i < shuf_bit_to_impl.size()) {
- assert(iit != impl_classes.end());
- if (shuf_bit_to_impl[i] == &iit->first) {
- ns->id_list[i] = temp_id_list[j];
- i++;
- } else {
- j++;
- ++iit;
- }
- }
-
- return s;
-}
-
-static
-aligned_unique_ptr<sidecar>
-constructWithHint(int hint, const vector<CharReach> &classes,
- const map<CharReach, set<u32>> &impl_classes) {
- switch (hint) {
- case SIDECAR_8:
- return construct<SIDECAR_8>(classes, impl_classes, true);
- case SIDECAR_32:
- return construct<SIDECAR_32>(classes, impl_classes, true);
- case SIDECAR_64:
- return construct<SIDECAR_64>(classes, impl_classes, true);
- case SIDECAR_128:
- return construct<SIDECAR_128>(classes, impl_classes, true);
- case SIDECAR_256:
- return construct<SIDECAR_256>(classes, impl_classes, true);
- case SIDECAR_N:
- return construct<SIDECAR_N>(classes, impl_classes, true);
- case SIDECAR_S:
- return construct<SIDECAR_S>(classes, impl_classes, true);
- default:
-        DEBUG_PRINTF("unknown sidecar hint\n");
- assert(0);
- return nullptr;
- }
-}
-
-aligned_unique_ptr<sidecar> sidecarCompile(const vector<CharReach> &classes,
- int hint) {
- array<set<u32>, N_CHARS> by_char;
-
- for (u32 i = 0; i < classes.size(); i++) {
- const CharReach &cr = classes[i];
- for (size_t j = cr.find_first(); j != cr.npos; j = cr.find_next(j)) {
- by_char[j].insert(i);
- }
- }
-
- map<CharReach, set<u32>> impl_classes;
-
- bool changed;
- do {
- changed = false;
- u32 smallest = N_CHARS;
- for (u32 i = 0; i < N_CHARS; i++) {
- if (by_char[i].empty()) {
- continue;
- }
-
- if (by_char[i].size() == 1) {
- prune(by_char, i, &impl_classes);
- changed = true;
- } else if (smallest == N_CHARS ||
- by_char[i].size() < by_char[smallest].size()) {
- smallest = i;
- }
- }
-
- if (!changed && smallest != N_CHARS) {
- prune(by_char, smallest, &impl_classes);
- changed = true;
- }
- } while (changed);
-
- DEBUG_PRINTF("matching %zu classes; %zu impl classes\n", classes.size(),
- impl_classes.size());
- assert(impl_classes.size() <= N_CHARS);
-
- if (hint != SIDECAR_NO_HINT) {
- return constructWithHint(hint, classes, impl_classes);
- }
-
- aligned_unique_ptr<sidecar> (*facts[])(const vector<CharReach> &,
- const map<CharReach, set<u32> > &, bool) = {
- construct<SIDECAR_N>,
- // construct<SIDECAR_S>, TODO: first offset stuff for S
- construct<SIDECAR_8>,
- construct<SIDECAR_32>,
- construct<SIDECAR_64>,
- construct<SIDECAR_128>,
- construct<SIDECAR_256>
- };
-
- for (u32 i = 0; i < ARRAY_LENGTH(facts); i++) {
- auto sc = facts[i](classes, impl_classes, false);
- if (sc) {
- return sc;
- }
- }
-
- for (u32 i = 0; i < ARRAY_LENGTH(facts); i++) {
- auto sc = facts[i](classes, impl_classes, true);
- if (sc) {
- return sc;
- }
- }
-
- return nullptr;
-}
-
-u32 sidecarSize(const sidecar *ns) {
- return ns->size;
-}
-
-u32 sidecarEnabledSize(const sidecar *n) {
- switch (n->type) {
- case SIDECAR_8:
- return sizeof(struct sidecar_enabled_8);
- case SIDECAR_32:
- return sizeof(struct sidecar_enabled_32);
- case SIDECAR_64:
- return sizeof(struct sidecar_enabled_64);
- case SIDECAR_128:
- return sizeof(struct sidecar_enabled_128);
- case SIDECAR_256:
- return sizeof(struct sidecar_enabled_256);
- case SIDECAR_N:
- return sizeof(struct sidecar_enabled_N);
- case SIDECAR_S:
- return sizeof(struct sidecar_enabled_S);
- default:
- assert(0);
- }
- return 0;
-}
-
-template<u8 s_type>
-static
-void sidecarEnabledAdd_int(const sidecar *nn, struct sidecar_enabled *enabled,
- u32 id) {
- typedef typename sidecar_traits<s_type>::enabled_type e_type;
- typedef typename sidecar_traits<s_type>::impl_type n_type;
- e_type *e = (e_type *)enabled;
- const n_type *n = (const n_type *)nn;
-
- DEBUG_PRINTF("enabling %u\n", id);
- typedef typename sidecar_traits<s_type>::base_type base;
- const base *masks = (const base *)sidecar_ids_to_mask_const(n);
- or_into_mask(&e->bits, masks[id]);
-}
-
-template<>
-void sidecarEnabledAdd_int<SIDECAR_S>(const sidecar *nn,
- sidecar_enabled *enabled, u32 id) {
- const sidecar_S *n = (const sidecar_S *)nn;
- sidecar_enabled_S *e = (sidecar_enabled_S *)enabled;
- const u8 *masks = (const u8 *)sidecar_ids_to_mask_const(n);
- e->bits |= masks[id];
-}
-
-template<>
-void sidecarEnabledAdd_int<SIDECAR_N>(UNUSED const sidecar *n,
- struct sidecar_enabled *enabled,
- UNUSED u32 id) {
- sidecar_enabled_N *e = (sidecar_enabled_N *)enabled;
-    /* the N model tracks a single class, so the only sensible request here is
-     * to enable that class */
- e->bits = 1;
-}
-
-void sidecarEnabledAdd(const sidecar *n, struct sidecar_enabled *enabled,
- u32 id) {
- DEBUG_PRINTF("enabling %hhu:%u\n", n->type, id);
- switch (n->type) {
- case SIDECAR_8:
- sidecarEnabledAdd_int<SIDECAR_8>(n, enabled, id);
- break;
- case SIDECAR_32:
- sidecarEnabledAdd_int<SIDECAR_32>(n, enabled, id);
- break;
- case SIDECAR_64:
- sidecarEnabledAdd_int<SIDECAR_64>(n, enabled, id);
- break;
- case SIDECAR_128:
- sidecarEnabledAdd_int<SIDECAR_128>(n, enabled, id);
- break;
- case SIDECAR_256:
- sidecarEnabledAdd_int<SIDECAR_256>(n, enabled, id);
- break;
- case SIDECAR_N:
- sidecarEnabledAdd_int<SIDECAR_N>(n, enabled, id);
- break;
- case SIDECAR_S:
- sidecarEnabledAdd_int<SIDECAR_S>(n, enabled, id);
- break;
- default:
- assert(0);
- }
-}
-
-} // namespace ue2
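
The by_char/prune step above is essentially a partition: characters are grouped by the exact set of external classes that contain them, and every distinct group becomes one implementation class (one mask bit). A standalone sketch of that grouping using plain standard containers, ignoring the greedy pruning order and the CharReach type (hypothetical names throughout):

    #include <cstddef>
    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Partition 0..255 by "which input classes contain this byte"; every
    // distinct non-empty signature becomes one implementation class.
    static std::map<std::set<size_t>, std::set<unsigned char>>
    partitionClasses(const std::vector<std::string> &classes) {
        std::map<std::set<size_t>, std::set<unsigned char>> impl;
        for (int c = 0; c < 256; c++) {
            std::set<size_t> sig;
            for (size_t i = 0; i < classes.size(); i++) {
                if (classes[i].find((char)c) != std::string::npos) {
                    sig.insert(i);
                }
            }
            if (!sig.empty()) {
                impl[sig].insert((unsigned char)c);
            }
        }
        return impl;
    }

    int main() {
        std::vector<std::string> classes = {"abc", "cde"}; // 'c' is shared
        auto impl = partitionClasses(classes);
        printf("%zu input classes -> %zu impl classes\n", classes.size(),
               impl.size()); // prints "2 input classes -> 3 impl classes"
        return 0;
    }
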
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SIDECAR_COMPILE_H
-#define SIDECAR_COMPILE_H
-
-#include "ue2common.h"
-#include "util/alloc.h"
-
-#include <memory>
-#include <vector>
-
-struct sidecar;
-struct sidecar_enabled;
-
-namespace ue2 {
-
-class CharReach;
-
-#define SIDECAR_NO_HINT (-1)
-
-/*
- * match ids are given by position in the report_map vector
- */
-aligned_unique_ptr<sidecar>
-sidecarCompile(const std::vector<CharReach> &classes,
- int hint = SIDECAR_NO_HINT);
-
-u32 sidecarSize(const sidecar *ns);
-u32 sidecarEnabledSize(const sidecar *n);
-void sidecarEnabledAdd(const sidecar *n, struct sidecar_enabled *enabled,
- u32 id);
-
-} // namespace ue2
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "sidecar_dump.h"
-#include "sidecar_internal.h"
-#include "ue2common.h"
-
-#include <cstdio>
-
-#ifndef DUMP_SUPPORT
-#error No dump support!
-#endif
-
-namespace ue2 {
-
-static
-void dumpSideShuf(const sidecar_S *s, FILE *f) {
- fprintf(f, "lo:");
- for (u32 i = 0; i < 16; i++) {
- fprintf(f, " %02hhx", ((const u8 *)&s->lo)[i]);
- }
- fprintf(f, "\n");
-
- fprintf(f, "hi:");
- for (u32 i = 0; i < 16; i++) {
- fprintf(f, " %02hhx", ((const u8 *)&s->hi)[i]);
- }
- fprintf(f, "\n");
-
- const u8 *enables = (const u8 *)sidecar_ids_to_mask_const(s);
- fprintf(f, "shufti masks per id\n");
- for (u32 i = 0; i < s->header.id_count; i++) {
- fprintf(f, "%u: %02hhx\n", i, enables[i]);
- }
-}
-
-void sidecarDump(const sidecar *s, FILE *f) {
- const char *type = "?";
- switch(s->type) {
- case SIDECAR_8:
- type = "8";
- break;
- case SIDECAR_32:
- type = "32";
- break;
- case SIDECAR_64:
- type = "64";
- break;
- case SIDECAR_128:
- type = "128";
- break;
- case SIDECAR_256:
- type = "256";
- break;
- case SIDECAR_N:
- type = "N";
- break;
- case SIDECAR_S:
- type = "S";
- break;
- default:
- assert(0);
- }
-
- fprintf(f, "Sidecar: %s\n", type);
- fprintf(f, " size: %u\n", s->size);
- fprintf(f, " used bits: %u\n", s->mask_bit_count);
- fprintf(f, " ids: %u\n", s->id_count);
- if (s->type == SIDECAR_S) {
- dumpSideShuf((const sidecar_S *)s, f);
- }
-}
-
-} // namespace ue2
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SIDECAR_DUMP_H
-#define SIDECAR_DUMP_H
-
-#if defined(DUMP_SUPPORT)
-
-#include <cstdio>
-
-struct sidecar;
-
-namespace ue2 {
-
-void sidecarDump(const sidecar *s, FILE *f);
-
-} // namespace ue2
-
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* expects the includer to define TAG and STATE_T */
-
-#include "util/join.h"
-
-#if TAG == 8
-#define ISTATE_T u32
-#else
-#define ISTATE_T STATE_T
-#endif
-
-#define EXEC_FN JOIN(sidecarExec_, TAG)
-#define EXEC_I_FN JOIN(sidecarExec_i_, TAG)
-#define ENABLED_INIT_FN JOIN(sidecarEnabledInit_, TAG)
-#define ENABLED_UNION_FN JOIN(sidecarEnabledUnion_, TAG)
-#define ENABLED_STRUCT JOIN(struct sidecar_enabled_, TAG)
-#define PLAY_CB_FB JOIN(sidecarPlayCallbacks_, STATE_T)
-#define SIDECAR_STRUCT JOIN(struct sidecar_, TAG)
-#define MR_STRUCT JOIN(struct sidecar_mr_, TAG)
-#define load_state JOIN(load_, STATE_T)
-#define store_state JOIN(store_, STATE_T)
-#define and_state JOIN(and_, STATE_T)
-#define iand_state JOIN(and_, ISTATE_T)
-#define andnot_state JOIN(andnot_, STATE_T)
-#define or_state JOIN(or_, STATE_T)
-#define is_zero JOIN(isZero_, STATE_T)
-#define iis_zero JOIN(isZero_, ISTATE_T)
-#define is_not_zero JOIN(isNonZero_, ISTATE_T)
-#define not_eq JOIN(noteq_, STATE_T)
-#define inot_eq JOIN(noteq_, ISTATE_T)
-#define find_and_clear_lsb JOIN(findAndClearLSB_, TAG)
-#define zero_state JOIN(zero_, ISTATE_T)
-
-#if TAG <= 64
-#define TDEBUG_PRINTF(...) DEBUG_PRINTF(__VA_ARGS__)
-#define ATDEBUG_PRINTF(...) ADEBUG_PRINTF(__VA_ARGS__)
-#else
-#define TDEBUG_PRINTF(...) do { } while(0)
-#define ATDEBUG_PRINTF(...) do { } while(0)
-#endif
-
-MR_STRUCT {
- const u8 *loc;
- STATE_T mask;
-};
-
-static really_inline
-void PLAY_CB_FB(const SIDECAR_STRUCT *n, const u8 *b, const MR_STRUCT *matches,
- u32 match_len, ENABLED_STRUCT *enabled, u64a base_offset,
- SidecarCallback cb, void *context) {
- const STATE_T *id_mask_map = sidecar_ids_to_mask_const(n);
- const struct sidecar_id_offset *id_map = n->id_list;
-
- STATE_T e_local = load_state(&enabled->bits);
-
- DEBUG_PRINTF("playing %u matches\n", match_len);
- TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits);
-
- for (u32 i = 0; i < match_len; i++) {
- u64a offset = matches[i].loc - b + base_offset;
- DEBUG_PRINTF("match at %llu\n", offset);
-
- STATE_T local_m = andnot_state(load_state(&matches[i].mask), e_local);
-
- e_local = and_state(matches[i].mask, e_local);
-
- TDEBUG_PRINTF("%08llu=~%08llu^%08llu\n", (u64a)local_m,
- (u64a)matches[i].mask, (u64a)e_local);
-
- while (is_not_zero(local_m)) {
- u32 bit = find_and_clear_lsb(&local_m);
- DEBUG_PRINTF("bit %u at %llu\n", bit, offset);
- const u32 *id_base = (const u32 *)
- ((const char *)n + id_map[bit].first_offset);
- assert(ISALIGNED_N(id_base, 4));
- u32 count = id_map[bit].count;
- for (u32 j = 0; j < count; ++j) {
- cb(offset, id_base[j], context);
- STATE_T u_local = and_state(id_mask_map[id_base[j]],
- load_state(&n->unshared_mask));
- DEBUG_PRINTF("squashing unshared???\n");
- e_local = andnot_state(u_local, e_local);
- local_m = andnot_state(u_local, local_m);
- }
- }
- }
-
- TDEBUG_PRINTF("enabled %08llu\n", (u64a)e_local);
- store_state(&enabled->bits, e_local);
-}
-
-/* returns a pointer past the last match record written */
-static really_inline
-MR_STRUCT *EXEC_I_FN(const SIDECAR_STRUCT *n, const u8 *b, const u8 *b_end,
- STATE_T state_in, MR_STRUCT *matches) {
- DEBUG_PRINTF("running over %zu\n", b_end - b);
- const STATE_T *table = (const STATE_T *)&n->reach;
- ISTATE_T s = state_in;
-
- b_end--; /* last byte is unrolled at end of function */
- for (; b < b_end; b++) {
- u8 c = *b;
- ISTATE_T r = table[c];
- ISTATE_T s1 = iand_state(s, r);
- if (inot_eq(s1, s)) {
- TDEBUG_PRINTF("recording match %08llu\n", (u64a)s1);
- matches->loc = b;
- store_state(&matches->mask, s1);
- matches++;
- if (DO_DEAD_CHECK && iis_zero(s1)) {
- goto done;
- }
- }
- s = s1;
- }
-
- /* do final byte by itself; gain blessing from the gcc gods */
- u8 c = *b;
- ISTATE_T r = table[c];
- ISTATE_T s1 = iand_state(s, r);
- if (inot_eq(s1, s)) {
- TDEBUG_PRINTF("recording match %08llu\n", (u64a)s1);
- matches->loc = b;
- matches->mask = s1;
- matches++;
- }
-
-done:
- return matches;
-}
-
-static never_inline
-void EXEC_FN(const SIDECAR_STRUCT *n, const u8 *b, size_t len,
- ENABLED_STRUCT *enabled, struct sidecar_scratch *scratch,
- u64a base_offset, SidecarCallback cb, void *context) {
- STATE_T e_local = load_state(&enabled->bits);
- if (is_zero(e_local)) {
- return;
- }
-
- MR_STRUCT *matches = (MR_STRUCT *)scratch;
- DEBUG_PRINTF("running sidecar over %zu len\n", len);
- DEBUG_PRINTF("enabled %p scratch %p\n", enabled, scratch);
- TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits);
- MR_STRUCT *matches_out = EXEC_I_FN(n, b, b + len, e_local, matches);
- TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits);
- if (matches_out - matches) {
- PLAY_CB_FB(n, b, matches, matches_out - matches, enabled, base_offset,
- cb, context);
- }
-
- TDEBUG_PRINTF("enabled %08llu\n", (u64a)enabled->bits);
-}
-
-static really_inline
-void ENABLED_INIT_FN(struct sidecar_enabled *enabled) {
- ENABLED_STRUCT *e = (void *)enabled;
- store_state(&e->bits, zero_state);
-}
-
-static really_inline
-void ENABLED_UNION_FN(struct sidecar_enabled *dest,
- const struct sidecar_enabled *src) {
- ENABLED_STRUCT *d = (void *)dest;
- const ENABLED_STRUCT *s = (const void *)src;
- store_state(&d->bits, or_state(load_state(&d->bits), load_state(&s->bits)));
-}
-
-
-#undef ENABLED_STRUCT
-#undef ENABLED_INIT_FN
-#undef ENABLED_UNION_FN
-#undef EXEC_FN
-#undef EXEC_I_FN
-#undef load_state
-#undef MR_STRUCT
-#undef PLAY_CB_FB
-#undef SIDECAR_STRUCT
-#undef store_state
-#undef and_state
-#undef iand_state
-#undef andnot_state
-#undef not_eq
-#undef inot_eq
-#undef or_state
-#undef is_zero
-#undef is_not_zero
-#undef zero_state
-
-#undef TDEBUG_PRINTF
-#undef ATDEBUG_PRINTF
-
-#undef ISTATE_T
-
-#undef TAG
-#undef STATE_T
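
sidecar_generic.h is an include-template: the including file defines TAG and STATE_T, the header token-pastes them into width-specific names with JOIN, and everything is #undef'd at the end so the header can be included again for the next width. A self-contained sketch of that token-pasting pattern (using a function-like macro instead of a re-included file so it fits in one snippet; the names here are hypothetical):

    #include <cstddef>
    #include <cstdio>

    #define JOIN_(a, b) a##b
    #define JOIN(a, b) JOIN_(a, b)

    // "Template" body: each expansion produces a width-specific function name
    // via token pasting, the same trick JOIN() performs in the removed header.
    #define DEFINE_SUM_FN(TAG, STATE_T)                                     \
        static STATE_T JOIN(sumBytes_, TAG)(const unsigned char *b,         \
                                            size_t len) {                   \
            STATE_T s = 0;                                                  \
            for (size_t i = 0; i < len; i++) {                              \
                s = (STATE_T)(s + b[i]);                                    \
            }                                                               \
            return s;                                                       \
        }

    DEFINE_SUM_FN(8, unsigned char) // defines sumBytes_8
    DEFINE_SUM_FN(32, unsigned int) // defines sumBytes_32

    int main() {
        const unsigned char data[] = {200, 100, 7};
        // the 8-bit variant wraps modulo 256, the 32-bit one does not
        printf("%u %u\n", (unsigned)sumBytes_8(data, 3),
               (unsigned)sumBytes_32(data, 3)); // prints "51 307"
        return 0;
    }
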
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SIDECAR_INTERNAL_H
-#define SIDECAR_INTERNAL_H
-
-#include "ue2common.h"
-
-#define SIDECAR_8 0
-#define SIDECAR_32 1
-#define SIDECAR_64 2
-#define SIDECAR_128 3
-#define SIDECAR_256 4
-#define SIDECAR_N 5
-#define SIDECAR_S 6
-
-struct sidecar_id_offset {
- u32 first_offset; /* from base of sidecar */
- u32 count;
-};
-
-struct sidecar {
- u8 type;
- u32 size;
- u32 id_count;
- u32 mask_bit_count;
-}; /* .. followed in memory by reach table */
-
-#define SIDECAR_SPEC(bit_count, base_type) \
-struct sidecar_##bit_count { \
- struct sidecar header; \
- base_type reach[N_CHARS]; \
- struct sidecar_id_offset id_list[bit_count];\
- base_type unshared_mask; \
-};
-
-struct sidecar_N {
- struct sidecar header;
- char c;
- char nocase;
- u32 report_count;
- u32 reports[];
-};
-
-struct sidecar_S {
- struct sidecar header;
- m128 hi;
- m128 lo;
- struct sidecar_id_offset id_list[8];
- u8 unshared_mask;
-};
-
-SIDECAR_SPEC(8, u8)
-SIDECAR_SPEC(32, u32)
-SIDECAR_SPEC(64, u64a)
-SIDECAR_SPEC(128, m128)
-SIDECAR_SPEC(256, m256)
-
-struct sidecar_enabled {
- u8 null;
-};
-
-struct sidecar_enabled_8 {
- u8 bits;
-};
-
-struct sidecar_enabled_32 {
- u32 bits;
-};
-
-struct sidecar_enabled_64 {
- u64a bits;
-};
-
-struct sidecar_enabled_128 {
- m128 bits;
-};
-
-struct sidecar_enabled_256 {
- m256 bits;
-};
-
-struct sidecar_enabled_N {
- u8 bits;
-};
-
-struct sidecar_enabled_S {
- u8 bits;
-};
-
-union sidecar_enabled_any {
- struct sidecar_enabled arb;
- struct sidecar_enabled_8 e8;
- struct sidecar_enabled_32 e32;
- struct sidecar_enabled_64 e64;
- struct sidecar_enabled_128 e128;
- struct sidecar_enabled_256 e256;
- struct sidecar_enabled_N eN;
- struct sidecar_enabled_S eS;
-};
-
-/* ASCII ART TIME
- *
- * non-noodle sidecars
- *
- * ---------------------
- * [ struct sidecar ] ROUNDUP_16(sizeof(sidecar))
- * --------------------- |
- * [ ] | Shufti: masks here
- * [ reach table ] sizeof(N) * N_CHARS |
- * [ ] |
- * ---------------------
- * [ bit->id list head ] N * sizeof(sidecar_id_offset)
- * ---------------------
- * --------------------- sizeof(sidecar_N)
- * [ ]
- * [ id->masks ] count(id) * sizeof(N)
- * [ ]
- * ---------------------
- * [ ]
- * [ id lists ] complicated * sizeof(report)
- * [ ]
- * ---------------------
- */
-
-#define sidecar_ids_to_mask_const(side_struct) \
- ((const void *)((const char *)side_struct + sizeof(*side_struct)))
-
-#endif
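
The ASCII art above describes one variable-length allocation: a fixed header, then the per-id mask table, then the id lists, all reached by offset arithmetic from the base pointer (which is exactly what sidecar_ids_to_mask_const does). A small standalone sketch of that trailing-table idiom, with hypothetical names:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Fixed header followed, in the same allocation, by id_count u8 masks.
    struct blob_header {
        uint32_t size;
        uint32_t id_count;
    };

    static uint8_t *blob_masks(blob_header *b) {
        return (uint8_t *)(b + 1); // table starts right after the header
    }

    static blob_header *make_blob(uint32_t id_count) {
        size_t size = sizeof(blob_header) + id_count; // header + mask table
        blob_header *b = (blob_header *)calloc(1, size);
        assert(b);
        b->size = (uint32_t)size;
        b->id_count = id_count;
        return b;
    }

    int main() {
        blob_header *b = make_blob(4);
        blob_masks(b)[2] = 0x80; // set the mask for id 2
        printf("size=%u mask[2]=%02x\n", b->size, blob_masks(b)[2]);
        free(b);
        return 0;
    }
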
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sidecar_shufti.h"
-#include "sidecar_internal.h"
-#include "ue2common.h"
-#include "util/simd_utils.h"
-#include "util/simd_utils_ssse3.h"
-
-#define GET_LO_4(chars) and128(chars, low4bits)
-#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
-
-#ifdef DEBUG
-#include <ctype.h>
-UNUSED static void dumpMsk(m128 msk) {
- u8 *maskAsU8 = (u8 *)&msk;
- for (int i = 0; i < 16; i++) {
- printf("%02hhx ", maskAsU8[i]);
- }
-}
-
-UNUSED static void dumpMskAsChars(m128 msk) {
- u8 *maskAsU8 = (u8 *)&msk;
- for (int i = 0; i < 16; i++) {
- u8 c = maskAsU8[i];
- if (isprint(c))
- printf("%c",c);
- else
- printf(".");
- }
-}
-#endif
-
-static really_inline
-u8 squash(m128 t) {
- m128 u = byteShiftRight128(t, 8);
- t = and128(t, u);
- m128 v = byteShiftRight128(t, 4);
- t = and128(t, v);
- u32 gpr = movd(t);
- gpr &= gpr >> 16;
- gpr &= gpr >> 8;
- DEBUG_PRINTF(" gpr: %02x\n", (u8)gpr);
- return (u8)gpr;
-}
-
-
-static really_inline
-m128 mainLoop(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits) {
- m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
- m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
- m128 t = or128(c_lo, c_hi);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" chars: "); dumpMskAsChars(chars); printf("\n");
- DEBUG_PRINTF(" char: "); dumpMsk(chars); printf("\n");
- DEBUG_PRINTF(" c_lo: "); dumpMsk(c_lo); printf("\n");
- DEBUG_PRINTF(" c_hi: "); dumpMsk(c_hi); printf("\n");
- DEBUG_PRINTF(" t: "); dumpMsk(t); printf("\n");
-#endif
-
- return t;
-}
-
-u8 sidecarExec_S_int(const struct sidecar_S *n, const u8 *b,
- size_t len, u8 state) {
- const m128 low4bits = _mm_set1_epi8(0xf);
- const u8 *b_end = b + len;
- m128 mask_lo = n->lo;
- m128 mask_hi = n->hi;
-
- // Preconditioning: most of the time our buffer won't be aligned
- DEBUG_PRINTF("warmup %02hhx\n", state);
- m128 chars = loadu128(b);
- m128 t = _mm_set1_epi8(state);
- t = and128(t, mainLoop(mask_lo, mask_hi, chars, low4bits));
- b = ROUNDUP_PTR(b + 1, 16);
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
-
- DEBUG_PRINTF("main %02hhx\n", state);
- const u8 *last_block = b_end - 16;
- while (b < last_block) {
- m128 lchars = load128(b);
- m128 rv = mainLoop(mask_lo, mask_hi, lchars, low4bits);
- t = and128(t, rv);
- b += 16;
- if (!squash(t)) {
- return 0;
- }
- }
-
- DEBUG_PRINTF("cool down %02hhx\n", state);
- assert(b <= b_end && b >= b_end - 16);
-    // do an unaligned load at the end for an accurate picture of the end
- chars = loadu128(b_end - 16);
- m128 rv = mainLoop(mask_lo, mask_hi, chars, low4bits);
- t = and128(t, rv);
-
- return squash(t);
-}
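
The SIMD routine above is the shufti trick: one 16-entry table indexed by a byte's low nibble, another indexed by its high nibble, and the byte is treated as matching a bucket only when both lookups agree; the scalar fallback in the removed sidecarExec_i_S (state &= lo[c & 0xf] | hi[c >> 4]) is the same computation one byte at a time, with the tables stored inverted. A self-contained scalar sketch using non-inverted tables, hand-built for a class that happens to be exactly representable (arbitrary classes need the bucket assignment that shuftiBuildMasks performed):

    #include <cstdint>
    #include <cstdio>

    // One bucket (bit 0) matching '\r' and '\n': a byte is in the bucket only
    // when both its low-nibble and high-nibble lookups agree.
    static uint8_t lo_tab[16], hi_tab[16];

    static void add_byte(unsigned bucket, uint8_t c) {
        lo_tab[c & 0xf] |= (uint8_t)(1u << bucket);
        hi_tab[c >> 4] |= (uint8_t)(1u << bucket);
    }

    static bool in_bucket(unsigned bucket, uint8_t c) {
        return (lo_tab[c & 0xf] & hi_tab[c >> 4]) & (1u << bucket);
    }

    int main() {
        add_byte(0, '\r');
        add_byte(0, '\n');
        const char *text = "no break yet...\r\n";
        for (const char *p = text; *p; ++p) {
            if (in_bucket(0, (uint8_t)*p)) {
                printf("newline byte at offset %td\n", p - text);
            }
        }
        return 0;
    }
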
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SIDECAR_SHUFTI_H
-#define SIDECAR_SHUFTI_H
-
-#include "ue2common.h"
-
-struct sidecar_S;
-
-u8 sidecarExec_S_int(const struct sidecar_S *n, const u8 *b, size_t len,
- u8 state_in);
-#endif
internal/repeat.cpp
internal/rose_build_merge.cpp
internal/rvermicelli.cpp
- internal/sidecar.cpp
internal/simd_utils.cpp
internal/shuffle.cpp
internal/shufti.cpp
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "ue2common.h"
-#include "sidecar/sidecar.h"
-#include "sidecar/sidecar_compile.h"
-#include "sidecar/sidecar_internal.h"
-#include "util/alloc.h"
-#include "util/charreach.h"
-
-#include <tuple>
-#include <vector>
-#include "gtest/gtest.h"
-
-using namespace testing;
-using namespace ue2;
-using std::vector;
-using std::set;
-using std::tie;
-using std::tuple;
-
-namespace {
-
-void ns_cb(UNUSED u64a offset, u32 id, void *ctxt) {
- u32 *seen = (u32 *)ctxt;
- *seen |= 1U << id;
-}
-
-void set_cb(UNUSED u64a offset, u32 id, void *ctxt) {
- set<u32> *seen = (set<u32> *)ctxt;
- seen->insert(id);
-}
-
-TEST(Sidecar, ns1) {
- const size_t data_len = 1024;
- u8 data[data_len];
-
- CharReach c_1;
- c_1.set('f');
- vector<CharReach> charclasses;
- charclasses.push_back(c_1);
- auto ns = sidecarCompile(charclasses);
-
- ASSERT_TRUE(ns != nullptr);
- ASSERT_LT(0U, sidecarSize(ns.get()));
-
- auto enabled =
- aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
- sidecarEnabledInit(ns.get(), enabled.get());
- auto scratch =
- aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
-
- for (u32 i = 0; i < 256; i++) {
- SCOPED_TRACE(i);
- u32 seen = 0;
- memset(data, i, data_len);
- sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
- ns_cb, &seen);
- ASSERT_EQ(0U, seen);
- }
-
- sidecarEnabledAdd(ns.get(), enabled.get(), 0);
-
- for (u32 i = 0; i < 256; i++) {
- SCOPED_TRACE(i);
- u32 seen = 0;
- memset(data, i, data_len);
- sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
- ns_cb, &seen);
- if (i == 'f') {
- ASSERT_EQ(1U, seen);
- } else {
- ASSERT_EQ(0U, seen);
- }
- }
-}
-
-const char* sidecarStrings[] = {
- "f",
- "a",
- "A",
- "ab",
- "\r\n", // an old favourite
- "\t\r\n",
- " \r\n",
- "xyz",
- "z0y1",
- "01234567", // 8 elements
- "!@#$%^&*()", // 10 elements
- "qwertyuiopasdfgh", // 16 elements
- "qwertyuiopasdfghj", // 17 elements
- "qwertyuiopasdfghjklzxcvb", // 24 elements
- "qwertyuiopasdfghjklzxcvbnm012345", // 32 elements
- "qwertyuiopasdfghjklzxcvbnm0123456" // 33 elements
-};
-
-const u32 sidecarModels[] = {
- SIDECAR_8,
- SIDECAR_32,
- SIDECAR_64,
- SIDECAR_128,
- SIDECAR_256,
- SIDECAR_N,
- SIDECAR_S
-};
-
-// Number of elements we can handle in each model
-const u32 sidecarSizes[] = {
- 8,
- 32,
- 64,
- 128,
- 256,
- 1,
- 8
-};
-
-// Parameterized test case for string of single-byte classes
-class SidecarTest : public TestWithParam<tuple<u32, const char *>> {
-protected:
- virtual void SetUp() {
- tie(model, chars) = GetParam();
- size_t num = strlen(chars);
- charclasses.resize(num);
-
- for (size_t i = 0; i < num; i++) {
- charclasses[i].set(chars[i]);
- }
- }
-
- virtual bool fitsModel() {
- for (size_t i = 0; i < ARRAY_LENGTH(sidecarModels); i++) {
- if (sidecarModels[i] == model) {
- return charclasses.size() <= sidecarSizes[i];
- }
- }
- return false;
- }
-
- u32 model;
- const char *chars;
- vector<CharReach> charclasses;
-};
-
-TEST_P(SidecarTest, Individual) {
- SCOPED_TRACE(chars);
-
- // Skip this test if the model is too small
- if (!fitsModel()) {
- return;
- }
-
- auto ns = sidecarCompile(charclasses, model);
- if (!ns && model == SIDECAR_S) { /* shufti is fussi */
- return;
- }
- ASSERT_TRUE(ns != nullptr);
- ASSERT_LT(0U, sidecarSize(ns.get()));
-
- auto enabled =
- aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
- sidecarEnabledInit(ns.get(), enabled.get());
- auto local_enabled =
- aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
- auto scratch =
- aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
-
- const size_t data_len = 1024;
- u8 data[data_len];
-
- // with nothing enabled, nothing should fire
- for (u32 i = 0; i < 256; i++) {
- SCOPED_TRACE(i);
- memset(data, i, data_len);
- set<u32> seen;
- sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
- set_cb, &seen);
- ASSERT_TRUE(seen.empty());
- }
-
- // test that every char class fires when enabled separately
- for (u32 j = 0; j < charclasses.size(); j++) {
- u32 c = chars[j];
- SCOPED_TRACE(c);
-
- // build a "compile time" enabled structure and add class j to it.
- sidecarEnabledInit(ns.get(), local_enabled.get());
- sidecarEnabledAdd(ns.get(), local_enabled.get(), j);
-
- // union class j into our runtime enabled structure.
- sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
-
- for (u32 i = 0; i < 256; i++) {
- SCOPED_TRACE(i);
- memset(data, i, data_len);
- set<u32> seen;
- sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
- 0, set_cb, &seen);
- if (i == c) {
- ASSERT_EQ(1U, seen.size());
- ASSERT_EQ(j, *seen.begin());
- } else {
- ASSERT_TRUE(seen.empty());
- }
- }
- }
-}
-
-TEST_P(SidecarTest, Together) {
- SCOPED_TRACE(chars);
-
- // Skip this test if the model is too small
- if (!fitsModel()) {
- return;
- }
-
- auto ns = sidecarCompile(charclasses, model);
- if (!ns && model == SIDECAR_S) { /* shufti is fussi */
- return;
- }
- ASSERT_TRUE(ns != nullptr);
- ASSERT_LT(0U, sidecarSize(ns.get()));
-
- auto enabled =
- aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
- sidecarEnabledInit(ns.get(), enabled.get());
- auto local_enabled =
- aligned_zmalloc_unique<sidecar_enabled>(sidecarEnabledSize(ns.get()));
- auto scratch =
- aligned_zmalloc_unique<sidecar_scratch>(sidecarScratchSize(ns.get()));
-
- const size_t data_len = 1024;
- u8 data[data_len];
-
- // with nothing enabled, nothing should fire
- for (u32 i = 0; i < 256; i++) {
- SCOPED_TRACE(i);
- memset(data, i, data_len);
- set<u32> seen;
- sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(), 0,
- set_cb, &seen);
- ASSERT_TRUE(seen.empty());
- }
-
- // test that every char class fires
- for (u32 j = 0; j < charclasses.size(); j++) {
- // enable the whole lot
- sidecarEnabledInit(ns.get(), enabled.get());
- for (u32 i = 0; i < charclasses.size(); i++) {
- // build a "compile time" enabled structure and add class j to it.
- sidecarEnabledInit(ns.get(), local_enabled.get());
- sidecarEnabledAdd(ns.get(), local_enabled.get(), i);
-
- // union class j into our runtime enabled structure.
- sidecarEnabledUnion(ns.get(), enabled.get(), local_enabled.get());
- }
-
- u32 c = chars[j];
- SCOPED_TRACE(c);
-
- for (u32 i = 0; i < 256; i++) {
- SCOPED_TRACE(i);
- memset(data, i, data_len);
- set<u32> seen;
- sidecarExec(ns.get(), data, data_len, enabled.get(), scratch.get(),
- 0, set_cb, &seen);
- if (i == c) {
-                // seen should contain only class j
- ASSERT_EQ(1U, seen.size());
- ASSERT_FALSE(seen.end() == seen.find(j));
- } else {
-                // seen should not contain class j; at most one other class
-                // may have matched
- ASSERT_GT(2U, seen.size());
- ASSERT_TRUE(seen.end() == seen.find(j));
- }
- }
- }
-}
-
-INSTANTIATE_TEST_CASE_P(Sidecar, SidecarTest,
- Combine(ValuesIn(sidecarModels),
- ValuesIn(sidecarStrings)));
-
-}