char castleInAccept(const struct Castle *c, struct mq *q,
const ReportID report, const u64a offset) {
DEBUG_PRINTF("offset=%llu\n", offset);
+ /* ignore when just catching up due to full queue */
+ if (report == MO_INVALID_IDX) {
+ return 0;
+ }
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
void *full_state, void *stream_state) {
DEBUG_PRINTF("offset=%llu\n", offset);
+ if (!c->staleIterOffset) {
+ DEBUG_PRINTF("{no repeats can go stale}\n");
+ return; /* no subcastle can ever go stale */
+ }
+
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
if (activeIdx < c->numRepeats) {
if (!c->pureExclusive) {
const u8 *active = (const u8 *)stream_state + c->activeIdxSize;
- for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
- i != MMB_INVALID;
- i = mmbit_iterate(active, c->numRepeats, i)) {
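+ /* only subcastles with a finite repeat max can go stale, so walk just
+ * those, via the sparse iterator laid down at compile time */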
+ const struct mmbit_sparse_iter *it
+ = (const void *)((const char *)c + c->staleIterOffset);
+
+ struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+ u32 numRepeats = c->numRepeats;
+ u32 idx = 0;
+
+ u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it,
+ si_state);
+ while (i != MMB_INVALID) {
DEBUG_PRINTF("subcastle %u\n", i);
subCastleDeactivateStaleSubs(c, offset, full_state,
stream_state, i);
+ i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it,
+ si_state);
}
}
}
static really_inline
void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
- void *full_state, void *stream_state) {
+ void *full_state, void *stream_state,
+ UNUSED char stale_checked) {
assert(top < c->numRepeats);
const struct SubCastle *sub = getSubCastle(c, top);
} else {
DEBUG_PRINTF("repeat %u is already alive\n", top);
// Caller should ensure we're not stale.
- assert(repeatHasMatch(info, rctrl, rstate, offset) !=
- REPEAT_STALE);
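+ /* the QR path replays events before stale repeats have been swept, so
+ * this invariant only holds when the caller sets stale_checked */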
+ assert(!stale_checked
+ || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE);
// Ignore duplicate top events.
u64a last = repeatLastTop(info, rctrl, rstate);
}
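+ /* Reverse scanners: like the forward escape scans, but find the
+ * location of the last escape character in the range. */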
static really_inline
-void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) {
+char castleRevScanVerm(const struct Castle *c, const u8 *buf,
+ const size_t begin, const size_t end, size_t *loc) {
+ const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleRevScanNVerm(const struct Castle *c, const u8 *buf,
+ const size_t begin, const size_t end, size_t *loc) {
+ const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleRevScanShufti(const struct Castle *c, const u8 *buf,
+ const size_t begin, const size_t end, size_t *loc) {
+ const m128 mask_lo = c->u.shuf.mask_lo;
+ const m128 mask_hi = c->u.shuf.mask_hi;
+ const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleRevScanTruffle(const struct Castle *c, const u8 *buf,
+ const size_t begin, const size_t end, size_t *loc) {
+ const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
+ buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin,
+ const size_t end, size_t *loc) {
+ assert(begin <= end);
+ DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end);
+ if (begin == end) {
+ return 0;
+ }
+
+ switch (c->type) {
+ case CASTLE_DOT:
+ // Nothing can stop a dot scan!
+ return 0;
+ case CASTLE_VERM:
+ return castleRevScanVerm(c, buf, begin, end, loc);
+ case CASTLE_NVERM:
+ return castleRevScanNVerm(c, buf, begin, end, loc);
+ case CASTLE_SHUFTI:
+ return castleRevScanShufti(c, buf, begin, end, loc);
+ case CASTLE_TRUFFLE:
+ return castleRevScanTruffle(c, buf, begin, end, loc);
+ default:
+ DEBUG_PRINTF("unknown scan type!\n");
+ assert(0);
+ return 0;
+ }
+}
+
+static really_inline
+void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp,
+ char stale_checked) {
const u32 event = q->items[q->cur].type;
switch (event) {
case MQE_TOP:
assert(event < MQE_INVALID);
u32 top = event - MQE_TOP_FIRST;
DEBUG_PRINTF("top %u at offset %llu\n", top, sp);
- castleProcessTop(c, top, sp, q->state, q->streamState);
+ castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked);
break;
}
}
+static really_inline
+void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
+ DEBUG_PRINTF("clearing active repeats due to escape\n");
+ if (c->exclusive) {
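+ /* storing numRepeats (an out-of-range index) marks the exclusive
+ * group as having no live repeat */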
+ partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
+ }
+
+ if (!c->pureExclusive) {
+ mmbit_clear(active, c->numRepeats);
+ }
+}
+
static really_inline
char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
enum MatchMode mode) {
}
if (escape_found) {
- DEBUG_PRINTF("clearing active repeats due to escape\n");
- if (c->exclusive) {
- partial_store_u32(q->streamState, c->numRepeats,
- c->activeIdxSize);
- }
-
- if (!c->pureExclusive) {
- mmbit_clear(active, c->numRepeats);
- }
+ clear_repeats(c, q, active);
}
}
}
sp = q_cur_offset(q);
- castleHandleEvent(c, q, sp);
+ castleHandleEvent(c, q, sp, 1);
q->cur++;
}
return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH);
}
-static really_inline
-void castleStreamSilent(const struct Castle *c, u8 *active, const u8 *buf,
- size_t length) {
- DEBUG_PRINTF("entry\n");
+static
+s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
+ assert(q_cur_type(q) == MQE_START);
+ assert(q_last_type(q) == MQE_END);
+ s64a sp = q_cur_loc(q);
+ s64a ep = q_last_loc(q);
- // This call doesn't produce matches, so we elide the castleMatchLoop call
- // entirely and just do escape scans to maintain the repeat.
+ DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep);
- size_t eloc = 0;
- char escaped = castleScan(c, buf, 0, length, &eloc);
- if (escaped) {
- assert(eloc < length);
- DEBUG_PRINTF("escape found at %zu, clearing castle\n", eloc);
- if (c->exclusive) {
- partial_store_u32(active - c->activeIdxSize,
- c->numRepeats, c->activeIdxSize);
+ size_t loc;
+
+ if (ep > 0) {
+ if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) {
+ return (s64a)loc;
}
+ ep = 0;
+ }
- if (!c->pureExclusive) {
- mmbit_clear(active, c->numRepeats);
+ if (sp < 0) {
+ s64a hlen = q->hlength;
+
+ if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
+ return (s64a)loc - hlen;
}
}
+
+ return sp - 1; /* the repeats are never killed */
}
char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
assert(q->cur + 1 < q->end); /* require at least two items */
assert(q_cur_type(q) == MQE_START);
- u64a sp = q_cur_offset(q);
- q->cur++;
- DEBUG_PRINTF("sp=%llu\n", sp);
const struct Castle *c = getImplNfa(n);
u8 *active = (u8 *)q->streamState + c->activeIdxSize;
- char found = 0;
- while (q->cur < q->end) {
- DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
- q_cur_offset(q));
- found = 0;
- if (c->exclusive) {
- const u32 activeIdx = partial_load_u32(q->streamState,
- c->activeIdxSize);
- if (activeIdx < c->numRepeats) {
- found = 1;
- } else if (c->pureExclusive) {
- DEBUG_PRINTF("castle is dead\n");
- goto scan_done;
- }
- }
-
- if (!found && !mmbit_any(active, c->numRepeats)) {
- DEBUG_PRINTF("castle is dead\n");
- goto scan_done;
- }
- u64a ep = q_cur_offset(q);
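+ /* This call doesn't produce matches, so rather than maintaining repeat
+ * state across the whole block we find the last escape (which squashes
+ * every live repeat) and replay only the top events after it. */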
+ u64a end_offset = q_last_loc(q) + q->offset;
+ s64a last_kill_loc = castleLastKillLoc(c, q);
+ DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n",
+ last_kill_loc, q_cur_loc(q), q_last_loc(q));
+ assert(last_kill_loc < q_last_loc(q));
- if (sp < q->offset) {
- DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
- assert(q->offset - sp <= q->hlength);
- u64a local_ep = MIN(q->offset, ep);
- const u8 *ptr = q->history + q->hlength + sp - q->offset;
- castleStreamSilent(c, active, ptr, local_ep - sp);
- sp = local_ep;
- }
-
- found = 0;
- if (c->exclusive) {
- const u32 activeIdx = partial_load_u32(q->streamState,
- c->activeIdxSize);
- if (activeIdx < c->numRepeats) {
- found = 1;
- } else if (c->pureExclusive) {
- DEBUG_PRINTF("castle is dead\n");
- goto scan_done;
- }
- }
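+ /* castleLastKillLoc returns sp - 1 if no escape was found */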
+ if (last_kill_loc != q_cur_loc(q) - 1) {
+ clear_repeats(c, q, active);
+ }
- if (!found && !mmbit_any(active, c->numRepeats)) {
- DEBUG_PRINTF("castle is dead\n");
- goto scan_done;
- }
+ q->cur++; /* skip start event */
- if (sp < ep) {
- DEBUG_PRINTF("MAIN BUFFER SCAN\n");
- assert(ep - q->offset <= q->length);
- const u8 *ptr = q->buffer + sp - q->offset;
- castleStreamSilent(c, active, ptr, ep - sp);
- }
+ /* skip events prior to the repeats being squashed */
+ while (q_cur_loc(q) <= last_kill_loc) {
+ DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q));
+ q->cur++;
+ assert(q->cur < q->end);
+ }
-scan_done:
- sp = q_cur_offset(q);
- castleDeactivateStaleSubs(c, sp, q->state, q->streamState);
- castleHandleEvent(c, q, sp);
+ while (q->cur < q->end) {
+ DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
+ q_cur_offset(q));
+ u64a sp = q_cur_offset(q);
+ castleHandleEvent(c, q, sp, 0);
q->cur++;
}
- found = 0;
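+ /* now that we have caught up, sweep any repeats that have gone stale
+ * by the end of the block before checking for accepts */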
+ castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState);
+
+ char found = 0;
if (c->exclusive) {
const u32 activeIdx = partial_load_u32(q->streamState,
c->activeIdxSize);
return 0;
}
- if (castleInAccept(c, q, report, sp)) {
+ if (castleInAccept(c, q, report, end_offset)) {
return MO_MATCHES_PENDING;
}
}
return 0;
}
-
#include "castlecompile.h"
#include "castle_internal.h"
+#include "limex_limits.h"
#include "nfa_internal.h"
#include "repeatcompile.h"
#include "shufticompile.h"
#include "util/dump_charclass.h"
#include "util/graph.h"
#include "util/make_unique.h"
+#include "util/multibit_build.h"
#include "util/multibit_internal.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
namespace ue2 {
-#define CASTLE_MAX_TOPS 32
#define CLIQUE_GRAPH_MAX_SIZE 1000
static
const vector<pair<depth, bool>> &repeatInfoPair,
u32 &scratchStateSize, u32 &streamStateSize,
u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
- const set<u32> &exclusiveGroup) {
+ const set<u32> &exclusiveGroup, vector<u32> &may_stale) {
u32 i = 0;
u32 maxStreamSize = 0;
bool exclusive = exclusiveGroup.size() > 1;
streamStateSize += subStreamStateSize;
}
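+ /* only a repeat with a finite max bound can go stale; record it so the
+ * runtime can sweep stale subcastles with a sparse iterator */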
+ if (pr.bounds.max.is_finite()) {
+ may_stale.push_back(i);
+ }
+
info.type = verify_u8(rtype);
info.repeatMin = depth_to_u32(pr.bounds.min);
info.repeatMax = depth_to_u32(pr.bounds.max);
u32 tableSize = 0;
u32 sparseRepeats = 0;
+ vector<u32> may_stale; /* subcastles that may go stale */
+
buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
scratchStateSize, streamStateSize, tableSize,
- tables, sparseRepeats, exclusiveGroup);
+ tables, sparseRepeats, exclusiveGroup, may_stale);
+
+ DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
+ vector<mmbit_sparse_iter> stale_iter;
+ if (!may_stale.empty()) {
+ mmbBuildSparseIterator(stale_iter, may_stale, numRepeats);
+ }
+
- const size_t total_size =
+ size_t total_size =
sizeof(NFA) + // initial NFA structure
sizeof(Castle) + // Castle structure
sizeof(SubCastle) * subs.size() + // SubCastles themselves
sizeof(u64a) * sparseRepeats; // paddings for
// REPEAT_SPARSE_OPTIMAL_P tables
+ total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
+ total_size += byte_length(stale_iter); // stale sparse iter
+
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
nfa->type = verify_u8(CASTLE_NFA_0);
nfa->length = verify_u32(total_size);
nfa->minWidth = verify_u32(minWidth);
nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
- char *ptr = (char *)nfa.get() + sizeof(NFA);
+ char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
+ char *ptr = base_ptr;
Castle *c = (Castle *)ptr;
c->numRepeats = verify_u32(subs.size());
c->exclusive = exclusive;
sub->exclusive = 0;
}
}
+
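+ /* the stale-sweep sparse iterator lives at the aligned tail of the
+ * blob; a staleIterOffset of zero means no subcastle can go stale */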
+ ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter);
+
+ assert(ptr + byte_length(stale_iter) ==
+ base_ptr + total_size - sizeof(NFA));
+ if (!stale_iter.empty()) {
+ c->staleIterOffset = verify_u32(ptr - base_ptr);
+ copy_bytes(ptr, stale_iter);
+ ptr += byte_length(stale_iter);
+ }
+
return nfa;
}
unique_ptr<NGHolder> g = ue2::make_unique<NGHolder>(kind);
for (const auto &m : proto.repeats) {
- if (m.first >= CASTLE_MAX_TOPS) {
+ if (m.first >= NFA_MAX_TOP_MASKS) {
DEBUG_PRINTF("top %u too big for an NFA\n", m.first);
return nullptr;
}