src/nfa/lbr.h
src/nfa/lbr_common_impl.h
src/nfa/lbr_internal.h
- src/nfa/mcclellan.c
- src/nfa/mcclellan.h
- src/nfa/mcclellan_common_impl.h
- src/nfa/mcclellan_internal.h
src/nfa/limex_accel.c
src/nfa/limex_accel.h
src/nfa/limex_exceptional.h
src/nfa/limex_runtime_impl.h
src/nfa/limex_shuffle.h
src/nfa/limex_state_impl.h
+ src/nfa/mcclellan.c
+ src/nfa/mcclellan.h
+ src/nfa/mcclellan_common_impl.h
+ src/nfa/mcclellan_internal.h
+ src/nfa/mcsheng.c
+ src/nfa/mcsheng_data.c
+ src/nfa/mcsheng.h
+ src/nfa/mcsheng_internal.h
src/nfa/mpv.h
src/nfa/mpv.c
src/nfa/mpv_internal.h
src/nfa/mcclellancompile.h
src/nfa/mcclellancompile_util.cpp
src/nfa/mcclellancompile_util.h
+ src/nfa/mcsheng_compile.cpp
+ src/nfa/mcsheng_compile.h
src/nfa/limex_compile.cpp
src/nfa/limex_compile.h
src/nfa/limex_accel.h
src/nfa/nfa_internal.h
src/nfa/nfa_kind.h
src/nfa/rdfa.h
+ src/nfa/rdfa_graph.cpp
+ src/nfa/rdfa_graph.h
src/nfa/rdfa_merge.cpp
src/nfa/rdfa_merge.h
src/nfa/repeat_internal.h
src/nfa/limex_dump.cpp
src/nfa/mcclellandump.cpp
src/nfa/mcclellandump.h
+ src/nfa/mcsheng_dump.cpp
+ src/nfa/mcsheng_dump.h
src/nfa/mpv_dump.cpp
src/nfa/nfa_dump_api.h
src/nfa/nfa_dump_dispatch.cpp
allowLbr(true),
allowMcClellan(true),
allowSheng(true),
+ allowMcSheng(true),
allowPuff(true),
allowLiteral(true),
allowRose(true),
G_UPDATE(allowLbr);
G_UPDATE(allowMcClellan);
G_UPDATE(allowSheng);
+ G_UPDATE(allowMcSheng);
G_UPDATE(allowPuff);
G_UPDATE(allowLiteral);
G_UPDATE(allowRose);
bool allowLbr;
bool allowMcClellan;
bool allowSheng;
+ bool allowMcSheng;
bool allowPuff;
bool allowLiteral;
bool allowRose;
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
- u32 idx = packedExtract32(s, accel);
+ u32 idx = pext32(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
- u32 idx = packedExtract64(s, accel);
+ u32 idx = pext64(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#else
size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
- u32 idx = packedExtract64(movq(s), movq(accel));
+ u32 idx = pext64(movq(s), movq(accel));
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#endif
#include "util/bitutils.h"
#include "util/simd_utils.h"
-#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
-#define HAVE_PEXT
-#endif
-
-static really_inline
-u32 packedExtract32(u32 x, u32 mask) {
-#if defined(HAVE_PEXT)
- // Intel BMI2 can do this operation in one instruction.
- return _pext_u32(x, mask);
-#else
-
- u32 result = 0, num = 1;
- while (mask != 0) {
- u32 bit = findAndClearLSB_32(&mask);
- if (x & (1U << bit)) {
- assert(num != 0); // more than 32 bits!
- result |= num;
- }
- num <<= 1;
- }
- return result;
-#endif
-}
-
-static really_inline
-u32 packedExtract64(u64a x, u64a mask) {
-#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
- // Intel BMI2 can do this operation in one instruction.
- return _pext_u64(x, mask);
-#else
-
- u32 result = 0, num = 1;
- while (mask != 0) {
- u32 bit = findAndClearLSB_64(&mask);
- if (x & (1ULL << bit)) {
- assert(num != 0); // more than 32 bits!
- result |= num;
- }
- num <<= 1;
- }
- return result;
-#endif
-}
-
-#undef HAVE_PEXT
-
static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb(s, permute);
if (mode == STOP_AT_MATCH) {
*c_final = buf;
}
- return MO_CONTINUE_MATCHING;
+ return MO_ALIVE;
}
u32 s = *state;
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
- return MO_CONTINUE_MATCHING;
+ return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
+ return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
}
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
- return MO_CONTINUE_MATCHING;
+ return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
+ return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
}
}
*state = s;
- return MO_CONTINUE_MATCHING;
+ return MO_ALIVE;
}
static never_inline
char single, const u8 **c_final, enum MatchMode mode) {
if (!len) {
*c_final = buf;
- return MO_CONTINUE_MATCHING;
+ return MO_ALIVE;
}
u32 s = *state;
const u8 *c = buf;
u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
- DEBUG_PRINTF("accel %hu, accept %hu\n",
- m->accel_limit_8, m->accept_limit_8);
+ DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
DEBUG_PRINTF("s: %u, len %zu\n", s, len);
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
- return MO_CONTINUE_MATCHING;
+ return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
}
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
- return MO_CONTINUE_MATCHING;
+ return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
}
if (mode == STOP_AT_MATCH) {
*c_final = c_end;
}
- return MO_CONTINUE_MATCHING;
+ return MO_ALIVE;
}
static never_inline
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
}
/* do main buffer region */
const u8 *final_look;
- if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
- offset + sp, cb, context, single, &final_look,
- mode)
- == MO_HALT_MATCHING) {
+ char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
+ offset + sp, cb, context, single,
+ &final_look, mode);
+ if (rv == MO_DEAD) {
*(u16 *)q->state = 0;
- return 0;
+ return MO_DEAD;
}
- if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
+ if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
- assert(q->cur);
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
+
+ assert(q->cur);
+ assert(final_look != cur_buf + local_ep);
+
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
return MO_MATCHES_PENDING;
}
+ assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
case MQE_END:
*(u16 *)q->state = s;
q->cur++;
- return s ? MO_ALIVE : 0;
+ return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
- == MO_HALT_MATCHING) {
- return 0;
+ == MO_DEAD) {
+ return s ? MO_ALIVE : MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
- return !!s;
+ return MO_ALIVE;
}
static really_inline
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
+ return MO_DEAD;
}
}
}
const u8 *final_look;
- if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp,
- cb, context, single, &final_look, mode)
- == MO_HALT_MATCHING) {
+ char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
+ offset + sp, cb, context, single,
+ &final_look, mode);
+ if (rv == MO_DEAD) {
*(u8 *)q->state = 0;
- return 0;
+ return MO_DEAD;
}
- if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
- /* found a match */
- DEBUG_PRINTF("found a match\n");
+ if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
+
assert(q->cur);
+ assert(final_look != cur_buf + local_ep);
+
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
return MO_MATCHES_PENDING;
}
+ assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
case MQE_END:
*(u8 *)q->state = s;
q->cur++;
- return s ? MO_ALIVE : 0;
+ return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
- == MO_HALT_MATCHING) {
- return 0;
+ == MO_DEAD) {
+ return MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
- return s;
+ return s ? MO_ALIVE : MO_DEAD;
}
char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,
u16 start_floating; /**< floating start state */
u32 aux_offset; /**< offset of the aux structures relative to the start of
* the nfa structure */
- u32 sherman_offset; /**< offset of to array of sherman state offsets
- * the state_info structures relative to the start of the
- * nfa structure */
- u32 sherman_end; /**< offset of the end of the state_info structures relative
- * to the start of the nfa structure */
+ u32 sherman_offset; /**< offset of the sherman state_info structures
+ * relative to the start of the nfa structure */
+ u32 sherman_end; /**< offset of the end of the state_info structures
+ * relative to the start of the nfa structure */
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
u16 accept_limit_8; /**< 8 bit, lowest accept state */
u16 sherman_limit; /**< lowest sherman state */
u8 alphaShift;
u8 flags;
- u8 has_accel; /**< 1 iff there are any accel planes */
+ u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */
: info.raw.start_floating);
}
-/* returns non-zero on error */
+/* returns false on error */
static
-int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
+bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
info.states[0].impl_id = 0; /* dead is always 0 */
vector<dstate_id_t> norm;
if (info.size() > (1 << 16)) {
DEBUG_PRINTF("too many states\n");
*sherman_base = 0;
- return 1;
+ return false;
}
for (u32 i = 1; i < info.size(); i++) {
/* Check to see if we haven't over allocated our states */
DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
(dstate_id_t)(next_sherman & STATE_MASK));
- return (next_sherman - 1) != ((next_sherman - 1) & STATE_MASK);
+ return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
}
static
assert(alphaShift <= 8);
u16 count_real_states;
- if (allocateFSN16(info, &count_real_states)) {
+ if (!allocateFSN16(info, &count_real_states)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size());
return nullptr;
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
-#include "util/accel_scheme.h"
#include "util/alloc.h"
-#include "util/charreach.h"
#include "util/ue2_containers.h"
#include <memory>
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mcsheng.h"
+
+#include "accel.h"
+#include "mcsheng_internal.h"
+#include "nfa_api.h"
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "util/bitutils.h"
+#include "util/compare.h"
+#include "util/simd_utils.h"
+#include "ue2common.h"
+
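+/* Execution modes: CALLBACK_OUTPUT reports each match through the callback,
+ * STOP_AT_MATCH pauses at the first match, NO_MATCHES only tracks whether
+ * the engine stays alive. */
+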
+enum MatchMode {
+ CALLBACK_OUTPUT,
+ STOP_AT_MATCH,
+ NO_MATCHES
+};
+
+static really_inline
+const struct mstate_aux *get_aux(const struct mcsheng *m, u32 s) {
+ const char *nfa = (const char *)m - sizeof(struct NFA);
+ const struct mstate_aux *aux
+ = s + (const struct mstate_aux *)(nfa + m->aux_offset);
+
+ assert(ISALIGNED(aux));
+ return aux;
+}
+
+static really_inline
+u32 mcshengEnableStarts(const struct mcsheng *m, u32 s) {
+ const struct mstate_aux *aux = get_aux(m, s);
+
+ DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top);
+ return aux->top;
+}
+
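+/* A sherman state is a compressed transition row laid out as:
+ *   type (1 byte) | len (1 byte) | daddy (2 bytes) | chars[len] | succs[len]
+ * with u16 successors; characters not listed defer to the daddy state's
+ * full transition row. */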
+static really_inline
+u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
+ u32 as) {
+ assert(ISALIGNED_N(sherman_state, 16));
+
+ u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET);
+
+ if (len) {
+ m128 ss_char = load128(sherman_state);
+ m128 cur_char = set16x8(cprime);
+
+ u32 z = movemask128(eq128(ss_char, cur_char));
+
+ /* remove header cruft: type 1, len 1, daddy 2*/
+ z &= ~0xf;
+ z &= (1U << (len + 4)) - 1;
+
+ if (z) {
+ u32 i = ctz32(z & ~0xf) - 4;
+
+ u32 s_out = unaligned_load_u16((const u8 *)sherman_state
+ + SHERMAN_STATES_OFFSET(len)
+ + sizeof(u16) * i);
+ DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i,
+ len, cprime, s_out);
+ return s_out;
+ }
+ }
+
+ u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
+ return succ_table[(daddy << as) + cprime];
+}
+
+static really_inline
+char doComplexReport(NfaCallback cb, void *ctxt, const struct mcsheng *m,
+ u32 s, u64a loc, char eod, u32 *cached_accept_state,
+ u32 *cached_accept_id) {
+ DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n",
+ s & STATE_MASK, loc, eod);
+
+ if (!eod && s == *cached_accept_state) {
+ if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ const struct mstate_aux *aux = get_aux(m, s);
+ size_t offset = eod ? aux->accept_eod : aux->accept;
+
+ assert(offset);
+ const struct report_list *rl
+ = (const void *)((const char *)m + offset - sizeof(struct NFA));
+ assert(ISALIGNED(rl));
+
+ DEBUG_PRINTF("report list size %u\n", rl->count);
+ u32 count = rl->count;
+
+ if (!eod && count == 1) {
+ *cached_accept_state = s;
+ *cached_accept_id = rl->report[0];
+
+ DEBUG_PRINTF("reporting %u\n", rl->report[0]);
+ if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ for (u32 i = 0; i < count; i++) {
+ DEBUG_PRINTF("reporting %u\n", rl->report[i]);
+ if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+}
+
+#define SHENG_CHUNK 8
+
+static really_inline
+u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
+ const u8 *hard_c_end, u32 s_in, char do_accel) {
+ assert(s_in < m->sheng_end);
+ assert(s_in); /* should not already be dead */
+ assert(soft_c_end <= hard_c_end);
+ DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1);
+ m128 s = set16x8(s_in - 1);
+ const u8 *c = *c_inout;
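+ /* the main loop consumes whole SHENG_CHUNK blocks at a time, so it must
+ * stop while a full chunk still fits before hard_c_end */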
+ const u8 *c_end = hard_c_end - SHENG_CHUNK + 1;
+ if (!do_accel) {
+ c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1);
+ }
+ const m128 *masks = m->sheng_masks;
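+ /* each input byte selects a 16-byte shuffle mask holding its transition
+ * row; pshufb uses the current state (replicated in every byte of s) as
+ * the shuffle index, yielding the successor in a single instruction */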
+ u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */
+ u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit;
+
+ /* When we use movd to get a u32 containing our state, it will contain 4
+ * byte-lanes that all duplicate the state. We create versions of our
+ * limits with 4 copies each to compare against directly; this avoids
+ * generating code to extract a single copy of the state from the u32
+ * before checking. */
+ u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101;
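+ /* e.g. state 3 in every lane reads back through movd as 0x03030303, and
+ * comparing that u32 against limit * 0x01010101 is equivalent to comparing
+ * one lane against limit */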
+
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+ u32 sheng_limit_x4 = sheng_limit * 0x01010101;
+ m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
+ m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
+ DEBUG_PRINTF("end %hu, accel %hhu --> limit %hhu\n", sheng_limit,
+ m->sheng_accel_limit, sheng_stop_limit);
+#endif
+
+#define SHENG_SINGLE_ITER do { \
+ m128 shuffle_mask = masks[*(c++)]; \
+ s = pshufb(shuffle_mask, s); \
+ u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
+ DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr); \
+ if (s_gpr_x4 >= sheng_stop_limit_x4) { \
+ s_gpr = s_gpr_x4; \
+ goto exit; \
+ } \
+ } while (0)
+
+ u8 s_gpr;
+ while (c < c_end) {
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+ /* This version uses pext for efficiently bit-bashing out scaled
+ * versions of the bytes to process from a u64a */
+
+ u64a data_bytes = unaligned_load_u64a(c);
+ u64a cc0 = pdep64(data_bytes, 0xff0); /* c[0] << 4: deposit the low
+ * byte into bits 4..11, pre-scaled
+ * by the 16-byte mask stride */
+ data_bytes &= ~0xffULL; /* clear low bits for scale space */
+ m128 shuffle_mask0 = load128((const char *)masks + cc0);
+ s = pshufb(shuffle_mask0, s);
+ m128 s_max = s;
+ m128 s_max0 = s_max;
+ DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s));
+
+#define SHENG_SINGLE_UNROLL_ITER(iter) \
+ assert(iter); \
+ u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \
+ assert(cc##iter == (u64a)c[iter] << 4); \
+ m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \
+ s = pshufb(shuffle_mask##iter, s); \
+ if (do_accel && iter == 7) { \
+ /* in the final iteration we also have to check against accel */ \
+ m128 s_temp = sadd_u8_m128(s, accel_delta); \
+ s_max = max_u8_m128(s_max, s_temp); \
+ } else { \
+ s_max = max_u8_m128(s_max, s); \
+ } \
+ m128 s_max##iter = s_max; \
+ DEBUG_PRINTF("c %02llx --> s %hhu max %hhu\n", cc##iter >> 4, \
+ movd(s), movd(s_max));
+
+ SHENG_SINGLE_UNROLL_ITER(1);
+
+ SHENG_SINGLE_UNROLL_ITER(2);
+ SHENG_SINGLE_UNROLL_ITER(3);
+
+ SHENG_SINGLE_UNROLL_ITER(4);
+ SHENG_SINGLE_UNROLL_ITER(5);
+
+ SHENG_SINGLE_UNROLL_ITER(6);
+ SHENG_SINGLE_UNROLL_ITER(7);
+
+ if (movd(s_max7) >= sheng_limit_x4) {
+ DEBUG_PRINTF("exit found\n");
+
+ /* Check the last byte explicitly first: it is the most likely to have
+ * triggered the exit, as it is the only iteration that also checks for
+ * acceleration. */
+ if (movd(s_max6) < sheng_limit_x4) {
+ c += SHENG_CHUNK;
+ s_gpr = movq(s);
+ assert(s_gpr >= sheng_stop_limit);
+ goto exit;
+ }
+
+ /* use shift-xor to create a register containing all of the max
+ * values */
+ m128 blended = rshift64_m128(s_max0, 56);
+ blended = xor128(blended, rshift64_m128(s_max1, 48));
+ blended = xor128(blended, rshift64_m128(s_max2, 40));
+ blended = xor128(blended, rshift64_m128(s_max3, 32));
+ blended = xor128(blended, rshift64_m128(s_max4, 24));
+ blended = xor128(blended, rshift64_m128(s_max5, 16));
+ blended = xor128(blended, rshift64_m128(s_max6, 8));
+ blended = xor128(blended, s);
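+ /* byte j of blended now holds the xor of the running-max values from
+ * iterations j..7; the shift-xor below telescopes this so that byte j
+ * becomes exactly the max value at iteration j */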
+ blended = xor128(blended, rshift64_m128(blended, 8));
+ DEBUG_PRINTF("blended %016llx\n", movq(blended));
+
+ m128 final = min_u8_m128(blended, simd_stop_limit);
+ m128 cmp = sub_u8_m128(final, simd_stop_limit);
+ u64a stops = ~movemask128(cmp);
+ assert(stops);
+ u32 earliest = ctz32(stops);
+ DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest);
+ assert(earliest < 8);
+ c += earliest + 1;
+ s_gpr = movq(blended) >> (earliest * 8);
+ assert(s_gpr >= sheng_stop_limit);
+ goto exit;
+ } else {
+ c += SHENG_CHUNK;
+ }
+#else
+ SHENG_SINGLE_ITER;
+ SHENG_SINGLE_ITER;
+ SHENG_SINGLE_ITER;
+ SHENG_SINGLE_ITER;
+
+ SHENG_SINGLE_ITER;
+ SHENG_SINGLE_ITER;
+ SHENG_SINGLE_ITER;
+ SHENG_SINGLE_ITER;
+#endif
+ }
+
+ assert(c_end - c < SHENG_CHUNK);
+ if (c < soft_c_end) {
+ assert(soft_c_end - c < SHENG_CHUNK);
+ switch (soft_c_end - c) {
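+ /* deliberate fall-through: run one iteration per remaining byte */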
+ case 7:
+ SHENG_SINGLE_ITER;
+ case 6:
+ SHENG_SINGLE_ITER;
+ case 5:
+ SHENG_SINGLE_ITER;
+ case 4:
+ SHENG_SINGLE_ITER;
+ case 3:
+ SHENG_SINGLE_ITER;
+ case 2:
+ SHENG_SINGLE_ITER;
+ case 1:
+ SHENG_SINGLE_ITER;
+ }
+ }
+
+ assert(c >= soft_c_end);
+
+ s_gpr = movd(s);
+exit:
+ assert(c <= hard_c_end);
+ DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr);
+ assert(c >= soft_c_end || s_gpr >= sheng_stop_limit);
+ /* undo the state adjustment to match the mcclellan view: sheng ids omit
+ * the dead state, so live states map back as id + 1, while the sentinel
+ * value sheng_limit maps back to the dead state (0) */
+ if (s_gpr == sheng_limit) {
+ s_gpr = 0;
+ } else if (s_gpr < sheng_limit) {
+ s_gpr++;
+ }
+
+ *c_inout = c;
+ return s_gpr;
+}
+
+static really_inline
+const char *findShermanState(UNUSED const struct mcsheng *m,
+ const char *sherman_base_offset, u32 sherman_base,
+ u32 s) {
+ const char *rv
+ = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
+ assert(rv < (const char *)m + m->length - sizeof(struct NFA));
+ UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET);
+ assert(type == SHERMAN_STATE);
+ return rv;
+}
+
+static really_inline
+const u8 *run_mcsheng_accel(const struct mcsheng *m,
+ const struct mstate_aux *aux, u32 s,
+ const u8 **min_accel_offset,
+ const u8 *c, const u8 *c_end) {
+ DEBUG_PRINTF("skipping\n");
+ u32 accel_offset = aux[s].accel_offset;
+
+ assert(aux[s].accel_offset);
+ assert(accel_offset >= m->aux_offset);
+ assert(!m->sherman_offset || accel_offset < m->sherman_offset);
+
+ const union AccelAux *aaux = (const void *)((const char *)m + accel_offset);
+ const u8 *c2 = run_accel(aaux, c, c_end);
+
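+ /* if acceleration covered little ground, back off for longer before
+ * trying it again */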
+ if (c2 < *min_accel_offset + BAD_ACCEL_DIST) {
+ *min_accel_offset = c2 + BIG_ACCEL_PENALTY;
+ } else {
+ *min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
+ }
+
+ if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) {
+ *min_accel_offset = c_end;
+ }
+
+ DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
+ c2 - c, *min_accel_offset - c2, c_end - c2);
+
+ return c2;
+}
+
+static really_inline
+u32 doNormal16(const struct mcsheng *m, const u8 **c_inout, const u8 *end,
+ u32 s, char do_accel, enum MatchMode mode) {
+ const u8 *c = *c_inout;
+
+ const u16 *succ_table
+ = (const u16 *)((const char *)m + sizeof(struct mcsheng));
+ assert(ISALIGNED_N(succ_table, 2));
+ u32 sheng_end = m->sheng_end;
+ u32 sherman_base = m->sherman_limit;
+ const char *sherman_base_offset
+ = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
+ u32 as = m->alphaShift;
+
+ /* Adjust the start of the succ table so we can index into it using the
+ * full state id (rather than first converting to a normal id). As we will
+ * not be processing states with low ids here, we will not access data
+ * before the real succ table. Note: due to the size of the sheng tables,
+ * the succ_table pointer will still point inside the engine. */
+ succ_table -= sheng_end << as;
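+ /* i.e. succ_table[(s << as) + cprime] below indexes the real table at
+ * ((s - sheng_end) << as) + cprime */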
+
+ s &= STATE_MASK;
+
+ while (c < end && s >= sheng_end) {
+ u8 cprime = m->remap[*c];
+ DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c,
+ ourisprint(*c) ? *c : '?', cprime, s);
+ if (s < sherman_base) {
+ DEBUG_PRINTF("doing normal\n");
+ assert(s < m->state_count);
+ s = succ_table[(s << as) + cprime];
+ } else {
+ const char *sherman_state
+ = findShermanState(m, sherman_base_offset, sherman_base, s);
+ DEBUG_PRINTF("doing sherman (%u)\n", s);
+ s = doSherman16(sherman_state, cprime, succ_table, as);
+ }
+
+ DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK);
+ c++;
+
+ if (do_accel && (s & ACCEL_FLAG)) {
+ break;
+ }
+ if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
+ break;
+ }
+
+ s &= STATE_MASK;
+ }
+
+ *c_inout = c;
+ return s;
+}
+
+static really_inline
+char mcshengExec16_i(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **c_final, enum MatchMode mode) {
+ assert(ISALIGNED_N(state, 2));
+ if (!len) {
+ if (mode == STOP_AT_MATCH) {
+ *c_final = buf;
+ }
+ return MO_ALIVE;
+ }
+
+ u32 s = *state;
+ const u8 *c = buf;
+ const u8 *c_end = buf + len;
+ const u8 sheng_end = m->sheng_end;
+ const struct mstate_aux *aux
+ = (const struct mstate_aux *)((const char *)m + m->aux_offset
+ - sizeof(struct NFA));
+
+ s &= STATE_MASK;
+
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ DEBUG_PRINTF("s: %u, len %zu\n", s, len);
+
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
+ do {
+ assert(c < min_accel_offset);
+ int do_accept;
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ s = doSheng(m, &c, min_accel_offset, c_end, s, 0);
+ do_accept = mode != NO_MATCHES && get_aux(m, s)->accept;
+ } else {
+ s = doNormal16(m, &c, min_accel_offset, s, 0, mode);
+
+ do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
+ }
+
+ if (do_accept) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD; /* termination requested */
+ }
+ } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
+ &cached_accept_state, &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
+ } while (c < min_accel_offset);
+
+ if (c == c_end) {
+ goto exit;
+ }
+
+with_accel:
+ do {
+ assert(c < c_end);
+ int do_accept;
+
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ if (s > m->sheng_accel_limit) {
+ c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+ s = doSheng(m, &c, c_end, c_end, s, 1);
+ do_accept = mode != NO_MATCHES && get_aux(m, s)->accept;
+ } else {
+ if (s & ACCEL_FLAG) {
+ DEBUG_PRINTF("skipping\n");
+ s &= STATE_MASK;
+ c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+
+ s = doNormal16(m, &c, c_end, s, 1, mode);
+ do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
+ }
+
+ if (do_accept) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD; /* termination requested */
+ }
+ } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
+ &cached_accept_state, &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end);
+ } while (c < c_end);
+
+exit:
+ s &= STATE_MASK;
+
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
+ *state = s;
+
+ return MO_ALIVE;
+}
+
+static never_inline
+char mcshengExec16_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point, CALLBACK_OUTPUT);
+}
+
+static never_inline
+char mcshengExec16_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point, STOP_AT_MATCH);
+}
+
+static never_inline
+char mcshengExec16_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point, NO_MATCHES);
+}
+
+static really_inline
+char mcshengExec16_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point,
+ enum MatchMode mode) {
+ if (mode == CALLBACK_OUTPUT) {
+ return mcshengExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt,
+ single, final_point);
+ } else if (mode == STOP_AT_MATCH) {
+ return mcshengExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt,
+ single, final_point);
+ } else {
+ assert(mode == NO_MATCHES);
+ return mcshengExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt,
+ single, final_point);
+ }
+}
+
+static really_inline
+u32 doNormal8(const struct mcsheng *m, const u8 **c_inout, const u8 *end, u32 s,
+ char do_accel, enum MatchMode mode) {
+ const u8 *c = *c_inout;
+ u32 sheng_end = m->sheng_end;
+ u32 accel_limit = m->accel_limit_8;
+ u32 accept_limit = m->accept_limit_8;
+
+ const u32 as = m->alphaShift;
+ const u8 *succ_table = (const u8 *)((const char *)m
+ + sizeof(struct mcsheng));
+ /* Adjust the start of the succ table so we can index into it using the
+ * full state id (rather than first converting to a normal id). As we will
+ * not be processing states with low ids here, we will not access data
+ * before the real succ table. Note: due to the size of the sheng tables,
+ * the succ_table pointer will still point inside the engine. */
+ succ_table -= sheng_end << as;
+
+ assert(s >= sheng_end);
+
+ while (c < end && s >= sheng_end) {
+ u8 cprime = m->remap[*c];
+ DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c,
+ ourisprint(*c) ? *c : '?', cprime);
+ s = succ_table[(s << as) + cprime];
+
+ DEBUG_PRINTF("s: %u\n", s);
+ c++;
+ if (do_accel) {
+ if (s >= accel_limit) {
+ break;
+ }
+ } else {
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ break;
+ }
+ }
+ }
+ *c_inout = c;
+ return s;
+}
+
+static really_inline
+char mcshengExec8_i(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **c_final, enum MatchMode mode) {
+ if (!len) {
+ *c_final = buf;
+ return MO_ALIVE;
+ }
+ u32 s = *state;
+ const u8 *c = buf;
+ const u8 *c_end = buf + len;
+ const u8 sheng_end = m->sheng_end;
+
+ const struct mstate_aux *aux
+ = (const struct mstate_aux *)((const char *)m + m->aux_offset
+ - sizeof(struct NFA));
+ u32 accept_limit = m->accept_limit_8;
+
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
+
+ DEBUG_PRINTF("s: %u, len %zu\n", s, len);
+
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
+ do {
+ assert(c < min_accel_offset);
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ s = doSheng(m, &c, min_accel_offset, c_end, s, 0);
+ } else {
+ s = doNormal8(m, &c, min_accel_offset, s, 0, mode);
+ assert(c <= min_accel_offset);
+ }
+
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ if (mode == STOP_AT_MATCH) {
+ DEBUG_PRINTF("match - pausing\n");
+ *state = s;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
+ &cached_accept_state, &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
+ } while (c < min_accel_offset);
+
+ if (c == c_end) {
+ goto exit;
+ }
+
+with_accel:
+ do {
+ u32 accel_limit = m->accel_limit_8;
+
+ assert(c < c_end);
+ if (!s) {
+ goto exit;
+ } else if (s < sheng_end) {
+ if (s > m->sheng_accel_limit) {
+ c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+ s = doSheng(m, &c, c_end, c_end, s, 1);
+ } else {
+ if (s >= accel_limit && aux[s].accel_offset) {
+ c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
+ if (c == c_end) {
+ goto exit;
+ } else {
+ goto without_accel;
+ }
+ }
+ s = doNormal8(m, &c, c_end, s, 1, mode);
+ }
+
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ if (mode == STOP_AT_MATCH) {
+ DEBUG_PRINTF("match - pausing\n");
+ *state = s;
+ *c_final = c - 1;
+ return MO_MATCHES_PENDING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
+ &cached_accept_state, &cached_accept_id)
+ == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ assert(c <= c_end);
+ } while (c < c_end);
+
+exit:
+ *state = s;
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
+ return MO_ALIVE;
+}
+
+static never_inline
+char mcshengExec8_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point, CALLBACK_OUTPUT);
+}
+
+static never_inline
+char mcshengExec8_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point, STOP_AT_MATCH);
+}
+
+static never_inline
+char mcshengExec8_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point, NO_MATCHES);
+}
+
+static really_inline
+char mcshengExec8_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point,
+ enum MatchMode mode) {
+ if (mode == CALLBACK_OUTPUT) {
+ return mcshengExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point);
+ } else if (mode == STOP_AT_MATCH) {
+ return mcshengExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt,
+ single, final_point);
+ } else {
+ assert(mode == NO_MATCHES);
+ return mcshengExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point);
+ }
+}
+
+static really_inline
+char mcshengCheckEOD(const struct NFA *nfa, u32 s, u64a offset,
+ NfaCallback cb, void *ctxt) {
+ const struct mcsheng *m = getImplNfa(nfa);
+ const struct mstate_aux *aux = get_aux(m, s);
+
+ if (!aux->accept_eod) {
+ return MO_CONTINUE_MATCHING;
+ }
+ return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL);
+}
+
+static really_inline
+char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+ const u8 *hend, NfaCallback cb, void *context,
+ struct mq *q, char single, s64a end,
+ enum MatchMode mode) {
+ assert(n->type == MCSHENG_NFA_16);
+ const struct mcsheng *m = getImplNfa(n);
+ s64a sp;
+
+ assert(ISALIGNED_N(q->state, 2));
+ u32 s = *(u16 *)q->state;
+
+ if (q->report_current) {
+ assert(s);
+ assert(get_aux(m, s)->accept);
+
+ int rv;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ rv = cb(0, q_cur_offset(q), m->arb_report, context);
+ } else {
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
+ &cached_accept_state, &cached_accept_id);
+ }
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ sp = q_cur_loc(q);
+ q->cur++;
+
+ const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ while (1) {
+ assert(q->cur < q->end);
+ s64a ep = q->items[q->cur].location;
+ if (mode != NO_MATCHES) {
+ ep = MIN(ep, end);
+ }
+
+ assert(ep >= sp);
+
+ s64a local_ep = ep;
+ if (sp < 0) {
+ local_ep = MIN(0, ep);
+ }
+
+ /* do main buffer region */
+ const u8 *final_look;
+ char rv = mcshengExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
+ offset + sp, cb, context, single,
+ &final_look, mode);
+ if (rv == MO_DEAD) {
+ *(u16 *)q->state = 0;
+ return MO_DEAD;
+ }
+ if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
+
+ assert(q->cur);
+ assert(final_look != cur_buf + local_ep);
+
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+ * early -1 */
+ *(u16 *)q->state = s;
+ return MO_MATCHES_PENDING;
+ }
+
+ assert(rv == MO_ALIVE);
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ sp = local_ep;
+
+ if (sp == 0) {
+ cur_buf = buffer;
+ }
+
+ if (sp != ep) {
+ continue;
+ }
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ assert(sp + offset || !s);
+ if (sp + offset == 0) {
+ s = m->start_anchored;
+ break;
+ }
+ s = mcshengEnableStarts(m, s);
+ break;
+ case MQE_END:
+ *(u16 *)q->state = s;
+ q->cur++;
+ return s ? MO_ALIVE : MO_DEAD;
+ default:
+ assert(!"invalid queue event");
+ }
+
+ q->cur++;
+ }
+}
+
+static really_inline
+char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+ const u8 *hend, NfaCallback cb, void *context,
+ struct mq *q, char single, s64a end,
+ enum MatchMode mode) {
+ assert(n->type == MCSHENG_NFA_8);
+ const struct mcsheng *m = getImplNfa(n);
+ s64a sp;
+
+ u32 s = *(u8 *)q->state;
+
+ if (q->report_current) {
+ assert(s);
+ assert(s >= m->accept_limit_8);
+
+ int rv;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ rv = cb(0, q_cur_offset(q), m->arb_report, context);
+ } else {
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
+ &cached_accept_state, &cached_accept_id);
+ }
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
+ return MO_DEAD;
+ }
+ }
+
+ sp = q_cur_loc(q);
+ q->cur++;
+
+ const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+ if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u8 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ while (1) {
+ DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" :
+ q->items[q->cur].type == MQE_END ? "END" : "???",
+ q->items[q->cur].location + offset);
+ assert(q->cur < q->end);
+ s64a ep = q->items[q->cur].location;
+ if (mode != NO_MATCHES) {
+ ep = MIN(ep, end);
+ }
+
+ assert(ep >= sp);
+
+ s64a local_ep = ep;
+ if (sp < 0) {
+ local_ep = MIN(0, ep);
+ }
+
+ const u8 *final_look;
+ char rv = mcshengExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
+ offset + sp, cb, context, single,
+ &final_look, mode);
+ if (rv == MO_DEAD) {
+ *(u8 *)q->state = 0;
+ return MO_DEAD;
+ }
+ if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
+
+ assert(q->cur);
+ assert(final_look != cur_buf + local_ep);
+
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+ * early -1 */
+ *(u8 *)q->state = s;
+ return MO_MATCHES_PENDING;
+ }
+
+ assert(rv == MO_ALIVE);
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ assert(q->cur);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u8 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ sp = local_ep;
+
+ if (sp == 0) {
+ cur_buf = buffer;
+ }
+
+ if (sp != ep) {
+ continue;
+ }
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ assert(sp + offset || !s);
+ if (sp + offset == 0) {
+ s = (u8)m->start_anchored;
+ break;
+ }
+ s = mcshengEnableStarts(m, s);
+ break;
+ case MQE_END:
+ *(u8 *)q->state = s;
+ q->cur++;
+ return s ? MO_ALIVE : MO_DEAD;
+ default:
+ assert(!"invalid queue event");
+ }
+
+ q->cur++;
+ }
+}
+
+char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCSHENG_NFA_8);
+ const struct mcsheng *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCSHENG_FLAG_SINGLE, end,
+ CALLBACK_OUTPUT);
+}
+
+char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCSHENG_NFA_16);
+ const struct mcsheng *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCSHENG_FLAG_SINGLE, end,
+ CALLBACK_OUTPUT);
+}
+
+char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q) {
+ const struct mcsheng *m = getImplNfa(n);
+ NfaCallback cb = q->cb;
+ void *ctxt = q->context;
+ u32 s = *(u8 *)q->state;
+ u8 single = m->flags & MCSHENG_FLAG_SINGLE;
+ u64a offset = q_cur_offset(q);
+ assert(q_cur_type(q) == MQE_START);
+ assert(s);
+
+ if (s >= m->accept_limit_8) {
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ cb(0, offset, m->arb_report, ctxt);
+ } else {
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
+ &cached_accept_id);
+ }
+ }
+
+ return 0;
+}
+
+char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) {
+ const struct mcsheng *m = getImplNfa(n);
+ NfaCallback cb = q->cb;
+ void *ctxt = q->context;
+ u32 s = *(u16 *)q->state;
+ const struct mstate_aux *aux = get_aux(m, s);
+ u8 single = m->flags & MCSHENG_FLAG_SINGLE;
+ u64a offset = q_cur_offset(q);
+ assert(q_cur_type(q) == MQE_START);
+ DEBUG_PRINTF("state %u\n", s);
+ assert(s);
+
+ if (aux->accept) {
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ cb(0, offset, m->arb_report, ctxt);
+ } else {
+ u32 cached_accept_id = 0;
+ u32 cached_accept_state = 0;
+
+ doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
+ &cached_accept_id);
+ }
+ }
+
+ return 0;
+}
+
+static
+char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux,
+ ReportID report) {
+ assert(m && aux);
+
+ if (!aux->accept) {
+ return 0;
+ }
+
+ const struct report_list *rl = (const struct report_list *)
+ ((const char *)m + aux->accept - sizeof(struct NFA));
+ assert(ISALIGNED_N(rl, 4));
+
+ DEBUG_PRINTF("report list has %u entries\n", rl->count);
+
+ for (u32 i = 0; i < rl->count; i++) {
+ if (rl->report[i] == report) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) {
+ assert(n && q);
+
+ const struct mcsheng *m = getImplNfa(n);
+ u8 s = *(u8 *)q->state;
+ DEBUG_PRINTF("checking accepts for %hhu\n", s);
+
+ return mcshengHasAccept(m, get_aux(m, s), report);
+}
+
+char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q) {
+ assert(n && q);
+
+ const struct mcsheng *m = getImplNfa(n);
+ u8 s = *(u8 *)q->state;
+ DEBUG_PRINTF("checking accepts for %hhu\n", s);
+
+ return !!get_aux(m, s)->accept;
+}
+
+char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) {
+ assert(n && q);
+
+ const struct mcsheng *m = getImplNfa(n);
+ u16 s = *(u16 *)q->state;
+ DEBUG_PRINTF("checking accepts for %hu\n", s);
+
+ return mcshengHasAccept(m, get_aux(m, s), report);
+}
+
+char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q) {
+ assert(n && q);
+
+ const struct mcsheng *m = getImplNfa(n);
+ u16 s = *(u16 *)q->state;
+ DEBUG_PRINTF("checking accepts for %hu\n", s);
+
+ return !!get_aux(m, s)->accept;
+}
+
+char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCSHENG_NFA_8);
+ const struct mcsheng *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCSHENG_FLAG_SINGLE, end,
+ STOP_AT_MATCH);
+}
+
+char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCSHENG_NFA_16);
+ const struct mcsheng *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCSHENG_FLAG_SINGLE, end,
+ STOP_AT_MATCH);
+}
+
+char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCSHENG_NFA_8);
+ const struct mcsheng *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ char rv = nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */,
+ NO_MATCHES);
+ if (rv && nfaExecMcSheng8_inAccept(n, report, q)) {
+ return MO_MATCHES_PENDING;
+ } else {
+ return rv;
+ }
+}
+
+char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCSHENG_NFA_16);
+ const struct mcsheng *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ char rv = nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */,
+ NO_MATCHES);
+
+ if (rv && nfaExecMcSheng16_inAccept(n, report, q)) {
+ return MO_MATCHES_PENDING;
+ } else {
+ return rv;
+ }
+}
+
+char nfaExecMcSheng8_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) {
+ const struct mcsheng *m = getImplNfa(nfa);
+ u8 s = offset ? m->start_floating : m->start_anchored;
+ if (s) {
+ *(u8 *)state = s;
+ return 1;
+ }
+ return 0;
+}
+
+char nfaExecMcSheng16_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) {
+ const struct mcsheng *m = getImplNfa(nfa);
+ u16 s = offset ? m->start_floating : m->start_anchored;
+ if (s) {
+ unaligned_store_u16(state, s);
+ return 1;
+ }
+ return 0;
+}
+
+char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state,
+ UNUSED const char *streamState, u64a offset,
+ NfaCallback callback, void *context) {
+ return mcshengCheckEOD(nfa, *(const u8 *)state, offset, callback,
+ context);
+}
+
+char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state,
+ UNUSED const char *streamState, u64a offset,
+ NfaCallback callback, void *context) {
+ assert(ISALIGNED_N(state, 2));
+ return mcshengCheckEOD(nfa, *(const u16 *)state, offset, callback,
+ context);
+}
+
+char nfaExecMcSheng8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
+ assert(nfa->scratchStateSize == 1);
+ *(u8 *)q->state = 0;
+ return 0;
+}
+
+char nfaExecMcSheng16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
+ assert(nfa->scratchStateSize == 2);
+ assert(ISALIGNED_N(q->state, 2));
+ *(u16 *)q->state = 0;
+ return 0;
+}
+
+char nfaExecMcSheng8_queueCompressState(UNUSED const struct NFA *nfa,
+ const struct mq *q, UNUSED s64a loc) {
+ void *dest = q->streamState;
+ const void *src = q->state;
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src;
+ return 0;
+}
+
+char nfaExecMcSheng8_expandState(UNUSED const struct NFA *nfa, void *dest,
+ const void *src, UNUSED u64a offset,
+ UNUSED u8 key) {
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src;
+ return 0;
+}
+
+char nfaExecMcSheng16_queueCompressState(UNUSED const struct NFA *nfa,
+ const struct mq *q,
+ UNUSED s64a loc) {
+ void *dest = q->streamState;
+ const void *src = q->state;
+ assert(nfa->scratchStateSize == 2);
+ assert(nfa->streamStateSize == 2);
+ assert(ISALIGNED_N(src, 2));
+ unaligned_store_u16(dest, *(const u16 *)(src));
+ return 0;
+}
+
+char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest,
+ const void *src, UNUSED u64a offset,
+ UNUSED u8 key) {
+ assert(nfa->scratchStateSize == 2);
+ assert(nfa->streamStateSize == 2);
+ assert(ISALIGNED_N(dest, 2));
+ *(u16 *)dest = unaligned_load_u16(src);
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCSHENG_H
+#define MCSHENG_H
+
+#include "callback.h"
+#include "ue2common.h"
+
+struct mq;
+struct NFA;
+
+/* 8-bit Sheng-McClellan hybrid */
+
+char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
+ NfaCallback callback, void *context);
+char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
+char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL
+#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+/* 16-bit Sheng-McClellan hybrid */
+
+char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
+ NfaCallback callback, void *context);
+char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
+char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
+#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mcsheng_compile.h"
+
+#include "accel.h"
+#include "accelcompile.h"
+#include "grey.h"
+#include "mcclellancompile.h"
+#include "mcclellancompile_util.h"
+#include "mcsheng_internal.h"
+#include "nfa_internal.h"
+#include "rdfa_graph.h"
+#include "shufticompile.h"
+#include "trufflecompile.h"
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2_containers.h"
+#include "util/unaligned.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <set>
+#include <deque>
+#include <vector>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+namespace /* anon */ {
+
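+/* Tuning: a sheng region needs at least MIN_SHENG_SIZE states to be worth
+ * the hybrid machinery; sheng ids must fit in a byte, so INVALID_SHENG_ID
+ * marks states which lie outside the sheng region. */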
+#define MIN_SHENG_SIZE 6
+#define INVALID_SHENG_ID 255
+
+struct dstate_extra {
+ u16 daddytaken = 0;
+ bool shermanState = false;
+ bool sheng_succ = false;
+ u8 sheng_id = INVALID_SHENG_ID;
+};
+
+struct dfa_info {
+ accel_dfa_build_strat &strat;
+ raw_dfa &raw;
+ vector<dstate> &states;
+ vector<dstate_extra> extra;
+ const u16 alpha_size; /* including special symbols */
+ const array<u16, ALPHABET_SIZE> &alpha_remap;
+ vector<CharReach> rev_alpha;
+ const u16 impl_alpha_size;
+
+ u8 getAlphaShift() const;
+
+ explicit dfa_info(accel_dfa_build_strat &s)
+ : strat(s),
+ raw(s.get_raw()),
+ states(raw.states),
+ extra(raw.states.size()),
+ alpha_size(raw.alpha_size),
+ alpha_remap(raw.alpha_remap),
+ impl_alpha_size(raw.getImplAlphaSize()) {
+ rev_alpha.resize(impl_alpha_size);
+ for (u32 i = 0; i < N_CHARS; i++) {
+ rev_alpha[alpha_remap[i]].set(i);
+ }
+ }
+
+ dstate_id_t implId(dstate_id_t raw_id) const {
+ return states[raw_id].impl_id;
+ }
+
+ bool is_sherman(dstate_id_t raw_id) const {
+ return extra[raw_id].shermanState;
+ }
+
+ bool is_sheng(dstate_id_t raw_id) const {
+ return extra[raw_id].sheng_id != INVALID_SHENG_ID;
+ }
+
+ bool is_sheng_succ(dstate_id_t raw_id) const {
+ return extra[raw_id].sheng_succ;
+ }
+
+ /* states which use the normal transition/successor table */
+ bool is_normal(dstate_id_t raw_id) const {
+ return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id);
+ }
+ size_t size(void) const { return states.size(); }
+};
+
+u8 dfa_info::getAlphaShift() const {
+ if (impl_alpha_size < 2) {
+ return 1;
+ } else {
+ /* log2 round up */
+ return 32 - clz32(impl_alpha_size - 1);
+ }
+}
+
+} // namespace
+
+static
+mstate_aux *getAux(NFA *n, dstate_id_t i) {
+ mcsheng *m = (mcsheng *)getMutableImplNfa(n);
+ mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
+
+ mstate_aux *aux = aux_base + i;
+ assert((const char *)aux < (const char *)n + m->length);
+ return aux;
+}
+
+static
+void createShuffleMasks(mcsheng *m, const dfa_info &info,
+ dstate_id_t sheng_end,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info) {
+ DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
+ assert(sheng_end > DEAD_STATE + 1);
+ assert(sheng_end <= sizeof(m128) + 1);
+ vector<array<u8, sizeof(m128)>> masks;
+ masks.resize(info.alpha_size);
+ /* -1 to avoid wasting a slot as we do not include dead state */
+ vector<dstate_id_t> raw_ids;
+ raw_ids.resize(sheng_end - 1);
+ for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
+ assert(info.implId(s)); /* should not map to DEAD_STATE */
+ if (info.is_sheng(s)) {
+ raw_ids[info.extra[s].sheng_id] = s;
+ }
+ }
+ for (u32 i = 0; i < info.alpha_size; i++) {
+ if (i == info.alpha_remap[TOP]) {
+ continue;
+ }
+ auto &mask = masks[i];
+ assert(sizeof(mask) == sizeof(m128));
+ mask.fill(0);
+
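+        /* mask entries hold sheng ids (impl id - 1) for in-region successors,
+         * sheng_end - 1 for the dead state, and the raw impl id for
+         * out-of-region successors (which must therefore fit in a byte) */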
+ for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
+ dstate_id_t raw_id = raw_ids[sheng_id];
+ dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
+ if (next_id == DEAD_STATE) {
+ next_id = sheng_end - 1;
+ } else if (next_id < sheng_end) {
+ next_id--;
+ }
+ DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
+ mask[sheng_id] = verify_u8(next_id);
+ }
+ }
+ for (u32 i = 0; i < N_CHARS; i++) {
+ assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
+ m->sheng_masks[i] = loadu128(masks[info.alpha_remap[i]].data());
+ }
+ m->sheng_end = sheng_end;
+ m->sheng_accel_limit = sheng_end - 1;
+
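+    /* lower the accel limit to the first accelerable sheng state, if any */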
+ for (dstate_id_t s : raw_ids) {
+ if (contains(accel_escape_info, s)) {
+ LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
+ }
+ }
+}
+
+static
+void populateBasicInfo(size_t state_size, const dfa_info &info,
+ u32 total_size, u32 aux_offset, u32 accel_offset,
+ u32 accel_count, ReportID arb, bool single, NFA *nfa) {
+ assert(state_size == sizeof(u16) || state_size == sizeof(u8));
+
+ nfa->length = total_size;
+ nfa->nPositions = info.states.size();
+
+ nfa->scratchStateSize = verify_u32(state_size);
+ nfa->streamStateSize = verify_u32(state_size);
+
+ if (state_size == sizeof(u8)) {
+ nfa->type = MCSHENG_NFA_8;
+ } else {
+ nfa->type = MCSHENG_NFA_16;
+ }
+
+ mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
+ for (u32 i = 0; i < 256; i++) {
+ m->remap[i] = verify_u8(info.alpha_remap[i]);
+ }
+ m->alphaShift = info.getAlphaShift();
+ m->length = total_size;
+ m->aux_offset = aux_offset;
+ m->accel_offset = accel_offset;
+ m->arb_report = arb;
+ m->state_count = verify_u16(info.size());
+ m->start_anchored = info.implId(info.raw.start_anchored);
+ m->start_floating = info.implId(info.raw.start_floating);
+ m->has_accel = accel_count ? 1 : 0;
+
+ if (single) {
+ m->flags |= MCSHENG_FLAG_SINGLE;
+ }
+}
+
+namespace {
+
+struct raw_report_list {
+ flat_set<ReportID> reports;
+
+ raw_report_list(const flat_set<ReportID> &reports_in,
+ const ReportManager &rm, bool do_remap) {
+ if (do_remap) {
+ for (auto &id : reports_in) {
+ reports.insert(rm.getProgramOffset(id));
+ }
+ } else {
+ reports = reports_in;
+ }
+ }
+
+ bool operator<(const raw_report_list &b) const {
+ return reports < b.reports;
+ }
+};
+
+struct raw_report_info_impl : public raw_report_info {
+ vector<raw_report_list> rl;
+ u32 getReportListSize() const override;
+ size_t size() const override;
+ void fillReportLists(NFA *n, size_t base_offset,
+ std::vector<u32> &ro /* out */) const override;
+};
+} // namespace
+
+u32 raw_report_info_impl::getReportListSize() const {
+ u32 rv = 0;
+
+ for (const auto &reps : rl) {
+ rv += sizeof(report_list);
+ rv += sizeof(ReportID) * reps.reports.size();
+ }
+
+ return rv;
+}
+
+size_t raw_report_info_impl::size() const {
+ return rl.size();
+}
+
+void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
+ vector<u32> &ro) const {
+ for (const auto &reps : rl) {
+ ro.push_back(base_offset);
+
+ report_list *p = (report_list *)((char *)n + base_offset);
+
+ u32 i = 0;
+ for (const ReportID report : reps.reports) {
+ p->report[i++] = report;
+ }
+ p->count = verify_u32(reps.reports.size());
+
+ base_offset += sizeof(report_list);
+ base_offset += sizeof(ReportID) * reps.reports.size();
+ }
+}
+
+static
+void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
+ set<dstate_id_t> *accel_states) {
+ for (dstate_id_t i : accel_escape_info | map_keys) {
+ accel_states->insert(i);
+ }
+}
+
+static
+size_t calcShermanRegionSize(const dfa_info &info) {
+ size_t rv = 0;
+
+ for (size_t i = 0; i < info.size(); i++) {
+ if (info.is_sherman(i)) {
+ rv += SHERMAN_FIXED_SIZE;
+ }
+ }
+
+ return ROUNDUP_16(rv);
+}
+
+static
+void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
+ const vector<u32> &reports, const vector<u32> &reports_eod,
+ const vector<u32> &reportOffsets) {
+ const dstate &raw_state = info.states[i];
+ aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
+ aux->accept_eod = raw_state.reports_eod.empty() ? 0
+ : reportOffsets[reports_eod[i]];
+ aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
+ : info.raw.start_floating);
+}
+
+/* Allocates 16-bit impl ids: sheng successors first (their ids must fit in a
+ * u8 so the shuffle masks can encode them), then the remaining normal states,
+ * then sherman states. Returns false on error (id overflow). */
+static
+bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
+ dstate_id_t *sherman_base) {
+ info.states[0].impl_id = 0; /* dead is always 0 */
+
+ vector<dstate_id_t> norm;
+ vector<dstate_id_t> sherm;
+ vector<dstate_id_t> norm_sheng_succ;
+ vector<dstate_id_t> sherm_sheng_succ;
+
+ if (info.size() > (1 << 16)) {
+ DEBUG_PRINTF("too many states\n");
+ *sherman_base = 0;
+ return false;
+ }
+
+ for (u32 i = 1; i < info.size(); i++) {
+ if (info.is_sheng(i)) {
+ continue; /* sheng impl ids have already been allocated */
+        }
+        if (info.is_sherman(i)) {
+ if (info.is_sheng_succ(i)) {
+ sherm_sheng_succ.push_back(i);
+ } else {
+ sherm.push_back(i);
+ }
+ } else {
+ if (info.is_sheng_succ(i)) {
+ norm_sheng_succ.push_back(i);
+ } else {
+ norm.push_back(i);
+ }
+ }
+ }
+
+ dstate_id_t next_norm = sheng_end;
+ for (dstate_id_t s : norm_sheng_succ) {
+ info.states[s].impl_id = next_norm++;
+ }
+ if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) {
+ /* we need to give sheng_succs ids which fit into a u8 -- demote these
+ * to normal states */
+ for (dstate_id_t s : sherm_sheng_succ) {
+ info.states[s].impl_id = next_norm++;
+ info.extra[s].shermanState = false;
+ }
+ sherm_sheng_succ.clear();
+ }
+ for (dstate_id_t s : norm) {
+ info.states[s].impl_id = next_norm++;
+ }
+
+ *sherman_base = next_norm;
+ dstate_id_t next_sherman = next_norm;
+
+ for (dstate_id_t s : sherm_sheng_succ) {
+ info.states[s].impl_id = next_sherman++;
+ }
+
+ for (dstate_id_t s : sherm) {
+ info.states[s].impl_id = next_sherman++;
+ }
+
+ /* Check to see if we haven't over allocated our states */
+ DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
+ (dstate_id_t)(next_sherman & STATE_MASK));
+ return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
+}
+
+typedef RdfaGraph::vertex_descriptor RdfaVertex;
+
+static
+bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info,
+ const flat_set<RdfaVertex> &sheng_states) {
+ u32 exit_count = 0;
+
+ for (auto v : sheng_states) {
+ dstate_id_t s = g[v].index;
+ for (u32 i = 0; i != info.alpha_size; i++) {
+ if (i == info.alpha_remap[TOP]) {
+ continue;
+ }
+ dstate_id_t next = info.states[s].next[i];
+ if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) {
+ continue;
+ }
+ exit_count++;
+ info.extra[next].sheng_succ = true;
+ }
+ }
+
+ if (exit_count + sheng_states.size() < UINT8_MAX) {
+ return true;
+ } else {
+ DEBUG_PRINTF("fail: unable to fit %u exits in byte", exit_count);
+ return false;
+ }
+}
+
+static
+CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
+ CharReach rv;
+ for (u32 i = 0; i < info.impl_alpha_size; i++) {
+ if (info.raw.states[u].next[i] == v) {
+ assert(info.rev_alpha[i].any());
+ rv |= info.rev_alpha[i];
+ }
+ }
+ assert(rv.any());
+ return rv;
+}
+
+#define MAX_SHENG_STATES 16
+#define MAX_SHENG_LEAKINESS 0.05
+
+/**
+ * Returns the proportion of strings of length 'depth' which will leave the
+ * sheng region when starting at state 'u'.
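+ *
+ * Computed recursively over a uniform random byte stream: an edge u->v
+ * contributes |reach(u,v)| / 256, weighted by 1 when v lies outside the
+ * sheng region and by the (depth - 1) leakiness of v when it lies inside;
+ * the dead state contributes nothing. Results are memoised in 'cache'.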
+ */
+static
+double leakiness(const RdfaGraph &g, dfa_info &info,
+ const flat_set<RdfaVertex> &sheng_states, RdfaVertex u,
+ u32 depth,
+ unordered_map<pair<RdfaVertex, u32>, double> &cache) {
+ double rv = 0;
+ if (contains(cache, make_pair(u, depth))) {
+ return cache[make_pair(u, depth)];
+ }
+ for (RdfaVertex v : adjacent_vertices_range(u, g)) {
+ if (g[v].index == DEAD_STATE) {
+ continue;
+ }
+ double width = get_edge_reach(g[u].index, g[v].index, info).count();
+ width /= N_CHARS;
+
+ double weight;
+ if (!contains(sheng_states, v)) {
+ weight = 1;
+ } else if (depth > 1) {
+ weight = leakiness(g, info, sheng_states, v, depth - 1, cache);
+ } else {
+ continue; /* weight = 0 */
+ }
+ rv += width * weight;
+ }
+
+ cache[make_pair(u, depth)] = rv;
+ DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv);
+ return rv;
+}
+
+/**
+ * Returns the proportion of 8 byte strings which will leave the sheng region
+ * when starting at state 'u'.
+ */
+static
+double leakiness(const RdfaGraph &g, dfa_info &info,
+ const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) {
+ unordered_map<pair<RdfaVertex, u32>, double> cache;
+ double rv = leakiness(g, info, sheng_states, u, 8, cache);
+ return rv;
+}
+
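+/**
+ * Picks the set of states to implement as the sheng region: a BFS outwards
+ * from a 'sticky' cyclic state (one whose back edges cover a wide reach),
+ * capped at MAX_SHENG_STATES. Returns one past the largest sheng impl id on
+ * success, or DEAD_STATE if the region would be too small, too leaky, or its
+ * exit states cannot be numbered within a byte.
+ */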
+static
+dstate_id_t find_sheng_states(dfa_info &info,
+ map<dstate_id_t, AccelScheme> &accel_escape_info) {
+ RdfaGraph g(info.raw);
+ auto cyclics = find_vertices_in_cycles(g);
+
+ auto base_cyclic = RdfaGraph::null_vertex();
+ for (const auto &v : cyclics) {
+ if (g[v].index == DEAD_STATE) {
+ continue;
+ }
+ DEBUG_PRINTF("considering cyclic %zu\n", g[v].index);
+        /* get an estimate of the stickiness of the cyclic: assume any edges
+         * from states with larger state ids are back edges */
+ CharReach est_back_reach;
+ for (const auto &u : inv_adjacent_vertices_range(v, g)) {
+ if (g[u].index < g[v].index) {
+ continue;
+ }
+ est_back_reach |= get_edge_reach(g[u].index, g[v].index, info);
+ }
+
+ if (est_back_reach.count() < 30) {
+ continue;
+ }
+ base_cyclic = v;
+ break;
+ }
+ if (!base_cyclic) {
+ return DEAD_STATE;
+ }
+
+ flat_set<RdfaVertex> sheng_states;
+ deque<RdfaVertex> to_consider = { base_cyclic };
+ flat_set<dstate_id_t> considered = { DEAD_STATE };
+ bool seen_back_edge = false;
+ while (!to_consider.empty()
+ && sheng_states.size() < MAX_SHENG_STATES) {
+ auto v = to_consider.front();
+ to_consider.pop_front();
+ if (!considered.insert(g[v].index).second) {
+ continue;
+ }
+
+ assert(!contains(sheng_states, v));
+
+ if (generates_callbacks(info.raw.kind)
+ && !info.states[g[v].index].reports.empty()) {
+ /* cannot raise callbacks from sheng region */
+ continue;
+ }
+
+ sheng_states.insert(v);
+ for (const auto &t : adjacent_vertices_range(v, g)) {
+ if (!contains(considered, g[t].index)) {
+ to_consider.push_back(t);
+ }
+ if (t == base_cyclic) {
+ seen_back_edge = true;
+ }
+ }
+ }
+
+ /* allocate normal ids */
+ dstate_id_t sheng_end = DEAD_STATE + 1;
+ for (auto v : sheng_states) {
+ dstate_id_t s = g[v].index;
+ if (!contains(accel_escape_info, s)) {
+ info.states[s].impl_id = sheng_end++;
+ info.extra[s].sheng_id = info.states[s].impl_id - 1;
+ }
+ }
+
+ /* allocate accel ids */
+ for (auto v : sheng_states) {
+ dstate_id_t s = g[v].index;
+ if (contains(accel_escape_info, s)) {
+ assert(!info.states[s].impl_id);
+ info.states[s].impl_id = sheng_end++;
+ info.extra[s].sheng_id = info.states[s].impl_id - 1;
+ }
+ }
+
+ if (sheng_states.size() < MIN_SHENG_SIZE) {
+ DEBUG_PRINTF("sheng region too small\n");
+ return DEAD_STATE;
+ }
+
+ if (!seen_back_edge) {
+ DEBUG_PRINTF("did not include cyclic\n");
+ return DEAD_STATE;
+ }
+
+ double leak = leakiness(g, info, sheng_states, base_cyclic);
+ if (leak > MAX_SHENG_LEAKINESS) {
+ DEBUG_PRINTF("too leaky (%g)\n", leak);
+ return DEAD_STATE;
+ }
+
+ if (!mark_sheng_succs(g, info, sheng_states)) {
+ return DEAD_STATE;
+ }
+
+ /* TODO: ensure sufficiently 'sticky' */
+ /* TODO: check not all states accel */
+ DEBUG_PRINTF("sheng_end = %hu\n", sheng_end);
+ return sheng_end;
+}
+
+static
+void fill_in_aux_info(NFA *nfa, const dfa_info &info,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info,
+ u32 accel_offset, UNUSED u32 accel_end_offset,
+ const vector<u32> &reports,
+ const vector<u32> &reports_eod,
+ u32 report_base_offset,
+ const raw_report_info &ri) {
+ mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
+
+ vector<u32> reportOffsets;
+
+ ri.fillReportLists(nfa, report_base_offset, reportOffsets);
+
+ for (u32 i = 0; i < info.size(); i++) {
+ u16 impl_id = info.implId(i);
+ mstate_aux *this_aux = getAux(nfa, impl_id);
+
+ fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
+ if (contains(accel_escape_info, i)) {
+ this_aux->accel_offset = accel_offset;
+ accel_offset += info.strat.accelSize();
+ assert(accel_offset <= accel_end_offset);
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+ info.strat.buildAccel(i, accel_escape_info.at(i),
+ (void *)((char *)m + this_aux->accel_offset));
+ }
+ }
+}
+
+static
+u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
+ mstate_aux *aux = getAux(nfa, target_impl_id);
+ u16 flags = 0;
+
+ if (aux->accept) {
+ flags |= ACCEPT_FLAG;
+ }
+
+ if (aux->accel_offset) {
+ flags |= ACCEL_FLAG;
+ }
+
+ return flags;
+}
+
+static
+void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
+ dstate_id_t sheng_end,
+ UNUSED dstate_id_t sherman_base) {
+ u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));
+
+ u8 alphaShift = info.getAlphaShift();
+ assert(alphaShift <= 8);
+
+ for (size_t i = 0; i < info.size(); i++) {
+ if (!info.is_normal(i)) {
+ assert(info.implId(i) < sheng_end || info.is_sherman(i));
+ continue;
+ }
+
+ assert(info.implId(i) < sherman_base);
+ u16 normal_id = verify_u16(info.implId(i) - sheng_end);
+
+ for (size_t s = 0; s < info.impl_alpha_size; s++) {
+ dstate_id_t raw_succ = info.states[i].next[s];
+ u16 &entry = succ_table[(normal_id << alphaShift) + s];
+
+ entry = info.implId(raw_succ);
+ entry |= get_edge_flags(nfa, entry);
+ }
+ }
+}
+
+#define MAX_SHERMAN_LIST_LEN 8
+
+static
+void addIfEarlier(set<dstate_id_t> &dest, dstate_id_t candidate,
+ dstate_id_t max) {
+ if (candidate < max) {
+ dest.insert(candidate);
+ }
+}
+
+static
+void addSuccessors(set<dstate_id_t> &dest, const dstate &source,
+ u16 alphasize, dstate_id_t curr_id) {
+ for (symbol_t s = 0; s < alphasize; s++) {
+ addIfEarlier(dest, source.next[s], curr_id);
+ }
+}
+
+#define MAX_SHERMAN_SELF_LOOP 20
+
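+/* A sherman state stores only the transitions which differ from those of its
+ * 'daddy' state; we look for a daddy that shares as many successors as
+ * possible so that the exception list stays within MAX_SHERMAN_LIST_LEN. */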
+static
+void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
+ bool any_cyclic_near_anchored_state, const Grey &grey) {
+ if (!grey.allowShermanStates) {
+ return;
+ }
+
+ const u16 width = sizeof(u16);
+ const u16 alphasize = info.impl_alpha_size;
+
+ if (info.raw.start_anchored != DEAD_STATE
+ && any_cyclic_near_anchored_state
+ && curr_id < alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed;
+         * depends on the fact that state numbers are currently assigned in
+         * bfs order */
+ DEBUG_PRINTF("%hu is banned\n", curr_id);
+ return;
+ }
+
+ if (info.raw.start_floating != DEAD_STATE
+ && curr_id >= info.raw.start_floating
+ && curr_id < info.raw.start_floating + alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed;
+         * depends on the fact that state numbers are currently assigned in
+         * bfs order */
+ DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
+ return;
+ }
+
+ const u16 full_state_size = width * alphasize;
+ const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
+ (full_state_size - 2)/(width + 1));
+ u16 best_score = 0;
+ dstate_id_t best_daddy = 0;
+ dstate &currState = info.states[curr_id];
+
+ set<dstate_id_t> hinted; /* set of states to search for a better daddy */
+ addIfEarlier(hinted, 0, curr_id);
+ addIfEarlier(hinted, info.raw.start_anchored, curr_id);
+ addIfEarlier(hinted, info.raw.start_floating, curr_id);
+
+ dstate_id_t mydaddy = currState.daddy;
+ if (mydaddy) {
+ addIfEarlier(hinted, mydaddy, curr_id);
+ addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id);
+ dstate_id_t mygranddaddy = info.states[mydaddy].daddy;
+ if (mygranddaddy) {
+ addIfEarlier(hinted, mygranddaddy, curr_id);
+ addSuccessors(hinted, info.states[mygranddaddy], alphasize,
+ curr_id);
+ }
+ }
+
+ for (const dstate_id_t &donor : hinted) {
+ assert(donor < curr_id);
+ u32 score = 0;
+
+ if (!info.is_normal(donor)) {
+ continue;
+ }
+
+ const dstate &donorState = info.states[donor];
+ for (symbol_t s = 0; s < alphasize; s++) {
+ if (currState.next[s] == donorState.next[s]) {
+ score++;
+ }
+ }
+
+ /* prefer lower ids to provide some stability amongst potential
+ * siblings */
+ if (score > best_score || (score == best_score && donor < best_daddy)) {
+ best_daddy = donor;
+ best_score = score;
+
+ if (score == alphasize) {
+ break;
+ }
+ }
+ }
+
+ currState.daddy = best_daddy;
+ info.extra[curr_id].daddytaken = best_score;
+ DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
+ best_score, alphasize);
+
+ if (best_daddy == DEAD_STATE) {
+ return; /* No good daddy */
+ }
+
+ if (best_score + max_list_len < alphasize) {
+        return; /* the daddy would leave more exceptions than the sherman
+                 * list can hold */
+ }
+
+ assert(info.is_normal(currState.daddy));
+
+ u32 self_loop_width = 0;
+ const dstate curr_raw = info.states[curr_id];
+ for (unsigned i = 0; i < N_CHARS; i++) {
+ if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
+ self_loop_width++;
+ }
+ }
+
+ if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
+ DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id,
+ self_loop_width);
+ return;
+ }
+
+ if (info.is_sheng(curr_id)) {
+ return;
+ }
+
+ DEBUG_PRINTF("%hu is sherman\n", curr_id);
+ info.extra[curr_id].shermanState = true;
+}
+
+static
+bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
+ symbol_t alphasize = raw.getImplAlphaSize();
+ for (symbol_t s = 0; s < alphasize; s++) {
+ dstate_id_t succ_id = raw.states[root].next[s];
+ if (succ_id == DEAD_STATE) {
+ continue;
+ }
+
+ const dstate &succ = raw.states[succ_id];
+ for (symbol_t t = 0; t < alphasize; t++) {
+ if (succ.next[t] == root || succ.next[t] == succ_id) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static
+void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
+ char *nfa_base = (char *)nfa;
+ mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
+ char *sherman_table = nfa_base + m->sherman_offset;
+
+ assert(ISALIGNED_16(sherman_table));
+ for (size_t i = 0; i < info.size(); i++) {
+ if (!info.is_sherman(i)) {
+ continue;
+ }
+ u16 fs = verify_u16(info.implId(i));
+ DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);
+
+ assert(fs >= sherman_limit);
+
+ char *curr_sherman_entry
+ = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
+ assert(curr_sherman_entry <= nfa_base + m->length);
+
+ u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
+ assert(len <= 9);
+ dstate_id_t d = info.states[i].daddy;
+
+ *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
+ *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
+ *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
+ u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
+
+ for (u16 s = 0; s < info.impl_alpha_size; s++) {
+ if (info.states[i].next[s] != info.states[d].next[s]) {
+ *(chars++) = (u8)s;
+ }
+ }
+
+ u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
+ for (u16 s = 0; s < info.impl_alpha_size; s++) {
+ if (info.states[i].next[s] != info.states[d].next[s]) {
+ DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
+ info.implId(d),
+ info.implId(info.states[i].next[s]));
+ u16 entry_val = info.implId(info.states[i].next[s]);
+ entry_val |= get_edge_flags(nfa, entry_val);
+ unaligned_store_u16((u8 *)states++, entry_val);
+ }
+ }
+ }
+}
+
+static
+aligned_unique_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info,
+ const Grey &grey) {
+ DEBUG_PRINTF("building mcsheng 16\n");
+
+ vector<u32> reports; /* index in ri for the appropriate report list */
+ vector<u32> reports_eod; /* as above */
+ ReportID arb;
+ u8 single;
+
+ assert(info.getAlphaShift() <= 8);
+
+ u16 total_daddy = 0;
+ for (u32 i = 0; i < info.size(); i++) {
+ find_better_daddy(info, i,
+ is_cyclic_near(info.raw, info.raw.start_anchored),
+ grey);
+ total_daddy += info.extra[i].daddytaken;
+ }
+
+ DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
+ info.size() * info.impl_alpha_size, info.size(),
+ info.impl_alpha_size);
+
+ u16 sherman_limit;
+ if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
+ DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
+ info.size());
+ return nullptr;
+ }
+ u16 count_real_states = sherman_limit - sheng_end;
+
+ auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
+
+ size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
+ * count_real_states;
+
+ size_t aux_size = sizeof(mstate_aux) * info.size();
+
+ size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
+ size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
+ size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ + ri->getReportListSize(), 32);
+ size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
+ size_t sherman_size = calcShermanRegionSize(info);
+
+ size_t total_size = sherman_offset + sherman_size;
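+    /* layout: NFA header | mcsheng header | 16-bit succ table | aux
+     * structures | report lists | accel structures | sherman region */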
+
+ accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
+ aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
+ mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
+
+ populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
+ accel_escape_info.size(), arb, single, nfa.get());
+ createShuffleMasks(m, info, sheng_end, accel_escape_info);
+
+ /* copy in the mc header information */
+ m->sherman_offset = sherman_offset;
+ m->sherman_end = total_size;
+ m->sherman_limit = sherman_limit;
+
+ DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
+ count_real_states, info.size());
+
+ fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
+ sherman_offset - sizeof(NFA), reports, reports_eod,
+ aux_offset + aux_size, *ri);
+
+ fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit);
+
+ fill_in_sherman(nfa.get(), info, sherman_limit);
+
+ return nfa;
+}
+
+static
+void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
+ dstate_id_t sheng_end) {
+ u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng);
+
+ u8 alphaShift = info.getAlphaShift();
+ assert(alphaShift <= 8);
+
+ for (size_t i = 0; i < info.size(); i++) {
+ assert(!info.is_sherman(i));
+ if (!info.is_normal(i)) {
+ assert(info.implId(i) < sheng_end);
+ continue;
+ }
+ u8 normal_id = verify_u8(info.implId(i) - sheng_end);
+
+ for (size_t s = 0; s < info.impl_alpha_size; s++) {
+ dstate_id_t raw_succ = info.states[i].next[s];
+ succ_table[(normal_id << alphaShift) + s] = info.implId(raw_succ);
+ }
+ }
+}
+
+static
+void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info,
+ u16 *accel_limit, u16 *accept_limit) {
+ info.states[0].impl_id = 0; /* dead is always 0 */
+
+ vector<dstate_id_t> norm;
+ vector<dstate_id_t> accel;
+ vector<dstate_id_t> accept;
+
+ assert(info.size() <= (1 << 8));
+
+ for (u32 i = 1; i < info.size(); i++) {
+ if (info.is_sheng(i)) {
+ continue; /* already allocated */
+ } else if (!info.states[i].reports.empty()) {
+ accept.push_back(i);
+ } else if (contains(accel_escape_info, i)) {
+ accel.push_back(i);
+ } else {
+ norm.push_back(i);
+ }
+ }
+
+ u32 j = sheng_end;
+ for (const dstate_id_t &s : norm) {
+ assert(j <= 256);
+ DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+ info.states[s].impl_id = j++;
+ }
+ *accel_limit = j;
+ for (const dstate_id_t &s : accel) {
+ assert(j <= 256);
+ DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+ info.states[s].impl_id = j++;
+ }
+ *accept_limit = j;
+ for (const dstate_id_t &s : accept) {
+ assert(j <= 256);
+ DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+ info.states[s].impl_id = j++;
+ }
+}
+
+static
+aligned_unique_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
+ const map<dstate_id_t, AccelScheme> &accel_escape_info) {
+ DEBUG_PRINTF("building mcsheng 8\n");
+
+ vector<u32> reports;
+ vector<u32> reports_eod;
+ ReportID arb;
+ u8 single;
+
+ auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
+
+ size_t normal_count = info.size() - sheng_end;
+
+ size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
+ size_t aux_size = sizeof(mstate_aux) * info.size();
+ size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
+ size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
+ size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ + ri->getReportListSize(), 32);
+ size_t total_size = accel_offset + accel_size;
+
+ DEBUG_PRINTF("aux_size %zu\n", aux_size);
+ DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
+ DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
+ DEBUG_PRINTF("accel_size %zu\n", accel_size);
+ DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
+ DEBUG_PRINTF("total_size %zu\n", total_size);
+
+ accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
+ aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
+ mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
+
+ allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
+ &m->accept_limit_8);
+
+ populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
+ accel_escape_info.size(), arb, single, nfa.get());
+ createShuffleMasks(m, info, sheng_end, accel_escape_info);
+
+ fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
+ total_size - sizeof(NFA), reports, reports_eod,
+ aux_offset + aux_size, *ri);
+
+ fill_in_succ_table_8(nfa.get(), info, sheng_end);
+
+ DEBUG_PRINTF("rl size %zu\n", ri->size());
+
+ return nfa;
+}
+
+aligned_unique_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm,
+ set<dstate_id_t> *accel_states) {
+ if (!cc.grey.allowMcSheng) {
+ return nullptr;
+ }
+
+ mcclellan_build_strat mbs(raw, rm);
+ dfa_info info(mbs);
+ bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
+
+ if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
+ * mode with our semantics */
+ raw.stripExtraEodReports();
+ }
+
+ bool has_eod_reports = raw.hasEodReports();
+
+ map<dstate_id_t, AccelScheme> accel_escape_info
+ = info.strat.getAccelInfo(cc.grey);
+
+ dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info);
+ if (sheng_end <= DEAD_STATE + 1) {
+ return nullptr;
+ }
+
+ aligned_unique_ptr<NFA> nfa;
+ if (!using8bit) {
+ nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
+ } else {
+ nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
+ }
+
+ if (!nfa) {
+ return nfa;
+ }
+
+ if (has_eod_reports) {
+ nfa->flags |= NFA_ACCEPTS_EOD;
+ }
+
+ if (accel_states) {
+ fillAccelOut(accel_escape_info, accel_states);
+ }
+
+ DEBUG_PRINTF("compile done\n");
+ return nfa;
+}
+
+bool has_accel_mcsheng(const NFA *) {
+ return true; /* consider the sheng region as accelerated */
+}
+
+} // namespace ue2
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCSHENGCOMPILE_H
+#define MCSHENGCOMPILE_H
+
+#include "accel_dfa_build_strat.h"
+#include "rdfa.h"
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/ue2_containers.h"
+
+#include <memory>
+#include <set>
+
+struct NFA;
+
+namespace ue2 {
+
+class ReportManager;
+struct CompileContext;
+
+/* accel_states: (optional) on success, is filled with the set of accelerable
+ * states */
+ue2::aligned_unique_ptr<NFA>
+mcshengCompile(raw_dfa &raw, const CompileContext &cc,
+ const ReportManager &rm,
+ std::set<dstate_id_t> *accel_states = nullptr);
+
+bool has_accel_mcsheng(const NFA *nfa);
+
+} // namespace ue2
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mcsheng_internal.h"
+
+/* This table is in a separate translation unit from mcsheng.c as we want to
+ * prevent the compiler from seeing these constants. We have spare load
+ * resources at runtime, so loading the masks from memory is not a problem. */
+const u64a mcsheng_pext_mask[8] = {
+ 0, /* dummy */
+ 0x000000000000ff0f,
+ 0x0000000000ff000f,
+ 0x00000000ff00000f,
+ 0x000000ff0000000f,
+ 0x0000ff000000000f,
+ 0x00ff00000000000f,
+ 0xff0000000000000f,
+};
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "mcsheng_dump.h"
+
+#include "accel.h"
+#include "accel_dump.h"
+#include "nfa_dump_internal.h"
+#include "nfa_internal.h"
+#include "mcsheng_internal.h"
+#include "rdfa.h"
+#include "ue2common.h"
+#include "util/charreach.h"
+#include "util/dump_charclass.h"
+#include "util/dump_util.h"
+#include "util/unaligned.h"
+
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+
+#ifndef DUMP_SUPPORT
+#error No dump support!
+#endif
+
+using namespace std;
+
+namespace ue2 {
+
+static
+const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
+ auto *m = (const mcsheng *)getImplNfa(n);
+ auto *aux_base = (const mstate_aux *)((const char *)n + m->aux_offset);
+
+ const mstate_aux *aux = aux_base + i;
+
+ assert((const char *)aux < (const char *)n + m->length);
+ return aux;
+}
+
+static
+void next_states(const NFA *n, u16 s, u16 *t) {
+ const mcsheng *m = (const mcsheng *)getImplNfa(n);
+ const mstate_aux *aux = getAux(n, s);
+ const u32 as = m->alphaShift;
+ assert(s != DEAD_STATE);
+
+ if (s < m->sheng_end) {
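+        /* invert the encoding used by createShuffleMasks: mask entries hold
+         * sheng ids (impl id - 1), with sheng_end - 1 standing in for the
+         * dead state */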
+ for (u16 c = 0; c < N_CHARS; c++) {
+ u8 sheng_s = s - 1;
+ auto trans_for_c = (const char *)&m->sheng_masks[c];
+ assert(sheng_s < sizeof(m128));
+ u8 raw_succ = trans_for_c[sheng_s];
+ if (raw_succ == m->sheng_end - 1) {
+ t[c] = DEAD_STATE;
+ } else if (raw_succ < m->sheng_end) {
+ t[c] = raw_succ + 1;
+ } else {
+ t[c] = raw_succ;
+ }
+ }
+ } else if (n->type == MCSHENG_NFA_8) {
+ const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng));
+ for (u16 c = 0; c < N_CHARS; c++) {
+ u32 normal_id = s - m->sheng_end;
+ t[c] = succ_table[(normal_id << as) + m->remap[c]];
+ }
+ } else {
+ u16 base_s = s;
+ const char *winfo_base = (const char *)n + m->sherman_offset;
+ const char *state_base
+ = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
+
+ if (s >= m->sherman_limit) {
+ base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
+ assert(base_s >= m->sheng_end);
+ }
+
+ const u16 *succ_table = (const u16 *)((const char *)m
+ + sizeof(mcsheng));
+ for (u16 c = 0; c < N_CHARS; c++) {
+ u32 normal_id = base_s - m->sheng_end;
+ t[c] = succ_table[(normal_id << as) + m->remap[c]];
+ }
+
+ if (s >= m->sherman_limit) {
+ UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
+ assert(type == SHERMAN_STATE);
+ u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
+ const char *chars = state_base + SHERMAN_CHARS_OFFSET;
+ const u16 *states = (const u16 *)(state_base
+ + SHERMAN_STATES_OFFSET(len));
+
+ for (u8 i = 0; i < len; i++) {
+ for (u16 c = 0; c < N_CHARS; c++) {
+ if (m->remap[c] == chars[i]) {
+ t[c] = unaligned_load_u16((const u8*)&states[i]);
+ }
+ }
+ }
+ }
+
+ for (u16 c = 0; c < N_CHARS; c++) {
+ t[c] &= STATE_MASK;
+ }
+
+ }
+
+ t[TOP] = aux->top & STATE_MASK;
+}
+
+static
+void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
+ for (u16 s = 0; s < N_CHARS; s++) {
+ if (!t[s]) {
+ continue;
+ }
+
+ u16 ss;
+ for (ss = 0; ss < s; ss++) {
+ if (t[s] == t[ss]) {
+ break;
+ }
+ }
+
+ if (ss != s) {
+ continue;
+ }
+
+ CharReach reach;
+ for (ss = s; ss < 256; ss++) {
+ if (t[s] == t[ss]) {
+ reach.set(ss);
+ }
+ }
+
+ fprintf(f, "%u -> %u [ ", i, t[s]);
+ if (i < m->sheng_end && t[s] < m->sheng_end) {
+ fprintf(f, "color = red, fontcolor = red ");
+ }
+ fprintf(f, "label = \"");
+ describeClass(f, reach, 5, CC_OUT_DOT);
+
+ fprintf(f, "\" ];\n");
+ }
+}
+
+static
+void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
+ switch(accel->accel_type) {
+ case ACCEL_NONE:
+ break;
+ case ACCEL_VERM:
+ case ACCEL_VERM_NOCASE:
+ case ACCEL_DVERM:
+ case ACCEL_DVERM_NOCASE:
+ fprintf(f, "%u [ color = forestgreen style=diagonals];\n", i);
+ break;
+ case ACCEL_SHUFTI:
+ case ACCEL_DSHUFTI:
+ case ACCEL_TRUFFLE:
+ fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i);
+ break;
+ default:
+ fprintf(f, "%u [ color = yellow style=diagonals ];\n", i);
+ break;
+ }
+}
+
+static
+void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) {
+ const mstate_aux *aux = getAux(n, i);
+
+ bool isSherman = m->sherman_limit && i >= m->sherman_limit;
+
+ fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
+ "label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");
+
+ if (aux->accel_offset) {
+ dumpAccelDot(f, i, (const union AccelAux *)
+ ((const char *)m + aux->accel_offset));
+ }
+
+ if (i && i < m->sheng_end) {
+ fprintf(f, "%u [color = red, fontcolor = red]; \n", i);
+ }
+
+ if (aux->accept_eod) {
+ fprintf(f, "%u [ color = darkorchid ];\n", i);
+ }
+
+ if (aux->accept) {
+ fprintf(f, "%u [ shape = doublecircle ];\n", i);
+ }
+
+ if (aux->top && aux->top != i) {
+ fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
+ aux->top);
+ }
+
+ if (i == m->start_anchored) {
+ fprintf(f, "STARTA -> %u [color = blue ]\n", i);
+ }
+
+ if (i == m->start_floating) {
+ fprintf(f, "STARTF -> %u [color = red ]\n", i);
+ }
+
+ if (isSherman) {
+ const char *winfo_base = (const char *)n + m->sherman_offset;
+ const char *state_base
+ = winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit);
+ assert(state_base < (const char *)m + m->length - sizeof(NFA));
+ UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET);
+ assert(type == SHERMAN_STATE);
+ fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);
+ u16 daddy = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET);
+ if (daddy) {
+ fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
+ i, daddy);
+ }
+ }
+
+ if (i && i < m->sheng_end) {
+ fprintf(f, "subgraph cluster_sheng { %u } \n", i);
+ }
+
+}
+
+static
+void dumpDotPreambleDfa(FILE *f) {
+ dumpDotPreamble(f);
+
+ // DFA specific additions.
+ fprintf(f, "STARTF [style=invis];\n");
+ fprintf(f, "STARTA [style=invis];\n");
+ fprintf(f, "0 [style=invis];\n");
+ fprintf(f, "subgraph cluster_sheng { style = dashed }\n");
+}
+
+static
+void dump_dot_16(const NFA *nfa, FILE *f) {
+ auto *m = (const mcsheng *)getImplNfa(nfa);
+
+ dumpDotPreambleDfa(f);
+
+ for (u16 i = 1; i < m->state_count; i++) {
+ describeNode(nfa, m, i, f);
+
+ u16 t[ALPHABET_SIZE];
+
+ next_states(nfa, i, t);
+
+ describeEdge(f, m, t, i);
+ }
+
+ fprintf(f, "}\n");
+}
+
+static
+void dump_dot_8(const NFA *nfa, FILE *f) {
+ auto m = (const mcsheng *)getImplNfa(nfa);
+
+ dumpDotPreambleDfa(f);
+
+ for (u16 i = 1; i < m->state_count; i++) {
+ describeNode(nfa, m, i, f);
+
+ u16 t[ALPHABET_SIZE];
+
+ next_states(nfa, i, t);
+
+ describeEdge(f, m, t, i);
+ }
+
+ fprintf(f, "}\n");
+}
+
+static
+void dumpAccelMasks(FILE *f, const mcsheng *m, const mstate_aux *aux) {
+ fprintf(f, "\n");
+ fprintf(f, "Acceleration\n");
+ fprintf(f, "------------\n");
+
+ for (u16 i = 0; i < m->state_count; i++) {
+ if (!aux[i].accel_offset) {
+ continue;
+ }
+
+ auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset);
+ fprintf(f, "%05hu ", i);
+ dumpAccelInfo(f, *accel);
+ }
+}
+
+static
+void describeAlphabet(FILE *f, const mcsheng *m) {
+ map<u8, CharReach> rev;
+
+ for (u16 i = 0; i < N_CHARS; i++) {
+ rev[m->remap[i]].clear();
+ }
+
+ for (u16 i = 0; i < N_CHARS; i++) {
+ rev[m->remap[i]].set(i);
+ }
+
+ map<u8, CharReach>::const_iterator it;
+ fprintf(f, "\nAlphabet\n");
+ for (it = rev.begin(); it != rev.end(); ++it) {
+ fprintf(f, "%3hhu: ", it->first);
+ describeClass(f, it->second, 10240, CC_OUT_TEXT);
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n");
+}
+
+static
+void dumpCommonHeader(FILE *f, const mcsheng *m) {
+ fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
+ m->state_count, m->length);
+ fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
+ m->start_floating);
+ fprintf(f, "single accept: %d, has_accel: %d\n",
+ !!(int)m->flags & MCSHENG_FLAG_SINGLE, m->has_accel);
+ fprintf(f, "sheng_end: %hu\n", m->sheng_end);
+ fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit);
+}
+
+static
+void dump_text_16(const NFA *nfa, FILE *f) {
+ auto *m = (const mcsheng *)getImplNfa(nfa);
+ auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
+
+ fprintf(f, "mcsheng 16\n");
+ dumpCommonHeader(f, m);
+ fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit,
+ (int)m->sherman_end);
+ fprintf(f, "\n");
+
+ describeAlphabet(f, m);
+ dumpAccelMasks(f, m, aux);
+
+ fprintf(f, "\n");
+ dumpTextReverse(nfa, f);
+}
+
+static
+void dump_text_8(const NFA *nfa, FILE *f) {
+ auto m = (const mcsheng *)getImplNfa(nfa);
+ auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
+
+ fprintf(f, "mcsheng 8\n");
+ dumpCommonHeader(f, m);
+ fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
+ m->accept_limit_8);
+ fprintf(f, "\n");
+
+ describeAlphabet(f, m);
+ dumpAccelMasks(f, m, aux);
+
+ fprintf(f, "\n");
+ dumpTextReverse(nfa, f);
+}
+
+void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
+ assert(nfa->type == MCSHENG_NFA_16);
+ FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
+ dump_text_16(nfa, f);
+ fclose(f);
+ f = fopen_or_throw((base + ".dot").c_str(), "w");
+ dump_dot_16(nfa, f);
+ fclose(f);
+}
+
+void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
+ assert(nfa->type == MCSHENG_NFA_8);
+ FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
+ dump_text_8(nfa, f);
+ fclose(f);
+ f = fopen_or_throw((base + ".dot").c_str(), "w");
+ dump_dot_8(nfa, f);
+ fclose(f);
+}
+
+} // namespace ue2
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCSHENG_DUMP_H
+#define MCSHENG_DUMP_H
+
+#ifdef DUMP_SUPPORT
+
+#include "rdfa.h"
+
+#include <cstdio>
+#include <string>
+
+struct NFA;
+
+namespace ue2 {
+
+void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
+void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
+
+#endif // MCSHENG_DUMP_H
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCSHENG_INTERNAL_H
+#define MCSHENG_INTERNAL_H
+
+#include "nfa_internal.h"
+#include "ue2common.h"
+#include "util/simd_utils.h"
+
+#define ACCEPT_FLAG 0x8000
+#define ACCEL_FLAG 0x4000
+#define STATE_MASK 0x3fff
+
+#define SHERMAN_STATE 1
+
+#define SHERMAN_TYPE_OFFSET 0
+#define SHERMAN_FIXED_SIZE 32
+
+#define SHERMAN_LEN_OFFSET 1
+#define SHERMAN_DADDY_OFFSET 2
+#define SHERMAN_CHARS_OFFSET 4
+#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len))
+
+struct report_list {
+ u32 count;
+ ReportID report[];
+};
+
+struct mstate_aux {
+ u32 accept;
+ u32 accept_eod;
+ u16 top;
+ u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */
+};
+
+#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */
+
+struct mcsheng {
+ u16 state_count; /**< total number of states */
+ u32 length; /**< length of dfa in bytes */
+ u16 start_anchored; /**< anchored start state */
+ u16 start_floating; /**< floating start state */
+ u32 aux_offset; /**< offset of the aux structures relative to the start of
+ * the nfa structure */
+    u32 sherman_offset; /**< offset of the sherman state region (the
+                         * state_info structures) relative to the start of the
+                         * nfa structure */
+ u32 sherman_end; /**< offset of the end of the state_info structures
+ * relative to the start of the nfa structure */
+ u16 sheng_end; /**< first non-sheng state */
+ u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
+ * internal sheng ids */
+ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
+ u16 accept_limit_8; /**< 8 bit, lowest accept state */
+ u16 sherman_limit; /**< lowest sherman state */
+ u8 alphaShift;
+ u8 flags;
+ u8 has_accel; /**< 1 iff there are any accel plans */
+ u8 remap[256]; /**< remaps characters to a smaller alphabet */
+ ReportID arb_report; /**< one of the accepts that this dfa may raise */
+ u32 accel_offset; /**< offset of the accel structures from start of NFA */
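+    /** successor shuffle masks for the sheng region, indexed by input byte */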
+ m128 sheng_masks[N_CHARS];
+};
+
+/* pext masks for the runtime: mask k extracts the low nibble together with
+ * byte k (1..7) of a u64a. */
+extern const u64a mcsheng_pext_mask[8];
+
+#endif
#include "lbr.h"
#include "limex.h"
#include "mcclellan.h"
+#include "mcsheng.h"
#include "mpv.h"
#include "sheng.h"
#include "tamarama.h"
DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \
DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
+ DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
+ DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
default: \
assert(0); \
}
#include "limex_internal.h"
#include "mcclellancompile.h"
+#include "mcsheng_compile.h"
#include "shengcompile.h"
#include "nfa_internal.h"
#include "repeat_internal.h"
const char *NFATraits<TAMARAMA_NFA>::name = "Tamarama";
#endif
+template<> struct NFATraits<MCSHENG_NFA_8> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 1;
+ static const bool fast = true;
+ static const nfa_dispatch_fn has_accel;
+ static const nfa_dispatch_fn has_repeats;
+ static const nfa_dispatch_fn has_repeats_other_than_firsts;
+};
+const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_accel = has_accel_mcsheng;
+const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<MCSHENG_NFA_8>::name = "Shengy McShengFace 8";
+#endif
+
+template<> struct NFATraits<MCSHENG_NFA_16> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 2;
+ static const bool fast = true;
+ static const nfa_dispatch_fn has_accel;
+ static const nfa_dispatch_fn has_repeats;
+ static const nfa_dispatch_fn has_repeats_other_than_firsts;
+};
+const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_accel = has_accel_mcsheng;
+const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats = dispatch_false;
+const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16";
+#endif
+
} // namespace
#if defined(DUMP_SUPPORT)
#include "lbr_dump.h"
#include "limex.h"
#include "mcclellandump.h"
+#include "mcsheng_dump.h"
#include "mpv_dump.h"
#include "shengdump.h"
#include "tamarama_dump.h"
DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \
DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
+ DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
+ DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
default: \
assert(0); \
}
CASTLE_NFA, /**< magic pseudo nfa */
SHENG_NFA, /**< magic pseudo nfa */
TAMARAMA_NFA, /**< magic nfa container */
+ MCSHENG_NFA_8, /**< magic pseudo nfa */
+ MCSHENG_NFA_16, /**< magic pseudo nfa */
/** \brief bogus NFA - not used */
INVALID_NFA
};
return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16;
}
+/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
+ * DFA. */
+static really_inline int isShengMcClellanType(u8 t) {
+ return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16;
+}
+
/** \brief True if the given type (from NFA::type) is a Gough DFA. */
static really_inline int isGoughType(u8 t) {
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
* Sheng DFA.
*/
static really_inline int isDfaType(u8 t) {
- return isMcClellanType(t) || isGoughType(t) || isShengType(t);
+ return isMcClellanType(t) || isGoughType(t) || isShengType(t)
+ || isShengMcClellanType(t);
+}
+
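+/** \brief True if the given type (from NFA::type) is a DFA with 16-bit state
+ * indices. */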
+static really_inline int isBigDfaType(u8 t) {
+ return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16;
+}
+
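+/** \brief True if the given type (from NFA::type) is a DFA with 8-bit state
+ * indices. */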
+static really_inline int isSmallDfaType(u8 t) {
+ return isDfaType(t) && !isBigDfaType(t);
}
/** \brief True if the given type (from NFA::type) is an NFA. */
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rdfa_graph.h"
+
+#include "rdfa.h"
+#include "util/container.h"
+
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
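+/* Builds a graph over the raw_dfa's states: one vertex per dstate and a
+ * single edge per distinct successor, collapsing parallel transitions on
+ * different symbols. */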
+RdfaGraph::RdfaGraph(const raw_dfa &rdfa) {
+ RdfaGraph &g = *this;
+
+ vector<RdfaGraph::vertex_descriptor> verts;
+ verts.reserve(rdfa.states.size());
+ for (dstate_id_t i = 0; i < rdfa.states.size(); i++) {
+ verts.push_back(add_vertex(g));
+ assert(g[verts.back()].index == i);
+ }
+
+ symbol_t symbol_end = rdfa.alpha_size - 1;
+
+ flat_set<dstate_id_t> local_succs;
+ for (dstate_id_t i = 0; i < rdfa.states.size(); i++) {
+ local_succs.clear();
+ for (symbol_t s = 0; s < symbol_end; s++) {
+ dstate_id_t next = rdfa.states[i].next[s];
+ if (contains(local_succs, next)) {
+ continue;
+ }
+ DEBUG_PRINTF("%hu->%hu\n", i, next);
+ add_edge(verts[i], verts[next], g);
+ local_succs.insert(next);
+ }
+ }
+}
+
+} // namespace ue2
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RDFA_GRAPH_H
+#define RDFA_GRAPH_H
+
+#include "ue2common.h"
+#include "util/ue2_graph.h"
+
+namespace ue2 {
+
+struct raw_dfa;
+
+struct RdfaVertexProps {
+ size_t index = 0;
+};
+
+struct RdfaEdgeProps {
+ size_t index = 0;
+};
+
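+/** \brief Graph over the states of a raw_dfa: one vertex per state. */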
+struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> {
+ explicit RdfaGraph(const raw_dfa &rdfa);
+};
+
+} // namespace ue2
+
+#endif
}
}
-bool has_accel_sheng(const NFA *nfa) {
- const sheng *s = (const sheng *)getImplNfa(nfa);
- return s->flags & SHENG_FLAG_HAS_ACCEL;
+bool has_accel_sheng(const NFA *) {
+ return true; /* consider the sheng region as accelerated */
}
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,
#include <map>
#include <set>
#include <boost/graph/filtered_graph.hpp>
-#include <boost/graph/strong_components.hpp>
#include <boost/graph/topological_sort.hpp>
#include <boost/range/adaptor/map.hpp>
using boost::default_color_type;
using boost::make_filtered_graph;
using boost::make_assoc_property_map;
-using boost::adaptors::map_values;
namespace ue2 {
return false;
}
-set<NFAVertex> findVerticesInCycles(const NGHolder &g) {
- map<NFAVertex, size_t> comp_map;
-
- strong_components(g, make_assoc_property_map(comp_map));
-
- map<size_t, set<NFAVertex> > comps;
-
- for (const auto &e : comp_map) {
- comps[e.second].insert(e.first);
- }
-
-
- set<NFAVertex> rv;
-
- for (const auto &comp : comps | map_values) {
- /* every vertex in a strongly connected component is reachable from
- * every other vertex in the component. A vertex is involved in a cycle
- * therefore if it is in a strongly connected component with more than
- * one vertex or if it is the only vertex and it has a self loop. */
- assert(!comp.empty());
- if (comp.size() > 1) {
- insert(&rv, comp);
- }
- NFAVertex v = *comp.begin();
- if (hasSelfLoop(v, g)) {
- rv.insert(v);
- }
- }
-
- return rv;
-}
-
bool can_never_match(const NGHolder &g) {
assert(edge(g.accept, g.acceptEod, g).second);
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
#include "nfa/goughcompile.h"
#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
+#include "nfa/mcsheng_compile.h"
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_build_util.h"
#include "nfa/nfa_internal.h"
bool d_accel = has_accel(*dfa_impl);
bool n_accel = has_accel(*nfa_impl);
- bool d_big = dfa_impl->type == MCCLELLAN_NFA_16;
+ bool d_big = isBigDfaType(dfa_impl->type);
bool n_vsmall = nfa_impl->nPositions <= 32;
bool n_br = has_bounded_repeats(*nfa_impl);
DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel,
}
static
-aligned_unique_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
+aligned_unique_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
+ const CompileContext &cc,
const ReportManager &rm) {
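+ /* is_transient: the DFA is for a transient prefix, i.e. one that is only
+ * run over small blocks of history. */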
// Unleash the Sheng!!
auto dfa = shengCompile(rdfa, cc, rm);
+ if (!dfa && !is_transient) {
+ // Sheng wasn't successful, so try the McSheng hybrid.
+ /* We don't try the hybrid for transient prefixes due to the extra
+ * bytecode and because they are usually run on small blocks. */
+ dfa = mcshengCompile(rdfa, cc, rm);
+ }
if (!dfa) {
// Sheng wasn't successful, so unleash McClellan!
dfa = mcclellanCompile(rdfa, cc, rm);
}
if (suff.dfa()) {
- auto d = getDfa(*suff.dfa(), cc, rm);
+ auto d = getDfa(*suff.dfa(), false, cc, rm);
assert(d);
return d;
}
auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
cc.grey);
if (rdfa) {
- auto d = getDfa(*rdfa, cc, rm);
+ auto d = getDfa(*rdfa, false, cc, rm);
assert(d);
if (cc.grey.roseMcClellanSuffix != 2) {
n = pickImpl(move(d), move(n));
}
if (left.dfa()) {
- n = getDfa(*left.dfa(), cc, rm);
+ n = getDfa(*left.dfa(), is_transient, cc, rm);
} else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix &&
!is_transient) {
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
if (rdfa) {
- n = getDfa(*rdfa, cc, rm);
+ n = getDfa(*rdfa, is_transient, cc, rm);
assert(n);
}
}
&& (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) {
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
if (rdfa) {
- auto d = getDfa(*rdfa, cc, rm);
+ auto d = getDfa(*rdfa, is_transient, cc, rm);
assert(d);
n = pickImpl(move(d), move(n));
}
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const {
// Unleash the mighty DFA!
- return getDfa(*rdfa, build.cc, build.rm);
+ return getDfa(*rdfa, false, build.cc, build.rm);
}
aligned_unique_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const {
!has_bounded_repeats_other_than_firsts(*n)) {
auto rdfa = buildMcClellan(h, &rm, cc.grey);
if (rdfa) {
- auto d = getDfa(*rdfa, cc, rm);
+ auto d = getDfa(*rdfa, false, cc, rm);
if (d) {
n = pickImpl(move(d), move(n));
}
const NGHolder &g = *left.graph();
- auto cyclics = findVerticesInCycles(g);
+ auto cyclics = find_vertices_in_cycles(g);
if (!proper_out_degree(g.startDs, g)) {
cyclics.erase(g.startDs);
}
const NFA *nfa = (const NFA *)((const char *)atable + sizeof(*atable));
- if (nfa->type != MCCLELLAN_NFA_8) {
+ if (!isSmallDfaType(nfa->type)) {
DEBUG_PRINTF("m16 atable engine\n");
return 0;
}
return popcount64(mask);
}
+#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
+#define HAVE_PEXT
+#endif
+
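+/* Parallel bit extract: gathers the bits of x selected by the set bits of
+ * mask into the contiguous low bits of the result, e.g.
+ * pext32(0x12345678, 0x0000ff00) == 0x56. */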
+static really_inline
+u32 pext32(u32 x, u32 mask) {
+#if defined(HAVE_PEXT)
+ // Intel BMI2 can do this operation in one instruction.
+ return _pext_u32(x, mask);
+#else
+
+ u32 result = 0, num = 1;
+ while (mask != 0) {
+ u32 bit = findAndClearLSB_32(&mask);
+ if (x & (1U << bit)) {
+ assert(num != 0); // more than 32 bits!
+ result |= num;
+ }
+ num <<= 1;
+ }
+ return result;
+#endif
+}
+
+static really_inline
+u64a pext64(u64a x, u64a mask) {
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+ // Intel BMI2 can do this operation in one instruction.
+ return _pext_u64(x, mask);
+#else
+
+ u64a result = 0, num = 1;
+ while (mask != 0) {
+ u32 bit = findAndClearLSB_64(&mask);
+ if (x & (1ULL << bit)) {
+ assert(num != 0); // more than 64 bits!
+ result |= num;
+ }
+ num <<= 1;
+ }
+ return result;
+#endif
+}
+
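+/* Parallel bit deposit: scatters the low bits of x into the positions of the
+ * set bits in mask, e.g. pdep64(0x56, 0xff00) == 0x5600. Note that there is
+ * currently no non-BMI2 fallback, so pdep64 only exists on 64-bit targets
+ * with PEXT/PDEP support. */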
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+static really_inline
+u64a pdep64(u64a x, u64a mask) {
+ return _pdep_u64(x, mask);
+}
+#endif
+
#endif // BITUTILS_H
#include "util/ue2_containers.h"
#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/strong_components.hpp>
+#include <boost/range/adaptor/map.hpp>
#include <algorithm>
+#include <map>
+#include <set>
#include <utility>
#include <vector>
}
}
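+/** \brief Returns the set of vertices that lie on at least one cycle
+ * (self-loops included). */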
+template <class Graph>
+ue2::flat_set<typename Graph::vertex_descriptor>
+find_vertices_in_cycles(const Graph &g) {
+ using vertex_descriptor = typename Graph::vertex_descriptor;
+
+ std::map<vertex_descriptor, size_t> comp_map;
+
+ boost::strong_components(g, boost::make_assoc_property_map(comp_map));
+
+ std::map<size_t, std::vector<vertex_descriptor>> comps;
+
+ for (const auto &e : comp_map) {
+ comps[e.second].push_back(e.first);
+ }
+
+ ue2::flat_set<vertex_descriptor> rv;
+
+ for (const auto &comp : comps | boost::adaptors::map_values) {
+ /* Every vertex in a strongly connected component is reachable from
+ * every other vertex in that component. A vertex is therefore involved
+ * in a cycle if its component contains more than one vertex, or if it
+ * is the sole vertex in its component and has a self-loop. */
+ assert(!comp.empty());
+ if (comp.size() > 1) {
+ insert(&rv, comp);
+ }
+ vertex_descriptor v = *comp.begin();
+ if (hasSelfLoop(v, g)) {
+ rv.insert(v);
+ }
+ }
+
+ return rv;
+}
+
template <class Graph>
bool has_parallel_edge(const Graph &g) {
using vertex_descriptor = typename Graph::vertex_descriptor;
return _mm_set1_epi8(c);
}
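+/* broadcast c to all four 32-bit lanes */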
+static really_inline m128 set4x32(u32 c) {
+ return _mm_set1_epi32(c);
+}
+
static really_inline u32 movd(const m128 in) {
return _mm_cvtsi128_si32(in);
}
return pshufb(in, shift_mask);
}
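+/* per-byte unsigned maximum */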
+static really_inline
+m128 max_u8_m128(m128 a, m128 b) {
+ return _mm_max_epu8(a, b);
+}
+
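+/* per-byte unsigned minimum */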
+static really_inline
+m128 min_u8_m128(m128 a, m128 b) {
+ return _mm_min_epu8(a, b);
+}
+
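+/* per-byte saturating unsigned add: sums are clamped at 0xff */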
+static really_inline
+m128 sadd_u8_m128(m128 a, m128 b) {
+ return _mm_adds_epu8(a, b);
+}
+
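+/* per-byte subtraction; with u8 operands the result wraps modulo 256
+ * (_mm_sub_epi8 yields the same bit pattern) */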
+static really_inline
+m128 sub_u8_m128(m128 a, m128 b) {
+ return _mm_sub_epi8(a, b);
+}
/****
**** 256-bit Primitives
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31));
ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63));
}
+
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+TEST(BitUtils, pdep64) {
+ u64a data = 0xF123456789ABCDEF;
+ ASSERT_EQ(0xfULL, pdep64(data, 0xf));
+ ASSERT_EQ(0xefULL, pdep64(data, 0xff));
+ ASSERT_EQ(0xf0ULL, pdep64(data, 0xf0));
+ ASSERT_EQ(0xef0ULL, pdep64(data, 0xff0));
+ ASSERT_EQ(0xef00ULL, pdep64(data, 0xff00));
+ ASSERT_EQ(0xd0e0f00ULL, pdep64(data, 0xf0f0f00));
+}
+#endif
add_edge(a, b, g);
add_edge(b, a, g);
- auto cyclics = findVerticesInCycles(g);
+ auto cyclics = find_vertices_in_cycles(g);
- ASSERT_EQ(set<NFAVertex>({g.startDs, a, b}), cyclics);
+ ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b}), cyclics);
}
TEST(NFAGraph, cyclicVerts2) {
add_edge(c, d, g);
add_edge(a, e, g);
- auto cyclics = findVerticesInCycles(g);
+ auto cyclics = find_vertices_in_cycles(g);
- ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c}), cyclics);
+ ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c}), cyclics);
}
TEST(NFAGraph, cyclicVerts3) {
add_edge(f, h, g);
add_edge(h, h, g);
- auto cyclics = findVerticesInCycles(g);
+ auto cyclics = find_vertices_in_cycles(g);
- ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
+ ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
}
TEST(NFAGraph, cyclicVerts4) {
add_edge(e, f, g);
add_edge(f, h, g);
- auto cyclics = findVerticesInCycles(g);
+ auto cyclics = find_vertices_in_cycles(g);
- ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
+ ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
}
TEST(NFAGraph, cyclicVerts5) {
add_edge(c, d, g);
add_edge(e, c, g);
- auto cyclics = findVerticesInCycles(g);
+ auto cyclics = find_vertices_in_cycles(g);
- ASSERT_EQ(set<NFAVertex>({g.startDs, b, c}), cyclics);
+ ASSERT_EQ(flat_set<NFAVertex>({g.startDs, b, c}), cyclics);
}
for (unsigned int i = 0; i < 32; i++) {
// shuffle a single 1 bit to the front
u32 mask = 1U << i;
- EXPECT_EQ(1U, packedExtract32(mask, mask));
- EXPECT_EQ(1U, packedExtract32(~0U, mask));
+ EXPECT_EQ(1U, pext32(mask, mask));
+ EXPECT_EQ(1U, pext32(~0U, mask));
// we should get zero out of these cases
- EXPECT_EQ(0U, packedExtract32(0, mask));
- EXPECT_EQ(0U, packedExtract32(~mask, mask));
+ EXPECT_EQ(0U, pext32(0, mask));
+ EXPECT_EQ(0U, pext32(~mask, mask));
// we should get zero out of all the other bit positions
for (unsigned int j = 0; (j != i && j < 32); j++) {
- EXPECT_EQ(0U, packedExtract32((1U << j), mask));
+ EXPECT_EQ(0U, pext32((1U << j), mask));
}
}
}
TEST(Shuffle, PackedExtract32_2) {
// All 32 bits in mask are on
u32 mask = ~0U;
- EXPECT_EQ(0U, packedExtract32(0, mask));
- EXPECT_EQ(mask, packedExtract32(mask, mask));
+ EXPECT_EQ(0U, pext32(0, mask));
+ EXPECT_EQ(mask, pext32(mask, mask));
for (unsigned int i = 0; i < 32; i++) {
- EXPECT_EQ(1U << i, packedExtract32(1U << i, mask));
+ EXPECT_EQ(1U << i, pext32(1U << i, mask));
}
}
}
// Test both cases (all even bits, all odd bits)
- EXPECT_EQ((1U << 16) - 1, packedExtract32(mask, mask));
- EXPECT_EQ((1U << 16) - 1, packedExtract32(~mask, ~mask));
- EXPECT_EQ(0U, packedExtract32(~mask, mask));
- EXPECT_EQ(0U, packedExtract32(mask, ~mask));
+ EXPECT_EQ((1U << 16) - 1, pext32(mask, mask));
+ EXPECT_EQ((1U << 16) - 1, pext32(~mask, ~mask));
+ EXPECT_EQ(0U, pext32(~mask, mask));
+ EXPECT_EQ(0U, pext32(mask, ~mask));
for (unsigned int i = 0; i < 32; i += 2) {
- EXPECT_EQ(1U << (i/2), packedExtract32(1U << i, mask));
- EXPECT_EQ(0U, packedExtract32(1U << i, ~mask));
- EXPECT_EQ(1U << (i/2), packedExtract32(1U << (i+1), ~mask));
- EXPECT_EQ(0U, packedExtract32(1U << (i+1), mask));
+ EXPECT_EQ(1U << (i/2), pext32(1U << i, mask));
+ EXPECT_EQ(0U, pext32(1U << i, ~mask));
+ EXPECT_EQ(1U << (i/2), pext32(1U << (i+1), ~mask));
+ EXPECT_EQ(0U, pext32(1U << (i+1), mask));
}
}
for (unsigned int i = 0; i < 64; i++) {
// shuffle a single 1 bit to the front
u64a mask = 1ULL << i;
- EXPECT_EQ(1U, packedExtract64(mask, mask));
- EXPECT_EQ(1U, packedExtract64(~0ULL, mask));
+ EXPECT_EQ(1U, pext64(mask, mask));
+ EXPECT_EQ(1U, pext64(~0ULL, mask));
// we should get zero out of these cases
- EXPECT_EQ(0U, packedExtract64(0, mask));
- EXPECT_EQ(0U, packedExtract64(~mask, mask));
+ EXPECT_EQ(0U, pext64(0, mask));
+ EXPECT_EQ(0U, pext64(~mask, mask));
// we should get zero out of all the other bit positions
for (unsigned int j = 0; (j != i && j < 64); j++) {
- EXPECT_EQ(0U, packedExtract64((1ULL << j), mask));
+ EXPECT_EQ(0U, pext64((1ULL << j), mask));
}
}
}
TEST(Shuffle, PackedExtract64_2) {
// Fill first half of mask
u64a mask = 0x00000000ffffffffULL;
- EXPECT_EQ(0U, packedExtract64(0, mask));
- EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
+ EXPECT_EQ(0U, pext64(0, mask));
+ EXPECT_EQ(0xffffffffU, pext64(mask, mask));
for (unsigned int i = 0; i < 32; i++) {
- EXPECT_EQ(1U << i, packedExtract64(1ULL << i, mask));
+ EXPECT_EQ(1U << i, pext64(1ULL << i, mask));
}
// Fill second half of mask
mask = 0xffffffff00000000ULL;
- EXPECT_EQ(0U, packedExtract64(0, mask));
- EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
+ EXPECT_EQ(0U, pext64(0, mask));
+ EXPECT_EQ(0xffffffffU, pext64(mask, mask));
for (unsigned int i = 32; i < 64; i++) {
- EXPECT_EQ(1U << (i - 32), packedExtract64(1ULL << i, mask));
+ EXPECT_EQ(1U << (i - 32), pext64(1ULL << i, mask));
}
// Try one in the middle
mask = 0x0000ffffffff0000ULL;
- EXPECT_EQ(0U, packedExtract64(0, mask));
- EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
+ EXPECT_EQ(0U, pext64(0, mask));
+ EXPECT_EQ(0xffffffffU, pext64(mask, mask));
for (unsigned int i = 16; i < 48; i++) {
- EXPECT_EQ(1U << (i - 16), packedExtract64(1ULL << i, mask));
+ EXPECT_EQ(1U << (i - 16), pext64(1ULL << i, mask));
}
}
}
// Test both cases (all even bits, all odd bits)
- EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask));
- EXPECT_EQ(0xffffffffU, packedExtract64(~mask, ~mask));
- EXPECT_EQ(0U, packedExtract64(~mask, mask));
- EXPECT_EQ(0U, packedExtract64(mask, ~mask));
+ EXPECT_EQ(0xffffffffU, pext64(mask, mask));
+ EXPECT_EQ(0xffffffffU, pext64(~mask, ~mask));
+ EXPECT_EQ(0U, pext64(~mask, mask));
+ EXPECT_EQ(0U, pext64(mask, ~mask));
for (unsigned int i = 0; i < 64; i += 2) {
- EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << i, mask));
- EXPECT_EQ(0U, packedExtract64(1ULL << i, ~mask));
- EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << (i+1), ~mask));
- EXPECT_EQ(0U, packedExtract64(1ULL << (i+1), mask));
+ EXPECT_EQ(1U << (i/2), pext64(1ULL << i, mask));
+ EXPECT_EQ(0U, pext64(1ULL << i, ~mask));
+ EXPECT_EQ(1U << (i/2), pext64(1ULL << (i+1), ~mask));
+ EXPECT_EQ(0U, pext64(1ULL << (i+1), mask));
}
}
}
}
+TEST(SimdUtilsTest, set4x32) {
+ u32 cmp[4] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 };
+ m128 simd = set4x32(cmp[0]);
+ ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd)));
+}
+
#if defined(__AVX2__)
TEST(SimdUtilsTest, set32x8) {
char cmp[sizeof(m256)];
EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, -16)));
}
+TEST(SimdUtilsTest, max_u8_m128) {
+ char base1[] = "0123456789ABCDE\xfe";
+ char base2[] = "!!23455889aBCd\xff\xff";
+ char expec[] = "0123456889aBCd\xff\xff";
+ m128 in1 = loadu128(base1);
+ m128 in2 = loadu128(base2);
+ m128 result = max_u8_m128(in1, in2);
+ EXPECT_TRUE(!diff128(result, loadu128(expec)));
+}
+
+TEST(SimdUtilsTest, min_u8_m128) {
+ char base1[] = "0123456789ABCDE\xfe";
+ char base2[] = "!!23455889aBCd\xff\xff";
+ char expec[] = "!!23455789ABCDE\xfe";
+ m128 in1 = loadu128(base1);
+ m128 in2 = loadu128(base2);
+ m128 result = min_u8_m128(in1, in2);
+ EXPECT_TRUE(!diff128(result, loadu128(expec)));
+}
+
+TEST(SimdUtilsTest, sadd_u8_m128) {
+ unsigned char base1[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
+ '1', '2', '3', '4', '1', '2', '3', '4'};
+ unsigned char base2[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10,
+ 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};
+ unsigned char expec[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
+ 'a', 'b', 'c', 'd', '1', '2', '3', '4'};
+ m128 in1 = loadu128(base1);
+ m128 in2 = loadu128(base2);
+ m128 result = sadd_u8_m128(in1, in2);
+ EXPECT_TRUE(!diff128(result, loadu128(expec)));
+}
+
+TEST(SimdUtilsTest, sub_u8_m128) {
+ unsigned char base1[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
+ 'a', 'b', 'c', 'd', '1', '2', '3', '4'};
+ unsigned char base2[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
+ '1', '2', '3', '4', '1', '2', '3', '4'};
+ unsigned char expec[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10,
+ 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};
+ m128 in1 = loadu128(base1);
+ m128 in2 = loadu128(base2);
+ m128 result = sub_u8_m128(in1, in2);
+ EXPECT_TRUE(!diff128(result, loadu128(expec)));
+}
+
} // namespace