From: George Wort Date: Mon, 28 Jun 2021 15:29:43 +0000 (+0100) Subject: Implement new Vermicelli16 acceleration functions using SVE2. X-Git-Tag: v5.4.3+vectorscan~57 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=df926ef62fd12ab332ad1c7ea55a1f865d42e3bc;p=thirdparty%2Fvectorscan.git Implement new Vermicelli16 acceleration functions using SVE2. The scheme utilises the MATCH and NMATCH instructions to scan for 16 characters at the same rate as vermicelli scans for one. Change-Id: Ie2cef904c56651e6108593c668e9b65bc001a886 --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bfb78dc..f246932c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -879,6 +879,8 @@ SET (hs_compile_SRCS src/nfa/tamaramacompile.h src/nfa/trufflecompile.cpp src/nfa/trufflecompile.h + src/nfa/vermicellicompile.cpp + src/nfa/vermicellicompile.h src/nfagraph/ng.cpp src/nfagraph/ng.h src/nfagraph/ng_anchored_acyclic.cpp diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 8cf585a9..c1c2837f 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,6 +63,11 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr, DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n", aux->dverm.c1, aux->dverm.c2); return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end); +#ifdef HAVE_SVE2 + case ACCEL_VERM16: + DEBUG_PRINTF("single vermicelli16\n"); + return vermicelli16Exec(aux->verm16.mask, ptr, end); +#endif // HAVE_SVE2 case ACCEL_SHUFTI: DEBUG_PRINTF("single shufti\n"); return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end); diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 2bc60945..8c9b6e72 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel 
Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -81,6 +82,17 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { c_end - 1); break; +#ifdef HAVE_SVE2 + case ACCEL_VERM16: + DEBUG_PRINTF("accel verm16 %p %p\n", c, c_end); + if (c_end - c < 16) { + return c; + } + + rv = vermicelli16Exec(accel->verm16.mask, c, c_end); + break; +#endif // HAVE_SVE2 + case ACCEL_DVERM_MASKED: DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end); if (c + 16 + 1 >= c_end) { diff --git a/src/nfa/accel.h b/src/nfa/accel.h index 3a03d059..0676239a 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,6 +63,7 @@ enum AccelType { ACCEL_TRUFFLE, ACCEL_RED_TAPE, ACCEL_DVERM_MASKED, + ACCEL_VERM16 }; /** \brief Structure for accel framework. 
*/ @@ -97,6 +99,11 @@ union AccelAux { u8 len1; u8 len2; } mdverm; + struct { + u8 accel_type; + u8 offset; + m128 mask; + } verm16; struct { u8 accel_type; u8 offset; diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 16a19f80..cfca9397 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +34,7 @@ #include "nfagraph/ng_limex_accel.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "vermicellicompile.h" #include "util/accel_scheme.h" #include "util/charreach.h" #include "util/container.h" @@ -514,6 +516,15 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, return; } +#ifdef HAVE_SVE2 + if (info.cr.count() <= 16) { + accel->accel_type = ACCEL_VERM16; + vermicelli16Build(info.cr, (u8 *)&accel->verm16.mask); + DEBUG_PRINTF("state %hu is vermicelli16\n", this_idx); + return; + } +#endif // HAVE_SVE2 + if (info.cr.count() > max_floating_stop_char()) { accel->accel_type = ACCEL_NONE; DEBUG_PRINTF("state %hu is too broad\n", this_idx); diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index a224410d..f68ed1b9 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +30,7 @@ #include "accel.h" #include "accelcompile.h" #include "shufticompile.h" +#include "vermicellicompile.h" #include "trufflecompile.h" #include "nfagraph/ng_limex_accel.h" /* for constants */ #include "util/bitutils.h" @@ -71,6 +73,16 @@ void buildAccelSingle(const 
AccelInfo &info, AccelAux *aux) { return; } +#ifdef HAVE_SVE2 + if (outs <= 16) { + aux->accel_type = ACCEL_VERM16; + aux->verm16.offset = offset; + vermicelli16Build(info.single_stops, (u8 *)&aux->verm16.mask); + DEBUG_PRINTF("building vermicelli16\n"); + return; + } +#endif + DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 7c158b31..dc6ec8f9 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -552,6 +553,42 @@ char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char castleScanVerm16(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = vermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleScanNVerm16(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = nvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { @@ -604,6 
+641,12 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin, return castleScanVerm(c, buf, begin, end, loc); case CASTLE_NVERM: return castleScanNVerm(c, buf, begin, end, loc); +#ifdef HAVE_SVE2 + case CASTLE_VERM16: + return castleScanVerm16(c, buf, begin, end, loc); + case CASTLE_NVERM16: + return castleScanNVerm16(c, buf, begin, end, loc); +#endif // HAVE_SVE2 case CASTLE_SHUFTI: return castleScanShufti(c, buf, begin, end, loc); case CASTLE_TRUFFLE: @@ -647,6 +690,42 @@ char castleRevScanNVerm(const struct Castle *c, const u8 *buf, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char castleRevScanVerm16(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanNVerm16(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rnvermicelli16Exec(c->u.verm16.mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char castleRevScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { @@ -699,6 +778,12 @@ char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin, return castleRevScanVerm(c, buf, begin, end, loc); case CASTLE_NVERM: return castleRevScanNVerm(c, buf, begin, end, loc); +#ifdef HAVE_SVE2 + case CASTLE_VERM16: + return castleRevScanVerm16(c, buf, begin, end, loc); + case 
CASTLE_NVERM16: + return castleRevScanNVerm16(c, buf, begin, end, loc); +#endif // HAVE_SVE2 case CASTLE_SHUFTI: return castleRevScanShufti(c, buf, begin, end, loc); case CASTLE_TRUFFLE: diff --git a/src/nfa/castle_internal.h b/src/nfa/castle_internal.h index 429c232f..ea135f8d 100644 --- a/src/nfa/castle_internal.h +++ b/src/nfa/castle_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,6 +53,8 @@ struct SubCastle { #define CASTLE_NVERM 2 #define CASTLE_SHUFTI 3 #define CASTLE_TRUFFLE 4 +#define CASTLE_VERM16 5 +#define CASTLE_NVERM16 6 enum ExclusiveType { NOT_EXCLUSIVE, //!< no subcastles are exclusive @@ -129,6 +132,9 @@ struct ALIGN_AVX_DIRECTIVE Castle { struct { char c; } verm; + struct { + m128 mask; + } verm16; struct { m128 mask_lo; m128 mask_hi; diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 20bc2925..56b12700 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +40,7 @@ #include "repeatcompile.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "vermicellicompile.h" #include "nfagraph/ng_dump.h" #include "nfagraph/ng_equivalence.h" #include "nfagraph/ng_repeat.h" @@ -101,6 +103,19 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) { return; } +#ifdef HAVE_SVE2 + if (cr.count() <= 16) { + c->type = CASTLE_NVERM16; + vermicelli16Build(cr, (u8 *)&c->u.verm16.mask); + return; + } + if (negated.count() <= 16) { + c->type = CASTLE_VERM16; + vermicelli16Build(negated, (u8 *)&c->u.verm16.mask); + return; + } +#endif // 
HAVE_SVE2 + if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo, (u8 *)&c->u.shuf.mask_hi) != -1) { c->type = CASTLE_SHUFTI; diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index d403733a..2c6ea163 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -361,6 +362,56 @@ char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char lbrRevScanVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_VERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrRevScanNVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_NVERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rnvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, @@ -467,6 +518,56 @@ char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, return 1; } +#ifdef HAVE_SVE2 + +static really_inline +char lbrFwdScanVerm16(const struct NFA *nfa, 
const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_VERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = vermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrFwdScanNVerm16(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); + assert(nfa->type == LBR_NFA_NVERM16); + const struct lbr_verm16 *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = nvermicelli16Exec(l->mask, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = ptr - buf; + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#endif // HAVE_SVE2 + static really_inline char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, @@ -524,6 +625,16 @@ char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, #define ENGINE_ROOT_NAME NVerm #include "lbr_common_impl.h" +#ifdef HAVE_SVE2 + +#define ENGINE_ROOT_NAME Verm16 +#include "lbr_common_impl.h" + +#define ENGINE_ROOT_NAME NVerm16 +#include "lbr_common_impl.h" + +#endif // HAVE_SVE2 + #define ENGINE_ROOT_NAME Shuf #include "lbr_common_impl.h" diff --git a/src/nfa/lbr.h b/src/nfa/lbr.h index a9e42046..b6718c05 100644 --- a/src/nfa/lbr.h +++ b/src/nfa/lbr.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -101,6 +102,52 @@ char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest, #define nfaExecLbrNVerm_B_Reverse 
NFA_API_NO_IMPL #define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL +#ifdef HAVE_SVE2 + +// LBR Verm16 + +char nfaExecLbrVerm16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrVerm16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrVerm16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrVerm16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecLbrVerm16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrVerm16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrVerm16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecLbrVerm16_testEOD NFA_API_NO_IMPL +#define nfaExecLbrVerm16_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrVerm16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// LBR Negated Verm16 + +char nfaExecLbrNVerm16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrNVerm16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrNVerm16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrNVerm16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecLbrNVerm16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrNVerm16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrNVerm16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + 
+#define nfaExecLbrNVerm16_testEOD NFA_API_NO_IMPL +#define nfaExecLbrNVerm16_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrNVerm16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +#endif // HAVE_SVE2 + // LBR Shuf char nfaExecLbrShuf_Q(const struct NFA *n, struct mq *q, s64a end); diff --git a/src/nfa/lbr_internal.h b/src/nfa/lbr_internal.h index 8ba11dd4..beb1a50b 100644 --- a/src/nfa/lbr_internal.h +++ b/src/nfa/lbr_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,6 +57,11 @@ struct lbr_verm { char c; //!< escape char }; +struct lbr_verm16 { + struct lbr_common common; + m128 mask; +}; + struct lbr_shuf { struct lbr_common common; m128 mask_lo; //!< shufti lo mask for escape chars diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index 552754d6..5829d43d 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -260,6 +261,13 @@ size_t limitByReach(const struct mpv_kilopuff *kp, const u8 *buf, } else if (kp->type == MPV_NVERM) { return nvermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf; } +#ifdef HAVE_SVE2 + else if (kp->type == MPV_VERM16) { + return vermicelli16Exec(kp->u.verm16.mask, buf, buf + length) - buf; + } else if (kp->type == MPV_NVERM16) { + return nvermicelli16Exec(kp->u.verm16.mask, buf, buf + length) - buf; + } +#endif // HAVE_SVE2 assert(kp->type == MPV_DOT); return length; diff --git a/src/nfa/mpv_internal.h b/src/nfa/mpv_internal.h index a52853dc..b6b92504 100644 --- a/src/nfa/mpv_internal.h +++ b/src/nfa/mpv_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2021, 
Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +37,8 @@ #define MPV_SHUFTI 2 #define MPV_TRUFFLE 3 #define MPV_NVERM 4 +#define MPV_VERM16 5 +#define MPV_NVERM16 6 struct mpv_puffette { u32 repeats; @@ -65,6 +68,9 @@ struct mpv_kilopuff { struct { char c; } verm; + struct { + m128 mask; + } verm16; struct { m128 mask_lo; m128 mask_hi; diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 5e59c04e..d85c90b0 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +34,7 @@ #include "nfa_internal.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "vermicellicompile.h" #include "util/alloc.h" #include "util/multibit_build.h" #include "util/order_check.h" @@ -175,6 +177,14 @@ void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it, size_t set = reach.find_first(); assert(set != CharReach::npos); kp->u.verm.c = (char)set; +#ifdef HAVE_SVE2 + } else if (reach.count() >= 240) { + kp->type = MPV_VERM16; + vermicelli16Build(~reach, (u8 *)&kp->u.verm16.mask); + } else if (reach.count() <= 16) { + kp->type = MPV_NVERM16; + vermicelli16Build(reach, (u8 *)&kp->u.verm16.mask); +#endif // HAVE_SVE2 } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo, (u8 *)&kp->u.shuf.mask_hi) != -1) { kp->type = MPV_SHUFTI; diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index 75cac4b4..6785e939 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are
permitted provided that the following conditions are met: @@ -53,6 +54,14 @@ // general framework calls +#ifdef HAVE_SVE2 +#define VERM16_CASES(dbnt_func) \ + DISPATCH_CASE(LBR_NFA_VERM16, LbrVerm16, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_NVERM16, LbrNVerm16, dbnt_func); +#else +#define VERM16_CASES(dbnt_func) +#endif + #define DISPATCH_BY_NFA_TYPE(dbnt_func) \ switch (nfa->type) { \ DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ @@ -80,6 +89,7 @@ DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \ DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \ DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \ + VERM16_CASES(dbnt_func) \ default: \ assert(0); \ } diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 47153163..ed0e2f01 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -340,6 +341,42 @@ const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats_other_than_firsts = const char *NFATraits<LBR_NFA_NVERM>::name = "Lim Bounded Repeat (NV)"; #endif +#ifdef HAVE_SVE2 + +template<> struct NFATraits<LBR_NFA_VERM16> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<LBR_NFA_VERM16>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_VERM16>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_VERM16>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<LBR_NFA_VERM16>::name = "Lim Bounded Repeat (V16)"; +#endif + +template<> struct NFATraits<LBR_NFA_NVERM16> { + UNUSED static const char *name; + static const NFACategory
category = NFA_OTHER; + static const u32 stateAlign = 8; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM16>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM16>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM16>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<LBR_NFA_NVERM16>::name = "Lim Bounded Repeat (NV16)"; +#endif + +#endif // HAVE_SVE2 + template<> struct NFATraits<LBR_NFA_SHUF> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index ad27e28b..f7155aef 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,6 +66,10 @@ enum NFAEngineType { LBR_NFA_DOT, /**< magic pseudo nfa */ LBR_NFA_VERM, /**< magic pseudo nfa */ LBR_NFA_NVERM, /**< magic pseudo nfa */ +#ifdef HAVE_SVE2 + LBR_NFA_VERM16, /**< magic pseudo nfa */ + LBR_NFA_NVERM16, /**< magic pseudo nfa */ +#endif // HAVE_SVE2 LBR_NFA_SHUF, /**< magic pseudo nfa */ LBR_NFA_TRUF, /**< magic pseudo nfa */ CASTLE_NFA, /**< magic pseudo nfa */ @@ -218,6 +223,9 @@ static really_inline int isNfaType(u8 t) { static really_inline int isLbrType(u8 t) { return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM || +#ifdef HAVE_SVE2 + t == LBR_NFA_VERM16 || t == LBR_NFA_NVERM16 || +#endif // HAVE_SVE2 t == LBR_NFA_SHUF || t == LBR_NFA_TRUF; } diff --git a/src/nfa/vermicelli_sve.h b/src/nfa/vermicelli_sve.h index 6a76f671..cadaac8e 100644 --- a/src/nfa/vermicelli_sve.h +++ b/src/nfa/vermicelli_sve.h @@ -232,10 +232,9 @@ const u8
*rdvermSearchLoopBody(svuint16_t chars, const u8 *buf) { } static really_inline -const u8 *vermSearch(char c, bool nocase, const u8 *buf, const u8 *buf_end, +const u8 *vermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end, bool negate) { assert(buf < buf_end); - svuint8_t chars = getCharMaskSingle(c, nocase); size_t len = buf_end - buf; if (len <= svcntb()) { return vermSearchOnce(chars, buf, buf_end, negate); @@ -267,10 +266,9 @@ const u8 *vermSearch(char c, bool nocase, const u8 *buf, const u8 *buf_end, } static really_inline -const u8 *rvermSearch(char c, bool nocase, const u8 *buf, const u8 *buf_end, +const u8 *rvermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end, bool negate) { assert(buf < buf_end); - svuint8_t chars = getCharMaskSingle(c, nocase); size_t len = buf_end - buf; if (len <= svcntb()) { return rvermSearchOnce(chars, buf, buf_end, negate); @@ -353,7 +351,8 @@ const u8 *vermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("verm scan %s\\x%02hhx over %td bytes\n", nocase ? "nocase " : "", c, buf_end - buf); - const u8 *ptr = vermSearch(c, nocase, buf, buf_end, false); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = vermSearch(chars, buf, buf_end, false); return ptr ? ptr : buf_end; } @@ -364,7 +363,8 @@ const u8 *nvermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("nverm scan %s\\x%02hhx over %td bytes\n", nocase ? "nocase " : "", c, buf_end - buf); - const u8 *ptr = vermSearch(c, nocase, buf, buf_end, true); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = vermSearch(chars, buf, buf_end, true); return ptr ? ptr : buf_end; } @@ -375,7 +375,8 @@ const u8 *rvermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %td bytes\n", nocase ? 
"nocase " : "", c, buf_end - buf); - const u8 *ptr = rvermSearch(c, nocase, buf, buf_end, false); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = rvermSearch(chars, buf, buf_end, false); return ptr ? ptr : buf - 1; } @@ -386,7 +387,8 @@ const u8 *rnvermicelliExec(char c, bool nocase, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %td bytes\n", nocase ? "nocase " : "", c, buf_end - buf); - const u8 *ptr = rvermSearch(c, nocase, buf, buf_end, true); + svuint8_t chars = getCharMaskSingle(c, nocase); + const u8 *ptr = rvermSearch(chars, buf, buf_end, true); return ptr ? ptr : buf - 1; } @@ -427,4 +429,45 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, bool nocase, const u8 *buf, } } return buf - 1; +} + +static really_inline +svuint8_t getDupSVEMaskFrom128(m128 _mask) { + return svld1rq_u8(svptrue_b8(), (const uint8_t *)&_mask); +} + +static really_inline +const u8 *vermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("verm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = vermSearch(chars, buf, buf_end, false); + return ptr ? ptr : buf_end; +} + +static really_inline +const u8 *nvermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("nverm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = vermSearch(chars, buf, buf_end, true); + return ptr ? ptr : buf_end; +} + +static really_inline +const u8 *rvermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("rverm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = rvermSearch(chars, buf, buf_end, false); + return ptr ? 
ptr : buf - 1; +} + +static really_inline +const u8 *rnvermicelli16Exec(const m128 _chars, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("rnverm16 scan over %td bytes\n", buf_end - buf); + svuint8_t chars = getDupSVEMaskFrom128(_chars); + const u8 *ptr = rvermSearch(chars, buf, buf_end, true); + return ptr ? ptr : buf - 1; } \ No newline at end of file diff --git a/src/nfa/vermicellicompile.cpp b/src/nfa/vermicellicompile.cpp new file mode 100644 index 00000000..5b6ca036 --- /dev/null +++ b/src/nfa/vermicellicompile.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Vermicelli acceleration: compile code. + */ +#include "vermicellicompile.h" +#include "util/charreach.h" + +#include <cstring> + +namespace ue2 { + +bool vermicelli16Build(const CharReach &chars, u8 *rv) { + size_t i = chars.find_first(); + u8 arr[16]; + std::memset(arr, i, sizeof(arr)); + size_t count = 1; + for (i = chars.find_next(i); i != CharReach::npos; i = chars.find_next(i)) { + if (count == sizeof(arr)) return false; + arr[count] = i; + ++count; + } + std::memcpy(rv, arr, sizeof(arr)); + return true; +} + +} // namespace ue2 diff --git a/src/nfa/vermicellicompile.h b/src/nfa/vermicellicompile.h new file mode 100644 index 00000000..5c70100a --- /dev/null +++ b/src/nfa/vermicellicompile.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021, Arm Limited + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Vermicelli acceleration: compile code. 
+ */ + +#ifndef VERM_COMPILE_H +#define VERM_COMPILE_H + +#include "ue2common.h" +#include "util/charreach.h" +#include "util/flat_containers.h" + +#include + +namespace ue2 { + +bool vermicelli16Build(const CharReach &chars, u8 *rv); + +} // namespace ue2 + +#endif // VERM_COMPILE_H diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index d8ba503c..ca3a1a2e 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,6 +44,7 @@ #include "nfa/repeatcompile.h" #include "nfa/shufticompile.h" #include "nfa/trufflecompile.h" +#include "nfa/vermicellicompile.h" #include "util/alloc.h" #include "util/bitutils.h" // for lg2 #include "util/compile_context.h" @@ -209,6 +211,56 @@ bytecode_ptr buildLbrNVerm(const CharReach &cr, const depth &repeatMin, return nfa; } +#ifdef HAVE_SVE2 + +static +bytecode_ptr buildLbrVerm16(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + const CharReach escapes(~cr); + + if (escapes.count() > 16) { + return nullptr; + } + + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); + auto nfa = makeLbrNfa(LBR_NFA_VERM16, rtype, repeatMax); + struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get()); + vermicelli16Build(escapes, (u8 *)&lv->mask); + + fillNfa(nfa.get(), &lv->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built verm16 lbr\n"); + return nfa; +} + +static +bytecode_ptr buildLbrNVerm16(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + const CharReach escapes(cr); + + if (escapes.count() > 16) { + return nullptr; + } + + enum RepeatType rtype = 
chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); + auto nfa = makeLbrNfa(LBR_NFA_NVERM16, rtype, repeatMax); + struct lbr_verm16 *lv = (struct lbr_verm16 *)getMutableImplNfa(nfa.get()); + vermicelli16Build(escapes, (u8 *)&lv->mask); + + fillNfa(nfa.get(), &lv->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built negated verm16 lbr\n"); + return nfa; +} + +#endif // HAVE_SVE2 + static bytecode_ptr buildLbrShuf(const CharReach &cr, const depth &repeatMin, const depth &repeatMax, u32 minPeriod, @@ -269,6 +321,16 @@ bytecode_ptr constructLBR(const CharReach &cr, const depth &repeatMin, nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, report); } +#ifdef HAVE_SVE2 + if (!nfa) { + nfa = buildLbrVerm16(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + if (!nfa) { + nfa = buildLbrNVerm16(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } +#endif // HAVE_SVE2 if (!nfa) { nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset, report); diff --git a/src/rose/rose_build_lit_accel.cpp b/src/rose/rose_build_lit_accel.cpp index 62f660fb..7286fddb 100644 --- a/src/rose/rose_build_lit_accel.cpp +++ b/src/rose/rose_build_lit_accel.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2021, Arm Limited * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +37,7 @@ #include "nfa/accel.h" #include "nfa/shufticompile.h" #include "nfa/trufflecompile.h" +#include "nfa/vermicellicompile.h" #include "util/compare.h" #include "util/dump_charclass.h" #include "util/ue2string.h" @@ -440,6 +442,17 @@ void findForwardAccelScheme(const vector &lits, } const CharReach &cr = reach[min_offset]; +#ifdef HAVE_SVE2 + if (min_count <= 16) { + vermicelli16Build(cr, (u8 *)&aux->verm16.mask); + DEBUG_PRINTF("built verm16 for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), 
cr.count(), min_offset);
+        aux->verm16.accel_type = ACCEL_VERM16;
+        aux->verm16.offset = verify_u8(min_offset);
+        return;
+    }
+#endif // HAVE_SVE2
+
     if (-1 != shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo,
                                (u8 *)&aux->shufti.hi)) {
         DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
diff --git a/unit/internal/rvermicelli.cpp b/unit/internal/rvermicelli.cpp
index 497ffe07..2806c5d8 100644
--- a/unit/internal/rvermicelli.cpp
+++ b/unit/internal/rvermicelli.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2021, Arm Limited
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -304,3 +305,267 @@ TEST(RDoubleVermicelli, Exec5) {
         }
     }
 }
+
+#ifdef HAVE_SVE2
+
+#include "nfa/vermicellicompile.h"
+using namespace ue2;
+
+union Matches { // same 16 bytes as u8[16] (written by vermicelli16Build) and as m128 (passed to the *Exec calls)
+    u8 val8[16];
+    m128 val128;
+};
+
+TEST(RVermicelli16, ExecNoMatch1) { // t1 has no byte from {a, B, A}: expects begin - 1 for every window
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+
+    CharReach chars;
+    chars.set('a');
+    chars.set('B');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        for (size_t j = 0; j < 16; j++) {
+            const u8 *begin = (const u8 *)t1 + i;
+            const u8 *end = (const u8 *)t1 + strlen(t1) - j;
+
+            const u8 *rv = rvermicelli16Exec(matches.val128, begin, end);
+            ASSERT_EQ(begin - 1, rv);
+        }
+    }
+}
+
+TEST(RVermicelli16, Exec1) { // set {a, A}, end trimmed by i: expects buf + 48
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = rvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i);
+        ASSERT_EQ(buf + 48, rv);
+    }
+}
+
+TEST(RVermicelli16, Exec2) { // set {a, A}, start advanced by i: expects buf + 48
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = rvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 48, rv);
+    }
+}
+
+TEST(RVermicelli16, Exec3) { // {a} expects buf + 47; {a, A} expects buf + 48
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaAbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    Matches matches_a;
+    bool ret = vermicelli16Build(chars, matches_a.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1) - i);
+        ASSERT_EQ(buf + 47, rv);
+
+        rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1) - i);
+        ASSERT_EQ(buf + 48, rv);
+    }
+}
+
+TEST(RVermicelli16, Exec4) { // sets t1[16 + i] = 'a' before each scan: expects buf + 16 + i
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    Matches matches_a;
+    bool ret = vermicelli16Build(chars, matches_a.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 31; i++) {
+        t1[16 + i] = 'a';
+        const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 16 + i, rv);
+
+        rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 16 + i, rv);
+    }
+}
+
+TEST(RVermicelli16, Exec5) { // matches[k] holds 'a'..'a' + k (chars is cumulative): expects buf + j + 17
+    char t1[] = "qqqqqqqqqqqqqqqqqabcdefghijklmnopqqqqqqqqqqqqqqqqqqqqq";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    Matches matches[16];
+    bool ret;
+
+    for (int i = 0; i < 16; ++i) {
+        chars.set('a' + i);
+        ret = vermicelli16Build(chars, matches[i].val8);
+        ASSERT_TRUE(ret);
+    }
+
+    for (int j = 0; j < 16; ++j) {
+        for (size_t i = 0; i < 16; i++) {
+            const u8 *rv = rvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i);
+            ASSERT_EQ(buf + j + 17, rv);
+        }
+    }
+}
+
+TEST(RNVermicelli16, ExecNoMatch1) { // every byte of t1 is in {b, B, A}: expects (start - 1) for every window
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    chars.set('B');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        for (size_t j = 0; j < 16; j++) {
+            const u8 *rv = rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j);
+            ASSERT_EQ(buf + i - 1, rv);
+        }
+    }
+}
+
+TEST(RNVermicelli16, Exec1) { // set {b, A}, both ends moved by i: expects buf + 48
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - i);
+        ASSERT_EQ(buf + 48, rv);
+    }
+}
+
+TEST(RNVermicelli16, Exec2) { // set {b, A}, end trimmed by i: expects buf + 48
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = rnvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i);
+        ASSERT_EQ(buf + 48, rv);
+    }
+}
+
+TEST(RNVermicelli16, Exec3) { // {b} expects buf + 48; {b, A} expects buf + 47
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbaaaaaaaaaaaaaaaaaaaaaaAbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    Matches matches_b;
+    bool ret = vermicelli16Build(chars, matches_b.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 48, rv);
+
+        rv = rnvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 47, rv);
+    }
+}
+
+TEST(RNVermicelli16, Exec4) { // sets t1[16 + i] = 'a' before each scan: expects buf + 16 + i
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    Matches matches_b;
+    bool ret = vermicelli16Build(chars, matches_b.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 31; i++) {
+        t1[16 + i] = 'a';
+        const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 16 + i, rv);
+
+        rv = rnvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 16 + i, rv);
+    }
+}
+
+TEST(RNVermicelli16, Exec5) { // matches[k] holds 'q' - k..'q' (chars is cumulative): expects buf - j + 32
+    char t1[] = "aaaaaaaaaaaaaaaaaabcdefghijklmnopqqqqqqqqqqqqqqqqqqqqqqqq";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    Matches matches[16];
+    bool ret;
+
+    for (int i = 0; i < 16; ++i) {
+        chars.set('q' - i);
+        ret = vermicelli16Build(chars, matches[i].val8);
+        ASSERT_TRUE(ret);
+    }
+
+    for (int j = 0; j < 16; ++j) {
+        for (size_t i = 0; i < 16; i++) {
+            const u8 *rv = rnvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i);
+            ASSERT_EQ(buf - j + 32, rv);
+        }
+    }
+}
+
+#endif // HAVE_SVE2
\ No newline at end of file
diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp
index 5e4a8253..bc007e1a 100644
--- a/unit/internal/vermicelli.cpp
+++ b/unit/internal/vermicelli.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2021, Arm Limited
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted 
provided that the following conditions are met:
@@ -522,3 +523,264 @@ TEST(DoubleVermicelliMasked, Exec4) {
     }
 }
 
+#ifdef HAVE_SVE2
+
+#include "nfa/vermicellicompile.h"
+using namespace ue2;
+
+union Matches { // same 16 bytes as u8[16] (written by vermicelli16Build) and as m128 (passed to the *Exec calls)
+    u8 val8[16];
+    m128 val128;
+};
+
+TEST(Vermicelli16, ExecNoMatch1) { // t1 has no byte from {a, B, A}: expects the end pointer for every window
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    chars.set('B');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        for (size_t j = 0; j < 16; j++) {
+            const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j);
+            ASSERT_EQ(buf + strlen(t1) - j, rv);
+        }
+    }
+}
+
+TEST(Vermicelli16, Exec1) { // set {a, A}, start advanced by i: expects buf + 17
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 17, rv);
+    }
+}
+
+TEST(Vermicelli16, Exec2) { // set {a, A}, start advanced by i: expects buf + 17
+    char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 17, rv);
+    }
+}
+
+TEST(Vermicelli16, Exec3) { // {a} expects buf + 18; {a, A} expects buf + 17
+    char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    Matches matches_a;
+    bool ret = vermicelli16Build(chars, matches_a.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = vermicelli16Exec(matches_a.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 18, rv);
+
+        rv = vermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 17, rv);
+    }
+}
+
+TEST(Vermicelli16, Exec4) { // sets t1[48 - i] = 'a' before each scan: expects buf + 48 - i
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('a');
+    Matches matches_a;
+    bool ret = vermicelli16Build(chars, matches_a.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 31; i++) {
+        t1[48 - i] = 'a';
+        const u8 *rv = vermicelli16Exec(matches_a.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 48 - i, rv);
+
+        rv = vermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 48 - i, rv);
+    }
+}
+
+TEST(Vermicelli16, Exec5) { // matches[k] holds 'p' - k..'p' (chars is cumulative): expects buf - j + 32
+    char t1[] = "qqqqqqqqqqqqqqqqqabcdefghijklmnopqqqqqqqqqqqqqqqqqqqqq";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    Matches matches[16];
+    bool ret;
+
+    for (int i = 0; i < 16; ++i) {
+        chars.set('p' - i);
+        ret = vermicelli16Build(chars, matches[i].val8);
+        ASSERT_TRUE(ret);
+    }
+
+    for (int j = 0; j < 16; ++j) {
+        for (size_t i = 0; i < 16; i++) {
+            const u8 *rv = vermicelli16Exec(matches[j].val128, buf + i,buf + strlen(t1));
+            ASSERT_EQ(buf - j + 32, rv);
+        }
+    }
+}
+
+TEST(NVermicelli16, ExecNoMatch1) { // every byte of t1 is in {b, B, A}: expects the end pointer for every window
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    chars.set('B');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        for (size_t j = 0; j < 16; j++) {
+            const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j);
+            ASSERT_EQ((buf + strlen(t1) - j), rv);
+        }
+    }
+}
+
+TEST(NVermicelli16, Exec1) { // set {b, A}, start advanced by i: expects buf + 17
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 17, rv);
+    }
+}
+
+TEST(NVermicelli16, Exec2) { // set {b, A}, start advanced by i: expects buf + 17
+    char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    chars.set('A');
+    Matches matches;
+    bool ret = vermicelli16Build(chars, matches.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 17, rv);
+    }
+}
+
+TEST(NVermicelli16, Exec3) { // {b} expects buf + 17; {b, A} expects buf + 18
+    char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaaaaaaaaabbbbbbbbabbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    Matches matches_b;
+    bool ret = vermicelli16Build(chars, matches_b.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = nvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 17, rv);
+
+        rv = nvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1));
+        ASSERT_EQ(buf + 18, rv);
+    }
+}
+
+TEST(NVermicelli16, Exec4) { // sets t1[48 - i] = 'a' before each scan: expects buf + 48 - i
+    char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    chars.set('b');
+    Matches matches_b;
+    bool ret = vermicelli16Build(chars, matches_b.val8);
+    ASSERT_TRUE(ret);
+
+    chars.set('A');
+    Matches matches_A;
+    ret = vermicelli16Build(chars, matches_A.val8);
+    ASSERT_TRUE(ret);
+
+    for (size_t i = 0; i < 31; i++) {
+        t1[48 - i] = 'a';
+        const u8 *rv = nvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 48 - i, rv);
+
+        rv = nvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
+        ASSERT_EQ(buf + 48 - i, rv);
+    }
+}
+
+TEST(NVermicelli16, Exec5) { // matches[k] holds 'a'..'a' + k (chars is cumulative): expects buf + j + 18
+    char t1[] = "aaaaaaaaaaaaaaaaaabcdefghijklmnopqaaaaaaaaaaaaaaaaaaaaa";
+    const u8 *buf = (const u8 *)t1;
+
+    CharReach chars;
+    Matches matches[16];
+    bool ret;
+
+    for (int i = 0; i < 16; ++i) {
+        chars.set('a' + i);
+        ret = vermicelli16Build(chars, matches[i].val8);
+        ASSERT_TRUE(ret);
+    }
+
+    for (int j = 0; j < 16; ++j) {
+        for (size_t i = 0; i < 16; i++) {
+            const u8 *rv = nvermicelli16Exec(matches[j].val128, buf + i, buf + strlen(t1));
+            ASSERT_EQ(buf + j + 18, rv);
+        }
+    }
+}
+
+#endif // HAVE_SVE2
\ No newline at end of file