src/nfa/mpv.h
src/nfa/mpv.c
src/nfa/mpv_internal.h
- src/nfa/multiaccel_common.h
- src/nfa/multiaccel_doubleshift.h
- src/nfa/multiaccel_doubleshiftgrab.h
- src/nfa/multiaccel_long.h
- src/nfa/multiaccel_longgrab.h
- src/nfa/multiaccel_shift.h
- src/nfa/multiaccel_shiftgrab.h
- src/nfa/multishufti.c
- src/nfa/multishufti_avx2.h
- src/nfa/multishufti_sse.h
- src/nfa/multishufti.h
- src/nfa/multitruffle.c
- src/nfa/multitruffle_avx2.h
- src/nfa/multitruffle_sse.h
- src/nfa/multitruffle.h
- src/nfa/multivermicelli.c
- src/nfa/multivermicelli.h
- src/nfa/multivermicelli_sse.h
- src/nfa/multivermicelli_avx2.h
src/nfa/nfa_api.h
src/nfa/nfa_api_dispatch.c
src/nfa/nfa_internal.h
src/nfa/sheng_impl.h
src/nfa/sheng_impl4.h
src/nfa/sheng_internal.h
- src/nfa/shufti_common.h
src/nfa/shufti.c
src/nfa/shufti.h
src/nfa/tamarama.c
src/nfa/tamarama.h
src/nfa/tamarama_internal.h
- src/nfa/truffle_common.h
src/nfa/truffle.c
src/nfa/truffle.h
src/nfa/vermicelli.h
src/nfa/mpv_internal.h
src/nfa/mpvcompile.cpp
src/nfa/mpvcompile.h
- src/nfa/multiaccel_compilehelper.cpp
- src/nfa/multiaccel_compilehelper.h
src/nfa/nfa_api.h
src/nfa/nfa_api_queue.h
src/nfa/nfa_api_util.h
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include "shufti.h"
#include "truffle.h"
#include "vermicelli.h"
-#include "multishufti.h"
-#include "multitruffle.h"
-#include "multivermicelli.h"
#include "ue2common.h"
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
rv = c_end;
break;
- /* multibyte matchers */
- case ACCEL_MLVERM:
- DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MLVERM_NOCASE:
- DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MLGVERM:
- DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MLGVERM_NOCASE:
- DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MSVERM:
- DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MSVERM_NOCASE:
- DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MSGVERM:
- DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MSGVERM_NOCASE:
- DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
- break;
- case ACCEL_MDSVERM:
- DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
- accel->mdverm.len1, accel->mdverm.len2);
- break;
- case ACCEL_MDSVERM_NOCASE:
- DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
- accel->mdverm.len1, accel->mdverm.len2);
- break;
- case ACCEL_MDSGVERM:
- DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
- accel->mdverm.len1, accel->mdverm.len2);
- break;
- case ACCEL_MDSGVERM_NOCASE:
- DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
- accel->mdverm.len1, accel->mdverm.len2);
- break;
- case ACCEL_MLSHUFTI:
- DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
- accel->mshufti.len);
- break;
- case ACCEL_MLGSHUFTI:
- DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
- accel->mshufti.len);
- break;
- case ACCEL_MSSHUFTI:
- DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
- accel->mshufti.len);
- break;
- case ACCEL_MSGSHUFTI:
- DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
- accel->mshufti.len);
- break;
- case ACCEL_MDSSHUFTI:
- DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
- accel->mdshufti.len1, accel->mdshufti.len2);
- break;
- case ACCEL_MDSGSHUFTI:
- DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
- accel->mdshufti.len1, accel->mdshufti.len2);
- break;
- case ACCEL_MLTRUFFLE:
- DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
- c, c_end, accel->mtruffle.len);
- break;
- case ACCEL_MLGTRUFFLE:
- DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
- c, c_end, accel->mtruffle.len);
- break;
- case ACCEL_MSTRUFFLE:
- DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
- c, c_end, accel->mtruffle.len);
- break;
- case ACCEL_MSGTRUFFLE:
- DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
- c, c_end, accel->mtruffle.len);
- break;
- case ACCEL_MDSTRUFFLE:
- DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshift_truffleExec(accel->mdtruffle.mask1,
- accel->mdtruffle.mask2, c, c_end,
- accel->mdtruffle.len1,
- accel->mdtruffle.len2);
- break;
- case ACCEL_MDSGTRUFFLE:
- DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1,
- accel->mdtruffle.mask2, c, c_end,
- accel->mdtruffle.len1,
- accel->mdtruffle.len2);
- break;
-
default:
assert(!"not here");
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
ACCEL_DSHUFTI,
ACCEL_TRUFFLE,
ACCEL_RED_TAPE,
- /* multibyte vermicellis */
- ACCEL_MLVERM,
- ACCEL_MLVERM_NOCASE,
- ACCEL_MLGVERM,
- ACCEL_MLGVERM_NOCASE,
- ACCEL_MSVERM,
- ACCEL_MSVERM_NOCASE,
- ACCEL_MSGVERM,
- ACCEL_MSGVERM_NOCASE,
- ACCEL_MDSVERM,
- ACCEL_MDSVERM_NOCASE,
- ACCEL_MDSGVERM,
- ACCEL_MDSGVERM_NOCASE,
- /* multibyte shuftis */
- ACCEL_MLSHUFTI,
- ACCEL_MLGSHUFTI,
- ACCEL_MSSHUFTI,
- ACCEL_MSGSHUFTI,
- ACCEL_MDSSHUFTI,
- ACCEL_MDSGSHUFTI,
- /* multibyte truffles */
- ACCEL_MLTRUFFLE,
- ACCEL_MLGTRUFFLE,
- ACCEL_MSTRUFFLE,
- ACCEL_MSGTRUFFLE,
- ACCEL_MDSTRUFFLE,
- ACCEL_MDSGTRUFFLE,
- /* masked dverm */
ACCEL_DVERM_MASKED,
-
};
/** \brief Structure for accel framework. */
m128 lo2;
m128 hi2;
} dshufti;
- struct {
- u8 accel_type;
- u8 offset;
- m128 lo;
- m128 hi;
- u8 len;
- } mshufti;
- struct {
- u8 accel_type;
- u8 offset;
- m128 lo;
- m128 hi;
- u8 len1;
- u8 len2;
- } mdshufti;
struct {
u8 accel_type;
u8 offset;
m128 mask1;
m128 mask2;
} truffle;
- struct {
- u8 accel_type;
- u8 offset;
- m128 mask1;
- m128 mask2;
- u8 len;
- } mtruffle;
- struct {
- u8 accel_type;
- u8 offset;
- m128 mask1;
- m128 mask2;
- u8 len1;
- u8 len2;
- } mdtruffle;
};
/**
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
return "truffle";
case ACCEL_RED_TAPE:
return "red tape";
- case ACCEL_MLVERM:
- return "multibyte long vermicelli";
- case ACCEL_MLVERM_NOCASE:
- return "multibyte long vermicelli nocase";
- case ACCEL_MLGVERM:
- return "multibyte long-grab vermicelli";
- case ACCEL_MLGVERM_NOCASE:
- return "multibyte long-grab vermicelli nocase";
- case ACCEL_MSVERM:
- return "multibyte shift vermicelli";
- case ACCEL_MSVERM_NOCASE:
- return "multibyte shift vermicelli nocase";
- case ACCEL_MSGVERM:
- return "multibyte shift-grab vermicelli";
- case ACCEL_MSGVERM_NOCASE:
- return "multibyte shift-grab vermicelli nocase";
- case ACCEL_MDSVERM:
- return "multibyte doubleshift vermicelli";
- case ACCEL_MDSVERM_NOCASE:
- return "multibyte doubleshift vermicelli nocase";
- case ACCEL_MDSGVERM:
- return "multibyte doubleshift-grab vermicelli";
- case ACCEL_MDSGVERM_NOCASE:
- return "multibyte doubleshift-grab vermicelli nocase";
- case ACCEL_MLSHUFTI:
- return "multibyte long shufti";
- case ACCEL_MLGSHUFTI:
- return "multibyte long-grab shufti";
- case ACCEL_MSSHUFTI:
- return "multibyte shift shufti";
- case ACCEL_MSGSHUFTI:
- return "multibyte shift-grab shufti";
- case ACCEL_MDSSHUFTI:
- return "multibyte doubleshift shufti";
- case ACCEL_MDSGSHUFTI:
- return "multibyte doubleshift-grab shufti";
- case ACCEL_MLTRUFFLE:
- return "multibyte long truffle";
- case ACCEL_MLGTRUFFLE:
- return "multibyte long-grab truffle";
- case ACCEL_MSTRUFFLE:
- return "multibyte shift truffle";
- case ACCEL_MSGTRUFFLE:
- return "multibyte shift-grab truffle";
- case ACCEL_MDSTRUFFLE:
- return "multibyte doubleshift truffle";
- case ACCEL_MDSGTRUFFLE:
- return "multibyte doubleshift-grab truffle";
default:
return "unknown!";
}
(const u8 *)&accel.truffle.mask2);
break;
}
- case ACCEL_MLVERM:
- case ACCEL_MLVERM_NOCASE:
- case ACCEL_MLGVERM:
- case ACCEL_MLGVERM_NOCASE:
- case ACCEL_MSVERM:
- case ACCEL_MSVERM_NOCASE:
- case ACCEL_MSGVERM:
- case ACCEL_MSGVERM_NOCASE:
- fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len);
- break;
- case ACCEL_MDSVERM:
- case ACCEL_MDSVERM_NOCASE:
- case ACCEL_MDSGVERM:
- case ACCEL_MDSGVERM_NOCASE:
- fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1,
- accel.mdverm.len2);
- break;
- case ACCEL_MLSHUFTI:
- case ACCEL_MLGSHUFTI:
- case ACCEL_MSSHUFTI:
- case ACCEL_MSGSHUFTI:
- fprintf(f, " len:%u\n", accel.mshufti.len);
- dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo,
- (const u8 *)&accel.mshufti.hi);
- dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo,
- (const u8 *)&accel.mshufti.hi);
- break;
- case ACCEL_MDSSHUFTI:
- case ACCEL_MDSGSHUFTI:
- fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2);
- dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo,
- (const u8 *)&accel.mdshufti.hi);
- dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo,
- (const u8 *)&accel.mdshufti.hi);
- break;
- case ACCEL_MLTRUFFLE:
- case ACCEL_MLGTRUFFLE:
- case ACCEL_MSTRUFFLE:
- case ACCEL_MSGTRUFFLE:
- fprintf(f, " len:%u\n", accel.mtruffle.len);
- dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1,
- (const u8 *)&accel.mtruffle.mask2);
- dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1,
- (const u8 *)&accel.mtruffle.mask2);
- break;
- case ACCEL_MDSTRUFFLE:
- case ACCEL_MDSGTRUFFLE:
- fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2);
- dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1,
- (const u8 *)&accel.mdtruffle.mask2);
- dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1,
- (const u8 *)&accel.mdtruffle.mask2);
- break;
default:
fprintf(f, "\n");
break;
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
aux->accel_type = ACCEL_NONE;
}
-static
-void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
- if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
- DEBUG_PRINTF("no multimatch for us :(");
- return;
- }
-
- u32 offset = info.multiaccel_offset;
- const CharReach &stops = info.multiaccel_stops;
-
- assert(aux->accel_type == ACCEL_NONE);
- if (stops.all()) {
- return;
- }
-
- size_t outs = stops.count();
- DEBUG_PRINTF("%zu outs\n", outs);
- assert(outs && outs < 256);
-
- switch (info.ma_type) {
- case MultibyteAccelInfo::MAT_LONG:
- if (outs == 1) {
- aux->accel_type = ACCEL_MLVERM;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first();
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
- return;
- }
- if (outs == 2 && stops.isCaselessChar()) {
- aux->accel_type = ACCEL_MLVERM_NOCASE;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first() & CASE_CLEAR;
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
- aux->verm.c);
- return;
- }
- break;
- case MultibyteAccelInfo::MAT_LONGGRAB:
- if (outs == 1) {
- aux->accel_type = ACCEL_MLGVERM;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first();
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
- return;
- }
- if (outs == 2 && stops.isCaselessChar()) {
- aux->accel_type = ACCEL_MLGVERM_NOCASE;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first() & CASE_CLEAR;
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
- aux->verm.c);
- return;
- }
- break;
- case MultibyteAccelInfo::MAT_SHIFT:
- if (outs == 1) {
- aux->accel_type = ACCEL_MSVERM;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first();
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
- return;
- }
- if (outs == 2 && stops.isCaselessChar()) {
- aux->accel_type = ACCEL_MSVERM_NOCASE;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first() & CASE_CLEAR;
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
- aux->verm.c);
- return;
- }
- break;
- case MultibyteAccelInfo::MAT_SHIFTGRAB:
- if (outs == 1) {
- aux->accel_type = ACCEL_MSGVERM;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first();
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
- return;
- }
- if (outs == 2 && stops.isCaselessChar()) {
- aux->accel_type = ACCEL_MSGVERM_NOCASE;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first() & CASE_CLEAR;
- aux->mverm.len = info.ma_len1;
- DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
- aux->verm.c);
- return;
- }
- break;
- case MultibyteAccelInfo::MAT_DSHIFT:
- if (outs == 1) {
- aux->accel_type = ACCEL_MDSVERM;
- aux->mdverm.offset = offset;
- aux->mdverm.c = stops.find_first();
- aux->mdverm.len1 = info.ma_len1;
- aux->mdverm.len2 = info.ma_len2;
- DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
- return;
- }
- if (outs == 2 && stops.isCaselessChar()) {
- aux->accel_type = ACCEL_MDSVERM_NOCASE;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first() & CASE_CLEAR;
- aux->mdverm.len1 = info.ma_len1;
- aux->mdverm.len2 = info.ma_len2;
- DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
- aux->verm.c);
- return;
- }
- break;
- case MultibyteAccelInfo::MAT_DSHIFTGRAB:
- if (outs == 1) {
- aux->accel_type = ACCEL_MDSGVERM;
- aux->mdverm.offset = offset;
- aux->mdverm.c = stops.find_first();
- aux->mdverm.len1 = info.ma_len1;
- aux->mdverm.len2 = info.ma_len2;
- DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
- return;
- }
- if (outs == 2 && stops.isCaselessChar()) {
- aux->accel_type = ACCEL_MDSGVERM_NOCASE;
- aux->mverm.offset = offset;
- aux->mverm.c = stops.find_first() & CASE_CLEAR;
- aux->mdverm.len1 = info.ma_len1;
- aux->mdverm.len2 = info.ma_len2;
- DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
- aux->verm.c);
- return;
- }
- break;
- default:
- // shouldn't happen
- assert(0);
- return;
- }
-
- DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
-
- switch (info.ma_type) {
- case MultibyteAccelInfo::MAT_LONG:
- if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
- (u8 *)&aux->mshufti.hi) == -1) {
- break;
- }
- aux->accel_type = ACCEL_MLSHUFTI;
- aux->mshufti.offset = offset;
- aux->mshufti.len = info.ma_len1;
- return;
- case MultibyteAccelInfo::MAT_LONGGRAB:
- if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
- (u8 *)&aux->mshufti.hi) == -1) {
- break;
- }
- aux->accel_type = ACCEL_MLGSHUFTI;
- aux->mshufti.offset = offset;
- aux->mshufti.len = info.ma_len1;
- return;
- case MultibyteAccelInfo::MAT_SHIFT:
- if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
- (u8 *)&aux->mshufti.hi) == -1) {
- break;
- }
- aux->accel_type = ACCEL_MSSHUFTI;
- aux->mshufti.offset = offset;
- aux->mshufti.len = info.ma_len1;
- return;
- case MultibyteAccelInfo::MAT_SHIFTGRAB:
- if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
- (u8 *)&aux->mshufti.hi) == -1) {
- break;
- }
- aux->accel_type = ACCEL_MSGSHUFTI;
- aux->mshufti.offset = offset;
- aux->mshufti.len = info.ma_len1;
- return;
- case MultibyteAccelInfo::MAT_DSHIFT:
- if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
- (u8 *)&aux->mdshufti.hi) == -1) {
- break;
- }
- aux->accel_type = ACCEL_MDSSHUFTI;
- aux->mdshufti.offset = offset;
- aux->mdshufti.len1 = info.ma_len1;
- aux->mdshufti.len2 = info.ma_len2;
- return;
- case MultibyteAccelInfo::MAT_DSHIFTGRAB:
- if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
- (u8 *)&aux->mdshufti.hi) == -1) {
- break;
- }
- aux->accel_type = ACCEL_MDSGSHUFTI;
- aux->mdshufti.offset = offset;
- aux->mdshufti.len1 = info.ma_len1;
- aux->mdshufti.len2 = info.ma_len2;
- return;
- default:
- // shouldn't happen
- assert(0);
- return;
- }
- DEBUG_PRINTF("shufti build failed, falling through\n");
-
- if (outs <= ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
- switch (info.ma_type) {
- case MultibyteAccelInfo::MAT_LONG:
- aux->accel_type = ACCEL_MLTRUFFLE;
- aux->mtruffle.offset = offset;
- aux->mtruffle.len = info.ma_len1;
- truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
- (u8 *)&aux->mtruffle.mask2);
- break;
- case MultibyteAccelInfo::MAT_LONGGRAB:
- aux->accel_type = ACCEL_MLGTRUFFLE;
- aux->mtruffle.offset = offset;
- aux->mtruffle.len = info.ma_len1;
- truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
- (u8 *)&aux->mtruffle.mask2);
- break;
- case MultibyteAccelInfo::MAT_SHIFT:
- aux->accel_type = ACCEL_MSTRUFFLE;
- aux->mtruffle.offset = offset;
- aux->mtruffle.len = info.ma_len1;
- truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
- (u8 *)&aux->mtruffle.mask2);
- break;
- case MultibyteAccelInfo::MAT_SHIFTGRAB:
- aux->accel_type = ACCEL_MSGTRUFFLE;
- aux->mtruffle.offset = offset;
- aux->mtruffle.len = info.ma_len1;
- truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
- (u8 *)&aux->mtruffle.mask2);
- break;
- case MultibyteAccelInfo::MAT_DSHIFT:
- aux->accel_type = ACCEL_MDSTRUFFLE;
- aux->mdtruffle.offset = offset;
- aux->mdtruffle.len1 = info.ma_len1;
- aux->mdtruffle.len2 = info.ma_len2;
- truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
- (u8 *)&aux->mdtruffle.mask2);
- break;
- case MultibyteAccelInfo::MAT_DSHIFTGRAB:
- aux->accel_type = ACCEL_MDSGTRUFFLE;
- aux->mdtruffle.offset = offset;
- aux->mdtruffle.len1 = info.ma_len1;
- aux->mdtruffle.len2 = info.ma_len2;
- truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
- (u8 *)&aux->mdtruffle.mask2);
- break;
- default:
- // shouldn't happen
- assert(0);
- return;
- }
- return;
- }
-
- DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
-}
-
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
assert(aux->accel_type == ACCEL_NONE);
if (info.single_stops.none()) {
aux->accel_type = ACCEL_RED_TAPE;
aux->generic.offset = info.single_offset;
}
- if (aux->accel_type == ACCEL_NONE) {
- buildAccelMulti(info, aux);
- }
if (aux->accel_type == ACCEL_NONE) {
buildAccelDouble(info, aux);
}
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
namespace ue2 {
-struct MultibyteAccelInfo {
- /* multibyte accel schemes, ordered by strength */
- enum multiaccel_type {
- MAT_SHIFT,
- MAT_SHIFTGRAB,
- MAT_DSHIFT,
- MAT_DSHIFTGRAB,
- MAT_LONG,
- MAT_LONGGRAB,
- MAT_MAX,
- MAT_NONE = MAT_MAX
- };
- CharReach cr;
- u32 offset = 0;
- u32 len1 = 0;
- u32 len2 = 0;
- multiaccel_type type = MAT_NONE;
-};
-
struct AccelInfo {
AccelInfo() : single_offset(0U), double_offset(0U),
- single_stops(CharReach::dot()),
- multiaccel_offset(0), ma_len1(0), ma_len2(0),
- ma_type(MultibyteAccelInfo::MAT_NONE) {}
+ single_stops(CharReach::dot()) {}
u32 single_offset; /**< offset correction to apply to single schemes */
u32 double_offset; /**< offset correction to apply to double schemes */
CharReach double_stop1; /**< single-byte accel stop literals for double
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
* literals */
CharReach single_stops; /**< escapes for single byte acceleration */
- u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
- CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
- u32 ma_len1; /**< multiaccel len1 */
- u32 ma_len2; /**< multiaccel len2 */
- MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
};
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
#include "nfa_internal.h"
#include "shufti.h"
#include "truffle.h"
-#include "multishufti.h"
-#include "multitruffle.h"
-#include "multivermicelli.h"
#include "ue2common.h"
#include "vermicelli.h"
#include "util/arch.h"
CharReach double_cr;
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
u32 double_offset;
-
- MultibyteAccelInfo ma_info;
};
struct limex_accel_info {
}
struct AccelBuild {
- AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0),
- ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
+ AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {}
NFAVertex v;
u32 state;
u32 offset; // offset correction to apply
CharReach stop1; // single-byte accel stop literals
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
- u32 ma_len1; // multiaccel len1
- u32 ma_len2; // multiaccel len2
- MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
};
static
build.stop1 = CharReach::dot();
} else {
const precalcAccel &precalc = bi.accel.precalc.at(ss);
- unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2;
- if (ma_len >= MULTIACCEL_MIN_LEN) {
- build.ma_len1 = precalc.ma_info.len1;
- build.stop1 = precalc.ma_info.cr;
- build.offset = precalc.ma_info.offset;
- } else if (precalc.double_lits.empty()) {
+ if (precalc.double_lits.empty()) {
build.stop1 = precalc.single_cr;
build.offset = precalc.single_offset;
} else {
limex_accel_info &accel = bi.accel;
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
- const CompileContext &cc = bi.cc;
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
const u32 num_states = bi.num_states;
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
as.double_offset);
- // try multibyte acceleration first
- MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
-
precalcAccel &pa = accel.precalc[state_set];
- useful |= state_set;
-
- // if we successfully built a multibyte accel scheme, use that
- if (mai.type != MultibyteAccelInfo::MAT_NONE) {
- pa.ma_info = mai;
-
- DEBUG_PRINTF("multibyte acceleration!\n");
- continue;
- }
-
pa.single_offset = as.offset;
pa.single_cr = as.cr;
+
if (as.double_byte.size() != 0) {
pa.double_offset = as.double_offset;
pa.double_lits = as.double_byte;
pa.double_cr = as.double_cr;
- };
+ }
+
+ useful |= state_set;
}
for (const auto &m : accel_map) {
state_set.reset();
state_set.set(state_id);
- bool is_multi = false;
- auto p_it = accel.precalc.find(state_set);
- if (p_it != accel.precalc.end()) {
- const precalcAccel &pa = p_it->second;
- offset = max(pa.double_offset, pa.single_offset);
- is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
- assert(offset <= MAX_ACCEL_DEPTH);
- }
-
accel.accelerable.insert(v);
- if (!is_multi) {
- findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
- }
+ findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
}
}
if (contains(accel.precalc, effective_states)) {
const auto &precalc = accel.precalc.at(effective_states);
- if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
- ainfo.ma_len1 = precalc.ma_info.len1;
- ainfo.ma_len2 = precalc.ma_info.len2;
- ainfo.multiaccel_offset = precalc.ma_info.offset;
- ainfo.multiaccel_stops = precalc.ma_info.cr;
- ainfo.ma_type = precalc.ma_info.type;
- } else {
- ainfo.single_offset = precalc.single_offset;
- ainfo.single_stops = precalc.single_cr;
- }
+ ainfo.single_offset = precalc.single_offset;
+ ainfo.single_stops = precalc.single_cr;
}
}
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCEL_COMMON_H_
-#define MULTIACCEL_COMMON_H_
-
-#include "config.h"
-#include "ue2common.h"
-#include "util/join.h"
-#include "util/bitutils.h"
-
-/*
- * When doing shifting, remember that the total number of shifts should be n-1
- */
-#define VARISHIFT(src, dst, len) \
- do { \
- (dst) &= (src) >> (len); \
- } while (0)
-#define STATIC_SHIFT1(x) \
- do { \
- (x) &= (x) >> 1; \
- } while (0)
-#define STATIC_SHIFT2(x) \
- do { \
- (x) &= (x) >> 2;\
- } while (0)
-#define STATIC_SHIFT4(x) \
- do { \
- (x) &= (x) >> 4; \
- } while (0)
-#define STATIC_SHIFT8(x) \
- do { \
- (x) &= (x) >> 8; \
- } while (0)
-#define SHIFT1(x) \
- do {} while (0)
-#define SHIFT2(x) \
- do { \
- STATIC_SHIFT1(x); \
- } while (0)
-#define SHIFT3(x) \
- do { \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT1(x); \
- } while (0)
-#define SHIFT4(x) \
- do { \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT5(x) \
- do { \
- SHIFT4(x); \
- STATIC_SHIFT1(x); \
- } while (0)
-#define SHIFT6(x) \
- do { \
- SHIFT4(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT7(x) \
- do { \
- SHIFT4(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT8(x) \
- do { \
- SHIFT4(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT9(x) \
- do { \
- SHIFT8(x); \
- STATIC_SHIFT1(x); \
- } while (0)
-#define SHIFT10(x) \
- do { \
- SHIFT8(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT11(x) \
- do { \
- SHIFT8(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT12(x); \
- do { \
- SHIFT8(x);\
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT13(x); \
- do { \
- SHIFT8(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT14(x) \
- do { \
- SHIFT8(x); \
- STATIC_SHIFT2(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT15(x) \
- do { \
- SHIFT8(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT16(x) \
- do { \
- SHIFT8(x); \
- STATIC_SHIFT8(x); \
- } while (0)
-#define SHIFT17(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT1(x); \
- } while (0)
-#define SHIFT18(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT19(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT20(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT21(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT22(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT2(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT23(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT24(x) \
- do { \
- SHIFT16(x); \
- STATIC_SHIFT8(x); \
- } while (0)
-#define SHIFT25(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT1(x); \
- } while (0)
-#define SHIFT26(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT27(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- } while (0)
-#define SHIFT28(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT29(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT30(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT2(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT31(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT1(x); \
- STATIC_SHIFT2(x); \
- STATIC_SHIFT4(x); \
- } while (0)
-#define SHIFT32(x) \
- do { \
- SHIFT24(x); \
- STATIC_SHIFT8(x); \
- } while (0)
-
-/*
- * this function is used by 32-bit multiaccel matchers. 32-bit matchers accept
- * a 32-bit integer as a buffer, where low 16 bits is movemask result and
- * high 16 bits are "don't care" values. this function is not expected to return
- * a result higher than 16.
- */
-static really_inline
-const u8 *match32(const u8 *buf, const u32 z) {
- if (unlikely(z != 0)) {
- u32 pos = ctz32(z);
- assert(pos < 16);
- return buf + pos;
- }
- return NULL;
-}
-
-/*
- * this function is used by 64-bit multiaccel matchers. 64-bit matchers accept
- * a 64-bit integer as a buffer, where low 32 bits is movemask result and
- * high 32 bits are "don't care" values. this function is not expected to return
- * a result higher than 32.
- */
-static really_inline
-const u8 *match64(const u8 *buf, const u64a z) {
- if (unlikely(z != 0)) {
- u32 pos = ctz64(z);
- assert(pos < 32);
- return buf + pos;
- }
- return NULL;
-}
-
-#endif /* MULTIACCEL_COMMON_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "multiaccel_compilehelper.h"
-
-using namespace std;
-using namespace ue2;
-
-#ifdef DEBUG
-static const char* state_to_str[] = {
- "FIRST_RUN",
- "SECOND_RUN",
- "WAITING_FOR_GRAB",
- "FIRST_TAIL",
- "SECOND_TAIL",
- "STOPPED",
- "INVALID"
-};
-static const char* type_to_str[] = {
- "SHIFT",
- "SHIFTGRAB",
- "DOUBLESHIFT",
- "DOUBLESHIFTGRAB",
- "LONG",
- "LONGGRAB",
- "NONE"
-};
-
-static
-void dumpMultiaccelState(const accel_data &d) {
- DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
- type_to_str[(unsigned) d.type],
- state_to_str[(unsigned) d.state],
- d.len1, d.tlen1, d.len2, d.tlen2);
-}
-#endif
-
-/* stop all the matching. this may render most schemes invalid. */
-static
-void stop(accel_data &d) {
- switch (d.state) {
- case STATE_STOPPED:
- case STATE_INVALID:
- break;
- case STATE_FIRST_TAIL:
- case STATE_SECOND_RUN:
- /*
- * Shift matchers are special case, because they have "tails".
- * When shift matcher reaches a mid/endpoint, tail mode is
- * activated, which looks for more matches to extend the match.
- *
- * For example, consider pattern /a{5}ba{3}/. Under normal circumstances,
- * long-grab matcher will be picked for this pattern (matching a run of a's,
- * followed by a not-a), because doubleshift matcher would be confused by
- * consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts
- * by 1) and throw out the rest of the pattern.
- *
- * With tails, we defer ending the run until we actually run out of
- * matching characters, so the above pattern will now be parsed by
- * doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4).
- *
- * So if we are stopping shift matchers, we should check if we aren't in
- * the process of matching first tail or second run. If we are, we can't
- * finish the second run as we are stopping, but we can try and split
- * the first tail instead to obtain a valid second run.
- */
- if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
- d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) {
- // can't split an empty void...
- d.state = STATE_INVALID;
- break;
- }
- d.len2 = 0;
- d.state = STATE_STOPPED;
- break;
- case STATE_SECOND_TAIL:
- d.state = STATE_STOPPED;
- break;
- case STATE_WAITING_FOR_GRAB:
- case STATE_FIRST_RUN:
- if (d.type == MultibyteAccelInfo::MAT_LONG) {
- d.state = STATE_STOPPED;
- } else {
- d.state = STATE_INVALID;
- }
- break;
- }
-}
-
-static
-void validate(accel_data &d, unsigned max_len) {
- // try and fit in all our tails
- if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) {
- // case 1: everything fits in
- d.len1 += d.tlen1;
- d.len2 += d.tlen2;
- d.tlen1 = 0;
- d.tlen2 = 0;
- } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) {
- // case 2: everything but the second tail fits in
- d.len1 += d.tlen1;
- d.tlen1 = 0;
- // try going for a partial tail
- if (d.tlen2 != 0) {
- int new_tlen2 = max_len - 1 - d.len1 - d.len2;
- if (new_tlen2 > 0) {
- d.len2 += new_tlen2;
- }
- d.tlen2 = 0;
- }
- } else if (d.len1 + d.tlen1 < max_len) {
- // case 3: first run and its tail fits in
- if (d.type == MultibyteAccelInfo::MAT_DSHIFT ||
- d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
- // split the tail into a second run
- d.len2 = d.tlen1;
- } else {
- d.len1 += d.tlen1;
- d.len2 = 0;
- }
- d.tlen1 = 0;
- d.tlen2 = 0;
- } else if (d.len1 < max_len) {
- // case 4: nothing but the first run fits in
- // try going for a partial tail
- if (d.tlen1 != 0) {
- int new_tlen1 = max_len - 1 - d.len1;
- if (new_tlen1 > 0) {
- d.len1 += new_tlen1;
- }
- d.tlen1 = 0;
- }
- d.len2 = 0;
- d.tlen2 = 0;
- }
- // if we removed our second run, doubleshift matchers are no longer valid
- if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
- d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) {
- d.state = STATE_INVALID;
- } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) {
- // long matchers can just stop whenever they want to
- d.len1 = max_len - 1;
- }
-
- // now, general sanity checks
- if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) {
- d.state = STATE_INVALID;
- }
- if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) {
- d.state = STATE_INVALID;
- }
-}
-
-static
-void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
- switch (d.type) {
- case MultibyteAccelInfo::MAT_LONG:
- {
- /*
- * For long matcher, we want lots of consecutive same-or-subset
- * char-reaches
- */
- if ((ref_cr & cur_cr) == cur_cr) {
- d.len1++;
- } else {
- d.state = STATE_STOPPED;
- }
- }
- break;
-
- case MultibyteAccelInfo::MAT_LONGGRAB:
- {
- /*
- * For long-grab matcher, we want lots of consecutive same-or-subset
- * char-reaches with a negative match in the end.
- */
- if ((ref_cr & cur_cr) == cur_cr) {
- d.len1++;
- } else if (!(ref_cr & cur_cr).any()) {
- /* we grabbed, stop immediately */
- d.state = STATE_STOPPED;
- } else {
- /* our run-n-grab was interrupted; mark as invalid */
- d.state = STATE_INVALID;
- }
- }
- break;
-
- case MultibyteAccelInfo::MAT_SHIFTGRAB:
- {
- /*
- * For shift-grab matcher, we want two matches separated by anything;
- * however the second vertex *must* be a negative (non-overlapping) match.
- *
- * Shiftgrab matcher is identical to shift except for presence of grab.
- */
- if (d.state == STATE_WAITING_FOR_GRAB) {
- if ((ref_cr & cur_cr).any()) {
- d.state = STATE_INVALID;
- } else {
- d.state = STATE_FIRST_RUN;
- d.len1++;
- }
- return;
- }
- }
- /* no break, falling through */
- case MultibyteAccelInfo::MAT_SHIFT:
- {
- /*
- * For shift-matcher, we want two matches separated by anything.
- */
- if (ref_cr == cur_cr) {
- // keep matching tail
- switch (d.state) {
- case STATE_FIRST_RUN:
- d.state = STATE_FIRST_TAIL;
- break;
- case STATE_FIRST_TAIL:
- d.tlen1++;
- break;
- default:
- // shouldn't happen
- assert(0);
- }
- } else {
- switch (d.state) {
- case STATE_FIRST_RUN:
- // simply advance
- d.len1++;
- break;
- case STATE_FIRST_TAIL:
- // we found a non-matching char after tail, so stop
- d.state = STATE_STOPPED;
- break;
- default:
- // shouldn't happen
- assert(0);
- }
- }
- }
- break;
-
- case MultibyteAccelInfo::MAT_DSHIFTGRAB:
- {
- /*
- * For double shift-grab matcher, we want two matches separated by
- * either negative matches or dots; however the second vertex *must*
- * be a negative match.
- *
- * Doubleshiftgrab matcher is identical to doubleshift except for
- * presence of grab.
- */
- if (d.state == STATE_WAITING_FOR_GRAB) {
- if ((ref_cr & cur_cr).any()) {
- d.state = STATE_INVALID;
- } else {
- d.state = STATE_FIRST_RUN;
- d.len1++;
- }
- return;
- }
- }
- /* no break, falling through */
- case MultibyteAccelInfo::MAT_DSHIFT:
- {
- /*
- * For double shift matcher, we want three matches, each separated
- * by a lot of anything.
- *
- * Doubleshift matcher is complicated by presence of tails.
- */
- if (ref_cr == cur_cr) {
- // decide if we are activating second shift or matching tails
- switch (d.state) {
- case STATE_FIRST_RUN:
- d.state = STATE_FIRST_TAIL;
- d.len2 = 1; // we're now ready for our second run
- break;
- case STATE_FIRST_TAIL:
- d.tlen1++;
- break;
- case STATE_SECOND_RUN:
- d.state = STATE_SECOND_TAIL;
- break;
- case STATE_SECOND_TAIL:
- d.tlen2++;
- break;
- default:
- // shouldn't happen
- assert(0);
- }
- } else {
- switch (d.state) {
- case STATE_FIRST_RUN:
- d.len1++;
- break;
- case STATE_FIRST_TAIL:
- // start second run
- d.state = STATE_SECOND_RUN;
- d.len2++;
- break;
- case STATE_SECOND_RUN:
- d.len2++;
- break;
- case STATE_SECOND_TAIL:
- // stop
- d.state = STATE_STOPPED;
- break;
- default:
- // shouldn't happen
- assert(0);
- }
- }
- }
- break;
-
- default:
- // shouldn't happen
- assert(0);
- break;
- }
-}
-
-MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
- u32 off, unsigned max_length)
- : cr(ref_cr), offset(off), max_len(max_length) {
- int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
- accels.resize(accel_num);
-
- // mark everything as valid
- for (int i = 0; i < accel_num; i++) {
- accel_data &ad = accels[i];
- ad.len1 = 1;
- ad.type = (MultibyteAccelInfo::multiaccel_type) i;
-
- /* for shift-grab matchers, we are waiting for the grab right at the start */
- if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB
- || ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
- ad.state = STATE_WAITING_FOR_GRAB;
- } else {
- ad.state = STATE_FIRST_RUN;
- }
- }
-}
-
-bool MultiaccelCompileHelper::canAdvance() {
- for (const accel_data &ad : accels) {
- if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) {
- return true;
- }
- }
- return false;
-}
-
-void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
- for (accel_data &ad : accels) {
- if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) {
- continue;
- }
- match(ad, cr, cur_cr);
-#ifdef DEBUG
- dumpMultiaccelState(ad);
-#endif
- }
-}
-
-MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
- int best_len = 0;
- accel_data best;
-
- DEBUG_PRINTF("Stopping multiaccel compile\n");
-
- for (accel_data &ad : accels) {
- // stop our matching
- stop(ad);
- validate(ad, max_len);
-
-#ifdef DEBUG
- dumpMultiaccelState(ad);
-#endif
-
- // skip invalid schemes
- if (ad.state == STATE_INVALID) {
- continue;
- }
- DEBUG_PRINTF("Marking as viable\n");
-
- // TODO: relative strengths of accel schemes? maybe e.g. a shorter
- // long match would in some cases be preferable to a longer
- // double shift match (for example, depending on length)?
- int as_len = ad.len1 + ad.len2;
- if (as_len >= best_len) {
- DEBUG_PRINTF("Marking as best\n");
- best_len = as_len;
- best = ad;
- }
- }
- // if we found at least one accel scheme, return it
- if (best.state != STATE_INVALID) {
-#ifdef DEBUG
- DEBUG_PRINTF("Picked best multiaccel state:\n");
- dumpMultiaccelState(best);
-#endif
- MultibyteAccelInfo info;
- info.cr = cr;
- info.offset = offset;
- info.len1 = best.len1;
- info.len2 = best.len2;
- info.type = best.type;
- return info;
- }
- return MultibyteAccelInfo();
-}
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCELCOMPILE_H_
-#define MULTIACCELCOMPILE_H_
-
-#include "ue2common.h"
-
-#include "nfagraph/ng_limex_accel.h"
-
-#include <vector>
-
-namespace ue2 {
-
-/* accel scheme state machine */
-enum accel_scheme_state {
- STATE_FIRST_RUN,
- STATE_SECOND_RUN,
- STATE_WAITING_FOR_GRAB,
- STATE_FIRST_TAIL,
- STATE_SECOND_TAIL,
- STATE_STOPPED,
- STATE_INVALID
-};
-
-struct accel_data {
- MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE;
- accel_scheme_state state = STATE_INVALID;
- unsigned len1 = 0; /* length of first run */
- unsigned len2 = 0; /* length of second run, if present */
- unsigned tlen1 = 0; /* first tail length */
- unsigned tlen2 = 0; /* second tail length */
-};
-
-class MultiaccelCompileHelper {
-private:
- const CharReach &cr;
- u32 offset;
- std::vector<accel_data> accels;
- unsigned max_len;
-public:
- MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);
- bool canAdvance();
- MultibyteAccelInfo getBestScheme();
- void advance(const ue2::CharReach &cr);
-};
-
-}; // namespace
-
-#endif /* MULTIACCELCOMPILE_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCEL_DOUBLESHIFT_H_
-#define MULTIACCEL_DOUBLESHIFT_H_
-
-#include "multiaccel_common.h"
-
-#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \
- static really_inline \
- const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
- if (unlikely(z)) { \
- match_t tmp = z; \
- z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
- tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
- VARISHIFT(z, z, len); \
- VARISHIFT(tmp, tmp, len2); \
- VARISHIFT(tmp, z, len); \
- return JOIN(match, match_sz)(buf, z); \
- } \
- return NULL; \
- }
-
-#define DOUBLESHIFT_MATCH_32_DEF(n) \
- DOUBLESHIFT_MATCH(n, u32, 32)
-#define DOUBLESHIFT_MATCH_64_DEF(n) \
- DOUBLESHIFT_MATCH(n, u64a, 64)
-#define DOUBLESHIFT_MATCH_DEF(n) \
- DOUBLESHIFT_MATCH_32_DEF(n) \
- DOUBLESHIFT_MATCH_64_DEF(n)
-
-DOUBLESHIFT_MATCH_DEF(1)
-DOUBLESHIFT_MATCH_DEF(2)
-DOUBLESHIFT_MATCH_DEF(3)
-DOUBLESHIFT_MATCH_DEF(4)
-DOUBLESHIFT_MATCH_DEF(5)
-DOUBLESHIFT_MATCH_DEF(6)
-DOUBLESHIFT_MATCH_DEF(7)
-DOUBLESHIFT_MATCH_DEF(8)
-DOUBLESHIFT_MATCH_DEF(9)
-DOUBLESHIFT_MATCH_DEF(10)
-DOUBLESHIFT_MATCH_DEF(11)
-DOUBLESHIFT_MATCH_DEF(12)
-DOUBLESHIFT_MATCH_DEF(13)
-DOUBLESHIFT_MATCH_DEF(14)
-DOUBLESHIFT_MATCH_DEF(15)
-DOUBLESHIFT_MATCH_64_DEF(16)
-DOUBLESHIFT_MATCH_64_DEF(17)
-DOUBLESHIFT_MATCH_64_DEF(18)
-DOUBLESHIFT_MATCH_64_DEF(19)
-DOUBLESHIFT_MATCH_64_DEF(20)
-DOUBLESHIFT_MATCH_64_DEF(21)
-DOUBLESHIFT_MATCH_64_DEF(22)
-DOUBLESHIFT_MATCH_64_DEF(23)
-DOUBLESHIFT_MATCH_64_DEF(24)
-DOUBLESHIFT_MATCH_64_DEF(25)
-DOUBLESHIFT_MATCH_64_DEF(26)
-DOUBLESHIFT_MATCH_64_DEF(27)
-DOUBLESHIFT_MATCH_64_DEF(28)
-DOUBLESHIFT_MATCH_64_DEF(29)
-DOUBLESHIFT_MATCH_64_DEF(30)
-DOUBLESHIFT_MATCH_64_DEF(31)
-
-static
-const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
-{
-// skip the first
- 0,
- &doubleshiftMatch_32_1,
- &doubleshiftMatch_32_2,
- &doubleshiftMatch_32_3,
- &doubleshiftMatch_32_4,
- &doubleshiftMatch_32_5,
- &doubleshiftMatch_32_6,
- &doubleshiftMatch_32_7,
- &doubleshiftMatch_32_8,
- &doubleshiftMatch_32_9,
- &doubleshiftMatch_32_10,
- &doubleshiftMatch_32_11,
- &doubleshiftMatch_32_12,
- &doubleshiftMatch_32_13,
- &doubleshiftMatch_32_14,
- &doubleshiftMatch_32_15,
-};
-
-static
-const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
-{
-// skip the first
- 0,
- &doubleshiftMatch_64_1,
- &doubleshiftMatch_64_2,
- &doubleshiftMatch_64_3,
- &doubleshiftMatch_64_4,
- &doubleshiftMatch_64_5,
- &doubleshiftMatch_64_6,
- &doubleshiftMatch_64_7,
- &doubleshiftMatch_64_8,
- &doubleshiftMatch_64_9,
- &doubleshiftMatch_64_10,
- &doubleshiftMatch_64_11,
- &doubleshiftMatch_64_12,
- &doubleshiftMatch_64_13,
- &doubleshiftMatch_64_14,
- &doubleshiftMatch_64_15,
- &doubleshiftMatch_64_16,
- &doubleshiftMatch_64_17,
- &doubleshiftMatch_64_18,
- &doubleshiftMatch_64_19,
- &doubleshiftMatch_64_20,
- &doubleshiftMatch_64_21,
- &doubleshiftMatch_64_22,
- &doubleshiftMatch_64_23,
- &doubleshiftMatch_64_24,
- &doubleshiftMatch_64_25,
- &doubleshiftMatch_64_26,
- &doubleshiftMatch_64_27,
- &doubleshiftMatch_64_28,
- &doubleshiftMatch_64_29,
- &doubleshiftMatch_64_30,
- &doubleshiftMatch_64_31,
-};
-
-#endif /* MULTIACCEL_DOUBLESHIFT_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_
-#define MULTIACCEL_DOUBLESHIFTGRAB_H_
-
-#include "multiaccel_common.h"
-
-#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \
- static really_inline \
- const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
- if (unlikely(z)) { \
- match_t neg = ~z; \
- match_t tmp = z; \
- z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
- tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
- neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
- VARISHIFT(z, z, len); \
- VARISHIFT(tmp, tmp, len2); \
- VARISHIFT(neg, z, 1); \
- VARISHIFT(tmp, z, len); \
- return JOIN(match, match_sz)(buf, z); \
- } \
- return NULL; \
- }
-
-#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
- DOUBLESHIFTGRAB_MATCH(n, u32, 32)
-#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \
- DOUBLESHIFTGRAB_MATCH(n, u64a, 64)
-#define DOUBLESHIFTGRAB_MATCH_DEF(n) \
- DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
- DOUBLESHIFTGRAB_MATCH_64_DEF(n)
-
-DOUBLESHIFTGRAB_MATCH_DEF(1)
-DOUBLESHIFTGRAB_MATCH_DEF(2)
-DOUBLESHIFTGRAB_MATCH_DEF(3)
-DOUBLESHIFTGRAB_MATCH_DEF(4)
-DOUBLESHIFTGRAB_MATCH_DEF(5)
-DOUBLESHIFTGRAB_MATCH_DEF(6)
-DOUBLESHIFTGRAB_MATCH_DEF(7)
-DOUBLESHIFTGRAB_MATCH_DEF(8)
-DOUBLESHIFTGRAB_MATCH_DEF(9)
-DOUBLESHIFTGRAB_MATCH_DEF(10)
-DOUBLESHIFTGRAB_MATCH_DEF(11)
-DOUBLESHIFTGRAB_MATCH_DEF(12)
-DOUBLESHIFTGRAB_MATCH_DEF(13)
-DOUBLESHIFTGRAB_MATCH_DEF(14)
-DOUBLESHIFTGRAB_MATCH_DEF(15)
-DOUBLESHIFTGRAB_MATCH_64_DEF(16)
-DOUBLESHIFTGRAB_MATCH_64_DEF(17)
-DOUBLESHIFTGRAB_MATCH_64_DEF(18)
-DOUBLESHIFTGRAB_MATCH_64_DEF(19)
-DOUBLESHIFTGRAB_MATCH_64_DEF(20)
-DOUBLESHIFTGRAB_MATCH_64_DEF(21)
-DOUBLESHIFTGRAB_MATCH_64_DEF(22)
-DOUBLESHIFTGRAB_MATCH_64_DEF(23)
-DOUBLESHIFTGRAB_MATCH_64_DEF(24)
-DOUBLESHIFTGRAB_MATCH_64_DEF(25)
-DOUBLESHIFTGRAB_MATCH_64_DEF(26)
-DOUBLESHIFTGRAB_MATCH_64_DEF(27)
-DOUBLESHIFTGRAB_MATCH_64_DEF(28)
-DOUBLESHIFTGRAB_MATCH_64_DEF(29)
-DOUBLESHIFTGRAB_MATCH_64_DEF(30)
-DOUBLESHIFTGRAB_MATCH_64_DEF(31)
-
-static
-const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
-{
-// skip the first
- 0,
- &doubleshiftgrabMatch_32_1,
- &doubleshiftgrabMatch_32_2,
- &doubleshiftgrabMatch_32_3,
- &doubleshiftgrabMatch_32_4,
- &doubleshiftgrabMatch_32_5,
- &doubleshiftgrabMatch_32_6,
- &doubleshiftgrabMatch_32_7,
- &doubleshiftgrabMatch_32_8,
- &doubleshiftgrabMatch_32_9,
- &doubleshiftgrabMatch_32_10,
- &doubleshiftgrabMatch_32_11,
- &doubleshiftgrabMatch_32_12,
- &doubleshiftgrabMatch_32_13,
- &doubleshiftgrabMatch_32_14,
- &doubleshiftgrabMatch_32_15,
-};
-
-static
-const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
-{
-// skip the first
- 0,
- &doubleshiftgrabMatch_64_1,
- &doubleshiftgrabMatch_64_2,
- &doubleshiftgrabMatch_64_3,
- &doubleshiftgrabMatch_64_4,
- &doubleshiftgrabMatch_64_5,
- &doubleshiftgrabMatch_64_6,
- &doubleshiftgrabMatch_64_7,
- &doubleshiftgrabMatch_64_8,
- &doubleshiftgrabMatch_64_9,
- &doubleshiftgrabMatch_64_10,
- &doubleshiftgrabMatch_64_11,
- &doubleshiftgrabMatch_64_12,
- &doubleshiftgrabMatch_64_13,
- &doubleshiftgrabMatch_64_14,
- &doubleshiftgrabMatch_64_15,
- &doubleshiftgrabMatch_64_16,
- &doubleshiftgrabMatch_64_17,
- &doubleshiftgrabMatch_64_18,
- &doubleshiftgrabMatch_64_19,
- &doubleshiftgrabMatch_64_20,
- &doubleshiftgrabMatch_64_21,
- &doubleshiftgrabMatch_64_22,
- &doubleshiftgrabMatch_64_23,
- &doubleshiftgrabMatch_64_24,
- &doubleshiftgrabMatch_64_25,
- &doubleshiftgrabMatch_64_26,
- &doubleshiftgrabMatch_64_27,
- &doubleshiftgrabMatch_64_28,
- &doubleshiftgrabMatch_64_29,
- &doubleshiftgrabMatch_64_30,
- &doubleshiftgrabMatch_64_31,
-};
-
-#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCEL_LONG_H_
-#define MULTIACCEL_LONG_H_
-
-#include "multiaccel_common.h"
-
-#define LONG_MATCH(len, match_t, match_sz) \
- static really_inline \
- const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
- if (unlikely(z)) { \
- z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
- JOIN(SHIFT, len)(z); \
- return JOIN(match, match_sz)(buf, z); \
- } \
- return NULL; \
- }
-
-#define LONG_MATCH_32_DEF(n) \
- LONG_MATCH(n, u32, 32)
-#define LONG_MATCH_64_DEF(n) \
- LONG_MATCH(n, u64a, 64)
-#define LONG_MATCH_DEF(n) \
- LONG_MATCH_32_DEF(n) \
- LONG_MATCH_64_DEF(n)
-
-LONG_MATCH_DEF(1)
-LONG_MATCH_DEF(2)
-LONG_MATCH_DEF(3)
-LONG_MATCH_DEF(4)
-LONG_MATCH_DEF(5)
-LONG_MATCH_DEF(6)
-LONG_MATCH_DEF(7)
-LONG_MATCH_DEF(8)
-LONG_MATCH_DEF(9)
-LONG_MATCH_DEF(10)
-LONG_MATCH_DEF(11)
-LONG_MATCH_DEF(12)
-LONG_MATCH_DEF(13)
-LONG_MATCH_DEF(14)
-LONG_MATCH_DEF(15)
-LONG_MATCH_64_DEF(16)
-LONG_MATCH_64_DEF(17)
-LONG_MATCH_64_DEF(18)
-LONG_MATCH_64_DEF(19)
-LONG_MATCH_64_DEF(20)
-LONG_MATCH_64_DEF(21)
-LONG_MATCH_64_DEF(22)
-LONG_MATCH_64_DEF(23)
-LONG_MATCH_64_DEF(24)
-LONG_MATCH_64_DEF(25)
-LONG_MATCH_64_DEF(26)
-LONG_MATCH_64_DEF(27)
-LONG_MATCH_64_DEF(28)
-LONG_MATCH_64_DEF(29)
-LONG_MATCH_64_DEF(30)
-LONG_MATCH_64_DEF(31)
-
-static
-const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) =
-{
- // skip the first three
- 0,
- &longMatch_32_1,
- &longMatch_32_2,
- &longMatch_32_3,
- &longMatch_32_4,
- &longMatch_32_5,
- &longMatch_32_6,
- &longMatch_32_7,
- &longMatch_32_8,
- &longMatch_32_9,
- &longMatch_32_10,
- &longMatch_32_11,
- &longMatch_32_12,
- &longMatch_32_13,
- &longMatch_32_14,
- &longMatch_32_15,
- };
-
-static
-const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) =
-{
-// skip the first three
- 0,
- &longMatch_64_1,
- &longMatch_64_2,
- &longMatch_64_3,
- &longMatch_64_4,
- &longMatch_64_5,
- &longMatch_64_6,
- &longMatch_64_7,
- &longMatch_64_8,
- &longMatch_64_9,
- &longMatch_64_10,
- &longMatch_64_11,
- &longMatch_64_12,
- &longMatch_64_13,
- &longMatch_64_14,
- &longMatch_64_15,
- &longMatch_64_16,
- &longMatch_64_17,
- &longMatch_64_18,
- &longMatch_64_19,
- &longMatch_64_20,
- &longMatch_64_21,
- &longMatch_64_22,
- &longMatch_64_23,
- &longMatch_64_24,
- &longMatch_64_25,
- &longMatch_64_26,
- &longMatch_64_27,
- &longMatch_64_28,
- &longMatch_64_29,
- &longMatch_64_30,
- &longMatch_64_31,
-};
-
-#endif /* MULTIACCEL_LONG_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCEL_LONGGRAB_H_
-#define MULTIACCEL_LONGGRAB_H_
-
-#include "multiaccel_common.h"
-
-#define LONGGRAB_MATCH(len, match_t, match_sz) \
- static really_inline \
- const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
- if (unlikely(z)) { \
- match_t tmp = ~z; \
- tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
- z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
- JOIN(SHIFT, len)(z); \
- VARISHIFT(tmp, z, len); \
- return JOIN(match, match_sz)(buf, z); \
- } \
- return NULL; \
- }
-
-#define LONGGRAB_MATCH_32_DEF(n) \
- LONGGRAB_MATCH(n, u32, 32)
-#define LONGGRAB_MATCH_64_DEF(n) \
- LONGGRAB_MATCH(n, u64a, 64)
-#define LONGGRAB_MATCH_DEF(n) \
- LONGGRAB_MATCH_32_DEF(n) \
- LONGGRAB_MATCH_64_DEF(n)
-
-LONGGRAB_MATCH_DEF(1)
-LONGGRAB_MATCH_DEF(2)
-LONGGRAB_MATCH_DEF(3)
-LONGGRAB_MATCH_DEF(4)
-LONGGRAB_MATCH_DEF(5)
-LONGGRAB_MATCH_DEF(6)
-LONGGRAB_MATCH_DEF(7)
-LONGGRAB_MATCH_DEF(8)
-LONGGRAB_MATCH_DEF(9)
-LONGGRAB_MATCH_DEF(10)
-LONGGRAB_MATCH_DEF(11)
-LONGGRAB_MATCH_DEF(12)
-LONGGRAB_MATCH_DEF(13)
-LONGGRAB_MATCH_DEF(14)
-LONGGRAB_MATCH_DEF(15)
-LONGGRAB_MATCH_64_DEF(16)
-LONGGRAB_MATCH_64_DEF(17)
-LONGGRAB_MATCH_64_DEF(18)
-LONGGRAB_MATCH_64_DEF(19)
-LONGGRAB_MATCH_64_DEF(20)
-LONGGRAB_MATCH_64_DEF(21)
-LONGGRAB_MATCH_64_DEF(22)
-LONGGRAB_MATCH_64_DEF(23)
-LONGGRAB_MATCH_64_DEF(24)
-LONGGRAB_MATCH_64_DEF(25)
-LONGGRAB_MATCH_64_DEF(26)
-LONGGRAB_MATCH_64_DEF(27)
-LONGGRAB_MATCH_64_DEF(28)
-LONGGRAB_MATCH_64_DEF(29)
-LONGGRAB_MATCH_64_DEF(30)
-LONGGRAB_MATCH_64_DEF(31)
-
-static
-const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) =
-{
-// skip the first three
- 0,
- &longgrabMatch_32_1,
- &longgrabMatch_32_2,
- &longgrabMatch_32_3,
- &longgrabMatch_32_4,
- &longgrabMatch_32_5,
- &longgrabMatch_32_6,
- &longgrabMatch_32_7,
- &longgrabMatch_32_8,
- &longgrabMatch_32_9,
- &longgrabMatch_32_10,
- &longgrabMatch_32_11,
- &longgrabMatch_32_12,
- &longgrabMatch_32_13,
- &longgrabMatch_32_14,
- &longgrabMatch_32_15,
- };
-
-static
-const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) =
-{
-// skip the first three
- 0,
- &longgrabMatch_64_1,
- &longgrabMatch_64_2,
- &longgrabMatch_64_3,
- &longgrabMatch_64_4,
- &longgrabMatch_64_5,
- &longgrabMatch_64_6,
- &longgrabMatch_64_7,
- &longgrabMatch_64_8,
- &longgrabMatch_64_9,
- &longgrabMatch_64_10,
- &longgrabMatch_64_11,
- &longgrabMatch_64_12,
- &longgrabMatch_64_13,
- &longgrabMatch_64_14,
- &longgrabMatch_64_15,
- &longgrabMatch_64_16,
- &longgrabMatch_64_17,
- &longgrabMatch_64_18,
- &longgrabMatch_64_19,
- &longgrabMatch_64_20,
- &longgrabMatch_64_21,
- &longgrabMatch_64_22,
- &longgrabMatch_64_23,
- &longgrabMatch_64_24,
- &longgrabMatch_64_25,
- &longgrabMatch_64_26,
- &longgrabMatch_64_27,
- &longgrabMatch_64_28,
- &longgrabMatch_64_29,
- &longgrabMatch_64_30,
- &longgrabMatch_64_31,
-};
-
-#endif /* MULTIACCEL_LONGGRAB_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCEL_SHIFT_H_
-#define MULTIACCEL_SHIFT_H_
-
-#include "multiaccel_common.h"
-
-#define SHIFT_MATCH(len, match_t, match_sz) \
- static really_inline \
- const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
- if (unlikely(z)) { \
- z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
- VARISHIFT(z, z, len); \
- return JOIN(match, match_sz)(buf, z); \
- } \
- return NULL; \
- }
-
-#define SHIFT_MATCH_32_DEF(n) \
- SHIFT_MATCH(n, u32, 32)
-#define SHIFT_MATCH_64_DEF(n) \
- SHIFT_MATCH(n, u64a, 64)
-#define SHIFT_MATCH_DEF(n) \
- SHIFT_MATCH_32_DEF(n) \
- SHIFT_MATCH_64_DEF(n)
-
-SHIFT_MATCH_DEF(1)
-SHIFT_MATCH_DEF(2)
-SHIFT_MATCH_DEF(3)
-SHIFT_MATCH_DEF(4)
-SHIFT_MATCH_DEF(5)
-SHIFT_MATCH_DEF(6)
-SHIFT_MATCH_DEF(7)
-SHIFT_MATCH_DEF(8)
-SHIFT_MATCH_DEF(9)
-SHIFT_MATCH_DEF(10)
-SHIFT_MATCH_DEF(11)
-SHIFT_MATCH_DEF(12)
-SHIFT_MATCH_DEF(13)
-SHIFT_MATCH_DEF(14)
-SHIFT_MATCH_DEF(15)
-SHIFT_MATCH_64_DEF(16)
-SHIFT_MATCH_64_DEF(17)
-SHIFT_MATCH_64_DEF(18)
-SHIFT_MATCH_64_DEF(19)
-SHIFT_MATCH_64_DEF(20)
-SHIFT_MATCH_64_DEF(21)
-SHIFT_MATCH_64_DEF(22)
-SHIFT_MATCH_64_DEF(23)
-SHIFT_MATCH_64_DEF(24)
-SHIFT_MATCH_64_DEF(25)
-SHIFT_MATCH_64_DEF(26)
-SHIFT_MATCH_64_DEF(27)
-SHIFT_MATCH_64_DEF(28)
-SHIFT_MATCH_64_DEF(29)
-SHIFT_MATCH_64_DEF(30)
-SHIFT_MATCH_64_DEF(31)
-
-static
-const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) =
-{
-// skip the first
- 0,
- &shiftMatch_32_1,
- &shiftMatch_32_2,
- &shiftMatch_32_3,
- &shiftMatch_32_4,
- &shiftMatch_32_5,
- &shiftMatch_32_6,
- &shiftMatch_32_7,
- &shiftMatch_32_8,
- &shiftMatch_32_9,
- &shiftMatch_32_10,
- &shiftMatch_32_11,
- &shiftMatch_32_12,
- &shiftMatch_32_13,
- &shiftMatch_32_14,
- &shiftMatch_32_15,
-};
-
-static
-const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) =
-{
-// skip the first
- 0,
- &shiftMatch_64_1,
- &shiftMatch_64_2,
- &shiftMatch_64_3,
- &shiftMatch_64_4,
- &shiftMatch_64_5,
- &shiftMatch_64_6,
- &shiftMatch_64_7,
- &shiftMatch_64_8,
- &shiftMatch_64_9,
- &shiftMatch_64_10,
- &shiftMatch_64_11,
- &shiftMatch_64_12,
- &shiftMatch_64_13,
- &shiftMatch_64_14,
- &shiftMatch_64_15,
- &shiftMatch_64_16,
- &shiftMatch_64_17,
- &shiftMatch_64_18,
- &shiftMatch_64_19,
- &shiftMatch_64_20,
- &shiftMatch_64_21,
- &shiftMatch_64_22,
- &shiftMatch_64_23,
- &shiftMatch_64_24,
- &shiftMatch_64_25,
- &shiftMatch_64_26,
- &shiftMatch_64_27,
- &shiftMatch_64_28,
- &shiftMatch_64_29,
- &shiftMatch_64_30,
- &shiftMatch_64_31,
-};
-
-#endif /* MULTIACCEL_SHIFT_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIACCEL_SHIFTGRAB_H_
-#define MULTIACCEL_SHIFTGRAB_H_
-
-#include "multiaccel_common.h"
-
-#define SHIFTGRAB_MATCH(len, match_t, match_sz) \
- static really_inline \
- const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
- if (unlikely(z)) { \
- match_t tmp = ~z; \
- z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
- tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
- VARISHIFT(z, z, len); \
- VARISHIFT(tmp, z, 1); \
- return JOIN(match, match_sz)(buf, z); \
- } \
- return NULL; \
- }
-
-#define SHIFTGRAB_MATCH_32_DEF(n) \
- SHIFTGRAB_MATCH(n, u32, 32)
-#define SHIFTGRAB_MATCH_64_DEF(n) \
- SHIFTGRAB_MATCH(n, u64a, 64)
-#define SHIFTGRAB_MATCH_DEF(n) \
- SHIFTGRAB_MATCH_32_DEF(n) \
- SHIFTGRAB_MATCH_64_DEF(n)
-
-SHIFTGRAB_MATCH_DEF(1)
-SHIFTGRAB_MATCH_DEF(2)
-SHIFTGRAB_MATCH_DEF(3)
-SHIFTGRAB_MATCH_DEF(4)
-SHIFTGRAB_MATCH_DEF(5)
-SHIFTGRAB_MATCH_DEF(6)
-SHIFTGRAB_MATCH_DEF(7)
-SHIFTGRAB_MATCH_DEF(8)
-SHIFTGRAB_MATCH_DEF(9)
-SHIFTGRAB_MATCH_DEF(10)
-SHIFTGRAB_MATCH_DEF(11)
-SHIFTGRAB_MATCH_DEF(12)
-SHIFTGRAB_MATCH_DEF(13)
-SHIFTGRAB_MATCH_DEF(14)
-SHIFTGRAB_MATCH_DEF(15)
-SHIFTGRAB_MATCH_64_DEF(16)
-SHIFTGRAB_MATCH_64_DEF(17)
-SHIFTGRAB_MATCH_64_DEF(18)
-SHIFTGRAB_MATCH_64_DEF(19)
-SHIFTGRAB_MATCH_64_DEF(20)
-SHIFTGRAB_MATCH_64_DEF(21)
-SHIFTGRAB_MATCH_64_DEF(22)
-SHIFTGRAB_MATCH_64_DEF(23)
-SHIFTGRAB_MATCH_64_DEF(24)
-SHIFTGRAB_MATCH_64_DEF(25)
-SHIFTGRAB_MATCH_64_DEF(26)
-SHIFTGRAB_MATCH_64_DEF(27)
-SHIFTGRAB_MATCH_64_DEF(28)
-SHIFTGRAB_MATCH_64_DEF(29)
-SHIFTGRAB_MATCH_64_DEF(30)
-SHIFTGRAB_MATCH_64_DEF(31)
-
-static
-const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) =
-{
-// skip the first
- 0,
- &shiftgrabMatch_32_1,
- &shiftgrabMatch_32_2,
- &shiftgrabMatch_32_3,
- &shiftgrabMatch_32_4,
- &shiftgrabMatch_32_5,
- &shiftgrabMatch_32_6,
- &shiftgrabMatch_32_7,
- &shiftgrabMatch_32_8,
- &shiftgrabMatch_32_9,
- &shiftgrabMatch_32_10,
- &shiftgrabMatch_32_11,
- &shiftgrabMatch_32_12,
- &shiftgrabMatch_32_13,
- &shiftgrabMatch_32_14,
- &shiftgrabMatch_32_15,
-};
-
-static
-const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) =
- {
-// skip the first
- 0,
- &shiftgrabMatch_64_1,
- &shiftgrabMatch_64_2,
- &shiftgrabMatch_64_3,
- &shiftgrabMatch_64_4,
- &shiftgrabMatch_64_5,
- &shiftgrabMatch_64_6,
- &shiftgrabMatch_64_7,
- &shiftgrabMatch_64_8,
- &shiftgrabMatch_64_9,
- &shiftgrabMatch_64_10,
- &shiftgrabMatch_64_11,
- &shiftgrabMatch_64_12,
- &shiftgrabMatch_64_13,
- &shiftgrabMatch_64_14,
- &shiftgrabMatch_64_15,
- &shiftgrabMatch_64_16,
- &shiftgrabMatch_64_17,
- &shiftgrabMatch_64_18,
- &shiftgrabMatch_64_19,
- &shiftgrabMatch_64_20,
- &shiftgrabMatch_64_21,
- &shiftgrabMatch_64_22,
- &shiftgrabMatch_64_23,
- &shiftgrabMatch_64_24,
- &shiftgrabMatch_64_25,
- &shiftgrabMatch_64_26,
- &shiftgrabMatch_64_27,
- &shiftgrabMatch_64_28,
- &shiftgrabMatch_64_29,
- &shiftgrabMatch_64_30,
- &shiftgrabMatch_64_31,
-};
-
-#endif /* MULTIACCEL_SHIFTGRAB_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shufti: character class acceleration.
- *
- * Utilises the SSSE3 pshufb shuffle instruction
- */
-
-#include "config.h"
-#include "ue2common.h"
-#include "util/arch.h"
-
-#include "multishufti.h"
-
-#include "multiaccel_common.h"
-
-#if !defined(HAVE_AVX2)
-
-#define MATCH_ALGO long_
-#include "multiaccel_long.h"
-#include "multishufti_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO longgrab_
-#include "multiaccel_longgrab.h"
-#include "multishufti_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shift_
-#include "multiaccel_shift.h"
-#include "multishufti_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shiftgrab_
-#include "multiaccel_shiftgrab.h"
-#include "multishufti_sse.h"
-#undef MATCH_ALGO
-
-#define MULTIACCEL_DOUBLE
-
-#define MATCH_ALGO doubleshift_
-#include "multiaccel_doubleshift.h"
-#include "multishufti_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO doubleshiftgrab_
-#include "multiaccel_doubleshiftgrab.h"
-#include "multishufti_sse.h"
-#undef MATCH_ALGO
-
-#undef MULTIACCEL_DOUBLE
-
-#else
-
-#define MATCH_ALGO long_
-#include "multiaccel_long.h"
-#include "multishufti_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO longgrab_
-#include "multiaccel_longgrab.h"
-#include "multishufti_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shift_
-#include "multiaccel_shift.h"
-#include "multishufti_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shiftgrab_
-#include "multiaccel_shiftgrab.h"
-#include "multishufti_avx2.h"
-#undef MATCH_ALGO
-
-#define MULTIACCEL_DOUBLE
-
-#define MATCH_ALGO doubleshift_
-#include "multiaccel_doubleshift.h"
-#include "multishufti_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO doubleshiftgrab_
-#include "multiaccel_doubleshiftgrab.h"
-#include "multishufti_avx2.h"
-#undef MATCH_ALGO
-
-#undef MULTIACCEL_DOUBLE
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Multishufti: multibyte version of Shufti
- *
- * Utilises the SSSE3 pshufb shuffle instruction
- */
-
-#ifndef MULTISHUFTI_H
-#define MULTISHUFTI_H
-
-#include "ue2common.h"
-#include "util/simd_types.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end, const u8 run_len,
- const u8 run2_len);
-
-const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end, const u8 run_len,
- const u8 run2_len);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "shufti_common.h"
-
-#include "ue2common.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-static really_inline
-const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,
- const u8 *buf, const m256 low4bits,
- const m256 zeroes, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes);
- return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
-}
-
-const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
- const u8 *buf,
- const u8 *buf_end, u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , u8 run_len2
-#endif
- ) {
- assert(buf && buf_end);
- assert(buf < buf_end);
-
- // Slow path for small cases.
- if (buf_end - buf < 32) {
- return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
- buf, buf_end);
- }
-
- const m256 zeroes = zeroes256();
- const m256 low4bits = set32x8(0xf);
- const m256 wide_mask_lo = set2x128(mask_lo);
- const m256 wide_mask_hi = set2x128(mask_hi);
- const u8 *rv;
-
- size_t min = (size_t)buf % 32;
- assert(buf_end - buf >= 32);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m256 chars = loadu256(buf);
- rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf,
- low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
- buf += (32 - min);
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
- const u8 *last_block = buf_end - 32;
- while (buf < last_block) {
- m256 lchars = load256(buf);
- rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf,
- low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
- buf += 32;
- }
-
- // Use an unaligned load to mop up the last 32 bytes and get an accurate
- // picture to buf_end.
- assert(buf <= buf_end && buf >= buf_end - 32);
- chars = loadu256(buf_end - 32);
- rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32,
- low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "shufti_common.h"
-
-#include "ue2common.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-/* Normal SSSE3 shufti */
-
-static really_inline
-const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars,
- const u8 *buf, const m128 low4bits,
- const m128 zeroes, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- // negate first 16 bits
- u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF;
- return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
-}
-
-/*
- * 16-byte pipeline, for smaller scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi,
- const u8 *buf, const u8 *buf_end,
- const m128 low4bits,
- const m128 zeroes, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8* ptr, *last_buf;
- u32 last_res;
-
- // pipeline prologue: scan first 16 bytes
- m128 data = load128(buf);
- u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;
- last_buf = buf;
- last_res = z;
- buf += 16;
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 15 < buf_end; buf += 16) {
- // scan more data
- data = load128(buf);
- z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_buf = buf;
- last_res = z;
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- return NULL;
-}
-
-/*
- * 32-byte pipeline, for bigger scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi,
- const u8 *buf, const u8 *buf_end,
- const m128 low4bits,
- const m128 zeroes, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8* ptr, *last_buf;
- u32 res;
-
- // pipeline prologue: scan first 32 bytes
- m128 data1 = load128(buf);
- u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
- m128 data2 = load128(buf + 16);
- u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;
-
- // store the results
- u32 last_res = z1 | (z2 << 16);
- last_buf = buf;
- buf += 32;
-
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 31 < buf_end; buf += 32) {
- // scan more data
- data1 = load128(buf);
- z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
- data2 = load128(buf + 16);
- z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;
- res = z1 | (z2 << 16);
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_res = res;
- last_buf = buf;
- }
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- // if we still have some data left, scan it too
- for (; buf + 15 < buf_end; buf += 16) {
- m128 chars = load128(buf);
- ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
- low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- return NULL;
-}
-
-const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
- const u8 *buf,
- const u8 *buf_end, u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , u8 run_len2
-#endif
- ) {
- assert(buf && buf_end);
- assert(buf < buf_end);
-
- // Slow path for small cases.
- if (buf_end - buf < 16) {
- return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
- buf, buf_end);
- }
-
- const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
- const u8 *rv;
-
- size_t min = (size_t)buf % 16;
- assert(buf_end - buf >= 16);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m128 chars = loadu128(buf);
- rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
- low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
- buf += (16 - min);
-
- // if we have enough data, run bigger pipeline; otherwise run smaller one
- if (buf_end - buf >= 128) {
- rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi,
- buf, buf_end, low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(rv)) {
- return rv;
- }
- } else if (buf_end - buf >= 16){
- rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi,
- buf, buf_end, low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(rv)) {
- return rv;
- }
- }
-
- // Use an unaligned load to mop up the last 16 bytes and get an accurate
- // picture to buf_end.
- chars = loadu128(buf_end - 16);
- rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars,
- buf_end - 16, low4bits, zeroes, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
+++ /dev/null
-/*
- * Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "ue2common.h"
-#include "util/arch.h"
-
-#include "multitruffle.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-#include "multiaccel_common.h"
-
-#if !defined(HAVE_AVX2)
-
-#define MATCH_ALGO long_
-#include "multiaccel_long.h"
-#include "multitruffle_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO longgrab_
-#include "multiaccel_longgrab.h"
-#include "multitruffle_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shift_
-#include "multiaccel_shift.h"
-#include "multitruffle_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shiftgrab_
-#include "multiaccel_shiftgrab.h"
-#include "multitruffle_sse.h"
-#undef MATCH_ALGO
-
-#define MULTIACCEL_DOUBLE
-
-#define MATCH_ALGO doubleshift_
-#include "multiaccel_doubleshift.h"
-#include "multitruffle_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO doubleshiftgrab_
-#include "multiaccel_doubleshiftgrab.h"
-#include "multitruffle_sse.h"
-#undef MATCH_ALGO
-
-#undef MULTIACCEL_DOUBLE
-
-#else
-
-#define MATCH_ALGO long_
-#include "multiaccel_long.h"
-#include "multitruffle_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO longgrab_
-#include "multiaccel_longgrab.h"
-#include "multitruffle_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shift_
-#include "multiaccel_shift.h"
-#include "multitruffle_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shiftgrab_
-#include "multiaccel_shiftgrab.h"
-#include "multitruffle_avx2.h"
-#undef MATCH_ALGO
-
-#define MULTIACCEL_DOUBLE
-
-#define MATCH_ALGO doubleshift_
-#include "multiaccel_doubleshift.h"
-#include "multitruffle_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO doubleshiftgrab_
-#include "multiaccel_doubleshiftgrab.h"
-#include "multitruffle_avx2.h"
-#undef MATCH_ALGO
-
-#undef MULTIACCEL_DOUBLE
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTITRUFFLE_H
-#define MULTITRUFFLE_H
-
-/** \file
- * \brief Multitruffle: multibyte version of Truffle.
- *
- * Utilises the SSSE3 pshufb shuffle instruction
- */
-
-#include "util/simd_types.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end, const u8 run_len);
-
-const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end, const u8 run_len);
-
-const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end, const u8 run_len);
-
-const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset, const u8 *buf,
- const u8 *buf_end, const u8 run_len,
- const u8 run2_len);
-
-const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset, const u8 *buf,
- const u8 *buf_end, const u8 run_len,
- const u8 run2_len);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* MULTITRUFFLE_H */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Matches a byte in a charclass using three shuffles
- */
-
-#include "config.h"
-#include "ue2common.h"
-#include "multiaccel_common.h"
-
-/*
- * include "block" function
- */
-#include "truffle_common.h"
-
-/*
- * single-byte truffle fwd match function, should only be defined when not
- * compiling multiaccel
- */
-static really_inline
-const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
- m256 v, const u8 *buf, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v);
- return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
-}
-
-const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- DEBUG_PRINTF("run_len %zu\n", buf_end - buf);
- const m256 wide_clear = set2x128(shuf_mask_lo_highclear);
- const m256 wide_set = set2x128(shuf_mask_lo_highset);
-
- assert(buf && buf_end);
- assert(buf < buf_end);
- const u8 *rv;
-
- if (buf_end - buf < 32) {
- return truffleMini(wide_clear, wide_set, buf, buf_end);
- }
-
- size_t min = (size_t)buf % 32;
- assert(buf_end - buf >= 32);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m256 chars = loadu256(buf);
- rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
- buf += (32 - min);
-
- const u8 *last_block = buf_end - 32;
- while (buf < last_block) {
- m256 lchars = load256(buf);
- rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars,
- buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
- buf += 32;
- }
-
- // Use an unaligned load to mop up the last 32 bytes and get an accurate
- // picture to buf_end.
- assert(buf <= buf_end && buf >= buf_end - 32);
- chars = loadu256(buf_end - 32);
- rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars,
- buf_end - 32, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "ue2common.h"
-#include "multiaccel_common.h"
-
-/*
- * include "block" function
- */
-#include "truffle_common.h"
-
-/*
- * single-byte truffle fwd match function, should only be defined when not
- * compiling multiaccel
- */
-
-static really_inline
-const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- m128 v, const u8 *buf, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF;
- return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
-}
-
-/*
- * 16-byte pipeline, for smaller scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8* ptr, *last_buf;
- u32 last_res;
-
- // pipeline prologue: scan first 16 bytes
- m128 data = load128(buf);
- u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF;
- last_buf = buf;
- last_res = z;
- buf += 16;
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 15 < buf_end; buf += 16) {
- // scan more data
- data = load128(buf);
- z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF;
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_buf = buf;
- last_res = z;
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- return NULL;
-}
-
-/*
- * 32-byte pipeline, for bigger scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8* ptr, *last_buf;
- u32 res;
-
- // pipeline prologue: scan first 32 bytes
- m128 data1 = load128(buf);
- u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF;
- m128 data2 = load128(buf + 16);
- u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF;
-
- // store the results
- u32 last_res = z1 | (z2 << 16);
- last_buf = buf;
- buf += 32;
-
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 31 < buf_end; buf += 32) {
- // scan more data
- data1 = load128(buf);
- z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF;
- data2 = load128(buf + 16);
- z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF;
- res = z1 | (z2 << 16);
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_res = res;
- last_buf = buf;
- }
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- // if we still have some data left, scan it too
- for (; buf + 15 < buf_end; buf += 16) {
- m128 chars = load128(buf);
- ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
- chars, buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- return NULL;
-}
-
-const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end, const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- DEBUG_PRINTF("run_len %zu\n", buf_end - buf);
-
- assert(buf && buf_end);
- assert(buf < buf_end);
- const u8 *rv;
-
- if (buf_end - buf < 16) {
- return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end);
- }
-
- size_t min = (size_t)buf % 16;
- assert(buf_end - buf >= 16);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m128 chars = loadu128(buf);
- rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
- buf += (16 - min);
-
- // if we have enough data, run bigger pipeline; otherwise run smaller one
- if (buf_end - buf >= 128) {
- rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(rv)) {
- return rv;
- }
- } else if (buf_end - buf >= 16){
- rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(rv)) {
- return rv;
- }
- }
-
- // Use an unaligned load to mop up the last 16 bytes and get an accurate
- // picture to buf_end.
- chars = loadu128(buf_end - 16);
- rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars,
- buf_end - 16, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
+++ /dev/null
-/*
- * Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "ue2common.h"
-#include "util/arch.h"
-
-#include "multivermicelli.h"
-
-#include "multiaccel_common.h"
-
-#if !defined(HAVE_AVX2)
-
-#define MATCH_ALGO long_
-#include "multiaccel_long.h"
-#include "multivermicelli_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO longgrab_
-#include "multiaccel_longgrab.h"
-#include "multivermicelli_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shift_
-#include "multiaccel_shift.h"
-#include "multivermicelli_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shiftgrab_
-#include "multiaccel_shiftgrab.h"
-#include "multivermicelli_sse.h"
-#undef MATCH_ALGO
-
-#define MULTIACCEL_DOUBLE
-
-#define MATCH_ALGO doubleshift_
-#include "multiaccel_doubleshift.h"
-#include "multivermicelli_sse.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO doubleshiftgrab_
-#include "multiaccel_doubleshiftgrab.h"
-#include "multivermicelli_sse.h"
-#undef MATCH_ALGO
-
-#undef MULTIACCEL_DOUBLE
-
-#else
-
-#define MATCH_ALGO long_
-#include "multiaccel_long.h"
-#include "multivermicelli_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO longgrab_
-#include "multiaccel_longgrab.h"
-#include "multivermicelli_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shift_
-#include "multiaccel_shift.h"
-#include "multivermicelli_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO shiftgrab_
-#include "multiaccel_shiftgrab.h"
-#include "multivermicelli_avx2.h"
-#undef MATCH_ALGO
-
-#define MULTIACCEL_DOUBLE
-
-#define MATCH_ALGO doubleshift_
-#include "multiaccel_doubleshift.h"
-#include "multivermicelli_avx2.h"
-#undef MATCH_ALGO
-
-#define MATCH_ALGO doubleshiftgrab_
-#include "multiaccel_doubleshiftgrab.h"
-#include "multivermicelli_avx2.h"
-#undef MATCH_ALGO
-
-#undef MULTIACCEL_DOUBLE
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MULTIVERMICELLI_H_
-#define MULTIVERMICELLI_H_
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end, const u8 run_len);
-
-const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end, const u8 run_len,
- const u8 run2_len);
-
-const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end, const u8 run_len,
- const u8 run2_len);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* MULTIVERMICELLI_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-#include "util/unaligned.h"
-
-#include "multiaccel_common.h"
-
-static really_inline
-const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars,
- const u8 *buf,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- m256 casemask = set32x8(CASE_CLEAR);
- const u8 *ptr;
- m256 data = loadu256(buf);
- u32 z = movemask256(eq256(chars, and256(casemask, data)));
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (buf, z
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- return NULL;
-}
-
-static really_inline
-const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars,
- const u8 *buf,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8 *ptr;
-
- m256 data = loadu256(buf);
- u32 z = movemask256(eq256(chars, data));
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (buf, z
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- return NULL;
-}
-
-/*
- * 32-byte pipeline
- */
-static really_inline
-const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8* ptr, *last_buf;
- u32 last_res;
-
- // pipeline prologue: scan first 32 bytes
- m256 data = load256(buf);
- u32 z = movemask256(eq256(chars, data));
- last_res = z;
- last_buf = buf;
- buf += 32;
-
- // now, start the pipeline!
- assert((size_t)buf % 32 == 0);
- for (; buf + 31 < buf_end; buf += 32) {
- // scan more data
- data = load256(buf);
- z = movemask256(eq256(chars, data));
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_buf = buf;
- last_res = z;
- }
- assert(buf <= buf_end && buf >= buf_end - 32);
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- return NULL;
-}
-
-/*
- * 32-byte caseless pipeline
- */
-static really_inline
-const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- m256 casemask = set32x8(CASE_CLEAR);
- const u8* ptr, *last_buf;
- u32 last_res;
-
- // pipeline prologue: scan first 32 bytes
- m256 data = load256(buf);
- u32 z = movemask256(eq256(chars, and256(casemask, data)));
- last_res = z;
- last_buf = buf;
- buf += 32;
-
-
- // now, start the pipeline!
- assert((size_t)buf % 32 == 0);
- for (; buf + 31 < buf_end; buf += 32) {
- // scan more data
- data = load256(buf);
- z = movemask256(eq256(chars, and256(casemask, data)));
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_buf = buf;
- last_res = z;
- }
- assert(buf <= buf_end && buf >= buf_end - 32);
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- return NULL;
-}
-
-const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
- const u8 *ptr;
-
- // Handle small scans.
- if (buf_end - buf < 32) {
- for (; buf < buf_end; buf++) {
- char cur = (char)*buf;
- if (nocase) {
- cur &= CASE_CLEAR;
- }
- if (cur == c) {
- break;
- }
- }
- return buf;
- }
-
- m256 chars = set32x8(c); /* nocase already uppercase */
-
- uintptr_t min = (uintptr_t)buf % 32;
-
- if (min) {
- ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
- buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- ) : JOIN(MATCH_ALGO, vermUnalign)(chars,
- buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- buf += 32 - min;
- }
-
- if (buf_end - buf >= 32){
- ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- ) : JOIN(MATCH_ALGO, vermPipeline)(chars,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- }
-
- // final unaligned scan
- ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
- buf_end - 32, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- ) : JOIN(MATCH_ALGO, vermUnalign)(chars,
- buf_end - 32, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
-
- // run our pipeline
- return ptr ? ptr : buf_end;
-}
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-#include "util/unaligned.h"
-
-#define VERM_BOUNDARY 16
-#define VERM_TYPE m128
-#define VERM_SET_FN set16x8
-
-#include "multiaccel_common.h"
-
-static really_inline
-const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars,
- const u8 *buf,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- m128 casemask = set16x8(CASE_CLEAR);
- const u8 *ptr;
- m128 data = loadu128(buf);
- u32 z = movemask128(eq128(chars, and128(casemask, data)));
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (buf, z
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- return NULL;
-}
-
-static really_inline
-const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars,
- const u8 *buf,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8 *ptr;
-
- m128 data = loadu128(buf);
- u32 z = movemask128(eq128(chars, data));
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (buf, z
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- return NULL;
-}
-
-/*
- * 16-byte pipeline, for smaller scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8* ptr, *last_buf;
- u32 last_res;
-
- // pipeline prologue: scan first 16 bytes
- m128 data = load128(buf);
- u32 z = movemask128(eq128(chars, data));
- last_buf = buf;
- last_res = z;
- buf += 16;
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 15 < buf_end; buf += 16) {
- // scan more data
- data = load128(buf);
- z = movemask128(eq128(chars, data));
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_buf = buf;
- last_res = z;
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- return NULL;
-}
-
-/*
- * 16-byte pipeline, for smaller scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- m128 casemask = set16x8(CASE_CLEAR);
- const u8* ptr, *last_buf;
- u32 last_res;
-
- // pipeline prologue: scan first 16 bytes
- m128 data = load128(buf);
- u32 z = movemask128(eq128(chars, and128(casemask, data)));
- last_buf = buf;
- last_res = z;
- buf += 16;
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 15 < buf_end; buf += 16) {
- // scan more data
- data = load128(buf);
- z = movemask128(eq128(chars, and128(casemask, data)));
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_buf = buf;
- last_res = z;
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- return NULL;
-}
-
-/*
- * 32-byte pipeline, for bigger scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- const u8* ptr, *last_buf;
- u32 res;
-
- // pipeline prologue: scan first 32 bytes
- m128 data1 = load128(buf);
- u32 z1 = movemask128(eq128(chars, data1));
- m128 data2 = load128(buf + 16);
- u32 z2 = movemask128(eq128(chars, data2));
-
- // store the results
- u32 last_res = z1 | (z2 << VERM_BOUNDARY);
- last_buf = buf;
- buf += 32;
-
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 31 < buf_end; buf += 32) {
- // scan more data
- data1 = load128(buf);
- z1 = movemask128(eq128(chars, data1));
- data2 = load128(buf + 16);
- z2 = movemask128(eq128(chars, data2));
- res = z1 | (z2 << 16);
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_res = res;
- last_buf = buf;
- }
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- // if we still have some data left, scan it too
- if (buf + 15 < buf_end) {
- return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- return NULL;
-}
-
-/*
- * 32-byte caseless pipeline, for bigger scans
- */
-static
-const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- m128 casemask = set16x8(CASE_CLEAR);
- const u8* ptr, *last_buf;
- u32 last_res;
-
- // pipeline prologue: scan first 32 bytes
- m128 data1 = load128(buf);
- u32 z1 = movemask128(eq128(chars, and128(casemask, data1)));
- m128 data2 = load128(buf + 16);
- u32 z2 = movemask128(eq128(chars, and128(casemask, data2)));
- u32 z = z1 | (z2 << VERM_BOUNDARY);
-
- last_res = z;
- last_buf = buf;
- buf += 32;
-
- // now, start the pipeline!
- assert((size_t)buf % 16 == 0);
- for (; buf + 31 < buf_end; buf += 32) {
- // scan more data
- data1 = load128(buf);
- z1 = movemask128(eq128(chars, and128(casemask, data1)));
- data2 = load128(buf + 16);
- z2 = movemask128(eq128(chars, and128(casemask, data2)));
- z = z1 | (z2 << 16);
-
- // do a comparison on previous result
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- last_res = z;
- last_buf = buf;
- }
-
- // epilogue: compare final results
- ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
- (last_buf, last_res
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
-
- // if we still have some data left, scan it too
- if (buf + 15 < buf_end) {
- return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- }
- assert(buf <= buf_end && buf >= buf_end - 16);
-
- return NULL;
-}
-
-const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase,
- const u8 *buf,
- const u8 *buf_end,
- const u8 run_len
-#ifdef MULTIACCEL_DOUBLE
- , const u8 run_len2
-#endif
- ) {
- DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
- const u8 *ptr;
-
- // Handle small scans.
- if (buf_end - buf < VERM_BOUNDARY) {
- for (; buf < buf_end; buf++) {
- char cur = (char)*buf;
- if (nocase) {
- cur &= CASE_CLEAR;
- }
- if (cur == c) {
- break;
- }
- }
- return buf;
- }
-
- VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
-
- uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
-
- if (min) {
- ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
- buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- ) : JOIN(MATCH_ALGO, vermUnalign)(chars,
- buf, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- buf += VERM_BOUNDARY - min;
- }
-
- // if we have enough data, run bigger pipeline; otherwise run smaller one
- if (buf_end - buf >= 128) {
- ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- ) : JOIN(MATCH_ALGO, vermPipeline32)(chars,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- } else if (buf_end - buf >= 16){
- ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- ) : JOIN(MATCH_ALGO, vermPipeline16)(chars,
- buf, buf_end, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
- if (unlikely(ptr)) {
- return ptr;
- }
- }
-
- // final unaligned scan
- ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars,
- buf_end - VERM_BOUNDARY, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- ) : JOIN(MATCH_ALGO, vermUnalign)(chars,
- buf_end - VERM_BOUNDARY, run_len
-#ifdef MULTIACCEL_DOUBLE
- , run_len2
-#endif
- );
-
- // run our pipeline
- return ptr ? ptr : buf_end;
-}
#include "util/simd_utils.h"
#include "util/unaligned.h"
-#include "shufti_common.h"
+#ifdef DEBUG
+#include <ctype.h>
+
+#define DUMP_MSK(_t) \
+static UNUSED \
+void dumpMsk##_t(m##_t msk) { \
+ u8 * mskAsU8 = (u8 *)&msk; \
+ for (unsigned i = 0; i < sizeof(msk); i++) { \
+ u8 c = mskAsU8[i]; \
+ for (int j = 0; j < 8; j++) { \
+ if ((c >> (7-j)) & 0x1) \
+ printf("1"); \
+ else \
+ printf("0"); \
+ } \
+ printf(" "); \
+ } \
+} \
+static UNUSED \
+void dumpMsk##_t##AsChars(m##_t msk) { \
+ u8 * mskAsU8 = (u8 *)&msk; \
+ for (unsigned i = 0; i < sizeof(msk); i++) { \
+ u8 c = mskAsU8[i]; \
+ if (isprint(c)) \
+ printf("%c",c); \
+ else \
+ printf("."); \
+ } \
+}
+
+#endif
+
+/** \brief Naive byte-by-byte implementation. */
+static really_inline
+const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf,
+ const u8 *buf_end) {
+ assert(buf < buf_end);
+
+ for (; buf < buf_end; ++buf) {
+ u8 c = *buf;
+ if (lo[c & 0xf] & hi[c >> 4]) {
+ break;
+ }
+ }
+ return buf;
+}
/** \brief Naive byte-by-byte implementation. */
static really_inline
#if !defined(HAVE_AVX2)
/* Normal SSSE3 shufti */
+#ifdef DEBUG
+DUMP_MSK(128)
+#endif
+
+#define GET_LO_4(chars) and128(chars, low4bits)
+#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
+
+static really_inline
+u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
+ const m128 compare) {
+ m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
+ m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
+ m128 t = and128(c_lo, c_hi);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
+ DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
+ DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
+ DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
+ DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
+#endif
+ return movemask128(eq128(t, compare));
+}
+
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffff)) {
#else // AVX2 - 256 wide shuftis
+#ifdef DEBUG
+DUMP_MSK(256)
+#endif
+
+#define GET_LO_4(chars) and256(chars, low4bits)
+#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
+
+static really_inline
+u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
+ const m256 compare) {
+ m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
+ m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
+ m256 t = and256(c_lo, c_hi);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
+ DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
+ DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
+ DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
+ DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
+#endif
+
+ return movemask256(eq256(t, compare));
+}
+
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffffffff)) {
+++ /dev/null
-/*
- * Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SHUFTI_COMMON_H_
-#define SHUFTI_COMMON_H_
-
-#include "ue2common.h"
-
-#include "util/arch.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-#include "util/unaligned.h"
-
-/*
- * Common stuff for all versions of shufti (single, multi and multidouble)
- */
-
-/** \brief Naive byte-by-byte implementation. */
-static really_inline
-const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf,
- const u8 *buf_end) {
- assert(buf < buf_end);
-
- for (; buf < buf_end; ++buf) {
- u8 c = *buf;
- if (lo[c & 0xf] & hi[c >> 4]) {
- break;
- }
- }
- return buf;
-}
-
-#ifdef DEBUG
-#include <ctype.h>
-
-#define DUMP_MSK(_t) \
-static UNUSED \
-void dumpMsk##_t(m##_t msk) { \
- u8 * mskAsU8 = (u8 *)&msk; \
- for (unsigned i = 0; i < sizeof(msk); i++) { \
- u8 c = mskAsU8[i]; \
- for (int j = 0; j < 8; j++) { \
- if ((c >> (7-j)) & 0x1) \
- printf("1"); \
- else \
- printf("0"); \
- } \
- printf(" "); \
- } \
-} \
-static UNUSED \
-void dumpMsk##_t##AsChars(m##_t msk) { \
- u8 * mskAsU8 = (u8 *)&msk; \
- for (unsigned i = 0; i < sizeof(msk); i++) { \
- u8 c = mskAsU8[i]; \
- if (isprint(c)) \
- printf("%c",c); \
- else \
- printf("."); \
- } \
-}
-
-#endif
-
-#if !defined(HAVE_AVX2)
-
-#ifdef DEBUG
-DUMP_MSK(128)
-#endif
-
-#define GET_LO_4(chars) and128(chars, low4bits)
-#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
-
-static really_inline
-u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
- const m128 compare) {
- m128 c_lo = pshufb(mask_lo, GET_LO_4(chars));
- m128 c_hi = pshufb(mask_hi, GET_HI_4(chars));
- m128 t = and128(c_lo, c_hi);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
- DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
- DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
- DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
- DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
-#endif
- return movemask128(eq128(t, compare));
-}
-
-#else
-
-#ifdef DEBUG
-DUMP_MSK(256)
-#endif
-
-#define GET_LO_4(chars) and256(chars, low4bits)
-#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
-
-static really_inline
-u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
- const m256 compare) {
- m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars));
- m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars));
- m256 t = and256(c_lo, c_hi);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
- DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
- DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
- DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
- DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
-#endif
-
- return movemask256(eq256(t, compare));
-}
-
-#endif
-
-
-#endif /* SHUFTI_COMMON_H_ */
#include "util/bitutils.h"
#include "util/simd_utils.h"
-#include "truffle_common.h"
-
#if !defined(HAVE_AVX2)
static really_inline
return NULL; // no match
}
+static really_inline
+const u8 *firstMatch(const u8 *buf, u32 z) {
+ if (unlikely(z != 0xffff)) {
+ u32 pos = ctz32(~z & 0xffff);
+ assert(pos < 16);
+ return buf + pos;
+ }
+
+ return NULL; // no match
+}
+
+static really_inline
+u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
+
+ m128 highconst = _mm_set1_epi8(0x80);
+ m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
+
+ // and now do the real work
+ m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
+ m128 t1 = xor128(v, highconst);
+ m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
+ m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
+ m128 shuf3 = pshufb(shuf_mask_hi, t2);
+ m128 tmp = and128(or128(shuf1, shuf2), shuf3);
+ m128 tmp2 = eq128(tmp, zeroes128());
+ u32 z = movemask128(tmp2);
+
+ return z;
+}
+
+static
+const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
+ const u8 *buf, const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ assert(len < 16);
+
+ m128 chars = zeroes128();
+ memcpy(&chars, buf, len);
+
+ u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
+ // can't be these bytes in z
+ u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
+ const u8 *rv = firstMatch(buf, z | mask);
+
+ if (rv) {
+ return rv;
+ } else {
+ return buf_end;
+ }
+}
+
static really_inline
const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
m128 v, const u8 *buf) {
m128 chars = zeroes128();
memcpy(&chars, buf, len);
- u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF;
+ u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
const u8 *rv = lastMatch(buf, z | mask);
#else
+// AVX2
+
static really_inline
const u8 *lastMatch(const u8 *buf, u32 z) {
if (unlikely(z != 0xffffffff)) {
return NULL; // no match
}
+static really_inline
+const u8 *firstMatch(const u8 *buf, u32 z) {
+ if (unlikely(z != 0xffffffff)) {
+ u32 pos = ctz32(~z);
+ assert(pos < 32);
+ return buf + pos;
+ }
+
+ return NULL; // no match
+}
+
+static really_inline
+u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
+
+ m256 highconst = _mm256_set1_epi8(0x80);
+ m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
+
+ // and now do the real work
+ m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
+ m256 t1 = xor256(v, highconst);
+ m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
+ m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
+ m256 shuf3 = vpshufb(shuf_mask_hi, t2);
+ m256 tmp = and256(or256(shuf1, shuf2), shuf3);
+ m256 tmp2 = eq256(tmp, zeroes256());
+ u32 z = movemask256(tmp2);
+
+ return z;
+}
+
+static
+const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
+ const u8 *buf, const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ assert(len < 32);
+
+ m256 chars = zeroes256();
+ memcpy(&chars, buf, len);
+
+ u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
+ // can't be these bytes in z
+ u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff;
+ const u8 *rv = firstMatch(buf, z | mask);
+
+ if (rv) {
+ return rv;
+ } else {
+ return buf_end;
+ }
+}
+
static really_inline
const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
m256 v, const u8 *buf) {
m256 chars = zeroes256();
memcpy(&chars, buf, len);
- u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF;
+ u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff;
u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
const u8 *rv = lastMatch(buf, z | mask);
+++ /dev/null
-/*
- * Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TRUFFLE_COMMON_H_
-#define TRUFFLE_COMMON_H_
-
-#include "util/arch.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-/*
- * Common stuff for all versions of truffle (single, multi and multidouble)
- */
-#if !defined(HAVE_AVX2)
-
-static really_inline
-const u8 *firstMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffff)) {
- u32 pos = ctz32(~z & 0xffff);
- assert(pos < 16);
- return buf + pos;
- }
-
- return NULL; // no match
-}
-
-static really_inline
-u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
-
- m128 highconst = _mm_set1_epi8(0x80);
- m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
-
- // and now do the real work
- m128 shuf1 = pshufb(shuf_mask_lo_highclear, v);
- m128 t1 = xor128(v, highconst);
- m128 shuf2 = pshufb(shuf_mask_lo_highset, t1);
- m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
- m128 shuf3 = pshufb(shuf_mask_hi, t2);
- m128 tmp = and128(or128(shuf1, shuf2), shuf3);
- m128 tmp2 = eq128(tmp, zeroes128());
- u32 z = movemask128(tmp2);
-
- return z;
-}
-
-static
-const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- assert(len < 16);
-
- m128 chars = zeroes128();
- memcpy(&chars, buf, len);
-
- u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
- // can't be these bytes in z
- u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF;
- const u8 *rv = firstMatch(buf, z| mask);
-
- if (rv) {
- return rv;
- } else {
- return buf_end;
- }
-}
-
-#else
-
-static really_inline
-const u8 *firstMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffffffff)) {
- u32 pos = ctz32(~z);
- assert(pos < 32);
- return buf + pos;
- }
-
- return NULL; // no match
-}
-
-static really_inline
-u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) {
-
- m256 highconst = _mm256_set1_epi8(0x80);
- m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201);
-
- // and now do the real work
- m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v);
- m256 t1 = xor256(v, highconst);
- m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1);
- m256 t2 = andnot256(highconst, rshift64_m256(v, 4));
- m256 shuf3 = vpshufb(shuf_mask_hi, t2);
- m256 tmp = and256(or256(shuf1, shuf2), shuf3);
- m256 tmp2 = eq256(tmp, zeroes256());
- u32 z = movemask256(tmp2);
-
- return z;
-}
-
-static
-const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- assert(len < 32);
-
- m256 chars = zeroes256();
- memcpy(&chars, buf, len);
-
- u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
- // can't be these bytes in z
- u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF;
- const u8 *rv = firstMatch(buf, z | mask);
-
- if (rv) {
- return rv;
- } else {
- return buf_end;
- }
-}
-
-#endif
-
-#endif /* TRUFFLE_COMMON_H_ */
#include "ue2common.h"
#include "nfa/accel.h"
-#include "nfa/multiaccel_compilehelper.h"
#include "util/bitutils.h" // for CASE_CLEAR
#include "util/charreach.h"
return g.startDs;
}
-static
-NFAVertex find_next(const NFAVertex v, const NGHolder &g) {
- NFAVertex res = NGHolder::null_vertex();
- for (NFAVertex u : adjacent_vertices_range(v, g)) {
- if (u != v) {
- res = u;
- break;
- }
- }
- return res;
-}
-
-/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */
-MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
- const vector<NFAVertex> &states,
- const CompileContext &cc) {
- // For a set of states to be accelerable, we basically have to have only
- // one state to accelerate.
- if (states.size() != 1) {
- DEBUG_PRINTF("can't accelerate multiple states\n");
- return MultibyteAccelInfo();
- }
-
- // Get our base vertex
- NFAVertex v = states[0];
-
- // We need the base vertex to be a self-looping dotall leading to exactly
- // one vertex.
- if (!hasSelfLoop(v, g)) {
- DEBUG_PRINTF("base vertex has self-loop\n");
- return MultibyteAccelInfo();
- }
-
- if (!g[v].char_reach.all()) {
- DEBUG_PRINTF("can't accelerate anything but dot\n");
- return MultibyteAccelInfo();
- }
-
- if (proper_out_degree(v, g) != 1) {
- DEBUG_PRINTF("can't accelerate states with multiple successors\n");
- return MultibyteAccelInfo();
- }
-
- // find our start vertex
- NFAVertex cur = find_next(v, g);
- if (cur == NGHolder::null_vertex()) {
- DEBUG_PRINTF("invalid start vertex\n");
- return MultibyteAccelInfo();
- }
-
- bool has_offset = false;
- u32 offset = 0;
- CharReach cr = g[cur].char_reach;
-
- // if we start with a dot, we have an offset, so defer figuring out the
- // real CharReach for this accel scheme
- if (cr == CharReach::dot()) {
- has_offset = true;
- offset = 1;
- }
-
- // figure out our offset
- while (has_offset) {
- // vertices have to have no self loops
- if (hasSelfLoop(cur, g)) {
- DEBUG_PRINTF("can't have self-loops\n");
- return MultibyteAccelInfo();
- }
-
- // we have to have exactly 1 successor to have this acceleration scheme
- if (out_degree(cur, g) != 1) {
- DEBUG_PRINTF("can't have multiple successors\n");
- return MultibyteAccelInfo();
- }
-
- cur = *adjacent_vertices(cur, g).first;
-
- // if we met a special vertex, bail out
- if (is_special(cur, g)) {
- DEBUG_PRINTF("can't have special vertices\n");
- return MultibyteAccelInfo();
- }
-
- // now, get the real char reach
- if (g[cur].char_reach != CharReach::dot()) {
- cr = g[cur].char_reach;
- has_offset = false;
- } else {
- offset++;
- }
- }
-
- // now, fire up the compilation machinery
- target_t ti = cc.target_info;
- unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE;
- MultiaccelCompileHelper mac(cr, offset, max_len);
-
- while (mac.canAdvance()) {
- // vertices have to have no self loops
- if (hasSelfLoop(cur, g)) {
- break;
- }
-
- // we have to have exactly 1 successor to have this acceleration scheme
- if (out_degree(cur, g) != 1) {
- break;
- }
-
- cur = *adjacent_vertices(cur, g).first;
-
- // if we met a special vertex, bail out
- if (is_special(cur, g)) {
- break;
- }
-
- mac.advance(g[cur].char_reach);
- }
- MultibyteAccelInfo mai = mac.getBestScheme();
-#ifdef DEBUG
- DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n",
- mai.type, mai.offset, mai.len1, mai.len2);
- for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) {
- DEBUG_PRINTF("multibyte accel char: %zu\n", c);
- }
-#endif
- return mai;
-}
-
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr,
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#define MAX_MERGED_ACCEL_STOPS 200
#define ACCEL_MAX_STOP_CHAR 24
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
-#define MULTIACCEL_MIN_LEN 3
-#define MULTIACCEL_MAX_LEN_SSE 15
-#define MULTIACCEL_MAX_LEN_AVX2 31
// forward-declaration of CompileContext
struct CompileContext;
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
AccelScheme *as, bool allow_wide);
-/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA).
- */
-MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g,
- const std::vector<NFAVertex> &verts,
- const CompileContext &cc);
} // namespace ue2
internal/limex_nfa.cpp
internal/masked_move.cpp
internal/multi_bit.cpp
- internal/multiaccel_matcher.cpp
- internal/multiaccel_shift.cpp
internal/nfagraph_common.h
internal/nfagraph_comp.cpp
internal/nfagraph_equivalence.cpp
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-extern "C" {
-#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works
-}
-
-#include "config.h"
-#include "src/ue2common.h"
-
-#include "gtest/gtest.h"
-#include "nfagraph/ng_limex_accel.h"
-#include "nfa/accelcompile.h"
-#include "nfa/multivermicelli.h"
-#include "nfa/multishufti.h"
-#include "nfa/multitruffle.h"
-#include "util/alloc.h"
-#include "util/charreach.h"
-
-#include <algorithm>
-#include <iostream>
-#include <random>
-#include <string>
-#include <vector>
-
-using namespace ue2;
-using namespace std;
-using namespace testing;
-
-// test parameters structure
-struct MultiaccelTestParam {
- string match_pattern;
- u32 match_pattern_start_idx;
- u32 match_idx;
- bool test_all_offsets;
- u8 match_len1;
- u8 match_len2;
- MultibyteAccelInfo::multiaccel_type type;
-};
-
-// buffer size is constant
-static const u32 BUF_SIZE = 200;
-
-// strings, out of which CharReach will be generated
-static const string VERM_CR = "a";
-static const string V_NC_CR = "aA";
-static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz";
-static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99";
-
-// Parameterized test case for multiaccel patterns.
-class MultiaccelTest : public TestWithParam<MultiaccelTestParam> {
-protected:
- virtual void SetUp() {
- // set up is deferred until the actual test, since we can't compile
- // any accel schemes unless we know CharReach
- const MultiaccelTestParam &p = GetParam();
-
- // reserve space in our buffer
- buffer = (u8 *)aligned_zmalloc(BUF_SIZE);
-
- // store the index where we expect to see the match. note that it may
- // be different from where the match pattern has started since we may
- // have a flooded match (i.e. a match preceded by almost-match) or a
- // no-match (in which case "match" index is at the end of the buffer).
- match_idx = p.match_idx;
-
- // make note if we need to test all offsets - sometimes we don't, for
- // example when testing partial or no-match.
- test_all_offsets = p.test_all_offsets;
- }
-
- char getChar(const CharReach &cr) {
- assert(cr.count() > 0);
- auto dist = uniform_int_distribution<size_t>(0, cr.count() - 1);
- size_t result = cr.find_nth(dist(prng));
- assert(result != CharReach::npos);
- return (char)result;
- }
-
- // char generator
- char getChar(const CharReach &cr, bool match) {
- return getChar(match ? cr : ~cr);
- }
-
- // appends a string with matches/unmatches according to input match pattern
- void getMatch(u8 *result, u32 start, const string &pattern,
- const CharReach &cr) {
- for (const auto &c : pattern) {
- result[start++] = getChar(cr, c == '1');
- }
- }
-
- // appends non-matching noise of certain lengths
- void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) {
- for (unsigned i = 0; i < len; i++) {
- result[start + i] = getChar(cr, false);
- }
- }
-
- // deferred buffer generation, as we don't know CharReach before we run the test
- void GenerateBuffer(const CharReach &cr) {
- const MultiaccelTestParam &p = GetParam();
-
- // step 1: fill prefix with non-matching noise
- u32 start = 0;
- getNoise(buffer, start, p.match_pattern_start_idx, cr);
-
- // step 2: add a match
- start += p.match_pattern_start_idx;
- getMatch(buffer, start, p.match_pattern, cr);
-
- // step 3: fill in the rest of the buffer with non-matching noise
- start += p.match_pattern.size();
- getNoise(buffer, start, BUF_SIZE - p.match_pattern.size() -
- p.match_pattern_start_idx, cr);
- }
-
- // deferred accel scheme generation, as we don't know CharReach before we run the test
- void CompileAccelScheme(const CharReach &cr, AccelAux *aux) {
- const MultiaccelTestParam &p = GetParam();
-
- AccelInfo ai;
- ai.single_stops = cr; // dummy CharReach to prevent red tape accel
- ai.ma_len1 = p.match_len1;
- ai.ma_len2 = p.match_len2;
- ai.multiaccel_stops = cr;
- ai.ma_type = p.type;
-
- buildAccelAux(ai, aux);
-
- // now, verify we've successfully built our accel scheme, *and* that it's
- // a multibyte scheme
- ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM &&
- aux->accel_type <= ACCEL_MDSGTRUFFLE);
- }
-
- virtual void TearDown() {
- aligned_free(buffer);
- }
-
- // We want our tests to be deterministic, so we use a PRNG in the test
- // fixture.
- mt19937 prng;
-
- u32 match_idx;
- u8 *buffer;
- bool test_all_offsets;
-};
-
-static
-void runTest(const u8 *buffer, AccelAux *aux, unsigned match_idx,
- bool test_all_offsets) {
- const u8 *start = buffer;
- const u8 *end = start + BUF_SIZE;
- const u8 *match = start + match_idx;
-
- // comparing indexes into the buffer is easier to understand than pointers
- if (test_all_offsets) {
- // run_accel can only scan >15 byte buffers
- u32 end_offset = min(match_idx, BUF_SIZE - 15);
-
- for (unsigned offset = 0; offset < end_offset; offset++) {
- const u8 *ptr = run_accel(aux, (start + offset), end);
- unsigned idx = ptr - start;
- ASSERT_EQ(match_idx, idx);
- }
- } else {
- const u8 *ptr = run_accel(aux, start, end);
- unsigned idx = ptr - start;
- ASSERT_EQ(match_idx, idx);
- }
-}
-
-TEST_P(MultiaccelTest, TestVermicelli) {
- AccelAux aux = {0};
- CharReach cr(VERM_CR);
-
- GenerateBuffer(cr);
-
- CompileAccelScheme(cr, &aux);
-
- runTest(buffer, &aux, match_idx, test_all_offsets);
-}
-
-TEST_P(MultiaccelTest, TestVermicelliNocase) {
- AccelAux aux = {0};
- CharReach cr(V_NC_CR);
-
- GenerateBuffer(cr);
-
- CompileAccelScheme(cr, &aux);
-
- runTest(buffer, &aux, match_idx, test_all_offsets);
-}
-
-TEST_P(MultiaccelTest, TestShufti) {
- AccelAux aux = {0};
- CharReach cr(SHUF_CR);
-
- GenerateBuffer(cr);
-
- CompileAccelScheme(cr, &aux);
-
- runTest(buffer, &aux, match_idx, test_all_offsets);
-}
-
-TEST_P(MultiaccelTest, TestTruffle) {
- AccelAux aux = {0};
- CharReach cr(TRUF_CR);
-
- GenerateBuffer(cr);
-
- CompileAccelScheme(cr, &aux);
-
- runTest(buffer, &aux, match_idx, test_all_offsets);
-}
-
-static const MultiaccelTestParam multiaccelTests[] = {
- // long matcher
-
- // full, partial, flooded, nomatch
- {"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
- {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG},
- {"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
- {"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG},
-
- // long-grab matcher
-
- // full, partial, flooded, nomatch
- {"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
- {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
- {"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
- {"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB},
-
- // shift matcher
-
- // full, partial, flooded, nomatch
- {"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
- {"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
- {"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
- {"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT},
-
- // shift-grab matcher
-
- // full, partial, flooded, nomatch
- {"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
- {"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
- {"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
- {"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB},
-
- // doubleshift matcher
-
- // full, partial (one and two shifts), flooded, nomatch
- {"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
- {"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
- {"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
- {"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
- {"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT},
-
- // doubleshift-grab matcher
-
- // full, partial (one and two shifts), flooded, nomatch
- {"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
- {"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
- {"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
- {"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
- {"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB},
-};
-
-INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests));
-
-// boring stuff for google test
-void PrintTo(const MultiaccelTestParam &p, ::std::ostream *os) {
- *os << "MultiaccelTestParam: " << p.match_pattern;
-}
+++ /dev/null
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "src/ue2common.h"
-
-#include "gtest/gtest.h"
-#include "nfa/multiaccel_common.h"
-
-/*
- * Unit tests for the shifters.
- *
- * This is a bit messy, as shifters are macros, so we're using macros to test
- * other macros.
- */
-
-#define TEST_SHIFT(n) \
- do { \
- u64a val = ((u64a) 1 << n) - 1; \
- JOIN(SHIFT, n)(val); \
- ASSERT_EQ(val, 1); \
- } while (0)
-
-TEST(MultiaccelShift, StaticShift) {
- TEST_SHIFT(1);
- TEST_SHIFT(2);
- TEST_SHIFT(3);
- TEST_SHIFT(4);
- TEST_SHIFT(5);
- TEST_SHIFT(6);
- TEST_SHIFT(7);
- TEST_SHIFT(8);
- TEST_SHIFT(10);
- TEST_SHIFT(11);
- TEST_SHIFT(12);
- TEST_SHIFT(13);
- TEST_SHIFT(14);
- TEST_SHIFT(15);
- TEST_SHIFT(16);
- TEST_SHIFT(17);
- TEST_SHIFT(18);
- TEST_SHIFT(19);
- TEST_SHIFT(20);
- TEST_SHIFT(21);
- TEST_SHIFT(22);
- TEST_SHIFT(23);
- TEST_SHIFT(24);
- TEST_SHIFT(25);
- TEST_SHIFT(26);
- TEST_SHIFT(27);
- TEST_SHIFT(28);
- TEST_SHIFT(29);
- TEST_SHIFT(30);
- TEST_SHIFT(31);
- TEST_SHIFT(32);
-}