From: Ken Steele Date: Mon, 16 Sep 2013 12:57:53 +0000 (-0400) Subject: Move SIMD implementations out of detect.c X-Git-Tag: suricata-2.0beta2~371 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=50f859e9f2bfa012b6836c7381b5473c8e9f909d;p=thirdparty%2Fsuricata.git Move SIMD implementations out of detect.c Move the SIMD implementations of SigMatchSignaturesBuildMatchArray() for SSE3 and Tile out of detect.c to reduce the size of the file. Also moved the SIMD unit tests to detect-simd.c. --- diff --git a/src/Makefile.am b/src/Makefile.am index c2c46e09d9..2084a1efce 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -65,6 +65,7 @@ detect-byte-extract.c detect-byte-extract.h \ detect-bytejump.c detect-bytejump.h \ detect-bytetest.c detect-bytetest.h \ detect.c detect.h \ +detect-simd.c \ detect-classtype.c detect-classtype.h \ detect-content.c detect-content.h \ detect-csum.c detect-csum.h \ diff --git a/src/detect-simd.c b/src/detect-simd.c new file mode 100644 index 0000000000..0323ba8a02 --- /dev/null +++ b/src/detect-simd.c @@ -0,0 +1,575 @@ +/* Copyright (C) 2013 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */
+
+/**
+ * \file
+ *
+ * \author Victor Julien
+ *
+ * SIMD (SSE3 and TILE-Gx) implementations of signature mask prefiltering
+ */
+
+#include "suricata-common.h"
+#include "detect.h"
+
+#include "util-unittest.h"
+#include "util-unittest-helper.h"
+#include "util-vector.h"
+
+/* Built as its own source file (see Makefile.am); prototypes are in detect.h */
+
+#if defined(__SSE3__)
+
+/**
+ * \brief SIMD implementation of mask prefiltering.
+ *
+ * Mass mask matching is done by creating a bitmap of signatures that need
+ * further inspection.
+ *
+ * On 32 bit systems we inspect in 32 sig batches, creating a u32 with flags.
+ * On 64 bit systems we inspect in 64 sig batches, creating a u64 with flags.
+ * The size of a register is leading here.
+ */
+void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
+                                       Packet *p, SignatureMask mask, uint16_t alproto)
+{
+    uint32_t u;
+    SigIntId x;
+    int bitno = 0;
+#if __WORDSIZE == 32
+    register uint32_t bm; /* bit mask, 32 bits used */
+
+    Vector pm, sm, r1, r2;
+    /* load the packet mask into each byte of the vector */
+    pm.v = _mm_set1_epi8(mask);
+
+    /* reset previous run */
+    det_ctx->match_array_cnt = 0;
+
+    for (u = 0; u < det_ctx->sgh->sig_cnt; u += 32) {
+        /* load the masks of sigs u .. u+15 */
+        sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u]);
+        /* logical AND them with the packet's mask */
+        r1.v = _mm_and_si128(pm.v, sm.v);
+        /* a sig is a candidate when (packet mask & sig mask) == sig mask */
+        r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+        /* convert into a bitarray: one bit per sig, bits 0-15 */
+        bm = ((uint32_t) _mm_movemask_epi8(r2.v));
+
+        SCLogDebug("bm1 %08x", bm);
+
+        /* load the masks of sigs u+16 .. u+31 */
+        sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+16]);
+        /* logical AND them with the packet's mask */
+        r1.v = _mm_and_si128(pm.v, sm.v);
+        /* compare the result with the original mask */
+        r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+        /* convert into a bitarray: bits 16-31 */
+        bm |= ((uint32_t) _mm_movemask_epi8(r2.v) << 16);
+
+        SCLogDebug("bm2 %08x", bm);
+
+        if (bm == 0) {
+            continue;
+        }
+
+        /* Check each bit in the bit map. Little endian is assumed (SSE is x86),
+         * so the bits are in memory backwards, 0 is on the right edge,
+         * 31 on the left edge. This is why above we store the output of the
+         * _mm_movemask_epi8 in this order as well */
+        bitno = 0;
+        for (x = u; x < det_ctx->sgh->sig_cnt && bitno < 32; x++, bitno++) {
+            if (bm & (1U << bitno)) { /* unsigned: 1 << 31 overflows int */
+                SignatureHeader *s = &det_ctx->sgh->head_array[x];
+
+                if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) {
+                    /* okay, store it */
+                    det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig;
+                    det_ctx->match_array_cnt++;
+                }
+            }
+        }
+    }
+#elif __WORDSIZE == 64
+    register uint64_t bm; /* bit mask, 64 bits used */
+
+    Vector pm, sm, r1, r2;
+    /* load the packet mask into each byte of the vector */
+    pm.v = _mm_set1_epi8(mask);
+
+    /* reset previous run */
+    det_ctx->match_array_cnt = 0;
+
+    for (u = 0; u < det_ctx->sgh->sig_cnt; u += 64) {
+        /* load the masks of sigs u .. u+15 */
+        sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u]);
+        /* logical AND them with the packet's mask */
+        r1.v = _mm_and_si128(pm.v, sm.v);
+        /* a sig is a candidate when (packet mask & sig mask) == sig mask */
+        r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+        /* convert into a bitarray: one bit per sig, bits 0-15 */
+        bm = ((uint64_t) _mm_movemask_epi8(r2.v));
+
+        SCLogDebug("bm1 %08"PRIx64, bm);
+
+        /* load the masks of sigs u+16 .. u+31 */
+        sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+16]);
+        /* logical AND them with the packet's mask */
+        r1.v = _mm_and_si128(pm.v, sm.v);
+        /* compare the result with the original mask */
+        r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+        /* convert into a bitarray: bits 16-31 */
+        bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 16;
+
+        /* load the masks of sigs u+32 .. u+47 */
+        sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+32]);
+        /* logical AND them with the packet's mask */
+        r1.v = _mm_and_si128(pm.v, sm.v);
+        /* compare the result with the original mask */
+        r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+        /* convert into a bitarray: bits 32-47 */
+        bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 32;
+
+        /* load the masks of sigs u+48 .. u+63 */
+        sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+48]);
+        /* logical AND them with the packet's mask */
+        r1.v = _mm_and_si128(pm.v, sm.v);
+        /* compare the result with the original mask */
+        r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+        /* convert into a bitarray: bits 48-63 */
+        bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 48;
+
+        SCLogDebug("bm2 %08"PRIx64, bm);
+
+        if (bm == 0) {
+            continue;
+        }
+
+        /* Check each bit in the bit map. Little endian is assumed (SSE is x86-64),
+         * so the bits are in memory backwards, 0 is on the right edge,
+         * 63 on the left edge. This is why above we store the output of the
+         * _mm_movemask_epi8 in this order as well */
+        bitno = 0;
+        for (x = u; x < det_ctx->sgh->sig_cnt && bitno < 64; x++, bitno++) {
+            if (bm & ((uint64_t)1 << bitno)) {
+                SignatureHeader *s = &det_ctx->sgh->head_array[x];
+
+                if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) {
+                    /* okay, store it */
+                    det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig;
+                    det_ctx->match_array_cnt++;
+                }
+            }
+        }
+    }
+#else
+#error Wordsize (__WORDSIZE) neither 32 or 64.
+#endif
+}
+ /* end defined(__SSE3__) */
+#elif defined(__tile__)
+
+/**
+ * \brief SIMD implementation of mask prefiltering for TILE-Gx
+ *
+ * Mass mask matching is done by creating a bitmap of signatures that need
+ * further inspection.
+ */
+void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
+                                       Packet *p, SignatureMask mask, uint16_t alproto)
+{
+    uint32_t u;
+    register uint64_t bm; /* bit mask, 64 bits used */
+
+    /* Keep local copies of variables that don't change during this function. */
+    uint64_t *mask_vector = (uint64_t*)det_ctx->sgh->mask_array;
+    uint32_t sig_cnt = det_ctx->sgh->sig_cnt;
+    SignatureHeader *head_array = det_ctx->sgh->head_array;
+
+    Signature **match_array = det_ctx->match_array;
+    uint32_t match_count = 0;
+
+    /* Replicate the packet mask into each byte of the vector. */
+    uint64_t pm = __insn_shufflebytes(mask, 0, 0);
+
+    /* u is the signature index. */
+    for (u = 0; u < sig_cnt; u += 8) {
+        /* Load 8 masks */
+        uint64_t sm = *mask_vector++;
+        /* Binary AND 8 masks with the packet's mask */
+        uint64_t r1 = pm & sm;
+        /* Compare the result with the original mask
+         * Result if equal puts a 1 in LSB of bytes that match.
+         */
+        bm = __insn_v1cmpeq(sm, r1);
+
+        /* Check the LSB bit of each byte in the bit map. Little endian is assumed,
+         * so the LSB byte is index 0. Uses count trailing zeros to find least
+         * significant bit that is set. */
+        while (bm) {
+            /* Find first bit set starting from LSB. */
+            unsigned int first_bit = __insn_ctz(bm);
+            unsigned int first_byte = first_bit >> 3;
+            unsigned int x = u + first_byte;
+            if (x >= sig_cnt)
+                break;
+            SignatureHeader *s = &head_array[x];
+
+            /* Clear the first bit set, so it is not found again. */
+            bm -= (1UL << first_bit);
+
+            if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) {
+                /* okay, store it */
+                *match_array++ = s->full_sig;
+                match_count++;
+            }
+        }
+    }
+    det_ctx->match_array_cnt = match_count;
+}
+#endif /* defined(__tile__) */
+
+
+#ifdef UNITTESTS
+#include "flow-util.h"
+#include "stream-tcp-reassemble.h"
+#include "util-var-name.h"
+
+/// SCLogInfo("%s %u %u %u %u", #v, (v).dw[0], (v).dw[1], (v).dw[2], (v).dw[3]);
+#define VECTOR_SCLogInfo(v) { \
+    SCLogInfo("%s %08X %08X %08X %08X", #v, (v).dw[0], (v).dw[1], (v).dw[2], (v).dw[3]); \
+}
+
+/**
+ * \test Test 32 bit SIMD code.
+ */
+static int SigTestSIMDMask01(void)
+{
+#if defined (__SSE3__)
+    Vector pm, sm, r1, r2;
+    uint32_t bm = 0;
+
+    uint8_t *mask = SCMallocAligned(32, 16);
+    memset(mask, 0xEF, 32); /* 31 sig masks equal to the packet mask */
+    mask[31] = 0xFF; /* last sig needs bit 0x10, which the packet mask lacks */
+    printf("\n");
+    pm.v = _mm_set1_epi8(0xEF);
+    VECTOR_SCLogInfo(pm);
+
+    /* load sig masks 0 .. 15 */
+    sm.v = _mm_load_si128((const __m128i *)&mask[0]);
+    VECTOR_SCLogInfo(sm);
+
+    /* logical AND them with the packet's mask */
+    r1.v = _mm_and_si128(pm.v, sm.v);
+    VECTOR_SCLogInfo(r1);
+    /* compare the result with the original mask */
+    r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+    VECTOR_SCLogInfo(r2);
+    /* convert into a bitarray: bits 0-15 */
+    bm = ((uint32_t) _mm_movemask_epi8(r2.v));
+
+    SCLogInfo("bm %08x", bm);
+
+    /* load sig masks 16 .. 31 */
+    sm.v = _mm_load_si128((const __m128i *)&mask[16]);
+    VECTOR_SCLogInfo(sm);
+    /* logical AND them with the packet's mask */
+    r1.v = _mm_and_si128(pm.v, sm.v);
+    VECTOR_SCLogInfo(r1);
+    /* compare the result with the original mask */
+    r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+    VECTOR_SCLogInfo(r2);
+    /* convert into a bitarray: bits 16-31 */
+    bm |= ((uint32_t) _mm_movemask_epi8(r2.v)) << 16;
+
+    SCLogInfo("bm %08x", bm);
+
+    int b = 0;
+    for ( ; b < 32; b++){
+        if (bm & (1U << b)) { /* unsigned: 1 << 31 overflows int */
+            SCLogInfo("b %02d, set", b);
+        } else {
+            SCLogInfo("b %02d, not set", b);
+
+        }
+    }
+
+    if (!(bm & (1U << 31))) { /* pass: sig 31 must not be selected */
+        return 1;
+    }
+    return 0;
+#else
+    return 1;
+#endif
+}
+
+/**
+ * \test Test 32 bit SIMD code.
+ */
+static int SigTestSIMDMask02(void)
+{
+#if defined (__SSE3__)
+    Vector pm, sm, r1, r2;
+    uint32_t bm = 0;
+
+    uint8_t *mask = SCMallocAligned(32, 16);
+    memset(mask, 0x01, 32); /* 31 sigs need bit 0x01, absent from the packet mask */
+    mask[31] = 0; /* last sig has no requirements, so it always passes */
+    pm.v = _mm_set1_epi8(0x02);
+    VECTOR_SCLogInfo(pm);
+
+    /* load sig masks 0 .. 15 */
+    sm.v = _mm_load_si128((const __m128i *)&mask[0]);
+    VECTOR_SCLogInfo(sm);
+
+    /* logical AND them with the packet's mask */
+    r1.v = _mm_and_si128(pm.v, sm.v);
+    VECTOR_SCLogInfo(r1);
+    /* compare the result with the original mask */
+    r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+    VECTOR_SCLogInfo(r2);
+    /* convert into a bitarray: bits 0-15 */
+    bm = ((uint32_t) _mm_movemask_epi8(r2.v));
+
+    SCLogInfo("bm %08x", bm);
+
+    /* load sig masks 16 .. 31 */
+    sm.v = _mm_load_si128((const __m128i *)&mask[16]);
+    VECTOR_SCLogInfo(sm);
+    /* logical AND them with the packet's mask */
+    r1.v = _mm_and_si128(pm.v, sm.v);
+    VECTOR_SCLogInfo(r1);
+    /* compare the result with the original mask */
+    r2.v = _mm_cmpeq_epi8(sm.v, r1.v);
+    VECTOR_SCLogInfo(r2);
+    /* convert into a bitarray: bits 16-31 */
+    bm |= ((uint32_t) _mm_movemask_epi8(r2.v)) << 16;
+
+    SCLogInfo("bm %08x", bm);
+
+    int b = 0;
+    for ( ; b < 32; b++){
+        if (bm & (1U << b)) { /* unsigned: 1 << 31 overflows int */
+            SCLogInfo("b %02d, set", b);
+        } else {
+            SCLogInfo("b %02d, not set", b);
+
+        }
+    }
+
+    if (bm & (1U << 31)) { /* pass: sig 31 must be selected */
+        return 1;
+    }
+    return 0;
+#else
+    return 1;
+#endif
+}
+
+/**
+ * \test Test 64 bit SIMD code.
+ */ +static int SigTestSIMDMask03(void) +{ +#if defined (__SSE3__) + Vector pm, sm, r1, r2; + uint64_t bm = 0; + uint8_t *mask = SCMallocAligned(64, 16); + memset(mask, 0xEF, 64); + mask[31] = 0xFF; + mask[62] = 0xFF; + printf("\n"); + pm.v = _mm_set1_epi8(0xEF); + VECTOR_SCLogInfo(pm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[0]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert into a bitarray */ + bm = ((uint64_t) _mm_movemask_epi8(r2.v)); + + SCLogInfo("bm1 %"PRIxMAX, (uintmax_t)bm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[16]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert into a bitarray */ + bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 16; + + SCLogInfo("bm2 %"PRIxMAX, (uintmax_t)bm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[32]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert into a bitarray */ + bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 32; + + SCLogInfo("bm3 %"PRIxMAX, (uintmax_t)bm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[48]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert 
into a bitarray */ + bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 48; + + SCLogInfo("bm4 %"PRIxMAX, (uintmax_t)bm); + + int b = 0; + for ( ; b < 64; b++){ + if (bm & ((uint64_t)1 << b)) { + SCLogInfo("b %02d, set", b); + } else { + SCLogInfo("b %02d, not set", b); + + } + } + + if (!(bm & ((uint64_t)1 << 31)) && !(bm & ((uint64_t)1 << 62))) { + return 1; + } + return 0; +#else + return 1; +#endif +} + +/** + * \test Test 64 bit SIMD code. + */ +static int SigTestSIMDMask04(void) +{ +#if defined (__SSE3__) + Vector pm, sm, r1, r2; + uint64_t bm = 0; + + uint8_t *mask = SCMallocAligned(64, 16); + memset(mask, 0x01, 64); + mask[31] = 0; + mask[62] = 0; + pm.v = _mm_set1_epi8(0x02); + VECTOR_SCLogInfo(pm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[0]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert into a bitarray */ + bm = ((uint64_t) _mm_movemask_epi8(r2.v)); + + SCLogInfo("bm1 %"PRIxMAX, (uintmax_t)bm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[16]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert into a bitarray */ + bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 16; + + SCLogInfo("bm2 %"PRIxMAX, (uintmax_t)bm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[32]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert into a bitarray */ + bm |= 
((uint64_t) _mm_movemask_epi8(r2.v)) << 32; + + SCLogInfo("bm3 %"PRIxMAX, (uintmax_t)bm); + + /* load a batch of masks */ + sm.v = _mm_load_si128((const __m128i *)&mask[48]); + VECTOR_SCLogInfo(sm); + /* logical AND them with the packet's mask */ + r1.v = _mm_and_si128(pm.v, sm.v); + VECTOR_SCLogInfo(r1); + /* compare the result with the original mask */ + r2.v = _mm_cmpeq_epi8(sm.v, r1.v); + VECTOR_SCLogInfo(r2); + /* convert into a bitarray */ + bm |= (((uint64_t) _mm_movemask_epi8(r2.v)) << 48); + + SCLogInfo("bm4-total %"PRIxMAX, (uintmax_t)bm); + + int b = 0; + for ( ; b < 64; b++){ + if (bm & ((uint64_t)1 << b)) { + SCLogInfo("b %02d, set", b); + } else { + SCLogInfo("b %02d, not set", b); + + } + } + + if ((bm & ((uint64_t)1 << 31)) && (bm & ((uint64_t)1 << 62))) { + return 1; + } + return 0; +#else + return 1; +#endif +} +#endif /* UNITTESTS */ + +void DetectSimdRegisterTests(void) +{ +#ifdef UNITTESTS + UtRegisterTest("SigTestSIMDMask01", SigTestSIMDMask01, 1); + UtRegisterTest("SigTestSIMDMask02", SigTestSIMDMask02, 1); + UtRegisterTest("SigTestSIMDMask03", SigTestSIMDMask03, 1); + UtRegisterTest("SigTestSIMDMask04", SigTestSIMDMask04, 1); +#endif /* UNITTESTS */ +} diff --git a/src/detect.c b/src/detect.c index b22d4b49e0..d40e97416c 100644 --- a/src/detect.c +++ b/src/detect.c @@ -188,7 +188,6 @@ #include "util-profiling.h" #include "util-validate.h" #include "util-optimize.h" -#include "util-vector.h" #include "util-path.h" #include "util-mpm-ac.h" @@ -494,8 +493,9 @@ int SigLoadSignatures(DetectEngineCtx *de_ctx, char *sig_file, int sig_file_excl * \retval 0 can't match, don't inspect * \retval 1 might match, further inspection required */ -static inline int SigMatchSignaturesBuildMatchArrayAddSignature(DetectEngineThreadCtx *det_ctx, - Packet *p, SignatureHeader *s, uint16_t alproto) +int SigMatchSignaturesBuildMatchArrayAddSignature(DetectEngineThreadCtx *det_ctx, + Packet *p, SignatureHeader *s, + uint16_t alproto) { /* if the sig has alproto and 
the session as well they should match */ if (likely(s->flags & SIG_FLAG_APPLAYER)) { @@ -564,218 +564,10 @@ static inline int SigMatchSignaturesBuildMatchArrayAddSignature(DetectEngineThre return 1; } -#if defined(__SSE3__) - -/** - * \brief SIMD implementation of mask prefiltering. - * - * Mass mask matching is done creating a bitmap of signatures that need - * futher inspection. - * - * On 32 bit systems we inspect in 32 sig batches, creating a u32 with flags. - * On 64 bit systems we inspect in 64 sig batches, creating a u64 with flags. - * The size of a register is leading here. - */ -static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx, - Packet *p, SignatureMask mask, uint16_t alproto) -{ - uint32_t u; - SigIntId x; - int bitno = 0; -#if __WORDSIZE == 32 - register uint32_t bm; /* bit mask, 32 bits used */ - - Vector pm, sm, r1, r2; - /* load the packet mask into each byte of the vector */ - pm.v = _mm_set1_epi8(mask); - - /* reset previous run */ - det_ctx->match_array_cnt = 0; - - for (u = 0; u < det_ctx->sgh->sig_cnt; u += 32) { - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm = ((uint32_t) _mm_movemask_epi8(r2.v)); - - SCLogDebug("bm1 %08x", bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+16]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint32_t) _mm_movemask_epi8(r2.v) << 16); - - SCLogDebug("bm2 %08x", bm); - - if (bm == 0) { - continue; - } - - /* Check each bit in the bit map. 
Little endian is assumed (SSE is x86), - * so the bits are in memory backwards, 0 is on the right edge, - * 31 on the left edge. This is why above we store the output of the - * _mm_movemask_epi8 in this order as well */ - bitno = 0; - for (x = u; x < det_ctx->sgh->sig_cnt && bitno < 32; x++, bitno++) { - if (bm & (1 << bitno)) { - SignatureHeader *s = &det_ctx->sgh->head_array[x]; - - if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) { - /* okay, store it */ - det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig; - det_ctx->match_array_cnt++; - } - } - } - } -#elif __WORDSIZE == 64 - register uint64_t bm; /* bit mask, 64 bits used */ - - Vector pm, sm, r1, r2; - /* load the packet mask into each byte of the vector */ - pm.v = _mm_set1_epi8(mask); - - /* reset previous run */ - det_ctx->match_array_cnt = 0; - - for (u = 0; u < det_ctx->sgh->sig_cnt; u += 64) { - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm = ((uint64_t) _mm_movemask_epi8(r2.v)); - - SCLogDebug("bm1 %08"PRIx64, bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+16]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 16; - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+32]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint64_t) 
_mm_movemask_epi8(r2.v)) << 32; - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+48]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 48; - - SCLogDebug("bm2 %08"PRIx64, bm); - - if (bm == 0) { - continue; - } - - /* Check each bit in the bit map. Little endian is assumed (SSE is x86-64), - * so the bits are in memory backwards, 0 is on the right edge, - * 63 on the left edge. This is why above we store the output of the - * _mm_movemask_epi8 in this order as well */ - bitno = 0; - for (x = u; x < det_ctx->sgh->sig_cnt && bitno < 64; x++, bitno++) { - if (bm & ((uint64_t)1 << bitno)) { - SignatureHeader *s = &det_ctx->sgh->head_array[x]; - - if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) { - /* okay, store it */ - det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig; - det_ctx->match_array_cnt++; - } - } - } - } -#else -#error Wordsize (__WORDSIZE) neither 32 or 64. -#endif -} - /* end defined(__SSE3__) */ -#elif defined(__tile__) - -/** - * \brief SIMD implementation of mask prefiltering for TILE-Gx - * - * Mass mask matching is done creating a bitmap of signatures that need - * futher inspection. - */ -static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx, - Packet *p, SignatureMask mask, uint16_t alproto) -{ - uint32_t u; - register uint64_t bm; /* bit mask, 64 bits used */ - - /* Keep local copies of variables that don't change during this function. 
*/ - uint64_t *mask_vector = (uint64_t*)det_ctx->sgh->mask_array; - uint32_t sig_cnt = det_ctx->sgh->sig_cnt; - SignatureHeader *head_array = det_ctx->sgh->head_array; - - Signature **match_array = det_ctx->match_array; - uint32_t match_count = 0; - - /* Replicate the packet mask into each byte of the vector. */ - uint64_t pm = __insn_shufflebytes(mask, 0, 0); - - /* u is the signature index. */ - for (u = 0; u < sig_cnt; u += 8) { - /* Load 8 masks */ - uint64_t sm = *mask_vector++; - /* Binary AND 8 masks with the packet's mask */ - uint64_t r1 = pm & sm; - /* Compare the result with the original mask - * Result if equal puts a 1 in LSB of bytes that match. - */ - bm = __insn_v1cmpeq(sm, r1); - - /* Check the LSB bit of each byte in the bit map. Little endian is assumed, - * so the LSB byte is index 0. Uses count trailing zeros to find least - * significant bit that is set. */ - while (bm) { - /* Find first bit set starting from LSB. */ - unsigned int first_bit = __insn_ctz(bm); - unsigned int first_byte = first_bit >> 3; - unsigned int x = u + first_byte; - if (x >= sig_cnt) - break; - SignatureHeader *s = &head_array[x]; - - /* Clear the first bit set, so it is not found again. 
*/ - bm -= (1UL << first_bit); - - if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) { - /* okay, store it */ - *match_array++ = s->full_sig; - match_count++; - } - } - } - det_ctx->match_array_cnt = match_count; -} -/* end defined(__tile__) */ +#if defined(__SSE3__) || defined(__tile__) +/* SIMD implementations are in detect-simd.c */ #else -/* No SIMD implementation */ +/* Non-SIMD implementation */ /** * \brief build an array of signatures that will be inspected * @@ -787,9 +579,9 @@ static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ * \param mask Packets mask * \param alproto application layer protocol */ -static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx, - Packet *p, SignatureMask mask, - uint16_t alproto) +void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx, + Packet *p, SignatureMask mask, + uint16_t alproto) { uint32_t u; @@ -4959,7 +4751,8 @@ void SigTableSetup(void) { DetectDnsQueryRegister(); } -void SigTableRegisterTests(void) { +void SigTableRegisterTests(void) +{ /* register the tests */ int i = 0; for (i = 0; i < DETECT_TBLSIZE; i++) { @@ -11262,317 +11055,6 @@ end: return result; } - -/// SCLogInfo("%s %u %u %u %u", #v, (v).dw[0], (v).dw[1], (v).dw[2], (v).dw[3]); -#define VECTOR_SCLogInfo(v) { \ - SCLogInfo("%s %08X %08X %08X %08X", #v, (v).dw[0], (v).dw[1], (v).dw[2], (v).dw[3]); \ -} - -/** - * \test Test 32 bit SIMD code. 
- */ -int SigTestSIMDMask01(void) { -#if defined (__SSE3__) - Vector pm, sm, r1, r2; - uint32_t bm = 0; - - uint8_t *mask = SCMallocAligned(32, 16); - memset(mask, 0xEF, 32); - mask[31] = 0xFF; - printf("\n"); - pm.v = _mm_set1_epi8(0xEF); - VECTOR_SCLogInfo(pm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[0]); - VECTOR_SCLogInfo(sm); - - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm = ((uint32_t) _mm_movemask_epi8(r2.v)); - - SCLogInfo("bm %08x", bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[16]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm |= ((uint32_t) _mm_movemask_epi8(r2.v)) << 16; - - SCLogInfo("bm %08x", bm); - - int b = 0; - for ( ; b < 32; b++){ - if (bm & (1 << b)) { - SCLogInfo("b %02d, set", b); - } else { - SCLogInfo("b %02d, not set", b); - - } - } - - if (!(bm & (1 << 31))) { - return 1; - } - return 0; -#else - return 1; -#endif -} - -/** - * \test Test 32 bit SIMD code. 
- */ -int SigTestSIMDMask02(void) { -#if defined (__SSE3__) - Vector pm, sm, r1, r2; - uint32_t bm = 0; - - uint8_t *mask = SCMallocAligned(32, 16); - memset(mask, 0x01, 32); - mask[31] = 0; - pm.v = _mm_set1_epi8(0x02); - VECTOR_SCLogInfo(pm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[0]); - VECTOR_SCLogInfo(sm); - - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm = ((uint32_t) _mm_movemask_epi8(r2.v)); - - SCLogInfo("bm %08x", bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[16]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm |= ((uint32_t) _mm_movemask_epi8(r2.v)) << 16; - - SCLogInfo("bm %08x", bm); - - int b = 0; - for ( ; b < 32; b++){ - if (bm & (1 << b)) { - SCLogInfo("b %02d, set", b); - } else { - SCLogInfo("b %02d, not set", b); - - } - } - - if (bm & (1 << 31)) { - return 1; - } - return 0; -#else - return 1; -#endif -} - -/** - * \test Test 64 bit SIMD code. 
- */ -int SigTestSIMDMask03(void) { -#if defined (__SSE3__) - Vector pm, sm, r1, r2; - uint64_t bm = 0; - uint8_t *mask = SCMallocAligned(64, 16); - memset(mask, 0xEF, 64); - mask[31] = 0xFF; - mask[62] = 0xFF; - printf("\n"); - pm.v = _mm_set1_epi8(0xEF); - VECTOR_SCLogInfo(pm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[0]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm = ((uint64_t) _mm_movemask_epi8(r2.v)); - - SCLogInfo("bm1 %"PRIxMAX, (uintmax_t)bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[16]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 16; - - SCLogInfo("bm2 %"PRIxMAX, (uintmax_t)bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[32]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 32; - - SCLogInfo("bm3 %"PRIxMAX, (uintmax_t)bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[48]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a 
bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 48; - - SCLogInfo("bm4 %"PRIxMAX, (uintmax_t)bm); - - int b = 0; - for ( ; b < 64; b++){ - if (bm & ((uint64_t)1 << b)) { - SCLogInfo("b %02d, set", b); - } else { - SCLogInfo("b %02d, not set", b); - - } - } - - if (!(bm & ((uint64_t)1 << 31)) && !(bm & ((uint64_t)1 << 62))) { - return 1; - } - return 0; -#else - return 1; -#endif -} - -/** - * \test Test 64 bit SIMD code. - */ -int SigTestSIMDMask04(void) { -#if defined (__SSE3__) - Vector pm, sm, r1, r2; - uint64_t bm = 0; - - uint8_t *mask = SCMallocAligned(64, 16); - memset(mask, 0x01, 64); - mask[31] = 0; - mask[62] = 0; - pm.v = _mm_set1_epi8(0x02); - VECTOR_SCLogInfo(pm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[0]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm = ((uint64_t) _mm_movemask_epi8(r2.v)); - - SCLogInfo("bm1 %"PRIxMAX, (uintmax_t)bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[16]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 16; - - SCLogInfo("bm2 %"PRIxMAX, (uintmax_t)bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[32]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm |= ((uint64_t) 
_mm_movemask_epi8(r2.v)) << 32; - - SCLogInfo("bm3 %"PRIxMAX, (uintmax_t)bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&mask[48]); - VECTOR_SCLogInfo(sm); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - VECTOR_SCLogInfo(r1); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - VECTOR_SCLogInfo(r2); - /* convert into a bitarray */ - bm |= (((uint64_t) _mm_movemask_epi8(r2.v)) << 48); - - SCLogInfo("bm4-total %"PRIxMAX, (uintmax_t)bm); - - int b = 0; - for ( ; b < 64; b++){ - if (bm & ((uint64_t)1 << b)) { - SCLogInfo("b %02d, set", b); - } else { - SCLogInfo("b %02d, not set", b); - - } - } - - if ((bm & ((uint64_t)1 << 31)) && (bm & ((uint64_t)1 << 62))) { - return 1; - } - return 0; -#else - return 1; -#endif -} - #endif /* UNITTESTS */ void SigRegisterTests(void) { @@ -11780,11 +11262,7 @@ void SigRegisterTests(void) { UtRegisterTest("SigTestDropFlow03", SigTestDropFlow03, 1); UtRegisterTest("SigTestDropFlow04", SigTestDropFlow04, 1); - UtRegisterTest("SigTestSIMDMask01", SigTestSIMDMask01, 1); - UtRegisterTest("SigTestSIMDMask02", SigTestSIMDMask02, 1); - UtRegisterTest("SigTestSIMDMask03", SigTestSIMDMask03, 1); - UtRegisterTest("SigTestSIMDMask04", SigTestSIMDMask04, 1); - + DetectSimdRegisterTests(); #endif /* UNITTESTS */ } diff --git a/src/detect.h b/src/detect.h index df37f22434..650f6be67d 100644 --- a/src/detect.h +++ b/src/detect.h @@ -1137,11 +1137,18 @@ SigTableElmt sigmatch_table[DETECT_TBLSIZE]; /* detection api */ SigMatch *SigMatchAlloc(void); Signature *SigFindSignatureBySidGid(DetectEngineCtx *, uint32_t, uint32_t); +void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *, + Packet *, SignatureMask, + uint16_t); +int SigMatchSignaturesBuildMatchArrayAddSignature(DetectEngineThreadCtx *, + Packet *, SignatureHeader *, + uint16_t); void SigMatchFree(SigMatch *sm); void SigCleanSignatures(DetectEngineCtx *); void 
SigTableRegisterTests(void); void SigRegisterTests(void); +void DetectSimdRegisterTests(void); void TmModuleDetectRegister (void); int SigGroupBuild(DetectEngineCtx *);