From: Russ Combs (rucombs) Date: Tue, 29 Oct 2019 21:06:50 +0000 (-0400) Subject: Merge pull request #1787 in SNORT/snort3 from ~BRASTULT/snort3:boyer_moore to master X-Git-Tag: 3.0.0-263~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=58ef5862f05cbd6ca85245740d9664388519af13;p=thirdparty%2Fsnort3.git Merge pull request #1787 in SNORT/snort3 from ~BRASTULT/snort3:boyer_moore to master Squashed commit of the following: commit c38c3e1dc80b15da5ebc4423662efffe94b585cd Author: Brandon Stultz Date: Mon Oct 7 18:29:14 2019 -0400 content: rewrite boyer_moore for performance --- diff --git a/src/ips_options/ips_content.cc b/src/ips_options/ips_content.cc index 137a6b7c8..0edc0568f 100644 --- a/src/ips_options/ips_content.cc +++ b/src/ips_options/ips_content.cc @@ -44,39 +44,57 @@ using namespace snort; static THREAD_LOCAL ProfileStats contentPerfStats; -static IpsOption::EvalStatus CheckANDPatternMatch(struct ContentData*, Cursor&); +static IpsOption::EvalStatus CheckANDPatternMatch(class ContentData*, Cursor&); //------------------------------------------------------------------------- // instance data //------------------------------------------------------------------------- -struct ContentData +class ContentData { - PatternMatchData pmd; +public: + ContentData(); + + ~ContentData(); + + void setup_bm(); + void set_max_jump_size(); + + PatternMatchData pmd = {}; + + BoyerMoore* boyer_moore; int8_t offset_var; /* byte_extract variable indices for offset, */ int8_t depth_var; /* depth, distance, within */ unsigned match_delta; /* Maximum distance we can jump to search for this pattern again. 
*/ - - int* skip_stride; /* B-M skip array */ - int* shift_stride; /* B-M shift array */ - - void init(); - void setup_bm(); - void set_max_jump_size(); }; -void ContentData::init() +ContentData::ContentData() { + boyer_moore = nullptr; offset_var = IPS_OPTIONS_NO_VAR; depth_var = IPS_OPTIONS_NO_VAR; + match_delta = 0; +} + +ContentData::~ContentData() +{ + if ( boyer_moore ) + delete boyer_moore; + + if ( pmd.pattern_buf ) + snort_free(const_cast<char*>(pmd.pattern_buf)); + + if ( pmd.last_check ) + snort_free(pmd.last_check); } void ContentData::setup_bm() { - skip_stride = make_skip(pmd.pattern_buf, pmd.pattern_size); - shift_stride = make_shift(pmd.pattern_buf, pmd.pattern_size); + const uint8_t* pattern = (const uint8_t*)pmd.pattern_buf; + + boyer_moore = new BoyerMoore(pattern, pmd.pattern_size); } // find the maximum number of characters we can jump ahead @@ -113,7 +131,8 @@ public: ContentOption(ContentData* c) : IpsOption(s_name, RULE_OPTION_TYPE_CONTENT) { config = c; } - ~ContentOption() override; + ~ContentOption() override + { delete config; } uint32_t hash() const override; bool operator==(const IpsOption&) const override; @@ -142,28 +161,6 @@ protected: ContentData* config; }; -ContentOption::~ContentOption() -{ - ContentData* cd = config; - - if ( !cd ) - return; - - if ( cd->pmd.pattern_buf ) - snort_free(const_cast<char*>(cd->pmd.pattern_buf)); - - if ( cd->pmd.last_check ) - snort_free(cd->pmd.last_check); - - if ( cd->skip_stride ) - snort_free(cd->skip_stride); - - if ( cd->shift_stride ) - snort_free(cd->shift_stride); - - snort_free(cd); -} - bool ContentOption::retry(Cursor& c) { if ( config->pmd.is_negated() ) @@ -352,15 +349,11 @@ static int uniSearchReal(ContentData* cd, Cursor& c) if ( cd->pmd.is_no_case() ) { - found = mSearchCI( - (const char*)base, depth, cd->pmd.pattern_buf, cd->pmd.pattern_size, - cd->skip_stride, cd->shift_stride); + found = cd->boyer_moore->search_nocase(base, depth); } else { - found = mSearch( - (const char*)base, depth, 
cd->pmd.pattern_buf, cd->pmd.pattern_size, - cd->skip_stride, cd->shift_stride); + found = cd->boyer_moore->search(base, depth); } if ( found >= 0 ) @@ -671,8 +664,7 @@ ContentData* ContentModule::get_data() bool ContentModule::begin(const char*, int, SnortConfig*) { - cd = (ContentData*)snort_calloc(sizeof(ContentData)); - cd->init(); + cd = new ContentData(); return true; } diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index b96f459f9..e7ff20588 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -1,6 +1,7 @@ set( UTIL_INCLUDES bitop.h + boyer_moore.h cpp_macros.h endian.h event_gen.h @@ -28,7 +29,6 @@ add_library ( utils OBJECT ${UTIL_INCLUDES} ${SNPRINTF_SOURCES} boyer_moore.cc - boyer_moore.h dnet_header.h dyn_array.cc dyn_array.h @@ -57,3 +57,5 @@ install (FILES ${UTIL_INCLUDES} DESTINATION "${INCLUDE_INSTALL_PATH}/utils" ) +add_subdirectory(test) + diff --git a/src/utils/boyer_moore.cc b/src/utils/boyer_moore.cc index 9c51f9b0a..77553dabc 100644 --- a/src/utils/boyer_moore.cc +++ b/src/utils/boyer_moore.cc @@ -1,7 +1,5 @@ //-------------------------------------------------------------------------- -// Copyright (C) 2014-2019 Cisco and/or its affiliates. All rights reserved. -// Copyright (C) 2002-2013 Sourcefire, Inc. -// Copyright (C) 1998-2002 Martin Roesch +// Copyright (C) 2019-2019 Cisco and/or its affiliates. All rights reserved. // // This program is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License Version 2 as published @@ -17,207 +15,75 @@ // with this program; if not, write to the Free Software Foundation, Inc., // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
//-------------------------------------------------------------------------- - -// boyer_moore.cc was split out of mstring.cc which had these comments: - -/*************************************************************************** - * - * File: MSTRING.C - * - * Purpose: Provide a variety of string functions not included in libc. Makes - * up for the fact that the libstdc++ is hard to get reference - * material on and I don't want to write any more non-portable c++ - * code until I have solid references and libraries to use. - * - * History: - * - * Date: Author: Notes: - * ---------- ------- ---------------------------------------------- - * 08/19/98 MFR Initial coding begun - * 03/06/99 MFR Added Boyer-Moore pattern match routine - * 12/31/99 JGW Added a full Boyer-Moore implementation to increase - * performance. Added a case insensitive version of mSearch - * 07/24/01 MFR Fixed Regex pattern matcher introduced by Fyodor - * - **************************************************************************/ +// boyer_moore.cc author Brandon Stultz #ifdef HAVE_CONFIG_H #include "config.h" #endif -#include "boyer_moore.h" +#include <cassert> +#include <cctype> -#include "util.h" +#include "boyer_moore.h" namespace snort { -/**************************************************************** - * - * Function: make_skip(char *, int) - * - * Purpose: Create a Boyer-Moore skip table for a given pattern - * - * Parameters: - * ptrn => pattern - * plen => length of the data in the pattern buffer - * - * Returns: - * int * - the skip table - * - ****************************************************************/ -int* make_skip(const char* ptrn, int plen) -{ - int i; - int* skip = (int*)snort_calloc(256, sizeof(int)); - for ( i = 0; i < 256; i++ ) - skip[i] = plen + 1; +BoyerMoore::BoyerMoore(const uint8_t* pattern, unsigned pattern_len) + : pattern(pattern), pattern_len(pattern_len) +{ + assert(pattern_len > 0); - while (plen != 0) - skip[(unsigned char)*ptrn++] = plen--; + last = pattern_len - 1; 
- return skip; + make_skip(); } -/**************************************************************** - * - * Function: make_shift(char *, int) - * - * Purpose: Create a Boyer-Moore shift table for a given pattern - * - * Parameters: - * ptrn => pattern - * plen => length of the data in the pattern buffer - * - * Returns: - * int * - the shift table - * - ****************************************************************/ -int* make_shift(const char* ptrn, int plen) +// skip[c] is the distance between the last character of the +// pattern and the rightmost occurrence of c in the pattern. +// If c does not occur in the pattern then skip[c] = pattern_len. +void BoyerMoore::make_skip() { - int* shift = (int*)snort_calloc(plen, sizeof(int)); - int* sptr = shift + plen - 1; - const char* pptr = ptrn + plen - 1; - char c; - - c = ptrn[plen - 1]; - - *sptr = 1; - - while (sptr-- != shift) - { - const char* p1 = ptrn + plen - 2, * p2, * p3; - - do - { - while (p1 >= ptrn && *p1-- != c) - ; - - p2 = ptrn + plen - 2; - p3 = p1; - - while (p3 >= ptrn && *p3-- == *p2-- && p2 >= pptr) - ; - } - while (p3 >= ptrn && p2 >= pptr); + for ( unsigned i = 0; i < 256; i++ ) + skip[i] = pattern_len; - *sptr = shift + plen - sptr + p2 - p3; - - pptr--; - } - - return shift; + for ( unsigned i = 0; i < last; i++ ) + skip[pattern[i]] = last - i; } -/**************************************************************** - * - * Function: mSearch(char *, int, char *, int) - * - * Purpose: Determines if a string contains a (non-regex) - * substring. 
- * - * Parameters: - * buf => data buffer we want to find the data in - * blen => data buffer length - * ptrn => pattern to find - * plen => length of the data in the pattern buffer - * skip => the B-M skip array - * shift => the B-M shift array - * - * Returns: - * -1 if not found or offset >= 0 if found - * - ****************************************************************/ -int mSearch( - const char* buf, int blen, const char* ptrn, int plen, const int* skip, const int* shift) +int BoyerMoore::search(const uint8_t* buffer, unsigned buffer_len) const { - if (plen == 0) - return -1; + const uint8_t* start = buffer; - int b_idx = plen; - - while (b_idx <= blen) + while ( buffer_len >= pattern_len ) { - int p_idx = plen, skip_stride, shift_stride; - - while (buf[--b_idx] == ptrn[--p_idx]) - { - if (p_idx == 0) - return b_idx; - } - - skip_stride = skip[(unsigned char)buf[b_idx]]; - shift_stride = shift[p_idx]; + for ( unsigned pos = last; buffer[pos] == pattern[pos]; pos-- ) + if ( pos == 0 ) + return buffer - start; - b_idx += (skip_stride > shift_stride) ? 
skip_stride : shift_stride; + buffer_len -= skip[buffer[last]]; + buffer += skip[buffer[last]]; } return -1; } -/**************************************************************** - * - * Function: mSearchCI(char *, int, char *, int) - * - * Purpose: Determines if a string contains a (non-regex) - * substring matching is case insensitive - * - * Parameters: - * buf => data buffer we want to find the data in - * blen => data buffer length - * ptrn => pattern to find - * plen => length of the data in the pattern buffer - * skip => the B-M skip array - * shift => the B-M shift array - * - * Returns: - * -1 if not found or offset >= 0 if found - * - ****************************************************************/ -int mSearchCI( - const char* buf, int blen, const char* ptrn, int plen, const int* skip, const int* shift) +int BoyerMoore::search_nocase(const uint8_t* buffer, unsigned buffer_len) const { - int b_idx = plen; + const uint8_t* start = buffer; - if (plen == 0) - return -1; - - while (b_idx <= blen) + while ( buffer_len >= pattern_len ) { - int p_idx = plen, skip_stride, shift_stride; - - while ((unsigned char)ptrn[--p_idx] == toupper((unsigned char)buf[--b_idx])) - { - if (p_idx == 0) - return b_idx; - } - - skip_stride = skip[toupper((unsigned char)buf[b_idx])]; - shift_stride = shift[p_idx]; + for ( unsigned pos = last; toupper(buffer[pos]) == pattern[pos]; pos-- ) + if ( pos == 0 ) + return buffer - start; - b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride; + buffer_len -= skip[toupper(buffer[last])]; + buffer += skip[toupper(buffer[last])]; } return -1; } + } + diff --git a/src/utils/boyer_moore.h b/src/utils/boyer_moore.h index 7726fe4cd..5926f49a6 100644 --- a/src/utils/boyer_moore.h +++ b/src/utils/boyer_moore.h @@ -1,7 +1,5 @@ //-------------------------------------------------------------------------- -// Copyright (C) 2014-2019 Cisco and/or its affiliates. All rights reserved. -// Copyright (C) 2002-2013 Sourcefire, Inc. 
-// Copyright (C) 1998-2002 Martin Roesch +// Copyright (C) 2019-2019 Cisco and/or its affiliates. All rights reserved. // // This program is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License Version 2 as published @@ -17,6 +15,7 @@ // with this program; if not, write to the Free Software Foundation, Inc., // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. //-------------------------------------------------------------------------- +// boyer_moore.h author Brandon Stultz #ifndef BOYER_MOORE_H #define BOYER_MOORE_H @@ -27,12 +26,25 @@ namespace snort { -// FIXIT-M no associated resource destructor for make_skip & make_shift :( -SO_PUBLIC int* make_skip(const char*, int); -SO_PUBLIC int* make_shift(const char*, int); -SO_PUBLIC int mSearch(const char*, int, const char*, int, const int*, const int*); -SO_PUBLIC int mSearchCI(const char*, int, const char*, int, const int*, const int*); +class SO_PUBLIC BoyerMoore +{ +public: + BoyerMoore(const uint8_t* pattern, unsigned pattern_len); + + int search(const uint8_t* buffer, unsigned buffer_len) const; + int search_nocase(const uint8_t* buffer, unsigned buffer_len) const; + +private: + void make_skip(); + + const uint8_t* pattern; + unsigned pattern_len; + unsigned last; + + unsigned skip[256]; +}; + } #endif diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt new file mode 100644 index 000000000..6f9a2db4c --- /dev/null +++ b/src/utils/test/CMakeLists.txt @@ -0,0 +1,4 @@ +add_cpputest( boyer_moore_test + SOURCES + ../boyer_moore.cc +) diff --git a/src/utils/test/boyer_moore_test.cc b/src/utils/test/boyer_moore_test.cc new file mode 100644 index 000000000..4e5231f9f --- /dev/null +++ b/src/utils/test/boyer_moore_test.cc @@ -0,0 +1,195 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2019-2019 Cisco and/or its affiliates. All rights reserved. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// boyer_moore_test.cc author Brandon Stultz + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "../boyer_moore.h" + +#include <algorithm> +#include <cctype> +#include <string> +#include <CppUTest/CommandLineTestRunner.h> +#include <CppUTest/TestHarness.h> + +using namespace std; +using namespace snort; + +enum TestType +{ + CASE, + NOCASE, +}; + +class Tester +{ +public: + Tester(const char* pat_str, const char* buf_str, TestType typ, int idx) + : pat(pat_str), buf(buf_str), type(typ), index(idx) + { + if ( type == NOCASE ) + transform(pat.begin(), pat.end(), pat.begin(), ::toupper); + + pattern_len = pat.length(); + buffer_len = buf.length(); + + pattern = (const uint8_t*)(pat.c_str()); + buffer = (const uint8_t*)(buf.c_str()); + } + + bool run(); + +private: + string pat; + string buf; + + TestType type; + + int index; + + unsigned pattern_len; + unsigned buffer_len; + + const uint8_t* pattern; + const uint8_t* buffer; +}; + +bool Tester::run() +{ + int pos; + + BoyerMoore bm = BoyerMoore(pattern, pattern_len); + + if ( type == NOCASE ) + { + pos = bm.search_nocase(buffer, buffer_len); + } + else + { + pos = bm.search(buffer, buffer_len); + } + + return pos == index; +} + 
+TEST_GROUP(boyer_moore_test_group) {}; + +TEST(boyer_moore_test_group, binary) +{ + const uint8_t pat[] = { 0xCA, 0xFE, 0xBA, 0xBE }; + + const uint8_t buf[] = { + 0x00, 0x01, 0x02, 0x03, + 0x72, 0x01, 0x3F, 0x2B, + 0x1F, 0xCA, 0xFE, 0xBA, + 0xBE, 0x01, 0x02, 0x03, + }; + + BoyerMoore bm = BoyerMoore(pat, sizeof(pat)); + + int pos = bm.search(buf, sizeof(buf)); + + CHECK(pos == 9); +} + +TEST(boyer_moore_test_group, empty) +{ + Tester t = Tester("abc", "", CASE, -1); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, start) +{ + Tester t = Tester("abc", "abc", CASE, 0); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, start_nocase) +{ + Tester t = Tester("abc", "aBc", NOCASE, 0); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, found1) +{ + Tester t = Tester("d", "abcdefg", CASE, 3); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, found2) +{ + Tester t = Tester("nan", "banana", CASE, 2); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, found3) +{ + Tester t = Tester("pan", "anpanman", CASE, 2); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, found4) +{ + Tester t = Tester("bcd", "abcd", CASE, 1); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, found5) +{ + Tester t = Tester("aa", "aaa", CASE, 0); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, found6) +{ + Tester t = Tester( + "that", "which finally halts at tHaT point", NOCASE, 23); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, not_found1) +{ + Tester t = Tester("nnaaman", "anpanmanam", CASE, -1); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, not_found2) +{ + Tester t = Tester("abcd", "abc", CASE, -1); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, not_found3) +{ + Tester t = Tester("abcd", "bcd", CASE, -1); + CHECK(t.run()); +} + +TEST(boyer_moore_test_group, not_found4) +{ + Tester t = Tester("baa", "aaaaa", CASE, -1); + CHECK(t.run()); +} + +int main(int argc, char** argv) +{ + return CommandLineTestRunner::RunAllTests(argc, argv); +} +