From: Michael Matirko (mmatirko) Date: Tue, 14 Jan 2025 20:09:43 +0000 (+0000) Subject: Pull request #4506: [PCRE2] ips: pcre2 X-Git-Tag: 3.6.2.0~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb80b5d754774fc8b1c9c1d9208b6b78cc609521;p=thirdparty%2Fsnort3.git Pull request #4506: [PCRE2] ips: pcre2 Merge in SNORT/snort3 from ~MMATIRKO/snort3:pcre2 to master Squashed commit of the following: commit c385698cabdd01447d01456922f2e5cae451caa6 Author: Michael Matirko Date: Fri Jan 3 15:40:11 2025 -0500 utils: add new header/wrapper for pcre2 code unit width commit 92eab6e926465e51d5c47c67204759a8b87df581 Author: Michael Matirko Date: Fri Jan 3 15:39:31 2025 -0500 doc: stylize dependency names in README.md commit 3c395d5cd0d8ba8d4dc2572f5753d8fcb813756f Author: Michael Matirko Date: Thu Mar 7 13:51:08 2024 -0500 ips: update pcre to pcre2 --- diff --git a/README.md b/README.md index c731ce60a..5d4940664 100644 --- a/README.md +++ b/README.md @@ -51,8 +51,8 @@ Additional features on the roadmap include: If you already build Snort, you may have everything you need. 
If not, grab the latest: -* cmake to build from source -* daq from https://github.com/snort3/libdaq for packet IO +* CMake to build from source +* DAQ from https://github.com/snort3/libdaq for packet IO * dnet from https://github.com/dugsong/libdnet.git for network utility functions * flex >= 2.6.0 from https://github.com/westes/flex for JavaScript syntax parser * g++ >= 7 or other C++17 compiler @@ -61,7 +61,7 @@ the latest: * OpenSSL from https://www.openssl.org/source/ for SHA and MD5 file signatures, the protected_content rule option, and SSL service detection * pcap from http://www.tcpdump.org for tcpdump style logging -* pcre from http://www.pcre.org for regular expression pattern matching +* PCRE2 from http://www.pcre.org for regular expression pattern matching * pkgconfig from https://www.freedesktop.org/wiki/Software/pkg-config/ to locate build dependencies * zlib from http://www.zlib.net for decompression diff --git a/cmake/FindPCRE.cmake b/cmake/FindPCRE.cmake deleted file mode 100644 index 0fec76c76..000000000 --- a/cmake/FindPCRE.cmake +++ /dev/null @@ -1,32 +0,0 @@ -# - Find pcre -# Find the native PCRE includes and library -# -# PCRE_INCLUDE_DIR - where to find pcre.h, etc. -# PCRE_LIBRARIES - List of libraries when using pcre. -# PCRE_FOUND - True if pcre found. - -set(ERROR_MESSAGE - "\n\tERROR! Libpcre library not found. - \tGet it from http://www.pcre.org\n" -) - -find_package(PkgConfig) -pkg_check_modules(PC_PCRE libpcre) - -# Use PCRE_INCLUDE_DIR_HINT and PCRE_LIBRARIES_DIR_HINT from configure_cmake.sh as primary hints -# and then package config information after that. 
-find_path(PCRE_INCLUDE_DIR pcre.h - HINTS ${PCRE_INCLUDE_DIR_HINT} ${PC_PCRE_INCLUDEDIR} ${PC_PCRE_INCLUDE_DIRS}) -find_library(PCRE_LIBRARIES NAMES pcre - HINTS ${PCRE_LIBRARIES_DIR_HINT} ${PC_PCRE_LIBDIR} ${PC_PCRE_LIBRARY_DIRS}) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PCRE - REQUIRED_VARS PCRE_INCLUDE_DIR PCRE_LIBRARIES - FAIL_MESSAGE "${ERROR_MESSAGE}" -) - -mark_as_advanced( - PCRE_LIBRARIES - PCRE_INCLUDE_DIR -) diff --git a/cmake/FindPCRE2.cmake b/cmake/FindPCRE2.cmake new file mode 100644 index 000000000..d8a385770 --- /dev/null +++ b/cmake/FindPCRE2.cmake @@ -0,0 +1,33 @@ +# - Find pcre2 +# Find the native PCRE2 includes and library +# +# PCRE2_INCLUDE_DIR - where to find pcre2.h, etc. +# PCRE2_LIBRARIES - List of libraries when using pcre2. +# PCRE2_FOUND - True if pcre2 found. + +set(ERROR_MESSAGE + "\n\tERROR! Libpcre2 library not found. + \tGet it from http://www.pcre.org\n" +) + +find_package(PkgConfig) +# PCRE2 ships one pkg-config module per code unit width (libpcre2-8, -16, -32); there is no plain "libpcre2" module. +pkg_check_modules(PC_PCRE2 libpcre2-8) + +# Use PCRE2_INCLUDE_DIR_HINT and PCRE2_LIBRARIES_DIR_HINT from configure_cmake.sh as primary hints +# and then package config information after that. 
+find_path(PCRE2_INCLUDE_DIR pcre2.h + HINTS ${PCRE2_INCLUDE_DIR_HINT} ${PC_PCRE2_INCLUDEDIR} ${PC_PCRE2_INCLUDE_DIRS}) +find_library(PCRE2_LIBRARIES NAMES pcre2-8 + HINTS ${PCRE2_LIBRARIES_DIR_HINT} ${PC_PCRE2_LIBDIR} ${PC_PCRE2_LIBRARY_DIRS}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PCRE2 + REQUIRED_VARS PCRE2_INCLUDE_DIR PCRE2_LIBRARIES + FAIL_MESSAGE "${ERROR_MESSAGE}" +) + +mark_as_advanced( + PCRE2_LIBRARIES + PCRE2_INCLUDE_DIR +) diff --git a/cmake/include_libraries.cmake b/cmake/include_libraries.cmake index 244c6452d..a6b2562ec 100644 --- a/cmake/include_libraries.cmake +++ b/cmake/include_libraries.cmake @@ -8,7 +8,7 @@ find_package(HWLOC REQUIRED) find_package(LuaJIT REQUIRED) find_package(OpenSSL 1.1.1 REQUIRED) find_package(PCAP REQUIRED) -find_package(PCRE REQUIRED) +find_package(PCRE2 REQUIRED) find_package(ZLIB REQUIRED) if (ENABLE_UNIT_TESTS) find_package(CppUTest REQUIRED) diff --git a/configure_cmake.sh b/configure_cmake.sh index c2a3e6b0b..572a53aa6 100755 --- a/configure_cmake.sh +++ b/configure_cmake.sh @@ -91,10 +91,10 @@ Optional Packages: luajit include directory --with-luajit-libraries=DIR luajit library directory - --with-pcre-includes=DIR - libpcre include directory - --with-pcre-libraries=DIR - libpcre library directory + --with-pcre2-includes=DIR + libpcre2 include directory + --with-pcre2-libraries=DIR + libpcre2 library directory --with-dnet-includes=DIR libdnet include directory --with-dnet-libraries=DIR @@ -421,11 +421,11 @@ while [ $# -ne 0 ]; do --with-luajit-libraries=*) append_cache_entry LUAJIT_LIBRARIES_DIR_HINT PATH $optarg ;; - --with-pcre-includes=*) - append_cache_entry PCRE_INCLUDE_DIR_HINT PATH $optarg + --with-pcre2-includes=*) + append_cache_entry PCRE2_INCLUDE_DIR_HINT PATH $optarg ;; - --with-pcre-libraries=*) - append_cache_entry PCRE_LIBRARIES_DIR_HINT PATH $optarg + --with-pcre2-libraries=*) + append_cache_entry PCRE2_LIBRARIES_DIR_HINT PATH $optarg ;; --with-dnet-includes=*) 
append_cache_entry DNET_INCLUDE_DIR_HINT PATH $optarg diff --git a/doc/user/tutorial.txt b/doc/user/tutorial.txt index 82997ded8..1e6c99b94 100644 --- a/doc/user/tutorial.txt +++ b/doc/user/tutorial.txt @@ -26,7 +26,7 @@ Required: * pcap from http://www.tcpdump.org for tcpdump style logging -* pcre from http://www.pcre.org for regular expression pattern matching +* pcre2 from http://www.pcre.org for regular expression pattern matching * pkgconfig from https://www.freedesktop.org/wiki/Software/pkg-config/ to locate build dependencies diff --git a/snort.pc.in b/snort.pc.in index 10f02dcc7..a8b259ae3 100644 --- a/snort.pc.in +++ b/snort.pc.in @@ -9,7 +9,7 @@ mandir=@mandir@ infodir=@infodir@ cpp_opts=DAQ LUAJIT -cpp_opts_other=DNET HWLOC HYPERSCAN LZMA OPENSSL PCAP PCRE UUID +cpp_opts_other=DNET HWLOC HYPERSCAN LZMA OPENSSL PCAP PCRE2 UUID PCAP_CPPFLAGS=@PCAP_CPPFLAGS@ LUAJIT_CPPFLAGS=@LUAJIT_CPPFLAGS@ @@ -18,7 +18,7 @@ DAQ_CPPFLAGS=@DAQ_CPPFLAGS@ FLEX_CPPFLAGS=@FLEX_CPPFLAGS@ OPENSSL_CPPFLAGS=@OPENSSL_CPPFLAGS@ HWLOC_CPPFLAGS=@HWLOC_CPPFLAGS@ -PCRE_CPPFLAGS=@PCRE_CPPFLAGS@ +PCRE2_CPPFLAGS=@PCRE2_CPPFLAGS@ LZMA_CPPFLAGS=@LZMA_CPPFLAGS@ HYPERSCAN_CPPFLAGS=@HYPERSCAN_CPPFLAGS@ UUID_CPPFLAGS=@UUID_CPPFLAGS@ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 546b1e03e..263f9e40a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,7 +10,7 @@ set(EXTERNAL_LIBRARIES ${LUAJIT_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARY} ${PCAP_LIBRARIES} - ${PCRE_LIBRARIES} + ${PCRE2_LIBRARIES} ${ZLIB_LIBRARIES} ) @@ -21,7 +21,7 @@ set(EXTERNAL_INCLUDES ${HWLOC_INCLUDE_DIRS} ${OPENSSL_INCLUDE_DIR} ${PCAP_INCLUDE_DIR} - ${PCRE_INCLUDE_DIR} + ${PCRE2_INCLUDE_DIR} ${ZLIB_INCLUDE_DIRS} ) diff --git a/src/ips_options/ips_pcre.cc b/src/ips_options/ips_pcre.cc index cf0ed8ce7..e1b242361 100644 --- a/src/ips_options/ips_pcre.cc +++ b/src/ips_options/ips_pcre.cc @@ -23,8 +23,6 @@ #include "config.h" #endif -#include - #include #include "detection/ips_context.h" @@ -34,31 +32,20 @@ #include 
"framework/parameter.h" #include "framework/pig_pen.h" #include "hash/hash_key_operations.h" -#include "helpers/scratch_allocator.h" #include "log/log_stats.h" #include "log/messages.h" #include "main/snort_config.h" #include "managers/ips_manager.h" #include "managers/module_manager.h" #include "profiler/profiler.h" +#include "utils/snort_pcre.h" #include "utils/stats.h" #include "utils/util.h" -using namespace snort; - -#ifndef PCRE_STUDY_JIT_COMPILE -#define PCRE_STUDY_JIT_COMPILE 0 -#endif - -//#define NO_JIT // uncomment to disable JIT for Xcode +// Set to 0 to disable JIT for Xcode; keep it defined, it is tested with if (USE_JIT) +#define USE_JIT 1 -#ifdef NO_JIT -#define PCRE_STUDY_FLAGS 0 -#define pcre_release(x) pcre_free(x) -#else -#define PCRE_STUDY_FLAGS PCRE_STUDY_JIT_COMPILE -#define pcre_release(x) pcre_free_study(x) -#endif +using namespace snort; #define SNORT_PCRE_RELATIVE 0x00010 // relative to the end of the last match #define SNORT_PCRE_INVERT 0x00020 // invert detect @@ -72,25 +59,13 @@ void show_pcre_counts(); struct PcreData { - pcre* re; /* compiled regex */ - pcre_extra* pe; /* studied regex foo */ - bool free_pe; - int options; /* sp_pcre specific options (relative & inverse) */ + pcre2_code* re; /* compiled regex */ + pcre2_match_context* match_context; /* match context holding the match and depth limits */ + pcre2_match_data* match_data; /* match data space for storing results */ + int options; /* sp_pcre specific options (relative & inverse) */ char* expression; }; -// we need to specify the vector length for our pcre_exec call. we only care -// about the first vector, which if the match is successful will include the -// offset to the end of the full pattern match. if we decide to store other -// matches, make *SURE* that this is a multiple of 3 as pcre requires it. 
- -// this is a temporary value used during parsing and set in snort conf -// by verify; search uses the value in snort conf -static int s_ovector_max = -1; - -static unsigned scratch_index; -static ScratchAllocator* scratcher = nullptr; - static THREAD_LOCAL ProfileStats pcrePerfStats; struct PcreCounts @@ -143,27 +118,15 @@ THREAD_LOCAL PcreStats pcre_stats; // implementation foo //------------------------------------------------------------------------- -static void pcre_capture( - const void* code, const void* extra) -{ - int tmp_ovector_size = 0; - - pcre_fullinfo((const pcre*)code, (const pcre_extra*)extra, - PCRE_INFO_CAPTURECOUNT, &tmp_ovector_size); - - if (tmp_ovector_size > s_ovector_max) - s_ovector_max = tmp_ovector_size; -} - static void pcre_check_anchored(PcreData* pcre_data) { int rc; - unsigned long int options = 0; + uint32_t options = 0; // PCRE2_INFO_ARGOPTIONS stores a uint32_t through the pointer - if ((pcre_data == nullptr) || (pcre_data->re == nullptr) || (pcre_data->pe == nullptr)) + if ((pcre_data == nullptr) || (pcre_data->re == nullptr)) return; - rc = pcre_fullinfo(pcre_data->re, pcre_data->pe, PCRE_INFO_OPTIONS, (void*)&options); + rc = pcre2_pattern_info(pcre_data->re, PCRE2_INFO_ARGOPTIONS, (void*)&options); switch (rc) { /* pcre_fullinfo fails for the following: @@ -178,24 +141,24 @@ static void pcre_check_anchored(PcreData* pcre_data) /* This is the success code */ break; - case PCRE_ERROR_NULL: - ParseError("pcre_fullinfo: code and/or where were null."); + case PCRE2_ERROR_NULL: + ParseError("pcre2: code and/or where were null."); return; - case PCRE_ERROR_BADMAGIC: - ParseError("pcre_fullinfo: compiled code didn't have correct magic."); + case PCRE2_ERROR_BADMAGIC: + ParseError("pcre2: compiled code didn't have correct magic."); return; - case PCRE_ERROR_BADOPTION: - ParseError("pcre_fullinfo: option type is invalid."); + case PCRE2_ERROR_BADOPTION: + ParseError("pcre2: option type is invalid."); return; default: - ParseError("pcre_fullinfo: Unknown error code."); + ParseError("pcre2: Unknown error code."); return; 
} - if ((options & PCRE_ANCHORED) && !(options & PCRE_MULTILINE)) + if ((options & PCRE2_ANCHORED) && !(options & PCRE2_MULTILINE)) { /* This means that this pcre rule option shouldn't be EvalStatus * even if any of it's relative children should fail to match. @@ -207,11 +170,12 @@ static void pcre_check_anchored(PcreData* pcre_data) static void pcre_parse(const SnortConfig* sc, const char* data, PcreData* pcre_data) { - const char* error; + PCRE2_UCHAR error[128]; char* re, * free_me; char* opts; char delimit = '/'; - int erroffset; + int errorcode; + PCRE2_SIZE erroffset; int compile_flags = 0; if (data == nullptr) @@ -281,17 +245,17 @@ static void pcre_parse(const SnortConfig* sc, const char* data, PcreData* pcre_d { switch (*opts) { - case 'i': compile_flags |= PCRE_CASELESS; break; - case 's': compile_flags |= PCRE_DOTALL; break; - case 'm': compile_flags |= PCRE_MULTILINE; break; - case 'x': compile_flags |= PCRE_EXTENDED; break; + case 'i': compile_flags |= PCRE2_CASELESS; break; + case 's': compile_flags |= PCRE2_DOTALL; break; + case 'm': compile_flags |= PCRE2_MULTILINE; break; + case 'x': compile_flags |= PCRE2_EXTENDED; break; /* * these are pcre specific... 
don't work with perl */ - case 'A': compile_flags |= PCRE_ANCHORED; break; - case 'E': compile_flags |= PCRE_DOLLAR_ENDONLY; break; - case 'G': compile_flags |= PCRE_UNGREEDY; break; + case 'A': compile_flags |= PCRE2_ANCHORED; break; + case 'E': compile_flags |= PCRE2_DOLLAR_ENDONLY; break; + case 'G': compile_flags |= PCRE2_UNGREEDY; break; /* * these are snort specific don't work with pcre or perl @@ -310,72 +274,48 @@ static void pcre_parse(const SnortConfig* sc, const char* data, PcreData* pcre_d } /* now compile the re */ - pcre_data->re = pcre_compile(re, compile_flags, &error, &erroffset, nullptr); + pcre_data->re = pcre2_compile((PCRE2_SPTR)re, PCRE2_ZERO_TERMINATED, compile_flags, + &errorcode, &erroffset, nullptr); if (pcre_data->re == nullptr) { - ParseError(": pcre compile of '%s' failed at offset " - "%d : %s", re, erroffset, error); + pcre2_get_error_message(errorcode, error, sizeof(error)/sizeof(char)); + ParseError("pcre2 compile of '%s' failed at offset " + "%zu : %s", re, erroffset, error); return; } - /* now study it... 
*/ - pcre_data->pe = pcre_study(pcre_data->re, PCRE_STUDY_FLAGS, &error); - - if (pcre_data->pe) + /* create match context */ + pcre_data->match_context = pcre2_match_context_create(NULL); + if (pcre_data->match_context == NULL) { - if ((sc->get_pcre_match_limit() != 0) && - !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT)) - { - if ( !(pcre_data->pe->flags & PCRE_EXTRA_MATCH_LIMIT) ) - pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT; - - pcre_data->pe->match_limit = sc->get_pcre_match_limit(); - } + ParseError("failed to allocate memory for match context"); + return; + } - if ((sc->get_pcre_match_limit_recursion() != 0) && - !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT)) - { - if ( !(pcre_data->pe->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) ) - pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; + pcre_data->match_data = pcre2_match_data_create_from_pattern(pcre_data->re, nullptr); - pcre_data->pe->match_limit_recursion = - sc->get_pcre_match_limit_recursion(); - } - } - else + /* now study it... */ + if (USE_JIT) { - if (!(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT) && - ((sc->get_pcre_match_limit() != 0) || - (sc->get_pcre_match_limit_recursion() != 0))) + // It is possible that we fail to study a re with JIT. 
In that case, + // we fallback to normal processing (as non-JIT) + errorcode = pcre2_jit_compile(pcre_data->re, PCRE2_JIT_COMPLETE); + + if (errorcode) { - pcre_data->pe = (pcre_extra*)snort_calloc(sizeof(pcre_extra)); - pcre_data->free_pe = true; - - if (sc->get_pcre_match_limit() != 0) - { - pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT; - pcre_data->pe->match_limit = sc->get_pcre_match_limit(); - } - - if (sc->get_pcre_match_limit_recursion() != 0) - { - pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; - pcre_data->pe->match_limit_recursion = - sc->get_pcre_match_limit_recursion(); - } + pcre2_get_error_message(errorcode, error, sizeof(error)/sizeof(char)); + ParseWarning(WARN_RULES, "pcre2 JIT compile of '%s' failed : %s\n", re, error); } } - if (error != nullptr) - { - ParseError("pcre study failed : %s", error); - return; - } + if ((sc->get_pcre_match_limit() != 0) && !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT)) + pcre2_set_match_limit(pcre_data->match_context, sc->get_pcre_match_limit()); - pcre_capture(pcre_data->re, pcre_data->pe); - pcre_check_anchored(pcre_data); + if ((sc->get_pcre_match_limit_recursion() != 0) && !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT)) + pcre2_set_depth_limit(pcre_data->match_context, sc->get_pcre_match_limit_recursion()); + pcre_check_anchored(pcre_data); snort_free(free_me); return; @@ -394,29 +334,26 @@ syntax: * found_offset will be set to -1 when the find is unsuccessful OR the routine is inverted */ static bool pcre_search( - Packet* p, + Packet*, const PcreData* pcre_data, const uint8_t* buf, unsigned len, unsigned start_offset, int& found_offset) { + PCRE2_SIZE* ovector; bool matched; found_offset = -1; - std::vector ss = p->context->conf->state[get_instance_id()]; - assert(ss[scratch_index]); - - int result = pcre_exec( - pcre_data->re, /* result of pcre_compile() */ - pcre_data->pe, /* result of pcre_study() */ - (const char*)buf, /* the subject string */ - len, /* the length of the subject string 
*/ - start_offset, /* start at offset 0 in the subject */ - 0, /* options(handled at compile time */ - (int*)ss[scratch_index], /* vector for substring information */ - p->context->conf->pcre_ovector_size); /* number of elements in the vector */ + int result = pcre2_match( + pcre_data->re, /* result of pcre2_compile() */ + (PCRE2_SPTR)buf, /* the subject string */ + (PCRE2_SIZE)len, /* the length of the subject string */ + (PCRE2_SIZE)start_offset, /* offset in the subject at which to start */ + 0, /* options (handled at compile time) */ + pcre_data->match_data, /* match data to store the match results */ + pcre_data->match_context); /* match context for limits */ if (result >= 0) { @@ -432,7 +369,7 @@ static bool pcre_search( * ovector[1], identify the portion of the subject string matched * by the entire pattern. The next pair is used for the first * capturing subpattern, and so on. The value returned by - * pcre_exec() is the number of pairs that have been set. If there + * pcre2_match() is the number of pairs that have been set. If there * are no capturing subpatterns, the return value from a successful * match is 1, indicating that just the first pair of offsets has * been set. @@ -441,18 +378,19 @@ static bool pcre_search( * and a single int for scratch space. 
*/ - found_offset = ((int*)ss[scratch_index])[1]; + ovector = pcre2_get_ovector_pointer(pcre_data->match_data); + found_offset = ovector[1]; } - else if (result == PCRE_ERROR_NOMATCH) + else if (result == PCRE2_ERROR_NOMATCH) { matched = false; } - else if (result == PCRE_ERROR_MATCHLIMIT) + else if (result == PCRE2_ERROR_MATCHLIMIT) { pcre_stats.pcre_match_limit++; matched = false; } - else if (result == PCRE_ERROR_RECURSIONLIMIT) + else if (result == PCRE2_ERROR_RECURSIONLIMIT) { pcre_stats.pcre_recursion_limit++; matched = false; @@ -515,16 +453,9 @@ PcreOption::~PcreOption() if ( config->expression ) snort_free(config->expression); - if ( config->pe ) - { - if ( config->free_pe ) - snort_free(config->pe); - else - pcre_release(config->pe); - } - - if ( config->re ) - free(config->re); // external allocation + pcre2_match_context_free(config->match_context); + pcre2_code_free(config->re); + pcre2_match_data_free(config->match_data); snort_free(config); } @@ -674,17 +605,10 @@ class PcreModule : public Module { public: PcreModule() : Module(s_name, s_help, s_params) - { - data = nullptr; - scratcher = new SimpleScratchAllocator(scratch_setup, scratch_cleanup); - scratch_index = scratcher->get_id(); - } + { data = nullptr; } ~PcreModule() override - { - delete data; - delete scratcher; - } + { delete data; } #ifdef HAVE_HYPERSCAN bool begin(const char*, int, SnortConfig*) override; @@ -710,9 +634,6 @@ private: PcreData* data; Module* mod_regex = nullptr; std::string re; - - static bool scratch_setup(SnortConfig*); - static void scratch_cleanup(SnortConfig*); }; PcreData* PcreModule::get_data() @@ -739,6 +660,7 @@ bool PcreModule::begin(const char* name, int v, SnortConfig* sc) if( mod_regex ) mod_regex = mod_regex->begin(name, v, sc) ? 
mod_regex : nullptr; } + return true; } #endif @@ -768,38 +690,6 @@ bool PcreModule::end(const char* name, int v, SnortConfig* sc) return true; } -bool PcreModule::scratch_setup(SnortConfig* sc) -{ - if ( s_ovector_max < 0 ) - return false; - - // The pcre_fullinfo() function can be used to find out how many - // capturing subpatterns there are in a compiled pattern. The - // smallest size for ovector that will allow for n captured - // substrings, in addition to the offsets of the substring matched - // by the whole pattern is 3(n+1). - - sc->pcre_ovector_size = 3 * (s_ovector_max + 1); - s_ovector_max = -1; - - for ( unsigned i = 0; i < sc->num_slots; ++i ) - { - std::vector& ss = sc->state[i]; - ss[scratch_index] = snort_calloc(sc->pcre_ovector_size, sizeof(int)); - } - return true; -} - -void PcreModule::scratch_cleanup(SnortConfig* sc) -{ - for ( unsigned i = 0; i < sc->num_slots; ++i ) - { - std::vector& ss = sc->state[i]; - snort_free(ss[scratch_index]); - ss[scratch_index] = nullptr; - } -} - //------------------------------------------------------------------------- // api methods //------------------------------------------------------------------------- diff --git a/src/main/process.cc b/src/main/process.cc index 8d4e74bfd..b6362c3fd 100644 --- a/src/main/process.cc +++ b/src/main/process.cc @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -72,6 +71,7 @@ extern "C" { #include "protocols/packet.h" // For NUM_IP_PROTOS #include "utils/cpp_macros.h" #include "utils/stats.h" +#include "utils/snort_pcre.h" #include "utils/util.h" #include "main.h" @@ -679,6 +679,12 @@ int DisplayBanner() while ( *ljv && !isdigit(*ljv) ) ++ljv; + char pcre2_version[64] { }; + int pcre2_version_size = pcre2_config(PCRE2_CONFIG_VERSION, pcre2_version); + + if (pcre2_version_size <= 0 or static_cast(pcre2_version_size) > sizeof(pcre2_version)) + abort(); + LogMessage("\n"); LogMessage(" ,,_ -*> Snort++ <*-\n"); #ifdef BUILD @@ -707,7 +713,7 @@ 
int DisplayBanner() LogMessage(" Using LZMA version %s\n", lzma_version_string()); #endif LogMessage(" Using %s\n", OpenSSL_version(SSLEAY_VERSION)); - LogMessage(" Using PCRE version %s\n", pcre_version()); + LogMessage(" Using PCRE2 version %s\n", (char*) pcre2_version); LogMessage(" Using ZLIB version %s\n", zlib_version); LogMessage("\n"); diff --git a/src/main/shell.cc b/src/main/shell.cc index 1f1c85276..bb7c69060 100644 --- a/src/main/shell.cc +++ b/src/main/shell.cc @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -58,6 +57,7 @@ extern "C" { #include "managers/module_manager.h" #include "parser/parse_conf.h" #include "parser/parser.h" +#include "utils/snort_pcre.h" #include "utils/stats.h" #include "lua_bootstrap.h" @@ -140,6 +140,12 @@ static void install_dependencies_strings(Shell* sh, lua_State* L) { assert(dep_versions[0]); + char pcre2_version[64] { }; + int pcre2_version_size = pcre2_config(PCRE2_CONFIG_VERSION, pcre2_version); + + if (pcre2_version_size <= 0 or static_cast(pcre2_version_size) > sizeof(pcre2_version)) + abort(); + std::vector vs; const char* ljv = LUAJIT_VERSION; const char* osv = OpenSSL_version(SSLEAY_VERSION); @@ -156,7 +162,7 @@ static void install_dependencies_strings(Shell* sh, lua_State* L) vs.push_back(ljv); vs.push_back(osv); vs.push_back(lpv); - vs.push_back(pcre_version()); + vs.push_back(pcre2_version); vs.push_back(zlib_version); #ifdef HAVE_HYPERSCAN vs.push_back(hs_version()); diff --git a/src/network_inspectors/appid/lua_detector_api.cc b/src/network_inspectors/appid/lua_detector_api.cc index c621d1a11..4fe3fe95a 100644 --- a/src/network_inspectors/appid/lua_detector_api.cc +++ b/src/network_inspectors/appid/lua_detector_api.cc @@ -25,7 +25,6 @@ #include "lua_detector_api.h" #include -#include #include #include "detection/fp_config.h" @@ -38,6 +37,7 @@ #include "profiler/profiler.h" #include "protocols/packet.h" #include "trace/trace_api.h" +#include "utils/snort_pcre.h" #include 
"app_info_table.h" #include "appid_debug.h" @@ -727,35 +727,44 @@ static int detector_get_pcre_groups(lua_State* L) // Verify detector user data and that we are in packet context LuaStateDescriptor* lsd = ud->validate_lua_state(true); - int ovector[OVECCOUNT]; - const char* error; - int erroffset; + pcre2_match_data* match_data; + PCRE2_UCHAR error[128]; + PCRE2_SIZE erroffset; + int errorcode; const char* pattern = lua_tostring(L, 2); unsigned int offset = lua_tonumber(L, 3); /*offset can be zero, no check necessary. */ /*compile the regular expression pattern, and handle errors */ - pcre* re = pcre_compile(pattern, // the pattern - PCRE_DOTALL, // default options - dot matches all inc \n - &error, // for error message - &erroffset, // for error offset - nullptr); // use default character tables + pcre2_code* re = pcre2_compile((PCRE2_SPTR)pattern, // the pattern + PCRE2_ZERO_TERMINATED, // assume zero terminated strings + PCRE2_DOTALL, // default options - dot matches all inc \n + &errorcode, // for error message + &erroffset, // for error offset + nullptr); // default character tables if (re == nullptr) { + pcre2_get_error_message(errorcode, error, 128); - appid_log(lsd->ldp.pkt, TRACE_ERROR_LEVEL, "PCRE compilation failed at offset %d: %s\n", erroffset, error); + appid_log(lsd->ldp.pkt, TRACE_ERROR_LEVEL, "PCRE compilation failed at offset %zu: %s\n", erroffset, error); // erroffset is PCRE2_SIZE (size_t) return 0; } - /*pattern match against the subject string. 
*/ - int rc = pcre_exec(re, // compiled pattern - nullptr, // no extra data - (const char*)lsd->ldp.data, // subject string - lsd->ldp.size, // length of the subject - offset, // offset 0 - 0, // default options - ovector, // output vector for substring information - OVECCOUNT); // number of elements in the output vector + match_data = pcre2_match_data_create(OVECCOUNT, NULL); + if (!match_data) + { + appid_log(lsd->ldp.pkt, TRACE_ERROR_LEVEL, "PCRE failed to allocate mem for match_data\n"); + pcre2_code_free(re); // do not leak the compiled pattern on this early return + return 0; + } + + int rc = pcre2_match(re, // compiled pattern + (PCRE2_SPTR)lsd->ldp.data, // subject string + (PCRE2_SIZE)lsd->ldp.size, // length of the subject + (PCRE2_SIZE)offset, // offset 0 + 0, // default options + match_data, // match data for match results + NULL); // no match context if (rc >= 0) { @@ -773,6 +782,7 @@ static int detector_get_pcre_groups(lua_State* L) return 0; } + PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(match_data); for (int i = 0; i < rc; i++) { lua_pushlstring(L, (const char*)lsd->ldp.data + ovector[2*i], ovector[2*i+1] - @@ -782,12 +792,13 @@ static int detector_get_pcre_groups(lua_State* L) else { // log errors except no matches - if (rc != PCRE_ERROR_NOMATCH) + if (rc != PCRE2_ERROR_NOMATCH) appid_log(lsd->ldp.pkt, TRACE_WARNING_LEVEL, "PCRE regular expression group match failed. 
rc: %d\n", rc); rc = 0; } - pcre_free(re); + pcre2_match_data_free(match_data); + pcre2_code_free(re); return rc; } diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 26cda0a55..1161dc528 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -18,6 +18,7 @@ add_library ( utils OBJECT sflsq.cc sflsq.h snort_bounds.h + snort_pcre.h stats.cc stats.h util.cc diff --git a/src/utils/snort_pcre.h b/src/utils/snort_pcre.h new file mode 100644 index 000000000..fcbf897d0 --- /dev/null +++ b/src/utils/snort_pcre.h @@ -0,0 +1,31 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2025-2025 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// snort_pcre.h author Michael Matirko + +#ifndef SNORT_PCRE_H +#define SNORT_PCRE_H + +// pcre2 code unit width must be set prior to including the pcre2 +// headers. Setting it here allows us to not require a redefinition +// of PCRE2_CODE_UNIT_WIDTH everywhere we include pcre2.h + +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> + +#endif +