]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #4506: [PCRE2] ips: pcre2
authorMichael Matirko (mmatirko) <mmatirko@cisco.com>
Tue, 14 Jan 2025 20:09:43 +0000 (20:09 +0000)
committerSteven Baigal (sbaigal) <sbaigal@cisco.com>
Tue, 14 Jan 2025 20:09:43 +0000 (20:09 +0000)
Merge in SNORT/snort3 from ~MMATIRKO/snort3:pcre2 to master

Squashed commit of the following:

commit c385698cabdd01447d01456922f2e5cae451caa6
Author: Michael Matirko <mmatirko@cisco.com>
Date:   Fri Jan 3 15:40:11 2025 -0500

    utils: add new header/wrapper for pcre2 code unit width

commit 92eab6e926465e51d5c47c67204759a8b87df581
Author: Michael Matirko <mmatirko@cisco.com>
Date:   Fri Jan 3 15:39:31 2025 -0500

    doc: stylize dependency names in README.md

commit 3c395d5cd0d8ba8d4dc2572f5753d8fcb813756f
Author: Michael Matirko <mmatirko@cisco.com>
Date:   Thu Mar 7 13:51:08 2024 -0500

    ips: update pcre to pcre2

14 files changed:
README.md
cmake/FindPCRE.cmake [deleted file]
cmake/FindPCRE2.cmake [new file with mode: 0644]
cmake/include_libraries.cmake
configure_cmake.sh
doc/user/tutorial.txt
snort.pc.in
src/CMakeLists.txt
src/ips_options/ips_pcre.cc
src/main/process.cc
src/main/shell.cc
src/network_inspectors/appid/lua_detector_api.cc
src/utils/CMakeLists.txt
src/utils/snort_pcre.h [new file with mode: 0644]

index c731ce60ad238984fbba13d6f3eb6e5be5b0ba6a..5d49406642372824a67caf8b5957258cfccfcd2f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -51,8 +51,8 @@ Additional features on the roadmap include:
 If you already build Snort, you may have everything you need.  If not, grab
 the latest:
 
-* cmake to build from source
-* daq from https://github.com/snort3/libdaq for packet IO
+* CMake to build from source
+* DAQ from https://github.com/snort3/libdaq for packet IO
 * dnet from https://github.com/dugsong/libdnet.git for network utility functions
 * flex >= 2.6.0 from https://github.com/westes/flex for JavaScript syntax parser
 * g++ >= 7 or other C++17 compiler
@@ -61,7 +61,7 @@ the latest:
 * OpenSSL from https://www.openssl.org/source/ for SHA and MD5 file signatures,
   the protected_content rule option, and SSL service detection
 * pcap from http://www.tcpdump.org for tcpdump style logging
-* pcre from http://www.pcre.org for regular expression pattern matching
+* PCRE2 from http://www.pcre.org for regular expression pattern matching
 * pkgconfig from https://www.freedesktop.org/wiki/Software/pkg-config/ to locate build dependencies
 * zlib from http://www.zlib.net for decompression
 
diff --git a/cmake/FindPCRE.cmake b/cmake/FindPCRE.cmake
deleted file mode 100644 (file)
index 0fec76c..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-# - Find pcre
-# Find the native PCRE includes and library
-#
-#  PCRE_INCLUDE_DIR - where to find pcre.h, etc.
-#  PCRE_LIBRARIES    - List of libraries when using pcre.
-#  PCRE_FOUND        - True if pcre found.
-
-set(ERROR_MESSAGE
-    "\n\tERROR!  Libpcre library not found.
-    \tGet it from http://www.pcre.org\n"
-)
-
-find_package(PkgConfig)
-pkg_check_modules(PC_PCRE libpcre)
-
-# Use PCRE_INCLUDE_DIR_HINT and PCRE_LIBRARIES_DIR_HINT from configure_cmake.sh as primary hints
-# and then package config information after that.
-find_path(PCRE_INCLUDE_DIR pcre.h
-    HINTS ${PCRE_INCLUDE_DIR_HINT} ${PC_PCRE_INCLUDEDIR} ${PC_PCRE_INCLUDE_DIRS})
-find_library(PCRE_LIBRARIES NAMES pcre
-    HINTS ${PCRE_LIBRARIES_DIR_HINT} ${PC_PCRE_LIBDIR} ${PC_PCRE_LIBRARY_DIRS})
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(PCRE
-    REQUIRED_VARS PCRE_INCLUDE_DIR PCRE_LIBRARIES
-    FAIL_MESSAGE "${ERROR_MESSAGE}"
-)
-
-mark_as_advanced(
-    PCRE_LIBRARIES
-    PCRE_INCLUDE_DIR
-)
diff --git a/cmake/FindPCRE2.cmake b/cmake/FindPCRE2.cmake
new file mode 100644 (file)
index 0000000..d8a3857
--- /dev/null
@@ -0,0 +1,32 @@
+# - Find pcre2
+# Find the native PCRE2 includes and library (8-bit code unit build)
+#
+#  PCRE2_INCLUDE_DIR - where to find pcre2.h, etc.
+#  PCRE2_LIBRARIES    - List of libraries when using pcre2.
+#  PCRE2_FOUND        - True if pcre2 found.
+
+set(ERROR_MESSAGE
+    "\n\tERROR!  Libpcre2 library not found.
+    \tGet it from http://www.pcre.org\n"
+)
+
+find_package(PkgConfig)
+# PCRE2 installs one pkg-config module per code unit width (libpcre2-8,
+# libpcre2-16, libpcre2-32); there is no plain "libpcre2" module.  Query the
+# 8-bit module so the hints below match the pcre2-8 library we search for.
+pkg_check_modules(PC_PCRE2 libpcre2-8)
+
+# Use PCRE2_INCLUDE_DIR_HINT and PCRE2_LIBRARIES_DIR_HINT from configure_cmake.sh as primary hints
+# and then package config information after that.
+find_path(PCRE2_INCLUDE_DIR pcre2.h
+    HINTS ${PCRE2_INCLUDE_DIR_HINT} ${PC_PCRE2_INCLUDEDIR} ${PC_PCRE2_INCLUDE_DIRS})
+find_library(PCRE2_LIBRARIES NAMES pcre2-8
+    HINTS ${PCRE2_LIBRARIES_DIR_HINT} ${PC_PCRE2_LIBDIR} ${PC_PCRE2_LIBRARY_DIRS})
+
+# Sets PCRE2_FOUND and prints ERROR_MESSAGE when a required variable is missing
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PCRE2
+    REQUIRED_VARS PCRE2_INCLUDE_DIR PCRE2_LIBRARIES
+    FAIL_MESSAGE "${ERROR_MESSAGE}"
+)
+
+mark_as_advanced(
+    PCRE2_LIBRARIES
+    PCRE2_INCLUDE_DIR
+)
index 244c6452dc84282ff78ccfab7df6675f40deb50f..a6b2562ec708498c938675f1a2fd0be50cb85b0a 100644 (file)
@@ -8,7 +8,7 @@ find_package(HWLOC REQUIRED)
 find_package(LuaJIT REQUIRED)
 find_package(OpenSSL 1.1.1 REQUIRED)
 find_package(PCAP REQUIRED)
-find_package(PCRE REQUIRED)
+find_package(PCRE2 REQUIRED)
 find_package(ZLIB REQUIRED)
 if (ENABLE_UNIT_TESTS)
     find_package(CppUTest REQUIRED)
index c2a3e6b0babd1df218769df8c03915b9f27c20e4..572a53aa659128d1e1f6910d011d1e6a56dfa314 100755 (executable)
@@ -91,10 +91,10 @@ Optional Packages:
                             luajit include directory
     --with-luajit-libraries=DIR
                             luajit library directory
-    --with-pcre-includes=DIR
-                            libpcre include directory
-    --with-pcre-libraries=DIR
-                            libpcre library directory
+    --with-pcre2-includes=DIR
+                            libpcre2 include directory
+    --with-pcre2-libraries=DIR
+                            libpcre2 library directory
     --with-dnet-includes=DIR
                             libdnet include directory
     --with-dnet-libraries=DIR
@@ -421,11 +421,11 @@ while [ $# -ne 0 ]; do
         --with-luajit-libraries=*)
             append_cache_entry LUAJIT_LIBRARIES_DIR_HINT PATH $optarg
             ;;
-        --with-pcre-includes=*)
-            append_cache_entry PCRE_INCLUDE_DIR_HINT PATH $optarg
+        --with-pcre2-includes=*)
+            append_cache_entry PCRE2_INCLUDE_DIR_HINT PATH $optarg
             ;;
-        --with-pcre-libraries=*)
-            append_cache_entry PCRE_LIBRARIES_DIR_HINT PATH $optarg
+        --with-pcre2-libraries=*)
+            append_cache_entry PCRE2_LIBRARIES_DIR_HINT PATH $optarg
             ;;
         --with-dnet-includes=*)
             append_cache_entry DNET_INCLUDE_DIR_HINT PATH $optarg
index 82997ded8dbc563d6b4b3da0992236ec0f3b556f..1e6c99b9471d2eafaf1248e4debe03bb8b27e0f4 100644 (file)
@@ -26,7 +26,7 @@ Required:
 
 * pcap from http://www.tcpdump.org for tcpdump style logging
 
-* pcre from http://www.pcre.org for regular expression pattern matching
+* pcre2 from http://www.pcre.org for regular expression pattern matching
 
 * pkgconfig from https://www.freedesktop.org/wiki/Software/pkg-config/ to locate
   build dependencies
index 10f02dcc75dff8c5721dd6641d050a8a6c790f20..a8b259ae3659b355810f48b8a0804d6e6eacec46 100644 (file)
@@ -9,7 +9,7 @@ mandir=@mandir@
 infodir=@infodir@
 
 cpp_opts=DAQ LUAJIT
-cpp_opts_other=DNET HWLOC HYPERSCAN LZMA OPENSSL PCAP PCRE UUID
+cpp_opts_other=DNET HWLOC HYPERSCAN LZMA OPENSSL PCAP PCRE2 UUID
 
 PCAP_CPPFLAGS=@PCAP_CPPFLAGS@
 LUAJIT_CPPFLAGS=@LUAJIT_CPPFLAGS@
@@ -18,7 +18,7 @@ DAQ_CPPFLAGS=@DAQ_CPPFLAGS@
 FLEX_CPPFLAGS=@FLEX_CPPFLAGS@
 OPENSSL_CPPFLAGS=@OPENSSL_CPPFLAGS@
 HWLOC_CPPFLAGS=@HWLOC_CPPFLAGS@
-PCRE_CPPFLAGS=@PCRE_CPPFLAGS@
+PCRE2_CPPFLAGS=@PCRE2_CPPFLAGS@
 LZMA_CPPFLAGS=@LZMA_CPPFLAGS@
 HYPERSCAN_CPPFLAGS=@HYPERSCAN_CPPFLAGS@
 UUID_CPPFLAGS=@UUID_CPPFLAGS@
index 546b1e03e2b2d2c9d889fd3be3b75ea0be0c68e3..263f9e40aed8ea043ae2916bffb193835b034336 100644 (file)
@@ -10,7 +10,7 @@ set(EXTERNAL_LIBRARIES
     ${LUAJIT_LIBRARIES}
     ${OPENSSL_CRYPTO_LIBRARY}
     ${PCAP_LIBRARIES}
-    ${PCRE_LIBRARIES}
+    ${PCRE2_LIBRARIES}
     ${ZLIB_LIBRARIES}
 )
 
@@ -21,7 +21,7 @@ set(EXTERNAL_INCLUDES
     ${HWLOC_INCLUDE_DIRS}
     ${OPENSSL_INCLUDE_DIR}
     ${PCAP_INCLUDE_DIR}
-    ${PCRE_INCLUDE_DIR}
+    ${PCRE2_INCLUDE_DIR}
     ${ZLIB_INCLUDE_DIRS}
 )
 
index cf0ed8ce7477c0960f96c15b1236300691251170..e1b2423614020e497386fd88615aeee3fcaf3bd0 100644 (file)
@@ -23,8 +23,6 @@
 #include "config.h"
 #endif
 
-#include <pcre.h>
-
 #include <cassert>
 
 #include "detection/ips_context.h"
 #include "framework/parameter.h"
 #include "framework/pig_pen.h"
 #include "hash/hash_key_operations.h"
-#include "helpers/scratch_allocator.h"
 #include "log/log_stats.h"
 #include "log/messages.h"
 #include "main/snort_config.h"
 #include "managers/ips_manager.h"
 #include "managers/module_manager.h"
 #include "profiler/profiler.h"
+#include "utils/snort_pcre.h"
 #include "utils/stats.h"
 #include "utils/util.h"
 
-using namespace snort;
-
-#ifndef PCRE_STUDY_JIT_COMPILE
-#define PCRE_STUDY_JIT_COMPILE 0
-#endif
-
-//#define NO_JIT // uncomment to disable JIT for Xcode
+// Set to 0 to disable JIT for Xcode (must stay defined: it is tested with if (USE_JIT))
+#define USE_JIT 1
 
-#ifdef NO_JIT
-#define PCRE_STUDY_FLAGS 0
-#define pcre_release(x) pcre_free(x)
-#else
-#define PCRE_STUDY_FLAGS PCRE_STUDY_JIT_COMPILE
-#define pcre_release(x) pcre_free_study(x)
-#endif
+using namespace snort;
 
 #define SNORT_PCRE_RELATIVE         0x00010 // relative to the end of the last match
 #define SNORT_PCRE_INVERT           0x00020 // invert detect
@@ -72,25 +59,13 @@ void show_pcre_counts();
 
 struct PcreData
 {
-    pcre* re;           /* compiled regex */
-    pcre_extra* pe;     /* studied regex foo */
-    bool free_pe;
-    int options;        /* sp_pcre specific options (relative & inverse) */
+    pcre2_code* re;                       /* compiled regex */
+    pcre2_match_context* match_context;   /* match_context */
+    pcre2_match_data* match_data;         /* match data space for storing results */
+    int options;                          /* sp_pcre specific options (relative & inverse) */
     char* expression;
 };
 
-// we need to specify the vector length for our pcre_exec call.  we only care
-// about the first vector, which if the match is successful will include the
-// offset to the end of the full pattern match.  if we decide to store other
-// matches, make *SURE* that this is a multiple of 3 as pcre requires it.
-
-// this is a temporary value used during parsing and set in snort conf
-// by verify; search uses the value in snort conf
-static int s_ovector_max = -1;
-
-static unsigned scratch_index;
-static ScratchAllocator* scratcher = nullptr;
-
 static THREAD_LOCAL ProfileStats pcrePerfStats;
 
 struct PcreCounts
@@ -143,27 +118,15 @@ THREAD_LOCAL PcreStats pcre_stats;
 // implementation foo
 //-------------------------------------------------------------------------
 
-static void pcre_capture(
-    const void* code, const void* extra)
-{
-    int tmp_ovector_size = 0;
-
-    pcre_fullinfo((const pcre*)code, (const pcre_extra*)extra,
-        PCRE_INFO_CAPTURECOUNT, &tmp_ovector_size);
-
-    if (tmp_ovector_size > s_ovector_max)
-        s_ovector_max = tmp_ovector_size;
-}
-
 static void pcre_check_anchored(PcreData* pcre_data)
 {
     int rc;
     unsigned long int options = 0;
 
-    if ((pcre_data == nullptr) || (pcre_data->re == nullptr) || (pcre_data->pe == nullptr))
+    if ((pcre_data == nullptr) || (pcre_data->re == nullptr))
         return;
 
-    rc = pcre_fullinfo(pcre_data->re, pcre_data->pe, PCRE_INFO_OPTIONS, (void*)&options);
+    rc = pcre2_pattern_info(pcre_data->re, PCRE2_INFO_ARGOPTIONS, (void*)&options);
     switch (rc)
     {
     /* pcre_fullinfo fails for the following:
@@ -178,24 +141,24 @@ static void pcre_check_anchored(PcreData* pcre_data)
         /* This is the success code */
         break;
 
-    case PCRE_ERROR_NULL:
-        ParseError("pcre_fullinfo: code and/or where were null.");
+    case PCRE2_ERROR_NULL:
+        ParseError("pcre2: code and/or where were null.");
         return;
 
-    case PCRE_ERROR_BADMAGIC:
-        ParseError("pcre_fullinfo: compiled code didn't have correct magic.");
+    case PCRE2_ERROR_BADMAGIC:
+        ParseError("pcre2: compiled code didn't have correct magic.");
         return;
 
-    case PCRE_ERROR_BADOPTION:
-        ParseError("pcre_fullinfo: option type is invalid.");
+    case PCRE2_ERROR_BADOPTION:
+        ParseError("pcre2: option type is invalid.");
         return;
 
     default:
-        ParseError("pcre_fullinfo: Unknown error code.");
+        ParseError("pcre2: Unknown error code.");
         return;
     }
 
-    if ((options & PCRE_ANCHORED) && !(options & PCRE_MULTILINE))
+    if ((options & PCRE2_ANCHORED) && !(options & PCRE2_MULTILINE))
     {
         /* This means that this pcre rule option shouldn't be EvalStatus
          * even if any of it's relative children should fail to match.
@@ -207,11 +170,12 @@ static void pcre_check_anchored(PcreData* pcre_data)
 
 static void pcre_parse(const SnortConfig* sc, const char* data, PcreData* pcre_data)
 {
-    const char* error;
+    PCRE2_UCHAR error[128];
     char* re, * free_me;
     char* opts;
     char delimit = '/';
-    int erroffset;
+    int errorcode;
+    PCRE2_SIZE erroffset;
     int compile_flags = 0;
 
     if (data == nullptr)
@@ -281,17 +245,17 @@ static void pcre_parse(const SnortConfig* sc, const char* data, PcreData* pcre_d
     {
         switch (*opts)
         {
-        case 'i':  compile_flags |= PCRE_CASELESS;            break;
-        case 's':  compile_flags |= PCRE_DOTALL;              break;
-        case 'm':  compile_flags |= PCRE_MULTILINE;           break;
-        case 'x':  compile_flags |= PCRE_EXTENDED;            break;
+        case 'i':  compile_flags |= PCRE2_CASELESS;            break;
+        case 's':  compile_flags |= PCRE2_DOTALL;              break;
+        case 'm':  compile_flags |= PCRE2_MULTILINE;           break;
+        case 'x':  compile_flags |= PCRE2_EXTENDED;            break;
 
         /*
          * these are pcre specific... don't work with perl
          */
-        case 'A':  compile_flags |= PCRE_ANCHORED;            break;
-        case 'E':  compile_flags |= PCRE_DOLLAR_ENDONLY;      break;
-        case 'G':  compile_flags |= PCRE_UNGREEDY;            break;
+        case 'A':  compile_flags |= PCRE2_ANCHORED;            break;
+        case 'E':  compile_flags |= PCRE2_DOLLAR_ENDONLY;      break;
+        case 'G':  compile_flags |= PCRE2_UNGREEDY;            break;
 
         /*
          * these are snort specific don't work with pcre or perl
@@ -310,72 +274,48 @@ static void pcre_parse(const SnortConfig* sc, const char* data, PcreData* pcre_d
     }
 
     /* now compile the re */
-    pcre_data->re = pcre_compile(re, compile_flags, &error, &erroffset, nullptr);
+    pcre_data->re = pcre2_compile((PCRE2_SPTR)re, PCRE2_ZERO_TERMINATED, compile_flags,
+        &errorcode, &erroffset, nullptr);
 
     if (pcre_data->re == nullptr)
     {
-        ParseError(": pcre compile of '%s' failed at offset "
-            "%d : %s", re, erroffset, error);
+        pcre2_get_error_message(errorcode, error, sizeof(error)/sizeof(char));
+        ParseError("pcre2 compile of '%s' failed at offset "
+            "%zu : %s", re, erroffset, error);
         return;
     }
 
-    /* now study it... */
-    pcre_data->pe = pcre_study(pcre_data->re, PCRE_STUDY_FLAGS, &error);
-
-    if (pcre_data->pe)
+    /* create match context */
+    pcre_data->match_context = pcre2_match_context_create(NULL);
+    if (pcre_data->match_context == NULL)
     {
-        if ((sc->get_pcre_match_limit() != 0) &&
-            !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT))
-        {
-            if ( !(pcre_data->pe->flags & PCRE_EXTRA_MATCH_LIMIT) )
-                pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT;
-
-            pcre_data->pe->match_limit = sc->get_pcre_match_limit();
-        }
+        ParseError("failed to allocate memory for match context");
+        return;
+    }
 
-        if ((sc->get_pcre_match_limit_recursion() != 0) &&
-            !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT))
-        {
-            if ( !(pcre_data->pe->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) )
-                pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+    /* create match data sized for this pattern's capture groups */
+    pcre_data->match_data = pcre2_match_data_create_from_pattern(pcre_data->re, nullptr);
+    if (pcre_data->match_data == nullptr)
+    {
+        // same handling as the match context allocation above: without match
+        // data every later pcre2_match() on this rule would fail
+        ParseError("failed to allocate memory for match data");
+        return;
+    }
 
-            pcre_data->pe->match_limit_recursion =
-                sc->get_pcre_match_limit_recursion();
-        }
-    }
-    else
+     /* now study it... */
+    if (USE_JIT)
     {
-        if (!(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT) &&
-            ((sc->get_pcre_match_limit() != 0) ||
-             (sc->get_pcre_match_limit_recursion() != 0)))
+        // It is possible that we fail to study a re with JIT. In that case,
+        // we fallback to normal processing (as non-JIT)
+        errorcode = pcre2_jit_compile(pcre_data->re, PCRE2_JIT_COMPLETE);
+
+        if (errorcode)
         {
-            pcre_data->pe = (pcre_extra*)snort_calloc(sizeof(pcre_extra));
-            pcre_data->free_pe = true;
-
-            if (sc->get_pcre_match_limit() != 0)
-            {
-                pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT;
-                pcre_data->pe->match_limit = sc->get_pcre_match_limit();
-            }
-
-            if (sc->get_pcre_match_limit_recursion() != 0)
-            {
-                pcre_data->pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-                pcre_data->pe->match_limit_recursion =
-                    sc->get_pcre_match_limit_recursion();
-            }
+            pcre2_get_error_message(errorcode, error, sizeof(error)/sizeof(char));
+            ParseWarning(WARN_RULES, "pcre2 JIT compile of '%s' failed : %s\n", re, error);
         }
     }
 
-    if (error != nullptr)
-    {
-        ParseError("pcre study failed : %s", error);
-        return;
-    }
+    if ((sc->get_pcre_match_limit() != 0) && !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT))
+        pcre2_set_match_limit(pcre_data->match_context, sc->get_pcre_match_limit());
 
-    pcre_capture(pcre_data->re, pcre_data->pe);
-    pcre_check_anchored(pcre_data);
+    if ((sc->get_pcre_match_limit_recursion() != 0) && !(pcre_data->options & SNORT_OVERRIDE_MATCH_LIMIT))
+        pcre2_set_depth_limit(pcre_data->match_context, sc->get_pcre_match_limit_recursion());
 
+    pcre_check_anchored(pcre_data);
     snort_free(free_me);
     return;
 
@@ -394,29 +334,26 @@ syntax:
  * found_offset will be set to -1 when the find is unsuccessful OR the routine is inverted
  */
 static bool pcre_search(
-    Packet* p,
+    Packet*,
     const PcreData* pcre_data,
     const uint8_t* buf,
     unsigned len,
     unsigned start_offset,
     int& found_offset)
 {
+    PCRE2_SIZE* ovector;
     bool matched;
 
     found_offset = -1;
 
-    std::vector<void *> ss = p->context->conf->state[get_instance_id()];
-    assert(ss[scratch_index]);
-
-    int result = pcre_exec(
-        pcre_data->re,  /* result of pcre_compile() */
-        pcre_data->pe,  /* result of pcre_study()   */
-        (const char*)buf, /* the subject string */
-        len,            /* the length of the subject string */
-        start_offset,   /* start at offset 0 in the subject */
-        0,              /* options(handled at compile time */
-        (int*)ss[scratch_index], /* vector for substring information */
-        p->context->conf->pcre_ovector_size); /* number of elements in the vector */
+    int result = pcre2_match(
+        pcre_data->re,              /* result of pcre_compile() */
+        (PCRE2_SPTR)buf,            /* the subject string */
+        (PCRE2_SIZE)len,            /* the length of the subject string */
+        (PCRE2_SIZE)start_offset,   /* start at offset 0 in the subject */
+        0,                          /* options (handled at compile time) */
+        pcre_data->match_data,      /* match data to store the match results */
+        pcre_data->match_context);  /* match context for limits */
 
     if (result >= 0)
     {
@@ -432,7 +369,7 @@ static bool pcre_search(
          * ovector[1], identify the portion of the subject string matched
          * by the entire pattern.  The next pair is used for the first
          * capturing subpattern, and so on. The value returned by
-         * pcre_exec() is the number of pairs that have been set. If there
+         * pcre2_match() is the number of pairs that have been set. If there
          * are no capturing subpatterns, the return value from a successful
          * match is 1, indicating that just the first pair of offsets has
          * been set.
@@ -441,18 +378,19 @@ static bool pcre_search(
          * and a single int for scratch space.
          */
 
-        found_offset = ((int*)ss[scratch_index])[1];
+        ovector = pcre2_get_ovector_pointer(pcre_data->match_data);
+        found_offset = ovector[1];
     }
-    else if (result == PCRE_ERROR_NOMATCH)
+    else if (result == PCRE2_ERROR_NOMATCH)
     {
         matched = false;
     }
-    else if (result == PCRE_ERROR_MATCHLIMIT)
+    else if (result == PCRE2_ERROR_MATCHLIMIT)
     {
         pcre_stats.pcre_match_limit++;
         matched = false;
     }
-    else if (result == PCRE_ERROR_RECURSIONLIMIT)
+    else if (result == PCRE2_ERROR_RECURSIONLIMIT)
     {
         pcre_stats.pcre_recursion_limit++;
         matched = false;
@@ -515,16 +453,9 @@ PcreOption::~PcreOption()
     if ( config->expression )
         snort_free(config->expression);
 
-    if ( config->pe )
-    {
-        if ( config->free_pe )
-            snort_free(config->pe);
-        else
-            pcre_release(config->pe);
-    }
-
-    if ( config->re )
-        free(config->re);  // external allocation
+    pcre2_match_context_free(config->match_context);
+    pcre2_code_free(config->re);
+    pcre2_match_data_free(config->match_data);
 
     snort_free(config);
 }
@@ -674,17 +605,10 @@ class PcreModule : public Module
 {
 public:
     PcreModule() : Module(s_name, s_help, s_params)
-    {
-        data = nullptr;
-        scratcher = new SimpleScratchAllocator(scratch_setup, scratch_cleanup);
-        scratch_index = scratcher->get_id();
-    }
+    { data = nullptr; }
 
     ~PcreModule() override
-    {
-        delete data;
-        delete scratcher;
-    }
+    { delete data; }
 
 #ifdef HAVE_HYPERSCAN
     bool begin(const char*, int, SnortConfig*) override;
@@ -710,9 +634,6 @@ private:
     PcreData* data;
     Module* mod_regex = nullptr;
     std::string re;
-
-    static bool scratch_setup(SnortConfig*);
-    static void scratch_cleanup(SnortConfig*);
 };
 
 PcreData* PcreModule::get_data()
@@ -739,6 +660,7 @@ bool PcreModule::begin(const char* name, int v, SnortConfig* sc)
         if( mod_regex )
             mod_regex = mod_regex->begin(name, v, sc) ? mod_regex : nullptr;
     }
+
     return true;
 }
 #endif
@@ -768,38 +690,6 @@ bool PcreModule::end(const char* name, int v, SnortConfig* sc)
     return true;
 }
 
-bool PcreModule::scratch_setup(SnortConfig* sc)
-{
-    if ( s_ovector_max < 0 )
-        return false;
-
-    // The pcre_fullinfo() function can be used to find out how many
-    // capturing subpatterns there are in a compiled pattern. The
-    // smallest size for ovector that will allow for n captured
-    // substrings, in addition to the offsets of the substring matched
-    // by the whole pattern is 3(n+1).
-
-    sc->pcre_ovector_size = 3 * (s_ovector_max + 1);
-    s_ovector_max = -1;
-
-    for ( unsigned i = 0; i < sc->num_slots; ++i )
-    {
-        std::vector<void *>& ss = sc->state[i];
-        ss[scratch_index] = snort_calloc(sc->pcre_ovector_size, sizeof(int));
-    }
-    return true;
-}
-
-void PcreModule::scratch_cleanup(SnortConfig* sc)
-{
-    for ( unsigned i = 0; i < sc->num_slots; ++i )
-    {
-        std::vector<void *>& ss = sc->state[i];
-        snort_free(ss[scratch_index]);
-        ss[scratch_index] = nullptr;
-    }
-}
-
 //-------------------------------------------------------------------------
 // api methods
 //-------------------------------------------------------------------------
index 8d4e74bfd3d09bdc793ec806e05c4eac158ade75..b6362c3fd3a184b89c3be2d920148ad119c6464b 100644 (file)
@@ -27,7 +27,6 @@
 #include <luajit.h>
 #include <openssl/crypto.h>
 #include <pcap.h>
-#include <pcre.h>
 #include <pwd.h>
 #include <sys/file.h>
 #include <sys/resource.h>
@@ -72,6 +71,7 @@ extern "C" {
 #include "protocols/packet.h"   // For NUM_IP_PROTOS
 #include "utils/cpp_macros.h"
 #include "utils/stats.h"
+#include "utils/snort_pcre.h"
 #include "utils/util.h"
 
 #include "main.h"
@@ -679,6 +679,12 @@ int DisplayBanner()
     while ( *ljv && !isdigit(*ljv) )
         ++ljv;
 
+    char pcre2_version[64] { };
+    int pcre2_version_size = pcre2_config(PCRE2_CONFIG_VERSION, pcre2_version);
+
+    if (pcre2_version_size <= 0 or static_cast<size_t>(pcre2_version_size) > sizeof(pcre2_version))
+        abort();
+
     LogMessage("\n");
     LogMessage("   ,,_     -*> Snort++ <*-\n");
 #ifdef BUILD
@@ -707,7 +713,7 @@ int DisplayBanner()
     LogMessage("           Using LZMA version %s\n", lzma_version_string());
 #endif
     LogMessage("           Using %s\n", OpenSSL_version(SSLEAY_VERSION));
-    LogMessage("           Using PCRE version %s\n", pcre_version());
+    LogMessage("           Using PCRE2 version %s\n", (char*) pcre2_version);
     LogMessage("           Using ZLIB version %s\n", zlib_version);
 
     LogMessage("\n");
index 1f1c8527646652ac17af14708da5d751e5e48c31..bb7c69060aab86b0b758e845d7e91614d112458c 100644 (file)
@@ -29,7 +29,6 @@
 #include <fstream>
 #include <openssl/crypto.h>
 #include <pcap.h>
-#include <pcre.h>
 #include <stdexcept>
 #include <vector>
 #include <zlib.h>
@@ -58,6 +57,7 @@ extern "C" {
 #include "managers/module_manager.h"
 #include "parser/parse_conf.h"
 #include "parser/parser.h"
+#include "utils/snort_pcre.h"
 #include "utils/stats.h"
 
 #include "lua_bootstrap.h"
@@ -140,6 +140,12 @@ static void install_dependencies_strings(Shell* sh, lua_State* L)
 {
     assert(dep_versions[0]);
 
+    char pcre2_version[64] { };
+    int pcre2_version_size = pcre2_config(PCRE2_CONFIG_VERSION, pcre2_version);
+
+    if (pcre2_version_size <= 0 or static_cast<size_t>(pcre2_version_size) > sizeof(pcre2_version))
+        abort();
+
     std::vector<const char*> vs;
     const char* ljv = LUAJIT_VERSION;
     const char* osv = OpenSSL_version(SSLEAY_VERSION);
@@ -156,7 +162,7 @@ static void install_dependencies_strings(Shell* sh, lua_State* L)
     vs.push_back(ljv);
     vs.push_back(osv);
     vs.push_back(lpv);
-    vs.push_back(pcre_version());
+    vs.push_back(pcre2_version);
     vs.push_back(zlib_version);
 #ifdef HAVE_HYPERSCAN
     vs.push_back(hs_version());
index c621d1a112673935f1576b3d133ba8bc36629478..4fe3fe95a52a9f02a524efa859df0fb168f319e5 100644 (file)
@@ -25,7 +25,6 @@
 
 #include "lua_detector_api.h"
 #include <lua.hpp>
-#include <pcre.h>
 #include <unordered_map>
 
 #include "detection/fp_config.h"
@@ -38,6 +37,7 @@
 #include "profiler/profiler.h"
 #include "protocols/packet.h"
 #include "trace/trace_api.h"
+#include "utils/snort_pcre.h"
 
 #include "app_info_table.h"
 #include "appid_debug.h"
@@ -727,35 +727,43 @@ static int detector_get_pcre_groups(lua_State* L)
     // Verify detector user data and that we are in packet context
     LuaStateDescriptor* lsd = ud->validate_lua_state(true);
 
-    int ovector[OVECCOUNT];
-    const char* error;
-    int erroffset;
+    pcre2_match_data* match_data;
+    PCRE2_UCHAR error[128];
+    PCRE2_SIZE erroffset;
+    int errorcode;
 
     const char* pattern = lua_tostring(L, 2);
     unsigned int offset = lua_tonumber(L, 3);     /*offset can be zero, no check necessary. */
 
     /*compile the regular expression pattern, and handle errors */
-    pcre* re = pcre_compile(pattern,  // the pattern
-        PCRE_DOTALL,                  // default options - dot matches all inc \n
-        &error,                       // for error message
-        &erroffset,                   // for error offset
-        nullptr);                     // use default character tables
+    pcre2_code* re = pcre2_compile((PCRE2_SPTR)pattern,  // the pattern
+        PCRE2_ZERO_TERMINATED,         // assume zero terminated strings
+        PCRE2_DOTALL,                  // default options - dot matches all inc \n
+        &errorcode,                    // for error message
+        &erroffset,                    // for error offset
+        nullptr);                      // default character tables
 
     if (re == nullptr)
     {
+        pcre2_get_error_message(errorcode, error, 128);
         appid_log(lsd->ldp.pkt, TRACE_ERROR_LEVEL, "PCRE compilation failed at offset %d: %s\n", erroffset, error);
         return 0;
     }
 
-    /*pattern match against the subject string. */
-    int rc = pcre_exec(re,            // compiled pattern
-        nullptr,                      // no extra data
-        (const char*)lsd->ldp.data,   // subject string
-        lsd->ldp.size,                // length of the subject
-        offset,                       // offset 0
-        0,                            // default options
-        ovector,                      // output vector for substring information
-        OVECCOUNT);                   // number of elements in the output vector
+    /* allocate space for the match results (OVECCOUNT offset pairs) */
+    match_data = pcre2_match_data_create(OVECCOUNT, NULL);
+    if (!match_data)
+    {
+        appid_log(lsd->ldp.pkt, TRACE_ERROR_LEVEL, "PCRE failed to allocate mem for match_data\n");
+        // the pattern was compiled above; release it so this path does not leak
+        pcre2_code_free(re);
+        return 0;
+    }
+
+    int rc = pcre2_match(re,         // compiled pattern
+        (PCRE2_SPTR)lsd->ldp.data,   // subject string
+        (PCRE2_SIZE)lsd->ldp.size,   // length of the subject
+        (PCRE2_SIZE)offset,          // offset 0
+        0,                           // default options
+        match_data,                  // match data for match results
+        NULL);                       // no match context
 
     if (rc >= 0)
     {
@@ -773,6 +781,7 @@ static int detector_get_pcre_groups(lua_State* L)
             return 0;
         }
 
+        PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(match_data);
         for (int i = 0; i < rc; i++)
         {
             lua_pushlstring(L, (const char*)lsd->ldp.data + ovector[2*i], ovector[2*i+1] -
@@ -782,12 +791,13 @@ static int detector_get_pcre_groups(lua_State* L)
     else
     {
         // log errors except no matches
-        if (rc != PCRE_ERROR_NOMATCH)
+        if (rc != PCRE2_ERROR_NOMATCH)
             appid_log(lsd->ldp.pkt, TRACE_WARNING_LEVEL, "PCRE regular expression group match failed. rc: %d\n", rc);
         rc = 0;
     }
 
-    pcre_free(re);
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
     return rc;
 }
 
index 26cda0a55c7f9263152a425fa39ee0233038fd01..1161dc5288a9d6f533ee0ba424b7c745ec8cc43e 100644 (file)
@@ -18,6 +18,7 @@ add_library ( utils OBJECT
     sflsq.cc
     sflsq.h
     snort_bounds.h
+    snort_pcre.h
     stats.cc
     stats.h
     util.cc
diff --git a/src/utils/snort_pcre.h b/src/utils/snort_pcre.h
new file mode 100644 (file)
index 0000000..fcbf897
--- /dev/null
@@ -0,0 +1,31 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2025-2025 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// snort_pcre.h author Michael Matirko <mmatirko@cisco.com>
+
+#ifndef SNORT_PCRE_H
+#define SNORT_PCRE_H
+
+// Wrapper around <pcre2.h>: include this header instead of pcre2.h directly.
+//
+// pcre2 code unit width must be set prior to including the pcre2
+// headers. Setting it here allows us to not require a redefinition
+// of PCRE2_CODE_UNIT_WIDTH everywhere we include pcre2.h
+
+// 8 selects the 8-bit code unit API
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+#endif
+