git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #5030: snort_ml: scan multipart form data
author: Brandon Stultz (brastult) <brastult@cisco.com>
Tue, 9 Dec 2025 14:31:51 +0000 (14:31 +0000)
committer: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Tue, 9 Dec 2025 14:31:51 +0000 (14:31 +0000)
Merge in SNORT/snort3 from ~BRASTULT/snort3:snort_ml_multipart to master

Squashed commit of the following:

commit 324097ebac2877412a01f66816006c6033613ad3
Author: Brandon Stultz <brastult@cisco.com>
Date:   Tue Dec 2 18:18:21 2025 -0500

    snort_ml: enable client body scanning by default

commit 3a36905a39c842aca112757b3927b135395429ff
Author: Brandon Stultz <brastult@cisco.com>
Date:   Tue Dec 2 18:17:57 2025 -0500

    snort_ml: scan multipart form data

commit 065b77b473a6536bb002c71d362d8b7beb78e815
Author: Brandon Stultz <brastult@cisco.com>
Date:   Tue Dec 2 18:05:27 2025 -0500

    pub_sub: add is_urlencoded method

commit 6ef1aee143d6040d3e690f197cc71e629e9b40c9
Author: Brandon Stultz <brastult@cisco.com>
Date:   Tue Dec 2 17:57:54 2025 -0500

    http_inspect: add urlencoded to content-type list

src/network_inspectors/snort_ml/snort_ml_inspector.cc
src/network_inspectors/snort_ml/snort_ml_module.cc
src/pub_sub/http_request_body_event.cc
src/pub_sub/http_request_body_event.h
src/pub_sub/test/pub_sub_http_request_body_event_test.cc
src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_tables.cc

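diff --git a/src/network_inspectors/snort_ml/snort_ml_inspector.cc b/src/network_inspectors/snort_ml/snort_ml_inspector.cc
index 64a9f62a700812dd7988cf389fdd35dd501b8f3c..39de5fc63b3bb5f61611fdea8add90aadd0ee730 100644 (file)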
@@ -29,6 +29,7 @@
 #include "log/messages.h"
 #include "managers/inspector_manager.h"
 #include "pub_sub/http_events.h"
+#include "pub_sub/http_form_data_event.h"
 #include "pub_sub/http_request_body_event.h"
 #include "utils/util.h"
 
@@ -40,6 +41,60 @@ using namespace std;
 THREAD_LOCAL SnortMLStats snort_ml_stats;
 THREAD_LOCAL ProfileStats snort_ml_prof;
 
+//--------------------------------------------------------------------------
+// HTTP uri event handler
+//--------------------------------------------------------------------------
+
+class HttpUriHandler : public DataHandler
+{
+public:
+    HttpUriHandler(const SnortMLEngine& eng, const SnortML& ins)
+        : DataHandler(SNORT_ML_NAME), engine(eng), inspector(ins) {}
+
+    void handle(DataEvent&, Flow*) override;
+
+private:
+    const SnortMLEngine& engine;
+    const SnortML& inspector;
+};
+
+void HttpUriHandler::handle(DataEvent& de, Flow*)
+{
+    // cppcheck-suppress unreadVariable
+    Profile profile(snort_ml_prof);
+
+    HttpEvent* he = reinterpret_cast<HttpEvent*>(&de);
+
+    int32_t query_len = 0;
+    const char* query = (const char*)he->get_uri_query(query_len);
+
+    if (!query || query_len <= 0)
+        return;
+
+    const SnortMLConfig& conf = inspector.get_config();
+
+    const size_t len = std::min((size_t)conf.uri_depth, (size_t)query_len);
+
+    float output = 0;
+    if (!engine.scan(query, len, output))
+        return;
+
+    snort_ml_stats.uri_bytes += len;
+
+    debug_logf(snort_ml_trace, TRACE_CLASSIFIER, nullptr,
+        "input (query): %.*s\n", (int)len, query);
+
+    debug_logf(snort_ml_trace, TRACE_CLASSIFIER, nullptr,
+        "output: %f\n", static_cast<double>(output));
+
+    if ((double)output > conf.http_param_threshold)
+    {
+        snort_ml_stats.uri_alerts++;
+        debug_logf(snort_ml_trace, TRACE_CLASSIFIER, nullptr, "<ALERT>\n");
+        DetectionEngine::queue_event(SNORT_ML_GID, SNORT_ML_SID);
+    }
+}
+
 //--------------------------------------------------------------------------
 // HTTP body event handler
 //--------------------------------------------------------------------------
@@ -50,7 +105,7 @@ public:
     HttpBodyHandler(const SnortMLEngine& eng, const SnortML& ins)
         : DataHandler(SNORT_ML_NAME), engine(eng), inspector(ins) {}
 
-    void handle(DataEvent& de, Flow*) override;
+    void handle(DataEvent&, Flow*) override;
 
 private:
     const SnortMLEngine& engine;
@@ -64,7 +119,7 @@ void HttpBodyHandler::handle(DataEvent& de, Flow*)
 
     HttpRequestBodyEvent* he = reinterpret_cast<HttpRequestBodyEvent*>(&de);
 
-    if (he->is_mime())
+    if (!he->is_urlencoded())
         return;
 
     int32_t body_len = 0;
@@ -98,13 +153,13 @@ void HttpBodyHandler::handle(DataEvent& de, Flow*)
 }
 
 //--------------------------------------------------------------------------
-// HTTP uri event handler
+// HTTP form event handler
 //--------------------------------------------------------------------------
 
-class HttpUriHandler : public DataHandler
+class HttpFormHandler : public DataHandler
 {
 public:
-    HttpUriHandler(const SnortMLEngine& eng, const SnortML& ins)
+    HttpFormHandler(const SnortMLEngine& eng, const SnortML& ins)
         : DataHandler(SNORT_ML_NAME), engine(eng), inspector(ins) {}
 
     void handle(DataEvent&, Flow*) override;
@@ -114,38 +169,37 @@ private:
     const SnortML& inspector;
 };
 
-void HttpUriHandler::handle(DataEvent& de, Flow*)
+void HttpFormHandler::handle(DataEvent& de, Flow*)
 {
     // cppcheck-suppress unreadVariable
     Profile profile(snort_ml_prof);
 
-    HttpEvent* he = reinterpret_cast<HttpEvent*>(&de);
+    HttpFormDataEvent* he = reinterpret_cast<HttpFormDataEvent*>(&de);
 
-    int32_t query_len = 0;
-    const char* query = (const char*)he->get_uri_query(query_len);
+    const std::string& data = he->get_form_data_uri();
 
-    if (!query || query_len <= 0)
+    if (data.empty())
         return;
 
     const SnortMLConfig& conf = inspector.get_config();
 
-    const size_t len = std::min((size_t)conf.uri_depth, (size_t)query_len);
+    const size_t len = std::min((size_t)conf.client_body_depth, data.length());
 
     float output = 0;
-    if (!engine.scan(query, len, output))
+    if (!engine.scan(data.c_str(), len, output))
         return;
 
-    snort_ml_stats.uri_bytes += len;
+    snort_ml_stats.client_body_bytes += len;
 
     debug_logf(snort_ml_trace, TRACE_CLASSIFIER, nullptr,
-        "input (query): %.*s\n", (int)len, query);
+        "input (form): %.*s\n", (int)len, data.c_str());
 
     debug_logf(snort_ml_trace, TRACE_CLASSIFIER, nullptr,
         "output: %f\n", static_cast<double>(output));
 
     if ((double)output > conf.http_param_threshold)
     {
-        snort_ml_stats.uri_alerts++;
+        snort_ml_stats.client_body_alerts++;
         debug_logf(snort_ml_trace, TRACE_CLASSIFIER, nullptr, "<ALERT>\n");
         DetectionEngine::queue_event(SNORT_ML_GID, SNORT_ML_SID);
     }
@@ -185,6 +239,9 @@ bool SnortML::configure(SnortConfig* sc)
     {
         DataBus::subscribe(http_pub_key, HttpEventIds::REQUEST_BODY,
             new HttpBodyHandler(*engine, *this));
+
+        DataBus::subscribe(http_pub_key, HttpEventIds::MIME_FORM_DATA,
+            new HttpFormHandler(*engine, *this));
     }
 
     return true;
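diff --git a/src/network_inspectors/snort_ml/snort_ml_module.cc b/src/network_inspectors/snort_ml/snort_ml_module.cc
index 7280bf6973efeef196516ea5fd6fc567aaa5292f..654563380b22cf254bfbec65e4fa0799a6000241 100644 (file)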
@@ -35,7 +35,7 @@ static const Parameter snort_ml_params[] =
     { "uri_depth", Parameter::PT_INT, "-1:max31", "-1",
       "number of input HTTP URI bytes to scan (-1 unlimited)" },
 
-    { "client_body_depth", Parameter::PT_INT, "-1:max31", "0",
+    { "client_body_depth", Parameter::PT_INT, "-1:max31", "-1",
       "number of input HTTP client body bytes to scan (-1 unlimited)" },
 
     { "http_param_threshold", Parameter::PT_REAL, "0:1", "0.95",
diff --git a/src/pub_sub/http_request_body_event.cc b/src/pub_sub/http_request_body_event.cc
index dab3851dbcb40d5db1511bca95ca1e15590a169b..217299d18b2bbeaffc4a0a9933da79c3876cf8ea 100644 (file)
@@ -82,6 +82,19 @@ bool HttpRequestBodyEvent::is_mime() const
     return false;
 }
 
+bool HttpRequestBodyEvent::is_urlencoded() const
+{
+    if (http_msg_body)
+    {
+        HttpMsgHeader* header = http_msg_body->get_header(HttpCommon::SRC_CLIENT);
+
+        if (header)
+            return header->get_content_type() == HttpEnums::CT_APPLICATION_X_WWW_FORM_URLENCODED;
+    }
+
+    return false;
+}
+
 int64_t HttpRequestBodyEvent::get_httpx_stream_id() const
 {
     return http_flow_data->get_hx_stream_id();
diff --git a/src/pub_sub/http_request_body_event.h b/src/pub_sub/http_request_body_event.h
index 0edeefe4bef819e6e4f040fe3467dccd100dfa4b..67fb0eeb2e0ec54c952f074bcc444e3741f8ec3e 100644 (file)
@@ -29,7 +29,7 @@ class HttpFlowData;
 
 namespace snort
 {
-// This event is published each time new request body data is received by http_inspect for HTTP/2
+// This event is published each time new request body data is received by http_inspect for HTTP
 // traffic, up to the publish depth. The full request body may be sent in several pieces
 class SO_PUBLIC HttpRequestBodyEvent : public snort::DataEvent
 {
@@ -44,6 +44,7 @@ public:
     const uint8_t* get_client_body(int32_t& length);
     bool is_last_request_body_piece();
     bool is_mime() const;
+    bool is_urlencoded() const;
     int64_t get_httpx_stream_id() const;
 
 private:
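diff --git a/src/pub_sub/test/pub_sub_http_request_body_event_test.cc b/src/pub_sub/test/pub_sub_http_request_body_event_test.cc
index 96eb4507d3a690ec0baef451b622648ced04a5ef..302aeca784d2835f42ec0bf26761f760ad548ee6 100644 (file)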
@@ -30,6 +30,7 @@
 #include "service_inspectors/http_inspect/http_enum.h"
 #include "service_inspectors/http_inspect/http_field.h"
 #include "service_inspectors/http_inspect/http_msg_body_cl.h"
+#include "service_inspectors/http_inspect/http_msg_head_shared.h"
 
 #include <CppUTest/CommandLineTestRunner.h>
 #include <CppUTest/TestHarness.h>
@@ -64,6 +65,7 @@ void HttpMsgSection::clear() {}
 #ifdef REG_TEST
 void HttpMsgBody::print_body_section(FILE*, const char*) {}
 #endif
+int32_t HttpMsgHeadShared::get_content_type() { return content_type; }
 
 HttpMsgSection::HttpMsgSection(const uint8_t* buffer, const uint16_t buf_size,
     HttpFlowData* session_data_, HttpCommon::SourceId source_id_, bool buf_owner,
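diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h
index df4107d25bae432e726d20d6ffa19bf22bbd7d0c..25a978af9d4cc1c2cfe36ba01a1270ca7cf5871b 100755 (executable)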
@@ -134,7 +134,7 @@ enum Contentcoding { CONTENTCODE__OTHER=1, CONTENTCODE_GZIP, CONTENTCODE_DEFLATE
 // Content media-types (MIME types)
 enum ContentType { CT__OTHER=1, CT_APPLICATION_DNS, CT_APPLICATION_PDF, CT_APPLICATION_OCTET_STREAM,
     CT_APPLICATION_JAVASCRIPT, CT_APPLICATION_ECMASCRIPT, CT_APPLICATION_X_JAVASCRIPT,
-    CT_APPLICATION_X_ECMASCRIPT, CT_APPLICATION_XHTML_XML,
+    CT_APPLICATION_X_ECMASCRIPT, CT_APPLICATION_XHTML_XML, CT_APPLICATION_X_WWW_FORM_URLENCODED,
     CT_MULTIPART_FORM_DATA,
     CT_TEXT_JAVASCRIPT, CT_TEXT_JAVASCRIPT_1_0, CT_TEXT_JAVASCRIPT_1_1, CT_TEXT_JAVASCRIPT_1_2, CT_TEXT_JAVASCRIPT_1_3,
     CT_TEXT_JAVASCRIPT_1_4, CT_TEXT_JAVASCRIPT_1_5, CT_TEXT_ECMASCRIPT, CT_TEXT_X_JAVASCRIPT,
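diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc
index e21875078dc3239d29c37a910ab09b24520e461c..fa6a518520fdb1473208d1cd66eb6d514a14c8fe 100755 (executable)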
@@ -174,29 +174,30 @@ const StrCode HttpMsgHeadShared::content_code_list[] =
 
 const StrCode HttpMsgHeadShared::content_type_list[] =
 {
-    { CT_APPLICATION_DNS,          "application/dns-message" },
-    { CT_APPLICATION_PDF,          "application/pdf" },
-    { CT_APPLICATION_OCTET_STREAM, "application/octet-stream" },
-    { CT_APPLICATION_JAVASCRIPT,   "application/javascript" },
-    { CT_APPLICATION_ECMASCRIPT,   "application/ecmascript" },
-    { CT_APPLICATION_X_JAVASCRIPT, "application/x-javascript" },
-    { CT_APPLICATION_X_ECMASCRIPT, "application/x-ecmascript" },
-    { CT_APPLICATION_XHTML_XML,    "application/xhtml+xml" },
-    { CT_MULTIPART_FORM_DATA,      "multipart/form-data" },
-    { CT_TEXT_JAVASCRIPT,          "text/javascript" },
-    { CT_TEXT_JAVASCRIPT_1_0,      "text/javascript1.0" },
-    { CT_TEXT_JAVASCRIPT_1_1,      "text/javascript1.1" },
-    { CT_TEXT_JAVASCRIPT_1_2,      "text/javascript1.2" },
-    { CT_TEXT_JAVASCRIPT_1_3,      "text/javascript1.3" },
-    { CT_TEXT_JAVASCRIPT_1_4,      "text/javascript1.4" },
-    { CT_TEXT_JAVASCRIPT_1_5,      "text/javascript1.5" },
-    { CT_TEXT_ECMASCRIPT,          "text/ecmascript" },
-    { CT_TEXT_X_JAVASCRIPT,        "text/x-javascript" },
-    { CT_TEXT_X_ECMASCRIPT,        "text/x-ecmascript" },
-    { CT_TEXT_JSCRIPT,             "text/jscript" },
-    { CT_TEXT_LIVESCRIPT,          "text/livescript" },
-    { CT_TEXT_HTML,                "text/html" },
-    { 0,                           nullptr }
+    { CT_APPLICATION_DNS,                   "application/dns-message" },
+    { CT_APPLICATION_PDF,                   "application/pdf" },
+    { CT_APPLICATION_OCTET_STREAM,          "application/octet-stream" },
+    { CT_APPLICATION_JAVASCRIPT,            "application/javascript" },
+    { CT_APPLICATION_ECMASCRIPT,            "application/ecmascript" },
+    { CT_APPLICATION_X_JAVASCRIPT,          "application/x-javascript" },
+    { CT_APPLICATION_X_ECMASCRIPT,          "application/x-ecmascript" },
+    { CT_APPLICATION_XHTML_XML,             "application/xhtml+xml" },
+    { CT_APPLICATION_X_WWW_FORM_URLENCODED, "application/x-www-form-urlencoded" },
+    { CT_MULTIPART_FORM_DATA,               "multipart/form-data" },
+    { CT_TEXT_JAVASCRIPT,                   "text/javascript" },
+    { CT_TEXT_JAVASCRIPT_1_0,               "text/javascript1.0" },
+    { CT_TEXT_JAVASCRIPT_1_1,               "text/javascript1.1" },
+    { CT_TEXT_JAVASCRIPT_1_2,               "text/javascript1.2" },
+    { CT_TEXT_JAVASCRIPT_1_3,               "text/javascript1.3" },
+    { CT_TEXT_JAVASCRIPT_1_4,               "text/javascript1.4" },
+    { CT_TEXT_JAVASCRIPT_1_5,               "text/javascript1.5" },
+    { CT_TEXT_ECMASCRIPT,                   "text/ecmascript" },
+    { CT_TEXT_X_JAVASCRIPT,                 "text/x-javascript" },
+    { CT_TEXT_X_ECMASCRIPT,                 "text/x-ecmascript" },
+    { CT_TEXT_JSCRIPT,                      "text/jscript" },
+    { CT_TEXT_LIVESCRIPT,                   "text/livescript" },
+    { CT_TEXT_HTML,                         "text/html" },
+    { 0,                                    nullptr }
 };
 
 const StrCode HttpMsgHeadShared::charset_code_list[] =