]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #4944: mime: implement parsing for MIME multipart/form-data content
authorAnna Norokh -X (anorokh - SOFTSERVE INC at Cisco) <anorokh@cisco.com>
Mon, 1 Dec 2025 11:24:16 +0000 (11:24 +0000)
committerOleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Mon, 1 Dec 2025 11:24:16 +0000 (11:24 +0000)
Merge in SNORT/snort3 from ~ANOROKH/snort3:mime_form_data to master

Squashed commit of the following:

commit 86cbfb84db9b2930b42fc3bb7aea147d1c6e7aea
Author: anorokh <anorokh@cisco.com>
Date:   Wed Nov 12 12:08:36 2025 +0200

    mime: implement content parsing of multipart/form-data

        - reworked MIME header (Content-*) parsing
        - http_inspect: added new form_data value in MsgBody
        - pub_sub: added new event HttpFormDataEvent

19 files changed:
src/mime/CMakeLists.txt
src/mime/file_mime_form_data.cc [new file with mode: 0644]
src/mime/file_mime_form_data.h [new file with mode: 0644]
src/mime/file_mime_process.cc
src/mime/file_mime_process.h
src/pub_sub/CMakeLists.txt
src/pub_sub/http_event_ids.h
src/pub_sub/http_form_data_event.cc [new file with mode: 0644]
src/pub_sub/http_form_data_event.h [new file with mode: 0644]
src/pub_sub/test/CMakeLists.txt
src/pub_sub/test/pub_sub_http_form_data_event_test.cc [new file with mode: 0644]
src/pub_sub/test/pub_sub_http_transaction_end_event_test.cc
src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_field.h
src/service_inspectors/http_inspect/http_msg_body.cc
src/service_inspectors/http_inspect/http_msg_body.h
src/service_inspectors/http_inspect/http_tables.cc
src/service_inspectors/http_inspect/http_transaction.h
src/service_inspectors/http_inspect/test/http_transaction_test.cc

index 0ce5353f1dd145772d8c6ee9aa6015ad72b69bf1..75c5a7e0b89ebe61504a9351b5c7064231b15a2a 100644 (file)
@@ -4,6 +4,7 @@ set( MIME_INCLUDES
     decode_base.h
     file_mime_config.h
     file_mime_decode.h
+    file_mime_form_data.h
     file_mime_log.h
     file_mime_paf.h
     file_mime_process.h
@@ -25,6 +26,7 @@ add_library ( mime OBJECT
     file_mime_context_data.cc
     file_mime_context_data.h
     file_mime_decode.cc
+    file_mime_form_data.cc
     file_mime_log.cc
     file_mime_paf.cc
     file_mime_process.cc
diff --git a/src/mime/file_mime_form_data.cc b/src/mime/file_mime_form_data.cc
new file mode 100644 (file)
index 0000000..b9b4750
--- /dev/null
@@ -0,0 +1,48 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2025-2025 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// file_mime_form_data.cc author Anna Norokh <anorokh@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "file_mime_form_data.h"
+
+using namespace snort;
+
+void MimeFormDataCollector::finalize_field(const std::string& filename)
+{
+    if (!is_form_data or current_field_name.empty() or is_size_exceeded)
+        return;
+
+    const std::string& value_to_use = (is_file_upload and !filename.empty())
+        ? filename : current_field_value;
+
+    const size_t field_total_len = current_field_name.length() + 1 + value_to_use.length() +
+        (form_fields.empty() ? 0 : 1);
+
+    if (accumulated_size + field_total_len > MAX_FORM_DATA_SIZE)
+    {
+        is_size_exceeded = true;
+        return;
+    }
+
+    form_fields.emplace_back(current_field_name, value_to_use);
+    accumulated_size += field_total_len;
+}
+
diff --git a/src/mime/file_mime_form_data.h b/src/mime/file_mime_form_data.h
new file mode 100644 (file)
index 0000000..f0abc54
--- /dev/null
@@ -0,0 +1,90 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2025-2025 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// file_mime_form_data.h author Anna Norokh <anorokh@cisco.com>
+
+#ifndef FILE_MIME_FORM_DATA_H
+#define FILE_MIME_FORM_DATA_H
+
+#include <string>
+#include <vector>
+#include <utility>
+
+namespace snort
+{
+
+// Maximum size of form-data content to collect for SQL injection analysis
+#define MAX_FORM_DATA_SIZE 4096
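+// FIXIT-L: the limit above is provisional; the final size is still to be discussed
+// FIXIT-L: open question: when the limit is exceeded, trigger a built-in and publish the truncated data?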
+
+class MimeFormDataCollector  // gathers (name, value) pairs part by part; finalize_field() caps the total at MAX_FORM_DATA_SIZE
+{
+public:
+    using FieldPair = std::pair<std::string, std::string>;  // (field name, field value)
+    using FieldVector = std::vector<FieldPair>;
+
+    MimeFormDataCollector() = default;
+    ~MimeFormDataCollector() = default;
+
+    MimeFormDataCollector(const MimeFormDataCollector&) = delete;  // copying is disabled
+    MimeFormDataCollector& operator=(const MimeFormDataCollector&) = delete;
+
+    FieldVector&& take_fields()  // exposes form_fields as an rvalue for the caller to move from; accumulated_size and is_size_exceeded are not touched
+    { return std::move(form_fields); }
+
+    void set_field_name(const std::string& name)  // name of the part being parsed; committed by finalize_field()
+    { current_field_name = name; }
+
+    void set_field_value(const std::string& value)  // replaces (does not append to) the pending value
+    { current_field_value = value; }
+
+    void set_is_form_data(bool is_form)
+    { is_form_data = is_form; }
+
+    void set_is_file_upload(bool is_file)
+    { is_file_upload = is_file; }
+
+    bool get_is_form_data() const
+    { return is_form_data; }
+
+    bool get_is_file_upload() const
+    { return is_file_upload; }
+
+    void finalize_field(const std::string& filename);  // commits the pending name/value pair; defined in file_mime_form_data.cc
+
+    void reset_part()  // clears per-part state only; collected fields and size accounting are kept
+    {
+        current_field_name.clear();
+        current_field_value.clear();
+        is_form_data = false;
+        is_file_upload = false;
+    }
+
+private:
+    FieldVector form_fields;  // committed pairs, in the order finalize_field() accepted them
+    std::string current_field_name;
+    std::string current_field_value;
+    size_t accumulated_size = 0;  // running total that finalize_field() checks against MAX_FORM_DATA_SIZE
+    bool is_form_data = false;
+    bool is_file_upload = false;
+    bool is_size_exceeded = false;  // once set, finalize_field() drops every later field; nothing visible here clears it
+};
+
+}
+#endif
+
index b7e8da6401011f04ebbd3288a3be163436b362a7..ed16a5cbd02adbc900fb9c18cd042683aa3b6eec 100644 (file)
@@ -30,6 +30,7 @@
 #include "file_api/file_flows.h"
 #include "hash/hash_key_operations.h"
 #include "log/messages.h"
+#include "mime/file_mime_form_data.h"
 #include "search_engines/search_tool.h"
 #include "utils/util_cstring.h"
 
@@ -184,6 +185,7 @@ const uint8_t* MimeSession::process_mime_header(Packet* p, const uint8_t* ptr,
 
     start_hdr = ptr;
     bool cont = true;
+
     while (cont and ptr < data_end_marker)
     {
         bool found_end_marker = get_mime_eol(ptr, data_end_marker, &eol, &eolm);
@@ -241,6 +243,7 @@ const uint8_t* MimeSession::process_mime_header(Packet* p, const uint8_t* ptr,
         }
         state_flags |= MIME_FLAG_SEEN_HEADERS;
     }
+
     if (!cont)
     {
         data_state = STATE_DATA_BODY;
@@ -248,6 +251,7 @@ const uint8_t* MimeSession::process_mime_header(Packet* p, const uint8_t* ptr,
         partial_data = nullptr;
         partial_data_len = 0;
     }
+
     return ptr;
 }
 
@@ -273,7 +277,7 @@ bool MimeSession::process_header_line(const uint8_t*& ptr, const uint8_t* eol, c
     if (!is_wsp(ptr))
     {
         // Clear flags from last header line
-        state_flags &= ~(MIME_FLAG_IN_CONTENT_TYPE | MIME_FLAG_IN_CONT_TRANS_ENC);
+        state_flags &= ~(MIME_FLAG_IN_CONTENT_TYPE | MIME_FLAG_IN_CONT_TRANS_ENC | MIME_FLAG_IN_CONT_DISP);
 
         bool got_non_printable_in_header_name = false;
 
@@ -328,18 +332,18 @@ bool MimeSession::process_header_line(const uint8_t*& ptr, const uint8_t* eol, c
             {
                 switch (mime_search_info.id)
                 {
-                    case HDR_CONTENT_TYPE:
-                        state_flags |= MIME_FLAG_IN_CONTENT_TYPE;
-                        break;
-                    case HDR_CONT_TRANS_ENC:
-                        state_flags |= MIME_FLAG_IN_CONT_TRANS_ENC;
-                        break;
-                    case HDR_CONT_DISP:
-                        state_flags |= MIME_FLAG_IN_CONT_DISP;
-                        break;
-                    default:
-                        assert(false);
-                        break;
+                case HDR_CONTENT_TYPE:
+                    state_flags |= MIME_FLAG_IN_CONTENT_TYPE;
+                    break;
+                case HDR_CONT_TRANS_ENC:
+                    state_flags |= MIME_FLAG_IN_CONT_TRANS_ENC;
+                    break;
+                case HDR_CONT_DISP:
+                    state_flags |= MIME_FLAG_IN_CONT_DISP;
+                    break;
+                default:
+                    assert(false);
+                    break;
                 }
             }
         }
@@ -380,45 +384,75 @@ bool MimeSession::process_header_line(const uint8_t*& ptr, const uint8_t* eol, c
         // FIXIT-L: either no data in header value OR this is folding split across PDU - second case should be implemented later on
         return true;
     }
-   
+
     if (state_flags & MIME_FLAG_IN_CONTENT_TYPE)
-    {
-        if (data_state == STATE_MIME_HEADER)
-        {
-            setup_attachment_processing();
-        }
+        process_content_type((const char*)header_value_ptr, header_value_len);
 
-        int len = extract_content_type((const char*&)header_value_ptr, header_value_len);
-        if (len > 0)
-            content_type.assign((const char*)header_value_ptr, len);
-        state_flags &= ~MIME_FLAG_IN_CONTENT_TYPE;
-    }
     else if (state_flags & MIME_FLAG_IN_CONT_TRANS_ENC)
+        process_content_transfer_encoding((const char*)header_value_ptr, header_value_len);
+
+    else if (state_flags & MIME_FLAG_IN_CONT_DISP)
+        process_content_disposition((const char*)header_value_ptr, header_value_len);
+
+    return true;
+}
+
+void MimeSession::process_content_type(const char* header, uint32_t header_length)  // handles a Content-Type header value: stores the extracted value in content_type
+{
+    if (data_state == STATE_MIME_HEADER)  // attachment processing is set up only while in the MIME-header state
     {
         setup_attachment_processing();
-        if (decode_state != nullptr and header_value_len > 0)
-        {
-            decode_state->process_decode_type((const char*)header_value_ptr, header_value_len);
-        }
-        // Don't clear the MIME_FLAG_IN_CONT_TRANS_ENC flag in case of folding
     }
-    else if (state_flags & MIME_FLAG_IN_CONT_DISP)
+
+    int len = extract_value(header, header_length);  // header is passed by reference; presumably moved to the start of the value - confirm in extract_value
+
+    if (len > 0)
+        content_type.assign(header, len);
+
+    state_flags &= ~MIME_FLAG_IN_CONTENT_TYPE;  // the flag is dropped after one value, unlike the transfer-encoding handler which keeps its flag for folding
+}
+
+// Handles a Content-Transfer-Encoding header value: sets up attachment
+// processing and, when a decoder exists and the value is non-empty, hands the
+// value to the decoder to select the decode type.
+void MimeSession::process_content_transfer_encoding(const char* header, uint32_t header_length)
+{
+    setup_attachment_processing();
+
+    // Don't clear the MIME_FLAG_IN_CONT_TRANS_ENC flag in case of folding
+    if (decode_state == nullptr or header_length == 0)
+        return;
+
+    decode_state->process_decode_type(header, header_length);
+}
+
+void MimeSession::process_content_disposition(const char* header, uint32_t header_length)  // handles a Content-Disposition header value: detects form-data parts, records the field name and the file name
+{
+    const uint8_t FORM_DATA_KEY_LENGTH = 9;  // length of "form-data"
+    const char* value = header;
+    int value_length = extract_value(value, header_length);
+
+    if (value_length == FORM_DATA_KEY_LENGTH and SnortStrcasestr(value, value_length, "form-data"))  // same length plus a match (presumably case-insensitive, going by the helper's name): the value is exactly "form-data"
     {
-        int len = extract_file_name((const char*&)header_value_ptr, header_value_len);
+        form_data_collector.set_is_form_data(true);
+        form_data_collector.set_is_file_upload(false);
 
-        if (len > 0)
-        {
-            filename.assign((const char*)header_value_ptr, len);
+        const char* name = header;
+        int name_length = extract_attribute(name, header_length, "name");  // NOTE(review): if the keyword search is a plain substring match, "name" is also found inside "filename" when that parameter comes first - confirm
 
-            if (log_config->log_filename && log_state)
-            {
-                log_state->log_file_name(header_value_ptr, len);
-            }
-            state_flags &= ~MIME_FLAG_IN_CONT_DISP;
-        }
+        if (name_length > 0)
+            form_data_collector.set_field_name(std::string(name, name_length));
     }
 
-    return true;
+    const char* name = header;
+    int name_length = extract_attribute(name, header_length, "filename");  // NOTE(review): attribute_state persists between calls; if the lookup above stopped mid-attribute this scan does not start at the keyword search - confirm
+
+    if (name_length > 0)
+    {
+        filename.assign(name, name_length);
+
+        if (form_data_collector.get_is_form_data())
+            form_data_collector.set_is_file_upload(true);  // a form-data part that carries a file name is flagged as a file upload
+
+        if (log_config->log_filename && log_state)
+            log_state->log_file_name((const uint8_t*)name, name_length);
+    }
 }
 
 /* Get the end of data body (excluding boundary)*/
@@ -451,7 +485,8 @@ static const uint8_t* GetDataEnd(const uint8_t* data_start,
         }
         break;
     }
-    return data_end_marker;
+
+    return end == start ? data_start : data_end_marker;
 }
 
 const uint8_t* MimeSession::process_mime_body(const uint8_t* ptr,
@@ -498,6 +533,10 @@ const uint8_t* MimeSession::process_mime_body(const uint8_t* ptr,
             decode_alert();
     }
 
+    if (form_data_collector.get_is_form_data() and !form_data_collector.get_is_file_upload())
+        form_data_collector.set_field_value(
+            ptr < attach_end ? std::string((const char*)ptr, attach_end - ptr) : std::string());
+
     return data_end;
 }
 
@@ -585,9 +624,7 @@ const uint8_t* MimeSession::process_mime_data_paf(
             if (isFileEnd(position))
                 data_state = STATE_MIME_HEADER;
 
-            if (!(state_flags & MIME_FLAG_FILE_ATTACH))
-                start = end;
-            else
+            if (state_flags & MIME_FLAG_FILE_ATTACH)
             {
                 start = process_mime_body(start, end, position);
 
@@ -637,25 +674,34 @@ const uint8_t* MimeSession::process_mime_data_paf(
                 {
                     switch (decode_state->get_decode_type())
                     {
-                        case DECODE_B64:
-                            mime_stats->b64_bytes += buf_size;
-                            break;
-                        case DECODE_QP:
-                            mime_stats->qp_bytes += buf_size;
-                            break;
-                        case DECODE_UU:
-                            mime_stats->uu_bytes += buf_size;
-                            break;
-                        case DECODE_BITENC:
-                            mime_stats->bitenc_bytes += buf_size;
-                            break;
-                        default:
-                            break;
+                    case DECODE_B64:
+                        mime_stats->b64_bytes += buf_size;
+                        break;
+                    case DECODE_QP:
+                        mime_stats->qp_bytes += buf_size;
+                        break;
+                    case DECODE_UU:
+                        mime_stats->uu_bytes += buf_size;
+                        break;
+                    case DECODE_BITENC:
+                        mime_stats->bitenc_bytes += buf_size;
+                        break;
+                    default:
+                        break;
                     }
                 }
 
                 decode_state->reset_decoded_bytes();
             }
+            else if (form_data_collector.get_is_form_data() and !form_data_collector.get_is_file_upload())
+            {
+                start = process_mime_body(start, end, position);
+            }
+            else
+            {
+                start = end;
+            }
+
             break;
         }
     }
@@ -677,7 +723,7 @@ const uint8_t* MimeSession::process_mime_data_paf(
 void MimeSession::reset_part_state()
 {
     state_flags = 0;
-    filename_state = CONT_DISP_FILENAME_PARAM_NAME;
+    attribute_state = ATTRIBUTE_NAME;
 
     delete[] partial_header;
     partial_header = nullptr;
@@ -693,9 +739,14 @@ void MimeSession::reset_part_state()
         decode_state->file_decomp_reset();
     }
 
+    form_data_collector.finalize_field(filename);
+    form_data_collector.reset_part();
+
     // Clear MIME's file data to prepare for next file
     filename.clear();
     content_type.clear();
+
+    // Reset form-data state for next part
     file_counter++;
     file_offset = 0;
     current_file_cache_file_id = 0;
@@ -787,78 +838,84 @@ MailLogState* MimeSession::get_log_state()
     return log_state;
 }
 
-int MimeSession::extract_file_name(const char*& start, int length)
+int MimeSession::extract_attribute(const char*& start, int length, const char* attr)  // scans [start, start + length) for attr's value, driven by attribute_state; on success start points at the value and its length is returned, otherwise -1
 {
-    const char* tmp = start;
-    const char* end = start+length;
-
     if (length <= 0)
         return -1;
 
+    const char* tmp = start;
+    const char* end = start + length;
+
     while (tmp < end)
     {
-        switch (filename_state)
+        switch (attribute_state)
         {
-            case CONT_DISP_FILENAME_PARAM_NAME:
+
+        case ATTRIBUTE_NAME:
+            tmp = SnortStrcasestr(start, length, attr);  // the keyword is searched over the whole input, independent of tmp
+
+            if (tmp == nullptr)
+                return -1;
+
+            tmp = tmp + strlen(attr);  // step past the keyword
+            attribute_state = ATTRIBUTE_EQUALS;
+            break;
+
+        case ATTRIBUTE_EQUALS:
+            if ('=' == *tmp or isspace(*tmp))  // NOTE(review): *tmp is a plain char, so bytes >= 0x80 may reach isspace() as negative values
             {
-                tmp = SnortStrcasestr(start, length, "filename");
-                if ( tmp == nullptr )
-                    return -1;
-                tmp = tmp + 8;
-                filename_state = CONT_DISP_FILENAME_PARAM_EQUALS;
-                break;
+                tmp++;
             }
-            case CONT_DISP_FILENAME_PARAM_EQUALS:
+            else if (*tmp == '"')
             {
-                //skip past whitespace and '='
-                if (isspace(*tmp) or (*tmp == '='))
-                    tmp++;
-                else if (*tmp == '"')
-                {
-                    // Skip past the quote
-                    tmp++;
-                    filename_state = CONT_DISP_FILENAME_PARAM_VALUE_QUOTE;
-                }
-                else
-                {
-                    filename_state = CONT_DISP_FILENAME_PARAM_VALUE;
-                    start = tmp;
-                }
-                break;
+                tmp++;
+                attribute_state = ATTRIBUTE_VALUE_QUOTE;
             }
-            case CONT_DISP_FILENAME_PARAM_VALUE_QUOTE:
+            else
+            {
                 start = tmp;
-                tmp = SnortStrnPbrk(start,(end - tmp),"\"");
-                if (tmp)
-                {
-                    end = tmp;
-                    return (end - start);
-                }
-                // Since we have the full header line and there can't be wrapping within a quoted
-                // string, getting here means the line is malformed. Treat like unquoted string
-                filename_state = CONT_DISP_FILENAME_PARAM_VALUE;
-                tmp = start;
-                break;
-            case CONT_DISP_FILENAME_PARAM_VALUE:
-                // Go until we get a ';' or whitespace
-                if ((*tmp == ';') or isspace(*tmp))
-                {
-                    end = tmp;
-                    return (end - start);
-                }
-                tmp++;
-                break;
+                attribute_state = ATTRIBUTE_VALUE;
+            }
+            break;
+
+        case ATTRIBUTE_VALUE_QUOTE:
+            start = tmp;
+            tmp = SnortStrnPbrk(start, end - tmp, "\"");
+
+            if (tmp)
+            {
+                attribute_state = ATTRIBUTE_NAME;  // quoted value complete: back to keyword search for the next call
+                end = tmp;
+                return end - start;
+            }
+
+            // Since we have the full header line and there can't be wrapping within a quoted
+            // string, getting here means the line is malformed. Treat like unquoted string
+            tmp = start;
+            attribute_state = ATTRIBUTE_VALUE;
+            break;
+
+        case ATTRIBUTE_VALUE:
+            if (';' == *tmp or isspace(*tmp))
+            {
+                attribute_state = ATTRIBUTE_NAME;  // unquoted value ended by ';' or whitespace: back to keyword search
+                end = tmp;
+                return end - start;
+            }
+
+            tmp++;
+            break;
+
+        default:
+            assert(false);
+            return -1;
         }
     }
-    if (filename_state == CONT_DISP_FILENAME_PARAM_VALUE)
-    {
-        // The filename is ended by the eol marker
-        return (end - start);
-    }
-    return -1;
+
+    return ATTRIBUTE_VALUE == attribute_state ? end - start : -1;  // an unquoted value may run to the end of the input; NOTE(review): attribute_state is not reset on this path or on the -1 path - confirm that is intended for a following call
 }
 
-int MimeSession::extract_content_type(const char*& start, uint32_t length)
+int MimeSession::extract_value(const char*& start, uint32_t length)
 {
     assert(start);
 
@@ -976,7 +1033,6 @@ void MimeSession::mime_file_process(Packet* p, const uint8_t* data, int data_siz
         {
             continue_inspecting_file = file_flows->set_file_name((const uint8_t*)filename.c_str(),
                 filename.length(), 0, get_multiprocessing_file_id(), uri, uri_length);
-            filename.clear();
         }
     }
 }
index a288ef49e10e8adcac74f0e53625c0c38ab3e393..0ee9d6e948e6f37a94013619653608b74b2e6409 100644 (file)
@@ -27,6 +27,7 @@
 #include "file_api/file_api.h"
 #include "mime/file_mime_config.h"
 #include "mime/file_mime_decode.h"
+#include "mime/file_mime_form_data.h"
 #include "mime/file_mime_log.h"
 #include "mime/file_mime_paf.h"
 
@@ -48,12 +49,12 @@ namespace snort
 #define STATE_DATA_BODY    2    /* Data body section of data state */
 #define STATE_MIME_HEADER  3    /* MIME header section within data section */
 
-enum FilenameState
+enum AttributeState  // parser position kept by MimeSession::extract_attribute between calls
 {
-    CONT_DISP_FILENAME_PARAM_NAME,
-    CONT_DISP_FILENAME_PARAM_EQUALS,
-    CONT_DISP_FILENAME_PARAM_VALUE_QUOTE,
-    CONT_DISP_FILENAME_PARAM_VALUE
+    ATTRIBUTE_NAME,  // looking for the attribute keyword
+    ATTRIBUTE_EQUALS,  // keyword found; skipping '=' and whitespace up to the value
+    ATTRIBUTE_VALUE_QUOTE,  // just past an opening double quote
+    ATTRIBUTE_VALUE  // inside an unquoted value, which ends at ';' or whitespace
 };
 
 /* Maximum length of header chars before colon, based on Exim 4.32 exploit */
@@ -85,6 +86,9 @@ public:
     void set_host_name(const std::string& host);
     bool is_host_set() const;
 
+    // Hands out the collector's fields as an rvalue so the caller can move them out.
+    MimeFormDataCollector::FieldVector&& form_data_content()
+    {
+        // take_fields() already yields an rvalue reference, so it is forwarded as is
+        return form_data_collector.take_fields();
+    }
+
     const BufferData& get_ole_buf();
     const BufferData& get_vba_inspect_buf();
 
@@ -99,11 +103,14 @@ private:
     MailLogConfig* log_config = nullptr;
     MailLogState* log_state = nullptr;
     MimeStats* mime_stats = nullptr;
-    FilenameState filename_state = CONT_DISP_FILENAME_PARAM_NAME;
+    AttributeState attribute_state = ATTRIBUTE_NAME;
     std::string filename;
     std::string content_type;
     std::string host_name {""};
     bool host_set = false;
+
+    MimeFormDataCollector form_data_collector;
+
     bool continue_inspecting_file = true;
     // This counter is not an accurate count of files; used only for creating a unique mime_file_id
     uint32_t file_counter = 0;
@@ -113,6 +120,7 @@ private:
     uint64_t current_multiprocessing_file_id = 0;
     const uint8_t* uri;
     const int32_t uri_length;
+
     uint64_t get_file_cache_file_id();
     uint64_t get_multiprocessing_file_id();
     void mime_file_process(Packet* p, const uint8_t* data, int data_size, FilePosition position, bool upload);
@@ -131,12 +139,15 @@ private:
     const uint8_t* process_mime_header(Packet*, const uint8_t* ptr, const uint8_t* data_end_marker);
     bool process_header_line(const uint8_t*& ptr, const uint8_t* eol, const uint8_t* eolm, const
         uint8_t* start_hdr, Packet* p);
+    void process_content_type(const char* header, uint32_t header_length);
+    void process_content_transfer_encoding(const char* header, uint32_t header_length);
+    void process_content_disposition(const char* header, uint32_t header_length);
     const uint8_t* process_mime_body(const uint8_t* ptr, const uint8_t* data_end, FilePosition);
     const uint8_t* process_mime_data_paf(Packet*, const uint8_t* start, const uint8_t* end,
         bool upload, FilePosition, AttachmentBuffer* attachment);
-    int extract_file_name(const char*& start, int length);
-    int extract_content_type(const char*& start, uint32_t length);
 
+    int extract_value(const char*& start, uint32_t length);
+    int extract_attribute(const char*& start, int length, const char* attr);
 
     uint8_t* partial_header = nullptr;      // single header line split into multiple sections
     uint32_t partial_header_len = 0;
index 3d4ec80aaba6647a668b756c4cdb4ec86ec40e04..8301e10e02b41df22295ef63833f0b4f1a178fc1 100644 (file)
@@ -22,6 +22,7 @@ set (PUB_SUB_INCLUDES
     http_body_event.h
     http_event_ids.h
     http_events.h
+    http_form_data_event.h
     http_publish_length_event.h
     http_request_body_event.h
     http_transaction_end_event.h
@@ -49,6 +50,7 @@ add_library( pub_sub OBJECT
     eof_event.cc
     http_body_event.cc
     http_events.cc
+    http_form_data_event.cc
     http_request_body_event.cc
     http_transaction_end_event.cc
     quic_events.cc
index 2b286f7e6055df855bb39aca06da910f749704b9..80c99ff53f9cc8cb04f56013da4f4b93abfac2e1 100644 (file)
@@ -39,6 +39,7 @@ struct HttpEventIds
     DOH_BODY,
     END_OF_TRANSACTION,
     HTTP_PUBLISH_LENGTH,
+    MIME_FORM_DATA,
 
     num_ids
 }; };
diff --git a/src/pub_sub/http_form_data_event.cc b/src/pub_sub/http_form_data_event.cc
new file mode 100644 (file)
index 0000000..94aaab6
--- /dev/null
@@ -0,0 +1,76 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2025-2025 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// http_form_data_event.cc author Anna Norokh <anorokh@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "http_form_data_event.h"
+
+using namespace snort;
+using namespace std;
+
+static string normalize(const string& value)
+{
+    string field;
+    field.reserve(value.length());
+    bool in_space = true;
+
+    for (const auto& ch : value)
+    {
+        if (isspace(ch))
+        {
+            in_space = true;
+        }
+        else
+        {
+            if (in_space && !field.empty())
+                field += " ";
+            field += ch;
+            in_space = false;
+        }
+    }
+
+    return field;
+}
+
+void HttpFormDataEvent::format_as_uri() const
+{
+    if (form_data_fields.empty())
+        return;
+
+    size_t estimated_size = 0;
+    for (const auto& field : form_data_fields)
+        estimated_size += field.first.length() + field.second.length() + 2; // for "=&"
+
+    form_data_uri.reserve(estimated_size);
+
+    auto it = form_data_fields.begin();
+    form_data_uri = it->first;
+    form_data_uri += '=';
+    form_data_uri += normalize(it->second);
+
+    for (++it; it != form_data_fields.end(); ++it)
+    {
+        form_data_uri += '&';
+        form_data_uri += it->first;
+        form_data_uri += '=';
+        form_data_uri += normalize(it->second);
+    }
+}
diff --git a/src/pub_sub/http_form_data_event.h b/src/pub_sub/http_form_data_event.h
new file mode 100644 (file)
index 0000000..3d1784b
--- /dev/null
@@ -0,0 +1,62 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2025-2025 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// http_form_data_event.h author Anna Norokh <anorokh@cisco.com>
+
+#ifndef HTTP_FORM_DATA_EVENT_H
+#define HTTP_FORM_DATA_EVENT_H
+
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "framework/data_bus.h"
+#include "service_inspectors/http_inspect/http_enum.h"
+
+namespace snort
+{
+// This event is published when HTTP multipart/form-data content is present and processing completes.
+class SO_PUBLIC HttpFormDataEvent : public snort::DataEvent
+{
+public:
+    using FieldPair = std::pair<std::string, std::string>;  // (field name, field value)
+    using FieldVector = std::vector<FieldPair>;
+
+    HttpFormDataEvent(const FieldVector& fields, HttpEnums::MethodId method)  // fields is bound by reference, not copied: the vector has to outlive this event
+        : form_data_fields(fields), method_id(method) { }
+
+    const std::string& get_form_data_uri() const  // formats the fields on first use and returns the cached string afterwards
+    {
+        if (form_data_uri.empty() and !form_data_fields.empty())
+            format_as_uri();
+        return form_data_uri;
+    }
+
+    HttpEnums::MethodId get_method_id() const
+    { return method_id; }
+
+private:
+    void format_as_uri() const;  // fills form_data_uri; defined in http_form_data_event.cc
+
+    const FieldVector& form_data_fields;  // non-owning view of the caller's vector
+    mutable std::string form_data_uri;  // mutable so the const getter can fill it lazily
+    HttpEnums::MethodId method_id;
+};
+
+}
+#endif
+
index 0e2eb58a408cc7983966c7618ad4be6188bbf21c..bdd4b4c698e89d7a51680a22db491acec80051e5 100644 (file)
@@ -17,6 +17,10 @@ add_cpputest( pub_sub_http_body_event_test
     SOURCES
         ../http_body_event.cc
 )
+add_cpputest( pub_sub_http_form_data_event_test
+    SOURCES
+        ../http_form_data_event.cc
+)
 add_cpputest( pub_sub_eve_process_event_test
     SOURCES
         ../eve_process_event.h
diff --git a/src/pub_sub/test/pub_sub_http_form_data_event_test.cc b/src/pub_sub/test/pub_sub_http_form_data_event_test.cc
new file mode 100644 (file)
index 0000000..1f4d2ae
--- /dev/null
@@ -0,0 +1,137 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2025-2025 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// pub_sub_http_form_data_event_test.cc author Anna Norokh <anorokh@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+
+#include "pub_sub/http_form_data_event.h"
+
+#include <CppUTest/CommandLineTestRunner.h>
+#include <CppUTest/TestHarness.h>
+
+using namespace snort;
+
+TEST_GROUP(pub_sub_http_form_data_event_test)
+{ // no shared fixture: every test below builds its own field vector and event
+};
+
+TEST(pub_sub_http_form_data_event_test, single_field)
+{
+    // A lone pair is expected as "name=value" with no separator.
+    HttpFormDataEvent::FieldVector form_fields;
+    form_fields.emplace_back("username", "john_doe");
+    HttpFormDataEvent form_event(form_fields, HttpEnums::METH_POST);
+
+    const std::string& rendered = form_event.get_form_data_uri();
+    STRCMP_EQUAL("username=john_doe", rendered.c_str());
+}
+
+TEST(pub_sub_http_form_data_event_test, multiple_fields)
+{
+    // Pairs are expected in insertion order, separated by '&'.
+    HttpFormDataEvent::FieldVector form_fields = {
+        { "username", "admin" },
+        { "password", "admin" },
+        { "remember", "true" }
+    };
+    HttpFormDataEvent form_event(form_fields, HttpEnums::METH_POST);
+    const std::string& rendered = form_event.get_form_data_uri();
+    STRCMP_EQUAL("username=admin&password=admin&remember=true", rendered.c_str());
+}
+
+TEST(pub_sub_http_form_data_event_test, empty_fields)
+{
+    // With no fields at all, the getter is expected to return an empty string.
+    HttpFormDataEvent::FieldVector no_fields;
+    HttpFormDataEvent form_event(no_fields, HttpEnums::METH_POST);
+
+    const std::string& rendered = form_event.get_form_data_uri();
+    STRCMP_EQUAL("", rendered.c_str());
+}
+
+TEST(pub_sub_http_form_data_event_test, fields_with_empty_values)
+{
+    // A field with an empty value is still expected to appear as "name=".
+    HttpFormDataEvent::FieldVector form_fields;
+    form_fields.emplace_back("search", "");
+    form_fields.emplace_back("page", "1");
+    HttpFormDataEvent form_event(form_fields, HttpEnums::METH_POST);
+
+    const std::string& rendered = form_event.get_form_data_uri();
+    STRCMP_EQUAL("search=&page=1", rendered.c_str());
+}
+
+TEST(pub_sub_http_form_data_event_test, fields_with_special_characters)
+{
+    // Quotes, '=' and ';' are expected verbatim; the run of spaces after ';' becomes one.
+    HttpFormDataEvent::FieldVector form_fields;
+    form_fields.emplace_back("query", "' OR '1'='1");
+    form_fields.emplace_back("id", "1;   DROP TABLE users--");
+    HttpFormDataEvent form_event(form_fields, HttpEnums::METH_POST);
+
+    const std::string& rendered = form_event.get_form_data_uri();
+    STRCMP_EQUAL("query=' OR '1'='1&id=1; DROP TABLE users--", rendered.c_str());
+}
+
+TEST(pub_sub_http_form_data_event_test, caching_multiple_calls)
+{
+    HttpFormDataEvent::FieldVector fields;
+    fields.emplace_back("name", "test");
+    fields.emplace_back("value", "123");
+
+    HttpFormDataEvent event(fields, HttpEnums::METH_POST);
+
+    // First call formats the URI. Copy it: the getter returns a reference to the
+    // event's own string, so comparing two such references could never fail.
+    const std::string first_uri = event.get_form_data_uri();
+    // Second call should return the cached result, unchanged
+    const std::string& second_uri = event.get_form_data_uri();
+
+    STRCMP_EQUAL("name=test&value=123", first_uri.c_str());
+    STRCMP_EQUAL(first_uri.c_str(), second_uri.c_str());
+}
+
+TEST(pub_sub_http_form_data_event_test, fields_with_tab_characters)
+{
+    // Expected: tabs read as spaces, whitespace runs collapse, value edges are trimmed.
+    HttpFormDataEvent::FieldVector form_fields;
+    form_fields.emplace_back("name", "John\tDoe");
+    form_fields.emplace_back("Blog", " Hello!\tMy name is\t John. ");
+    HttpFormDataEvent form_event(form_fields, HttpEnums::METH_POST);
+
+    const std::string& rendered = form_event.get_form_data_uri();
+    STRCMP_EQUAL("name=John Doe&Blog=Hello! My name is John.", rendered.c_str());
+}
+
+TEST(pub_sub_http_form_data_event_test, get_method_id)
+{
+    // The method id handed to the constructor is expected back unchanged.
+    HttpFormDataEvent::FieldVector form_fields;
+    form_fields.emplace_back("username", "admin");
+    HttpFormDataEvent form_event(form_fields, HttpEnums::METH_POST);
+
+    CHECK_EQUAL(HttpEnums::METH_POST, form_event.get_method_id());
+}
+
+int main(int argc, char** argv)
+{
+    return CommandLineTestRunner::RunAllTests(argc, argv);  // runner result is the exit status
+}
index 4956e38db9c5ef6a4f53331ccb857aeac949c5d1..9752689093d45dffa3e6dda676c750f7e4968d9f 100644 (file)
@@ -23,6 +23,7 @@
 #endif
 
 #include "pub_sub/http_transaction_end_event.h"
+#include "pub_sub/http_form_data_event.h"
 #include "service_inspectors/http_inspect/http_common.h"
 #include "service_inspectors/http_inspect/http_enum.h"
 #include "service_inspectors/http_inspect/http_flow_data.h"
@@ -72,6 +73,9 @@ unsigned StreamSplitter::max(snort::Flow*) { return 0; }
 HttpParaList::UriParam::UriParam() { }
 HttpParaList::JsNormParam::~JsNormParam() { }
 HttpParaList::~HttpParaList() { }
+
+void HttpFormDataEvent::format_as_uri() const { }  // empty stub; presumably only here so this test links
+
 HttpInspect::HttpInspect(const HttpParaList* para) :
     params(para), xtra_trueip_id(0), xtra_uri_id(0),
     xtra_host_id(0), xtra_jsnorm_id(0)
index 1f384b149e706a04621e68280120a7fa68c625dc..df4107d25bae432e726d20d6ffa19bf22bbd7d0c 100755 (executable)
@@ -134,8 +134,9 @@ enum Contentcoding { CONTENTCODE__OTHER=1, CONTENTCODE_GZIP, CONTENTCODE_DEFLATE
 // Content media-types (MIME types)
 enum ContentType { CT__OTHER=1, CT_APPLICATION_DNS, CT_APPLICATION_PDF, CT_APPLICATION_OCTET_STREAM,
     CT_APPLICATION_JAVASCRIPT, CT_APPLICATION_ECMASCRIPT, CT_APPLICATION_X_JAVASCRIPT,
-    CT_APPLICATION_X_ECMASCRIPT, CT_APPLICATION_XHTML_XML, CT_TEXT_JAVASCRIPT,
-    CT_TEXT_JAVASCRIPT_1_0, CT_TEXT_JAVASCRIPT_1_1, CT_TEXT_JAVASCRIPT_1_2, CT_TEXT_JAVASCRIPT_1_3,
+    CT_APPLICATION_X_ECMASCRIPT, CT_APPLICATION_XHTML_XML,
+    CT_MULTIPART_FORM_DATA,
+    CT_TEXT_JAVASCRIPT, CT_TEXT_JAVASCRIPT_1_0, CT_TEXT_JAVASCRIPT_1_1, CT_TEXT_JAVASCRIPT_1_2, CT_TEXT_JAVASCRIPT_1_3,
     CT_TEXT_JAVASCRIPT_1_4, CT_TEXT_JAVASCRIPT_1_5, CT_TEXT_ECMASCRIPT, CT_TEXT_X_JAVASCRIPT,
     CT_TEXT_X_ECMASCRIPT, CT_TEXT_JSCRIPT, CT_TEXT_LIVESCRIPT, CT_TEXT_HTML };
 
index 45ee94adb0455ec45f319f689d1c09751a0ecfd4..23d0ef324bf8739a950994707a96ed9938a27344 100644 (file)
 #ifndef HTTP_FIELD_H
 #define HTTP_FIELD_H
 
-#include <cstdint>
-#include <cstdio>
 #include <cassert>
+#include <cstdint>
+#include <string>
+#include <vector>
+#include <utility>
 
 #include "main/snort_types.h"
 
@@ -76,5 +78,7 @@ struct MimeBufs
         vba(vba_len, vba_buf, vba_own) {}
 };
 
+using MimeFormData = std::vector<std::pair<std::string, std::string>>;  // form fields as string pairs
+
 #endif
 
index 8948001e256a6fe4bcfd6530eac27555aafe397d..4d1275722069b0943f31ac09437f9a5e2a3bbd0a 100644 (file)
@@ -31,8 +31,9 @@
 #include "http_module.h"
 #include "js_norm/js_enum.h"
 #include "pub_sub/dns_payload_event.h"
-#include "pub_sub/http_request_body_event.h"
 #include "pub_sub/http_body_event.h"
+#include "pub_sub/http_form_data_event.h"
+#include "pub_sub/http_request_body_event.h"
 #include "pub_sub/intrinsic_event_ids.h"
 
 #include "http_api.h"
@@ -80,6 +81,24 @@ HttpMsgBody::HttpMsgBody(const uint8_t* buffer, const uint16_t buf_size,
 
 void HttpMsgBody::publish(unsigned pub_id)
 {
+    // publish data extracted from MIME
+    if (!mime_fields.empty())
+    {
+        HttpFormDataEvent http_form_data_event(mime_fields, method_id);
+        DataBus::publish(pub_id, HttpEventIds::MIME_FORM_DATA, http_form_data_event, flow);
+
+    #ifdef REG_TEST
+        if (HttpTestManager::use_test_output(HttpTestManager::IN_HTTP))
+        {
+            fprintf(HttpTestManager::get_output_file(),
+                "HttpFormDataEvent event published. Originated from client.\n");
+            fprintf(HttpTestManager::get_output_file(),
+                "Form data URI: %s\n", http_form_data_event.get_form_data_uri().c_str());
+            fflush(HttpTestManager::get_output_file());
+        }
+    #endif
+    }
+
     if (publish_length <= 0)
         return;
 
@@ -143,10 +162,10 @@ void HttpMsgBody::publish(unsigned pub_id)
     }
 
     // publish DOH body if applicable
-    if (get_header(source_id) and (get_header(source_id)->get_content_type() == CT_APPLICATION_DNS) 
+    if (get_header(source_id) and (get_header(source_id)->get_content_type() == CT_APPLICATION_DNS)
         and (publish_octets < BODY_PUBLISH_DEPTH))
     {
-        const auto doh_publish_depth_remaining = BODY_PUBLISH_DEPTH - publish_octets;        
+        const auto doh_publish_depth_remaining = BODY_PUBLISH_DEPTH - publish_octets;
         auto doh_publish_length = (publish_length > doh_publish_depth_remaining) ?
             doh_publish_depth_remaining : publish_length;
         auto doh_last_piece = last_piece ? true : (publish_octets + doh_publish_length >= BODY_PUBLISH_DEPTH);
@@ -334,6 +353,13 @@ void HttpMsgBody::analyze()
         }
 
         detect_data.set(msg_text.length(), msg_text.start());
+
+        // Extract Form Data
+        bool is_request = nullptr != request;
+        bool last_piece = session_data->cutter[source_id] == nullptr or tcp_close;
+
+        if (is_request and last_piece)
+            mime_fields = session_data->mime_state[source_id]->form_data_content();
     }
 
     else if (session_data->file_depth_remaining[source_id] > 0 or
@@ -899,7 +925,7 @@ void HttpMsgBody::clear()
 
     if (request != nullptr)
         request->clear_body_params();
-        
+
     HttpMsgSection::clear();
 }
 
index b15b0a8308e0265de0fde0b2982402105c3d229c..d201646960469b7d327307a30669dde2ffe44168 100644 (file)
@@ -101,6 +101,7 @@ private:
     Field ole_data;
     std::list<MimeBufs>* mime_bufs = nullptr;
     bool last_attachment_complete = true;
+    MimeFormData mime_fields;
 
     int32_t publish_length = HttpCommon::STAT_NOT_PRESENT;
 };
index 3c2ed9ec6b7e8d4ff56fd54ef41524b54570df4e..e21875078dc3239d29c37a910ab09b24520e461c 100755 (executable)
@@ -182,6 +182,7 @@ const StrCode HttpMsgHeadShared::content_type_list[] =
     { CT_APPLICATION_X_JAVASCRIPT, "application/x-javascript" },
     { CT_APPLICATION_X_ECMASCRIPT, "application/x-ecmascript" },
     { CT_APPLICATION_XHTML_XML,    "application/xhtml+xml" },
+    { CT_MULTIPART_FORM_DATA,      "multipart/form-data" },
     { CT_TEXT_JAVASCRIPT,          "text/javascript" },
     { CT_TEXT_JAVASCRIPT_1_0,      "text/javascript1.0" },
     { CT_TEXT_JAVASCRIPT_1_1,      "text/javascript1.1" },
index ee9e85505e865b2183f144cd55dca8af2b379532..c531179dcc4bef69e6d029d20f450cf1486f778d 100644 (file)
@@ -75,7 +75,7 @@ public:
     { return filename[source_id]; }
     const std::string& get_content_type(HttpCommon::SourceId source_id) const
     { return content_type[source_id]; }
-  
+
     void clear_section();
     bool is_clear() const { return active_sections == 0; }
     void garbage_collect();
index 31f42ce4fad9f6d281b0f431beb4b41d5a70cea9..07d2958fdfdf9b5afd836a2385a34144a4e2c7be 100644 (file)
@@ -24,6 +24,7 @@
 #endif
 
 #include "pub_sub/http_transaction_end_event.h"
+#include "pub_sub/http_form_data_event.h"
 #include "service_inspectors/http_inspect/http_common.h"
 #include "service_inspectors/http_inspect/http_enum.h"
 #include "service_inspectors/http_inspect/http_flow_data.h"
@@ -79,6 +80,8 @@ HttpParaList::UriParam::UriParam() {}
 HttpParaList::JsNormParam::~JsNormParam() {}
 HttpParaList::~HttpParaList() {}
 
+void HttpFormDataEvent::format_as_uri() const { }  // empty stub; presumably only here so this test links
+
 unsigned Http2FlowData::inspector_id = 0;
 uint32_t Http2FlowData::get_processing_stream_id() const { return 0; }
 HttpInspect::HttpInspect(const HttpParaList* para) :