git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #4537: http_inspect: save mime filenames in transaction
author Adrian Mamolea (admamole) <admamole@cisco.com>
Sat, 8 Feb 2025 13:34:46 +0000 (13:34 +0000)
committer Maya Dagon (mdagon) <mdagon@cisco.com>
Sat, 8 Feb 2025 13:34:46 +0000 (13:34 +0000)
Merge in SNORT/snort3 from ~ADMAMOLE/snort3:file_name to master

Squashed commit of the following:

commit 5dd25eff54a4eae0be022c27c6b64156ddc62774
Author: Adrian Mamolea <admamole@cisco.com>
Date:   Wed Dec 4 16:57:33 2024 -0500

    extractor: add support for file name and type for mime

12 files changed:
doc/user/extractor.txt
src/mime/file_mime_process.cc
src/mime/file_mime_process.h
src/network_inspectors/extractor/extractor_http.cc
src/network_inspectors/extractor/extractor_service.cc
src/pub_sub/http_transaction_end_event.cc
src/pub_sub/http_transaction_end_event.h
src/pub_sub/test/pub_sub_http_transaction_end_event_test.cc
src/service_inspectors/http_inspect/http_msg_body.cc
src/service_inspectors/http_inspect/http_msg_body.h
src/service_inspectors/http_inspect/http_transaction.cc
src/service_inspectors/http_inspect/http_transaction.h

index a053849451db27f10ef2c5ff49eeefec0e8f4b98..8da1ad8ed8f7395ccd0fef6bb05ac98edb488106 100644 (file)
@@ -71,6 +71,15 @@ Fields supported for HTTP:
 * `status_code` - status code returned by server
 * `status_msg` - status message returned by server
 * `trans_depth` - number of request-response pairs seen in the session
+* `request_body_len` - length of the body, decompressed and normalized, of the HTTP request
+* `response_body_len` - length of the body, decompressed and normalized, of the HTTP response
+* `info_code` - last informational status code returned by the server
+* `info_msg` - last informational reason phrase returned by the server
+* `proxied` - list with the headers associated with proxied requests
+* `orig_filenames` - list with the names of the files sent by client
+* `resp_filenames` - list with the names of the files sent by server
+* `orig_mime_types` - list with the content types of the files sent by client
+* `resp_mime_types` - list with the content types of the files sent by server
 
 Fields supported for FTP:
 
index 15b49d13ab7ccd0dffea84947aac9d5cdff69998..f9747bbf09403c70a4f32979314662f5f7d5c5cc 100644 (file)
@@ -374,7 +374,10 @@ bool MimeSession::process_header_line(const uint8_t*& ptr, const uint8_t* eol, c
         {
             setup_attachment_processing();
         }
-        // We don't need the value, so it doesn't matter if we're folding
+
+        int len = extract_content_type((const char*&)header_value_ptr, header_value_len);
+        if (len > 0)
+            content_type.assign((const char*)header_value_ptr, len);
         state_flags &= ~MIME_FLAG_IN_CONTENT_TYPE;
     }
     else if (state_flags & MIME_FLAG_IN_CONT_TRANS_ENC)
@@ -493,7 +496,8 @@ void MimeSession::reset_mime_state()
 }
 
 const uint8_t* MimeSession::process_mime_data_paf(
-    Packet* p, const uint8_t* start, const uint8_t* end, bool upload, FilePosition position)
+    Packet* p, const uint8_t* start, const uint8_t* end, bool upload, FilePosition position,
+    AttachmentBuffer* attachment)
 {
     Flow* flow = p->flow;
     bool done_data = is_end_of_data(flow);
@@ -594,9 +598,19 @@ const uint8_t* MimeSession::process_mime_data_paf(
                     else
                         set_file_data(decomp_buffer, decomp_buf_size, file_counter);
 
-                    attachment.data = decomp_buffer;
-                    attachment.length = decomp_buf_size;
-                    attachment.finished = isFileEnd(position);
+                    if (attachment)
+                    {
+                        attachment->data = decomp_buffer;
+                        attachment->length = decomp_buf_size;
+                        attachment->finished = isFileEnd(position);
+
+                        attachment->started = isFileStart(position);
+                        if (attachment->started)
+                        {
+                            attachment->filename = filename;
+                            attachment->content_type = content_type;
+                        }
+                    }
                 }
 
                 // Process file type/file signature
@@ -664,6 +678,7 @@ void MimeSession::reset_part_state()
 
     // Clear MIME's file data to prepare for next file
     filename.clear();
+    content_type.clear();
     file_counter++;
     file_offset = 0;
     current_file_cache_file_id = 0;
@@ -674,21 +689,20 @@ void MimeSession::reset_part_state()
 // Main function for mime processing
 // This should be called when mime data is available
 const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start,
-    int data_size, bool upload, FilePosition position)
+    int data_size, bool upload, FilePosition position, AttachmentBuffer* attachment)
 {
     const uint8_t* attach_start = start;
     const uint8_t* attach_end;
 
     const uint8_t* data_end_marker = start + data_size;
 
-    attachment.data = nullptr;
-    attachment.length = 0;
-    attachment.finished = true;
+    if (attachment)
+        attachment->clear();
 
     if (position != SNORT_FILE_POSITION_UNKNOWN)
     {
         process_mime_data_paf(p, attach_start, data_end_marker,
-            upload, position);
+            upload, position, attachment);
         return data_end_marker;
     }
 
@@ -702,7 +716,7 @@ const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start,
             attach_end = start;
             finalFilePosition(&position);
             process_mime_data_paf(p, attach_start, attach_end,
-                upload, position);
+                upload, position, attachment);
             data_state = STATE_MIME_HEADER;
             return attach_end;
         }
@@ -714,7 +728,7 @@ const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start,
     {
         updateFilePosition(&position, file_offset);
         process_mime_data_paf(p, attach_start, data_end_marker,
-            upload, position);
+            upload, position, attachment);
     }
 
     return data_end_marker;
@@ -827,6 +841,24 @@ int MimeSession::extract_file_name(const char*& start, int length)
     return -1;
 }
 
+// Trims leading whitespace from a Content-Type header value and measures the media type.
+// On return, start points at the first non-space byte; the result is the number of bytes
+// up to ';', whitespace or the end of the value (0 if the value is empty or all whitespace).
+int MimeSession::extract_content_type(const char*& start, uint32_t length)
+{
+    assert(start);
+    const char* tmp = start;
+    const char* const end = start + length;
+    // isspace() is undefined for negative char values (bytes >= 0x80), so classify as unsigned
+    while (tmp < end and isspace(static_cast<unsigned char>(*tmp)))
+        tmp++;
+    start = tmp;
+
+    while (tmp < end and *tmp != ';' and !isspace(static_cast<unsigned char>(*tmp)))
+        tmp++;
+    return static_cast<int>(tmp - start);
+}
+
 /*
  * This is the initialization function for mime processing.
  * This should be called when snort initializes
index 3bb4850cf1b0c1a5dcfede34fd72a34fc0bfd8dc..554fe70e5cf873eeeca137623db17860e0ec5ce5 100644 (file)
@@ -62,6 +62,8 @@ enum FilenameState
 class SO_PUBLIC MimeSession
 {
 public:
+    struct AttachmentBuffer;
+
     MimeSession(Packet*, const DecodeConfig*, MailLogConfig*, uint64_t base_file_id=0,
         const uint8_t* uri=nullptr, const int32_t uri_length=0);
     virtual ~MimeSession();
@@ -73,7 +75,7 @@ public:
     static void exit();
 
     const uint8_t* process_mime_data(Packet*, const uint8_t *data, int data_size,
-        bool upload, FilePosition);
+        bool upload, FilePosition, AttachmentBuffer* attachment = nullptr);
 
     int get_data_state();
     void set_data_state(int);
@@ -86,15 +88,6 @@ public:
     const BufferData& get_ole_buf();
     const BufferData& get_vba_inspect_buf();
 
-    struct AttachmentBuffer
-    {
-        const uint8_t* data = nullptr;
-        uint32_t length = 0;
-        bool finished = true;
-    };
-
-    const AttachmentBuffer get_attachment() { return attachment; }
-
 protected:
     MimeDecode* decode_state = nullptr;
 
@@ -108,6 +101,7 @@ private:
     MimeStats* mime_stats = nullptr;
     FilenameState filename_state = CONT_DISP_FILENAME_PARAM_NAME;
     std::string filename;
+    std::string content_type;
     std::string host_name {""};
     bool host_set = false;
     bool continue_inspecting_file = true;
@@ -139,16 +133,34 @@ private:
         uint8_t* start_hdr, Packet* p);
     const uint8_t* process_mime_body(const uint8_t* ptr, const uint8_t* data_end, FilePosition);
     const uint8_t* process_mime_data_paf(Packet*, const uint8_t* start, const uint8_t* end,
-        bool upload, FilePosition);
+        bool upload, FilePosition, AttachmentBuffer* attachment);
     int extract_file_name(const char*& start, int length);
+    int extract_content_type(const char*& start, uint32_t length);
+
 
     uint8_t* partial_header = nullptr;      // single header line split into multiple sections
     uint32_t partial_header_len = 0;
     uint8_t* partial_data = nullptr;        // attachment's trailing bytes (suspected boundary)
     uint32_t partial_data_len = 0;
     uint8_t* rebuilt_data = nullptr;        // prepended attachment data for detection module
+};
 
-    AttachmentBuffer attachment;            // decoded and uncompressed file body
+struct MimeSession::AttachmentBuffer  // caller-owned result that process_mime_data() fills in
+{
+    std::string filename;  // copied from the session's filename when the attachment starts
+    std::string content_type;  // copied from the session's content_type when the attachment starts
+    const uint8_t* data = nullptr;  // decoded and uncompressed file body (the decompression buffer)
+    uint32_t length = 0;  // size of that buffer
+    bool started = false;  // isFileStart(position) for the data just processed
+    bool finished = true;  // isFileEnd(position) for the data just processed
+
+    void clear()  // resets data, length and both flags; filename and content_type are left as-is
+    {
+        data = nullptr;
+        length = 0;
+        started = false;
+        finished = true;
+    }
 };
 }
 #endif
index fad67d08cfa8bea56ccd9b7b039b0f86099fce20..9eea28f6976890b3a066d501248225238250b4cb 100644 (file)
@@ -125,12 +125,24 @@ static const char* get_resp_filenames(const DataEvent* event, const Flow*)
     return ((const HttpTransactionEndEvent*)event)->get_filename(HttpCommon::SRC_SERVER).c_str();
 }
 
+static const char* get_orig_mime_types(const DataEvent* event, const Flow*)  // client-side content types
+{  // the C-style cast is unchecked: event must really be an HttpTransactionEndEvent
+    return ((const HttpTransactionEndEvent*)event)->get_content_type(HttpCommon::SRC_CLIENT).c_str();
+}  // the pointer refers to a string returned by reference, not to a temporary
+
+static const char* get_resp_mime_types(const DataEvent* event, const Flow*)  // server-side content types
+{  // the C-style cast is unchecked: event must really be an HttpTransactionEndEvent
+    return ((const HttpTransactionEndEvent*)event)->get_content_type(HttpCommon::SRC_SERVER).c_str();
+}  // the pointer refers to a string returned by reference, not to a temporary
+
 static const map<string, ExtractorEvent::BufGetFn> sub_buf_getters =
 {
     {"version", get_version},
     {"proxied", get_proxied},
     {"orig_filenames", get_orig_filenames},
-    {"resp_filenames", get_resp_filenames}
+    {"resp_filenames", get_resp_filenames},
+    {"orig_mime_types", get_orig_mime_types},  // content types of the files sent by the client
+    {"resp_mime_types", get_resp_mime_types}  // content types of the files sent by the server
 };
 
 static const map<string, ExtractorEvent::NumGetFn> sub_num_getters =
index 19658404d4480257274cc59a7c12b4cbf40d945a..a8e32e8bfb92756a9c7b6f55054e5dc380e042a3 100644 (file)
@@ -249,7 +249,9 @@ const ServiceBlueprint HttpExtractorService::blueprint =
       "info_msg",
       "proxied",
       "orig_filenames",
-      "resp_filenames"
+      "resp_filenames",
+      "orig_mime_types",
+      "resp_mime_types"
     },
 };
 
index be35661cd2afd24f36c7f7cf3b8cb1444b8d56b5..5ce225b7f856d36d35d61444edd5473575040c46 100644 (file)
@@ -140,6 +140,11 @@ const std::string& HttpTransactionEndEvent::get_filename(HttpCommon::SourceId sr
     return transaction->get_filename(src_id);
 }
 
+const std::string& HttpTransactionEndEvent::get_content_type(HttpCommon::SourceId src_id) const
+{  // forwards to the transaction, which owns the returned string
+    return transaction->get_content_type(src_id);
+}
+
 const std::string& HttpTransactionEndEvent::get_proxied() const
 {
     if (proxies != nullptr)
@@ -163,9 +168,9 @@ const std::string& HttpTransactionEndEvent::get_proxied() const
         if (val.length() > 0)
         {
             if (!proxies->empty())
-                proxies->append(",");
+                proxies->append(" ");
             proxies->append(hdr.second);
-            proxies->append(" -> ");
+            proxies->append("->");
             proxies->append((const char*)val.start(), val.length());
         }
     }
index 7fda6f0dd975afae754d89b1b17c260ba5c706a6..03933b239da33d0b85d968797bb85a33c7a1670f 100644 (file)
@@ -56,6 +56,7 @@ public:
     uint8_t get_info_code() const;
     const Field& get_info_msg() const;
     const std::string& get_filename(HttpCommon::SourceId) const;
+    const std::string& get_content_type(HttpCommon::SourceId) const;
     const std::string& get_proxied() const;
 
 private:
index 8762cc6fb302c016e99b0652ebd509d68d563ea4..df28861b15a803a9eaaf5c1e765b5d0c6437842b 100644 (file)
@@ -192,8 +192,8 @@ TEST(pub_sub_http_transaction_end_event_test, proxied_str_exists)
     HttpMsgHeader* hdr = new HttpMsgHeader((uint8_t*)buf, sizeof(buf), flow_data, SRC_CLIENT, false, flow, &params);
     trans->set_header(hdr, SRC_CLIENT);
     HttpTransactionEndEvent event(trans);
-    const std::string result = "FORWARDED -> odd,X-FORWARDED-FOR -> odd,X-FORWARDED-FROM -> odd,"
-        "CLIENT-IP -> odd,VIA -> odd,XROXY-CONNECTION -> odd,PROXY-CONNECTION -> odd";
+    const std::string result = "FORWARDED->odd X-FORWARDED-FOR->odd X-FORWARDED-FROM->odd "
+        "CLIENT-IP->odd VIA->odd XROXY-CONNECTION->odd PROXY-CONNECTION->odd";
     test_number = 1;
     std::string proxied = event.get_proxied();
     CHECK(proxied == result);
index e7c80933ef1736760f9e2ae7dd5f48c37256768e..6ed1f03b8dcdc3151488897ef20af19b1c16173a 100644 (file)
@@ -199,10 +199,11 @@ void HttpMsgBody::analyze()
         {
             // After process_mime_data(), ptr will point to the last byte processed in the current MIME part
             ptr = session_data->mime_state[source_id]->process_mime_data(p, ptr, 
-                (section_end - ptr), true, SNORT_FILE_POSITION_UNKNOWN);
+                (section_end - ptr), true, SNORT_FILE_POSITION_UNKNOWN, &latest_attachment);
             ptr++;
 
-            latest_attachment = session_data->mime_state[source_id]->get_attachment();
+            if (latest_attachment.started)
+                transaction->add_filename(source_id, latest_attachment.filename, latest_attachment.content_type);
 
             if (!latest_attachment.data)
             {
@@ -697,10 +698,14 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
     
     const uint8_t* filename_buffer = nullptr;
     uint32_t filename_length = 0;
+    const uint8_t* filetype_buffer = nullptr;
+    uint32_t filetype_length = 0;
     const uint8_t* uri_buffer = nullptr;
     uint32_t uri_length = 0;
+
     if (request != nullptr)
-        get_file_info(dir, filename_buffer, filename_length, uri_buffer, uri_length);
+        get_file_info(dir, filename_buffer, filename_length, filetype_buffer, filetype_length,
+            uri_buffer, uri_length);
 
     // Get host from the uri.
     if (host.empty() and request != nullptr)
@@ -723,7 +728,8 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
                     filename_length, 0,
                     get_header(source_id)->get_multi_file_processing_id(), uri_buffer,
                     uri_length);
-                transaction->set_filename(source_id, (const char*) filename_buffer, filename_length);
+                transaction->add_filename(source_id, (const char*) filename_buffer, filename_length,
+                    (const char*) filetype_buffer, filetype_length);
             }
         }
     }
@@ -808,7 +814,8 @@ void HttpMsgBody::clear()
 // query or fragment. For the uri, use the request raw uri. If there is no URI or nothing in the
 // path after the last slash, the filename and uri buffers may be empty. The normalized URI is used.
 void HttpMsgBody::get_file_info(FileDirection dir, const uint8_t*& filename_buffer,
-    uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length)
+    uint32_t& filename_length, const uint8_t*& filetype_buffer, uint32_t& filetype_length,
+    const uint8_t*& uri_buffer, uint32_t& uri_length)
 {
     filename_buffer = uri_buffer = nullptr;
     filename_length = uri_length = 0;
@@ -825,6 +832,15 @@ void HttpMsgBody::get_file_info(FileDirection dir, const uint8_t*& filename_buff
         }
     }
 
+    const Field& filetype = get_header(source_id)->get_header_value_norm(HEAD_CONTENT_TYPE);
+    if (filetype.length() > 0)
+    {
+        filetype_buffer = filetype.start();
+        filetype_length = filetype.length();
+        if (filetype_buffer[filetype_length - 1] == ';')
+            filetype_length--;
+    }
+
     if (http_uri)
     {
         const Field& uri_field = http_uri->get_norm_classic();
index 17cd366d5937aec8c0969235862befe4a26a15b7..7c55c76527d2d740abfca2769a3d060a18e08eab 100644 (file)
@@ -82,7 +82,8 @@ private:
         uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length,
         int32_t detect_length);
     void get_file_info( FileDirection dir, const uint8_t*& filename_buffer,
-        uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length);
+        uint32_t& filename_length, const uint8_t*& filetype_buffer, uint32_t& filetype_length,
+        const uint8_t*& uri_buffer, uint32_t& uri_length);
     void get_ole_data();
 
     Field msg_text_new;
index 3846065eb69a0bd370da742d39a94e8a46a6b733..8b79583d3deee1e38bba7cf1096f62b74d392b24 100644 (file)
@@ -347,3 +347,32 @@ const Field& HttpTransaction::get_info_msg() const
 
      return Field::FIELD_NULL;
 }
+
+// Adds the list separator to both per-source lists once either of them already holds an entry.
+void HttpTransaction::append_separator_if_needed(HttpCommon::SourceId source_id)
+{
+    // A part may have a content type but no filename; testing both keeps the lists aligned.
+    if (!filename[source_id].empty() or !content_type[source_id].empty())
+    {
+        filename[source_id].append(" ");
+        content_type[source_id].append(" ");
+    }
+}
+
+void HttpTransaction::add_filename(HttpCommon::SourceId source_id, const char* fname, uint32_t len,
+    const char* ftype, uint32_t tlen)  // records one file: name (fname, len) and type (ftype, tlen)
+{
+    append_separator_if_needed(source_id);
+    if (len)  // with a zero length fname is never read, so it may be null
+        filename[source_id].append(fname, len);
+    if (tlen)  // same guard for the content type buffer
+        content_type[source_id].append(ftype, tlen);
+}
+
+void HttpTransaction::add_filename(HttpCommon::SourceId source_id, const std::string& fname,
+    const std::string& ftype)  // std::string overload: records one file name and its content type
+{
+    append_separator_if_needed(source_id);
+    filename[source_id].append(fname);  // appending an empty string is a no-op
+    content_type[source_id].append(ftype);
+}
index d64a6e817544edf1c81c03b694470b8872f4d2e8..25a2ec98746a847c3a6f4dd38da252448499c16c 100644 (file)
@@ -49,12 +49,12 @@ public:
 
     HttpMsgHeader* get_header(HttpCommon::SourceId source_id) const { return header[source_id]; }
     void set_header(HttpMsgHeader* header_, HttpCommon::SourceId source_id)
-        { header[source_id] = header_; }
+    { header[source_id] = header_; }
 
     HttpMsgTrailer* get_trailer(HttpCommon::SourceId source_id) const
-        { return trailer[source_id]; }
+    { return trailer[source_id]; }
     void set_trailer(HttpMsgTrailer* trailer_, HttpCommon::SourceId source_id)
-        { trailer[source_id] = trailer_; }
+    { trailer[source_id] = trailer_; }
     void set_body(HttpMsgBody* latest_body);
 
     HttpInfractions* get_infractions(HttpCommon::SourceId);
@@ -63,15 +63,19 @@ public:
     bool final_response() const { return !second_response_expected; }
 
     void add_body_len(HttpCommon::SourceId source_id, uint64_t len)
-        { body_len[source_id] += len; }
+    { body_len[source_id] += len; }
     uint64_t get_body_len(HttpCommon::SourceId source_id) const
-        { return body_len[source_id]; }
+    { return body_len[source_id]; }
     uint8_t get_info_code() const;
     const Field& get_info_msg() const;
-    void set_filename(HttpCommon::SourceId source_id, const char* fname, uint32_t len)
-        { filename[source_id].assign(fname, len);}
+    void add_filename(HttpCommon::SourceId source_id, const char* fname, uint32_t flen,
+        const char* ftype, uint32_t tlen);
+    void add_filename(HttpCommon::SourceId source_id,
+        const std::string& fname, const std::string& ftype);
     const std::string& get_filename(HttpCommon::SourceId source_id) const
-        { return filename[source_id]; }
+    { return filename[source_id]; }
+    const std::string& get_content_type(HttpCommon::SourceId source_id) const
+    { return content_type[source_id]; }
   
     void clear_section();
     bool is_clear() const { return active_sections == 0; }
@@ -85,6 +89,7 @@ private:
     void archive_status(HttpMsgStatus*);
     void archive_header(HttpMsgHeader*);
     void publish_end_of_transaction();
+    void append_separator_if_needed(HttpCommon::SourceId);
 
     HttpFlowData* const session_data;
 
@@ -112,7 +117,8 @@ private:
     snort::Flow* const flow;
 
     uint64_t body_len[2] = { 0, 0 };
-    std::string filename[2]; 
+    std::string filename[2];
+    std::string content_type[2];
 
     // Estimates of how much memory http_inspect uses to process a transaction
     static const uint16_t small_things = 400; // minor memory costs not otherwise accounted for