From: Adrian Mamolea (admamole) Date: Sat, 8 Feb 2025 13:34:46 +0000 (+0000) Subject: Pull request #4537: http_inspect: save mime filenames in transaction X-Git-Tag: 3.7.1.0~31 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1498524bb5c1c5e4f508d08a659d800d8a86e9f5;p=thirdparty%2Fsnort3.git Pull request #4537: http_inspect: save mime filenames in transaction Merge in SNORT/snort3 from ~ADMAMOLE/snort3:file_name to master Squashed commit of the following: commit 5dd25eff54a4eae0be022c27c6b64156ddc62774 Author: Adrian Mamolea Date: Wed Dec 4 16:57:33 2024 -0500 extractor: add support for file name and type for mime --- diff --git a/doc/user/extractor.txt b/doc/user/extractor.txt index a05384945..8da1ad8ed 100644 --- a/doc/user/extractor.txt +++ b/doc/user/extractor.txt @@ -71,6 +71,15 @@ Fields supported for HTTP: * `status_code` - status code returned by server * `status_msg` - status message returned by server * `trans_depth` - number of request-response pairs seen in the session +* `request_body_len` - length of the body, decompressed and normalized, of the HTTP request +* `response_body_len` - length of the body, decompressed and normalized, of the HTTP response +* `info_code` - last informational status code returned by the server +* `info_msg` - last informational reason phrase returned by the server +* `proxied` - list with the headers associated with proxied requests +* `orig_filenames` - list with the names of the files sent by client +* `resp_filenames` - list with the names of the files sent by server +* `orig_mime_types` - list with the content types of the files sent by client +* `resp_mime_types` - list with the content types of the files sent by server Fields supported for FTP: diff --git a/src/mime/file_mime_process.cc b/src/mime/file_mime_process.cc index 15b49d13a..f9747bbf0 100644 --- a/src/mime/file_mime_process.cc +++ b/src/mime/file_mime_process.cc @@ -374,7 +374,10 @@ bool MimeSession::process_header_line(const 
uint8_t*& ptr, const uint8_t* eol, c { setup_attachment_processing(); } - // We don't need the value, so it doesn't matter if we're folding + + int len = extract_content_type((const char*&)header_value_ptr, header_value_len); + if (len > 0) + content_type.assign((const char*)header_value_ptr, len); state_flags &= ~MIME_FLAG_IN_CONTENT_TYPE; } else if (state_flags & MIME_FLAG_IN_CONT_TRANS_ENC) @@ -493,7 +496,8 @@ void MimeSession::reset_mime_state() } const uint8_t* MimeSession::process_mime_data_paf( - Packet* p, const uint8_t* start, const uint8_t* end, bool upload, FilePosition position) + Packet* p, const uint8_t* start, const uint8_t* end, bool upload, FilePosition position, + AttachmentBuffer* attachment) { Flow* flow = p->flow; bool done_data = is_end_of_data(flow); @@ -594,9 +598,19 @@ const uint8_t* MimeSession::process_mime_data_paf( else set_file_data(decomp_buffer, decomp_buf_size, file_counter); - attachment.data = decomp_buffer; - attachment.length = decomp_buf_size; - attachment.finished = isFileEnd(position); + if (attachment) + { + attachment->data = decomp_buffer; + attachment->length = decomp_buf_size; + attachment->finished = isFileEnd(position); + + attachment->started = isFileStart(position); + if (attachment->started) + { + attachment->filename = filename; + attachment->content_type = content_type; + } + } } // Process file type/file signature @@ -664,6 +678,7 @@ void MimeSession::reset_part_state() // Clear MIME's file data to prepare for next file filename.clear(); + content_type.clear(); file_counter++; file_offset = 0; current_file_cache_file_id = 0; @@ -674,21 +689,20 @@ void MimeSession::reset_part_state() // Main function for mime processing // This should be called when mime data is available const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start, - int data_size, bool upload, FilePosition position) + int data_size, bool upload, FilePosition position, AttachmentBuffer* attachment) { const uint8_t* attach_start = 
start; const uint8_t* attach_end; const uint8_t* data_end_marker = start + data_size; - attachment.data = nullptr; - attachment.length = 0; - attachment.finished = true; + if (attachment) + attachment->clear(); if (position != SNORT_FILE_POSITION_UNKNOWN) { process_mime_data_paf(p, attach_start, data_end_marker, - upload, position); + upload, position, attachment); return data_end_marker; } @@ -702,7 +716,7 @@ const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start, attach_end = start; finalFilePosition(&position); process_mime_data_paf(p, attach_start, attach_end, - upload, position); + upload, position, attachment); data_state = STATE_MIME_HEADER; return attach_end; } @@ -714,7 +728,7 @@ const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start, { updateFilePosition(&position, file_offset); process_mime_data_paf(p, attach_start, data_end_marker, - upload, position); + upload, position, attachment); } return data_end_marker; @@ -827,6 +841,24 @@ int MimeSession::extract_file_name(const char*& start, int length) return -1; } +int MimeSession::extract_content_type(const char*& start, uint32_t length) +{ + assert(start); + + const char* tmp = start; + const char* end = start + length; + + while (tmp < end and isspace(*tmp)) + tmp++; + start = tmp; + + while (tmp < end and *tmp != ';' and !isspace(*tmp)) + tmp++; + end = tmp; + + return (end - start); +} + /* * This is the initialization function for mime processing. 
* This should be called when snort initializes diff --git a/src/mime/file_mime_process.h b/src/mime/file_mime_process.h index 3bb4850cf..554fe70e5 100644 --- a/src/mime/file_mime_process.h +++ b/src/mime/file_mime_process.h @@ -62,6 +62,8 @@ enum FilenameState class SO_PUBLIC MimeSession { public: + struct AttachmentBuffer; + MimeSession(Packet*, const DecodeConfig*, MailLogConfig*, uint64_t base_file_id=0, const uint8_t* uri=nullptr, const int32_t uri_length=0); virtual ~MimeSession(); @@ -73,7 +75,7 @@ public: static void exit(); const uint8_t* process_mime_data(Packet*, const uint8_t *data, int data_size, - bool upload, FilePosition); + bool upload, FilePosition, AttachmentBuffer* attachment = nullptr); int get_data_state(); void set_data_state(int); @@ -86,15 +88,6 @@ public: const BufferData& get_ole_buf(); const BufferData& get_vba_inspect_buf(); - struct AttachmentBuffer - { - const uint8_t* data = nullptr; - uint32_t length = 0; - bool finished = true; - }; - - const AttachmentBuffer get_attachment() { return attachment; } - protected: MimeDecode* decode_state = nullptr; @@ -108,6 +101,7 @@ private: MimeStats* mime_stats = nullptr; FilenameState filename_state = CONT_DISP_FILENAME_PARAM_NAME; std::string filename; + std::string content_type; std::string host_name {""}; bool host_set = false; bool continue_inspecting_file = true; @@ -139,16 +133,34 @@ private: uint8_t* start_hdr, Packet* p); const uint8_t* process_mime_body(const uint8_t* ptr, const uint8_t* data_end, FilePosition); const uint8_t* process_mime_data_paf(Packet*, const uint8_t* start, const uint8_t* end, - bool upload, FilePosition); + bool upload, FilePosition, AttachmentBuffer* attachment); int extract_file_name(const char*& start, int length); + int extract_content_type(const char*& start, uint32_t length); + uint8_t* partial_header = nullptr; // single header line split into multiple sections uint32_t partial_header_len = 0; uint8_t* partial_data = nullptr; // attachment's trailing bytes 
(suspected boundary) uint32_t partial_data_len = 0; uint8_t* rebuilt_data = nullptr; // prepended attachment data for detection module +}; - AttachmentBuffer attachment; // decoded and uncompressed file body +struct MimeSession::AttachmentBuffer +{ + std::string filename; + std::string content_type; + const uint8_t* data = nullptr; + uint32_t length = 0; + bool started = false; + bool finished = true; + + void clear() + { + data = nullptr; + length = 0; + started = false; + finished = true; + } }; } #endif diff --git a/src/network_inspectors/extractor/extractor_http.cc b/src/network_inspectors/extractor/extractor_http.cc index fad67d08c..9eea28f69 100644 --- a/src/network_inspectors/extractor/extractor_http.cc +++ b/src/network_inspectors/extractor/extractor_http.cc @@ -125,12 +125,24 @@ static const char* get_resp_filenames(const DataEvent* event, const Flow*) return ((const HttpTransactionEndEvent*)event)->get_filename(HttpCommon::SRC_SERVER).c_str(); } +static const char* get_orig_mime_types(const DataEvent* event, const Flow*) +{ + return ((const HttpTransactionEndEvent*)event)->get_content_type(HttpCommon::SRC_CLIENT).c_str(); +} + +static const char* get_resp_mime_types(const DataEvent* event, const Flow*) +{ + return ((const HttpTransactionEndEvent*)event)->get_content_type(HttpCommon::SRC_SERVER).c_str(); +} + static const map sub_buf_getters = { {"version", get_version}, {"proxied", get_proxied}, {"orig_filenames", get_orig_filenames}, - {"resp_filenames", get_resp_filenames} + {"resp_filenames", get_resp_filenames}, + {"orig_mime_types", get_orig_mime_types}, + {"resp_mime_types", get_resp_mime_types} }; static const map sub_num_getters = diff --git a/src/network_inspectors/extractor/extractor_service.cc b/src/network_inspectors/extractor/extractor_service.cc index 19658404d..a8e32e8bf 100644 --- a/src/network_inspectors/extractor/extractor_service.cc +++ b/src/network_inspectors/extractor/extractor_service.cc @@ -249,7 +249,9 @@ const ServiceBlueprint 
HttpExtractorService::blueprint = "info_msg", "proxied", "orig_filenames", - "resp_filenames" + "resp_filenames", + "orig_mime_types", + "resp_mime_types" }, }; diff --git a/src/pub_sub/http_transaction_end_event.cc b/src/pub_sub/http_transaction_end_event.cc index be35661cd..5ce225b7f 100644 --- a/src/pub_sub/http_transaction_end_event.cc +++ b/src/pub_sub/http_transaction_end_event.cc @@ -140,6 +140,11 @@ const std::string& HttpTransactionEndEvent::get_filename(HttpCommon::SourceId sr return transaction->get_filename(src_id); } +const std::string& HttpTransactionEndEvent::get_content_type(HttpCommon::SourceId src_id) const +{ + return transaction->get_content_type(src_id); +} + const std::string& HttpTransactionEndEvent::get_proxied() const { if (proxies != nullptr) @@ -163,9 +168,9 @@ const std::string& HttpTransactionEndEvent::get_proxied() const if (val.length() > 0) { if (!proxies->empty()) - proxies->append(","); + proxies->append(" "); proxies->append(hdr.second); - proxies->append(" -> "); + proxies->append("->"); proxies->append((const char*)val.start(), val.length()); } } diff --git a/src/pub_sub/http_transaction_end_event.h b/src/pub_sub/http_transaction_end_event.h index 7fda6f0dd..03933b239 100644 --- a/src/pub_sub/http_transaction_end_event.h +++ b/src/pub_sub/http_transaction_end_event.h @@ -56,6 +56,7 @@ public: uint8_t get_info_code() const; const Field& get_info_msg() const; const std::string& get_filename(HttpCommon::SourceId) const; + const std::string& get_content_type(HttpCommon::SourceId) const; const std::string& get_proxied() const; private: diff --git a/src/pub_sub/test/pub_sub_http_transaction_end_event_test.cc b/src/pub_sub/test/pub_sub_http_transaction_end_event_test.cc index 8762cc6fb..df28861b1 100644 --- a/src/pub_sub/test/pub_sub_http_transaction_end_event_test.cc +++ b/src/pub_sub/test/pub_sub_http_transaction_end_event_test.cc @@ -192,8 +192,8 @@ TEST(pub_sub_http_transaction_end_event_test, proxied_str_exists) HttpMsgHeader* hdr 
= new HttpMsgHeader((uint8_t*)buf, sizeof(buf), flow_data, SRC_CLIENT, false, flow, &params); trans->set_header(hdr, SRC_CLIENT); HttpTransactionEndEvent event(trans); - const std::string result = "FORWARDED -> odd,X-FORWARDED-FOR -> odd,X-FORWARDED-FROM -> odd," - "CLIENT-IP -> odd,VIA -> odd,XROXY-CONNECTION -> odd,PROXY-CONNECTION -> odd"; + const std::string result = "FORWARDED->odd X-FORWARDED-FOR->odd X-FORWARDED-FROM->odd " + "CLIENT-IP->odd VIA->odd XROXY-CONNECTION->odd PROXY-CONNECTION->odd"; test_number = 1; std::string proxied = event.get_proxied(); CHECK(proxied == result); diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc index e7c80933e..6ed1f03b8 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.cc +++ b/src/service_inspectors/http_inspect/http_msg_body.cc @@ -199,10 +199,11 @@ void HttpMsgBody::analyze() { // After process_mime_data(), ptr will point to the last byte processed in the current MIME part ptr = session_data->mime_state[source_id]->process_mime_data(p, ptr, - (section_end - ptr), true, SNORT_FILE_POSITION_UNKNOWN); + (section_end - ptr), true, SNORT_FILE_POSITION_UNKNOWN, &latest_attachment); ptr++; - latest_attachment = session_data->mime_state[source_id]->get_attachment(); + if (latest_attachment.started) + transaction->add_filename(source_id, latest_attachment.filename, latest_attachment.content_type); if (!latest_attachment.data) { @@ -697,10 +698,14 @@ void HttpMsgBody::do_file_processing(const Field& file_data) const uint8_t* filename_buffer = nullptr; uint32_t filename_length = 0; + const uint8_t* filetype_buffer = nullptr; + uint32_t filetype_length = 0; const uint8_t* uri_buffer = nullptr; uint32_t uri_length = 0; + if (request != nullptr) - get_file_info(dir, filename_buffer, filename_length, uri_buffer, uri_length); + get_file_info(dir, filename_buffer, filename_length, filetype_buffer, filetype_length, + uri_buffer, uri_length); // Get host from
the uri. if (host.empty() and request != nullptr) @@ -723,7 +728,8 @@ void HttpMsgBody::do_file_processing(const Field& file_data) filename_length, 0, get_header(source_id)->get_multi_file_processing_id(), uri_buffer, uri_length); - transaction->set_filename(source_id, (const char*) filename_buffer, filename_length); + transaction->add_filename(source_id, (const char*) filename_buffer, filename_length, + (const char*) filetype_buffer, filetype_length); } } } @@ -808,7 +814,8 @@ void HttpMsgBody::clear() // query or fragment. For the uri, use the request raw uri. If there is no URI or nothing in the // path after the last slash, the filename and uri buffers may be empty. The normalized URI is used. void HttpMsgBody::get_file_info(FileDirection dir, const uint8_t*& filename_buffer, - uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length) + uint32_t& filename_length, const uint8_t*& filetype_buffer, uint32_t& filetype_length, + const uint8_t*& uri_buffer, uint32_t& uri_length) { filename_buffer = uri_buffer = nullptr; filename_length = uri_length = 0; @@ -825,6 +832,15 @@ void HttpMsgBody::get_file_info(FileDirection dir, const uint8_t*& filename_buff } } + const Field& filetype = get_header(source_id)->get_header_value_norm(HEAD_CONTENT_TYPE); + if (filetype.length() > 0) + { + filetype_buffer = filetype.start(); + filetype_length = filetype.length(); + if (filetype_buffer[filetype_length - 1] == ';') + filetype_length--; + } + if (http_uri) { const Field& uri_field = http_uri->get_norm_classic(); diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h index 17cd366d5..7c55c7652 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.h +++ b/src/service_inspectors/http_inspect/http_msg_body.h @@ -82,7 +82,8 @@ private: uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length, int32_t detect_length); void get_file_info( FileDirection dir, const uint8_t*& 
filename_buffer, - uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length); + uint32_t& filename_length, const uint8_t*& filetype_buffer, uint32_t& filetype_length, + const uint8_t*& uri_buffer, uint32_t& uri_length); void get_ole_data(); Field msg_text_new; diff --git a/src/service_inspectors/http_inspect/http_transaction.cc b/src/service_inspectors/http_inspect/http_transaction.cc index 3846065eb..8b79583d3 100644 --- a/src/service_inspectors/http_inspect/http_transaction.cc +++ b/src/service_inspectors/http_inspect/http_transaction.cc @@ -347,3 +347,32 @@ const Field& HttpTransaction::get_info_msg() const return Field::FIELD_NULL; } + +void HttpTransaction::append_separator_if_needed(HttpCommon::SourceId source_id) +{ + static const std::string separator = " "; + + if (!filename[source_id].empty()) + { + filename[source_id].append(separator); + content_type[source_id].append(separator); + } +} + +void HttpTransaction::add_filename(HttpCommon::SourceId source_id, const char* fname, uint32_t len, + const char* ftype, uint32_t tlen) +{ + append_separator_if_needed(source_id); + if (len) + filename[source_id].append(fname, len); + if (tlen) + content_type[source_id].append(ftype, tlen); +} + +void HttpTransaction::add_filename(HttpCommon::SourceId source_id, const std::string& fname, + const std::string& ftype) +{ + append_separator_if_needed(source_id); + filename[source_id].append(fname); + content_type[source_id].append(ftype); +} diff --git a/src/service_inspectors/http_inspect/http_transaction.h b/src/service_inspectors/http_inspect/http_transaction.h index d64a6e817..25a2ec987 100644 --- a/src/service_inspectors/http_inspect/http_transaction.h +++ b/src/service_inspectors/http_inspect/http_transaction.h @@ -49,12 +49,12 @@ public: HttpMsgHeader* get_header(HttpCommon::SourceId source_id) const { return header[source_id]; } void set_header(HttpMsgHeader* header_, HttpCommon::SourceId source_id) - { header[source_id] = header_; } + { 
header[source_id] = header_; } HttpMsgTrailer* get_trailer(HttpCommon::SourceId source_id) const - { return trailer[source_id]; } + { return trailer[source_id]; } void set_trailer(HttpMsgTrailer* trailer_, HttpCommon::SourceId source_id) - { trailer[source_id] = trailer_; } + { trailer[source_id] = trailer_; } void set_body(HttpMsgBody* latest_body); HttpInfractions* get_infractions(HttpCommon::SourceId); @@ -63,15 +63,19 @@ public: bool final_response() const { return !second_response_expected; } void add_body_len(HttpCommon::SourceId source_id, uint64_t len) - { body_len[source_id] += len; } + { body_len[source_id] += len; } uint64_t get_body_len(HttpCommon::SourceId source_id) const - { return body_len[source_id]; } + { return body_len[source_id]; } uint8_t get_info_code() const; const Field& get_info_msg() const; - void set_filename(HttpCommon::SourceId source_id, const char* fname, uint32_t len) - { filename[source_id].assign(fname, len);} + void add_filename(HttpCommon::SourceId source_id, const char* fname, uint32_t flen, + const char* ftype, uint32_t tlen); + void add_filename(HttpCommon::SourceId source_id, + const std::string& fname, const std::string& ftype); const std::string& get_filename(HttpCommon::SourceId source_id) const - { return filename[source_id]; } + { return filename[source_id]; } + const std::string& get_content_type(HttpCommon::SourceId source_id) const + { return content_type[source_id]; } void clear_section(); bool is_clear() const { return active_sections == 0; } @@ -85,6 +89,7 @@ private: void archive_status(HttpMsgStatus*); void archive_header(HttpMsgHeader*); void publish_end_of_transaction(); + void append_separator_if_needed(HttpCommon::SourceId); HttpFlowData* const session_data; @@ -112,7 +117,8 @@ private: snort::Flow* const flow; uint64_t body_len[2] = { 0, 0 }; - std::string filename[2]; + std::string filename[2]; + std::string content_type[2]; // Estimates of how much memory http_inspect uses to process a transaction static 
const uint16_t small_things = 400; // minor memory costs not otherwise accounted for