From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Tue, 22 Aug 2023 15:05:49 +0000 (+0000) Subject: Pull request #3961: HTTP mime boundary X-Git-Tag: 3.1.69.0~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=14dd2d708a08f5b9272be93f95dacc17b1659fa3;p=thirdparty%2Fsnort3.git Pull request #3961: HTTP mime boundary Merge in SNORT/snort3 from ~OSHUMEIK/snort3:http_mime_boundary to master Squashed commit of the following: commit 3ab0ced3e66e7f16da26e2ada1340b34d4f10897 Author: Oleksii Shumeiko Date: Fri Aug 4 15:49:38 2023 +0300 mime: postpone boundary-look-alike data till the next PDU arrives Works only if the file position is unknown (http_inspect). commit 154e2cc8d636004796761f64f8ec515bbb0a9e5b Author: Oleksii Shumeiko Date: Thu Aug 3 21:02:24 2023 +0300 mime: support transport padding in boundary strings transport-padding := *LWSP-char In encapsulation as "dash-boundary transport-padding CRLF". In multipart-body as "delimiter transport-padding CRLF". commit 70d077a012bc79348017bd984f955c2b3ae3caec Author: Oleksii Shumeiko Date: Wed Aug 2 15:41:30 2023 +0300 mime: fix boundary search In a multipart body the delimiter starts with CRLF, followed by the boundary sequence. The first boundary may appear without a leading CRLF. However, scanning_boundary still ignores CRLF sequences, as they frequently occur in the file body. 
commit 0e07d0a7c584633d6267f7df6283c4fa53f49d31 Author: Oleksii Shumeiko Date: Wed Jul 26 14:52:29 2023 +0300 http_inspect: adjust formatting --- diff --git a/src/mime/file_mime_paf.cc b/src/mime/file_mime_paf.cc index c70bac113..ea8443b1b 100644 --- a/src/mime/file_mime_paf.cc +++ b/src/mime/file_mime_paf.cc @@ -97,12 +97,40 @@ static inline bool store_boundary(MimeDataPafInfo* data_info, uint8_t val) /* check the boundary string in the mail body*/ static inline bool check_boundary(MimeDataPafInfo* data_info, uint8_t data) { - /* Search for boundary signature "--"*/ + const auto prev_state = data_info->boundary_state; + + /* Search for boundary signature "{CRLF}--"*/ switch (data_info->boundary_state) { case MIME_PAF_BOUNDARY_UNKNOWN: + if (data == '\r') + data_info->boundary_state = MIME_PAF_BOUNDARY_CR; + else if (data == '\n') + data_info->boundary_state = MIME_PAF_BOUNDARY_LF; + else if (data == '-' && data_info->data_state == MIME_PAF_FOUND_FIRST_BOUNDARY_STATE) + data_info->boundary_state = MIME_PAF_BOUNDARY_HYPEN_FIRST; + else + return false; + break; + + case MIME_PAF_BOUNDARY_CR: + if (data == '\n') + data_info->boundary_state = MIME_PAF_BOUNDARY_LF; + else if (data == '\r') + data_info->boundary_state = MIME_PAF_BOUNDARY_CR; + else + data_info->boundary_state = MIME_PAF_BOUNDARY_UNKNOWN; + break; + + case MIME_PAF_BOUNDARY_LF: if (data == '-') data_info->boundary_state = MIME_PAF_BOUNDARY_HYPEN_FIRST; + else if (data == '\r') + data_info->boundary_state = MIME_PAF_BOUNDARY_CR; + else if (data == '\n') + data_info->boundary_state = MIME_PAF_BOUNDARY_LF; + else + data_info->boundary_state = MIME_PAF_BOUNDARY_UNKNOWN; break; case MIME_PAF_BOUNDARY_HYPEN_FIRST: @@ -111,6 +139,10 @@ static inline bool check_boundary(MimeDataPafInfo* data_info, uint8_t data) data_info->boundary_state = MIME_PAF_BOUNDARY_HYPEN_SECOND; data_info->boundary_search = data_info->boundary; } + else if (data == '\r') + data_info->boundary_state = MIME_PAF_BOUNDARY_CR; + else if (data 
== '\n') + data_info->boundary_state = MIME_PAF_BOUNDARY_LF; else data_info->boundary_state = MIME_PAF_BOUNDARY_UNKNOWN; break; @@ -122,21 +154,31 @@ static inline bool check_boundary(MimeDataPafInfo* data_info, uint8_t data) if (data == '\n') { /*reset boundary search etc.*/ + data_info->boundary_search_len += 1; data_info->boundary_state = MIME_PAF_BOUNDARY_UNKNOWN; return true; } - else if ((data != '\r') && ((data != '-'))) + else if (data != '\r' && data != '-' && data != ' ' && data != '\t') data_info->boundary_state = MIME_PAF_BOUNDARY_UNKNOWN; } else if (*(data_info->boundary_search) == data) data_info->boundary_search++; - else if (data == '-') - data_info->boundary_state = MIME_PAF_BOUNDARY_HYPEN_FIRST; + else if (data == '\r') + data_info->boundary_state = MIME_PAF_BOUNDARY_CR; + else if (data == '\n') + data_info->boundary_state = MIME_PAF_BOUNDARY_LF; else data_info->boundary_state = MIME_PAF_BOUNDARY_UNKNOWN; break; } + if (MIME_PAF_BOUNDARY_UNKNOWN == data_info->boundary_state) + data_info->boundary_search_len = 0; + else if (prev_state >= data_info->boundary_state && prev_state != MIME_PAF_BOUNDARY_HYPEN_SECOND) + data_info->boundary_search_len = 1; + else + data_info->boundary_search_len += 1; + return false; } @@ -145,6 +187,7 @@ namespace snort void reset_mime_paf_state(MimeDataPafInfo* data_info) { data_info->boundary_search = nullptr; + data_info->boundary_search_len = 0; data_info->boundary_len = 0; data_info->boundary[0] = '\0'; data_info->boundary_state = MIME_PAF_BOUNDARY_UNKNOWN; @@ -157,18 +200,17 @@ bool process_mime_paf_data(MimeDataPafInfo* data_info, uint8_t data) switch (data_info->data_state) { case MIME_PAF_FINDING_BOUNDARY_STATE: - /* Search for boundary Store boundary string in PAF state*/ if (store_boundary(data_info, data)) { - /* End of boundary, move to MIME_PAF_FOUND_BOUNDARY_STATE*/ - data_info->data_state = MIME_PAF_FOUND_BOUNDARY_STATE; + data_info->data_state = MIME_PAF_FOUND_FIRST_BOUNDARY_STATE; } break; + case 
MIME_PAF_FOUND_FIRST_BOUNDARY_STATE: case MIME_PAF_FOUND_BOUNDARY_STATE: if (check_boundary(data_info, data)) { - /* End of boundary, move to MIME_PAF_FOUND_BOUNDARY_STATE*/ + data_info->data_state = MIME_PAF_FOUND_BOUNDARY_STATE; return true; } break; diff --git a/src/mime/file_mime_paf.h b/src/mime/file_mime_paf.h index 13a87c803..7d215af83 100644 --- a/src/mime/file_mime_paf.h +++ b/src/mime/file_mime_paf.h @@ -30,6 +30,7 @@ enum MimeDataState { MIME_PAF_FINDING_BOUNDARY_STATE, + MIME_PAF_FOUND_FIRST_BOUNDARY_STATE, MIME_PAF_FOUND_BOUNDARY_STATE }; @@ -37,6 +38,8 @@ enum MimeDataState enum MimeBoundaryState { MIME_PAF_BOUNDARY_UNKNOWN = 0, /* UNKNOWN */ + MIME_PAF_BOUNDARY_CR, /* '\r' */ + MIME_PAF_BOUNDARY_LF, /* '\n' */ MIME_PAF_BOUNDARY_HYPEN_FIRST, /* First '-' */ MIME_PAF_BOUNDARY_HYPEN_SECOND /* Second '-' */ }; @@ -59,15 +62,16 @@ struct MimeDataPafInfo MimeDataState data_state; char boundary[ MAX_MIME_BOUNDARY_LEN + 1]; /* MIME boundary string + '\0' */ int boundary_len; + int boundary_search_len; const char* boundary_search; MimeBoundaryState boundary_state; }; inline bool scanning_boundary(MimeDataPafInfo* mime_info, uint32_t boundary_start, uint32_t* fp) { - if (boundary_start && - mime_info->data_state == MIME_PAF_FOUND_BOUNDARY_STATE && - mime_info->boundary_state != MIME_PAF_BOUNDARY_UNKNOWN) + if (boundary_start + && mime_info->data_state != MIME_PAF_FINDING_BOUNDARY_STATE + && mime_info->boundary_state >= MIME_PAF_BOUNDARY_HYPEN_FIRST) { *fp = boundary_start; return true; diff --git a/src/mime/file_mime_process.cc b/src/mime/file_mime_process.cc index 31e4ba436..fe99fe386 100644 --- a/src/mime/file_mime_process.cc +++ b/src/mime/file_mime_process.cc @@ -233,6 +233,9 @@ const uint8_t* MimeSession::process_mime_header(Packet* p, const uint8_t* ptr, if (!cont) { data_state = STATE_DATA_BODY; + delete[] partial_data; + partial_data = nullptr; + partial_data_len = 0; } return ptr; } @@ -437,42 +440,43 @@ static const uint8_t* GetDataEnd(const uint8_t* 
data_start, return data_end_marker; } -/* - * Handle DATA_BODY state - * @param packet standard Packet structure - * @param i index into p->payload buffer to start looking at data - * @return i index into p->payload where we stopped looking at data - */ const uint8_t* MimeSession::process_mime_body(const uint8_t* ptr, - const uint8_t* data_end, bool is_body_end) + const uint8_t* data_end, FilePosition position) { - if (state_flags & MIME_FLAG_FILE_ATTACH) + auto data_size = data_end - ptr; + + if (partial_data && mime_boundary.boundary_search_len < data_size) { - const uint8_t* attach_start = ptr; - const uint8_t* attach_end; + delete[] rebuilt_data; + rebuilt_data = new uint8_t[partial_data_len + data_size]; + memcpy(rebuilt_data, partial_data, partial_data_len); + memcpy(rebuilt_data + partial_data_len, ptr, data_size); - if (is_body_end ) - { - attach_end = GetDataEnd(ptr, data_end); - } - else - { - attach_end = data_end; - } + ptr = rebuilt_data; + data_size = partial_data_len + data_size; - if (( attach_start < attach_end ) && decode_state) - { - decode_state->finalize_decoder(mime_stats); - if (decode_state->decode_data(attach_start, attach_end) == DECODE_FAIL ) - { - decode_alert(); - } - } + delete[] partial_data; + partial_data = nullptr; + partial_data_len = 0; + } + + const uint8_t* attach_end = isFileEnd(position) && mime_boundary.boundary_search_len < data_size + ? 
GetDataEnd(ptr, ptr + data_size) : ptr + data_size - mime_boundary.boundary_search_len; + + if (!isFileEnd(position) + && mime_boundary.boundary_search_len && mime_boundary.boundary_state != MIME_PAF_BOUNDARY_UNKNOWN) + { + delete[] partial_data; + partial_data_len = mime_boundary.boundary_search_len; + partial_data = new uint8_t[partial_data_len]; + memcpy(partial_data, attach_end, partial_data_len); } - if (is_body_end) + if (ptr < attach_end && decode_state) { - data_state = STATE_MIME_HEADER; + decode_state->finalize_decoder(mime_stats); + if (decode_state->decode_data(ptr, attach_end) == DECODE_FAIL) + decode_alert(); } return data_end; @@ -556,11 +560,17 @@ const uint8_t* MimeSession::process_mime_data_paf( case STATE_MIME_HEADER: start = process_mime_header(p, start, end); break; + case STATE_DATA_BODY: - start = process_mime_body(start, end, isFileEnd(position) ); + if (isFileEnd(position)) + data_state = STATE_MIME_HEADER; - if (state_flags & MIME_FLAG_FILE_ATTACH) + if (!(state_flags & MIME_FLAG_FILE_ATTACH)) + start = end; + else { + start = process_mime_body(start, end, position); + const DecodeConfig* conf = decode_conf; const uint8_t* buffer = nullptr; uint32_t buf_size = 0; @@ -574,11 +584,10 @@ const uint8_t* MimeSession::process_mime_data_paf( detection_size = (uint32_t)decode_state->get_detection_depth(); - DecodeResult result = decode_state->decompress_data( - buffer, detection_size, decomp_buffer, decomp_buf_size - ); + DecodeResult result = + decode_state->decompress_data(buffer, detection_size, decomp_buffer, decomp_buf_size); - if ( result != DECODE_SUCCESS ) + if (result != DECODE_SUCCESS) decompress_alert(); if (session_base_file_id) @@ -639,9 +648,15 @@ void MimeSession::reset_part_state() { state_flags = 0; filename_state = CONT_DISP_FILENAME_PARAM_NAME; + delete[] partial_header; partial_header = nullptr; partial_header_len = 0; + + delete[] partial_data; + partial_data = nullptr; + partial_data_len = 0; + if (decode_state) { 
decode_state->clear_decode_state(); @@ -690,7 +705,6 @@ const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start, process_mime_data_paf(p, attach_start, attach_end, upload, position); data_state = STATE_MIME_HEADER; - position = SNORT_FILE_START; return attach_end; } @@ -856,6 +870,8 @@ MimeSession::~MimeSession() { delete decode_state; delete[] partial_header; + delete[] partial_data; + delete[] rebuilt_data; } // File verdicts get cached with key (file_id, sip, dip). File_id is hash of filename if available. diff --git a/src/mime/file_mime_process.h b/src/mime/file_mime_process.h index ffc63eef5..bb822b25a 100644 --- a/src/mime/file_mime_process.h +++ b/src/mime/file_mime_process.h @@ -133,15 +133,18 @@ private: const uint8_t* process_mime_header(Packet*, const uint8_t* ptr, const uint8_t* data_end_marker); bool process_header_line(const uint8_t*& ptr, const uint8_t* eol, const uint8_t* eolm, const uint8_t* start_hdr, Packet* p); - const uint8_t* process_mime_body(const uint8_t* ptr, const uint8_t* data_end,bool is_data_end); + const uint8_t* process_mime_body(const uint8_t* ptr, const uint8_t* data_end, FilePosition); const uint8_t* process_mime_data_paf(Packet*, const uint8_t* start, const uint8_t* end, bool upload, FilePosition); int extract_file_name(const char*& start, int length); - uint8_t* partial_header = nullptr; + uint8_t* partial_header = nullptr; // single header line split into multiple sections uint32_t partial_header_len = 0; + uint8_t* partial_data = nullptr; // attachment's trailing bytes (suspected boundary) + uint32_t partial_data_len = 0; + uint8_t* rebuilt_data = nullptr; // prepended attachment data for detection module - AttachmentBuffer attachment; + AttachmentBuffer attachment; // decoded and uncompressed file body }; } #endif diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc index 6ead8ac42..21e3df583 100644 --- 
a/src/service_inspectors/http_inspect/http_msg_body.cc +++ b/src/service_inspectors/http_inspect/http_msg_body.cc @@ -202,40 +202,44 @@ void HttpMsgBody::analyze() ptr++; latest_attachment = session_data->mime_state[source_id]->get_attachment(); - if (latest_attachment.data != nullptr) + + if (!latest_attachment.data) { - uint32_t attach_length; - uint8_t* attach_buf; - if (!last_attachment_complete) - { - assert(!mime_bufs->empty()); - // Remove the partial attachment from the list and replace it with an extended version - const uint8_t* const old_buf = mime_bufs->back().file.start(); - const uint32_t old_length = mime_bufs->back().file.length(); - attach_length = old_length + latest_attachment.length; - attach_buf = new uint8_t[attach_length]; - memcpy(attach_buf, old_buf, old_length); - memcpy(attach_buf + old_length, latest_attachment.data, latest_attachment.length); - mime_bufs->pop_back(); - } - else - { - attach_length = latest_attachment.length; - attach_buf = new uint8_t[attach_length]; - memcpy(attach_buf, latest_attachment.data, latest_attachment.length); - } - const BufferData& vba_buf = session_data->mime_state[source_id]->get_ole_buf(); - if (vba_buf.data_ptr() != nullptr) - { - uint8_t* my_vba_buf = new uint8_t[vba_buf.length()]; - memcpy(my_vba_buf, vba_buf.data_ptr(), vba_buf.length()); - mime_bufs->emplace_back(attach_length, attach_buf, true, vba_buf.length(), my_vba_buf, true); - } - else - mime_bufs->emplace_back(attach_length, attach_buf, true, STAT_NOT_PRESENT, nullptr, false); + last_attachment_complete = latest_attachment.finished; + continue; + } - mime_bufs->back().file.set_accumulation(!last_attachment_complete); + uint32_t attach_length; + uint8_t* attach_buf; + if (!last_attachment_complete) + { + assert(!mime_bufs->empty()); + // Remove the partial attachment from the list and replace it with an extended version + const uint8_t* const old_buf = mime_bufs->back().file.start(); + const uint32_t old_length = 
mime_bufs->back().file.length(); + attach_length = old_length + latest_attachment.length; + attach_buf = new uint8_t[attach_length]; + memcpy(attach_buf, old_buf, old_length); + memcpy(attach_buf + old_length, latest_attachment.data, latest_attachment.length); + mime_bufs->pop_back(); + } + else + { + attach_length = latest_attachment.length; + attach_buf = new uint8_t[attach_length]; + memcpy(attach_buf, latest_attachment.data, latest_attachment.length); } + const BufferData& vba_buf = session_data->mime_state[source_id]->get_ole_buf(); + if (vba_buf.data_ptr() != nullptr) + { + uint8_t* my_vba_buf = new uint8_t[vba_buf.length()]; + memcpy(my_vba_buf, vba_buf.data_ptr(), vba_buf.length()); + mime_bufs->emplace_back(attach_length, attach_buf, true, vba_buf.length(), my_vba_buf, true); + } + else + mime_bufs->emplace_back(attach_length, attach_buf, true, STAT_NOT_PRESENT, nullptr, false); + + mime_bufs->back().file.set_accumulation(!last_attachment_complete); last_attachment_complete = latest_attachment.finished; }