From: Tom Peters (thopeter) Date: Mon, 10 Oct 2022 19:00:07 +0000 (+0000) Subject: Pull request #3605: http_inspect: improved MIME processing X-Git-Tag: 3.1.45.0~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9092e33f470af0ee547e843f70a19b13719c4ccd;p=thirdparty%2Fsnort3.git Pull request #3605: http_inspect: improved MIME processing Merge in SNORT/snort3 from ~THOPETER/snort3:nhttp167 to master Squashed commit of the following: commit d383065b2a4a030102b7b8464320f68b97cf5fa7 Author: Tom Peters Date: Thu Aug 4 16:14:48 2022 -0400 http_inspect: inspect multiple MIME attachments per message section commit 084cbf53d63c61a97ed55f2e13523ab2fb249a2e Author: Tom Peters Date: Mon Jun 13 16:00:52 2022 -0400 http_inspect: MIME partial inspections --- diff --git a/doc/user/http_inspect.txt b/doc/user/http_inspect.txt index bdcb66ba0..b550f3513 100755 --- a/doc/user/http_inspect.txt +++ b/doc/user/http_inspect.txt @@ -209,6 +209,14 @@ locate the OLE (Object Linking and Embedding) file embedded with the files containing RLE compressed vba macro data. The decompressed vba macro data is then made available through the vba_data ips rule option. +===== max_mime_attach + +HTTP request message bodies may be in MIME format. Each file attachment is +separately incorporated in the file_data rule option. When a request contains +many small file attachments these inspections may consume a lot of processing +power. This parameter limits the number of files from one message that are +inspected. The default value is 5. 
+ ===== normalize_javascript normalize_javascript = true will enable legacy normalizer of JavaScript within diff --git a/src/mime/file_mime_process.cc b/src/mime/file_mime_process.cc index 15cc19106..04f9c99b4 100644 --- a/src/mime/file_mime_process.cc +++ b/src/mime/file_mime_process.cc @@ -583,6 +583,9 @@ const uint8_t* MimeSession::process_mime_data_paf( decompress_alert(); set_file_data(decomp_buffer, decomp_buf_size); + attachment.data = decomp_buffer; + attachment.length = decomp_buf_size; + attachment.finished = isFileEnd(position); } // Process file type/file signature @@ -661,6 +664,10 @@ const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start, const uint8_t* data_end_marker = start + data_size; + attachment.data = nullptr; + attachment.length = 0; + attachment.finished = true; + if (position != SNORT_FILE_POSITION_UNKNOWN) { process_mime_data_paf(p, attach_start, data_end_marker, diff --git a/src/mime/file_mime_process.h b/src/mime/file_mime_process.h index 8eca8a757..7eff92c33 100644 --- a/src/mime/file_mime_process.h +++ b/src/mime/file_mime_process.h @@ -83,6 +83,15 @@ public: const BufferData& get_ole_buf(); const BufferData& get_vba_inspect_buf(); + struct AttachmentBuffer + { + const uint8_t* data = nullptr; + uint32_t length = 0; + bool finished = true; + }; + + const AttachmentBuffer get_attachment() { return attachment; } + protected: MimeDecode* decode_state = nullptr; @@ -131,6 +140,8 @@ private: uint8_t* partial_header = nullptr; uint32_t partial_header_len = 0; + + AttachmentBuffer attachment; }; } #endif diff --git a/src/pub_sub/test/pub_sub_http_request_body_event_test.cc b/src/pub_sub/test/pub_sub_http_request_body_event_test.cc index b1e98bacd..c7e4e7ccc 100644 --- a/src/pub_sub/test/pub_sub_http_request_body_event_test.cc +++ b/src/pub_sub/test/pub_sub_http_request_body_event_test.cc @@ -58,6 +58,9 @@ void HttpMsgBody::do_file_decompression(const Field&, Field&) {} void 
HttpMsgBody::do_enhanced_js_normalization(const Field&, Field&) {} void HttpMsgBody::clean_partial(uint32_t&, uint32_t&, uint8_t*&, uint32_t&) {} void HttpMsgBody::bookkeeping_regular_flush(uint32_t&, uint8_t*&, uint32_t&, int32_t) {} +bool HttpMsgBody::run_detection(snort::Packet*) { return true; } +void HttpMsgBody::clear() {} +void HttpMsgSection::clear() {} #ifdef REG_TEST void HttpMsgBody::print_body_section(FILE*, const char*) {} #endif @@ -78,6 +81,7 @@ HttpMsgSection::HttpMsgSection(const uint8_t* buffer, const uint16_t buf_size, tcp_close(false) {} void HttpMsgSection::update_depth() const{} +bool HttpMsgSection::run_detection(snort::Packet*) { return true; } HttpTransaction*HttpTransaction::attach_my_transaction(HttpFlowData*, HttpCommon::SourceId) { return nullptr; } diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index ca4bed748..9714577f8 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -1,6 +1,6 @@ -The new Snort HTTP inspector (HI) is divided into two major parts. The HttpStreamSplitter -(splitter) accepts TCP payload data from Stream and subdivides it into message sections. -HttpInspect (inspector) processes individual message sections. +The HTTP inspector (HI) is divided into two major parts. The HttpStreamSplitter (splitter) accepts +TCP payload data from Stream and subdivides it into message sections. HttpInspect (inspector) +processes individual message sections. Splitter finish() is called by Stream when the TCP connection closes (including pruning). It serves several specialized purposes in cases where the HTTP message is truncated (ends @@ -15,6 +15,12 @@ Javascripts. The stream splitter scan() method searches its input for the end-of "". When necessary this requires scan() to unzip the data. 
This is an extra unzip as storage limitations preclude saving the unzipped version of the data for subsequent reassembly. +Update: the previous sentence has been discovered to be incorrect. The memory requirements of +zlib are very large. It would save a lot of memory and some processing time for script detection +to unzip one time in scan() and store the result for eventual use by reassemble(). The memory +lost by storing partial message sections in HI while waiting for reassemble() would be more than +compensated for by not having two instances of zlib. + When the end of a script is found and the normal flush point has not been found, the current TCP segment and all previous segments for the current message section are flushed using a special procedure known as partial inspection. From the perspective of Stream (or H2I) a partial inspection @@ -49,14 +55,20 @@ detection triggered it, because H2I wanted it, or both. Some applications may be affected by blocks too late scenarios related to seeing part of the zero-length chunk. For example a TCP packet that ends with: + 8<CR><LF>abcdefgh<CR><LF>0 + might be sufficient to forward the available data ("abcdefgh") to the application even though the final <CR><LF> has not been received. + Note that the actual next bytes are uncertain here. The next packet might begin with <CR><LF>, but + 100000<CR><LF>ijklmnopq ... + is another perfectly legal possibility. There is no rule against starting a nonzero chunk length with a zero character and some applications reputedly do this. -As a precaution partial inspections performed when 1) a TCP segment ends inside a possible + +As a precaution partial inspection is performed when 1) a TCP segment ends inside a possible zero-length chunk or 2) chunk processing fails (broken chunk). HttpFlowData is a data class representing all HI information relating to a flow. It serves as @@ -73,10 +85,10 @@ processed together. There are eight types of message section: 4. 
Content-Length message body (a block of message data usually not much larger than 16K from a body defined by the Content-Length header) 5. Chunked message body (same but from a chunked body) -6. Old message body (same but from a body with no Content-Length header that runs to connection - close) +6. Old message body (same but from a response body with no Content-Length header that runs to + connection close) 7. HTTP/X message body (same but content taken from an HTTP/2 or HTTP/3 Data frame) -8. Trailers (all header lines following a chunked body as a group) +8. Trailers (all header lines following a chunked or HTTP/X body as a group) Message sections are represented by message section objects that contain and process them. There are twelve message section classes that inherit as follows. An asterisk denotes a virtual class. @@ -85,7 +97,7 @@ are twelve message section classes that inherit as follows. An asterisk denotes 2. HttpMsgStart* : HttpMsgSection - common elements of request and status 3. HttpMsgRequest : HttpMsgStart 4. HttpMsgStatus : HttpMsgStart -5. HttpMsgHeadShared* : HttpMsgSection - common elements of header and trailer +5. HttpMsgHeadShared* : HttpMsgSection - common elements of headers and trailers 6. HttpMsgHeader : HttpMsgHeadShared 7. HttpMsgTrailer : HttpMsgHeadShared 8. HttpMsgBody* : HttpMsgSection - common elements of message body processing @@ -138,6 +150,11 @@ derive it again. Once Field is set to a non-null value it should never change. The set() functions will assert if this rule is disregarded. +Partial inspections have created an exception. Fields may be used to store work products from a +partial inspection that may be updated by subsequent inspections. The reset() method has been +provided for this situation. It deletes any owned buffer and reinitializes the Field to null. +This feature should be used with care to avoid weakening the architecture. 
+ A Field may own the buffer containing the message or it may point to a buffer that belongs to someone else. When a Field owning a buffer is deleted the buffer is deleted as well. Ownership is determined when the Field is initially set. In general any dynamically allocated buffer should be @@ -227,28 +244,34 @@ be kept intact. Any string literals, added by the plus operator, will be concatenated. This also works for functions that result in string literals. Semicolons will be inserted, if not already present, according to ECMAScript automatic semicolon insertion rules. + All JavaScript identifier names, except those from the ident_ignore or prop_ignore lists, will be substituted with unified names in the following format: var_0000 -> var_ffff. -So, the number of unique identifiers available is 65536 names per HTTP transaction. -If Normalizer overruns the configured limit, built-in alert is generated. +The number of unique identifiers available is 65536 names per HTTP transaction. If Normalizer +overruns the configured limit, built-in alert is generated. + A config option to set the limit manually: + * http_inspect.js_norm_identifier_depth. -Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with +Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with the listed identifier, any chain of dot accessors, brackets and function calls will be kept intact. 
+ For example: + * console.log("bar") * document.getElementById("id").text * eval("script") * foo["bar"] -Ignored identifiers are configured via the following config option, -it accepts a list of object and function names: +Ignored identifiers are configured via the following config option that accepts a list of object +and function names: + * http_inspect.js_norm_ident_ignore = { 'console', 'document', 'eval', 'foo' } When a variable assignment that 'aliases' an identifier from the list is found, -the assignment will be tracked, and subsequent occurrences of the variable will be +the assignment will be tracked and subsequent occurrences of the variable will be replaced with the stored value. This substitution will follow JavaScript variable scope limits. @@ -264,6 +287,7 @@ list, the object will be tracked, and although its own identifier will be conver its property and function calls will be kept intact, as with ignored identifiers. For example: + var obj = new Array() obj.insert(1,2,3) // will be normalized to var_0000.insert(1,2,3) @@ -343,8 +367,8 @@ attacker cannot affect split points by adjusting their chunks. Built-in alerts for chunking are generated for protocol violations and suspicious usages. Many irregularities can be compensated for but others cannot. Whenever a fatal problem occurs, NHI generates 119:213 HTTP chunk misformatted and converts to a mode very similar to run to connection -close. The rest of the flow is sent to detection as-is. No further attempt is made to dechunk the -message body or look for the headers that begin the next message. The customer should block 119:213 +close. The rest of the flow is sent to detection as is. No further attempt is made to dechunk the +message body or look for the headers that begin the next message. The user should block 119:213 unless they are willing to run the risk of continuing with no real security. In addition to 119:213 there will often be a more specific alert based on what went wrong. 
@@ -397,6 +421,24 @@ generated and processing continues normally. If there is no separator at all tha Then we return to #1 as the next chunk begins. In particular extra separators beyond the two expected are attributed to the beginning of the next chunk. +MIME processing: + +NHI processes request message bodies in MIME format differently from other message bodies. Message +sections are forwarded to the MIME library instead of being directly input to file processing. The +library parses the input into individual MIME attachments. This creates a design issue because +there may be multiple attachments within a single message body section. The email inspectors solve +this issue by splitting MIME attachments within their stream splitters so that there is only one +attachment per reassembled packet. This attachment, if it contains a file, is the source material for +the file_data rule option. + +NHI stream splitter does not work this way. It does not consider MIME at all. Split points between +message sections are never based on MIME or any other type of message body content. + +The problem for NHI is that file_data is a singular entity and cannot accommodate multiple +simultaneous files derived from a message section. NHI resolves this by accumulating the processed +file attachments in a list and directly calling detection multiple times--once for each file +attachment installed as file_data. + Rule options: HttpIpsOption is the base class for http rule options. It supports the commonly used parameters: @@ -453,7 +495,7 @@ Insert commands: $fill create a paragraph consisting of octets of auto-fill data ABCDEFGHIJABC .... $fileread read the specified number of bytes from the included file into the - message buffer. Each read corresponds to one TCP section. + message buffer. $h2preface creates the HTTP/2 connection preface "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" $h2frameheader generates an HTTP/2 frame header. 
The frame type may be the frame type name in all lowercase or the numeric frame type code: @@ -476,7 +518,7 @@ Escape sequences begin with '\'. They may be used within a paragraph or to begin \xnn or \Xnn - where nn is a two-digit hexadecimal number. Insert an arbitrary 8-bit number as the next character. a-f and A-F are both acceptable. -Data is separated into segments for presentation to the splitter whenever a paragraph ends (blank +Data are separated into segments for presentation to the splitter whenever a paragraph ends (blank line). When the inspector aborts the connection (scan() returns StreamSplitter::ABORT) it does not expect diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 487a3efd0..2d7795880 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -69,7 +69,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION, PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY, PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL, - PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_COUNT_MAX }; + PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX }; // Result of scanning by splitter enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE, diff --git a/src/service_inspectors/http_inspect/http_field.cc b/src/service_inspectors/http_inspect/http_field.cc index a2aaaf6ef..c52735a85 100644 --- a/src/service_inspectors/http_inspect/http_field.cc +++ b/src/service_inspectors/http_inspect/http_field.cc @@ -39,16 +39,6 @@ Field::Field(int32_t length, const uint8_t* start, bool own_the_buffer_) : assert(!((start != nullptr) && (length < 0))); } -Field& 
Field::operator=(const Field& rhs) -{ - assert(len == STAT_NOT_COMPUTE); - assert(strt == nullptr); - strt = rhs.strt; - len = rhs.len; - own_the_buffer = false; // buffer must not have two owners - return *this; -} - void Field::set(int32_t length, const uint8_t* start, bool own_the_buffer_) { assert(len == STAT_NOT_COMPUTE); @@ -77,6 +67,15 @@ void Field::set(const Field& f) // Both Fields cannot be responsible for deleting the buffer so do not copy own_the_buffer } +void Field::reset() +{ + if (own_the_buffer) + delete[] strt; + strt = nullptr; + len = STAT_NOT_COMPUTE; + own_the_buffer = false; +} + #ifdef REG_TEST void Field::print(FILE* output, const char* name) const { @@ -112,5 +111,6 @@ void Field::print(FILE* output, const char* name) const } fprintf(output, "\n"); } + #endif diff --git a/src/service_inspectors/http_inspect/http_field.h b/src/service_inspectors/http_inspect/http_field.h index 19b17296b..5abae6fa8 100644 --- a/src/service_inspectors/http_inspect/http_field.h +++ b/src/service_inspectors/http_inspect/http_field.h @@ -39,7 +39,10 @@ public: Field(int32_t length, const uint8_t* start, bool own_the_buffer_ = false); explicit Field(int32_t length) : len(length) { assert(length<=0); } Field() = default; - Field& operator=(const Field& rhs); + + // own_the_buffer precludes trivial copy assignment + Field& operator=(const Field& rhs) = delete; + ~Field() { if (own_the_buffer) delete[] strt; } int32_t length() const { return len; } const uint8_t* start() const { return strt; } @@ -47,6 +50,7 @@ public: void set(const Field& f); void set(HttpCommon::StatusCode stat_code); void set(int32_t length) { set(static_cast(length)); } + void reset(); #ifdef REG_TEST void print(FILE* output, const char* name) const; @@ -58,5 +62,15 @@ private: bool own_the_buffer = false; }; +struct MimeBufs +{ + Field file; + Field vba; + MimeBufs(int32_t file_len, const uint8_t* file_buf, bool file_own, int32_t vba_len, const uint8_t* vba_buf, + bool vba_own) : + 
file(file_len, file_buf, file_own), + vba(vba_len, vba_buf, vba_own) {} +}; + #endif diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc index 26ff9ba42..73cc6dce5 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.cc +++ b/src/service_inspectors/http_inspect/http_flow_data.cc @@ -121,6 +121,7 @@ HttpFlowData::~HttpFlowData() delete[] section_buffer[k]; delete[] partial_buffer[k]; delete[] partial_detect_buffer[k]; + delete partial_mime_bufs[k]; HttpTransaction::delete_transaction(transaction[k], nullptr); delete cutter[k]; if (compress_stream[k] != nullptr) @@ -147,6 +148,8 @@ HttpFlowData::~HttpFlowData() void HttpFlowData::half_reset(SourceId source_id) { assert((source_id == SRC_CLIENT) || (source_id == SRC_SERVER)); + assert(partial_mime_bufs[source_id] == nullptr); + assert(partial_mime_last_complete[source_id]); version_id[source_id] = VERS__NOT_PRESENT; data_length[source_id] = STAT_NOT_PRESENT; diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h index aa824c7a3..e0f4a3c4a 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.h +++ b/src/service_inspectors/http_inspect/http_flow_data.h @@ -23,6 +23,7 @@ #include #include +#include #include "flow/flow.h" #include "utils/util_utf.h" @@ -31,6 +32,7 @@ #include "http_common.h" #include "http_enum.h" #include "http_event.h" +#include "http_field.h" #include "http_module.h" class HttpTransaction; @@ -180,6 +182,8 @@ private: uint8_t* partial_detect_buffer[2] = { nullptr, nullptr }; uint32_t partial_detect_length[2] = { 0, 0 }; uint32_t partial_js_detect_length[2] = { 0, 0 }; + std::list* partial_mime_bufs[2] = { nullptr, nullptr }; + bool partial_mime_last_complete[2] = { true, true }; int32_t status_code_num = HttpCommon::STAT_NOT_PRESENT; HttpEnums::VersionId version_id[2] = { HttpEnums::VERS__NOT_PRESENT, HttpEnums::VERS__NOT_PRESENT }; diff --git 
a/src/service_inspectors/http_inspect/http_inspect.cc b/src/service_inspectors/http_inspect/http_inspect.cc index 66a9e47ff..91d1e3589 100755 --- a/src/service_inspectors/http_inspect/http_inspect.cc +++ b/src/service_inspectors/http_inspect/http_inspect.cc @@ -170,10 +170,10 @@ void HttpInspect::show(const SnortConfig*) const ConfigLogger::log_flag("decompress_swf", params->decompress_swf); ConfigLogger::log_flag("decompress_zip", params->decompress_zip); ConfigLogger::log_flag("decompress_vba", params->decompress_vba); + ConfigLogger::log_value("max_mime_attach", params->max_mime_attach); ConfigLogger::log_flag("script_detection", params->script_detection); ConfigLogger::log_flag("normalize_javascript", params->js_norm_param.normalize_javascript); - ConfigLogger::log_value("max_javascript_whitespaces", - params->js_norm_param.max_javascript_whitespaces); + ConfigLogger::log_value("max_javascript_whitespaces", params->js_norm_param.max_javascript_whitespaces); ConfigLogger::log_value("js_norm_bytes_depth", params->js_norm_param.js_norm_bytes_depth); ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth); ConfigLogger::log_value("js_norm_max_tmpl_nest", params->js_norm_param.max_template_nesting); diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc index 672d88377..69fa99861 100755 --- a/src/service_inspectors/http_inspect/http_module.cc +++ b/src/service_inspectors/http_inspect/http_module.cc @@ -95,6 +95,9 @@ const Parameter HttpModule::http_params[] = { "decompress_vba", Parameter::PT_BOOL, nullptr, "false", "decompress MS Office Visual Basic for Applications macro files in response bodies" }, + { "max_mime_attach", Parameter::PT_INT, "1:65535", "5", + "maximum number of mime attachments that will be inspected in a section of a request message" }, + { "script_detection", Parameter::PT_BOOL, nullptr, "false", "inspect JavaScript immediately upon script end" }, 
@@ -298,6 +301,10 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*) { params->decompress_vba = val.get_bool(); } + else if (val.is("max_mime_attach")) + { + params->max_mime_attach = val.get_uint32(); + } else if (val.is("script_detection")) { params->script_detection = val.get_bool(); diff --git a/src/service_inspectors/http_inspect/http_module.h b/src/service_inspectors/http_inspect/http_module.h index 455c8bcc6..2ccb7da90 100755 --- a/src/service_inspectors/http_inspect/http_module.h +++ b/src/service_inspectors/http_inspect/http_module.h @@ -62,6 +62,7 @@ public: bool decompress_zip = false; bool decompress_vba = false; snort::DecodeConfig* mime_decode_conf; + uint32_t max_mime_attach = 5; bool script_detection = false; snort::LiteralSearch::Handle* script_detection_handle = nullptr; bool publish_request_body = true; diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc index a6d565ef4..29bc17852 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.cc +++ b/src/service_inspectors/http_inspect/http_msg_body.cc @@ -158,18 +158,61 @@ void HttpMsgBody::analyze() Packet* p = DetectionEngine::get_current_packet(); const uint8_t* const section_end = msg_text_new.start() + msg_text_new.length(); const uint8_t* ptr = msg_text_new.start(); + MimeSession::AttachmentBuffer latest_attachment; + + if (session_data->partial_mime_bufs[source_id] != nullptr) + { + // Retrieve the attachment list stored during the partial inspection + mime_bufs = session_data->partial_mime_bufs[source_id]; + session_data->partial_mime_bufs[source_id] = nullptr; + last_attachment_complete = session_data->partial_mime_last_complete[source_id]; + session_data->partial_mime_last_complete[source_id] = true; + } + else + mime_bufs = new std::list; + while (ptr < section_end) { - // After process_mime_data(), ptr will point to the last byte processed in the current - // MIME part + // After process_mime_data(), 
ptr will point to the last byte processed in the current MIME part ptr = session_data->mime_state[source_id]->process_mime_data(p, ptr, (section_end - ptr), true, SNORT_FILE_POSITION_UNKNOWN); ptr++; + + latest_attachment = session_data->mime_state[source_id]->get_attachment(); + if (latest_attachment.data != nullptr) + { + uint32_t attach_length; + uint8_t* attach_buf; + if (!last_attachment_complete) + { + assert(!mime_bufs->empty()); + // Remove the partial attachment from the list and replace it with an extended version + const uint8_t* const old_buf = mime_bufs->back().file.start(); + const uint32_t old_length = mime_bufs->back().file.length(); + attach_length = old_length + latest_attachment.length; + attach_buf = new uint8_t[attach_length]; + memcpy(attach_buf, old_buf, old_length); + memcpy(attach_buf + old_length, latest_attachment.data, latest_attachment.length); + mime_bufs->pop_back(); + } + else + { + attach_length = latest_attachment.length; + attach_buf = new uint8_t[attach_length]; + memcpy(attach_buf, latest_attachment.data, latest_attachment.length); + } + const BufferData& vba_buf = session_data->mime_state[source_id]->get_ole_buf(); + if (vba_buf.data_ptr() != nullptr) + { + uint8_t* my_vba_buf = new uint8_t[vba_buf.length()]; + memcpy(my_vba_buf, vba_buf.data_ptr(), vba_buf.length()); + mime_bufs->emplace_back(attach_length, attach_buf, true, vba_buf.length(), my_vba_buf, true); + } + else + mime_bufs->emplace_back(attach_length, attach_buf, true, STAT_NOT_PRESENT, nullptr, false); + } + last_attachment_complete = latest_attachment.finished; } - - const BufferData& vba_buf = session_data->mime_state[source_id]->get_ole_buf(); - if (vba_buf.data_ptr()) - ole_data.set(vba_buf.length(), vba_buf.data_ptr()); detect_data.set(msg_text.length(), msg_text.start()); } @@ -245,12 +288,7 @@ void HttpMsgBody::analyze() partial_js_detect_length = js_norm_body.length(); } - // If this is a MIME upload, the MIME library sets the file_data buffer to the - // 
file attachment body data. - // FIXIT-E currently the file_data buffer is set to the body of the last attachment per - // message section. - set_file_data(const_cast(detect_data.start()), - (unsigned)detect_data.length()); + set_file_data(const_cast(detect_data.start()), (unsigned)detect_data.length()); } } body_octets += msg_text.length(); @@ -306,7 +344,7 @@ void HttpMsgBody::get_ole_data() { ole_data.set(ole_len, ole_data_ptr, false); - //Reset the ole data ptr once it is stored in msg body + // Reset the ole data ptr once it is stored in msg body session_data->fd_state[source_id]->ole_data_reset(); } } @@ -524,6 +562,47 @@ void HttpMsgBody::do_file_processing(const Field& file_data) session_data->file_octets[source_id] += fp_length; } +bool HttpMsgBody::run_detection(snort::Packet* p) +{ + if ((p == nullptr) || !detection_required()) + return false; + if ((mime_bufs != nullptr) && !mime_bufs->empty()) + { + auto mb = mime_bufs->cbegin(); + for (uint32_t count = 1; (count <= params->max_mime_attach) && (mb != mime_bufs->cend()); + count++, mb++) + { + set_file_data(mb->file.start(), mb->file.length()); + if (mb->vba.length() > 0) + ole_data.set(mb->vba.length(), mb->vba.start()); + DetectionEngine::detect(p); + ole_data.reset(); + decompressed_vba_data.reset(); + } + if (mb != mime_bufs->cend()) + { + // More MIME attachments than we have resources to inspect + HttpModule::increment_peg_counts(PEG_SKIP_MIME_ATTACH); + } + } + else + DetectionEngine::detect(p); + return true; +} + +void HttpMsgBody::clear() +{ + if (session_data->partial_flush[source_id]) + { + // Stash the MIME file attachments for use in full inspection + session_data->partial_mime_bufs[source_id] = mime_bufs; + mime_bufs = nullptr; + session_data->partial_mime_last_complete[source_id] = last_attachment_complete; + } + + HttpMsgSection::clear(); +} + // Parses out the filename and URI associated with this file. 
// For the filename, if the message has a Content-Disposition header with a filename attribute, // use that. Otherwise use the segment of the URI path after the last '/' but not including the @@ -644,6 +723,22 @@ void HttpMsgBody::print_body_section(FILE* output, const char* body_type_str) HttpMsgSection::print_section_title(output, body_type_str); fprintf(output, "octets seen %" PRIi64 "\n", body_octets); detect_data.print(output, "Detect data"); + if ((mime_bufs != nullptr) && !mime_bufs->empty()) + for (MimeBufs& mb : *mime_bufs) + { + mb.file.print(output, "MIME data"); + mb.vba.print(output, "MIME OLE data"); + if (mb.vba.length() > 0) + ole_data.set(mb.vba.length(), mb.vba.start()); + get_decomp_vba_data().print(output, "MIME Decompressed VBA data"); + ole_data.reset(); + decompressed_vba_data.reset(); + } + else + { + ole_data.print(output, "OLE data"); + get_decomp_vba_data().print(output, "Decompressed VBA data"); + } get_classic_buffer(HTTP_BUFFER_CLIENT_BODY, 0, 0).print(output, HttpApi::classic_buffer_names[HTTP_BUFFER_CLIENT_BODY-1]); get_classic_buffer(HTTP_BUFFER_RAW_BODY, 0, 0).print(output, diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h index fd241ed09..0fe62ddac 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.h +++ b/src/service_inspectors/http_inspect/http_msg_body.h @@ -22,6 +22,8 @@ #include "file_api/file_api.h" +#include + #include "http_common.h" #include "http_enum.h" #include "http_field.h" @@ -34,10 +36,13 @@ class HttpMsgBody : public HttpMsgSection { public: + ~HttpMsgBody() override { delete mime_bufs; } void analyze() override; HttpEnums::InspectSection get_inspection_section() const override { return first_body ? 
HttpEnums::IS_FIRST_BODY : HttpEnums::IS_BODY; } bool detection_required() const override { return (detect_data.length() > 0); } + bool run_detection(snort::Packet* p) override; + void clear() override; HttpMsgBody* get_body() override { return this; } const Field& get_classic_client_body(); const Field& get_raw_body() { return raw_body; } @@ -77,7 +82,6 @@ private: uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length); void get_ole_data(); - // In order of generation Field msg_text_new; Field decoded_body; Field raw_body; // request_depth or response_depth applied @@ -87,8 +91,12 @@ private: Field detect_data; Field norm_js_data; Field classic_client_body; // URI normalization applied + + // MIME buffers Field decompressed_vba_data; Field ole_data; + std::list* mime_bufs = nullptr; + bool last_attachment_complete = true; int32_t publish_length = HttpCommon::STAT_NOT_PRESENT; }; diff --git a/src/service_inspectors/http_inspect/http_msg_section.h b/src/service_inspectors/http_inspect/http_msg_section.h index bd34c9db7..9c20afc11 100644 --- a/src/service_inspectors/http_inspect/http_msg_section.h +++ b/src/service_inspectors/http_inspect/http_msg_section.h @@ -72,7 +72,7 @@ public: virtual void publish() {} // Call the detection engine to inspect the current packet - bool run_detection(snort::Packet* p); + virtual bool run_detection(snort::Packet* p); const Field& get_classic_buffer(unsigned id, uint64_t sub_id, uint64_t form); const Field& get_classic_buffer(const HttpBufferInfo& buf); @@ -82,7 +82,7 @@ public: int32_t get_status_code_num() const { return status_code_num; } - void clear(); + virtual void clear(); bool is_clear() { return cleared; } uint64_t get_transaction_id() { return trans_num; } diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index e3242dfbc..5a044501f 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ 
b/src/service_inspectors/http_inspect/http_tables.cc @@ -393,6 +393,7 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] = { CountType::SUM, "js_bytes", "total number of JavaScript bytes processed" }, { CountType::SUM, "js_identifiers", "total number of unique JavaScript identifiers processed" }, { CountType::SUM, "js_identifier_overflows", "total number of unique JavaScript identifier limit overflows" }, + { CountType::SUM, "skip_mime_attach", "total number of HTTP requests with too many MIME attachments to inspect" }, { CountType::END, nullptr, nullptr } }; diff --git a/src/service_inspectors/http_inspect/http_test_manager.cc b/src/service_inspectors/http_inspect/http_test_manager.cc index 054c008a7..779379768 100644 --- a/src/service_inspectors/http_inspect/http_test_manager.cc +++ b/src/service_inspectors/http_inspect/http_test_manager.cc @@ -23,10 +23,10 @@ #ifdef REG_TEST -#include - #include "http_test_manager.h" +#include + #include "http_test_input.h" unsigned HttpTestManager::test_input = IN_NONE;