Pull request #3605: http_inspect: improved MIME processing

author Tom Peters (thopeter) <thopeter@cisco.com>

Mon, 10 Oct 2022 19:00:07 +0000 (19:00 +0000)

committer Tom Peters (thopeter) <thopeter@cisco.com>

Mon, 10 Oct 2022 19:00:07 +0000 (19:00 +0000)
author Tom Peters (thopeter) <thopeter@cisco.com>
Mon, 10 Oct 2022 19:00:07 +0000 (19:00 +0000)
committer Tom Peters (thopeter) <thopeter@cisco.com>
Mon, 10 Oct 2022 19:00:07 +0000 (19:00 +0000)
diff --git a/doc/user/http_inspect.txt b/doc/user/http_inspect.txt

index bdcb66ba0e00646cf7f84e535f29f43b4a276561..b550f35138e0d362e1bcf6f9e8e6e4c1f98d75f3 100755 (executable)
--- a/doc/user/http_inspect.txt
+++ b/doc/user/http_inspect.txt
@@ -209,6 +209,14 @@ locate the OLE (Object Linking and Embedding) file embedded with the files
  containing RLE compressed vba macro data. The decompressed vba macro data is
  then made available through the vba_data ips rule option.
  
+===== max_mime_attach
+
+HTTP request message bodies may be in MIME format. Each file attachment is
+separately incorporated in the file_data rule option. When a request contains
+many small file attachments these inspections may consume a lot of processing
+power. This parameter limits the number of files from one message section that
+are inspected. The default value is 5.
+
  ===== normalize_javascript
  
  normalize_javascript = true will enable legacy normalizer of JavaScript within
diff --git a/src/mime/file_mime_process.cc b/src/mime/file_mime_process.cc

index 15cc1910643b677bfa44cb5a630a156974549e4d..04f9c99b4191808c6e783be81dbd29133dd5bd0d 100644 (file)
--- a/src/mime/file_mime_process.cc
+++ b/src/mime/file_mime_process.cc
@@ -583,6 +583,9 @@ const uint8_t* MimeSession::process_mime_data_paf(
                          decompress_alert();
  
                      set_file_data(decomp_buffer, decomp_buf_size);
+                    attachment.data = decomp_buffer;
+                    attachment.length = decomp_buf_size;
+                    attachment.finished = isFileEnd(position);
                  }
  
                  // Process file type/file signature
@@ -661,6 +664,10 @@ const uint8_t* MimeSession::process_mime_data(Packet* p, const uint8_t* start,
  
      const uint8_t* data_end_marker = start + data_size;
  
+    attachment.data = nullptr;
+    attachment.length = 0;
+    attachment.finished = true;
+
      if (position != SNORT_FILE_POSITION_UNKNOWN)
      {
          process_mime_data_paf(p, attach_start, data_end_marker,
diff --git a/src/mime/file_mime_process.h b/src/mime/file_mime_process.h

index 8eca8a7577f113818b3ed93714352293dbd652d1..7eff92c339cef29691428a8b8c39d5a7a39fbbd3 100644 (file)
--- a/src/mime/file_mime_process.h
+++ b/src/mime/file_mime_process.h
@@ -83,6 +83,15 @@ public:
      const BufferData& get_ole_buf();
      const BufferData& get_vba_inspect_buf();
  
+    struct AttachmentBuffer    // attachment data reported by the most recent process_mime_data() call
+    {
+        const uint8_t* data = nullptr;    // points at the session's decoded data; nullptr when none was produced
+        uint32_t length = 0;              // number of octets available at data
+        bool finished = true;             // result of isFileEnd(position); false means more of this attachment may follow
+    };
+
+    // Returns a copy of the attachment descriptor filled in by the latest process_mime_data() call.
+    // The descriptor's data pointer is not owned by the caller; copy the octets if they must outlive
+    // the next call into the MIME session.
+    AttachmentBuffer get_attachment() const { return attachment; }
+
  protected:
      MimeDecode* decode_state = nullptr;
  
@@ -131,6 +140,8 @@ private:
  
      uint8_t* partial_header = nullptr;
      uint32_t partial_header_len = 0;
+
+    AttachmentBuffer attachment;
  };
  }
  #endif
diff --git a/src/pub_sub/test/pub_sub_http_request_body_event_test.cc b/src/pub_sub/test/pub_sub_http_request_body_event_test.cc

index b1e98bacd90086f528cdc0b53ee82420494a888f..c7e4e7cccf0c4b4fa8155f706c404cb1a1021d3c 100644 (file)
--- a/src/pub_sub/test/pub_sub_http_request_body_event_test.cc
+++ b/src/pub_sub/test/pub_sub_http_request_body_event_test.cc
@@ -58,6 +58,9 @@ void HttpMsgBody::do_file_decompression(const Field&, Field&) {}
  void HttpMsgBody::do_enhanced_js_normalization(const Field&, Field&) {}
  void HttpMsgBody::clean_partial(uint32_t&, uint32_t&, uint8_t*&, uint32_t&) {}
  void HttpMsgBody::bookkeeping_regular_flush(uint32_t&, uint8_t*&, uint32_t&, int32_t) {}
+bool HttpMsgBody::run_detection(snort::Packet*) { return true; }    // test stub: always reports true
+void HttpMsgBody::clear() {}    // test stub: no-op
+void HttpMsgSection::clear() {}    // test stub: no-op
  #ifdef REG_TEST
  void HttpMsgBody::print_body_section(FILE*, const char*) {}
  #endif
@@ -78,6 +81,7 @@ HttpMsgSection::HttpMsgSection(const uint8_t* buffer, const uint16_t buf_size,
      tcp_close(false)
  {}
  void HttpMsgSection::update_depth() const{}
+bool HttpMsgSection::run_detection(snort::Packet*) { return true; }    // test stub: always reports true
  
  HttpTransaction*HttpTransaction::attach_my_transaction(HttpFlowData*, HttpCommon::SourceId)
      { return nullptr; }
diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt

index ca4bed7484ba7a70b79ad2469ff8df2427cdd687..9714577f82b8077d66dc4b99f2cd4f041d6dece3 100755 (executable)
--- a/src/service_inspectors/http_inspect/dev_notes.txt
+++ b/src/service_inspectors/http_inspect/dev_notes.txt
@@ -1,6 +1,6 @@
-The new Snort HTTP inspector (HI) is divided into two major parts. The HttpStreamSplitter
-(splitter) accepts TCP payload data from Stream and subdivides it into message sections.
-HttpInspect (inspector) processes individual message sections.
+The HTTP inspector (HI) is divided into two major parts. The HttpStreamSplitter (splitter) accepts
+TCP payload data from Stream and subdivides it into message sections. HttpInspect (inspector)
+processes individual message sections.
  
  Splitter finish() is called by Stream when the TCP connection closes (including pruning).
  It serves several specialized purposes in cases where the HTTP message is truncated (ends
@@ -15,6 +15,12 @@ Javascripts. The stream splitter scan() method searches its input for the end-of
  "</script>". When necessary this requires scan() to unzip the data. This is an extra unzip as
  storage limitations preclude saving the unzipped version of the data for subsequent reassembly.
  
+Update: the previous sentence has been discovered to be incorrect. The memory requirements of
+zlib are very large. It would save a lot of memory and some processing time for script detection
+to unzip one time in scan() and store the result for eventual use by reassemble(). The memory
+lost by storing partial message sections in HI while waiting for reassemble() would be more than
+compensated for by not having two instances of zlib.
+
  When the end of a script is found and the normal flush point has not been found, the current TCP
  segment and all previous segments for the current message section are flushed using a special
  procedure known as partial inspection. From the perspective of Stream (or H2I) a partial inspection
@@ -49,14 +55,20 @@ detection triggered it, because H2I wanted it, or both.
  
  Some applications may be affected by blocks too late scenarios related to seeing part of the
  zero-length chunk. For example a TCP packet that ends with:
+
      8<CR><LF>abcdefgh<CR><LF>0
+
  might be sufficient to forward the available data ("abcdefgh") to the application even though the
  final <CR><LF> has not been received.
+
  Note that the actual next bytes are uncertain here. The next packet might begin with <CR><LF>, but
+
      100000<CR><LF>ijklmnopq ...
+
  is another perfectly legal possibility. There is no rule against starting a nonzero chunk length
  with a zero character and some applications reputedly do this.
-As a precaution partial inspections performed when 1) a TCP segment ends inside a possible
+
+As a precaution partial inspection is performed when 1) a TCP segment ends inside a possible
  zero-length chunk or 2) chunk processing fails (broken chunk).
  
  HttpFlowData is a data class representing all HI information relating to a flow. It serves as
@@ -73,10 +85,10 @@ processed together. There are eight types of message section:
  4. Content-Length message body (a block of message data usually not much larger than 16K from a
     body defined by the Content-Length header)
  5. Chunked message body (same but from a chunked body)
-6. Old message body (same but from a body with no Content-Length header that runs to connection
-   close)
+6. Old message body (same but from a response body with no Content-Length header that runs to
+   connection close)
  7. HTTP/X message body (same but content taken from an HTTP/2 or HTTP/3 Data frame)
-8. Trailers (all header lines following a chunked body as a group)
+8. Trailers (all header lines following a chunked or HTTP/X body as a group)
  
  Message sections are represented by message section objects that contain and process them. There
  are twelve message section classes that inherit as follows. An asterisk denotes a virtual class.
@@ -85,7 +97,7 @@ are twelve message section classes that inherit as follows. An asterisk denotes
  2. HttpMsgStart* : HttpMsgSection - common elements of request and status
  3. HttpMsgRequest : HttpMsgStart
  4. HttpMsgStatus : HttpMsgStart
-5. HttpMsgHeadShared* : HttpMsgSection - common elements of header and trailer
+5. HttpMsgHeadShared* : HttpMsgSection - common elements of headers and trailers
  6. HttpMsgHeader : HttpMsgHeadShared
  7. HttpMsgTrailer : HttpMsgHeadShared
  8. HttpMsgBody* : HttpMsgSection - common elements of message body processing
@@ -138,6 +150,11 @@ derive it again.
  Once Field is set to a non-null value it should never change. The set() functions will assert if
  this rule is disregarded.
  
+Partial inspections have created an exception. Fields may be used to store work products from a
+partial inspection that may be updated by subsequent inspections. The reset() method has been
+provided for this situation. It deletes any owned buffer and reinitializes the Field to null.
+This feature should be used with care to avoid weakening the architecture.
+
  A Field may own the buffer containing the message or it may point to a buffer that belongs to
  someone else. When a Field owning a buffer is deleted the buffer is deleted as well. Ownership is
  determined with the Field is initially set. In general any dynamically allocated buffer should be
@@ -227,28 +244,34 @@ be kept intact. Any string literals, added by the plus operator,
  will be concatenated. This also works for functions that result in string
  literals. Semicolons will be inserted, if not already present, according to ECMAScript
  automatic semicolon insertion rules.
+
  All JavaScript identifier names, except those from the ident_ignore or prop_ignore lists,
  will be substituted with unified names in the following format: var_0000 -> var_ffff.
-So, the number of unique identifiers available is 65536 names per HTTP transaction.
-If Normalizer overruns the configured limit, built-in alert is generated.
+The number of unique identifiers available is 65536 names per HTTP transaction. If Normalizer
+overruns the configured limit, a built-in alert is generated.
+
  A config option to set the limit manually:
+
   * http_inspect.js_norm_identifier_depth.
  
-Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with 
+Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with
  the listed identifier, any chain of dot accessors, brackets and function calls will be kept
  intact.
+
  For example:
+
   * console.log("bar")
   * document.getElementById("id").text
   * eval("script")
   * foo["bar"]
  
-Ignored identifiers are configured via the following config option,
-it accepts a list of object and function names:
+Ignored identifiers are configured via the following config option that accepts a list of object
+and function names:
+
   * http_inspect.js_norm_ident_ignore = { 'console', 'document', 'eval', 'foo' }
  
  When a variable assignment that 'aliases' an identifier from the list is found,
-the assignment will be tracked, and subsequent occurrences of the variable will be
+the assignment will be tracked and subsequent occurrences of the variable will be
  replaced with the stored value. This substitution will follow JavaScript variable scope 
  limits.
  
@@ -264,6 +287,7 @@ list, the object will be tracked, and although its own identifier will be conver
  its property and function calls will be kept intact, as with ignored identifiers. 
  
  For example:
+
      var obj = new Array()
      obj.insert(1,2,3) // will be normalized to var_0000.insert(1,2,3)
  
@@ -343,8 +367,8 @@ attacker cannot affect split points by adjusting their chunks.
  Built-in alerts for chunking are generated for protocol violations and suspicious usages. Many
  irregularities can be compensated for but others cannot. Whenever a fatal problem occurs, NHI
  generates 119:213 HTTP chunk misformatted and converts to a mode very similar to run to connection
-close. The rest of the flow is sent to detection as-is. No further attempt is made to dechunk the
-message body or look for the headers that begin the next message. The customer should block 119:213
+close. The rest of the flow is sent to detection as is. No further attempt is made to dechunk the
+message body or look for the headers that begin the next message. The user should block 119:213
  unless they are willing to run the risk of continuing with no real security.
  
  In addition to 119:213 there will often be a more specific alert based on what went wrong.
@@ -397,6 +421,24 @@ generated and processing continues normally. If there is no separator at all tha
  Then we return to #1 as the next chunk begins. In particular extra separators beyond the two
  expected are attributed to the beginning of the next chunk.
  
+MIME processing:
+
+NHI processes request message bodies in MIME format differently from other message bodies. Message
+sections are forwarded to the MIME library instead of being directly input to file processing. The
+library parses the input into individual MIME attachments. This creates a design issue because
+there may be multiple attachments within a single message body section. The email inspectors solve
+this issue by splitting MIME attachments within their stream splitters so that there is only one
+attachment per reassembled packet. This attachment, if it contains a file, is the source material for
+the file_data rule option.
+
+The NHI stream splitter does not work this way. It does not consider MIME at all. Split points between
+message sections are never based on MIME or any other type of message body content.
+
+The problem for NHI is that file_data is a singular entity and cannot accommodate multiple
+simultaneous files derived from a message section. NHI resolves this by accumulating the processed
+file attachments in a list and directly calling detection multiple times--once for each file
+attachment installed as file_data.
+
  Rule options:
  
  HttpIpsOption is the base class for http rule options. It supports the commonly used parameters: 
@@ -453,7 +495,7 @@ Insert commands:
    $fill <decimal number> create a paragraph consisting of <number> octets of auto-fill data
       ABCDEFGHIJABC ....
    $fileread <decimal number> read the specified number of bytes from the included file into the
-     message buffer. Each read corresponds to one TCP section.
+     message buffer.
    $h2preface creates the HTTP/2 connection preface "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
    $h2frameheader <frame_type> <frame_length> <flags> <stream_id> generates an HTTP/2 frame header.
      The frame type may be the frame type name in all lowercase or the numeric frame type code:
@@ -476,7 +518,7 @@ Escape sequences begin with '\'. They may be used within a paragraph or to begin
    \xnn or \Xnn - where nn is a two-digit hexadecimal number. Insert an arbitrary 8-bit number as
       the next character. a-f and A-F are both acceptable.
  
-Data is separated into segments for presentation to the splitter whenever a paragraph ends (blank
+Data are separated into segments for presentation to the splitter whenever a paragraph ends (blank
  line).
  
  When the inspector aborts the connection (scan() returns StreamSplitter::ABORT) it does not expect
diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h

index 487a3efd0678d455a90bb31065026ea7fec67266..2d779588061aa62b496ccffdaae85807565cec67 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_enum.h
+++ b/src/service_inspectors/http_inspect/http_enum.h
@@ -69,7 +69,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES
      PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION,
      PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY,
      PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL,
-    PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_COUNT_MAX };
+    PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX };
  
  // Result of scanning by splitter
  enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE,
diff --git a/src/service_inspectors/http_inspect/http_field.cc b/src/service_inspectors/http_inspect/http_field.cc

index a2aaaf6efec31766aebb2a871f641a9a30b78918..c52735a85996c83e0481dff230b09a72d440bb7b 100644 (file)
--- a/src/service_inspectors/http_inspect/http_field.cc
+++ b/src/service_inspectors/http_inspect/http_field.cc
@@ -39,16 +39,6 @@ Field::Field(int32_t length, const uint8_t* start, bool own_the_buffer_) :
      assert(!((start != nullptr) && (length < 0)));
  }
  
-Field& Field::operator=(const Field& rhs)
-{
-    assert(len == STAT_NOT_COMPUTE);
-    assert(strt == nullptr);
-    strt = rhs.strt;
-    len = rhs.len;
-    own_the_buffer = false;    // buffer must not have two owners
-    return *this;
-}
-
  void Field::set(int32_t length, const uint8_t* start, bool own_the_buffer_)
  {
      assert(len == STAT_NOT_COMPUTE);
@@ -77,6 +67,15 @@ void Field::set(const Field& f)
      // Both Fields cannot be responsible for deleting the buffer so do not copy own_the_buffer
  }
  
+// Return the Field to its initial not-computed state so that it may be set again. A buffer owned
+// by this Field is freed; a borrowed buffer is simply forgotten.
+void Field::reset()
+{
+    if (own_the_buffer)
+    {
+        delete[] strt;
+        own_the_buffer = false;
+    }
+    len = STAT_NOT_COMPUTE;
+    strt = nullptr;
+}
+
  #ifdef REG_TEST
  void Field::print(FILE* output, const char* name) const
  {
@@ -112,5 +111,6 @@ void Field::print(FILE* output, const char* name) const
      }
      fprintf(output, "\n");
  }
+
  #endif
  
diff --git a/src/service_inspectors/http_inspect/http_field.h b/src/service_inspectors/http_inspect/http_field.h

index 19b17296b8fe1db46a1612598c2042d4d3a8217a..5abae6fa83d78410c407c359899392deeecf00b2 100644 (file)
--- a/src/service_inspectors/http_inspect/http_field.h
+++ b/src/service_inspectors/http_inspect/http_field.h
@@ -39,7 +39,10 @@ public:
      Field(int32_t length, const uint8_t* start, bool own_the_buffer_ = false);
      explicit Field(int32_t length) : len(length) { assert(length<=0); }
      Field() = default;
-    Field& operator=(const Field& rhs);
+
+    // own_the_buffer precludes trivial copy assignment
+    Field& operator=(const Field& rhs) = delete;
+
      ~Field() { if (own_the_buffer) delete[] strt; }
      int32_t length() const { return len; }
      const uint8_t* start() const { return strt; }
@@ -47,6 +50,7 @@ public:
      void set(const Field& f);
      void set(HttpCommon::StatusCode stat_code);
      void set(int32_t length) { set(static_cast<HttpCommon::StatusCode>(length)); }
+    void reset();
  
  #ifdef REG_TEST
      void print(FILE* output, const char* name) const;
@@ -58,5 +62,15 @@ private:
      bool own_the_buffer = false;
  };
  
+// Pairs the two Fields kept for one MIME attachment: the file data and the VBA (OLE) data.
+// Each Field is built from its own length, buffer pointer, and ownership flag.
+struct MimeBufs
+{
+    Field file;
+    Field vba;
+
+    MimeBufs(int32_t file_len, const uint8_t* file_buf, bool file_own,
+        int32_t vba_len, const uint8_t* vba_buf, bool vba_own)
+        : file(file_len, file_buf, file_own), vba(vba_len, vba_buf, vba_own)
+    {}
+};
+
  #endif
  
diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc

index 26ff9ba42e93170780f8845d70044b78761f78ca..73cc6dce58f5c5fe0a714c674ca4bd3996cf6577 100644 (file)
--- a/src/service_inspectors/http_inspect/http_flow_data.cc
+++ b/src/service_inspectors/http_inspect/http_flow_data.cc
@@ -121,6 +121,7 @@ HttpFlowData::~HttpFlowData()
          delete[] section_buffer[k];
          delete[] partial_buffer[k];
          delete[] partial_detect_buffer[k];
+        delete partial_mime_bufs[k];
          HttpTransaction::delete_transaction(transaction[k], nullptr);
          delete cutter[k];
          if (compress_stream[k] != nullptr)
@@ -147,6 +148,8 @@ HttpFlowData::~HttpFlowData()
  void HttpFlowData::half_reset(SourceId source_id)
  {
      assert((source_id == SRC_CLIENT) || (source_id == SRC_SERVER));
+    assert(partial_mime_bufs[source_id] == nullptr);
+    assert(partial_mime_last_complete[source_id]);
  
      version_id[source_id] = VERS__NOT_PRESENT;
      data_length[source_id] = STAT_NOT_PRESENT;
diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h

index aa824c7a3e0e97977f6ded57209dea3001d18bc7..e0f4a3c4a09abbefbbd33cbda525d8626904b198 100644 (file)
--- a/src/service_inspectors/http_inspect/http_flow_data.h
+++ b/src/service_inspectors/http_inspect/http_flow_data.h
@@ -23,6 +23,7 @@
  #include <zlib.h>
  
  #include <cstdio>
+#include <list>
  
  #include "flow/flow.h"
  #include "utils/util_utf.h"
@@ -31,6 +32,7 @@
  #include "http_common.h"
  #include "http_enum.h"
  #include "http_event.h"
+#include "http_field.h"
  #include "http_module.h"
  
  class HttpTransaction;
@@ -180,6 +182,8 @@ private:
      uint8_t* partial_detect_buffer[2] = { nullptr, nullptr };
      uint32_t partial_detect_length[2] = { 0, 0 };
      uint32_t partial_js_detect_length[2] = { 0, 0 };
+    std::list<MimeBufs>* partial_mime_bufs[2] = { nullptr, nullptr };
+    bool partial_mime_last_complete[2] = { true, true };
      int32_t status_code_num = HttpCommon::STAT_NOT_PRESENT;
      HttpEnums::VersionId version_id[2] = { HttpEnums::VERS__NOT_PRESENT,
                                              HttpEnums::VERS__NOT_PRESENT };
diff --git a/src/service_inspectors/http_inspect/http_inspect.cc b/src/service_inspectors/http_inspect/http_inspect.cc

index 66a9e47ff35c85d516d7e9f9df391f81a41e0e98..91d1e3589815882fb3ccd7bea59f256974acfc05 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_inspect.cc
+++ b/src/service_inspectors/http_inspect/http_inspect.cc
@@ -170,10 +170,10 @@ void HttpInspect::show(const SnortConfig*) const
      ConfigLogger::log_flag("decompress_swf", params->decompress_swf);
      ConfigLogger::log_flag("decompress_zip", params->decompress_zip);
      ConfigLogger::log_flag("decompress_vba", params->decompress_vba);
+    ConfigLogger::log_value("max_mime_attach", params->max_mime_attach);
      ConfigLogger::log_flag("script_detection", params->script_detection);
      ConfigLogger::log_flag("normalize_javascript", params->js_norm_param.normalize_javascript);
-    ConfigLogger::log_value("max_javascript_whitespaces",
-        params->js_norm_param.max_javascript_whitespaces);
+    ConfigLogger::log_value("max_javascript_whitespaces", params->js_norm_param.max_javascript_whitespaces);
      ConfigLogger::log_value("js_norm_bytes_depth", params->js_norm_param.js_norm_bytes_depth);
      ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth);
      ConfigLogger::log_value("js_norm_max_tmpl_nest", params->js_norm_param.max_template_nesting);
diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc

index 672d88377cba594949574b5a0cf685e7fa85b88f..69fa99861c9d19bfe303b97f24ee81c22e39dcbe 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_module.cc
+++ b/src/service_inspectors/http_inspect/http_module.cc
@@ -95,6 +95,9 @@ const Parameter HttpModule::http_params[] =
      { "decompress_vba", Parameter::PT_BOOL, nullptr, "false",
        "decompress MS Office Visual Basic for Applications macro files in response bodies" },
  
+    { "max_mime_attach", Parameter::PT_INT, "1:65535", "5",
+      "maximum number of mime attachments that will be inspected in a section of a request message" },
+
      { "script_detection", Parameter::PT_BOOL, nullptr, "false",
        "inspect JavaScript immediately upon script end" },
  
@@ -298,6 +301,10 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*)
      {
          params->decompress_vba = val.get_bool();
      }
+    else if (val.is("max_mime_attach"))
+    {
+        params->max_mime_attach = val.get_uint32();
+    }
      else if (val.is("script_detection"))
      {
          params->script_detection = val.get_bool();
diff --git a/src/service_inspectors/http_inspect/http_module.h b/src/service_inspectors/http_inspect/http_module.h

index 455c8bcc6eac1a83296b742374eac7af809e0f64..2ccb7da9086c787d960321cb9ed1b8172412bf14 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_module.h
+++ b/src/service_inspectors/http_inspect/http_module.h
@@ -62,6 +62,7 @@ public:
      bool decompress_zip = false;
      bool decompress_vba = false;
      snort::DecodeConfig* mime_decode_conf;
+    uint32_t max_mime_attach = 5;
      bool script_detection = false;
      snort::LiteralSearch::Handle* script_detection_handle = nullptr;
      bool publish_request_body = true;
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc

index a6d565ef4eeb5024e94daf1f2cc781fe57c15fa5..29bc17852bf6c61433b3348bdfb44a708b2a9d50 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -158,18 +158,61 @@ void HttpMsgBody::analyze()
          Packet* p = DetectionEngine::get_current_packet();
          const uint8_t* const section_end = msg_text_new.start() + msg_text_new.length();
          const uint8_t* ptr = msg_text_new.start();
+        MimeSession::AttachmentBuffer latest_attachment;
+
+        if (session_data->partial_mime_bufs[source_id] != nullptr)
+        {
+            // Retrieve the attachment list stored during the partial inspection
+            mime_bufs = session_data->partial_mime_bufs[source_id];
+            session_data->partial_mime_bufs[source_id] = nullptr;
+            last_attachment_complete = session_data->partial_mime_last_complete[source_id];
+            session_data->partial_mime_last_complete[source_id] = true;
+        }
+        else
+            mime_bufs = new std::list<MimeBufs>;
+
          while (ptr < section_end)
          {
-            // After process_mime_data(), ptr will point to the last byte processed in the current
-            // MIME part
+            // After process_mime_data(), ptr will point to the last byte processed in the current MIME part
              ptr = session_data->mime_state[source_id]->process_mime_data(p, ptr,
                  (section_end - ptr), true, SNORT_FILE_POSITION_UNKNOWN);
              ptr++;
+
+            latest_attachment = session_data->mime_state[source_id]->get_attachment();
+            if (latest_attachment.data != nullptr)
+            {
+                uint32_t attach_length;
+                uint8_t* attach_buf;
+                if (!last_attachment_complete)
+                {
+                    assert(!mime_bufs->empty());
+                    // Remove the partial attachment from the list and replace it with an extended version
+                    const uint8_t* const old_buf = mime_bufs->back().file.start();
+                    const uint32_t old_length = mime_bufs->back().file.length();
+                    attach_length = old_length + latest_attachment.length;
+                    attach_buf = new uint8_t[attach_length];
+                    memcpy(attach_buf, old_buf, old_length);
+                    memcpy(attach_buf + old_length, latest_attachment.data, latest_attachment.length);
+                    mime_bufs->pop_back();
+                }
+                else
+                {
+                    attach_length = latest_attachment.length;
+                    attach_buf = new uint8_t[attach_length];
+                    memcpy(attach_buf, latest_attachment.data, latest_attachment.length);
+                }
+                const BufferData& vba_buf = session_data->mime_state[source_id]->get_ole_buf();
+                if (vba_buf.data_ptr() != nullptr)
+                {
+                    uint8_t* my_vba_buf = new uint8_t[vba_buf.length()];
+                    memcpy(my_vba_buf, vba_buf.data_ptr(), vba_buf.length());
+                    mime_bufs->emplace_back(attach_length, attach_buf, true, vba_buf.length(), my_vba_buf, true);
+                }
+                else
+                    mime_bufs->emplace_back(attach_length, attach_buf, true, STAT_NOT_PRESENT, nullptr, false);
+            }
+            last_attachment_complete = latest_attachment.finished;
          }
-        
-        const BufferData& vba_buf = session_data->mime_state[source_id]->get_ole_buf();
-        if (vba_buf.data_ptr())
-            ole_data.set(vba_buf.length(), vba_buf.data_ptr());
  
          detect_data.set(msg_text.length(), msg_text.start());
      }
@@ -245,12 +288,7 @@ void HttpMsgBody::analyze()
                  partial_js_detect_length = js_norm_body.length();
              }
  
-            // If this is a MIME upload, the MIME library sets the file_data buffer to the
-            // file attachment body data.
-            // FIXIT-E currently the file_data buffer is set to the body of the last attachment per
-            // message section.
-            set_file_data(const_cast<uint8_t*>(detect_data.start()),
-                (unsigned)detect_data.length());
+            set_file_data(const_cast<uint8_t*>(detect_data.start()), (unsigned)detect_data.length());
          }
      }
      body_octets += msg_text.length();
@@ -306,7 +344,7 @@ void HttpMsgBody::get_ole_data()
      {
          ole_data.set(ole_len, ole_data_ptr, false);
  
-        //Reset the ole data ptr once it is stored in msg body
+        // Reset the ole data ptr once it is stored in msg body
          session_data->fd_state[source_id]->ole_data_reset();
      }
  }
@@ -524,6 +562,47 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
      session_data->file_octets[source_id] += fp_length;
  }
  
+bool HttpMsgBody::run_detection(snort::Packet* p)
+{
+    if ((p == nullptr) || !detection_required())
+        return false;
+    if ((mime_bufs != nullptr) && !mime_bufs->empty())
+    {
+        auto mb = mime_bufs->cbegin();
+        for (uint32_t count = 1; (count <= params->max_mime_attach) && (mb != mime_bufs->cend());
+            count++, mb++)
+        {
+            set_file_data(mb->file.start(), mb->file.length());
+            if (mb->vba.length() > 0)
+                ole_data.set(mb->vba.length(), mb->vba.start());
+            DetectionEngine::detect(p);
+            ole_data.reset();
+            decompressed_vba_data.reset();
+        }
+        if (mb != mime_bufs->cend())
+        {
+            // More MIME attachments than we have resources to inspect
+            HttpModule::increment_peg_counts(PEG_SKIP_MIME_ATTACH);
+        }
+    }
+    else
+        DetectionEngine::detect(p);
+    return true;
+}
+
+// On a partial flush, hand the accumulated attachment list and the completion state of its last
+// entry to the flow data, then perform the base class clear().
+void HttpMsgBody::clear()
+{
+    if (session_data->partial_flush[source_id])
+    {
+        session_data->partial_mime_last_complete[source_id] = last_attachment_complete;
+
+        // The list now belongs to the flow data; drop our pointer to it
+        session_data->partial_mime_bufs[source_id] = mime_bufs;
+        mime_bufs = nullptr;
+    }
+
+    HttpMsgSection::clear();
+}
+
  // Parses out the filename and URI associated with this file.
  // For the filename, if the message has a Content-Disposition header with a filename attribute,
  // use that. Otherwise use the segment of the URI path after the last '/' but not including the
@@ -644,6 +723,22 @@ void HttpMsgBody::print_body_section(FILE* output, const char* body_type_str)
      HttpMsgSection::print_section_title(output, body_type_str);
      fprintf(output, "octets seen %" PRIi64 "\n", body_octets);
      detect_data.print(output, "Detect data");
+    if ((mime_bufs != nullptr) && !mime_bufs->empty())
+        for (MimeBufs& mb : *mime_bufs)
+        {
+            mb.file.print(output, "MIME data");
+            mb.vba.print(output, "MIME OLE data");
+            if (mb.vba.length() > 0)
+                ole_data.set(mb.vba.length(), mb.vba.start());
+            get_decomp_vba_data().print(output, "MIME Decompressed VBA data");
+            ole_data.reset();
+            decompressed_vba_data.reset();
+        }
+    else
+    {
+        ole_data.print(output, "OLE data");
+        get_decomp_vba_data().print(output, "Decompressed VBA data");
+    }
      get_classic_buffer(HTTP_BUFFER_CLIENT_BODY, 0, 0).print(output,
          HttpApi::classic_buffer_names[HTTP_BUFFER_CLIENT_BODY-1]);
      get_classic_buffer(HTTP_BUFFER_RAW_BODY, 0, 0).print(output,
diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h

index fd241ed09442386b9cdaa575caac4e5e8d6f369c..0fe62ddaca4ab05d74dc1ab135f0d2e3c8a2aaff 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_body.h
+++ b/src/service_inspectors/http_inspect/http_msg_body.h
@@ -22,6 +22,8 @@
  
  #include "file_api/file_api.h"
  
+#include <list>
+
  #include "http_common.h"
  #include "http_enum.h"
  #include "http_field.h"
@@ -34,10 +36,13 @@
  class HttpMsgBody : public HttpMsgSection
  {
  public:
+    ~HttpMsgBody() override { delete mime_bufs; }
      void analyze() override;
      HttpEnums::InspectSection get_inspection_section() const override
          { return first_body ? HttpEnums::IS_FIRST_BODY : HttpEnums::IS_BODY; }
      bool detection_required() const override { return (detect_data.length() > 0); }
+    bool run_detection(snort::Packet* p) override;
+    void clear() override;
      HttpMsgBody* get_body() override { return this; }
      const Field& get_classic_client_body();
      const Field& get_raw_body() { return raw_body; }
@@ -77,7 +82,6 @@ private:
          uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length);
      void get_ole_data();
  
-    // In order of generation
      Field msg_text_new;
      Field decoded_body;
      Field raw_body;              // request_depth or response_depth applied
@@ -87,8 +91,12 @@ private:
      Field detect_data;
      Field norm_js_data;
      Field classic_client_body;   // URI normalization applied
+
+    // MIME buffers
      Field decompressed_vba_data;
      Field ole_data;
+    std::list<MimeBufs>* mime_bufs = nullptr;
+    bool last_attachment_complete = true;
  
      int32_t publish_length = HttpCommon::STAT_NOT_PRESENT;
  };
diff --git a/src/service_inspectors/http_inspect/http_msg_section.h b/src/service_inspectors/http_inspect/http_msg_section.h

index bd34c9db7b6c36ae3b2233645595a5f3195b3400..9c20afc11237d6b3921ca9905abe5b9eb80cbd9b 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_section.h
+++ b/src/service_inspectors/http_inspect/http_msg_section.h
@@ -72,7 +72,7 @@ public:
      virtual void publish() {}
  
      // Call the detection engine to inspect the current packet
-    bool run_detection(snort::Packet* p);
+    virtual bool run_detection(snort::Packet* p);
  
      const Field& get_classic_buffer(unsigned id, uint64_t sub_id, uint64_t form);
      const Field& get_classic_buffer(const HttpBufferInfo& buf);
@@ -82,7 +82,7 @@ public:
  
      int32_t get_status_code_num() const { return status_code_num; }
  
-    void clear();
+    virtual void clear();
      bool is_clear() { return cleared; }
  
      uint64_t get_transaction_id() { return trans_num; }
diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc

index e3242dfbce4c85852d7a4b84b5bebd148f30b015..5a044501fbd3abca9f9b1becabf175e1f9c1b760 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_tables.cc
+++ b/src/service_inspectors/http_inspect/http_tables.cc
@@ -393,6 +393,7 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] =
      { CountType::SUM, "js_bytes", "total number of JavaScript bytes processed" },
      { CountType::SUM, "js_identifiers", "total number of unique JavaScript identifiers processed" },
      { CountType::SUM, "js_identifier_overflows", "total number of unique JavaScript identifier limit overflows" },
+    { CountType::SUM, "skip_mime_attach", "total number of HTTP requests with too many MIME attachments to inspect" },
      { CountType::END, nullptr, nullptr }
  };
  
diff --git a/src/service_inspectors/http_inspect/http_test_manager.cc b/src/service_inspectors/http_inspect/http_test_manager.cc

index 054c008a783962abc859571db0a874bfdb603372..7793797687cb718eb97dd170bf7132078a1eecdc 100644 (file)
--- a/src/service_inspectors/http_inspect/http_test_manager.cc
+++ b/src/service_inspectors/http_inspect/http_test_manager.cc
@@ -23,10 +23,10 @@
  
  #ifdef REG_TEST
  
-#include <stdexcept>
-
  #include "http_test_manager.h"
  
+#include <stdexcept>
+
  #include "http_test_input.h"
  
  unsigned HttpTestManager::test_input = IN_NONE;
author	Tom Peters (thopeter) <thopeter@cisco.com>
	Mon, 10 Oct 2022 19:00:07 +0000 (19:00 +0000)
committer	Tom Peters (thopeter) <thopeter@cisco.com>
	Mon, 10 Oct 2022 19:00:07 +0000 (19:00 +0000)
doc/user/http_inspect.txt		patch \| blob \| blame \| history
src/mime/file_mime_process.cc		patch \| blob \| blame \| history
src/mime/file_mime_process.h		patch \| blob \| blame \| history
src/pub_sub/test/pub_sub_http_request_body_event_test.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/dev_notes.txt		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_enum.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_field.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_field.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_flow_data.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_flow_data.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_inspect.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_module.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_module.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_body.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_body.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_section.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_tables.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_test_manager.cc		patch \| blob \| blame \| history