Merge pull request #3053 in SNORT/snort3 from ~KATHARVE/snort3:file_new_api_http...

author Tom Peters (thopeter) <thopeter@cisco.com>

Tue, 14 Sep 2021 20:04:37 +0000 (20:04 +0000)

committer Tom Peters (thopeter) <thopeter@cisco.com>

Tue, 14 Sep 2021 20:04:37 +0000 (20:04 +0000)
author Tom Peters (thopeter) <thopeter@cisco.com>
Tue, 14 Sep 2021 20:04:37 +0000 (20:04 +0000)
committer Tom Peters (thopeter) <thopeter@cisco.com>
Tue, 14 Sep 2021 20:04:37 +0000 (20:04 +0000)
diff --git a/src/file_api/file_flows.cc b/src/file_api/file_flows.cc

index 0ff4743644121779c5434f3419cc508ba341ef02..9f1c24ff1823a58411728feb27c91c14d29aad97 100644 (file)
--- a/src/file_api/file_flows.cc
+++ b/src/file_api/file_flows.cc
@@ -428,7 +428,7 @@ bool FileFlows::file_process(Packet* p, const uint8_t* file_data, int data_size,
   *    false: ignore this file
   */
  bool FileFlows::set_file_name(const uint8_t* fname, uint32_t name_size, uint64_t file_id,
-    uint64_t multi_file_processing_id)
+    uint64_t multi_file_processing_id, const uint8_t* url, uint32_t url_size)
  {
      FileContext* context;
      if (file_id)
@@ -438,6 +438,9 @@ bool FileFlows::set_file_name(const uint8_t* fname, uint32_t name_size, uint64_t
      if ( !context )
          return false;
  
+    if ( !context->is_url_set() )
+        context->set_url((const char*)url, url_size);
+
      if ( !context->is_file_name_set() )
      {
          context->set_file_name((const char*)fname, name_size);
diff --git a/src/file_api/file_flows.h b/src/file_api/file_flows.h

index 7f2688b1f148cedc8b92912df72da45f834a419a..0e28cfdadba24e1faeed80ead92989cf04ee870b 100644 (file)
--- a/src/file_api/file_flows.h
+++ b/src/file_api/file_flows.h
@@ -83,7 +83,7 @@ public:
      uint64_t get_new_file_instance();
  
      bool set_file_name(const uint8_t* fname, uint32_t name_size, uint64_t file_id=0,
-        uint64_t multi_file_processing_id=0);
+        uint64_t multi_file_processing_id=0, const uint8_t* url=nullptr, uint32_t url_size=0);
  
      void set_sig_gen_state( bool enable )
      {
diff --git a/src/file_api/file_lib.cc b/src/file_api/file_lib.cc

index 5165915007ccb23321ef0af0ad36cea27c5958df..9ea1d1e775620cb8e78b484bcf2a8731044edc51 100644 (file)
--- a/src/file_api/file_lib.cc
+++ b/src/file_api/file_lib.cc
@@ -110,6 +110,8 @@ void FileInfo::copy(const FileInfo& other)
      file_id = other.file_id;
      file_name = other.file_name;
      file_name_set = other.file_name_set;
+    url = other.url;
+    url_set = other.url_set;
      verdict = other.verdict;
      file_type_enabled = other.file_type_enabled;
      file_signature_enabled = other.file_signature_enabled;
@@ -147,11 +149,26 @@ void FileInfo::set_file_name(const char* name, uint32_t name_size)
      file_name_set = true;
  }
  
+void FileInfo::set_url(const char* url_name, uint32_t url_size)
+{
+    if (url_name and url_size)
+    {
+        url.assign(url_name, url_size);
+    }
+
+    url_set = true;
+}
+
  std::string& FileInfo::get_file_name()
  {
      return file_name;
  }
  
+std::string& FileInfo::get_url()
+{
+    return url;
+}
+
  void FileInfo::set_file_size(uint64_t size)
  {
      file_size = size;
@@ -840,6 +857,8 @@ void FileContext::print_file_name(std::ostream& log)
  void FileContext::print(std::ostream& log)
  {
      print_file_name(log);
+    if (url.length() > 0)
+        log << "File URI: "<< url << std::endl;
      log << "File type: " << config->file_type_name(file_type_id)
          << '('<< file_type_id  << ')' << std::endl;
      log << "File size: " << file_size << std::endl;
diff --git a/src/file_api/file_lib.h b/src/file_api/file_lib.h

index d250f3aa1d78d192b7e4a591e9b73c50864abfcf..d57824a93c67fe39661131a04b88477ba7b3e4dc 100644 (file)
--- a/src/file_api/file_lib.h
+++ b/src/file_api/file_lib.h
@@ -54,9 +54,12 @@ public:
      FileInfo& operator=(const FileInfo& other);
      uint32_t get_file_type() const;
      void set_file_name(const char* file_name, uint32_t name_size);
+    void set_url(const char* url, uint32_t url_size);
      std::string& get_file_name();
+    std::string& get_url();
      // Whether file name has been set (could be empty file name)
      bool is_file_name_set() const { return file_name_set; }
+    bool is_url_set() const { return url_set; }
  
      void set_file_size(uint64_t size);
      uint64_t get_file_size() const;
@@ -89,6 +92,8 @@ public:
  protected:
      std::string file_name;
      bool file_name_set = false;
+    std::string url;
+    bool url_set = false;
      uint64_t file_size = 0;
      FileDirection direction = FILE_DOWNLOAD;
      uint32_t file_type_id = SNORT_FILE_TYPE_CONTINUE;
diff --git a/src/mime/file_mime_process.cc b/src/mime/file_mime_process.cc

index 08973f3c798151507a84e66061ba671a2d747370..a2cd6df19f415ddbb8944547355ad06af0553ad2 100644 (file)
--- a/src/mime/file_mime_process.cc
+++ b/src/mime/file_mime_process.cc
@@ -804,14 +804,16 @@ void MimeSession::exit()
  }
  
  MimeSession::MimeSession(Packet* p, DecodeConfig* dconf, MailLogConfig* lconf, uint64_t base_file_id,
-    bool session_is_http)
+    bool session_is_http, const uint8_t* uri, const int32_t uri_length):
+    decode_conf(dconf),
+    log_config(lconf),
+    log_state(new MailLogState(log_config)),
+    is_http(session_is_http),
+    session_base_file_id(base_file_id),
+    uri(uri),
+    uri_length(uri_length)
  {
-    decode_conf = dconf;
-    log_config =  lconf;
-    log_state = new MailLogState(log_config);
      p->flow->stash->store(STASH_EXTRADATA_MIME, log_state);
-    session_base_file_id = base_file_id;
-    is_http = session_is_http;
      reset_mime_paf_state(&mime_boundary);
      memory::MemoryCap::update_allocations(sizeof(*this));
  }
@@ -874,7 +876,7 @@ void MimeSession::mime_file_process(Packet* p, const uint8_t* data, int data_siz
          if (continue_inspecting_file and (isFileStart(position)) && log_state)
          {
              continue_inspecting_file = file_flows->set_file_name((const uint8_t*)filename.c_str(),
-                filename.length(), 0, get_multiprocessing_file_id());
+                filename.length(), 0, get_multiprocessing_file_id(), uri, uri_length);
              filename.clear();
          }
      }
diff --git a/src/mime/file_mime_process.h b/src/mime/file_mime_process.h

index 82b494423a65dbdd844783040535fdbbd9f019a3..aecc57d4f256d7f567d38135e35aa315b5f40abb 100644 (file)
--- a/src/mime/file_mime_process.h
+++ b/src/mime/file_mime_process.h
@@ -55,7 +55,8 @@ namespace snort
  class SO_PUBLIC MimeSession
  {
  public:
-    MimeSession(Packet*, DecodeConfig*, MailLogConfig*, uint64_t base_file_id=0, bool session_is_http=false);
+    MimeSession(Packet*, DecodeConfig*, MailLogConfig*, uint64_t base_file_id=0,
+        bool session_is_http=false, const uint8_t* uri=nullptr, const int32_t uri_length=0);
      virtual ~MimeSession();
  
      MimeSession(const MimeSession&) = delete;
@@ -84,7 +85,7 @@ private:
      MailLogState* log_state = nullptr;
      MimeStats* mime_stats = nullptr;
      std::string filename;
-    bool is_http = false;
+    bool is_http;
      bool continue_inspecting_file = true;
      // This counter is not an accurate count of files; used only for creating a unique mime_file_id
      uint32_t file_counter = 0;
@@ -92,6 +93,8 @@ private:
      uint64_t session_base_file_id = 0;
      uint64_t current_file_cache_file_id = 0;
      uint64_t current_multiprocessing_file_id = 0;
+    const uint8_t* uri;
+    const int32_t uri_length;
      uint64_t get_file_cache_file_id();
      uint64_t get_multiprocessing_file_id();
      void mime_file_process(Packet* p, const uint8_t* data, int data_size,
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc

index 869391577274e1e04762e620dea19de65b2980c6..63cd4d27795a068836864211aaadd131d56d0636 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -33,6 +33,7 @@
  #include "http_msg_header.h"
  #include "http_msg_request.h"
  #include "http_test_manager.h"
+#include "http_uri.h"
  
  using namespace snort;
  using namespace HttpCommon;
@@ -405,7 +406,6 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
              return;
  
          const FileDirection dir = source_id == SRC_SERVER ? FILE_DOWNLOAD : FILE_UPLOAD;
-        Field cont_disp_filename;
  
          const uint64_t file_index = get_header(source_id)->get_file_cache_index();
  
@@ -416,35 +416,21 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
          {
              session_data->file_depth_remaining[source_id] -= fp_length;
  
-            // With the first piece of the file we must provide the "name". If an upload contains a
-            // filename in a Content-Disposition header, we use that. Otherwise the name is the URI.
+            // With the first piece of the file we must provide the filename and URI
              if (front)
              {
                  if (request != nullptr)
                  {
-                    bool has_cd_filename = false;
-                    if (dir == FILE_UPLOAD)
-                    {
-                        const Field& cd_filename = get_header(source_id)->
-                            get_content_disposition_filename();
-                        if (cd_filename.length() > 0)
-                        {
-                            continue_processing_file = file_flows->set_file_name(
-                                cd_filename.start(), cd_filename.length(), 0, 
-                                get_header(source_id)->get_multi_file_processing_id());
-                            has_cd_filename = true;
-                        }
-                    }
-                    if (!has_cd_filename)
-                    {
-                        const Field& transaction_uri = request->get_uri();
-                        if (transaction_uri.length() > 0)
-                        {
-                            continue_processing_file = file_flows->set_file_name(
-                                transaction_uri.start(), transaction_uri.length(), 0,
-                                get_header(source_id)->get_multi_file_processing_id());
-                        }
-                    }
+                    const uint8_t* filename_buffer;
+                    const uint8_t* uri_buffer;
+                    uint32_t filename_length;
+                    uint32_t uri_length;
+                    get_file_info(dir, filename_buffer, filename_length, uri_buffer, uri_length);
+
+                    continue_processing_file = file_flows->set_file_name(filename_buffer,
+                        filename_length, 0,
+                        get_header(source_id)->get_multi_file_processing_id(), uri_buffer,
+                        uri_length);
                  }
              }
          }
@@ -465,6 +451,56 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
      }
  }
  
+// Parses out the filename and URI associated with this file.
+// For the filename, if this is an upload and the message has a Content-Disposition header with a
+// filename attribute, use that. Otherwise use the segment of the normalized URI path after the
+// last '/', not including the query or fragment. For the URI, use the normalized request URI. If
+// there is no URI or nothing in the path after the last slash, the buffers are left null/empty.
+void HttpMsgBody::get_file_info(FileDirection dir, const uint8_t*& filename_buffer,
+    uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length)
+{
+    filename_buffer = uri_buffer = nullptr;
+    filename_length = uri_length = 0;
+    HttpUri* http_uri = request->get_http_uri();
+
+    // First handle the content-disposition case
+    if (dir == FILE_UPLOAD)
+    {
+        const Field& cd_filename = get_header(source_id)->get_content_disposition_filename();
+        if (cd_filename.length() > 0)
+        {
+            filename_buffer = cd_filename.start();
+            filename_length = cd_filename.length();
+        }
+    }
+
+    if (http_uri)
+    {
+        const Field& uri_field = http_uri->get_norm_classic();
+        if (uri_field.length() > 0)
+        {
+            uri_buffer = uri_field.start();
+            uri_length = uri_field.length();
+        }
+
+        // Don't overwrite the content-disposition filename
+        if (filename_length > 0)
+            return;
+
+        const Field& path = http_uri->get_norm_path(); 
+        if (path.length() > 0)
+        {
+            const uint8_t* last_slash = (const uint8_t*)memrchr(path.start(), '/', path.length());
+            if (last_slash)
+            {
+                filename_length = (path.start() + path.length()) - (last_slash + 1);
+                if (filename_length > 0)
+                    filename_buffer = last_slash + 1;
+            }
+        }
+    }
+}
+
  const Field& HttpMsgBody::get_classic_client_body()
  {
      return classic_normalize(detect_data, classic_client_body, false, params->uri_param);
diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h

index 81a81458afc7f172dff2927fec2bb725c21e0b36..460b44a7d33e3993a04e2b269d4f2cd120c555b3 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_body.h
+++ b/src/service_inspectors/http_inspect/http_msg_body.h
@@ -68,6 +68,8 @@ private:
      void bookkeeping_regular_flush(uint32_t& partial_detect_length,
          uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length,
          int32_t detect_length);
+    void get_file_info( FileDirection dir, const uint8_t*& filename_buffer,
+        uint32_t& filename_length, const uint8_t*& uri_buffer, uint32_t& uri_length);
  
      // In order of generation
      Field msg_text_new;
diff --git a/src/service_inspectors/http_inspect/http_msg_header.cc b/src/service_inspectors/http_inspect/http_msg_header.cc

index 913be484dc78671f2a0e8ece83dc38b80296a8c7..724b9546ac4ef44f9c5f49d4762ad76ef7d58bd9 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_msg_header.cc
+++ b/src/service_inspectors/http_inspect/http_msg_header.cc
@@ -499,8 +499,16 @@ void HttpMsgHeader::setup_file_processing()
              if (boundary_present(content_type))
              {
                  Packet* p = DetectionEngine::get_current_packet();
-                session_data->mime_state[source_id] = new MimeSession(p, &FileService::decode_conf,
-                    &mime_conf, get_multi_file_processing_id(), true);
+                const Field& uri = request->get_uri_norm_classic();
+                if (uri.length() > 0)
+                    session_data->mime_state[source_id] = new MimeSession(p,
+                        &FileService::decode_conf, &mime_conf, get_multi_file_processing_id(),
+                        true, uri.start(), uri.length());
+                else
+                    session_data->mime_state[source_id] = new MimeSession(p,
+                        &FileService::decode_conf, &mime_conf, get_multi_file_processing_id(),
+                        true);
+
                  // Show file processing the Content-Type header as if it were regular data.
                  // This will enable it to find the boundary string.
                  // FIXIT-L develop a proper interface for passing the boundary string.
author	Tom Peters (thopeter) <thopeter@cisco.com>
	Tue, 14 Sep 2021 20:04:37 +0000 (20:04 +0000)
committer	Tom Peters (thopeter) <thopeter@cisco.com>
	Tue, 14 Sep 2021 20:04:37 +0000 (20:04 +0000)
src/file_api/file_flows.cc		patch \| blob \| blame \| history
src/file_api/file_flows.h		patch \| blob \| blame \| history
src/file_api/file_lib.cc		patch \| blob \| blame \| history
src/file_api/file_lib.h		patch \| blob \| blame \| history
src/mime/file_mime_process.cc		patch \| blob \| blame \| history
src/mime/file_mime_process.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_body.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_body.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_header.cc		patch \| blob \| blame \| history