From: Mike Stepanek (mstepane) <mstepane@cisco.com>
Date: Mon, 24 Aug 2020 20:04:20 +0000 (+0000)
Subject: Merge pull request #2414 in SNORT/snort3 from ~KATHARVE/snort3:file_upload_fix to... 
X-Git-Tag: 3.0.2-6~45
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7f7c8b95d9d7344fe97a4a25246ab5b3141cd32c;p=thirdparty%2Fsnort3.git

Merge pull request #2414 in SNORT/snort3 from ~KATHARVE/snort3:file_upload_fix to master

Squashed commit of the following:

commit 6dd1edc686aabf6e1803eb1803b3e67856f3385b
Author: Katura Harvey <katharve@cisco.com>
Date:   Tue Aug 11 16:53:17 2020 -0400

    http_inspect: don't use the URL to cache file verdicts for uploads
---

diff --git a/src/file_api/file_cache.cc b/src/file_api/file_cache.cc
index 6bfe5569f..0038a9425 100644
--- a/src/file_api/file_cache.cc
+++ b/src/file_api/file_cache.cc
@@ -353,7 +353,7 @@ bool FileCache::apply_verdict(Packet* p, FileContext* file_ctx, FileVerdict verd
         file_ctx->log_file_event(flow, policy);
         policy->log_file_action(flow, file_ctx, FILE_RESUME_BLOCK);
     }
-    else
+    else if (file_ctx->is_cacheable())
         store_verdict(flow, file_ctx, block_timeout);
 
     return true;
diff --git a/src/file_api/file_flows.cc b/src/file_api/file_flows.cc
index d55f7fce0..7f697686b 100644
--- a/src/file_api/file_flows.cc
+++ b/src/file_api/file_flows.cc
@@ -173,7 +173,10 @@ FileContext* FileFlows::find_main_file_context(FilePosition pos, FileDirection d
     context->check_policy(flow, dir, file_policy);
 
     if (!index)
+    {
         context->set_file_id(get_new_file_instance());
+        context->set_not_cacheable();
+    }
     else
         context->set_file_id(index);
 
@@ -265,6 +268,8 @@ bool FileFlows::file_process(Packet* p, uint64_t file_id, const uint8_t* file_da
 {
     int64_t file_depth = FileService::get_max_file_depth();
     bool continue_processing;
+    bool cacheable = file_id or offset;
+
     if (!multi_file_processing_id)
         multi_file_processing_id = file_id;
 
@@ -278,6 +283,9 @@ bool FileFlows::file_process(Packet* p, uint64_t file_id, const uint8_t* file_da
     if (!context)
         return false;
 
+    if (!cacheable)
+        context->set_not_cacheable();
+
     set_current_file_context(context);
 
     if (!context->get_processed_bytes())
@@ -286,7 +294,7 @@ bool FileFlows::file_process(Packet* p, uint64_t file_id, const uint8_t* file_da
         context->set_file_id(file_id);
     }
 
-    if ( offset != 0 and
+    if ( offset != 0 and context->is_cacheable() and
         (FileService::get_file_cache()->cached_verdict_lookup(p, context, file_policy) !=
             FILE_VERDICT_UNKNOWN) )
     {
diff --git a/src/file_api/file_lib.cc b/src/file_api/file_lib.cc
index 54ddaefd9..40b16011a 100644
--- a/src/file_api/file_lib.cc
+++ b/src/file_api/file_lib.cc
@@ -418,8 +418,8 @@ bool FileContext::process(Packet* p, const uint8_t* file_data, int data_size,
         return false;
     }
 
-    if ((FileService::get_file_cache()->cached_verdict_lookup(p, this,
-        policy) != FILE_VERDICT_UNKNOWN))
+    if (cacheable and (FileService::get_file_cache()->cached_verdict_lookup(p, this, policy) !=
+        FILE_VERDICT_UNKNOWN))
     {
         processing_complete = true;
         return true;
diff --git a/src/file_api/file_lib.h b/src/file_api/file_lib.h
index 9626ccece..9ae475f5e 100644
--- a/src/file_api/file_lib.h
+++ b/src/file_api/file_lib.h
@@ -135,6 +135,8 @@ public:
     static void print_file_data(FILE* fp, const uint8_t* data, int len, int max_depth);
     void print(std::ostream&);
     char* get_UTF8_fname(size_t* converted_len);
+    void set_not_cacheable() { cacheable = false; }
+    bool is_cacheable() { return cacheable; }
 
 private:
     uint64_t processed_bytes = 0;
@@ -143,6 +145,7 @@ private:
     FileSegments* file_segments;
     FileInspect* inspector;
     FileConfig*  config;
+    bool cacheable = true;
 
     inline void finalize_file_type();
     inline void finish_signature_lookup(Packet*, bool, FilePolicyBase*);
diff --git a/src/mime/file_mime_process.cc b/src/mime/file_mime_process.cc
index 8d137dfa1..426865d8d 100644
--- a/src/mime/file_mime_process.cc
+++ b/src/mime/file_mime_process.cc
@@ -641,7 +641,8 @@ void MimeSession::reset_file_data()
     // Clear MIME's file data to prepare for next file
     file_counter++;
     file_process_offset = 0;
-    current_mime_file_id = 0;
+    current_file_cache_file_id = 0;
+    current_multiprocessing_file_id = 0;
     continue_inspecting_file = true;
 }
 
@@ -821,17 +822,29 @@ MimeSession::~MimeSession()
         delete(log_state);
 }
 
-uint64_t MimeSession::get_mime_file_id()
+// File verdicts get cached with key (file_id, sip, dip). File_id is hash of filename if available.
+// Otherwise file_id is 0 and verdict will not be cached.
+uint64_t MimeSession::get_file_cache_file_id()
 {
-    if (!current_mime_file_id)
+    if (!current_file_cache_file_id and filename.length() > 0)
+        current_file_cache_file_id = str_to_hash((const uint8_t*)filename.c_str(), filename.length());
+    return current_file_cache_file_id;
+}
+
+// This file id is used to store file contexts on the flow during processing. Each file processed
+// per flow needs a unique (per flow) id, so use hash of the URL (passed from http_inspect) with a
+// file counter
+uint64_t MimeSession::get_multiprocessing_file_id()
+{
+    if (!current_multiprocessing_file_id)
     {
         const int data_len = sizeof(session_base_file_id) + sizeof(file_counter);
         uint8_t data[data_len];
         memcpy(data, (void*)&session_base_file_id, sizeof(session_base_file_id));
         memcpy(data + sizeof(session_base_file_id), (void*)&file_counter, sizeof(file_counter));
-        current_mime_file_id = str_to_hash(data, data_len);
+        current_multiprocessing_file_id = str_to_hash(data, data_len);
     }
-    return current_mime_file_id;
+    return current_multiprocessing_file_id;
 }
 
 void MimeSession::mime_file_process(Packet* p, const uint8_t* data, int data_size,
@@ -848,13 +861,8 @@ void MimeSession::mime_file_process(Packet* p, const uint8_t* data, int data_siz
         {
             const FileDirection dir = upload? FILE_UPLOAD : FILE_DOWNLOAD;
             uint64_t offset = file_process_offset;
-            // MIME has found the end of a file that file processing didn't want - tell file
-            // processing it can clear this file's data
-            if (!continue_inspecting_file)
-                offset = 0;
-            uint64_t file_id = get_mime_file_id();
-            continue_inspecting_file = file_flows->file_process(p, file_id, data, data_size, offset,
-                dir, file_id, position);
+            continue_inspecting_file = file_flows->file_process(p, get_file_cache_file_id(), data,
+                data_size, offset, dir, get_multiprocessing_file_id(), position);
         }
         else
         {
diff --git a/src/mime/file_mime_process.h b/src/mime/file_mime_process.h
index f44c1b231..537571fcb 100644
--- a/src/mime/file_mime_process.h
+++ b/src/mime/file_mime_process.h
@@ -90,8 +90,10 @@ private:
     uint32_t file_counter = 0;
     uint32_t file_process_offset = 0;
     uint64_t session_base_file_id = 0;
-    uint64_t current_mime_file_id = 0;
-    uint64_t get_mime_file_id();
+    uint64_t current_file_cache_file_id = 0;
+    uint64_t current_multiprocessing_file_id = 0;
+    uint64_t get_file_cache_file_id();
+    uint64_t get_multiprocessing_file_id();
     void mime_file_process(Packet* p, const uint8_t* data, int data_size,
         FilePosition position, bool upload);
     void reset_file_data();
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc
index a5034ae53..0e43af24b 100644
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -273,7 +273,11 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
 
         size_t file_index = 0;
 
-        if ((request != nullptr) and (request->get_http_uri() != nullptr))
+        // For downloads file_id for the file cache is the URL since that should be unique per file.
+        // Upload verdicts are not currently cached since we have no unique information.
+        // FIXIT-E For uploads use the filename for the file_id when available.
+        if ((request != nullptr) and (request->get_http_uri() != nullptr)
+            and (dir == FILE_DOWNLOAD))
         {
             file_index = request->get_http_uri()->get_file_proc_hash();
         }