From: Mike Stepanek (mstepane) <mstepane@cisco.com>
Date: Mon, 27 Jul 2020 13:54:00 +0000 (+0000)
Subject: Merge pull request #2346 in SNORT/snort3 from ~THOPETER/snort3:nhttp143 to master
X-Git-Tag: 3.0.2-3~7
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=81c45a4672acbaf6672b166ea395628f2d07061f;p=thirdparty%2Fsnort3.git

Merge pull request #2346 in SNORT/snort3 from ~THOPETER/snort3:nhttp143 to master

Squashed commit of the following:

commit 9fce119f40acb34d7bc5cfcf4ed69f62d5af0811
Author: Tom Peters <thopeter@cisco.com>
Date:   Wed Jul 22 13:39:36 2020 -0400

    http_inspect: do partial inspections incrementally
---

diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc
index 23ed128a7..bedc35db3 100644
--- a/src/service_inspectors/http_inspect/http_flow_data.cc
+++ b/src/service_inspectors/http_inspect/http_flow_data.cc
@@ -86,6 +86,7 @@ HttpFlowData::~HttpFlowData()
         delete events[k];
         delete[] section_buffer[k];
         delete[] partial_buffer[k];
+        delete[] partial_detect_buffer[k];
         HttpTransaction::delete_transaction(transaction[k], nullptr);
         delete cutter[k];
         if (compress_stream[k] != nullptr)
@@ -119,6 +120,7 @@ void HttpFlowData::half_reset(SourceId source_id)
     version_id[source_id] = VERS__NOT_PRESENT;
     data_length[source_id] = STAT_NOT_PRESENT;
     body_octets[source_id] = STAT_NOT_PRESENT;
+    partial_inspected_octets[source_id] = 0;
     section_size_target[source_id] = 0;
     stretch_section_to_packet[source_id] = false;
     detained_inspection[source_id] = false;
diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h
index 2a9be9d1a..43a47e6dd 100644
--- a/src/service_inspectors/http_inspect/http_flow_data.h
+++ b/src/service_inspectors/http_inspect/http_flow_data.h
@@ -161,6 +161,9 @@ private:
 
     // number of user data octets seen so far (regular body or chunks)
     int64_t body_octets[2] = { HttpCommon::STAT_NOT_PRESENT, HttpCommon::STAT_NOT_PRESENT };
+    uint32_t partial_inspected_octets[2] = { 0, 0 };
+    uint8_t* partial_detect_buffer[2] = { nullptr, nullptr };
+    uint32_t partial_detect_length[2] = { 0, 0 };
     int32_t status_code_num = HttpCommon::STAT_NOT_PRESENT;
     HttpEnums::VersionId version_id[2] = { HttpEnums::VERS__NOT_PRESENT,
                                             HttpEnums::VERS__NOT_PRESENT };
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc
index 2ee2b60e6..8082399fc 100644
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -48,30 +48,70 @@ HttpMsgBody::HttpMsgBody(const uint8_t* buffer, const uint16_t buf_size,
 
 void HttpMsgBody::analyze()
 {
-    do_utf_decoding(msg_text, decoded_body);
+    uint32_t& partial_inspected_octets = session_data->partial_inspected_octets[source_id];
+
+    // When there have been partial inspections we focus on the part of the message we have not
+    // seen before
+    if (partial_inspected_octets > 0)
+        msg_text_new.set(msg_text.length() - partial_inspected_octets,
+            msg_text.start() + partial_inspected_octets);
+    else
+        msg_text_new.set(msg_text);
+
+    do_utf_decoding(msg_text_new, decoded_body);
+
+    if (session_data->file_depth_remaining[source_id] > 0)
+    {
+        do_file_processing(decoded_body);
+    }
 
     if (session_data->detect_depth_remaining[source_id] > 0)
     {
         do_file_decompression(decoded_body, decompressed_file_body);
         do_js_normalization(decompressed_file_body, js_norm_body);
+
+        uint32_t& partial_detect_length = session_data->partial_detect_length[source_id];
+        uint8_t*& partial_detect_buffer = session_data->partial_detect_buffer[source_id];
+        const int32_t total_length = js_norm_body.length() + partial_detect_length;
         const int32_t detect_length =
-            (js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ?
-            js_norm_body.length() : session_data->detect_depth_remaining[source_id];
-        detect_data.set(detect_length, js_norm_body.start());
+            (total_length <= session_data->detect_depth_remaining[source_id]) ?
+            total_length : session_data->detect_depth_remaining[source_id];
+
+        if (partial_detect_length > 0)
+        {
+            uint8_t* const detect_buffer = new uint8_t[total_length];
+            memcpy(detect_buffer, partial_detect_buffer, partial_detect_length);
+            memcpy(detect_buffer + partial_detect_length, js_norm_body.start(),
+                js_norm_body.length());
+            detect_data.set(total_length, detect_buffer, true);
+        }
+        else
+        {
+            detect_data.set(detect_length, js_norm_body.start());
+        }
+
+        delete[] partial_detect_buffer;
+
         if (!session_data->partial_flush[source_id])
+        {
             session_data->detect_depth_remaining[source_id] -= detect_length;
+            partial_detect_buffer = nullptr;
+            partial_detect_length = 0;
+        }
+        else
+        {
+            uint8_t* const save_partial = new uint8_t[detect_data.length()];
+            memcpy(save_partial, detect_data.start(), detect_data.length());
+            partial_detect_buffer = save_partial;
+            partial_detect_length = detect_data.length();
+        }
+
         set_file_data(const_cast<uint8_t*>(detect_data.start()),
             (unsigned)detect_data.length());
     }
 
-    // Only give data to file processing once, when we inspect the entire message section.
-    if (!session_data->partial_flush[source_id] &&
-        (session_data->file_depth_remaining[source_id] > 0))
-    {
-        do_file_processing(decoded_body);
-    }
-
     body_octets += msg_text.length();
+    partial_inspected_octets = session_data->partial_flush[source_id] ? msg_text.length() : 0;
 }
 
 void HttpMsgBody::do_utf_decoding(const Field& input, Field& output)
@@ -203,7 +243,8 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
 {
     // Using the trick that cutter is deleted when regular or chunked body is complete
     Packet* p = DetectionEngine::get_current_packet();
-    const bool front = (body_octets == 0);
+    const bool front = (body_octets == 0) &&
+        (session_data->partial_inspected_octets[source_id] == 0);
     const bool back = (session_data->cutter[source_id] == nullptr) || tcp_close;
 
     FilePosition file_position;
@@ -237,7 +278,8 @@ void HttpMsgBody::do_file_processing(const Field& file_data)
             file_index = request->get_http_uri()->get_file_proc_hash();
         }
 
-        if (file_flows->file_process(p, file_index, file_data.start(), fp_length, body_octets, dir,
+        if (file_flows->file_process(p, file_index, file_data.start(), fp_length,
+            body_octets + session_data->partial_inspected_octets[source_id], dir,
             transaction->get_file_processing_id(source_id), file_position))
         {
             session_data->file_depth_remaining[source_id] -= fp_length;
diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h
index c84079334..4e000cdf1 100644
--- a/src/service_inspectors/http_inspect/http_msg_body.h
+++ b/src/service_inspectors/http_inspect/http_msg_body.h
@@ -60,11 +60,13 @@ private:
     void do_file_decompression(const Field& input, Field& output);
     void do_js_normalization(const Field& input, Field& output);
 
-    Field detect_data;
-    Field classic_client_body;   // URI normalization applied
+    // In order of generation
+    Field msg_text_new;
     Field decoded_body;
     Field decompressed_file_body;
     Field js_norm_body;
+    Field detect_data;
+    Field classic_client_body;   // URI normalization applied
 };
 
 #endif