From: Mike Stepanek (mstepane) <mstepane@cisco.com>
Date: Fri, 19 Feb 2021 19:55:19 +0000 (+0000)
Subject: Merge pull request #2740 in SNORT/snort3 from ~MDAGON/snort3:chunk_partial to master
X-Git-Tag: 3.1.2.0~36
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3c65c22c1984228a4ad10a4a8aaa3bb138e3bde9;p=thirdparty%2Fsnort3.git

Merge pull request #2740 in SNORT/snort3 from ~MDAGON/snort3:chunk_partial to master

Squashed commit of the following:

commit 4549c4b769a5cb8f0cc2535385a1525dcc0da6e1
Author: mdagon <mdagon@cisco.com>
Date:   Thu Jan 28 09:12:47 2021 -0500

    http_inspect: partial inspection for 0 length chunk
---

diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt
index 1f066908c..9b846c825 100755
--- a/src/service_inspectors/http_inspect/dev_notes.txt
+++ b/src/service_inspectors/http_inspect/dev_notes.txt
@@ -47,6 +47,18 @@ The http_inspect partial inspection mechanism is also used by http2_inspect to m
 boundaries. When inspecting HTTP/2, a partial inspection by http_inspect may occur because script
 detection triggered it, because H2I wanted it, or both. 
 
+Some applications may be affected by blocks too late scenarios related to seeing part of the
+zero-length chunk. For example a TCP packet that ends with:
+    8<CR><LF>abcdefgh<CR><LF>0
+might be sufficient to forward the available data ("abcdefgh") to the application even though the
+final <CR><LF> has not been received.
+Note that the actual next bytes are uncertain here. The next packet might begin with <CR><LF>, but
+    100000<CR><LF>ijklmnopq ...
+is another perfectly legal possibility. There is no rule against starting a nonzero chunk length
+with a zero character and some applications reputedly do this.
+As a precaution partial inspections performed when 1) a TCP segment ends inside a possible
+zero-length chunk or 2) chunk processing fails (broken chunk).
+
 HttpFlowData is a data class representing all HI information relating to a flow. It serves as
 persistent memory between invocations of HI by the framework. It also glues together the inspector,
 the client-to-server splitter, and the server-to-client splitter which pass information through the
diff --git a/src/service_inspectors/http_inspect/http_cutter.cc b/src/service_inspectors/http_inspect/http_cutter.cc
index 9d33cc55b..7c065b08d 100644
--- a/src/service_inspectors/http_inspect/http_cutter.cc
+++ b/src/service_inspectors/http_inspect/http_cutter.cc
@@ -434,6 +434,13 @@ ScanResult HttpBodyOldCutter::cut(const uint8_t* buffer, uint32_t length, HttpIn
     }
 }
 
+void HttpBodyChunkCutter::transition_to_chunk_bad(bool& accelerate_this_packet)
+{
+    curr_state = CHUNK_BAD;
+    accelerate_this_packet = true;
+    zero_chunk = false;
+}
+
 ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
     HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, bool stretch,
     HttpEnums::H2BodyState)
@@ -450,6 +457,7 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
         switch (curr_state)
         {
         case CHUNK_NEWLINES:
+            zero_chunk = true;
             // Looking for improper CRLFs before the chunk header
             if (is_cr_lf[buffer[k]])
             {
@@ -470,7 +478,7 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
                 if (num_leading_ws == 5)
                 {
                     events->create_event(EVENT_BROKEN_CHUNK);
-                    curr_state = CHUNK_BAD;
+                    transition_to_chunk_bad(accelerate_this_packet);
                     k--;
                 }
                 break;
@@ -523,7 +531,7 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
                 // illegal character present in chunk length
                 *infractions += INF_CHUNK_BAD_CHAR;
                 events->create_event(EVENT_BROKEN_CHUNK);
-                curr_state = CHUNK_BAD;
+                transition_to_chunk_bad(accelerate_this_packet);
                 k--;
             }
             else
@@ -534,9 +542,11 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
                     // overflow protection: must fit into 32 bits
                     *infractions += INF_CHUNK_TOO_LARGE;
                     events->create_event(EVENT_BROKEN_CHUNK);
-                    curr_state = CHUNK_BAD;
+                    transition_to_chunk_bad(accelerate_this_packet);
                     k--;
                 }
+                if (expected != 0)
+                    zero_chunk = false;
             }
             break;
         case CHUNK_TRAILING_WS:
@@ -563,7 +573,7 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
                 // illegal character present in chunk length
                 *infractions += INF_CHUNK_BAD_CHAR;
                 events->create_event(EVENT_BROKEN_CHUNK);
-                curr_state = CHUNK_BAD;
+                transition_to_chunk_bad(accelerate_this_packet);
                 k--;
             }
             break;
@@ -591,7 +601,7 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
                 // ambiguous.
                 *infractions += INF_CHUNK_LONE_CR;
                 events->create_event(EVENT_BROKEN_CHUNK);
-                curr_state = CHUNK_BAD;
+                transition_to_chunk_bad(accelerate_this_packet);
                 k--;
                 break;
             }
@@ -610,7 +620,7 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
             {
                 *infractions += INF_CHUNK_NO_LENGTH;
                 events->create_event(EVENT_BROKEN_CHUNK);
-                curr_state = CHUNK_BAD;
+                transition_to_chunk_bad(accelerate_this_packet);
                 k--;
             }
             break;
@@ -656,7 +666,7 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
             {
                 *infractions += INF_CHUNK_BAD_END;
                 events->create_event(EVENT_BROKEN_CHUNK);
-                curr_state = CHUNK_BAD;
+                transition_to_chunk_bad(accelerate_this_packet);
                 k--;
             }
             break;
@@ -725,7 +735,11 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
     }
 
     octets_seen += length;
-    return accelerate_this_packet ? SCAN_NOT_FOUND_ACCELERATE : SCAN_NOT_FOUND;
+
+    if (accelerate_this_packet || (zero_chunk && data_seen))
+        return SCAN_NOT_FOUND_ACCELERATE;
+
+    return SCAN_NOT_FOUND;
 }
 
 ScanResult HttpBodyH2Cutter::cut(const uint8_t* buffer, uint32_t length,
@@ -814,7 +828,10 @@ ScanResult HttpBodyH2Cutter::cut(const uint8_t* buffer, uint32_t length,
 
 bool HttpBodyCutter::need_accelerated_blocking(const uint8_t* data, uint32_t length)
 {
-    return accelerated_blocking && dangerous(data, length);
+    const bool need_accelerated_blocking = accelerated_blocking && dangerous(data, length);
+    if (need_accelerated_blocking)
+        HttpModule::increment_peg_counts(PEG_SCRIPT_DETECTION);	 
+    return need_accelerated_blocking;
 }
 
 bool HttpBodyCutter::find_partial(const uint8_t* input_buf, uint32_t input_length, bool end)
diff --git a/src/service_inspectors/http_inspect/http_cutter.h b/src/service_inspectors/http_inspect/http_cutter.h
index 8c74e44d0..b2cb2142d 100644
--- a/src/service_inspectors/http_inspect/http_cutter.h
+++ b/src/service_inspectors/http_inspect/http_cutter.h
@@ -164,6 +164,8 @@ public:
     void soft_reset() override { num_good_chunks = 0; HttpBodyCutter::soft_reset(); }
 
 private:
+    void transition_to_chunk_bad(bool& accelerate_this_packet);
+
     uint32_t data_seen = 0;
     HttpEnums::ChunkState curr_state = HttpEnums::CHUNK_NEWLINES;
     uint32_t expected = 0;
@@ -171,6 +173,7 @@ private:
     uint32_t num_zeros = 0;
     uint32_t digits_seen = 0;
     uint32_t num_good_chunks = 0;  // that end in the current section
+    bool zero_chunk = true;
 };
 
 class HttpBodyH2Cutter : public HttpBodyCutter
diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h
index 4f63e15b2..5f255232c 100644
--- a/src/service_inspectors/http_inspect/http_flow_data.h
+++ b/src/service_inspectors/http_inspect/http_flow_data.h
@@ -168,6 +168,7 @@ private:
     uint32_t partial_inspected_octets[2] = { 0, 0 };
     uint8_t* partial_detect_buffer[2] = { nullptr, nullptr };
     uint32_t partial_detect_length[2] = { 0, 0 };
+    uint32_t partial_js_detect_length[2] = { 0, 0 };
     int32_t status_code_num = HttpCommon::STAT_NOT_PRESENT;
     HttpEnums::VersionId version_id[2] = { HttpEnums::VERS__NOT_PRESENT,
                                             HttpEnums::VERS__NOT_PRESENT };
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc
index 2e10c7344..3abb9dbd5 100644
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -47,6 +47,34 @@ HttpMsgBody::HttpMsgBody(const uint8_t* buffer, const uint16_t buf_size,
     get_related_sections();
 }
 
+void HttpMsgBody::bookkeeping_regular_flush(uint32_t& partial_detect_length,
+    uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length, int32_t detect_length)
+{
+    session_data->detect_depth_remaining[source_id] -= detect_length;
+    partial_detect_buffer = nullptr;
+    partial_detect_length = 0;
+    partial_js_detect_length = 0;
+}
+
+void HttpMsgBody::clean_partial(uint32_t& partial_inspected_octets, uint32_t& partial_detect_length,
+    uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length, int32_t detect_length)
+{
+    body_octets += msg_text.length();
+    partial_inspected_octets = session_data->partial_flush[source_id] ? msg_text.length() : 0;
+
+    if (session_data->partial_flush[source_id])
+        return;
+
+    if (session_data->detect_depth_remaining[source_id] > 0)
+    {
+        delete[] partial_detect_buffer;
+        session_data->update_deallocations(partial_detect_length);
+        assert(detect_length <= session_data->detect_depth_remaining[source_id]);
+        bookkeeping_regular_flush(partial_detect_length, partial_detect_buffer, partial_js_detect_length,
+            detect_length);
+    }
+}
+
 void HttpMsgBody::analyze()
 {
     uint32_t& partial_inspected_octets = session_data->partial_inspected_octets[source_id];
@@ -54,8 +82,17 @@ void HttpMsgBody::analyze()
     // When there have been partial inspections we focus on the part of the message we have not
     // seen before
     if (partial_inspected_octets > 0)
+    {
+        assert(msg_text.length() >= (int32_t)partial_inspected_octets);
+        // For regular flush, file processing needs to be finalized.
+        // Continue even if there is no new information
+        if ((msg_text.length() == (int32_t)partial_inspected_octets)
+            && session_data->partial_flush[source_id])
+            return;
+
         msg_text_new.set(msg_text.length() - partial_inspected_octets,
             msg_text.start() + partial_inspected_octets);
+    }
     else
         msg_text_new.set(msg_text);
 
@@ -69,43 +106,52 @@ void HttpMsgBody::analyze()
     if (session_data->detect_depth_remaining[source_id] > 0)
     {
         do_file_decompression(decoded_body, decompressed_file_body);
-        do_js_normalization(decompressed_file_body, js_norm_body);
 
         uint32_t& partial_detect_length = session_data->partial_detect_length[source_id];
         uint8_t*& partial_detect_buffer = session_data->partial_detect_buffer[source_id];
-        const int32_t total_length = partial_detect_length + js_norm_body.length();
-        const int32_t detect_length =
-            (total_length <= session_data->detect_depth_remaining[source_id]) ?
-            total_length : session_data->detect_depth_remaining[source_id];
+        uint32_t& partial_js_detect_length = session_data->partial_js_detect_length[source_id];
 
         if (partial_detect_length > 0)
         {
-            uint8_t* const detect_buffer = new uint8_t[total_length];
-            memcpy(detect_buffer, partial_detect_buffer, partial_detect_length);
-            memcpy(detect_buffer + partial_detect_length, js_norm_body.start(),
-                js_norm_body.length());
-            detect_data.set(detect_length, detect_buffer, true);
+            const int32_t total_length = partial_detect_length + decompressed_file_body.length();
+            uint8_t* const cumulative_buffer = new uint8_t[total_length];
+            memcpy(cumulative_buffer, partial_detect_buffer, partial_detect_length);
+            memcpy(cumulative_buffer + partial_detect_length, decompressed_file_body.start(),
+                decompressed_file_body.length());
+            cumulative_data.set(total_length, cumulative_buffer, true);
+            do_js_normalization(cumulative_data, js_norm_body);
+            if ((int32_t)partial_js_detect_length == js_norm_body.length())
+            {
+                clean_partial(partial_inspected_octets, partial_detect_length,
+                    partial_detect_buffer, partial_js_detect_length, js_norm_body.length());
+                return;
+            }
         }
         else
-        {
-            detect_data.set(detect_length, js_norm_body.start());
-        }
+            do_js_normalization(decompressed_file_body, js_norm_body);
+
+        const int32_t detect_length =
+            (js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ?
+            js_norm_body.length() : session_data->detect_depth_remaining[source_id];
+        detect_data.set(detect_length, js_norm_body.start());
 
         delete[] partial_detect_buffer;
         session_data->update_deallocations(partial_detect_length);
 
         if (!session_data->partial_flush[source_id])
         {
-            session_data->detect_depth_remaining[source_id] -= detect_length;
-            partial_detect_buffer = nullptr;
-            partial_detect_length = 0;
+            bookkeeping_regular_flush(partial_detect_length, partial_detect_buffer,
+                partial_js_detect_length, detect_length);
         }
         else
         {
-            uint8_t* const save_partial = new uint8_t[detect_data.length()];
-            memcpy(save_partial, detect_data.start(), detect_data.length());
+            Field* decompressed = (cumulative_data.length() > 0) ?
+                &cumulative_data : &decompressed_file_body;
+            uint8_t* const save_partial = new uint8_t[decompressed->length()];
+            memcpy(save_partial, decompressed->start(), decompressed->length());
             partial_detect_buffer = save_partial;
-            partial_detect_length = detect_data.length();
+            partial_detect_length = decompressed->length();
+            partial_js_detect_length = js_norm_body.length();
             session_data->update_allocations(partial_detect_length);
         }
 
diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h
index 4e000cdf1..56b633e3c 100644
--- a/src/service_inspectors/http_inspect/http_msg_body.h
+++ b/src/service_inspectors/http_inspect/http_msg_body.h
@@ -59,11 +59,18 @@ private:
     void do_utf_decoding(const Field& input, Field& output);
     void do_file_decompression(const Field& input, Field& output);
     void do_js_normalization(const Field& input, Field& output);
+    void clean_partial(uint32_t& partial_inspected_octets, uint32_t& partial_detect_length,
+        uint8_t*& partial_detect_buffer,  uint32_t& partial_js_detect_length,
+        int32_t detect_length);
+    void bookkeeping_regular_flush(uint32_t& partial_detect_length,
+        uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length,
+        int32_t detect_length);
 
     // In order of generation
     Field msg_text_new;
     Field decoded_body;
     Field decompressed_file_body;
+    Field cumulative_data;
     Field js_norm_body;
     Field detect_data;
     Field classic_client_body;   // URI normalization applied
diff --git a/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc b/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc
index 9d84e47a6..a8fb81358 100644
--- a/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc
+++ b/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc
@@ -145,8 +145,8 @@ bool HttpStreamSplitter::finish(Flow* flow)
 
             uint64_t file_index = header->get_file_cache_index();
             const uint64_t file_processing_id = header->get_multi_file_processing_id();
-            file_flows->file_process(packet, file_index, nullptr, 0, 0, dir, file_processing_id,
-                SNORT_FILE_END);
+            file_flows->file_process(packet, file_index, nullptr, 0,
+                session_data->file_octets[source_id], dir, file_processing_id, SNORT_FILE_END);
 #ifdef REG_TEST
             if (HttpTestManager::use_test_output(HttpTestManager::IN_HTTP))
             {
diff --git a/src/service_inspectors/http_inspect/http_stream_splitter_scan.cc b/src/service_inspectors/http_inspect/http_stream_splitter_scan.cc
index eee62160b..9fbf5e884 100644
--- a/src/service_inspectors/http_inspect/http_stream_splitter_scan.cc
+++ b/src/service_inspectors/http_inspect/http_stream_splitter_scan.cc
@@ -261,7 +261,6 @@ StreamSplitter::Status HttpStreamSplitter::scan(Packet* pkt, const uint8_t* data
 
         if (cut_result == SCAN_NOT_FOUND_ACCELERATE)
         {
-            HttpModule::increment_peg_counts(PEG_SCRIPT_DETECTION);
             prep_partial_flush(flow, length);
 #ifdef REG_TEST
             if (!HttpTestManager::use_test_input(HttpTestManager::IN_HTTP))