git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Merge pull request #1235 in SNORT/snort3 from nhttp104 to master
authorTom Peters (thopeter) <thopeter@cisco.com>
Tue, 22 May 2018 16:26:48 +0000 (12:26 -0400)
committerTom Peters (thopeter) <thopeter@cisco.com>
Tue, 22 May 2018 16:26:48 +0000 (12:26 -0400)
Squashed commit of the following:

commit 44c803bf62cf1138968a11e232dc4c9e854f4438
Author: Tom Peters <thopeter@cisco.com>
Date:   Mon May 14 16:58:25 2018 -0400

    http_inspect: performance enhancements for file processing beyond detection depth

src/service_inspectors/http_inspect/http_cutter.cc
src/service_inspectors/http_inspect/http_cutter.h
src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_flow_data.cc
src/service_inspectors/http_inspect/http_flow_data.h
src/service_inspectors/http_inspect/http_msg_section.cc
src/service_inspectors/http_inspect/http_stream_splitter_scan.cc

index 68bf4b0ef5ec278e348545284ba589e8eda4bdc4..d845d61b87437bcf33c34ca1f8d51374ac599717 100644 (file)
@@ -26,7 +26,7 @@
 using namespace HttpEnums;
 
 ScanResult HttpStartCutter::cut(const uint8_t* buffer, uint32_t length,
-    HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t)
+    HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t)
 {
     for (uint32_t k = 0; k < length; k++)
     {
@@ -154,7 +154,7 @@ HttpStartCutter::ValidationResult HttpStatusCutter::validate(uint8_t octet,
 }
 
 ScanResult HttpHeaderCutter::cut(const uint8_t* buffer, uint32_t length,
-    HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t)
+    HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t)
 {
     // Header separators: leading \r\n, leading \n, nonleading \r\n\r\n, nonleading \n\r\n,
     // nonleading \r\n\n, and nonleading \n\n. The separator itself becomes num_excess which is
@@ -251,10 +251,16 @@ ScanResult HttpHeaderCutter::cut(const uint8_t* buffer, uint32_t length,
 }
 
 ScanResult HttpBodyClCutter::cut(const uint8_t*, uint32_t length, HttpInfractions*,
-    HttpEventGen*, uint32_t flow_target, uint32_t flow_max)
+    HttpEventGen*, uint32_t flow_target, uint32_t flow_max, int32_t flush_segment_min)
 {
     assert(remaining > 0);
 
+    if (new_section)
+    {
+        new_section = false;
+        octets_seen = 0;
+    }
+
     // Are we skipping to the next message?
     if (flow_target == 0)
     {
@@ -272,6 +278,37 @@ ScanResult HttpBodyClCutter::cut(const uint8_t*, uint32_t length, HttpInfraction
         }
     }
 
+    if (flush_segment_min >= 0)
+    {
+        // Flush at the end of the segment unless it would be really small
+        if ((remaining <= flow_target) && (remaining <= octets_seen + length))
+        {
+            num_flush = remaining - octets_seen;
+            remaining = 0;
+            new_section = true;
+            return SCAN_FOUND;
+        }
+        else if (flow_target <= octets_seen + length)
+        {
+            num_flush = flow_target - octets_seen;
+            remaining -= flow_target;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else if ((unsigned)flush_segment_min <= octets_seen + length)
+        {
+            num_flush = length;
+            remaining -= octets_seen + length;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else
+        {
+            octets_seen += length;
+            return SCAN_NOTFOUND;
+        }
+    }
+
     // The normal body section size is flow_target. But if there are only flow_max or less
     // remaining we take the whole thing rather than leave a small final section.
     if (remaining <= flow_max)
@@ -289,8 +326,14 @@ ScanResult HttpBodyClCutter::cut(const uint8_t*, uint32_t length, HttpInfraction
 }
 
 ScanResult HttpBodyOldCutter::cut(const uint8_t*, uint32_t length, HttpInfractions*, HttpEventGen*,
-    uint32_t flow_target, uint32_t)
+    uint32_t flow_target, uint32_t, int32_t flush_segment_min)
 {
+    if (new_section)
+    {
+        new_section = false;
+        octets_seen = 0;
+    }
+
     if (flow_target == 0)
     {
         // FIXIT-P Need StreamSplitter::END
@@ -302,12 +345,35 @@ ScanResult HttpBodyOldCutter::cut(const uint8_t*, uint32_t length, HttpInfractio
         return SCAN_DISCARD_PIECE;
     }
 
+    if (flush_segment_min >= 0)
+    {
+        // Flush at the end of the segment unless it would be really small
+        if (flow_target <= octets_seen + length)
+        {
+            num_flush = flow_target - octets_seen;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else if ((unsigned)flush_segment_min <= octets_seen + length)
+        {
+            num_flush = length;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else
+        {
+            octets_seen += length;
+            return SCAN_NOTFOUND;
+        }
+    }
+
     num_flush = flow_target;
     return SCAN_FOUND_PIECE;
 }
 
 ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
-    HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t)
+    HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t,
+    int32_t flush_segment_min)
 {
     // Are we skipping through the rest of this chunked body to the trailers and the next message?
     const bool discard_mode = (flow_target == 0);
@@ -585,6 +651,15 @@ ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
         num_flush = length;
         return SCAN_DISCARD_PIECE;
     }
+
+    if ((flush_segment_min >= 0) && ((unsigned)flush_segment_min <= octets_seen + length))
+    {
+        num_flush = length;
+        data_seen = 0;
+        new_section = true;
+        return SCAN_FOUND_PIECE;
+    }
+
     octets_seen += length;
     return SCAN_NOTFOUND;
 }
index 68ffd6fd50ad11594a64accef30c96f5a8e507ef..047a2481429a0522cda12d4ef1123040d9935819 100644 (file)
@@ -36,7 +36,7 @@ public:
     virtual ~HttpCutter() = default;
     virtual HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
         HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target,
-        uint32_t flow_max) = 0;
+        uint32_t flow_max, int32_t flush_segment_min) = 0;
     uint32_t get_num_flush() const { return num_flush; }
     uint32_t get_octets_seen() const { return octets_seen; }
     uint32_t get_num_excess() const { return num_crlf; }
@@ -55,7 +55,7 @@ class HttpStartCutter : public HttpCutter
 {
 public:
     HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
-        HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t) override;
+        HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t) override;
 
 protected:
     enum ValidationResult { V_GOOD, V_BAD, V_TBD };
@@ -84,7 +84,7 @@ class HttpHeaderCutter : public HttpCutter
 {
 public:
     HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
-        HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t) override;
+        HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t) override;
     uint32_t get_num_head_lines() const override { return num_head_lines; }
 
 private:
@@ -99,25 +99,28 @@ public:
     explicit HttpBodyClCutter(int64_t expected_length) : remaining(expected_length)
         { assert(remaining > 0); }
     HttpEnums::ScanResult cut(const uint8_t*, uint32_t length, HttpInfractions*, HttpEventGen*,
-        uint32_t flow_target, uint32_t flow_max) override;
+        uint32_t flow_target, uint32_t flow_max, int32_t flush_segment_min) override;
 
 private:
     int64_t remaining;
+    bool new_section = false;
 };
 
 class HttpBodyOldCutter : public HttpCutter
 {
 public:
     HttpEnums::ScanResult cut(const uint8_t*, uint32_t, HttpInfractions*, HttpEventGen*,
-        uint32_t flow_target, uint32_t) override;
+        uint32_t flow_target, uint32_t, int32_t flush_segment_min) override;
+private:
+    bool new_section = false;
 };
 
 class HttpBodyChunkCutter : public HttpCutter
 {
 public:
     HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
-        HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t)
-        override;
+        HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t,
+        int32_t flush_segment_min) override;
     bool get_is_broken_chunk() const override { return curr_state == HttpEnums::CHUNK_BAD; }
     uint32_t get_num_good_chunks() const override { return num_good_chunks; }
 
index 77e63a022517997ffd11242e13ce8c57656f993b..d9dc52b259cc58ff325687248229b7a12d3966d2 100644 (file)
@@ -28,6 +28,9 @@ static const int MAX_OCTETS = 63780;
 static const int GZIP_BLOCK_SIZE = 2048;
 static const int FINAL_GZIP_BLOCK_SIZE = 2304; // compromise value, too big causes gzip overruns
                                                // too small leaves too many little end sections
+static const int MIN_AUTOFLUSH_SIZE = 100;
+static const int FLOW_DEPTH_ERROR_MARGIN = 200;
+
 static const uint32_t HTTP_GID = 119;
 static const int GZIP_WINDOW_BITS = 31;
 static const int DEFLATE_WINDOW_BITS = 15;
index 6da55d977068bfa18c216f1dd0effc2242c5a668..eb9d9527689312588b29d8acb0569901c30bca49 100644 (file)
@@ -101,6 +101,7 @@ void HttpFlowData::half_reset(SourceId source_id)
     body_octets[source_id] = STAT_NOT_PRESENT;
     section_size_target[source_id] = 0;
     section_size_max[source_id] = 0;
+    flush_segment_min[source_id] = -1;
     file_depth_remaining[source_id] = STAT_NOT_PRESENT;
     detect_depth_remaining[source_id] = STAT_NOT_PRESENT;
     detection_status[source_id] = DET_REACTIVATING;
index dde8cbc3b76cf38cf91791c67b0d56acccc213f9..b5222216f4c599bbbd667f63196f29ffbc9dc79a 100644 (file)
@@ -115,6 +115,7 @@ private:
     int64_t data_length[2] = { HttpEnums::STAT_NOT_PRESENT, HttpEnums::STAT_NOT_PRESENT };
     uint32_t section_size_target[2] = { 0, 0 };
     uint32_t section_size_max[2] = { 0, 0 };
+    int32_t flush_segment_min[2] = { -1, -1 };
     HttpEnums::CompressId compression[2] = { HttpEnums::CMP_NONE, HttpEnums::CMP_NONE };
     z_stream* compress_stream[2] = { nullptr, nullptr };
     uint64_t zero_nine_expected = 0;
index fb0cc73b20ad9672de0bd2f76cb8d3dd7e713b16..3eef44cc6f5bb755ac31775f83838a38f6dc944e 100644 (file)
@@ -82,7 +82,6 @@ void HttpMsgSection::update_depth() const
     {
         // Don't need any more of the body
         session_data->section_size_target[source_id] = 0;
-        session_data->section_size_max[source_id] = 0;
         return;
     }
 
@@ -94,13 +93,19 @@ void HttpMsgSection::update_depth() const
 
     if (ddr <= 0)
     {
+        // We are only splitting to support file processing. That's not sensitive to section
+        // boundaries so we don't need random increments and we don't need to delay processing TCP
+        // segments while we accumulate more data. In addition to flushing when we reach target
+        // size, we also flush at the end of each segment provided it's not an unreasonably small
+        // amount.
         session_data->section_size_target[source_id] = target_size;
-        session_data->section_size_max[source_id] = max_size;
+        session_data->section_size_max[source_id] = target_size;
+        session_data->flush_segment_min[source_id] = MIN_AUTOFLUSH_SIZE;
     }
     else if (ddr <= max_size)
     {
-        session_data->section_size_target[source_id] = ddr + 200;
-        session_data->section_size_max[source_id] = ddr + 200;
+        session_data->section_size_target[source_id] = ddr + FLOW_DEPTH_ERROR_MARGIN;
+        session_data->section_size_max[source_id] = ddr + FLOW_DEPTH_ERROR_MARGIN;
     }
     else
     {
index 9959593c9434153febae2570d4e5dad20b87871d..a1247bc6596e7c94c128356e5736adef65f4039e 100644 (file)
@@ -164,7 +164,8 @@ StreamSplitter::Status HttpStreamSplitter::scan(Flow* flow, const uint8_t* data,
     const uint32_t max_length = MAX_OCTETS - cutter->get_octets_seen();
     const ScanResult cut_result = cutter->cut(data, (length <= max_length) ? length :
         max_length, session_data->get_infractions(source_id), session_data->get_events(source_id),
-        session_data->section_size_target[source_id], session_data->section_size_max[source_id]);
+        session_data->section_size_target[source_id], session_data->section_size_max[source_id],
+        session_data->flush_segment_min[source_id]);
     switch (cut_result)
     {
     case SCAN_NOTFOUND: