using namespace HttpEnums;
ScanResult HttpStartCutter::cut(const uint8_t* buffer, uint32_t length,
- HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t)
+ HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t)
{
for (uint32_t k = 0; k < length; k++)
{
}
ScanResult HttpHeaderCutter::cut(const uint8_t* buffer, uint32_t length,
- HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t)
+ HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t)
{
// Header separators: leading \r\n, leading \n, nonleading \r\n\r\n, nonleading \n\r\n,
// nonleading \r\n\n, and nonleading \n\n. The separator itself becomes num_excess which is
}
ScanResult HttpBodyClCutter::cut(const uint8_t*, uint32_t length, HttpInfractions*,
- HttpEventGen*, uint32_t flow_target, uint32_t flow_max)
+ HttpEventGen*, uint32_t flow_target, uint32_t flow_max, int32_t flush_segment_min)
{
assert(remaining > 0);
+    if (new_section)
+    {
+        // The previous call flushed a section: restart the per-section octet
+        // count before scanning this segment.
+        new_section = false;
+        octets_seen = 0;
+    }
+
// Are we skipping to the next message?
if (flow_target == 0)
{
}
}
+    if (flush_segment_min >= 0)
+    {
+        // Flush at the end of the segment unless it would be really small
+        // (a negative flush_segment_min disables end-of-segment flushing).
+        if ((remaining <= flow_target) && (remaining <= octets_seen + length))
+        {
+            // The rest of the message body is already in hand: flush the final
+            // section. The section delivered totals octets_seen + num_flush.
+            num_flush = remaining - octets_seen;
+            remaining = 0;
+            new_section = true;
+            return SCAN_FOUND;
+        }
+        else if (flow_target <= octets_seen + length)
+        {
+            // Accumulated data reaches the target section size: flush a full
+            // section of flow_target octets.
+            num_flush = flow_target - octets_seen;
+            remaining -= flow_target;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else if ((unsigned)flush_segment_min <= octets_seen + length)
+        {
+            // End-of-segment flush: everything accumulated so far
+            // (octets_seen + length octets) is at least flush_segment_min,
+            // so flush now rather than waiting for flow_target.
+            num_flush = length;
+            remaining -= octets_seen + length;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else
+        {
+            // Too little data to be worth a section yet; keep accumulating.
+            octets_seen += length;
+            return SCAN_NOTFOUND;
+        }
+    }
+
// The normal body section size is flow_target. But if there are only flow_max or less
// remaining we take the whole thing rather than leave a small final section.
if (remaining <= flow_max)
}
ScanResult HttpBodyOldCutter::cut(const uint8_t*, uint32_t length, HttpInfractions*, HttpEventGen*,
- uint32_t flow_target, uint32_t)
+ uint32_t flow_target, uint32_t, int32_t flush_segment_min)
{
+    if (new_section)
+    {
+        // The previous call flushed a section: restart the per-section octet
+        // count before scanning this segment.
+        new_section = false;
+        octets_seen = 0;
+    }
+
if (flow_target == 0)
{
// FIXIT-P Need StreamSplitter::END
return SCAN_DISCARD_PIECE;
}
+    if (flush_segment_min >= 0)
+    {
+        // Flush at the end of the segment unless it would be really small
+        // (a negative flush_segment_min disables end-of-segment flushing).
+        if (flow_target <= octets_seen + length)
+        {
+            // Reached the target section size: flush a full section.
+            num_flush = flow_target - octets_seen;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else if ((unsigned)flush_segment_min <= octets_seen + length)
+        {
+            // End-of-segment flush: the accumulated data is big enough.
+            num_flush = length;
+            new_section = true;
+            return SCAN_FOUND_PIECE;
+        }
+        else
+        {
+            // Not enough data yet; keep accumulating.
+            octets_seen += length;
+            return SCAN_NOTFOUND;
+        }
+    }
+
num_flush = flow_target;
return SCAN_FOUND_PIECE;
}
ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
- HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t)
+ HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t,
+ int32_t flush_segment_min)
{
// Are we skipping through the rest of this chunked body to the trailers and the next message?
const bool discard_mode = (flow_target == 0);
num_flush = length;
return SCAN_DISCARD_PIECE;
}
+
+    if ((flush_segment_min >= 0) && ((unsigned)flush_segment_min <= octets_seen + length))
+    {
+        // End-of-segment flush: enough chunk data has accumulated to make a
+        // worthwhile section without waiting to reach flow_target.
+        num_flush = length;
+        // NOTE(review): data_seen is not visible in this hunk; presumably it
+        // counts chunk data toward the current section and restarts with the
+        // new section -- confirm against the rest of HttpBodyChunkCutter.
+        data_seen = 0;
+        new_section = true;
+        return SCAN_FOUND_PIECE;
+    }
+
octets_seen += length;
return SCAN_NOTFOUND;
}
virtual ~HttpCutter() = default;
virtual HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target,
- uint32_t flow_max) = 0;
+ uint32_t flow_max, int32_t flush_segment_min) = 0;
uint32_t get_num_flush() const { return num_flush; }
uint32_t get_octets_seen() const { return octets_seen; }
uint32_t get_num_excess() const { return num_crlf; }
{
public:
HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
- HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t) override;
+ HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t) override;
protected:
enum ValidationResult { V_GOOD, V_BAD, V_TBD };
{
public:
HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
- HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t) override;
+ HttpInfractions* infractions, HttpEventGen* events, uint32_t, uint32_t, int32_t) override;
uint32_t get_num_head_lines() const override { return num_head_lines; }
private:
explicit HttpBodyClCutter(int64_t expected_length) : remaining(expected_length)
{ assert(remaining > 0); }
HttpEnums::ScanResult cut(const uint8_t*, uint32_t length, HttpInfractions*, HttpEventGen*,
- uint32_t flow_target, uint32_t flow_max) override;
+ uint32_t flow_target, uint32_t flow_max, int32_t flush_segment_min) override;
private:
int64_t remaining;
+ bool new_section = false;
};
class HttpBodyOldCutter : public HttpCutter
{
public:
HttpEnums::ScanResult cut(const uint8_t*, uint32_t, HttpInfractions*, HttpEventGen*,
- uint32_t flow_target, uint32_t) override;
+ uint32_t flow_target, uint32_t, int32_t flush_segment_min) override;
+private:
+ bool new_section = false;
};
class HttpBodyChunkCutter : public HttpCutter
{
public:
HttpEnums::ScanResult cut(const uint8_t* buffer, uint32_t length,
- HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t)
- override;
+ HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, uint32_t,
+ int32_t flush_segment_min) override;
bool get_is_broken_chunk() const override { return curr_state == HttpEnums::CHUNK_BAD; }
uint32_t get_num_good_chunks() const override { return num_good_chunks; }
static const int GZIP_BLOCK_SIZE = 2048;
static const int FINAL_GZIP_BLOCK_SIZE = 2304; // compromise value, too big causes gzip overruns
// too small leaves too many little end sections
+// Smallest end-of-segment flush worth doing when flush_segment_min is active
+static const int MIN_AUTOFLUSH_SIZE = 100;
+// Octets added beyond the remaining detect depth when sizing body sections
+// (names the previously hard-coded 200)
+static const int FLOW_DEPTH_ERROR_MARGIN = 200;
+
static const uint32_t HTTP_GID = 119;
static const int GZIP_WINDOW_BITS = 31;
static const int DEFLATE_WINDOW_BITS = 15;
body_octets[source_id] = STAT_NOT_PRESENT;
section_size_target[source_id] = 0;
section_size_max[source_id] = 0;
+ flush_segment_min[source_id] = -1;
file_depth_remaining[source_id] = STAT_NOT_PRESENT;
detect_depth_remaining[source_id] = STAT_NOT_PRESENT;
detection_status[source_id] = DET_REACTIVATING;
int64_t data_length[2] = { HttpEnums::STAT_NOT_PRESENT, HttpEnums::STAT_NOT_PRESENT };
uint32_t section_size_target[2] = { 0, 0 };
uint32_t section_size_max[2] = { 0, 0 };
+ int32_t flush_segment_min[2] = { -1, -1 };
HttpEnums::CompressId compression[2] = { HttpEnums::CMP_NONE, HttpEnums::CMP_NONE };
z_stream* compress_stream[2] = { nullptr, nullptr };
uint64_t zero_nine_expected = 0;
{
// Don't need any more of the body
session_data->section_size_target[source_id] = 0;
- session_data->section_size_max[source_id] = 0;
return;
}
if (ddr <= 0)
{
+ // We are only splitting to support file processing. That's not sensitive to section
+ // boundaries so we don't need random increments and we don't need to delay processing TCP
+ // segments while we accumulate more data. In addition to flushing when we reach target
+ // size, we also flush at the end of each segment provided it's not an unreasonably small
+ // amount.
session_data->section_size_target[source_id] = target_size;
- session_data->section_size_max[source_id] = max_size;
+ session_data->section_size_max[source_id] = target_size;
+ session_data->flush_segment_min[source_id] = MIN_AUTOFLUSH_SIZE;
}
else if (ddr <= max_size)
{
- session_data->section_size_target[source_id] = ddr + 200;
- session_data->section_size_max[source_id] = ddr + 200;
+ session_data->section_size_target[source_id] = ddr + FLOW_DEPTH_ERROR_MARGIN;
+ session_data->section_size_max[source_id] = ddr + FLOW_DEPTH_ERROR_MARGIN;
}
else
{
const uint32_t max_length = MAX_OCTETS - cutter->get_octets_seen();
const ScanResult cut_result = cutter->cut(data, (length <= max_length) ? length :
max_length, session_data->get_infractions(source_id), session_data->get_events(source_id),
- session_data->section_size_target[source_id], session_data->section_size_max[source_id]);
+ session_data->section_size_target[source_id], session_data->section_size_max[source_id],
+ session_data->flush_segment_min[source_id]);
switch (cut_result)
{
case SCAN_NOTFOUND: