From: Vitalii Serhiiovych Horbatov -X (vhorbato - SOFTSERVE INC at Cisco) Date: Fri, 14 Feb 2025 11:16:36 +0000 (+0000) Subject: Pull request #4611: extractor: add escaping for special characters X-Git-Tag: 3.7.1.0~23 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0e296c0eee53bc9c0ca912268b5bb7c8a5d3d501;p=thirdparty%2Fsnort3.git Pull request #4611: extractor: add escaping for special characters Merge in SNORT/snort3 from ~VHORBATO/snort3:extractor_escape to master Squashed commit of the following: commit b766cfe070915adee99fe84474a07d644020670d Author: vhorbato Date: Mon Feb 3 13:13:16 2025 +0200 extractor: add escaping for special characters --- diff --git a/src/helpers/json_stream.cc b/src/helpers/json_stream.cc index 95b474f67..d1f26f3cc 100644 --- a/src/helpers/json_stream.cc +++ b/src/helpers/json_stream.cc @@ -24,6 +24,8 @@ #include "json_stream.h" #include +#include +#include #include using namespace snort; @@ -119,7 +121,7 @@ void JsonStream::put(const char* key, const char* val) out << std::quoted(key) << ": "; if (val) - out << std::quoted(val); + put_escaped(val, strlen(val)); else out << "null"; } @@ -134,7 +136,7 @@ void JsonStream::put(const char* key, const std::string& val) if ( key ) out << std::quoted(key) << ": "; - out << std::quoted(val); + put_escaped(val.c_str(), val.size()); } void JsonStream::put(const char* key, double val, int precision) @@ -180,3 +182,118 @@ void JsonStream::put_eol() { out << std::endl; } + +void JsonStream::put_escaped(const char* v, size_t len) +{ + char* buf = new char[2 * len + 2]; + char* dst = buf; + + *dst++ = '\"'; + + while (len--) + { + char c = *v++; + + switch (c) + { + case '\\': *dst++ = '\\'; *dst++ = '\\'; break; + case '\"': *dst++ = '\\'; *dst++ = '"'; break; + case '\b': *dst++ = '\\'; *dst++ = 'b'; break; + case '\f': *dst++ = '\\'; *dst++ = 'f'; break; + case '\n': *dst++ = '\\'; *dst++ = 'n'; break; + case '\r': *dst++ = '\\'; *dst++ = 'r'; break; + case '\t': *dst++ = 
'\\'; *dst++ = 't'; break; + default: + if (isprint(c)) + *dst++ = c; + else + { + out.write(buf, dst - buf); + dst = buf; + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << (0xFF & c); + } + } + } + + *dst++ = '\"'; + out.write(buf, dst - buf); + + delete[] buf; +} + +#ifdef UNIT_TEST + +#include "catch/snort_catch.h" + +class JsonStreamTest : public JsonStream +{ +public: + JsonStreamTest() : JsonStream(oss), oss() { } + + void check_escaping(const char* f, const char* input, size_t i_len, const std::string& expected) + { + oss.str(std::string()); + put(f, std::string(input, i_len)); + CHECK(oss.str() == expected); + } + +private: + std::ostringstream oss; +}; + +TEST_CASE_METHOD(JsonStreamTest, "escape: special chars", "[Json_Stream]") +{ + const char* field = "Special characters"; + const char* value = "\" \\ \b \f \n \r \t"; + size_t len = strlen(value); + + std::string expected = "\"Special characters\": \"\\\" \\\\ \\b \\f \\n \\r \\t\""; + check_escaping(field, value, len, expected); +} + +TEST_CASE_METHOD(JsonStreamTest, "escape: non printable chars", "[Json_Stream]") +{ + // __STRDUMP_DISABLE__ + const char* field = "Non printable"; + const char* value = "\x01\x02\x03"; + size_t len = strlen(value); + + std::string expected = "\"Non printable\": \"\\u0001\\u0002\\u0003\""; + check_escaping(field, value, len, expected); + // __STRDUMP_ENABLE__ +} + +TEST_CASE_METHOD(JsonStreamTest, "escape: printable chars", "[Json_Stream]") +{ + const char* field = "Printable characters"; + const char* value = "ABC abc 123"; + size_t len = strlen(value); + + std::string expected = "\"Printable characters\": \"ABC abc 123\""; + check_escaping(field, value, len, expected); +} + +TEST_CASE_METHOD(JsonStreamTest, "escape: mixed chars", "[Json_Stream]") +{ + // __STRDUMP_DISABLE__ + const char* field = "Mixed"; + const char* value = "ABC \x01 \" \\ \b \f \n \r \t 123"; + size_t len = strlen(value); + + std::string expected = "\"Mixed\": \"ABC \\u0001 \\\" \\\\ 
\\b \\f \\n \\r \\t 123\""; + check_escaping(field, value, len, expected); + // __STRDUMP_ENABLE__ +} + +TEST_CASE_METHOD(JsonStreamTest, "escape: empty string", "[Json_Stream]") +{ + const char* field = "Empty string"; + const char* value = ""; + size_t len = strlen(value); + + std::string expected = ""; + check_escaping(field, value, len, expected); +} + +#endif + diff --git a/src/helpers/json_stream.h b/src/helpers/json_stream.h index 2c1c81f94..197c0264c 100644 --- a/src/helpers/json_stream.h +++ b/src/helpers/json_stream.h @@ -51,6 +51,9 @@ public: void put_eol(); +protected: + void put_escaped(const char* v, size_t len); + private: void split(); diff --git a/src/network_inspectors/extractor/extractor_csv_logger.cc b/src/network_inspectors/extractor/extractor_csv_logger.cc index 7d0990ad9..bd7ba9596 100644 --- a/src/network_inspectors/extractor/extractor_csv_logger.cc +++ b/src/network_inspectors/extractor/extractor_csv_logger.cc @@ -24,6 +24,8 @@ #include "extractor_csv_logger.h" #include +#include +#include #include #include @@ -66,13 +68,13 @@ void CsvExtractorLogger::close_record(const Connector::ID& service_id) void CsvExtractorLogger::add_field(const char*, const char* v) { first_write ? []() { first_write = false; } () : buffer.push_back(','); - buffer.append(v); + add_escaped(v, strlen(v)); } void CsvExtractorLogger::add_field(const char*, const char* v, size_t len) { first_write ? []() { first_write = false; } () : buffer.push_back(','); - buffer.append(v, len); + add_escaped(v, len); } void CsvExtractorLogger::add_field(const char*, uint64_t v) @@ -108,3 +110,165 @@ void CsvExtractorLogger::add_field(const char*, bool v) buffer.append(v ? 
"true" : "false"); } +void CsvExtractorLogger::add_escaped(const char* v, size_t len) +{ + if (!v || len == 0) + return; + + constexpr float escape_resize_factor = 1.2; + + const char* p = v; + const char* end = v + len; + + buffer.reserve(buffer.length() + len * escape_resize_factor); + + bool to_quote = false; + std::vector quote_positions; + + while (p < end) + { + if (*p == '"') + { + to_quote = true; + quote_positions.push_back(p - v); + } + + to_quote = to_quote or *p == ',' or !isprint(*p) or (isblank(*p) and (p == v or p == end - 1)); + + ++p; + } + + if (!to_quote) + { + buffer.append(v, len); + return; + } + + buffer.push_back('"'); + + ptrdiff_t curr_pos = 0; + for (ptrdiff_t quote_pos : quote_positions) + { + assert(quote_pos >= curr_pos); + buffer.append(v + curr_pos, quote_pos - curr_pos); + buffer.push_back('"'); + curr_pos = quote_pos; + } + + buffer.append(v + curr_pos, len - curr_pos); + buffer.push_back('"'); +} + +#ifdef UNIT_TEST + +#include "catch/snort_catch.h" + +class CsvExtractorLoggerTest : public CsvExtractorLogger +{ +public: + CsvExtractorLoggerTest() : CsvExtractorLogger(nullptr) {} + + void check_escaping(const char* input, size_t i_len, const std::string& expected) + { + buffer.clear(); + add_escaped(input, i_len); + CHECK(buffer == expected); + } +}; + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: nullptr", "[extractor]") +{ + check_escaping(nullptr, 1, ""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: zero len", "[extractor]") +{ + const char* input = ""; + check_escaping(input, 0, ""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: no special chars", "[extractor]") +{ + const char* input = "simple_text"; + check_escaping(input, strlen(input), "simple_text"); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: comma", "[extractor]") +{ + const char* input = "text,with,commas"; + check_escaping(input, strlen(input), "\"text,with,commas\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: 
newline", "[extractor]") +{ + const char* input = "text\nwith\nnewlines"; + check_escaping(input, strlen(input), "\"text\nwith\nnewlines\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: CR", "[extractor]") +{ + const char* input = "text\rwith\rreturns"; + check_escaping(input, strlen(input), "\"text\rwith\rreturns\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: whitespaces", "[extractor]") +{ + const char* input = "text with ws"; + check_escaping(input, strlen(input), "text with ws"); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: whitespace at the beginning", "[extractor]") +{ + const char* input = " start_with_ws"; + check_escaping(input, strlen(input), "\" start_with_ws\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: whitespace at the end", "[extractor]") +{ + const char* input = "end_with_ws "; + check_escaping(input, strlen(input), "\"end_with_ws \""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: quotes", "[extractor]") +{ + const char* input = "text\"with\"quotes"; + check_escaping(input, strlen(input), "\"text\"\"with\"\"quotes\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: mixed", "[extractor]") +{ + const char* input = "text,with\nmixed\"chars\r"; + check_escaping(input, strlen(input), "\"text,with\nmixed\"\"chars\r\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single quote", "[extractor]") +{ + const char* input = "\""; + check_escaping(input, strlen(input), "\"\"\"\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single comma", "[extractor]") +{ + const char* input = ","; + check_escaping(input, strlen(input), "\",\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single newline", "[extractor]") +{ + const char* input = "\n"; + check_escaping(input, strlen(input), "\"\n\""); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single CR", "[extractor]") +{ + const char* input = "\r"; + check_escaping(input, strlen(input), "\"\r\""); +} + 
+TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single whitespace", "[extractor]") +{ + const char* input = " "; + check_escaping(input, strlen(input), "\" \""); +} + +#endif diff --git a/src/network_inspectors/extractor/extractor_csv_logger.h b/src/network_inspectors/extractor/extractor_csv_logger.h index 023f1e8fb..4c74fc981 100644 --- a/src/network_inspectors/extractor/extractor_csv_logger.h +++ b/src/network_inspectors/extractor/extractor_csv_logger.h @@ -43,7 +43,9 @@ public: void open_record() override; void close_record(const snort::Connector::ID&) override; -private: +protected: + void add_escaped(const char*, size_t); + std::string buffer; }; diff --git a/src/network_inspectors/extractor/extractor_json_logger.cc b/src/network_inspectors/extractor/extractor_json_logger.cc index 9e16f94e5..cf79c1e25 100644 --- a/src/network_inspectors/extractor/extractor_json_logger.cc +++ b/src/network_inspectors/extractor/extractor_json_logger.cc @@ -53,9 +53,7 @@ void JsonExtractorLogger::add_field(const char* f, const char* v) void JsonExtractorLogger::add_field(const char* f, const char* v, size_t len) { - std::string s(v, len); - - js.put(f, s); + js.put(f, {v, len}); } void JsonExtractorLogger::add_field(const char* f, uint64_t v) diff --git a/src/network_inspectors/extractor/extractor_json_logger.h b/src/network_inspectors/extractor/extractor_json_logger.h index 4db4a4992..282fded65 100644 --- a/src/network_inspectors/extractor/extractor_json_logger.h +++ b/src/network_inspectors/extractor/extractor_json_logger.h @@ -45,7 +45,6 @@ public: private: std::ostringstream oss; snort::JsonStream js; - }; #endif