From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Fri, 4 Apr 2025 11:16:25 +0000 (+0000) Subject: Pull request #4687: TSV formatting X-Git-Tag: 3.7.3.0~7 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=cb7a6fa24c50a601f94b69361e94171fc2baef2c;p=thirdparty%2Fsnort3.git Pull request #4687: TSV formatting Merge in SNORT/snort3 from ~OSHUMEIK/snort3:extr_tsv to master Squashed commit of the following: commit 7139b13db0f2864f003d18e7e1e1ba00398e7883 Author: Oleksii Shumeiko Date: Thu Apr 3 11:56:48 2025 +0300 control: fix types in comparison commit 7c3600f896b812b7dbb5ca262207789bf37ad598 Author: Oleksii Shumeiko Date: Thu Apr 3 11:14:01 2025 +0300 extractor: enable TSV formatting commit e7dde81c4dc9ee3772ea3cea7470ae36b0ade1b9 Author: Oleksii Shumeiko Date: Wed Apr 2 18:03:10 2025 +0300 extractor: add escaping for TSV commit 85df6b89ed7427f0ac72028b56a5cf820a9e0dbc Author: Oleksii Shumeiko Date: Wed Apr 2 18:01:46 2025 +0300 extractor: add configurable delimiter in CSV logger commit 56382b7d389a132523ba183323dc217ebe884031 Author: Oleksii Shumeiko Date: Wed Apr 2 17:12:07 2025 +0300 extractor: simplify CSV logger implementation --- diff --git a/src/control/control_mgmt.cc b/src/control/control_mgmt.cc index c48e5fb44..2d0638788 100644 --- a/src/control/control_mgmt.cc +++ b/src/control/control_mgmt.cc @@ -222,7 +222,7 @@ static bool poll_control_fds(FdEvents ready[MAX_CONTROL_FDS], unsigned& nready) return false; } nready = 0; - for (int i = 0; i < npfds; i++) + for (unsigned i = 0; i < npfds; i++) { struct pollfd* pfd = &pfds[i]; int fd = pfd->fd; diff --git a/src/network_inspectors/extractor/extractor.cc b/src/network_inspectors/extractor/extractor.cc index 7e59f3135..292e2c2c7 100644 --- a/src/network_inspectors/extractor/extractor.cc +++ b/src/network_inspectors/extractor/extractor.cc @@ -67,7 +67,7 @@ static const Parameter extractor_proto_params[] = static const Parameter s_params[] = { - { "formatting", Parameter::PT_ENUM, "csv | json", "csv", + { "formatting", Parameter::PT_ENUM, "csv | tsv | json", "csv", "output format for extractor" }, { "connector", Parameter::PT_STRING, nullptr, nullptr, diff --git a/src/network_inspectors/extractor/extractor_csv_logger.cc b/src/network_inspectors/extractor/extractor_csv_logger.cc index c8d92a506..f7bfe4eac 100644 --- a/src/network_inspectors/extractor/extractor_csv_logger.cc +++ b/src/network_inspectors/extractor/extractor_csv_logger.cc @@ -16,6 +16,7 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. //-------------------------------------------------------------------------- // extractor_csv_logger.cc author Anna Norokh +// extractor_csv_logger.cc author Cisco #ifdef HAVE_CONFIG_H #include "config.h" @@ -35,10 +36,8 @@ using namespace snort; using namespace std; -static THREAD_LOCAL bool first_write; - -CsvExtractorLogger::CsvExtractorLogger(snort::Connector* conn, TimeType ts_type) - : ExtractorLogger(conn) +CsvExtractorLogger::CsvExtractorLogger(snort::Connector* conn, TimeType ts_type, char delim) + : ExtractorLogger(conn), delimiter(delim) { switch (ts_type) { @@ -73,7 +72,7 @@ void CsvExtractorLogger::add_header(const vector& field_names, cons { header += d; header += n; - d = ','; + d = delimiter; } ConnectorMsg cmsg((const uint8_t*)header.c_str(), header.size(), false); @@ -82,63 +81,66 @@ void CsvExtractorLogger::add_header(const vector& field_names, cons void CsvExtractorLogger::open_record() { - first_write = true; + record.clear(); } void CsvExtractorLogger::close_record(const Connector::ID& service_id) { - ConnectorMsg cmsg((const uint8_t*)buffer.c_str(), buffer.size(), false); - output_conn->transmit_message(cmsg, service_id); + if (record.empty()) + return; - buffer.clear(); + auto data = (const uint8_t*)record.data() + 1; + auto size = record.size() - 1; + ConnectorMsg cmsg(data, size, false); + + output_conn->transmit_message(cmsg, service_id); } void CsvExtractorLogger::add_field(const char*, const char* v) { - first_write ? []() { first_write = false; } () : buffer.push_back(','); + record.push_back(delimiter); add_escaped(v, strlen(v)); } void CsvExtractorLogger::add_field(const char*, const char* v, size_t len) { - first_write ? []() { first_write = false; } () : buffer.push_back(','); + record.push_back(delimiter); add_escaped(v, len); } void CsvExtractorLogger::add_field(const char*, uint64_t v) { - first_write ? []() { first_write = false; } () : buffer.push_back(','); - buffer.append(to_string(v)); + record.push_back(delimiter); + record.append(to_string(v)); } void CsvExtractorLogger::add_field(const char*, const snort::SfIp& v) { - first_write ? []() { first_write = false; } () : buffer.push_back(','); + record.push_back(delimiter); snort::SfIpString buf; v.ntop(buf); - buffer.append(buf); + record.append(buf); } void CsvExtractorLogger::add_field(const char*, bool v) { - first_write ? []() { first_write = false; } () : buffer.push_back(','); + record.push_back(delimiter); - buffer.append(v ? "true" : "false"); + record.append(v ? "true" : "false"); } -void CsvExtractorLogger::add_escaped(const char* v, size_t len) +static void escape_csv_style(string& record, const char* v, size_t len, char delimiter) { - if (!v || len == 0) - return; + assert(v); + assert(len); constexpr float escape_resize_factor = 1.2; - const char* p = v; const char* end = v + len; - buffer.reserve(buffer.length() + len * escape_resize_factor); + record.reserve(record.length() + len * escape_resize_factor); bool to_quote = false; std::vector quote_positions; @@ -151,35 +153,89 @@ void CsvExtractorLogger::add_escaped(const char* v, size_t len) quote_positions.push_back(p - v); } - to_quote = to_quote or *p == ',' or !isprint(*p) or (isblank(*p) and (p == v or p == end - 1)); + to_quote = to_quote or *p == delimiter or !isprint(*p) or (isblank(*p) and (p == v or p == end - 1)); ++p; } if (!to_quote) { - buffer.append(v, len); + record.append(v, len); return; } - buffer.push_back('"'); + record.push_back('"'); ptrdiff_t curr_pos = 0; for (ptrdiff_t quote_pos : quote_positions) { assert(quote_pos >= curr_pos); - buffer.append(v + curr_pos, quote_pos - curr_pos); - buffer.push_back('"'); + record.append(v + curr_pos, quote_pos - curr_pos); + record.push_back('"'); curr_pos = quote_pos; } - buffer.append(v + curr_pos, len - curr_pos); - buffer.push_back('"'); + record.append(v + curr_pos, len - curr_pos); + record.push_back('"'); +} + +static void escape_tsv_style(string& record, const char* v, size_t len, char delimiter) +{ + assert(v); + assert(len); + + const char* p = v - 1; + const char* end = v + len; + bool clean = true; + + while (++p < end and clean) + clean = !(*p == delimiter or *p == '\r' or *p == '\n' or *p == '\\'); + + if (clean) + { + record.append(v, len); + return; + } + + p = v - 1; + end = v + len; + + while (++p < end) + { + if (*p == '\t') + record.append("\\t"); + else if (*p == '\r') + record.append("\\r"); + else if (*p == '\n') + record.append("\\n"); + else if (*p == '\\') + record.append("\\\\"); + else + record.push_back(*p); + + assert(delimiter == '\t'); + } +} + +void CsvExtractorLogger::add_escaped(const char* v, size_t len) +{ + bool visible = isprint(delimiter); + + if (!v || len == 0) + { + if (!visible) + record.append("-"); + return; + } + + return visible + ? escape_csv_style(record, v, len, delimiter) + : escape_tsv_style(record, v, len, delimiter); } void CsvExtractorLogger::add_field(const char*, struct timeval v) { - first_write ? []() { first_write = false; } () : buffer.push_back(','); + record.push_back(delimiter); (this->*add_ts)(v); } @@ -188,7 +244,7 @@ void CsvExtractorLogger::ts_snort(const struct timeval& v) char ts[TIMEBUF_SIZE]; ts_print(&v, ts, false); - buffer.append(ts); + record.append(ts); } void CsvExtractorLogger::ts_snort_yy(const struct timeval& v) @@ -196,7 +252,7 @@ void CsvExtractorLogger::ts_snort_yy(const struct timeval& v) char ts[TIMEBUF_SIZE]; ts_print(&v, ts, true); - buffer.append(ts); + record.append(ts); } void CsvExtractorLogger::ts_unix(const struct timeval& v) @@ -204,14 +260,14 @@ void CsvExtractorLogger::ts_unix(const struct timeval& v) char ts[numeric_limits::digits10 + 8]; snort::SnortSnprintf(ts, sizeof(ts), "%" PRIu64 ".%06d", (uint64_t)v.tv_sec, (unsigned)v.tv_usec); - buffer.append(ts); + record.append(ts); } void CsvExtractorLogger::ts_sec(const struct timeval& v) { uint64_t sec = (uint64_t)v.tv_sec; - buffer.append(to_string(sec)); + record.append(to_string(sec)); } void CsvExtractorLogger::ts_usec(const struct timeval& v) @@ -219,119 +275,164 @@ void CsvExtractorLogger::ts_usec(const struct timeval& v) uint64_t sec = (uint64_t)v.tv_sec; uint64_t usec = (uint64_t)v.tv_usec; - buffer.append(to_string(sec * 1000000 + usec)); + record.append(to_string(sec * 1000000 + usec)); } #ifdef UNIT_TEST #include "catch/snort_catch.h" -class CsvExtractorLoggerTest : public CsvExtractorLogger +class CsvExtractorLoggerHelper : public CsvExtractorLogger { public: - CsvExtractorLoggerTest() : CsvExtractorLogger(nullptr, TimeType::MAX) {} + CsvExtractorLoggerHelper(char delimiter) : CsvExtractorLogger(nullptr, TimeType::MAX, delimiter) {} - void check_escaping(const char* input, size_t i_len, const std::string& expected) + void check(const char* input, size_t i_len, const std::string& expected) { - buffer.clear(); + record.clear(); add_escaped(input, i_len); - CHECK(buffer == expected); + CHECK(record == expected); } }; +class CsvExtractorLoggerTest +{ +public: + + void check_csv(const char* input, size_t i_len, const std::string& expected) + { csv.check(input, i_len, expected); } + + void check_tsv(const char* input, size_t i_len, const std::string& expected) + { tsv.check(input, i_len, expected); } + +private: + CsvExtractorLoggerHelper csv{','}; + CsvExtractorLoggerHelper tsv{'\t'}; +}; + TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: nullptr", "[extractor]") { - check_escaping(nullptr, 1, ""); + check_csv(nullptr, 1, ""); + check_tsv(nullptr, 1, "-"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: zero len", "[extractor]") { const char* input = ""; - check_escaping(input, 0, ""); + check_csv(input, 0, ""); + check_tsv(input, 0, "-"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: no special chars", "[extractor]") { const char* input = "simple_text"; - check_escaping(input, strlen(input), "simple_text"); + check_csv(input, strlen(input), "simple_text"); + check_tsv(input, strlen(input), "simple_text"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: comma", "[extractor]") { const char* input = "text,with,commas"; - check_escaping(input, strlen(input), "\"text,with,commas\""); + check_csv(input, strlen(input), "\"text,with,commas\""); + check_tsv(input, strlen(input), "text,with,commas"); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: tab", "[extractor]") +{ + const char* input = "text\t with\t tabs"; + check_csv(input, strlen(input), "\"text\t with\t tabs\""); + check_tsv(input, strlen(input), "text\\t with\\t tabs"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: newline", "[extractor]") { - const char* input = "text\nwith\nnewlines"; - check_escaping(input, strlen(input), "\"text\nwith\nnewlines\""); + const char* input = "text\n with\n newlines"; + check_csv(input, strlen(input), "\"text\n with\n newlines\""); + check_tsv(input, strlen(input), "text\\n with\\n newlines"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: CR", "[extractor]") { - const char* input = "text\rwith\rreturns"; - check_escaping(input, strlen(input), "\"text\rwith\rreturns\""); + const char* input = "text\r with\r returns"; + check_csv(input, strlen(input), "\"text\r with\r returns\""); + check_tsv(input, strlen(input), "text\\r with\\r returns"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: whitespaces", "[extractor]") { const char* input = "text with ws"; - check_escaping(input, strlen(input), "text with ws"); + check_csv(input, strlen(input), "text with ws"); + check_tsv(input, strlen(input), "text with ws"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: whitespace at the beginning", "[extractor]") { const char* input = " start_with_ws"; - check_escaping(input, strlen(input), "\" start_with_ws\""); + check_csv(input, strlen(input), "\" start_with_ws\""); + check_tsv(input, strlen(input), " start_with_ws"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: whitespace at the end", "[extractor]") { const char* input = "end_with_ws "; - check_escaping(input, strlen(input), "\"end_with_ws \""); + check_csv(input, strlen(input), "\"end_with_ws \""); + check_tsv(input, strlen(input), "end_with_ws "); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: quotes", "[extractor]") { const char* input = "text\"with\"quotes"; - check_escaping(input, strlen(input), "\"text\"\"with\"\"quotes\""); + check_csv(input, strlen(input), "\"text\"\"with\"\"quotes\""); + check_tsv(input, strlen(input), "text\"with\"quotes"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: mixed", "[extractor]") { - const char* input = "text,with\nmixed\"chars\r"; - check_escaping(input, strlen(input), "\"text,with\nmixed\"\"chars\r\""); + const char* input = "text,with\n mixed\"chars\r"; + check_csv(input, strlen(input), "\"text,with\n mixed\"\"chars\r\""); + check_tsv(input, strlen(input), "text,with\\n mixed\"chars\\r"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single quote", "[extractor]") { const char* input = "\""; - check_escaping(input, strlen(input), "\"\"\"\""); + check_csv(input, strlen(input), "\"\"\"\""); + check_tsv(input, strlen(input), "\""); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single comma", "[extractor]") { const char* input = ","; - check_escaping(input, strlen(input), "\",\""); + check_csv(input, strlen(input), "\",\""); + check_tsv(input, strlen(input), ","); +} + +TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single tab", "[extractor]") +{ + const char* input = "\t"; + check_csv(input, strlen(input), "\"\t\""); + check_tsv(input, strlen(input), "\\t"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single newline", "[extractor]") { const char* input = "\n"; - check_escaping(input, strlen(input), "\"\n\""); + check_csv(input, strlen(input), "\"\n\""); + check_tsv(input, strlen(input), "\\n"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single CR", "[extractor]") { const char* input = "\r"; - check_escaping(input, strlen(input), "\"\r\""); + check_csv(input, strlen(input), "\"\r\""); + check_tsv(input, strlen(input), "\\r"); } TEST_CASE_METHOD(CsvExtractorLoggerTest, "escape: single whitespace", "[extractor]") { const char* input = " "; - check_escaping(input, strlen(input), "\" \""); + check_csv(input, strlen(input), "\" \""); + check_tsv(input, strlen(input), " "); } #endif diff --git a/src/network_inspectors/extractor/extractor_csv_logger.h b/src/network_inspectors/extractor/extractor_csv_logger.h index 655686ac0..4b1fa942a 100644 --- a/src/network_inspectors/extractor/extractor_csv_logger.h +++ b/src/network_inspectors/extractor/extractor_csv_logger.h @@ -25,7 +25,7 @@ class CsvExtractorLogger : public ExtractorLogger { public: - CsvExtractorLogger(snort::Connector*, TimeType); + CsvExtractorLogger(snort::Connector*, TimeType, char delimiter = ','); virtual bool is_strict() const override { return true; } @@ -48,7 +48,8 @@ protected: void ts_sec(const struct timeval&); void ts_usec(const struct timeval&); - std::string buffer; + std::string record; + const char delimiter; void (CsvExtractorLogger::*add_ts)(const struct timeval&); }; diff --git a/src/network_inspectors/extractor/extractor_enums.h b/src/network_inspectors/extractor/extractor_enums.h index 566cb057a..3b2d99118 100644 --- a/src/network_inspectors/extractor/extractor_enums.h +++ b/src/network_inspectors/extractor/extractor_enums.h @@ -71,6 +71,7 @@ public: enum Value : uint8_t { CSV, + TSV, JSON, MAX }; @@ -88,6 +89,8 @@ public: { case CSV: return "csv"; + case TSV: + return "tsv"; case JSON: return "json"; case MAX: // fallthrough diff --git a/src/network_inspectors/extractor/extractor_logger.cc b/src/network_inspectors/extractor/extractor_logger.cc index b32ce1ac5..6ea4fa209 100644 --- a/src/network_inspectors/extractor/extractor_logger.cc +++ b/src/network_inspectors/extractor/extractor_logger.cc @@ -76,6 +76,9 @@ ExtractorLogger* ExtractorLogger::make_logger(FormatType f_type, const std::stri case FormatType::CSV: logger = new CsvExtractorLogger(output_conn, ts_type); break; + case FormatType::TSV: + logger = new CsvExtractorLogger(output_conn, ts_type, '\t'); + break; case FormatType::JSON: logger = new JsonExtractorLogger(output_conn, ts_type); break; @@ -100,10 +103,12 @@ TEST_CASE("Format Type", "[extractor]") SECTION("to string") { FormatType csv = FormatType::CSV; + FormatType tsv = FormatType::TSV; FormatType json = FormatType::JSON; FormatType max = FormatType::MAX; CHECK_FALSE(strcmp("csv", csv.c_str())); + CHECK_FALSE(strcmp("tsv", tsv.c_str())); CHECK_FALSE(strcmp("json", json.c_str())); CHECK_FALSE(strcmp("(not set)", max.c_str())); }