From 7e5e3885373fed4aa2b83291aa9c49521638dc40 Mon Sep 17 00:00:00 2001 From: "Vitalii Serhiiovych Horbatov -X (vhorbato - SOFTSERVE INC at Cisco)" Date: Wed, 2 Jul 2025 14:28:12 +0000 Subject: [PATCH] Pull request #4794: extractor: modify JSON Formatter to improve performance Merge in SNORT/snort3 from ~VHORBATO/snort3:json_buffer to master Squashed commit of the following: commit 2eb5914c5ba8a144091c072b5cfbacb601635559 Author: vhorbato Date: Tue Jun 17 14:06:03 2025 +0300 extractor: modify JSON Formatter to improve performance --- src/helpers/json_stream.cc | 128 ++++++--- src/helpers/json_stream.h | 6 +- .../extractor/extractor_json_logger.cc | 247 ++++++++++++++---- .../extractor/extractor_json_logger.h | 12 +- 4 files changed, 290 insertions(+), 103 deletions(-) diff --git a/src/helpers/json_stream.cc b/src/helpers/json_stream.cc index aff990182..c2c566044 100644 --- a/src/helpers/json_stream.cc +++ b/src/helpers/json_stream.cc @@ -30,6 +30,73 @@ using namespace snort; +static inline size_t str_esc_pos(const char* str, size_t len) +{ + for (size_t i = 0; i < len; ++i) + { + switch (str[i]) + { + case '\\': + case '\"': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + return i; + default: + if (!isprint(str[i])) + return i; + } + } + + return len; +} + +void escape_json_append(std::string& out, const char* v, size_t len) +{ + if (!v or len == 0) + return; + + size_t pos = str_esc_pos(v, len); + + if (pos != 0) + out.append(v, pos); + + if (pos == len) + return; + + len -= pos; + v += pos; + + out.reserve(out.size() + 2 * len); + + while (len--) + { + const unsigned char c = *v++; + + switch (c) + { + case '\\': out.push_back('\\'); out.push_back('\\'); break; + case '\"': out.push_back('\\'); out.push_back('"'); break; + case '\b': out.push_back('\\'); out.push_back('b'); break; + case '\f': out.push_back('\\'); out.push_back('f'); break; + case '\n': out.push_back('\\'); out.push_back('n'); break; + case '\r': out.push_back('\\'); out.push_back('r'); break; + case '\t': out.push_back('\\'); out.push_back('t'); break; + default: + if (isprint(c)) + out.push_back(c); + else + { + char buf[7]; + std::snprintf(buf, sizeof(buf), "\\u%04x", static_cast(0xFF & c)); + out.append(buf); + } + } + } +} + void JsonStream::open(const char* key) { split(); @@ -121,7 +188,11 @@ void JsonStream::put(const char* key, const char* val) out << std::quoted(key) << ": "; if (val) - put_escaped(val, strlen(val)); + { + std::string escaped; + escape_json_append(escaped, val, strlen(val)); + out << '"' << escaped << '"'; + } else out << "null"; } @@ -136,7 +207,9 @@ void JsonStream::put(const char* key, const std::string& val) if ( key ) out << std::quoted(key) << ": "; - put_escaped(val.c_str(), val.size()); + std::string escaped; + escape_json_append(escaped, val.c_str(), val.size()); + out << '"' << escaped << '"'; } void JsonStream::put(const char* key, double val, int precision) @@ -183,47 +256,6 @@ void JsonStream::put_eol() out << std::endl; } -void JsonStream::put_escaped(const char* v, size_t len) -{ - char* buf = new char[2 * len + 2]; - char* dst = buf; - - *dst++ = '\"'; - - while (len--) - { - char c = *v++; - - switch (c) - { - case '\\': *dst++ = '\\'; *dst++ = '\\'; break; - case '\"': *dst++ = '\\'; *dst++ = '"'; break; - case '\b': *dst++ = '\\'; *dst++ = 'b'; break; - case '\f': *dst++ = '\\'; *dst++ = 'f'; break; - case '\n': *dst++ = '\\'; *dst++ = 'n'; break; - case '\r': *dst++ = '\\'; *dst++ = 'r'; break; - case '\t': *dst++ = '\\'; *dst++ = 't'; break; - default: - if (isprint(c)) - *dst++ = c; - else - { - out.write(buf, dst - buf); - dst = buf; - - std::ios_base::fmtflags flags = out.flags(); - out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << (0xFF & c); - out.flags(flags); - } - } - } - - *dst++ = '\"'; - out.write(buf, dst - buf); - - delete[] buf; -} - #ifdef UNIT_TEST #include @@ -307,5 +339,15 @@ TEST_CASE_METHOD(JsonStreamTest, "escape: empty string", "[Json_Stream]") check_escaping(field, value, len, expected); } +TEST_CASE_METHOD(JsonStreamTest, "escape: no escaping", "[Json_Stream]") +{ + const char* field = "Normal string"; + const char* value = "foobar"; + size_t len = strlen(value); + + std::string expected = "\"Normal string\": \"foobar\""; + check_escaping(field, value, len, expected); +} + #endif diff --git a/src/helpers/json_stream.h b/src/helpers/json_stream.h index 197c0264c..43a354288 100644 --- a/src/helpers/json_stream.h +++ b/src/helpers/json_stream.h @@ -25,8 +25,11 @@ #include #include "main/snort_types.h" +void escape_json_append(std::string& out, const char* v, size_t len); + namespace snort { + class SO_PUBLIC JsonStream { public: @@ -51,9 +54,6 @@ public: void put_eol(); -protected: - void put_escaped(const char* v, size_t len); - private: void split(); diff --git a/src/network_inspectors/extractor/extractor_json_logger.cc b/src/network_inspectors/extractor/extractor_json_logger.cc index 32ae08c38..75c5dc93b 100644 --- a/src/network_inspectors/extractor/extractor_json_logger.cc +++ b/src/network_inspectors/extractor/extractor_json_logger.cc @@ -26,11 +26,27 @@ #include #include +#include "helpers/json_stream.h" #include "utils/util.h" #include "utils/util_cstring.h" +void JsonExtractorLogger::write_key(const char* key) +{ + if (!first_field_written) + first_field_written = true; + else + out_buffer += ", "; + + out_buffer += '"'; + + if (key) + out_buffer += key; + + out_buffer += "\": "; +} + JsonExtractorLogger::JsonExtractorLogger(snort::Connector* conn, TimeType ts_type) - : ExtractorLogger(conn), oss(), js(oss) + : ExtractorLogger(conn), first_field_written(false) { switch (ts_type) { @@ -58,35 +74,47 @@ JsonExtractorLogger::JsonExtractorLogger(snort::Connector* conn, TimeType ts_typ void JsonExtractorLogger::open_record() { - oss.str(""); - js.open(); + out_buffer.clear(); + out_buffer += "{ "; + first_field_written = false; } void JsonExtractorLogger::close_record(const snort::Connector::ID& service_id) { - js.close(); + out_buffer += " }"; - // FIXIT-L: we're removing last character(\n) due to a limitation of - // Json Stream configuration - assert(oss.str()[oss.str().size() - 1] == '\n'); + snort::ConnectorMsg cmsg(reinterpret_cast(out_buffer.c_str()), + out_buffer.length(), false); - output_conn->transmit_message(snort::ConnectorMsg( - (const uint8_t*)oss.str().c_str(), oss.str().size() - 1, false), service_id); + output_conn->transmit_message(cmsg, service_id); } void JsonExtractorLogger::add_field(const char* f, const char* v) { - js.put(f, v); + if (!v or v[0] == '\0') + return; + + write_key(f); + out_buffer += '"'; + escape_json_append(out_buffer, v, std::strlen(v)); + out_buffer += '"'; } void JsonExtractorLogger::add_field(const char* f, const char* v, size_t len) { - js.put(f, {v, len}); + if (!v or v[0] == '\0' or len == 0) + return; + + write_key(f); + out_buffer += '"'; + escape_json_append(out_buffer, v, len); + out_buffer += '"'; } void JsonExtractorLogger::add_field(const char* f, uint64_t v) { - js.uput(f, v); + write_key(f); + out_buffer += std::to_string(v); } void JsonExtractorLogger::add_field(const char* f, const snort::SfIp& v) @@ -94,12 +122,20 @@ void JsonExtractorLogger::add_field(const char* f, const snort::SfIp& v) snort::SfIpString buf; v.ntop(buf); - js.put(f, buf); + + if (buf[0] == '\0') + return; + + write_key(f); + out_buffer += '"'; + out_buffer += buf; + out_buffer += '"'; } void JsonExtractorLogger::add_field(const char* f, bool v) { - v ? js.put_true(f) : js.put_false(f); + write_key(f); + out_buffer += (v ? "true" : "false"); } void JsonExtractorLogger::add_field(const char* f, struct timeval v) @@ -112,7 +148,10 @@ void JsonExtractorLogger::ts_snort(const char* f, const struct timeval& v) char ts[TIMEBUF_SIZE]; snort::ts_print(&v, ts, false); - js.put(f, ts); + write_key(f); + out_buffer += '"'; + out_buffer += ts; + out_buffer += '"'; } void JsonExtractorLogger::ts_snort_yy(const char* f, const struct timeval& v) @@ -120,30 +159,43 @@ void JsonExtractorLogger::ts_snort_yy(const char* f, const struct timeval& v) char ts[TIMEBUF_SIZE]; snort::ts_print(&v, ts, true); - js.put(f, ts); + write_key(f); + out_buffer += '"'; + out_buffer += ts; + out_buffer += '"'; } void JsonExtractorLogger::ts_unix(const char* f, const struct timeval& v) { - double sec = (uint64_t)v.tv_sec; - double usec = (uint64_t)v.tv_usec; + double sec = static_cast(v.tv_sec); + double usec = static_cast(v.tv_usec); + double val = sec + usec / 1000000.0; + + const unsigned precision = 6; + const unsigned buf_size = 20 + 1 + precision + 1; + + char buf[buf_size]; + std::snprintf(buf, sizeof(buf), "%.*f", precision, val); - js.put(f, sec + usec / 1000000.0, 6); + write_key(f); + out_buffer += buf; } void JsonExtractorLogger::ts_sec(const char* f, const struct timeval& v) { - uint64_t sec = (uint64_t)v.tv_sec; + uint64_t sec = static_cast(v.tv_sec); - js.uput(f, sec); + write_key(f); + out_buffer += std::to_string(sec); } void JsonExtractorLogger::ts_usec(const char* f, const struct timeval& v) { - uint64_t sec = (uint64_t)v.tv_sec; - uint64_t usec = (uint64_t)v.tv_usec; + uint64_t sec = static_cast(v.tv_sec); + uint64_t usec = static_cast(v.tv_usec); - js.uput(f, sec * 1000000 + usec); + write_key(f); + out_buffer += std::to_string(sec * 1000000 + usec); } void JsonExtractorLogger::add_field(const char* f, const std::vector& v) @@ -151,11 +203,22 @@ void JsonExtractorLogger::add_field(const char* f, const std::vector& v) @@ -163,11 +226,19 @@ void JsonExtractorLogger::add_field(const char* f, const std::vector& if (v.empty()) return; - js.open_array(f); - for (const auto unum : v) - js.put(nullptr, unum); + write_key(f); + out_buffer += "[ "; + + auto it = v.cbegin(); + out_buffer += std::to_string(*it); + + for (++it; it != v.cend(); ++it) + { + out_buffer += ", "; + out_buffer += std::to_string(*it); + } - js.close_array(); + out_buffer += " ]"; } void JsonExtractorLogger::add_field(const char* f, const std::vector& v) @@ -175,11 +246,16 @@ void JsonExtractorLogger::add_field(const char* f, const std::vector& v) if (v.empty()) return; - js.open_array(f); - for (bool b : v) - b ? js.put_true(nullptr) : js.put_false(nullptr); + write_key(f); + out_buffer += "[ "; - js.close_array(); + auto it = v.cbegin(); + out_buffer += (*it ? "true" : "false"); + + for (++it; it != v.cend(); ++it) + out_buffer += (*it ? ", true" : ", false"); + + out_buffer += " ]"; } #ifdef UNIT_TEST @@ -188,67 +264,134 @@ void JsonExtractorLogger::add_field(const char* f, const std::vector& v) #include "catch/snort_catch.h" +#include "extractor_null_conn.h" + class JsonExtractorLoggerTest : public JsonExtractorLogger { public: - JsonExtractorLoggerTest() : JsonExtractorLogger(nullptr, TimeType::MAX) { } + JsonExtractorLoggerTest() : JsonExtractorLogger(new ExtractorNullConnector, TimeType::MAX) { } + + ~JsonExtractorLoggerTest() override + { delete output_conn; } void check(const char* f, const std::vector& v, const std::string& expected) { - oss.str(std::string()); add_field(f, v); - CHECK(oss.str() == expected); + CHECK(out_buffer == expected); + out_buffer.clear(); } void check(const char* f, const std::vector& v, const std::string& expected) { - oss.str(std::string()); add_field(f, v); - CHECK(oss.str() == expected); + CHECK(out_buffer == expected); + out_buffer.clear(); } void check(const char* f, const std::vector& v, const std::string& expected) { - oss.str(std::string()); add_field(f, v); - CHECK(oss.str() == expected); + CHECK(out_buffer == expected); + out_buffer.clear(); } }; TEST_CASE_METHOD(JsonExtractorLoggerTest, "json vector bool: empty", "[extractor]") { - const std::vector bool_vec = {}; + const std::vector bool_vec = { }; check("bool", bool_vec, ""); } TEST_CASE_METHOD(JsonExtractorLoggerTest, "json vector bool: 3 items", "[extractor]") { - const std::vector bool_vec = {true, false, true}; - check("bool", bool_vec, "\"bool\": [ true, false, true ]\n"); + const std::vector bool_vec = { true, false, true }; + check("bool", bool_vec, "\"bool\": [ true, false, true ]"); } TEST_CASE_METHOD(JsonExtractorLoggerTest, "json vector uint64_t: empty", "[extractor]") { - const std::vector num_vec = {}; + const std::vector num_vec = { }; check("num", num_vec, ""); } TEST_CASE_METHOD(JsonExtractorLoggerTest, "json vector uint64_t: 3 items", "[extractor]") { - const std::vector num_vec = {1,2,3}; - check("num", num_vec, "\"num\": [ 1, 2, 3 ]\n"); + const std::vector num_vec = { 1,2,3 }; + check("num", num_vec, "\"num\": [ 1, 2, 3 ]"); } TEST_CASE_METHOD(JsonExtractorLoggerTest, "json vector str: empty", "[extractor]") { - const std::vector char_vec = {}; + const std::vector char_vec = { }; check("str", char_vec, ""); } TEST_CASE_METHOD(JsonExtractorLoggerTest, "json vector str: 3 items", "[extractor]") { - const std::vector num_vec = {"exe", "pdf", "txt"}; - check("str", num_vec, "\"str\": [ \"exe\", \"pdf\", \"txt\" ]\n"); + const std::vector num_vec = { "exe", "pdf", "txt" }; + check("str", num_vec, "\"str\": [ \"exe\", \"pdf\", \"txt\" ]"); +} + +TEST_CASE_METHOD(JsonExtractorLoggerTest, "json multiple fields", "[extractor]") +{ + add_field("1", "foo"); + add_field("2", "bar", 3); + add_field("3", (uint64_t)123); + add_field("4", true); + add_field("5", false); + + snort::SfIp ip; + ip.pton(AF_INET, "1.1.1.1"); + add_field("6", ip); + + CHECK(out_buffer == "\"1\": \"foo\", \"2\": \"bar\", \"3\": 123, " + "\"4\": true, \"5\": false, \"6\": \"1.1.1.1\""); + out_buffer.clear(); +} + +TEST_CASE_METHOD(JsonExtractorLoggerTest, "json field with empty and null values", "[extractor]") +{ + add_field("1", "foo"); + add_field("2", nullptr); + add_field("3", ""); + add_field("4", "bar"); + CHECK(out_buffer == "\"1\": \"foo\", \"4\": \"bar\""); +} + +TEST_CASE_METHOD(JsonExtractorLoggerTest, "json field with empty and null values with length", "[extractor]") +{ + add_field("1", "foo", 3); + add_field("2", nullptr, 5); + add_field("3", ""); + add_field("4", "not empty", 0); + add_field("5", "bar", 3); + CHECK(out_buffer == "\"1\": \"foo\", \"5\": \"bar\""); +} + +TEST_CASE_METHOD(JsonExtractorLoggerTest, "json field bad ip", "[extractor]") +{ + snort::SfIp ip; + ip.pton(AF_INET, "10.10.10.10"); + add_field("ipv4", ip); + ip = { }; + + ip.pton(AF_INET, "bad ip"); + add_field("bad_ip", ip); + ip = { }; + + ip.pton(AF_INET6, "2001:db8:85a3::8a2e:370:7334"); + add_field("ipv6", ip); + + CHECK(out_buffer == "\"ipv4\": \"10.10.10.10\", " + "\"ipv6\": \"2001:0db8:85a3:0000:0000:8a2e:0370:7334\""); +} + +TEST_CASE_METHOD(JsonExtractorLoggerTest, "json open/close record", "[extractor]") +{ + open_record(); + close_record(snort::Connector::ID(1)); + CHECK(out_buffer == "{ }"); } #endif + diff --git a/src/network_inspectors/extractor/extractor_json_logger.h b/src/network_inspectors/extractor/extractor_json_logger.h index 811d1300a..c07b039bb 100644 --- a/src/network_inspectors/extractor/extractor_json_logger.h +++ b/src/network_inspectors/extractor/extractor_json_logger.h @@ -20,9 +20,9 @@ #ifndef EXTRACTOR_JSON_LOGGER_H #define EXTRACTOR_JSON_LOGGER_H -#include - -#include "helpers/json_stream.h" +#include +#include +#include #include "extractor_logger.h" @@ -46,17 +46,19 @@ public: void close_record(const snort::Connector::ID&) override; protected: - std::ostringstream oss; + std::string out_buffer; private: + void write_key(const char*); + void ts_snort(const char*, const struct timeval&); void ts_snort_yy(const char*, const struct timeval&); void ts_unix(const char*, const struct timeval&); void ts_sec(const char*, const struct timeval&); void ts_usec(const char*, const struct timeval&); - snort::JsonStream js; void (JsonExtractorLogger::*add_ts)(const char*, const struct timeval&); + bool first_field_written = false; }; #endif -- 2.47.3