From: Mike Stepanek (mstepane) Date: Fri, 1 Oct 2021 16:57:06 +0000 (+0000) Subject: Merge pull request #3081 in SNORT/snort3 from ~VHORBATO/snort3:rename_norm_ident... X-Git-Tag: 3.1.14.0~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4c84efc6e58b938166d71389a3bf8d07b994eecf;p=thirdparty%2Fsnort3.git Merge pull request #3081 in SNORT/snort3 from ~VHORBATO/snort3:rename_norm_ident to master Squashed commit of the following: commit 613865899894440d15e9cb49ba6a76b1cb790688 Author: Vitalii Date: Mon Sep 27 09:49:16 2021 +0300 http_inspect: change format of normalized JS identifiers utils: adjust output streambuffer expanding strategy and reserved memory --- diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index 707835373..c33a6e024 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -232,9 +232,9 @@ So, the following whitespace codes will be normalized: * Also including new-line and carriage-return line-break characters All JavaScript identifier names will be substituted to unified names with the -following format: a0 -> z9999. So, the number of unique identifiers available -is 260000 names per HTTP transaction. If Normalizer overruns the configured -limit, built-in alert generated. Additionaly, there is a config option to +following format: var_0000 -> var_ffff. So, the number of unique identifiers available +is 65536 names per HTTP transaction. If Normalizer overruns the configured +limit, built-in alert is generated. Additionally, there is a config option to specify the limit manually: * http_inspect.js_norm_identifier_depth. 
diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc index b090fbf64..e92461950 100755 --- a/src/service_inspectors/http_inspect/http_module.cc +++ b/src/service_inspectors/http_inspect/http_module.cc @@ -85,8 +85,8 @@ const Parameter HttpModule::http_params[] = "number of input JavaScript bytes to normalize (-1 unlimited) " "(experimental)" }, - // range of accepted identifier names is (a0:z9999), so the max is 26 * 10000 = 260000 - { "js_norm_identifier_depth", Parameter::PT_INT, "0:260000", "260000", + // range of accepted identifier names is (var_0000:var_ffff), so the max is 2^16 + { "js_norm_identifier_depth", Parameter::PT_INT, "0:65536", "65536", "max number of unique JavaScript identifiers to normalize" }, { "js_norm_max_tmpl_nest", Parameter::PT_INT, "0:255", "32", diff --git a/src/utils/dev_notes.txt b/src/utils/dev_notes.txt index 7ecc6c546..5d5d1f1b7 100644 --- a/src/utils/dev_notes.txt +++ b/src/utils/dev_notes.txt @@ -44,5 +44,5 @@ which could be useful for final consumer. From performance perspective, ostreambuf_infl can reserve an amount of memory before actual operations. Also, memory extending is done by predefined -portions of 2^8^, 2^9^, 2^10^, 2^13^, 2^16^, 2^16^, 2^16^... +portions of 2^11^, 2^12^, 2^13^, 2^14^, 2^15^, 2^15^, 2^15^... This tries to minimize the number of memory reallocation. 
diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc index 3b682b490..35b2b44dd 100644 --- a/src/utils/js_identifier_ctx.cc +++ b/src/utils/js_identifier_ctx.cc @@ -42,44 +42,43 @@ public: }; #endif // CATCH_TEST_BUILD -#define FIRST_NAME_SIZE 26 -#define LAST_NAME_SIZE 9999 +#define MAX_LAST_NAME 65535 +#define HEX_DIGIT_MASK 15 -static const char s_ident_first_names[FIRST_NAME_SIZE] = +static const char hex_digits[] = { - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' + '0', '1','2','3', '4', '5', '6', '7', '8','9', 'a', 'b', 'c', 'd', 'e', 'f' }; +static inline std::string format_name(int32_t num) +{ + std::string name("var_"); + name.reserve(8); + name.push_back(hex_digits[(num >> 12) & HEX_DIGIT_MASK]); + name.push_back(hex_digits[(num >> 8) & HEX_DIGIT_MASK]); + name.push_back(hex_digits[(num >> 4) & HEX_DIGIT_MASK]); + name.push_back(hex_digits[num & HEX_DIGIT_MASK]); + + return name; +} + const char* JSIdentifierCtx::substitute(const char* identifier) { const auto it = ident_names.find(identifier); if (it != ident_names.end()) return it->second.c_str(); - if (++ident_last_name > LAST_NAME_SIZE) - { - if (++ident_first_name > FIRST_NAME_SIZE - 1) - return nullptr; - - ident_last_name = 0; - } - - if (++unique_ident_cnt > depth) + if (ident_last_name >= depth || ident_last_name > MAX_LAST_NAME) return nullptr; - ident_names[identifier] = s_ident_first_names[ident_first_name] - + std::to_string(ident_last_name); - + ident_names[identifier] = format_name(ident_last_name++); HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER); return ident_names[identifier].c_str(); } void JSIdentifierCtx::reset() { - ident_first_name = 0; - ident_last_name = -1; - unique_ident_cnt = 0; + ident_last_name = 0; ident_names.clear(); } diff --git a/src/utils/js_identifier_ctx.h b/src/utils/js_identifier_ctx.h index 6a5add294..b69ec8679 100644 --- 
a/src/utils/js_identifier_ctx.h +++ b/src/utils/js_identifier_ctx.h @@ -46,9 +46,7 @@ public: { return (sizeof(JSIdentifierCtx) + (sizeof(std::string) * 2 * 500)); } private: - int ident_first_name = 0; - int ident_last_name = -1; - int32_t unique_ident_cnt = 0; + int32_t ident_last_name = 0; int32_t depth; std::unordered_map<std::string, std::string> ident_names; diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc index eff85e64e..cca5ed758 100644 --- a/src/utils/js_normalizer.cc +++ b/src/utils/js_normalizer.cc @@ -23,6 +23,8 @@ #include "js_normalizer.h" +#define BUFF_EXP_FACTOR 1.3 + using namespace snort; using namespace std; @@ -67,7 +69,7 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len) in_buf.pubsetbuf(nullptr, 0) ->pubsetbuf(tmp_buf, tmp_buf_size) ->pubsetbuf(const_cast<char*>(src), len); - out_buf.reserve(src_len); + out_buf.reserve(src_len * BUFF_EXP_FACTOR); JSTokenizer::JSRet ret = static_cast<JSTokenizer::JSRet>(tokenizer.yylex()); in.clear(); diff --git a/src/utils/streambuf.cc b/src/utils/streambuf.cc index dee1b939a..1b5e1d1b0 100644 --- a/src/utils/streambuf.cc +++ b/src/utils/streambuf.cc @@ -222,11 +222,11 @@ int istreambuf_glue::underflow() const ostreambuf_infl::State ostreambuf_infl::states[] = { - {states + 1, 1 << 8}, - {states + 2, 1 << 9}, - {states + 3, 1 << 10}, - {states + 4, 1 << 13}, - {states + 4, 1 << 16} + {states + 1, 1 << 11}, + {states + 2, 1 << 12}, + {states + 3, 1 << 13}, + {states + 4, 1 << 14}, + {states + 4, 1 << 15} }; ostreambuf_infl::ostreambuf_infl() : @@ -335,7 +335,7 @@ streamsize ostreambuf_infl::xsputn(const char* s, streamsize n) auto c_avail = epptr() - pptr(); if (n > c_avail) - enlarge(n - c_avail); + gen.n > (n - c_avail) ?
enlarge() : enlarge(n - c_avail); auto n_avail = epptr() - pptr(); n = min(n, n_avail); diff --git a/src/utils/test/js_identifier_ctx_test.cc b/src/utils/test/js_identifier_ctx_test.cc index 89f025250..618bb22b9 100644 --- a/src/utils/test/js_identifier_ctx_test.cc +++ b/src/utils/test/js_identifier_ctx_test.cc @@ -25,19 +25,11 @@ #include #include +#include #include "utils/js_identifier_ctx.h" -#define DEPTH 260000 - -#define FIRST_NAME_SIZE 26 -#define LAST_NAME_SIZE 9999 - -static const char s_ident_first_names[FIRST_NAME_SIZE] = -{ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' -}; +#define DEPTH 65536 TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") { @@ -45,26 +37,26 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") { JSIdentifierCtx ident_ctx(DEPTH); - CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); - CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); + CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); } SECTION("different names") { JSIdentifierCtx ident_ctx(DEPTH); - CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); - CHECK(!strcmp(ident_ctx.substitute("b"), "a1")); - CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); + CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001")); + CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); } SECTION("depth reached") { JSIdentifierCtx ident_ctx(2); - CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); - CHECK(!strcmp(ident_ctx.substitute("b"), "a1")); + CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); + CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001")); CHECK(ident_ctx.substitute("c") == nullptr); CHECK(ident_ctx.substitute("d") == nullptr); - CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); } SECTION("max names") { @@ -77,10 
+69,12 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") for (int it = 0; it < DEPTH + 2; ++it) n.push_back("n" + std::to_string(it)); - for (int it_first = 0; it_first < FIRST_NAME_SIZE; ++it_first) + for (int it_name = 0; it_name < DEPTH; ++it_name) { - for (int it_last = 0; it_last <= LAST_NAME_SIZE; ++it_last) - e.push_back(s_ident_first_names[it_first] + std::to_string(it_last)); + std::stringstream stream; + stream << std::setfill ('0') << std::setw(4) + << std::hex << it_name; + e.push_back("var_" + stream.str()); } for (int it = 0; it < DEPTH; ++it) diff --git a/src/utils/test/streambuf_test.cc b/src/utils/test/streambuf_test.cc index 65778366e..f3246b4fc 100644 --- a/src/utils/test/streambuf_test.cc +++ b/src/utils/test/streambuf_test.cc @@ -1442,9 +1442,9 @@ TEST_CASE("output buffer - basic", "[Stream buffers]") CHECK(c == 'A'); CHECK(off_b == 0); CHECK(off_c == 1); - CHECK(off_e == 256); + CHECK(off_e == 2048); - EXP_RES(b, exp, 1, 256); + EXP_RES(b, exp, 1, 2048); } SECTION("put two chars") @@ -1463,9 +1463,9 @@ TEST_CASE("output buffer - basic", "[Stream buffers]") CHECK(off_b == 0); CHECK(off_1 == 1); CHECK(off_2 == 2); - CHECK(off_e == 256); + CHECK(off_e == 2048); - EXP_RES(b, exp, 2, 256); + EXP_RES(b, exp, 2, 2048); } SECTION("extend buffer") @@ -1484,9 +1484,9 @@ TEST_CASE("output buffer - basic", "[Stream buffers]") CHECK(c2 == 'Z'); CHECK(off_b == 0); CHECK(off_1 == 1); - CHECK(off_2 == 257); - CHECK(off_e == 256); - CHECK(off_z == 768); + CHECK(off_2 == 2049); + CHECK(off_e == 2048); + CHECK(off_z == 6144); } SECTION("put sequence of chars") @@ -1524,7 +1524,7 @@ TEST_CASE("output buffer - basic", "[Stream buffers]") CHECK(c2 == 'Z'); CHECK(off_b == 0); CHECK(off_c == len + 2); - CHECK(off_e == 4096 + 512); + CHECK(off_e == 4096 + 2048); } }