From: Mike Stepanek (mstepane) Date: Mon, 16 May 2022 16:42:12 +0000 (+0000) Subject: Pull request #3422: JS Normalizer: regex char groups parsing X-Git-Tag: 3.1.30.0~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=07d372f8b18e95d1a59c204bc33060380aa2491e;p=thirdparty%2Fsnort3.git Pull request #3422: JS Normalizer: regex char groups parsing Merge in SNORT/snort3 from ~OSHUMEIK/snort3:js_regex_char_set to master Squashed commit of the following: commit bfac8f0bb9e69f89c289ab39b53b096d3b515219 Author: Oleksii Shumeiko Date: Fri May 13 16:38:36 2022 +0300 utils: fix regex char classes parsing Inside a character set only few characters retain a special meaning. --- diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index cf6f02de0..c690ff401 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -2300,30 +2300,63 @@ JSTokenizer::JSRet JSTokenizer::literal_regex_g_open() if (regex_stack.size() >= max_template_nesting) return TEMPLATE_NESTING_OVERFLOW; - regex_stack.push(yytext[0]); + // special meaning only outside of a character class + if (regex_stack.empty() or regex_stack.top() != '[') + regex_stack.push(yytext[0]); + ECHO; return EOS; } JSTokenizer::JSRet JSTokenizer::literal_regex_g_close() { + char c_close = yytext[0]; + if (regex_stack.empty()) { + // a raw bracket is allowed in regex w/o unicode flag, + // but the parser will accept a bracket in regex with unicode flag + if (c_close == ']') + { + ECHO; + return EOS; + } + debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, "no group to close, .. %c\n", yytext[0]); return BAD_TOKEN; } - char c = yytext[0]; - char o = regex_stack.top(); - char d = o == '(' ? 1 : 2; + char c_open = regex_stack.top(); + bool mismatch = false; - regex_stack.pop(); + switch (c_open) + { + case '(': + mismatch = c_close != ')'; + regex_stack.pop(); + break; + + case '[': + // only the closing bracket has an effect in a character set + if (c_close == ']') + regex_stack.pop(); + break; + + case '{': + mismatch = c_close != '}'; + regex_stack.pop(); + break; + + default: + assert(false); + mismatch = true; + } - if (o + d != c) + if (mismatch) { debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, - "closing symbol mismatch, %c .. %c\n", o, c); + "closing symbol mismatch, %c .. %c\n", c_open, c_close); return BAD_TOKEN; } diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index 8c5d7b9f2..7ea5ec734 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -673,11 +673,17 @@ static const char all_patterns_expected8[] = static const char all_patterns_buf9[] = "var r = /^(?:(?:https?|mailto|ftp):|[^:/?#]*(?:[/?#]|$))/i;" - "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)})"; + "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)});" + "pa=/^((https:)?\\/\\/[0-9a-z.:[\\]-]+\\/|\\/[^/\\\\]|" + "[^:/\\\\%]+\\/|[^:/\\\\%]*[?#]|about:blank#)/i;" + "/[/ a b c / 1]/ a b c / 1;"; static const char all_patterns_expected9[] = "var r=/^(?:(?:https?|mailto|ftp):|[^:/?#]*(?:[/?#]|$))/i;" - "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)})"; + "new Lb(function(a){return /^[^:]*([/?#]|$)/.test(a)});" + "pa=/^((https:)?\\/\\/[0-9a-z.:[\\]-]+\\/|\\/[^/\\\\]" + "|[^:/\\\\%]+\\/|[^:/\\\\%]*[?#]|about:blank#)/i;" + "/[/ a b c / 1]/ a b c/1;"; TEST_CASE("all patterns", "[JSNormalizer]") { @@ -1104,12 +1110,12 @@ static const char syntax_cases_expected23[] = "`${`${`${`${`"; static const char syntax_cases_buf24[] = - "var a=/[[[[/]]]]/;" - "var b=/[[[[[/]]]]]/;"; + "var a=/{{{{/}}}}/;" + "var b=/{{{{{/}}}}}/;"; static const char syntax_cases_expected24[] = - "var a=/[[[[/]]]]/;" - "var b=/[[[["; + "var a=/{{{{/}}}}/;" + "var b=/{{{{"; static const char syntax_cases_buf25[] = "return /regex0/.foo + /regex1/.bar ;" @@ -3421,14 +3427,17 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") { const char dat1[] = "a=/[]/;"; const char dat2[] = "b=/[][][]/;"; - const char dat3[] = "c=/[[[]]]/;"; + const char dat3[] = "c=/[[[[[]/;"; + const char dat4[] = "d=/[/]/;"; const char exp1[] = "a=/[]/;"; const char exp2[] = "b=/[][][]/;"; - const char exp3[] = "c=/[[[]]]/;"; + const char exp3[] = "c=/[[[[[]/;"; + const char exp4[] = "d=/[/]/;"; NORMALIZE_1(dat1, exp1); NORMALIZE_1(dat2, exp2); NORMALIZE_1(dat3, exp3); + NORMALIZE_1(dat4, exp4); } SECTION("mix of brackets") { @@ -3445,42 +3454,45 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") } SECTION("parentheses - wrong closing symbol") { - const char dat1[] = "/({[ (} ]})/"; - const char dat2[] = "/({[ (] ]})/"; - const char exp1[] = "/({[ ("; - const char exp2[] = "/({[ ("; + const char dat1[] = "/({ (} })/"; + const char dat2[] = "/({ (] })/"; + const char exp1[] = "/({ ("; + const char exp2[] = "/({ ("; NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); } SECTION("curly braces - wrong closing symbol") { - const char dat1[] = "/({[ {) ]})/"; - const char dat2[] = "/({[ {] ]})/"; - const char exp1[] = "/({[ {"; - const char exp2[] = "/({[ {"; + const char dat1[] = "/({ {) })/"; + const char dat2[] = "/({ {] })/"; + const char exp1[] = "/({ {"; + const char exp2[] = "/({ {"; NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); } - SECTION("square brackets - wrong closing symbol") + SECTION("square brackets - raw bracket") { - const char dat1[] = "/([{ [) }])/"; - const char dat2[] = "/([{ [} }])/"; - const char exp1[] = "/([{ ["; - const char exp2[] = "/([{ ["; + const char dat1[] = "/]/"; + const char dat2[] = "/[]]/"; + const char dat3[] = "/][]]/g"; + const char exp1[] = "/]/"; + const char exp2[] = "/[]]/"; + const char exp3[] = "/][]]/g"; - NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); - NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); + NORMALIZE_1(dat1, exp1); + NORMALIZE_1(dat2, exp2); + NORMALIZE_1(dat3, exp3); } SECTION("parentheses - mismatch") { const char dat1[] = "/)/"; const char dat2[] = "/())/"; - const char dat3[] = "/({[ ()) ]})/"; + const char dat3[] = "/({{ ()) }})/"; const char exp1[] = "/"; const char exp2[] = "/()"; - const char exp3[] = "/({[ ()"; + const char exp3[] = "/({{ ()"; NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); @@ -3490,23 +3502,10 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") { const char dat1[] = "/}/"; const char dat2[] = "/{}}/"; - const char dat3[] = "/({[ {}} ]})/"; + const char dat3[] = "/({( {}} )})/"; const char exp1[] = "/"; const char exp2[] = "/{}"; - const char exp3[] = "/({[ {}"; - - NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); - NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); - NORM_BAD_1(dat3, exp3, JSTokenizer::BAD_TOKEN); - } - SECTION("square brackets - mismatch") - { - const char dat1[] = "/]/"; - const char dat2[] = "/[]]/"; - const char dat3[] = "/([{ []] }])/"; - const char exp1[] = "/"; - const char exp2[] = "/[]"; - const char exp3[] = "/([{ []"; + const char exp3[] = "/({( {}"; NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); @@ -3537,10 +3536,10 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") SECTION("square brackets - continuation") { const char dat1[] = "/[["; - const char dat2[] = "]]/"; + const char dat2[] = "[]/"; const char exp1[] = "/[["; - const char exp2[] = "]]/"; - const char exp[] = "/[[]]/"; + const char exp2[] = "[]/"; + const char exp[] = "/[[[]/"; NORMALIZE_2(dat1, dat2, exp1, exp2); NORM_COMBINED_2(dat1, dat2, exp); @@ -3564,17 +3563,6 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") const char exp2[] = "}"; const char exp[] = "/{}"; - NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN); - NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN); - } - SECTION("square brackets - mismatch in continuation") - { - const char dat1[] = "/["; - const char dat2[] = "]]/"; - const char exp1[] = "/["; - const char exp2[] = "]"; - const char exp[] = "/[]"; - NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN); NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN); }