From: Mike Stepanek (mstepane) Date: Mon, 13 Jun 2022 10:39:20 +0000 (+0000) Subject: Pull request #3464: JS Normalizer: fix regex literal parsing X-Git-Tag: 3.1.32.0~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4f99d5d34cb26bc000aa0f8547e6960e17dd6fae;p=thirdparty%2Fsnort3.git Pull request #3464: JS Normalizer: fix regex literal parsing Merge in SNORT/snort3 from ~OSHUMEIK/snort3:js_regex_fix to master Squashed commit of the following: commit a819e45513bfdde092a859b5f0234e706e3c15a7 Author: Oleksii Shumeiko Date: Thu Jun 9 15:03:19 2022 +0300 utils: remove redundant checks in regex groups In regex literal a group and a character class do not intersect. commit 70ede6db27e10957b7464587734e54502676c597 Author: Oleksii Shumeiko Date: Thu Jun 9 13:35:30 2022 +0300 utils: remove curly brace parsing from regex literals Curly braces inside a regex literal are not a point of interest, since they don't form a class or a group. --- diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index ae3a832d0..a78cc7e8b 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -1008,10 +1008,10 @@ LITERAL_TEMPLATE_TEXT [^\\%<$`]{1,32}|. LITERAL_REGEX_START \/[^*\/] LITERAL_REGEX_END \/[gimsuy]* -LITERAL_REGEX_SKIP \\\/|\\\\|\\\(|\\\)|\\\[|\\\]|\\\{|\\\} -LITERAL_REGEX_TEXT [^\\<\xA\xD\xE2/\(\)\[\]\{\}]{1,32}|. -LITERAL_REGEX_G_OPEN \(|\[|\{ -LITERAL_REGEX_G_CLOSE \)|\]|\} +LITERAL_REGEX_SKIP \\\/|\\\\|\\\(|\\\)|\\\[|\\\] +LITERAL_REGEX_TEXT [^\\<\xA\xD\xE2/\(\)\[\]]{1,32}|. +LITERAL_REGEX_G_OPEN \(|\[ +LITERAL_REGEX_G_CLOSE \)|\] /* extra literals */ /* according to https://ecma-international.org/ecma-262/5.1/#sec-4.3 */ LITERAL_UNDEFINED undefined @@ -2332,54 +2332,39 @@ JSTokenizer::JSRet JSTokenizer::literal_regex_g_open() JSTokenizer::JSRet JSTokenizer::literal_regex_g_close() { - char c_close = yytext[0]; - - if (regex_stack.empty()) + switch (yytext[0]) { - // a raw bracket is allowed in regex w/o unicode flag, - // but the parser will accept a bracket in regex with unicode flag - if (c_close == ']') + case ')': + if (regex_stack.empty()) { - ECHO; - return EOS; + debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, + "no group to close, .. %c\n", yytext[0]); + return BAD_TOKEN; } + else if (regex_stack.top() == '(') + regex_stack.pop(); + else + assert(regex_stack.top() == '['); - debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, - "no group to close, .. %c\n", yytext[0]); - return BAD_TOKEN; - } - - char c_open = regex_stack.top(); - bool mismatch = false; - - switch (c_open) - { - case '(': - mismatch = c_close != ')'; - regex_stack.pop(); break; - case '[': - // only the closing bracket has an effect in a character set - if (c_close == ']') + case ']': + if (regex_stack.empty()) + { + // a raw bracket is allowed in regex w/o unicode flag, + // but the parser will accept a bracket in regex with unicode flag + ECHO; + return EOS; + } + else if (regex_stack.top() == '[') regex_stack.pop(); - break; + else + assert(regex_stack.top() == '('); - case '{': - mismatch = c_close != '}'; - regex_stack.pop(); break; default: assert(false); - mismatch = true; - } - - if (mismatch) - { - debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, - "closing symbol mismatch, %c .. %c\n", c_open, c_close); - return BAD_TOKEN; } ECHO; diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index c35144add..5449a3748 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -685,6 +685,14 @@ static const char all_patterns_expected9[] = "|[^:/\\\\%]+\\/|[^:/\\\\%]*[?#]|about:blank#)/i;" "/[/ a b c / 1]/ a b c/1;"; +static const char all_patterns_buf10[] = + "function(a){if(!/^\\s*{/.test(a))return!1;a=_.xf(a);return null!==a&&\"object\"===typeof a&&!!a.g};" + "/^\\s*$/.test(Q)?0:/^[\\],:{}]*$/.replace(/(?=:|,|]|}|$)/g,z)"; + +static const char all_patterns_expected10[] = + "function(a){if(!/^\\s*{/.test(a))return!1;a=_.xf(a);return null!==a&&\"object\"===typeof a&&!!a.g};" + "/^\\s*$/.test(Q)?0:/^[\\],:{}]*$/.replace(/(?=:|,|]|}|$)/g,z)"; + TEST_CASE("all patterns", "[JSNormalizer]") { SECTION("whitespaces and special characters") @@ -790,6 +798,11 @@ TEST_CASE("all patterns", "[JSNormalizer]") NORMALIZE(all_patterns_buf9); VALIDATE(all_patterns_buf9, all_patterns_expected9); } + SECTION("regex literal with curly brace") + { + NORMALIZE(all_patterns_buf10); + VALIDATE(all_patterns_buf10, all_patterns_expected10); + } } // Test vectors for different syntax cases @@ -1110,12 +1123,12 @@ static const char syntax_cases_expected23[] = "`${`${`${`${`"; static const char syntax_cases_buf24[] = - "var a=/{{{{/}}}}/;" - "var b=/{{{{{/}}}}}/;"; + "var a=/((((/))))/;" + "var b=/(((((/)))))/;"; static const char syntax_cases_expected24[] = - "var a=/{{{{/}}}}/;" - "var b=/{{{{"; + "var a=/((((/))))/;" + "var b=/(((("; static const char syntax_cases_buf25[] = "return /regex0/.foo + /regex1/.bar ;" @@ -3561,26 +3574,6 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") NORMALIZE_1(dat2, exp2); NORMALIZE_1(dat3, exp3); } - SECTION("parentheses - wrong closing symbol") - { - const char dat1[] = "/({ (} })/"; - const char dat2[] = "/({ (] })/"; - const char exp1[] = "/({ ("; - const char exp2[] = "/({ ("; - - NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); - NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); - } - SECTION("curly braces - wrong closing symbol") - { - const char dat1[] = "/({ {) })/"; - const char dat2[] = "/({ {] })/"; - const char exp1[] = "/({ {"; - const char exp2[] = "/({ {"; - - NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); - NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); - } SECTION("square brackets - raw bracket") { const char dat1[] = "/]/"; @@ -3598,23 +3591,10 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") { const char dat1[] = "/)/"; const char dat2[] = "/())/"; - const char dat3[] = "/({{ ()) }})/"; + const char dat3[] = "/( ()) )/"; const char exp1[] = "/"; const char exp2[] = "/()"; - const char exp3[] = "/({{ ()"; - - NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); - NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); - NORM_BAD_1(dat3, exp3, JSTokenizer::BAD_TOKEN); - } - SECTION("curly braces - mismatch") - { - const char dat1[] = "/}/"; - const char dat2[] = "/{}}/"; - const char dat3[] = "/({( {}} )})/"; - const char exp1[] = "/"; - const char exp2[] = "/{}"; - const char exp3[] = "/({( {}"; + const char exp3[] = "/( ()) "; NORM_BAD_1(dat1, exp1, JSTokenizer::BAD_TOKEN); NORM_BAD_1(dat2, exp2, JSTokenizer::BAD_TOKEN); @@ -3631,17 +3611,6 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") NORMALIZE_2(dat1, dat2, exp1, exp2); NORM_COMBINED_2(dat1, dat2, exp); } - SECTION("curly braces - continuation") - { - const char dat1[] = "/{{"; - const char dat2[] = "}}/"; - const char exp1[] = "/{{"; - const char exp2[] = "}}/"; - const char exp[] = "/{{}}/"; - - NORMALIZE_2(dat1, dat2, exp1, exp2); - NORM_COMBINED_2(dat1, dat2, exp); - } SECTION("square brackets - continuation") { const char dat1[] = "/[["; @@ -3661,17 +3630,6 @@ TEST_CASE("scope regex groups", "[JSNormalizer]") const char exp2[] = ")"; const char exp[] = "/()"; - NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN); - NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN); - } - SECTION("curly braces - mismatch in continuation") - { - const char dat1[] = "/{"; - const char dat2[] = "}}/"; - const char exp1[] = "/{"; - const char exp2[] = "}"; - const char exp[] = "/{}"; - NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::BAD_TOKEN); NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::BAD_TOKEN); }