From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco)
Date: Fri, 15 Jul 2022 19:02:01 +0000 (+0000)
Subject: Pull request #3518: utils: fix Unicode LS PS handling in JavaScript
X-Git-Tag: 3.1.37.0~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ee51c95dda95af29f50df133d52a09e250b812d1;p=thirdparty%2Fsnort3.git

Pull request #3518: utils: fix Unicode LS PS handling in JavaScript

Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_fix_lsps to master

Squashed commit of the following:

commit 0a5bd2f42ba011e233b4e4cef21e7530f005b97f
Author: Oleksandr Serhiienko
Date:   Thu Jul 14 13:58:19 2022 +0300

    utils: fix Unicode LS PS handling in JavaScript
---

diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l
index 45a4bcb87..d5e06b373 100644
--- a/src/utils/js_tokenizer.l
+++ b/src/utils/js_tokenizer.l
@@ -1110,7 +1110,8 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 \\{CR}{LF}                  { /* skip */ }
 \\{LF}                      { /* skip */ }
 \\{CR}                      { /* skip */ }
-{LINE_TERMINATORS}          { BEGIN(regst); RETURN(BAD_TOKEN) }
+{CR}                        { BEGIN(regst); RETURN(BAD_TOKEN) }
+{LF}                        { BEGIN(regst); RETURN(BAD_TOKEN) }
 <<EOF>>                     { RETURN(SCRIPT_CONTINUE) }
 {UNICODE_ESCAPE_SEQUENCE}   |
 {HEX_ESCAPE_SEQUENCE}       { escaped_unicode_utf_8(); }
@@ -1131,7 +1132,8 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 \\{CR}{LF}                  { /* skip */ }
 \\{LF}                      { /* skip */ }
 \\{CR}                      { /* skip */ }
-{LINE_TERMINATORS}          { BEGIN(regst); RETURN(BAD_TOKEN) }
+{CR}                        { BEGIN(regst); RETURN(BAD_TOKEN) }
+{LF}                        { BEGIN(regst); RETURN(BAD_TOKEN) }
 <<EOF>>                     { RETURN(SCRIPT_CONTINUE) }
 {UNICODE_ESCAPE_SEQUENCE}   |
 {HEX_ESCAPE_SEQUENCE}       { escaped_unicode_utf_8(); }
@@ -1166,9 +1168,10 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 {LITERAL_REGEX_START}       { EXEC(literal_regex_start()) }
 {LITERAL_REGEX_END}         { EXEC(literal_regex_end()) }
 {HTML_TAG_SCRIPT_CLOSE}     { BEGIN(regst); RETURN(CLOSING_TAG) }
-\\{LF}                      |
-\\{CR}                      |
-{LINE_TERMINATORS}          { BEGIN(regst); RETURN(BAD_TOKEN) }
+\\{CR}                      { BEGIN(regst); RETURN(BAD_TOKEN) }
+\\{LF}                      { BEGIN(regst); RETURN(BAD_TOKEN) }
+{CR}                        { BEGIN(regst); RETURN(BAD_TOKEN) }
+{LF}                        { BEGIN(regst); RETURN(BAD_TOKEN) }
 {LITERAL_REGEX_G_OPEN}      { EXEC(literal_regex_g_open()) }
 {LITERAL_REGEX_G_CLOSE}     { EXEC(literal_regex_g_close()) }
 {UNICODE_ESCAPE_SEQUENCE}   |
diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc
index d6709f6bc..926f6f1b4 100644
--- a/src/utils/test/js_normalizer_test.cc
+++ b/src/utils/test/js_normalizer_test.cc
@@ -1074,7 +1074,7 @@ static const char syntax_cases_buf15[] =
     "var invalid_str = 'abc\u2028 def' ;\n";
 
 static const char syntax_cases_expected15[] =
-    "var invalid_str='abc";
+    "var invalid_str='abc\u2028 def';";
 
 static const char syntax_cases_buf16[] =
     "var invalid_str = \"abc\n def\"";
@@ -1110,7 +1110,7 @@ static const char syntax_cases_buf21[] =
     "var invalid_str = 'abc\u2029 def' ;\n\r";
 
 static const char syntax_cases_expected21[] =
-    "var invalid_str='abc";
+    "var invalid_str='abc\u2029 def';";
 
 static const char syntax_cases_buf22[] =
     "tag`template\n \\\\\\${ } \\\\${ a + ` template ${ 1 + c }` }`";
@@ -1140,6 +1140,12 @@ static const char syntax_cases_expected25[] =
     "return /regex0/.var_0000+/regex1/.var_0001;"
     "return /regex2/.var_0002*/regex3/.var_0003;";
 
+static const char syntax_cases_buf26[] =
+    "var invalid_re = /abc \n def/ ;";
+
+static const char syntax_cases_expected26[] =
+    "var invalid_re=/abc ";
+
 TEST_CASE("syntax cases", "[JSNormalizer]")
 {
     SECTION("variables")
@@ -1217,6 +1223,16 @@ TEST_CASE("syntax cases", "[JSNormalizer]")
         NORMALIZE(syntax_cases_buf14);
         VALIDATE(syntax_cases_buf14, syntax_cases_expected14);
     }
+    SECTION("LS within literal")
+    {
+        NORMALIZE(syntax_cases_buf15);
+        VALIDATE(syntax_cases_buf15, syntax_cases_expected15);
+    }
+    SECTION("PS within literal")
+    {
+        NORMALIZE(syntax_cases_buf21);
+        VALIDATE(syntax_cases_buf21, syntax_cases_expected21);
+    }
     SECTION("template literals")
     {
         NORMALIZE(syntax_cases_buf22);
@@ -1230,16 +1246,6 @@ TEST_CASE("syntax cases", "[JSNormalizer]")
 
 TEST_CASE("bad tokens", "[JSNormalizer]")
 {
-    SECTION("LS chars within literal")
-    {
-        NORMALIZE(syntax_cases_buf15);
-        VALIDATE_FAIL(syntax_cases_buf15, syntax_cases_expected15, JSTokenizer::BAD_TOKEN, 25);
-    }
-    SECTION("PS chars within literal")
-    {
-        NORMALIZE(syntax_cases_buf21);
-        VALIDATE_FAIL(syntax_cases_buf21, syntax_cases_expected21, JSTokenizer::BAD_TOKEN, 25);
-    }
     SECTION("explicit LF within literal")
     {
         NORMALIZE(syntax_cases_buf16);
@@ -1265,6 +1271,11 @@ TEST_CASE("bad tokens", "[JSNormalizer]")
         NORMALIZE(syntax_cases_buf20);
         VALIDATE_FAIL(syntax_cases_buf20, syntax_cases_expected20, JSTokenizer::BAD_TOKEN, 23);
     }
+    SECTION("explicit LF within regex literal")
+    {
+        NORMALIZE(syntax_cases_buf26);
+        VALIDATE_FAIL(syntax_cases_buf26, syntax_cases_expected26, JSTokenizer::BAD_TOKEN, 23);
+    }
 }
 
 TEST_CASE("braces overflow", "[JSNormalizer]")
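
Summary of the behavioral change, as read from the diff and test expectations: the string-literal and
regex-literal states previously returned BAD_TOKEN on any {LINE_TERMINATORS} match, which covers CR, LF,
and the Unicode separators LS (U+2028) and PS (U+2029). After the patch only a raw CR or LF aborts the
literal; LS and PS are carried through to the normalized output (see syntax_cases_expected15 and
syntax_cases_expected21), which lines up with ECMAScript 2019 allowing U+2028/U+2029 inside string
literals. The new syntax_cases_buf26 case confirms that a raw LF inside a regex literal still fails with
BAD_TOKEN. The sketch below is a hypothetical, standalone C++ illustration of that rule split; the
aborts_string_literal helper is not a Snort 3 function, it only mirrors the byte sequences the lexer
rules distinguish.

    // Hypothetical helper mirroring the patched rule split: CR/LF still abort a
    // string-literal scan, while the 3-byte UTF-8 forms of U+2028/U+2029 do not.
    #include <cstdio>

    static bool aborts_string_literal(const unsigned char* c)
    {
        // {CR} or {LF}; LS (E2 80 A8) and PS (E2 80 A9) fall through to the
        // ordinary literal-content rules after this change.
        return c[0] == 0x0D || c[0] == 0x0A;
    }

    int main()
    {
        const unsigned char lf[] = { 0x0A };
        const unsigned char ls[] = { 0xE2, 0x80, 0xA8 };  // U+2028 LINE SEPARATOR
        const unsigned char ps[] = { 0xE2, 0x80, 0xA9 };  // U+2029 PARAGRAPH SEPARATOR

        std::printf("LF aborts: %d\n", aborts_string_literal(lf));  // 1
        std::printf("LS aborts: %d\n", aborts_string_literal(ls));  // 0
        std::printf("PS aborts: %d\n", aborts_string_literal(ps));  // 0
        return 0;
    }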