From: Mike Stepanek (mstepane) Date: Fri, 13 May 2022 15:23:06 +0000 (+0000) Subject: Pull request #3420: JavaScript Normalizer: add Latin-1 decoding of JavaScript unescap... X-Git-Tag: 3.1.30.0~9 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1e6c46f54b6c037136d6189649c1f8fb562b7dea;p=thirdparty%2Fsnort3.git Pull request #3420: JavaScript Normalizer: add Latin-1 decoding of JavaScript unescape-like functions Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_unescape_latin_1 to master Squashed commit of the following: commit aee1c83bfea39d7bd219eb7aecd5255dca2d470f Author: Oleksandr Serhiienko Date: Wed May 11 13:07:41 2022 +0300 utils: add Latin-1 decoding of JavaScript unescape-like functions --- diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index 95fe7dd2e..1a88f01f5 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -272,13 +272,17 @@ For example: in: "string".toUpperCase().split("").reverse().join(""); out: "string".var_0000().split("").reverse().join(""); -In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape -functions(unescape, decodeURI, decodeURIComponent, String.fromCharCode, String.fromCodePoint). +In addition to the scope tracking, JS Normalizer specifically tracks unescape-like JavaScript +functions (unescape, decodeURI, decodeURIComponent, String.fromCharCode, String.fromCodePoint). This allows detection of unescape functions nested within other unescape functions, which is a potential indicator of a multilevel obfuscation. The definition of a function call depends on identifier substitution, so such identifiers must be included in the ignore list in order to use this feature. After determining the unescape sequence, it is decoded into the corresponding string, and the name of unescape function will not be present in the output. +Single-byte escape sequences within the string and template literals which are arguments of +unescape, decodeURI and decodeURIComponent functions will be decoded according to ISO/IEC 8859-1 +(Latin-1) charset. Except these cases, escape sequences and code points will be decoded to UTF-8 +format. For example: diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 96e0d509a..8f994bc35 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -297,9 +297,10 @@ private: JSRet general_literal(); JSRet general_identifier(); void general_unicode(); - void escaped_unicode(); + void escaped_unicode_latin_1(); + void escaped_unicode_utf_8(); void escaped_code_point(); - void escaped_url_sequence(); + void escaped_url_sequence_latin_1(); void dec_code_point(); void hex_code_point(); void char_code_no_match(); diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index 569bf4b0f..f130c523a 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -1103,14 +1103,14 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_DQ_STRING_TEXT} { dealias_append(); ECHO; } <> { RETURN(SCRIPT_CONTINUE) } {UNICODE_ESCAPE_SEQUENCE} | -{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } +{HEX_ESCAPE_SEQUENCE} { escaped_unicode_utf_8(); } {ESCAPED_CODE_POINT} { escaped_code_point(); } -{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); } -{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); } +{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode_utf_8(); } +{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode_latin_1(); } {ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); } -{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); } -{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); } -{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); } +{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode_latin_1(); } +{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode_utf_8(); } +{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence_latin_1(); } {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) } {LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } @@ -1124,14 +1124,14 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_SQ_STRING_TEXT} { dealias_append(); ECHO; } <> { RETURN(SCRIPT_CONTINUE) } {UNICODE_ESCAPE_SEQUENCE} | -{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } +{HEX_ESCAPE_SEQUENCE} { escaped_unicode_utf_8(); } {ESCAPED_CODE_POINT} { escaped_code_point(); } -{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); } -{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); } +{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode_utf_8(); } +{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode_latin_1(); } {ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); } -{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); } -{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); } -{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); } +{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode_latin_1(); } +{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode_utf_8(); } +{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence_latin_1(); } {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) } (\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); } @@ -1142,14 +1142,14 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_TEMPLATE_OTHER} { dealias_append(); ECHO; } <> { RETURN(SCRIPT_CONTINUE) } {UNICODE_ESCAPE_SEQUENCE} | -{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } +{HEX_ESCAPE_SEQUENCE} { escaped_unicode_utf_8(); } {ESCAPED_CODE_POINT} { escaped_code_point(); } -{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); } -{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); } +{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode_utf_8(); } +{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode_latin_1(); } {ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); } -{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); } -{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); } -{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); } +{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode_latin_1(); } +{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode_utf_8(); } +{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence_latin_1(); } {LITERAL_REGEX_START} { EXEC(literal_regex_start()) } {LITERAL_REGEX_END} { EXEC(literal_regex_end()) } @@ -1162,7 +1162,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_REGEX_G_OPEN} { EXEC(literal_regex_g_open()) } {LITERAL_REGEX_G_CLOSE} { EXEC(literal_regex_g_close()) } {UNICODE_ESCAPE_SEQUENCE} | -{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } +{HEX_ESCAPE_SEQUENCE} { escaped_unicode_utf_8(); } <> { RETURN(SCRIPT_CONTINUE) } {DIV_OPERATOR} | @@ -2830,7 +2830,14 @@ void JSTokenizer::general_unicode() set_ident_norm(true); } -void JSTokenizer::escaped_unicode() +void JSTokenizer::escaped_unicode_latin_1() +{ + // truncate escape symbol, get hex number only + std::string code(YYText() + 2); + yyout << (char)std::stoi(code, nullptr, 16); +} + +void JSTokenizer::escaped_unicode_utf_8() { // truncate escape symbol, get hex number only std::string code(YYText() + 2); @@ -2845,11 +2852,11 @@ void JSTokenizer::escaped_code_point() yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); } -void JSTokenizer::escaped_url_sequence() +void JSTokenizer::escaped_url_sequence_latin_1() { // truncate escape symbol, get hex number only std::string code(YYText() + 1); - yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); + yyout << (char)std::stoi(code, nullptr, 16); } void JSTokenizer::dec_code_point() diff --git a/src/utils/test/js_dealias_test.cc b/src/utils/test/js_dealias_test.cc index 0d70b63b1..c1c97cdf6 100644 --- a/src/utils/test/js_dealias_test.cc +++ b/src/utils/test/js_dealias_test.cc @@ -424,6 +424,12 @@ TEST_CASE("De-aliasing - basic", "[JSNormalizer]") "a \n = \n eval \n a \n eval;", "var_0000=eval;eval;eval;" ); + + SECTION("with unescape") + test_normalization( + "a = \\u0065\\u{0076}\\u0061\\u{006C}; a(); a.foo();", + "var_0000=eval;eval();eval.foo();" + ); } TEST_CASE("De-aliasing - split", "[JSNormalizer]") diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index 44c02b4eb..fba494958 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -5766,6 +5766,15 @@ TEST_CASE("String Concatenation - With unescape", "[JSNormalizer]") SECTION("inside function call arguments") test_normalization("unescape('foo' + '%62' + '%61' + '%72')", "'foobar'"); + + SECTION("Latin-1 encoding - string + unescape") + test_normalization("'foo ' + unescape('%eb')", "'foo \xEB'"); + + SECTION("Latin-1 encoding - unescape + string") + test_normalization("unescape('%eb') + ' foo'", "'\xEB foo'"); + + SECTION("Latin-1 encoding - unescape + unescape") + test_normalization("unescape('%eb') + unescape('%eb')", "'\xEB\xEB'"); } SECTION("String.fromCharCode") { diff --git a/src/utils/test/js_unescape_test.cc b/src/utils/test/js_unescape_test.cc index 64833687c..949e9840b 100644 --- a/src/utils/test/js_unescape_test.cc +++ b/src/utils/test/js_unescape_test.cc @@ -33,246 +33,52 @@ TEST_CASE("Sequence parsing", "[JSNormalizer]") { SECTION("\\xXX") - { - test_normalization( - "'\\x01'", - "'\u0001'" - ); - test_normalization( - "'\\x23'", - "'\u0023'" - ); - test_normalization( - "'\\x45'", - "'\u0045'" - ); - test_normalization( - "'\\x67'", - "'\u0067'" - ); test_normalization( - "'\\x89'", - "'\u0089'" - ); - test_normalization( - "'\\xaA'", - "'\u00aA'" - ); - test_normalization( - "'\\xbB'", - "'\u00bB'" - ); - test_normalization( - "'\\xcC'", - "'\u00cC'" - ); - test_normalization( - "'\\xdD'", - "'\u00dD'" - ); - test_normalization( - "'\\xeE'", - "'\u00eE'" + "'\\x01 \\x23 \\x45 \\x67 \\x89 \\xaA \\xbB \\xcC \\xdD \\xeE \\xfF'", + "'\u0001 \u0023 \u0045 \u0067 \u0089 \u00aA \u00bB \u00cC \u00dD \u00eE \u00fF'" ); + + SECTION("\\xXX - unescape function") test_normalization( - "'\\xfF'", - "'\u00fF'" + "unescape('\\x01 \\x23 \\x45 \\x67 \\x89 \\xaA \\xbB \\xcC \\xdD \\xeE \\xfF')", + "'\x01 \x23 \x45 \x67 \x89 \xaA \xbB \xcC \xdD \xeE \xfF'" ); - } SECTION("\\uXXXX") - { test_normalization( - "'\\u0123'", - "'\u0123'" + "'\\u0020 \\u00EB \\u0123 \\u4567 \\u89aA \\ubBcC \\u00dD \\ueEfF'", + "'\u0020 \u00EB \u0123 \u4567 \u89aA \ubBcC \u00dD \ueEfF'" ); - test_normalization( - "'\\u4567'", - "'\u4567'" - ); - test_normalization( - "'\\u89aA'", - "'\u89aA'" - ); - test_normalization( - "'\\ubBcC'", - "'\ubBcC'" - ); - test_normalization( - "'\\ueEfF'", - "'\ueEfF'" - ); - } SECTION("\\u{XXXX}") - { - test_normalization( - "'\\u{0123}'", - "'\u0123'" - ); - test_normalization( - "'\\u{4567}'", - "'\u4567'" - ); - test_normalization( - "'\\u{89aA}'", - "'\u89aA'" - ); - test_normalization( - "'\\u{bBcC}'", - "'\ubBcC'" - ); test_normalization( - "'\\u{eEfF}'", - "'\ueEfF'" + "'\\u{0020} \\u{00EB} \\u{0123} \\u{4567} \\u{89aA} \\u{bBcC} \\u{00dD} \\u{eEfF}'", + "'\u0020 \u00EB \u0123 \u4567 \u89aA \ubBcC \u00dD \ueEfF'" ); - } SECTION("%XX") - { - test_normalization( - "unescape('%01')", - "'\u0001'" - ); test_normalization( - "unescape('%23')", - "'\u0023'" + "unescape('%01 %23 %45 %67 %89 %aA %bB %cC %dD %eE %fF')", + "'\x01 \x23 \x45 \x67 \x89 \xaA \xbB \xcC \xdD \xeE \xfF'" ); - test_normalization( - "unescape('%45')", - "'\u0045'" - ); - test_normalization( - "unescape('%67')", - "'\u0067'" - ); - test_normalization( - "unescape('%89')", - "'\u0089'" - ); - test_normalization( - "unescape('%aA')", - "'\u00aA'" - ); - test_normalization( - "unescape('%bB')", - "'\u00bB'" - ); - test_normalization( - "unescape('%cC')", - "'\u00cC'" - ); - test_normalization( - "unescape('%dD')", - "'\u00dD'" - ); - test_normalization( - "unescape('%eE')", - "'\u00eE'" - ); - test_normalization( - "unescape('%fF')", - "'\u00fF'" - ); - } SECTION("\\uXX") - { - test_normalization( - "unescape('\\u01')", - "'\u0001'" - ); - test_normalization( - "unescape('%23')", - "'\u0023'" - ); - test_normalization( - "unescape('\\u45')", - "'\u0045'" - ); test_normalization( - "unescape('\\u67')", - "'\u0067'" + "unescape('\\u01 \\u23 \\u45 \\u67 \\u89 \\uaA \\ubB \\ucC \\udD \\ueE \\ufF')", + "'\x01 \x23 \x45 \x67 \x89 \xaA \xbB \xcC \xdD \xeE \xfF'" ); - test_normalization( - "unescape('\\u89')", - "'\u0089'" - ); - test_normalization( - "unescape('\\uaA')", - "'\u00aA'" - ); - test_normalization( - "unescape('\\ubB')", - "'\u00bB'" - ); - test_normalization( - "unescape('\\ucC')", - "'\u00cC'" - ); - test_normalization( - "unescape('\\udD')", - "'\u00dD'" - ); - test_normalization( - "unescape('\\ueE')", - "'\u00eE'" - ); - test_normalization( - "unescape('\\ufF')", - "'\u00fF'" - ); - } SECTION("%uXXXX") - { - test_normalization( - "unescape('%u0123')", - "'\u0123'" - ); test_normalization( - "unescape('%u4567')", - "'\u4567'" + "unescape('%u0020 %u00EB %u0123 %u4567 %u89aA %ubBcC %u00dD %ueEfF')", + "'\u0020 \u00EB \u0123 \u4567 \u89aA \ubBcC \u00dD \ueEfF'" ); - test_normalization( - "unescape('%u89aA')", - "'\u89aA'" - ); - test_normalization( - "unescape('%ubBcC')", - "'\ubBcC'" - ); - test_normalization( - "unescape('%ueEfF')", - "'\ueEfF'" - ); - } SECTION("decimal") { test_normalization( - "String.fromCharCode(1)", - "'\u0001'" - ); - test_normalization( - "String.fromCharCode(12)", - "'\u000c'" - ); - test_normalization( - "String.fromCharCode(345)", - "'\u0159'" - ); - test_normalization( - "String.fromCharCode(6789)", - "'\u1a85'" - ); - test_normalization( - "String.fromCharCode(1000)", - "'\u03e8'" - ); - test_normalization( - "String.fromCharCode(0001)", - "'\x01'" + "String.fromCharCode(1, 12 ,235, 345, 6789, 1000, 0001)", + "'\u0001\u000c\u00EB\u0159\u1a85\u03e8\u0001'" ); test_normalization( "String.fromCharCode(65536)", @@ -287,36 +93,8 @@ TEST_CASE("Sequence parsing", "[JSNormalizer]") SECTION("hexadecimal") { test_normalization( - "String.fromCharCode(0x0001)", - "'\u0001'" - ); - test_normalization( - "String.fromCharCode(0X0001)", - "'\u0001'" - ); - test_normalization( - "String.fromCharCode(0x1234)", - "'\u1234'" - ); - test_normalization( - "String.fromCharCode(0X5678)", - "'\u5678'" - ); - test_normalization( - "String.fromCharCode(0x9aAb)", - "'\u9aAb'" - ); - test_normalization( - "String.fromCharCode(0x9aAb)", - "'\u9aAb'" - ); - test_normalization( - "String.fromCharCode(0xBcCd)", - "'\uBcCd'" - ); - test_normalization( - "String.fromCharCode(0XeEfF)", - "'\ueEfF'" + "String.fromCharCode(0x0001, 0X00EB, 0x0123, 0x4567, 0x89aA, 0xbBcC, 0x00dD, 0xeEfF)", + "'\u0001\u00EB\u0123\u4567\u89aA\ubBcC\u00dD\ueEfF'" ); test_normalization( "String.fromCodePoint(0x10000)", @@ -342,64 +120,64 @@ TEST_CASE("Universal sequences", "[JSNormalizer]") SECTION("\\uXXXX") { test_normalization( - "\\u0065\\u0076\\u0061\\u006C () ;", - "eval();" + "\\u0065\\u0076\\u0061\\u006C () ; \\u00EB\\u1234 ;", + "eval();var_0000;" ); test_normalization( - "'\\u0062\\u0061\\u0072'", - "'bar'" + "'\\u0062\\u0061\\u0072' ; '\\u00EB\\ueEfF' ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "\"\\u0062\\u0061\\u0072\"", - "\"bar\"" + "\"\\u0062\\u0061\\u0072\" ; \"\\u00EB\\ueEfF\" ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "`\\u0062\\u0061\\u0072`", - "`bar`" + "`\\u0062\\u0061\\u0072` ; `\\u00EB\\ueEfF` ;", + "`bar`;`\u00EB\ueEfF`;" ); test_normalization( - "/\\u0062\\u0061\\u0072/", - "/bar/" + "/\\u0062\\u0061\\u0072/ ; /\\u00EB\\ueEfF/ ;", + "/bar/;/\u00EB\ueEfF/;" ); } SECTION("\\xXX") { test_normalization( - "'\\x62\\x61\\x72'", - "'bar'" + "'\\x62\\x61\\x72' ; '\\xEB' ;", + "'bar';'\u00EB';" ); test_normalization( - "\"\\x62\\x61\\x72\"", - "\"bar\"" + "\"\\x62\\x61\\x72\" ; \"\\xEB\" ;", + "\"bar\";\"\u00EB\";" ); test_normalization( - "`\\x62\\x61\\x72`", - "`bar`" + "`\\x62\\x61\\x72` ; `\\xEB` ;", + "`bar`;`\u00EB`;" ); test_normalization( - "/\\x62\\x61\\x72/", - "/bar/" + "/\\x62\\x61\\x72/ ; /\\xEB/ ;", + "/bar/;/\u00EB/;" ); } SECTION("\\u{XXXX}") { test_normalization( - "\\u{0065}\\u{0076}\\u{0061}\\u{006C} () ;", - "eval();" + "\\u{0065}\\u{0076}\\u{0061}\\u{006C} () ; \\u{00EB}\\u{1234} ;", + "eval();var_0000;" ); test_normalization( - "'\\u{0062}\\u{0061}\\u{0072}'", - "'bar'" + "'\\u{0062}\\u{0061}\\u{0072}'; '\\u{00EB}\\u{eEfF}' ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "\"\\u{0062}\\u{0061}\\u{0072}\"", - "\"bar\"" + "\"\\u{0062}\\u{0061}\\u{0072}\" ; \"\\u{00EB}\\u{eEfF}\" ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "`\\u{0062}\\u{0061}\\u{0072}`", - "`bar`" + "`\\u{0062}\\u{0061}\\u{0072}` ; `\\u{00EB}\\u{eEfF}` ;", + "`bar`;`\u00EB\ueEfF`;" ); } } @@ -409,96 +187,96 @@ TEST_CASE("unescape()", "[JSNormalizer]") SECTION("%XX") { test_normalization( - "unescape('%62%61%72')", - "'bar'" + "unescape('%62%61%72') ; unescape('%EB') ;", + "'bar';'\xEB';" ); test_normalization( - "unescape(\"%62%61%72\")", - "\"bar\"" + "unescape(\"%62%61%72\") ; unescape(\"%EB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "unescape(`%62%61%72`)", - "`bar`" + "unescape(`%62%61%72`) ; unescape(`%EB`) ;", + "`bar`;`\xEB`;" ); } SECTION("%uXXXX") { test_normalization( - "unescape('%u0062%u0061%u0072')", - "'bar'" + "unescape('%u0062%u0061%u0072') ; unescape('%u00EB%ueEfF') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "unescape(\"%u0062%u0061%u0072\")", - "\"bar\"" + "unescape(\"%u0062%u0061%u0072\") ; unescape(\"%u00EB%ueEfF\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "unescape(`%u0062%u0061%u0072`)", - "`bar`" + "unescape(`%u0062%u0061%u0072`) ; unescape(`%u00EB%ueEfF`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } SECTION("\\uXX") { test_normalization( - "unescape('\\u62\\u61\\u72')", - "'bar'" + "unescape('\\u62\\u61\\u72') ; unescape('\\uEB') ;", + "'bar';'\xEB';" ); test_normalization( - "unescape(\"\\u62\\u61\\u72\")", - "\"bar\"" + "unescape(\"\\u62\\u61\\u72\") ; unescape(\"\\uEB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "unescape(`\\u62\\u61\\u72`)", - "`bar`" + "unescape(`\\u62\\u61\\u72`) ; unescape(`\\uEB`) ;", + "`bar`;`\xEB`;" ); } SECTION("\\uXXXX") { test_normalization( - "unescape('\\u0062\\u0061\\u0072')", - "'bar'" + "unescape('\\u0062\\u0061\\u0072') ; unescape('\\u00EB\\ueEfF') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "unescape(\"\\u0062\\u0061\\u0072\")", - "\"bar\"" + "unescape(\"\\u0062\\u0061\\u0072\") ; unescape(\"\\u00EB\\ueEfF\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "unescape(`\\u0062\\u0061\\u0072`)", - "`bar`" + "unescape(`\\u0062\\u0061\\u0072`) ; unescape(`\\u00EB\\ueEfF`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } SECTION("\\xXX") { test_normalization( - "unescape('\\x62\\x61\\x72')", - "'bar'" + "unescape('\\x62\\x61\\x72') ; unescape('\\xEB') ;", + "'bar';'\xEB';" ); test_normalization( - "unescape(\"\\x62\\x61\\x72\")", - "\"bar\"" + "unescape(\"\\x62\\x61\\x72\") ; unescape(\"\\xEB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "unescape(`\\x62\\x61\\x72`)", - "`bar`" + "unescape(`\\x62\\x61\\x72`) ; unescape(`\\xEB`) ;", + "`bar`;`\xEB`;" ); } SECTION("\\u{XXXX}") { test_normalization( - "unescape('\\u{0062}\\u{0061}\\u{0072}')", - "'bar'" + "unescape('\\u{0062}\\u{0061}\\u{0072}') ; unescape('\\u{00EB}\\u{eEfF}') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "unescape(\"\\u{0062}\\u{0061}\\u{0072}\")", - "\"bar\"" + "unescape(\"\\u{0062}\\u{0061}\\u{0072}\") ; unescape(\"\\u{00EB}\\u{eEfF}\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "unescape(`\\u{0062}\\u{0061}\\u{0072}`)", - "`bar`" + "unescape(`\\u{0062}\\u{0061}\\u{0072}`) ; unescape(`\\u{00EB}\\u{eEfF}`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } @@ -541,96 +319,96 @@ TEST_CASE("decodeURI()", "[JSNormalizer]") SECTION("%XX") { test_normalization( - "decodeURI('%62%61%72')", - "'bar'" + "decodeURI('%62%61%72') ; decodeURI('%EB') ;", + "'bar';'\xEB';" ); test_normalization( - "decodeURI(\"%62%61%72\")", - "\"bar\"" + "decodeURI(\"%62%61%72\") ; decodeURI(\"%EB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "decodeURI(`%62%61%72`)", - "`bar`" + "decodeURI(`%62%61%72`) ; decodeURI(`%EB`) ;", + "`bar`;`\xEB`;" ); } SECTION("%uXXXX") { test_normalization( - "decodeURI('%u0062%u0061%u0072')", - "'bar'" + "decodeURI('%u0062%u0061%u0072') ; decodeURI('%u00EB%ueEfF') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "decodeURI(\"%u0062%u0061%u0072\")", - "\"bar\"" + "decodeURI(\"%u0062%u0061%u0072\") ; decodeURI(\"%u00EB%ueEfF\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "decodeURI(`%u0062%u0061%u0072`)", - "`bar`" + "decodeURI(`%u0062%u0061%u0072`) ; decodeURI(`%u00EB%ueEfF`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } SECTION("\\uXX") { test_normalization( - "decodeURI('\\u62\\u61\\u72')", - "'bar'" + "decodeURI('\\u62\\u61\\u72') ; decodeURI('\\uEB') ;", + "'bar';'\xEB';" ); test_normalization( - "decodeURI(\"\\u62\\u61\\u72\")", - "\"bar\"" + "decodeURI(\"\\u62\\u61\\u72\") ; decodeURI(\"\\uEB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "decodeURI(`\\u62\\u61\\u72`)", - "`bar`" + "decodeURI(`\\u62\\u61\\u72`) ; decodeURI(`\\uEB`) ;", + "`bar`;`\xEB`;" ); } SECTION("\\uXXXX") { test_normalization( - "decodeURI('\\u0062\\u0061\\u0072')", - "'bar'" + "decodeURI('\\u0062\\u0061\\u0072') ; decodeURI('\\u00EB\\ueEfF') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "decodeURI(\"\\u0062\\u0061\\u0072\")", - "\"bar\"" + "decodeURI(\"\\u0062\\u0061\\u0072\") ; decodeURI(\"\\u00EB\\ueEfF\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "decodeURI(`\\u0062\\u0061\\u0072`)", - "`bar`" + "decodeURI(`\\u0062\\u0061\\u0072`) ; decodeURI(`\\u00EB\\ueEfF`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } SECTION("\\xXX") { test_normalization( - "decodeURI('\\x62\\x61\\x72')", - "'bar'" + "decodeURI('\\x62\\x61\\x72') ; decodeURI('\\xEB') ;", + "'bar';'\xEB';" ); test_normalization( - "decodeURI(\"\\x62\\x61\\x72\")", - "\"bar\"" + "decodeURI(\"\\x62\\x61\\x72\") ; decodeURI(\"\\xEB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "decodeURI(`\\x62\\x61\\x72`)", - "`bar`" + "decodeURI(`\\x62\\x61\\x72`) ; decodeURI(`\\xEB`) ;", + "`bar`;`\xEB`;" ); } SECTION("\\u{XXXX}") { test_normalization( - "decodeURI('\\u{0062}\\u{0061}\\u{0072}')", - "'bar'" + "decodeURI('\\u{0062}\\u{0061}\\u{0072}') ; decodeURI('\\u{00EB}\\u{eEfF}') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "decodeURI(\"\\u{0062}\\u{0061}\\u{0072}\")", - "\"bar\"" + "decodeURI(\"\\u{0062}\\u{0061}\\u{0072}\") ; decodeURI(\"\\u{00EB}\\u{eEfF}\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "decodeURI(`\\u{0062}\\u{0061}\\u{0072}`)", - "`bar`" + "decodeURI(`\\u{0062}\\u{0061}\\u{0072}`) ; decodeURI(`\\u{00EB}\\u{eEfF}`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } @@ -673,96 +451,100 @@ TEST_CASE("decodeURIComponent()", "[JSNormalizer]") SECTION("%XX") { test_normalization( - "decodeURIComponent('%62%61%72')", - "'bar'" + "decodeURIComponent('%62%61%72') ; decodeURIComponent('%EB') ;", + "'bar';'\xEB';" ); test_normalization( - "decodeURIComponent(\"%62%61%72\")", - "\"bar\"" + "decodeURIComponent(\"%62%61%72\") ; decodeURIComponent(\"%EB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "decodeURIComponent(`%62%61%72`)", - "`bar`" + "decodeURIComponent(`%62%61%72`) ; decodeURIComponent(`%EB`) ;", + "`bar`;`\xEB`;" ); } SECTION("%uXXXX") { test_normalization( - "decodeURIComponent('%u0062%u0061%u0072')", - "'bar'" + "decodeURIComponent('%u0062%u0061%u0072') ; decodeURIComponent('%u00EB%ueEfF') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "decodeURIComponent(\"%u0062%u0061%u0072\")", - "\"bar\"" + "decodeURIComponent(\"%u0062%u0061%u0072\") ; decodeURIComponent(\"%u00EB%ueEfF\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "decodeURIComponent(`%u0062%u0061%u0072`)", - "`bar`" + "decodeURIComponent(`%u0062%u0061%u0072`) ; decodeURIComponent(`%u00EB%ueEfF`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } SECTION("\\uXX") { test_normalization( - "decodeURIComponent('\\u62\\u61\\u72')", - "'bar'" + "decodeURIComponent('\\u62\\u61\\u72') ; decodeURIComponent('\\uEB') ;", + "'bar';'\xEB';" ); test_normalization( - "decodeURIComponent(\"\\u62\\u61\\u72\")", - "\"bar\"" + "decodeURIComponent(\"\\u62\\u61\\u72\") ; decodeURIComponent(\"\\uEB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "decodeURIComponent(`\\u62\\u61\\u72`)", - "`bar`" + "decodeURIComponent(`\\u62\\u61\\u72`) ; decodeURIComponent(`\\uEB`) ;", + "`bar`;`\xEB`;" ); } SECTION("\\uXXXX") { test_normalization( - "decodeURIComponent('\\u0062\\u0061\\u0072')", - "'bar'" + "decodeURIComponent('\\u0062\\u0061\\u0072') ; decodeURIComponent('\\u00EB\\ueEfF') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "decodeURIComponent(\"\\u0062\\u0061\\u0072\")", - "\"bar\"" + "decodeURIComponent(\"\\u0062\\u0061\\u0072\") ; " + "decodeURIComponent(\"\\u00EB\\ueEfF\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "decodeURIComponent(`\\u0062\\u0061\\u0072`)", - "`bar`" + "decodeURIComponent(`\\u0062\\u0061\\u0072`) ; decodeURIComponent(`\\u00EB\\ueEfF`) ;", + "`bar`;`\u00EB\ueEfF`;" ); } SECTION("\\xXX") { test_normalization( - "decodeURIComponent('\\x62\\x61\\x72')", - "'bar'" + "decodeURIComponent('\\x62\\x61\\x72') ; decodeURIComponent('\\xEB') ;", + "'bar';'\xEB';" ); test_normalization( - "decodeURIComponent(\"\\x62\\x61\\x72\")", - "\"bar\"" + "decodeURIComponent(\"\\x62\\x61\\x72\") ; decodeURIComponent(\"\\xEB\") ;", + "\"bar\";\"\xEB\";" ); test_normalization( - "decodeURIComponent(`\\x62\\x61\\x72`)", - "`bar`" + "decodeURIComponent(`\\x62\\x61\\x72`) ; decodeURIComponent(`\\xEB`) ;", + "`bar`;`\xEB`;" ); } SECTION("\\u{XXXX}") { test_normalization( - "decodeURIComponent('\\u{0062}\\u{0061}\\u{0072}')", - "'bar'" + "decodeURIComponent('\\u{0062}\\u{0061}\\u{0072}') ; " + "decodeURIComponent('\\u{00EB}\\u{eEfF}') ;", + "'bar';'\u00EB\ueEfF';" ); test_normalization( - "decodeURIComponent(\"\\u{0062}\\u{0061}\\u{0072}\")", - "\"bar\"" + "decodeURIComponent(\"\\u{0062}\\u{0061}\\u{0072}\") ; " + "decodeURIComponent(\"\\u{00EB}\\u{eEfF}\") ;", + "\"bar\";\"\u00EB\ueEfF\";" ); test_normalization( - "decodeURIComponent(`\\u{0062}\\u{0061}\\u{0072}`)", - "`bar`" + "decodeURIComponent(`\\u{0062}\\u{0061}\\u{0072}`) ; " + "decodeURIComponent(`\\u{00EB}\\u{eEfF}`) ;", + "`bar`;`\u00EB\ueEfF`;" ); }