From: Mike Stepanek (mstepane) Date: Wed, 23 Mar 2022 19:31:39 +0000 (+0000) Subject: Pull request #3312: JSN: Unescape Text Processing X-Git-Tag: 3.1.27.0~17 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cd2925f7ae3fd0ba6ecd88de2e647d69edf402f4;p=thirdparty%2Fsnort3.git Pull request #3312: JSN: Unescape Text Processing Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_unescape to master Squashed commit of the following: commit 5e79a2a365a4b5b74670d4bfc6f94bcc35f3b2d6 Author: Oleksandr Serhiienko Date: Fri Mar 18 20:39:48 2022 +0200 utils: fix JS Normalizer benchmark build commit 8b79a4adbc538ea1b6400486cbe1b82a5369d1af Author: Oleksandr Serhiienko Date: Fri Mar 4 22:05:17 2022 +0200 http_inspect: add unescape text processing for Enhanced JS Normalizer * utils: decode %XX, %uXXXX, \uXX, \uXXXX, \xXX, \u{CHAR_CODE} escape sequences * utils: decode hexadecimal and decimal code points * utils: add support for unescape of universal sequences in identifiers, strings, template literals and regular expressions * utils: add support for unescape(), decodeURI(), decodeURIComponent() JavaScript functions * utils: add support for String.fromCharCode() JavaScript function * utils: add unit test coverage * utils: add benchmark test * http_inspect: enable alert 119:280 - mixed encoding * http_inspect: update dev notes * doc: update user manual --- diff --git a/doc/user/http_inspect.txt b/doc/user/http_inspect.txt index 98dec3b94..2d583e0cd 100755 --- a/doc/user/http_inspect.txt +++ b/doc/user/http_inspect.txt @@ -78,8 +78,9 @@ Normalizer. The Enhanced Normalizer can normalize inline/external scripts. It supports scripts over multiple PDUs. It is a stateful JavaScript whitespace and identifiers normalizer. All JavaScript identifier names, except those from the ignore list, will be substituted with unified names in the following -format: var_0000 -> var_ffff. 
Moreover, Normalizer validates the syntax -concerning ECMA-262 Standard, including scope tracking and restrictions +format: var_0000 -> var_ffff. The Normalizer tries to expand escaped text, +so that it appears in its usual form in the output. Moreover, Normalizer validates +the syntax concerning ECMA-262 Standard, including scope tracking and restrictions for script elements. For more information on how additionally configure Enhanced Normalizer check with the following configuration options: js_norm_bytes_depth, js_norm_identifier_depth, js_norm_max_tmpl_nest, diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index 5e785eda6..83c5970be 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -256,11 +256,33 @@ For example: a("hello") // will be substituted to 'console.log("hello")' In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape -functions(unescape, decodeURI, decodeURIComponent). This allows detection of +functions(unescape, decodeURI, decodeURIComponent, String.fromCharCode). This allows detection of unescape functions nested within other unescape functions, which is a potential indicator of a multilevel obfuscation. The definition of a function call depends on identifier substitution, so such identifiers must be included in the ignore list in -order to use this feature. +order to use this feature. Once an unescape sequence is identified, it is decoded into the +corresponding string. + +For example: + + unescape('\u0062\u0061\u0072') -> 'bar' + decodeURI('%62%61%72') -> 'bar' + decodeURIComponent('\x62\x61\x72') -> 'bar' + String.fromCharCode(98, 0x0061, 0x72) -> 'bar' + +The following formats are supported: + + \xXX + \uXXXX + \u{XXXX} + %XX + \uXX + %uXXXX + decimal code point + hexadecimal code point + +JS Normalizer is able to decode mixed encoding sequences. 
However, a built-in alert is raised +in such a case. JS Normalizer's syntax parser follows ECMA-262 standard. For various features, tracking of variable scope and individual brackets is done in accordance to the standard. diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index d73de30a6..2fd94b7a0 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -219,6 +219,11 @@ void HttpJsNorm::do_external(const Field& input, Field& output, *infractions += INF_JS_UNESCAPE_NEST; events->create_event(EVENT_JS_UNESCAPE_NEST); } + if (js_ctx.is_mixed_encoding_seen()) + { + *infractions += INF_JS_MULTIPLE_ENC; + events->create_event(EVENT_JS_MULTIPLE_ENC); + } if (ssn->js_built_in_event) break; @@ -358,6 +363,11 @@ void HttpJsNorm::do_inline(const Field& input, Field& output, *infractions += INF_JS_UNESCAPE_NEST; events->create_event(EVENT_JS_UNESCAPE_NEST); } + if (js_ctx.is_mixed_encoding_seen()) + { + *infractions += INF_JS_MULTIPLE_ENC; + events->create_event(EVENT_JS_MULTIPLE_ENC); + } script_continue = ret == JSTokenizer::SCRIPT_CONTINUE; } diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc index 49274c834..e6900c8d7 100644 --- a/src/utils/js_identifier_ctx.cc +++ b/src/utils/js_identifier_ctx.cc @@ -200,7 +200,7 @@ const char* JSIdentifierCtx::alias_lookup(const char* alias) const // advanced program scope access for testing -#ifdef CATCH_TEST_BUILD +#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST) bool JSIdentifierCtx::scope_check(const std::list& compare) const { @@ -226,4 +226,4 @@ const std::list JSIdentifierCtx::get_types() const return return_list; } -#endif // CATCH_TEST_BUILD +#endif // CATCH_TEST_BUILD || BENCHMARK_TEST diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index 4a694d3c7..8508cd7d4 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -61,14 +61,17 
@@ public: bool is_unescape_nesting_seen() const { return tokenizer.is_unescape_nesting_seen(); } -#ifdef CATCH_TEST_BUILD + bool is_mixed_encoding_seen() const + { return tokenizer.is_mixed_encoding_seen(); } + +#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST) const char* get_tmp_buf() const { return tmp_buf; } size_t get_tmp_buf_size() const { return tmp_buf_size; } const JSTokenizer& get_tokenizer() const { return tokenizer; } -#endif +#endif // CATCH_TEST_BUILD || BENCHMARK_TEST #ifdef BENCHMARK_TEST void rewind_output() diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 2dcdc1fe7..0747af6d3 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -43,9 +43,10 @@ extern THREAD_LOCAL const snort::Trace* http_trace; enum JSProgramScopeType : unsigned int; class JSIdentifierCtxBase; -#ifdef CATCH_TEST_BUILD +#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST) class JSTokenizerTester; -#endif +#endif // CATCH_TEST_BUILD || BENCHMARK_TEST + class JSTokenizer : public yyFlexLexer { private: @@ -98,7 +99,7 @@ private: { Scope(ScopeType t) : type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC), - ident_norm(true), block_param(false), do_loop(false) + ident_norm(true), block_param(false), do_loop(false), encoding(0), char_code_str(false) {} ScopeType type; @@ -107,6 +108,8 @@ private: bool ident_norm; bool block_param; bool do_loop; + uint32_t encoding; + bool char_code_str; }; enum ASIGroup @@ -164,6 +167,7 @@ public: JSRet process(size_t& bytes_in); bool is_unescape_nesting_seen() const; + bool is_mixed_encoding_seen() const; protected: [[noreturn]] void LexerError(const char* msg) override { snort::FatalError("%s", msg); } @@ -181,6 +185,7 @@ private: JSRet push_identifier(const char* ident); bool unescape(const char* lexeme); void process_punctuator(JSToken tok = PUNCTUATOR); + void skip_punctuator(); void process_closing_brace(); JSRet process_subst_open(); @@ -209,11 +214,24 @@ private: 
FuncType func_call_type(); FuncType detect_func_type(); void check_function_nesting(FuncType); + void check_mixed_encoding(uint32_t); void set_block_param(bool); bool block_param(); void set_do_loop(bool); bool do_loop(); + void set_encoding(uint32_t f) + { scope_cur().encoding |= f; } + + uint32_t encoding() + { return scope_cur().encoding; } + + void set_char_code_str(bool f) + { scope_cur().char_code_str = f; } + + bool char_code_str() + { return scope_cur().char_code_str; } + static JSProgramScopeType m2p(ScopeMetaType); static const char* m2str(ScopeMetaType); static bool is_operator(JSToken); @@ -269,6 +287,12 @@ private: JSRet general_literal(); JSRet general_identifier(); void general_unicode(); + void escaped_unicode(); + void escaped_code_point(); + void escaped_url_sequence(); + void dec_code_point(); + void hex_code_point(); + void char_code_no_match(); static const char* p_scope_codes[]; @@ -283,6 +307,7 @@ private: bool prefix_increment = false; bool dealias_stored = false; bool unescape_nest_seen = false; + bool mixed_encoding_seen = false; uint8_t max_template_nesting; std::stack> brace_depth; @@ -346,9 +371,9 @@ private: const uint32_t max_bracket_depth; std::stack scope_stack; -#ifdef CATCH_TEST_BUILD +#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST) friend JSTokenizerTester; -#endif // CATCH_TEST_BUILD +#endif // CATCH_TEST_BUILD || BENCHMARK_TEST }; #endif // JS_TOKENIZER_H diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index b7f93df12..074dd45e9 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -82,6 +82,20 @@ constexpr bool JSTokenizer::insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX]; +// encoding flags + +enum EncodingType +{ + IS_HEX = 1 << 0, // hex code unit: 0xXXXX + IS_DEC = 1 << 1, // dec code unit: XXXX + IS_XBACKSLASH = 1 << 2, // \xXX + IS_UBACKSLASH_1 = 1 << 3, // \uXX + IS_UBACKSLASH_2 = 1 << 4, // \uXXXX + IS_UPERCENT = 1 << 5, // %uXXXX + IS_PERCENT = 1 << 6, // %XX + IS_UCODEPOINT = 
1 << 7 // \u{0xXXXX} +}; + %} /* The following grammar was created based on ECMAScript specification */ @@ -848,9 +862,19 @@ UNICODE_CONNECTOR_PUNCTUATION {CONNECTOR_PUNCT_RNG_1}|{CONNECTOR_PUNCT_RNG_2} UNICODE_ZWNJ \xE2\x80\x8C UNICODE_ZWJ \xE2\x80\x8D -/* Unicode escape sequence */ -/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 (escape sequence) */ +/* according to https://262.ecma-international.org/12.0/#prod-UnicodeEscapeSequence */ UNICODE_ESCAPE_SEQUENCE \\u[0-9a-fA-F]{4} +ESCAPED_CODE_POINT \\u\{[0-9a-fA-F]+\} + +/* according to https://262.ecma-international.org/12.0/#prod-HexEscapeSequence */ +HEX_ESCAPE_SEQUENCE \\x[0-9a-fA-F]{2} + +/* according to https://tc39.es/ecma262/multipage/additional-ecmascript-features-for-web-browsers.html#sec-unescape-string */ +BYTE_ESCAPE_SEQUENCE \\u[0-9a-fA-F]{2} +PERCENT_ESCAPE_SEQUENCE %u[0-9a-fA-F]{4} + +/* according to https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */ +URL_ESCAPE_SEQUENCE %[0-9a-fA-F]{2} /* whitespaces */ /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.2 */ @@ -947,7 +971,7 @@ PUNCTUATOR_ARROW "=>" /* identifiers */ /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6 */ -IDENTIFIER_START [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE} +IDENTIFIER_START [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}|{ESCAPED_CODE_POINT} IDENTIFIER_PART (({IDENTIFIER_START})|({UNICODE_COMBINING_MARK})|({UNICODE_DIGIT})|({UNICODE_CONNECTOR_PUNCTUATION})|{UNICODE_ZWNJ}|{UNICODE_ZWJ})* IDENTIFIER ({IDENTIFIER_START}{IDENTIFIER_PART})* @@ -957,19 +981,20 @@ LITERAL_NULL null LITERAL_THIS this LITERAL_BOOLEAN true|false LITERAL_DECIMAL [.]?[0-9]+[\.]?[0-9]*[eE]?[0-9]* +LITERAL_INTEGER [0-9]* LITERAL_HEX_INTEGER 0x[0-9a-fA-F]*|0X[0-9a-fA-F]* LITERAL_DQ_STRING_START \" LITERAL_DQ_STRING_END \" LITERAL_DQ_STRING_SKIP \\\" -LITERAL_DQ_STRING_TEXT [^\"\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32} 
+LITERAL_DQ_STRING_TEXT [^\"\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32} LITERAL_SQ_STRING_START \' LITERAL_SQ_STRING_END \' LITERAL_SQ_STRING_SKIP \\\' -LITERAL_SQ_STRING_TEXT [^\'\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32} +LITERAL_SQ_STRING_TEXT [^\'\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32} LITERAL_TEMPLATE_START \` LITERAL_TEMPLATE_END \` LITERAL_TEMPLATE_SUBST_START \$\{ -LITERAL_TEMPLATE_OTHER [^\\\`(\$\{)("<"+(?i:\/script>))]{1,32} +LITERAL_TEMPLATE_OTHER [^\\%\`(\$\{)("<"+(?i:\/script>))]{1,32} LITERAL_REGEX_START \/[^*\/] LITERAL_REGEX_END \/[gimsuy]* LITERAL_REGEX_SKIP \\\/ @@ -1014,63 +1039,112 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 /* in a regular expression */ %x regex +/* in a single-quoted string within unescape function */ +%x unesc_sqstr + +/* in a double-quoted string within unescape function */ +%x unesc_dqstr + +/* in a template literal within unescape function */ +%x unesc_tmpll + +/* to process code units within char code unescape function */ +%x char_code +%x char_code_lcomm +%x char_code_bcomm + %% -{WHITESPACES} { } -{CHAR_ESCAPE_SEQUENCES} { } -{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; } +{WHITESPACES} { /* skip */ } +{CHAR_ESCAPE_SEQUENCES} { /* skip */ } + +{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; } +{LINE_TERMINATORS} { newline_found = true; } -{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) } +{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) } {HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) } - {HTML_COMMENT_OPEN} { BEGIN(lcomm); } - {LINE_COMMENT_START} { BEGIN(lcomm); } -{LINE_COMMENT_END1} { BEGIN(regst); newline_found = true; } -{LINE_COMMENT_END2} { BEGIN(regst); newline_found = true; } -{LINE_COMMENT_END3} { BEGIN(regst); RETURN(OPENING_TAG) } -{LINE_COMMENT_END4} { BEGIN(regst); 
RETURN(CLOSING_TAG) } -{LINE_COMMENT_SKIP} { } -<> { RETURN(SCRIPT_CONTINUE) } - - {BLOCK_COMMENT_START} { BEGIN(bcomm); } -{BLOCK_COMMENT_END1} { BEGIN(regst); } -{BLOCK_COMMENT_END2} { BEGIN(regst); RETURN(OPENING_TAG) } -{BLOCK_COMMENT_END3} { BEGIN(regst); RETURN(CLOSING_TAG) } -{BLOCK_COMMENT_LINE1} | -{BLOCK_COMMENT_LINE2} { newline_found = true;} -{BLOCK_COMMENT_SKIP} { } -<> { RETURN(SCRIPT_CONTINUE) } - - {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) } -{LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } -\\{CR}{LF} { } -\\{LF} { } -\\{CR} { } -{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) } -{LITERAL_DQ_STRING_SKIP} { dealias_append(); ECHO; } -{LITERAL_DQ_STRING_TEXT} { dealias_append(); ECHO; } -<> { RETURN(SCRIPT_CONTINUE) } - - {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) } -{LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } -\\{CR}{LF} { } -\\{LF} { } -\\{CR} { } -{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) } -{LITERAL_SQ_STRING_SKIP} { dealias_append(); ECHO; } -{LITERAL_SQ_STRING_TEXT} { dealias_append(); ECHO; } -<> { RETURN(SCRIPT_CONTINUE) } - - {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) } -(\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); } -(\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } -(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START} | /* escaped template substitution */ -(\\\\)*\\{LITERAL_TEMPLATE_END} | /* escaped backtick */ -{LITERAL_TEMPLATE_OTHER} { dealias_append(); ECHO; } -<> { RETURN(SCRIPT_CONTINUE) } + {HTML_COMMENT_OPEN} { BEGIN(lcomm); } + {LINE_COMMENT_START} { BEGIN(lcomm); } +{HTML_COMMENT_OPEN} { BEGIN(char_code_lcomm); } +{LINE_COMMENT_START} { BEGIN(char_code_lcomm); } +{LINE_COMMENT_END1} { 
BEGIN(regst); newline_found = true; } +{LINE_COMMENT_END2} { BEGIN(regst); newline_found = true; } +{LINE_COMMENT_END1} { BEGIN(char_code); newline_found = true; } +{LINE_COMMENT_END2} { BEGIN(char_code); newline_found = true; } +{LINE_COMMENT_END3} { BEGIN(regst); RETURN(OPENING_TAG) } +{LINE_COMMENT_END4} { BEGIN(regst); RETURN(CLOSING_TAG) } +{LINE_COMMENT_SKIP} { /* skip */ } +<> { RETURN(SCRIPT_CONTINUE) } + + {BLOCK_COMMENT_START} { BEGIN(bcomm); } +{BLOCK_COMMENT_START} { BEGIN(char_code_bcomm); } +{BLOCK_COMMENT_END1} { BEGIN(regst); } +{BLOCK_COMMENT_END1} { BEGIN(char_code); } +{BLOCK_COMMENT_END2} { BEGIN(regst); RETURN(OPENING_TAG) } +{BLOCK_COMMENT_END3} { BEGIN(regst); RETURN(CLOSING_TAG) } +{BLOCK_COMMENT_LINE1} | +{BLOCK_COMMENT_LINE2} { newline_found = true; } +{BLOCK_COMMENT_SKIP} { /* skip */ } +<> { RETURN(SCRIPT_CONTINUE) } + + {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) } +{LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } +{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } +\\{CR}{LF} { /* skip */ } +\\{LF} { /* skip */ } +\\{CR} { /* skip */ } +{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) } +{LITERAL_DQ_STRING_SKIP} { dealias_append(); ECHO; } +{LITERAL_DQ_STRING_TEXT} { dealias_append(); ECHO; } +<> { RETURN(SCRIPT_CONTINUE) } +{UNICODE_ESCAPE_SEQUENCE} | +{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } +{ESCAPED_CODE_POINT} { escaped_code_point(); } +{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); } +{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); } +{ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); } +{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); } +{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); } +{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); } + + {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) } 
+{LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } +{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } +\\{CR}{LF} { /* skip */ } +\\{LF} { /* skip */ } +\\{CR} { /* skip */ } +{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) } +{LITERAL_SQ_STRING_SKIP} { dealias_append(); ECHO; } +{LITERAL_SQ_STRING_TEXT} { dealias_append(); ECHO; } +<> { RETURN(SCRIPT_CONTINUE) } +{UNICODE_ESCAPE_SEQUENCE} | +{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } +{ESCAPED_CODE_POINT} { escaped_code_point(); } +{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); } +{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); } +{ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); } +{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); } +{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); } +{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); } + + {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) } +(\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); } +(\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); } +{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } +(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START} | /* escaped template substitution */ +(\\\\)*\\{LITERAL_TEMPLATE_END} | /* escaped backtick */ +{LITERAL_TEMPLATE_OTHER} { dealias_append(); ECHO; } +<> { RETURN(SCRIPT_CONTINUE) } +{UNICODE_ESCAPE_SEQUENCE} | +{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } +{ESCAPED_CODE_POINT} { escaped_code_point(); } +{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); } +{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); } +{ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); } +{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); } +{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); 
escaped_unicode(); } +{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); } {LITERAL_REGEX_START} { EXEC(literal_regex_start()) } {LITERAL_REGEX_END} { ECHO; BEGIN(divop); } @@ -1080,17 +1154,19 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 \\{CR} | {LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) } [^<{LF}{CR}{LS}{PS}\\\/]+ { ECHO; } +{UNICODE_ESCAPE_SEQUENCE} | +{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); } <> { RETURN(SCRIPT_CONTINUE) } {DIV_OPERATOR} | {DIV_ASSIGNMENT_OPERATOR} { div_assignment_operator(); } -{OPEN_BRACE} { EXEC(open_brace()) } -{CLOSE_BRACE} { EXEC(close_brace()) } -{OPEN_PARENTHESIS} { EXEC(open_parenthesis()) } -{CLOSE_PARENTHESIS} { EXEC(close_parenthesis()) } -{OPEN_BRACKET} { EXEC(open_bracket()) } -{CLOSE_BRACKET} { EXEC(close_bracket()) } +{OPEN_BRACE} { EXEC(open_brace()) } +{CLOSE_BRACE} { EXEC(close_brace()) } +{OPEN_PARENTHESIS} { EXEC(open_parenthesis()) } +{CLOSE_PARENTHESIS} { EXEC(close_parenthesis()) } +{OPEN_BRACKET} { EXEC(open_bracket()) } +{CLOSE_BRACKET} { EXEC(close_bracket()) } {PUNCTUATOR_PREFIX} { EXEC(punctuator_prefix()) } {DOT_ACCESSOR} { dot_accessor(); } @@ -1101,7 +1177,9 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {OPERATOR_COMPLEX_ASSIGNMENT} { operator_complex_assignment(); } {OPERATOR_LOGICAL} { operator_logical(); } {OPERATOR_SHIFT} { operator_shift(); } + {PUNCTUATOR_COMMA} { punctuator_comma(); } +{PUNCTUATOR_COMMA} { /* skip */ } {USE_STRICT_DIRECTIVE} { EXEC(use_strict_directive()) } {USE_STRICT_DIRECTIVE_SC} { EXEC(use_strict_directive_sc()) } @@ -1130,8 +1208,13 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL} { EXEC(general_literal()) } {IDENTIFIER} { EXEC(general_identifier()) } +{LITERAL_INTEGER} { set_encoding(IS_DEC); dec_code_point(); } +{LITERAL_HEX_INTEGER} { set_encoding(IS_HEX); hex_code_point(); } + .|{ALL_UNICODE} { general_unicode(); } -<> { 
EEOF(eval_eof()) } + +.|{ALL_UNICODE} { char_code_no_match(); } +<> { EEOF(eval_eof()) } %% @@ -1154,6 +1237,8 @@ static std::string unicode_to_utf8(const unsigned int code) res += 0x80 | ((code >> 6) & 0x3f); res += 0x80 | (code & 0x3f); } + else + res += "\uffff"; return res; } @@ -1167,6 +1252,7 @@ static std::string unescape_unicode(const char* lexeme) bool is_unescape = false; bool is_unicode = false; + bool is_code_point = false; short digits_left = 4; std::string unicode_str; @@ -1188,6 +1274,13 @@ static std::string unescape_unicode(const char* lexeme) is_unescape = false; } + if (is_unicode and ch == '{') + { + is_unicode = false; + is_code_point = true; + continue; + } + if (is_unicode) { unicode_str += ch; @@ -1203,6 +1296,22 @@ static std::string unescape_unicode(const char* lexeme) continue; } + if (is_code_point) + { + if (ch == '}') + { + const unsigned int code_point = std::stoi(unicode_str, nullptr, 16); + res += unicode_to_utf8(code_point); + + unicode_str = ""; + is_code_point = false; + } + else + unicode_str += ch; + + continue; + } + res += ch; } @@ -1471,6 +1580,12 @@ void JSTokenizer::process_punctuator(JSToken tok) BEGIN(regst); } +void JSTokenizer::skip_punctuator() +{ + token = PUNCTUATOR; + BEGIN(regst); +} + void JSTokenizer::process_closing_brace() { if (!brace_depth.empty()) @@ -1782,6 +1897,10 @@ JSTokenizer::FuncType JSTokenizer::detect_func_type() case IDENTIFIER: { FuncType ret = FuncType::GENERAL; + + if (meta_type() == ScopeMetaType::FUNCTION) + return ret; + if (ignored_id_pos >= 0) { std::streambuf* pbuf = yyout.rdbuf(); @@ -1801,6 +1920,8 @@ JSTokenizer::FuncType JSTokenizer::detect_func_type() memcmp(tail, id.identifier.data(), size) == 0) { ret = id.type; + pbuf->pubseekoff(-size, yyout.cur, yyout.out); + break; } } @@ -1826,11 +1947,21 @@ void JSTokenizer::check_function_nesting(JSTokenizer::FuncType type) } } +void JSTokenizer::check_mixed_encoding(uint32_t flags) +{ + mixed_encoding_seen = (flags != (flags & -flags)); +} 
+ bool JSTokenizer::is_unescape_nesting_seen() const { return unescape_nest_seen; } +bool JSTokenizer::is_mixed_encoding_seen() const +{ + return mixed_encoding_seen; +} + void JSTokenizer::set_block_param(bool f) { scope_cur().block_param = f; @@ -2031,6 +2162,22 @@ JSTokenizer::JSRet JSTokenizer::literal_dq_string_start() ECHO; BEGIN(dqstr); set_ident_norm(true); + + switch (func_call_type()) + { + case FuncType::UNESCAPE: + BEGIN(unesc_dqstr); + break; + case FuncType::NOT_FUNC: + case FuncType::GENERAL: + case FuncType::CHAR_CODE: + BEGIN(dqstr); + break; + default: + assert(false); + return BAD_TOKEN; + } + return EOS; } @@ -2040,8 +2187,23 @@ JSTokenizer::JSRet JSTokenizer::literal_sq_string_start() EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; - BEGIN(sqstr); set_ident_norm(true); + + switch (func_call_type()) + { + case FuncType::UNESCAPE: + BEGIN(unesc_sqstr); + break; + case FuncType::NOT_FUNC: + case FuncType::GENERAL: + case FuncType::CHAR_CODE: + BEGIN(sqstr); + break; + default: + assert(false); + return BAD_TOKEN; + } + return EOS; } @@ -2051,8 +2213,23 @@ JSTokenizer::JSRet JSTokenizer::literal_template_start() EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; - BEGIN(tmpll); set_ident_norm(true); + + switch (func_call_type()) + { + case FuncType::UNESCAPE: + BEGIN(unesc_tmpll); + break; + case FuncType::NOT_FUNC: + case FuncType::GENERAL: + case FuncType::CHAR_CODE: + BEGIN(tmpll); + break; + default: + assert(false); + return BAD_TOKEN; + } + return EOS; } @@ -2121,7 +2298,27 @@ JSTokenizer::JSRet JSTokenizer::open_parenthesis() check_function_nesting(f_call); EXEC(scope_push(PARENTHESES)) set_func_call_type(f_call); - process_punctuator(); + + switch (f_call) + { + case FuncType::CHAR_CODE: + token = LITERAL; + BEGIN(char_code); + set_char_code_str(true); + yyout << '\''; + break; + case FuncType::UNESCAPE: + skip_punctuator(); + break; + case FuncType::NOT_FUNC: + case FuncType::GENERAL: + 
process_punctuator(); + break; + default: + assert(false); + return BAD_TOKEN; + } + return EOS; } @@ -2129,11 +2326,16 @@ JSTokenizer::JSRet JSTokenizer::close_parenthesis() { dealias_clear_mutated(false); dealias_reset(); + FuncType f_call = func_call_type(); + uint32_t flags = encoding(); + bool ch_code_str = char_code_str(); bool id_norm = ident_norm(); + if (meta_type() != ScopeMetaType::NOT_SET) EXEC(p_scope_pop(meta_type())) EXEC(scope_pop(PARENTHESES)) + if (f_call == FuncType::NOT_FUNC) set_ident_norm(id_norm); if (block_param()) @@ -2145,7 +2347,26 @@ JSTokenizer::JSRet JSTokenizer::close_parenthesis() { EXEC(do_semicolon_insertion(ASI_GROUP_5)) } - ECHO; + + switch (f_call) + { + case FuncType::NOT_FUNC: + case FuncType::GENERAL: + ECHO; + break; + case FuncType::UNESCAPE: + check_mixed_encoding(flags); + break; + case FuncType::CHAR_CODE: + check_mixed_encoding(flags); + if (ch_code_str) + yyout << '\''; + break; + default: + assert(false); + return BAD_TOKEN; + } + token = PUNCTUATOR; BEGIN(divop); return EOS; @@ -2522,10 +2743,54 @@ void JSTokenizer::general_unicode() set_ident_norm(true); } +void JSTokenizer::escaped_unicode() +{ + // truncate escape symbol, get hex number only + std::string code(YYText() + 2); + yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); +} + +void JSTokenizer::escaped_code_point() +{ + // truncate escape symbols, get hex number only + std::string code(YYText() + 3); + code.resize(code.size() - 1); + yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); +} + +void JSTokenizer::escaped_url_sequence() +{ + // truncate escape symbol, get hex number only + std::string code(YYText() + 1); + yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); +} + +void JSTokenizer::dec_code_point() +{ + std::string code(YYText()); + yyout << unicode_to_utf8(std::stoi(code, nullptr, 10)); +} + +void JSTokenizer::hex_code_point() +{ + std::string code(YYText()); + yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); +} + +void 
JSTokenizer::char_code_no_match() +{ + BEGIN(regst); + yyout << '\''; + set_char_code_str(false); + yyless(0); + memset((void*)(states + sp), 0, sizeof(states[0])); +} + JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in) { yy_flush_buffer(YY_CURRENT_BUFFER); unescape_nest_seen = false; + mixed_encoding_seen = false; auto r = yylex(); diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt index 18e4c7f19..c382eadc6 100644 --- a/src/utils/test/CMakeLists.txt +++ b/src/utils/test/CMakeLists.txt @@ -30,6 +30,16 @@ add_catch_test( js_dealias_test js_test_utils.cc ) +add_catch_test( js_unescape_test + SOURCES + ${FLEX_js_tokenizer_OUTPUTS} + ../js_identifier_ctx.cc + ../js_normalizer.cc + ../streambuf.cc + ../util_cstring.cc + js_test_utils.cc +) + add_catch_test( js_identifier_ctx_test SOURCES ../js_identifier_ctx.cc diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index f3887aaed..0c30c01e7 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -479,7 +479,7 @@ static const char clamav_buf12[] = "var x='test\\u0000test';"; static const char clamav_expected12[] = - "var x='test\\u0000test';"; + "var x='test\u0000test';"; static const char clamav_buf13[] = "var x\\s12345"; @@ -491,7 +491,7 @@ static const char clamav_buf14[] = "document.write(unescape('test%20test"; static const char clamav_expected14[] = - "document.write(unescape('test%20test"; + "document.write('test test"; TEST_CASE("clamav tests", "[JSNormalizer]") { @@ -2314,8 +2314,8 @@ TEST_CASE("split in string literal", "[JSNormalizer]") const char dat1[] = "var str =\"any\\"; const char dat2[] = "u1234tx\";"; const char exp1[] = "var str=\"any\\"; - const char exp2[] = "u1234tx\";"; - const char exp[] = "var str=\"any\\u1234tx\";"; + const char exp2[] = "\u1234tx\";"; + const char exp[] = "var str=\"any\u1234tx\";"; NORMALIZE_2(dat1, dat2, exp1, exp2); NORM_COMBINED_2(dat1, dat2, exp); @@ -2325,8 +2325,8 
@@ TEST_CASE("split in string literal", "[JSNormalizer]") const char dat1[] = "var str =\"any\\u"; const char dat2[] = "1234tx\";"; const char exp1[] = "var str=\"any\\u"; - const char exp2[] = "1234tx\";"; - const char exp[] = "var str=\"any\\u1234tx\";"; + const char exp2[] = "\u1234tx\";"; + const char exp[] = "var str=\"any\u1234tx\";"; NORMALIZE_2(dat1, dat2, exp1, exp2); NORM_COMBINED_2(dat1, dat2, exp); @@ -4245,32 +4245,33 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("in arguments") { tester.test_function_scopes({ - {"unescape(", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} + {"unescape(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("separated identifier and call") { tester.test_function_scopes({ - {"unescape /*comment*/ (", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} + {"unescape /*comment*/ (", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("complete call") { tester.test_function_scopes({ - {"unescape('%62%61%72')", "unescape('%62%61%72')", {FuncType::NOT_FUNC}} + {"unescape('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}} }); } SECTION("as named function definition") { tester.test_function_scopes({ - {"unescape(){", "unescape(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} + {"function unescape(){", "function unescape(){", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} }); } SECTION("after assignment substitution") { tester.test_function_scopes({ - {"var a = unescape; a(", "var var_0000=unescape;unescape(", {FuncType::NOT_FUNC, - FuncType::UNESCAPE}} + {"var a = unescape; a(", "var var_0000=unescape;", + {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("literal") @@ -4282,7 +4283,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("as a template literal substitution") { tester.test_function_scopes({ - {"`literal ${unescape(", "`literal ${unescape(", + {"`literal ${unescape(", "`literal ${", {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); 
} @@ -4292,33 +4293,34 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("in arguments") { tester.test_function_scopes({ - {"decodeURI(", "decodeURI(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} + {"decodeURI(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("separated identifier and call") { tester.test_function_scopes({ - {"decodeURI /*comment*/ (", "decodeURI(", {FuncType::NOT_FUNC, - FuncType::UNESCAPE}} + {"decodeURI /*comment*/ (", "", + {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("complete call") { tester.test_function_scopes({ - {"decodeURI('%62%61%72')", "decodeURI('%62%61%72')", {FuncType::NOT_FUNC}} + {"decodeURI('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}} }); } SECTION("as named function definition") { tester.test_function_scopes({ - {"decodeURI(){", "decodeURI(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} + {"function decodeURI(){", "function decodeURI(){", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} }); } SECTION("after assignment substitution") { tester.test_function_scopes({ - {"var a = decodeURI; a(", "var var_0000=decodeURI;decodeURI(", {FuncType::NOT_FUNC, - FuncType::UNESCAPE}} + {"var a = decodeURI; a(", "var var_0000=decodeURI;", + {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("literal") @@ -4330,7 +4332,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("as a template literal substitution") { tester.test_function_scopes({ - {"`literal ${decodeURI(", "`literal ${decodeURI(", + {"`literal ${decodeURI(", "`literal ${", {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } @@ -4340,37 +4342,36 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("in arguments") { tester.test_function_scopes({ - {"decodeURIComponent(", "decodeURIComponent(", {FuncType::NOT_FUNC, - FuncType::UNESCAPE}} + {"decodeURIComponent(", "", + {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("separated identifier and call") { 
tester.test_function_scopes({ - {"decodeURIComponent /*comment*/ (", "decodeURIComponent(", {FuncType::NOT_FUNC, - FuncType::UNESCAPE}} + {"decodeURIComponent /*comment*/ (", "", + {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("complete call") { tester.test_function_scopes({ - {"decodeURIComponent('%62%61%72')", "decodeURIComponent('%62%61%72')", + {"decodeURIComponent('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}} }); } SECTION("as named function definition") { tester.test_function_scopes({ - {"decodeURIComponent(){", "decodeURIComponent(){", {FuncType::NOT_FUNC, - FuncType::NOT_FUNC}} + {"function decodeURIComponent(){", "function decodeURIComponent(){", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} }); } SECTION("after assignment substitution") { tester.test_function_scopes({ - {"var a = decodeURIComponent; a(", - "var var_0000=decodeURIComponent;decodeURIComponent(", {FuncType::NOT_FUNC, - FuncType::UNESCAPE}} + {"var a = decodeURIComponent; a(", "var var_0000=decodeURIComponent;", + {FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } SECTION("literal") @@ -4382,8 +4383,8 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("as a template literal substitution") { tester.test_function_scopes({ - {"`literal ${decodeURIComponent(", "`literal ${decodeURIComponent(", - {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}} + {"`literal ${decodeURIComponent(", "`literal ${", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}} }); } } @@ -4392,35 +4393,35 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("in arguments") { tester.test_function_scopes({ - {"String.fromCharCode(", "String.fromCharCode(", + {"String.fromCharCode(", "'", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} }); } SECTION("separated identifier and call") { tester.test_function_scopes({ - {"String.fromCharCode /*comment*/ (", "String.fromCharCode(", + {"String.fromCharCode /*comment*/ (", "'", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} 
}); } SECTION("complete call") { tester.test_function_scopes({ - {"String.fromCharCode( 65, 0x42 )", "String.fromCharCode(65,0x42)", + {"String.fromCharCode( 65, 0x42 )", "'AB'", {FuncType::NOT_FUNC}} }); } SECTION("as named function definition") { tester.test_function_scopes({ - {"String.fromCharCode(){", "String.fromCharCode(){", + {"function String.fromCharCode(){", "function String.fromCharCode(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} }); } SECTION("after class name assignment substitution") { tester.test_function_scopes({ - {"var a = String; a.fromCharCode(", "var var_0000=String;String.fromCharCode(", + {"var a = String; a.fromCharCode(", "var var_0000=String;'", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} }); } @@ -4428,7 +4429,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") { tester.test_function_scopes({ {"var a = String.fromCharCode; a(", - "var var_0000=String.fromCharCode;String.fromCharCode(", + "var var_0000=String.fromCharCode;'", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} }); } @@ -4449,7 +4450,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]") SECTION("as a template literal substitution") { tester.test_function_scopes({ - {"`literal ${String.fromCharCode(", "`literal ${String.fromCharCode(", + {"`literal ${String.fromCharCode(", "`literal ${'", {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::CHAR_CODE}} }); } @@ -4476,7 +4477,7 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]") SECTION("Multiple unescape functions") { tester.test_function_scopes({ - {"unescape( unescape( unescape(", "unescape(unescape(unescape(", + {"unescape( unescape( unescape(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}} }); CHECK(tester.is_unescape_nesting_seen()); @@ -4484,31 +4485,24 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]") SECTION("Multiple different unescape functions") { tester.test_function_scopes({ - {"unescape( decodeURI( decodeURIComponent(", 
- "unescape(decodeURI(decodeURIComponent(", {FuncType::NOT_FUNC, - FuncType::UNESCAPE, - FuncType::UNESCAPE, - FuncType::UNESCAPE}} + {"unescape( decodeURI( decodeURIComponent(", "", + {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}} }); CHECK(tester.is_unescape_nesting_seen()); } SECTION("Multiple String.fromCharCode functions") { tester.test_function_scopes({ - {"String.fromCharCode( String.fromCharCode( String.fromCharCode(", - "String.fromCharCode(String.fromCharCode(String.fromCharCode(", - {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE, - FuncType::CHAR_CODE}} + {"String.fromCharCode( String.fromCharCode( String.fromCharCode(", "'' '' '", + {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE, FuncType::CHAR_CODE}} }); CHECK(!tester.is_unescape_nesting_seen()); } SECTION("Mixed function calls") { tester.test_function_scopes({ - {"general( unescape( String.fromCharCode(", - "var_0000(unescape(String.fromCharCode(", - {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE, - FuncType::CHAR_CODE}} + {"general( unescape( String.fromCharCode(", "var_0000('", + {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE, FuncType::CHAR_CODE}} }); CHECK(!tester.is_unescape_nesting_seen()); } @@ -4525,16 +4519,14 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]") SECTION("Multiple unescape functions") { tester.test_function_scopes({ - {"unescape( unescape( unescape( '%62%61%72' ) )", - "unescape(unescape(unescape('%62%61%72'))", {FuncType::NOT_FUNC, - FuncType::UNESCAPE }} + {"unescape( unescape( unescape( '%62%61%72' ) )", "'bar'", + {FuncType::NOT_FUNC, FuncType::UNESCAPE }} }); } SECTION("Multiple different unescape functions") { tester.test_function_scopes({ - {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )", - "unescape(decodeURI(decodeURIComponent('%62%61%72'))", + {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )", "'bar'", {FuncType::NOT_FUNC, FuncType::UNESCAPE }} 
}); } @@ -4542,7 +4534,7 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]") { tester.test_function_scopes({ {"String.fromCharCode( String.fromCharCode( String.fromCharCode( 65, 0x42 ) )", - "String.fromCharCode(String.fromCharCode(String.fromCharCode(65,0x42))", + "'' '' 'AB'", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} }); } @@ -4550,8 +4542,8 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]") { tester.test_function_scopes({ {"general( unescape( String.fromCharCode( 65, 0x42 ) )", - "var_0000(unescape(String.fromCharCode(65,0x42))", {FuncType::NOT_FUNC, - FuncType::GENERAL}} + "var_0000('AB'", + {FuncType::NOT_FUNC, FuncType::GENERAL}} }); } } @@ -4569,18 +4561,18 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]") tester.test_function_scopes({ {"un", "var_0000", {FuncType::NOT_FUNC}}, {"escape", "unescape", {FuncType::NOT_FUNC}}, - {"(", "unescape(", {FuncType::NOT_FUNC, + {"(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}, - {")", "unescape()", {FuncType::NOT_FUNC}}, + {")", "", {FuncType::NOT_FUNC}}, }); } SECTION("split between identifier and parenthesis") { tester.test_function_scopes({ {"decodeURI", "decodeURI", {FuncType::NOT_FUNC}}, - {"(", "decodeURI(", {FuncType::NOT_FUNC, + {"(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}, - {")", "decodeURI()", {FuncType::NOT_FUNC}}, + {")", "", {FuncType::NOT_FUNC}}, }); } SECTION("comment between identifier and parenthesis") @@ -4588,9 +4580,9 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]") tester.test_function_scopes({ {"unescape", "unescape", {FuncType::NOT_FUNC}}, {"//String.fromCharCode\n", "unescape", {FuncType::NOT_FUNC}}, - {"(", "unescape(", {FuncType::NOT_FUNC, + {"(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}, - {")", "unescape()", {FuncType::NOT_FUNC}}, + {")", "", {FuncType::NOT_FUNC}}, }); } SECTION("split in arguments") @@ -4611,13 +4603,13 @@ TEST_CASE("Function call tracking - over multiple PDU", 
"[JSNormalizer]") tester.test_function_scopes({ {"String", "String", {FuncType::NOT_FUNC}}, {".fromCharCode", "String.fromCharCode", {FuncType::NOT_FUNC}}, - {"(`", "String.fromCharCode(`", {FuncType::NOT_FUNC, + {"(`", "'' `", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}, - {"un", "String.fromCharCode(`un", {FuncType::NOT_FUNC, + {"un", "'' `un", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}, - {"escape(", "String.fromCharCode(`unescape(", {FuncType::NOT_FUNC, + {"escape(", "'' `unescape(", {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}, - {"`)", "String.fromCharCode(`unescape(`)", {FuncType::NOT_FUNC}}, + {"`)", "'' `unescape(`", {FuncType::NOT_FUNC}}, }); } SECTION("Nesting - Mixed function calls") @@ -4626,27 +4618,26 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]") {"decode", "var_0000", {FuncType::NOT_FUNC}}, {"URI", "decodeURI", {FuncType::NOT_FUNC}}, {"Component", "decodeURIComponent", {FuncType::NOT_FUNC}}, - {"(", "decodeURIComponent(", {FuncType::NOT_FUNC, + {"(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}, - {" a, ", "decodeURIComponent(var_0001,", + {" a, ", "var_0001,", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}, - {" String.fromCharCode( ar", - "decodeURIComponent(var_0001,String.fromCharCode(var_0002", + {" String.fromCharCode( ar", "var_0001,'' var_0002", {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::CHAR_CODE}}, - {"g ), b, foo", - "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0005", + + {"g ), b, foo", "var_0001,'' var_0003,var_0004,var_0005", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}, - {"bar( ", - "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006(", + + {"bar( ", "var_0001,'' var_0003,var_0004,var_0006(", {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::GENERAL}}, - {"))", - "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006())", + + {"))", "var_0001,'' var_0003,var_0004,var_0006()", {FuncType::NOT_FUNC}} }); } @@ -4879,4 +4870,31 @@ 
TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]") return normalizer_wo_ident.normalize(src_wo_semicolons, src_len); }; } + +TEST_CASE("JS Normalizer, unescape", "[JSNormalizer]") +{ + auto str_unescape = make_input("'", "\\u0061", "'", norm_depth); + auto f_unescape = make_input_repeat("unescape('')", norm_depth); + const char* src_str_unescape = str_unescape.c_str(); + const char* src_f_unescape = f_unescape.c_str(); + size_t src_len = norm_depth; + + JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); + JSNormalizer norm(ident_ctx, unlim_depth, max_template_nesting, norm_depth); + + REQUIRE(norm_ret(norm, str_unescape) == JSTokenizer::SCRIPT_ENDED); + BENCHMARK("unescape sequence") + { + norm.rewind_output(); + return norm.normalize(src_str_unescape, src_len); + }; + + REQUIRE(norm_ret(norm, f_unescape) == JSTokenizer::SCRIPT_ENDED); + BENCHMARK("unescape function tracking") + { + norm.rewind_output(); + return norm.normalize(src_f_unescape, src_len); + }; +} + #endif // BENCHMARK_TEST diff --git a/src/utils/test/js_test_utils.cc b/src/utils/test/js_test_utils.cc index 5083dbcba..cd871d43d 100644 --- a/src/utils/test/js_test_utils.cc +++ b/src/utils/test/js_test_utils.cc @@ -67,7 +67,7 @@ bool JSTokenizerTester::is_unescape_nesting_seen() const return normalizer.is_unescape_nesting_seen(); } -void test_scope(const char* context, std::list stack) +void test_scope(const char* context, const std::list& stack) { std::string buf(context); buf += ""; @@ -96,6 +96,17 @@ void test_normalization_bad(const char* source, const char* expected, JSTokenize CHECK(result_buf == expected); } +void test_normalization_mixed_encoding(const char* source, const char* expected) +{ + JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); + JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); + auto ret = normalizer.normalize(source, strlen(source)); + std::string result_buf(normalizer.get_script(), 
normalizer.script_size()); + CHECK(ret == JSTokenizer::JSRet::SCRIPT_CONTINUE); + CHECK(normalizer.is_mixed_encoding_seen()); + CHECK(result_buf == expected); +} + void test_normalization(const std::vector& pdus) { JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); @@ -111,7 +122,7 @@ void test_normalization(const std::vector& pdus) } } -void test_normalization(std::list pdus) +void test_normalization(const std::list& pdus) { JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids); JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); diff --git a/src/utils/test/js_test_utils.h b/src/utils/test/js_test_utils.h index 10f5b0a20..269fabbb6 100644 --- a/src/utils/test/js_test_utils.h +++ b/src/utils/test/js_test_utils.h @@ -83,14 +83,15 @@ private: snort::JSNormalizer normalizer; }; -void test_scope(const char* context, std::list stack); +void test_scope(const char* context, const std::list& stack); void test_normalization(const char* source, const char* expected); void test_normalization_bad(const char* source, const char* expected, JSTokenizer::JSRet eret); +void test_normalization_mixed_encoding(const char* source, const char* expected); typedef std::pair PduCase; // source, expected for a single PDU void test_normalization(const std::vector& pdus); typedef std::tuple> ScopedPduCase; // source, expected, and current scope type stack for a single PDU -void test_normalization(std::list pdus); +void test_normalization(const std::list& pdus); #endif // JS_TEST_UTILS_H diff --git a/src/utils/test/js_unescape_test.cc b/src/utils/test/js_unescape_test.cc new file mode 100644 index 000000000..6736935be --- /dev/null +++ b/src/utils/test/js_unescape_test.cc @@ -0,0 +1,1144 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_unescape_test.cc author Volodymyr Horban + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "catch/catch.hpp" + +#include "utils/js_identifier_ctx.h" +#include "utils/js_normalizer.h" + +#include "js_test_utils.h" + +#ifdef CATCH_TEST_BUILD + +TEST_CASE("Sequence parsing", "[JSNormalizer]") +{ + SECTION("\\xXX") + { + test_normalization( + "'\\x01'", + "'\u0001'" + ); + test_normalization( + "'\\x23'", + "'\u0023'" + ); + test_normalization( + "'\\x45'", + "'\u0045'" + ); + test_normalization( + "'\\x67'", + "'\u0067'" + ); + test_normalization( + "'\\x89'", + "'\u0089'" + ); + test_normalization( + "'\\xaA'", + "'\u00aA'" + ); + test_normalization( + "'\\xbB'", + "'\u00bB'" + ); + test_normalization( + "'\\xcC'", + "'\u00cC'" + ); + test_normalization( + "'\\xdD'", + "'\u00dD'" + ); + test_normalization( + "'\\xeE'", + "'\u00eE'" + ); + test_normalization( + "'\\xfF'", + "'\u00fF'" + ); + } + + SECTION("\\uXXXX") + { + test_normalization( + "'\\u0123'", + "'\u0123'" + ); + test_normalization( + "'\\u4567'", + "'\u4567'" + ); + test_normalization( + "'\\u89aA'", + "'\u89aA'" + ); + test_normalization( 
+ "'\\ubBcC'", + "'\ubBcC'" + ); + test_normalization( + "'\\ueEfF'", + "'\ueEfF'" + ); + } + + SECTION("\\u{XXXX}") + { + test_normalization( + "'\\u{0123}'", + "'\u0123'" + ); + test_normalization( + "'\\u{4567}'", + "'\u4567'" + ); + test_normalization( + "'\\u{89aA}'", + "'\u89aA'" + ); + test_normalization( + "'\\u{bBcC}'", + "'\ubBcC'" + ); + test_normalization( + "'\\u{eEfF}'", + "'\ueEfF'" + ); + } + + SECTION("%XX") + { + test_normalization( + "unescape('%01')", + "'\u0001'" + ); + test_normalization( + "unescape('%23')", + "'\u0023'" + ); + test_normalization( + "unescape('%45')", + "'\u0045'" + ); + test_normalization( + "unescape('%67')", + "'\u0067'" + ); + test_normalization( + "unescape('%89')", + "'\u0089'" + ); + test_normalization( + "unescape('%aA')", + "'\u00aA'" + ); + test_normalization( + "unescape('%bB')", + "'\u00bB'" + ); + test_normalization( + "unescape('%cC')", + "'\u00cC'" + ); + test_normalization( + "unescape('%dD')", + "'\u00dD'" + ); + test_normalization( + "unescape('%eE')", + "'\u00eE'" + ); + test_normalization( + "unescape('%fF')", + "'\u00fF'" + ); + } + + SECTION("\\uXX") + { + test_normalization( + "unescape('\\u01')", + "'\u0001'" + ); + test_normalization( + "unescape('%23')", + "'\u0023'" + ); + test_normalization( + "unescape('\\u45')", + "'\u0045'" + ); + test_normalization( + "unescape('\\u67')", + "'\u0067'" + ); + test_normalization( + "unescape('\\u89')", + "'\u0089'" + ); + test_normalization( + "unescape('\\uaA')", + "'\u00aA'" + ); + test_normalization( + "unescape('\\ubB')", + "'\u00bB'" + ); + test_normalization( + "unescape('\\ucC')", + "'\u00cC'" + ); + test_normalization( + "unescape('\\udD')", + "'\u00dD'" + ); + test_normalization( + "unescape('\\ueE')", + "'\u00eE'" + ); + test_normalization( + "unescape('\\ufF')", + "'\u00fF'" + ); + } + + SECTION("%uXXXX") + { + test_normalization( + "unescape('%u0123')", + "'\u0123'" + ); + test_normalization( + "unescape('%u4567')", + "'\u4567'" + ); + 
test_normalization( + "unescape('%u89aA')", + "'\u89aA'" + ); + test_normalization( + "unescape('%ubBcC')", + "'\ubBcC'" + ); + test_normalization( + "unescape('%ueEfF')", + "'\ueEfF'" + ); + } + + SECTION("decimal") + { + test_normalization( + "String.fromCharCode(1)", + "'\u0001'" + ); + test_normalization( + "String.fromCharCode(12)", + "'\u000c'" + ); + test_normalization( + "String.fromCharCode(345)", + "'\u0159'" + ); + test_normalization( + "String.fromCharCode(6789)", + "'\u1a85'" + ); + test_normalization( + "String.fromCharCode(1000)", + "'\u03e8'" + ); + test_normalization( + "String.fromCharCode(0001)", + "'\x01'" + ); + test_normalization( + "String.fromCharCode(65536)", + "'\uffff'" + ); + } + + SECTION("hexadecimal") + { + test_normalization( + "String.fromCharCode(0x0001)", + "'\u0001'" + ); + test_normalization( + "String.fromCharCode(0X0001)", + "'\u0001'" + ); + test_normalization( + "String.fromCharCode(0x1234)", + "'\u1234'" + ); + test_normalization( + "String.fromCharCode(0X5678)", + "'\u5678'" + ); + test_normalization( + "String.fromCharCode(0x9aAb)", + "'\u9aAb'" + ); + test_normalization( + "String.fromCharCode(0x9aAb)", + "'\u9aAb'" + ); + test_normalization( + "String.fromCharCode(0xBcCd)", + "'\uBcCd'" + ); + test_normalization( + "String.fromCharCode(0XeEfF)", + "'\ueEfF'" + ); + test_normalization( + "String.fromCharCode(0x10000)", + "'\uffff'" + ); + test_normalization( + "String.fromCharCode(0X10000)", + "'\uffff'" + ); + } +} + +TEST_CASE("Universal sequences", "[JSNormalizer]") +{ + SECTION("\\uXXXX") + { + test_normalization( + "\\u0065\\u0076\\u0061\\u006C () ;", + "eval();" + ); + test_normalization( + "'\\u0062\\u0061\\u0072'", + "'bar'" + ); + test_normalization( + "\"\\u0062\\u0061\\u0072\"", + "\"bar\"" + ); + test_normalization( + "`\\u0062\\u0061\\u0072`", + "`bar`" + ); + test_normalization( + "/\\u0062\\u0061\\u0072/", + "/bar/" + ); + } + + SECTION("\\xXX") + { + test_normalization( + "'\\x62\\x61\\x72'", + "'bar'" + 
); + test_normalization( + "\"\\x62\\x61\\x72\"", + "\"bar\"" + ); + test_normalization( + "`\\x62\\x61\\x72`", + "`bar`" + ); + test_normalization( + "/\\x62\\x61\\x72/", + "/bar/" + ); + } + + SECTION("\\u{XXXX}") + { + test_normalization( + "\\u{0065}\\u{0076}\\u{0061}\\u{006C} () ;", + "eval();" + ); + test_normalization( + "'\\u{0062}\\u{0061}\\u{0072}'", + "'bar'" + ); + test_normalization( + "\"\\u{0062}\\u{0061}\\u{0072}\"", + "\"bar\"" + ); + test_normalization( + "`\\u{0062}\\u{0061}\\u{0072}`", + "`bar`" + ); + } +} + +TEST_CASE("unescape()", "[JSNormalizer]") +{ + SECTION("%XX") + { + test_normalization( + "unescape('%62%61%72')", + "'bar'" + ); + test_normalization( + "unescape(\"%62%61%72\")", + "\"bar\"" + ); + test_normalization( + "unescape(`%62%61%72`)", + "`bar`" + ); + } + + SECTION("%uXXXX") + { + test_normalization( + "unescape('%u0062%u0061%u0072')", + "'bar'" + ); + test_normalization( + "unescape(\"%u0062%u0061%u0072\")", + "\"bar\"" + ); + test_normalization( + "unescape(`%u0062%u0061%u0072`)", + "`bar`" + ); + } + + SECTION("\\uXX") + { + test_normalization( + "unescape('\\u62\\u61\\u72')", + "'bar'" + ); + test_normalization( + "unescape(\"\\u62\\u61\\u72\")", + "\"bar\"" + ); + test_normalization( + "unescape(`\\u62\\u61\\u72`)", + "`bar`" + ); + } + + SECTION("\\uXXXX") + { + test_normalization( + "unescape('\\u0062\\u0061\\u0072')", + "'bar'" + ); + test_normalization( + "unescape(\"\\u0062\\u0061\\u0072\")", + "\"bar\"" + ); + test_normalization( + "unescape(`\\u0062\\u0061\\u0072`)", + "`bar`" + ); + } + + SECTION("\\xXX") + { + test_normalization( + "unescape('\\x62\\x61\\x72')", + "'bar'" + ); + test_normalization( + "unescape(\"\\x62\\x61\\x72\")", + "\"bar\"" + ); + test_normalization( + "unescape(`\\x62\\x61\\x72`)", + "`bar`" + ); + } + + SECTION("\\u{XXXX}") + { + test_normalization( + "unescape('\\u{0062}\\u{0061}\\u{0072}')", + "'bar'" + ); + test_normalization( + "unescape(\"\\u{0062}\\u{0061}\\u{0072}\")", + "\"bar\"" + 
); + test_normalization( + "unescape(`\\u{0062}\\u{0061}\\u{0072}`)", + "`bar`" + ); + } + + SECTION("mixed sequence") + { + test_normalization_mixed_encoding( + "unescape('\\u62%61%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "unescape('\\x62%u0061%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "unescape('\\x62\\u61\\u72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "unescape('%u0062\\u0061%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "unescape('\\u0062\\x61%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "unescape('\\u0062\\u{0061}\\x72')", + "'bar'" + ); + } +} + +TEST_CASE("decodeURI()", "[JSNormalizer]") +{ + SECTION("%XX") + { + test_normalization( + "decodeURI('%62%61%72')", + "'bar'" + ); + test_normalization( + "decodeURI(\"%62%61%72\")", + "\"bar\"" + ); + test_normalization( + "decodeURI(`%62%61%72`)", + "`bar`" + ); + } + + SECTION("%uXXXX") + { + test_normalization( + "decodeURI('%u0062%u0061%u0072')", + "'bar'" + ); + test_normalization( + "decodeURI(\"%u0062%u0061%u0072\")", + "\"bar\"" + ); + test_normalization( + "decodeURI(`%u0062%u0061%u0072`)", + "`bar`" + ); + } + + SECTION("\\uXX") + { + test_normalization( + "decodeURI('\\u62\\u61\\u72')", + "'bar'" + ); + test_normalization( + "decodeURI(\"\\u62\\u61\\u72\")", + "\"bar\"" + ); + test_normalization( + "decodeURI(`\\u62\\u61\\u72`)", + "`bar`" + ); + } + + SECTION("\\uXXXX") + { + test_normalization( + "decodeURI('\\u0062\\u0061\\u0072')", + "'bar'" + ); + test_normalization( + "decodeURI(\"\\u0062\\u0061\\u0072\")", + "\"bar\"" + ); + test_normalization( + "decodeURI(`\\u0062\\u0061\\u0072`)", + "`bar`" + ); + } + + SECTION("\\xXX") + { + test_normalization( + "decodeURI('\\x62\\x61\\x72')", + "'bar'" + ); + test_normalization( + "decodeURI(\"\\x62\\x61\\x72\")", + "\"bar\"" + ); + test_normalization( + "decodeURI(`\\x62\\x61\\x72`)", + "`bar`" + ); + } + + SECTION("\\u{XXXX}") + { + test_normalization( + 
"decodeURI('\\u{0062}\\u{0061}\\u{0072}')", + "'bar'" + ); + test_normalization( + "decodeURI(\"\\u{0062}\\u{0061}\\u{0072}\")", + "\"bar\"" + ); + test_normalization( + "decodeURI(`\\u{0062}\\u{0061}\\u{0072}`)", + "`bar`" + ); + } + + SECTION("mixed sequence") + { + test_normalization_mixed_encoding( + "decodeURI('\\u62%61%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURI('\\x62%u0061%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURI('\\x62\\u61\\u72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURI('%u0062\\u0061%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURI('\\u0062\\x61%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURI('\\u0062\\u{0061}\\x72')", + "'bar'" + ); + } +} + +TEST_CASE("decodeURIComponent()", "[JSNormalizer]") +{ + SECTION("%XX") + { + test_normalization( + "decodeURIComponent('%62%61%72')", + "'bar'" + ); + test_normalization( + "decodeURIComponent(\"%62%61%72\")", + "\"bar\"" + ); + test_normalization( + "decodeURIComponent(`%62%61%72`)", + "`bar`" + ); + } + + SECTION("%uXXXX") + { + test_normalization( + "decodeURIComponent('%u0062%u0061%u0072')", + "'bar'" + ); + test_normalization( + "decodeURIComponent(\"%u0062%u0061%u0072\")", + "\"bar\"" + ); + test_normalization( + "decodeURIComponent(`%u0062%u0061%u0072`)", + "`bar`" + ); + } + + SECTION("\\uXX") + { + test_normalization( + "decodeURIComponent('\\u62\\u61\\u72')", + "'bar'" + ); + test_normalization( + "decodeURIComponent(\"\\u62\\u61\\u72\")", + "\"bar\"" + ); + test_normalization( + "decodeURIComponent(`\\u62\\u61\\u72`)", + "`bar`" + ); + } + + SECTION("\\uXXXX") + { + test_normalization( + "decodeURIComponent('\\u0062\\u0061\\u0072')", + "'bar'" + ); + test_normalization( + "decodeURIComponent(\"\\u0062\\u0061\\u0072\")", + "\"bar\"" + ); + test_normalization( + "decodeURIComponent(`\\u0062\\u0061\\u0072`)", + "`bar`" + ); + } + + SECTION("\\xXX") + { + 
test_normalization( + "decodeURIComponent('\\x62\\x61\\x72')", + "'bar'" + ); + test_normalization( + "decodeURIComponent(\"\\x62\\x61\\x72\")", + "\"bar\"" + ); + test_normalization( + "decodeURIComponent(`\\x62\\x61\\x72`)", + "`bar`" + ); + } + + SECTION("\\u{XXXX}") + { + test_normalization( + "decodeURIComponent('\\u{0062}\\u{0061}\\u{0072}')", + "'bar'" + ); + test_normalization( + "decodeURIComponent(\"\\u{0062}\\u{0061}\\u{0072}\")", + "\"bar\"" + ); + test_normalization( + "decodeURIComponent(`\\u{0062}\\u{0061}\\u{0072}`)", + "`bar`" + ); + } + + SECTION("mixed sequence") + { + test_normalization_mixed_encoding( + "decodeURIComponent('\\u62%61%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURIComponent('\\x62%u0061%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURIComponent('\\x62\\u61\\u72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURIComponent('%u0062\\u0061%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURIComponent('\\u0062\\x61%72')", + "'bar'" + ); + + test_normalization_mixed_encoding( + "decodeURIComponent('\\u0062\\u{0061}\\x72')", + "'bar'" + ); + } +} + /* String.fromCharCode(): numeric arguments written in decimal, in hexadecimal, or as a mix of both are all expected to come out as the single-quoted string bar. The mixed cases use test_normalization_mixed_encoding, which is defined outside this view; presumably it also asserts that mixed encoding was reported (TODO confirm). */ +TEST_CASE("String.fromCharCode()", "[JSNormalizer]") +{ + SECTION("decimal") + { + test_normalization( + "String.fromCharCode(98, 97, 114)", + "'bar'" + ); + } + + SECTION("hexadecimal") + { + test_normalization( + "String.fromCharCode(0x62, 0x61, 0x72)", + "'bar'" + ); + + test_normalization( + "String.fromCharCode(0x0062, 0x0061, 0x0072)", + "'bar'" + ); + } + + SECTION("mixed sequence") + { + test_normalization_mixed_encoding( + "String.fromCharCode(98, 97, 0x72)", + "'bar'" + ); + + test_normalization_mixed_encoding( + "String.fromCharCode(0x62, 97, 114)", + "'bar'" + ); + } +} + /* Split: every call takes a braced list of two-string entries. Judging by the data, each entry is the next chunk of script text followed by the output expected once that chunk has been consumed, i.e. the expectation accumulates across chunks. NOTE(review): the list overload of test_normalization is not visible here; confirm. */ +TEST_CASE("Split", "[JSNormalizer]") +{ + SECTION("unescape()") + { + /* Call without an argument: the expected output is empty for both chunks. */ test_normalization({ + { "unescape(", "" }, + { ")", "" } + }); + + /* Chunk boundary falls between complete %XX sequences. */ test_normalization({ + { "unescape('%62", "'b" }, + { "%61%72')", "'bar'" } + }); + 
+ test_normalization({ + { "unescape('%62%61", "'ba" }, + { "%72')", "'bar'" } + }); + + test_normalization({ + { "unescape('%62%61%72", "'bar" }, + { "')", "'bar'" } + }); + + /* From here on the boundary may fall inside a %uXXXX sequence; the unfinished prefix is then expected verbatim until the next chunk completes it. */ test_normalization({ + { "unescape('%u", "'%u" }, + { "0062%u0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u00", "'%u00" }, + { "62%u0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u0062", "'b" }, + { "%u0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u0062%u", "'b%u" }, + { "0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u0062%u00", "'b%u00" }, + { "61%u0072')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u0062%u0061", "'ba" }, + { "%u0072')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u0062%u0061%u", "'ba%u" }, + { "0072')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u0062%u0061%u00", "'ba%u00" }, + { "72')", "'bar'" } + }); + + test_normalization({ + { "unescape('%u0062%u0061%u0072", "'bar" }, + { "')", "'bar'" } + }); + + /* Three chunks, one %uXXXX sequence per chunk. */ test_normalization({ + { "unescape('%u0062", "'b" }, + { "%u0061", "'ba" }, + { "%u0072')", "'bar'" } + }); + } + + SECTION("decodeURI()") + { + /* decodeURI(): same style of %uXXXX chunk splits as the unescape() section. */ test_normalization({ + { "decodeURI(", "" }, + { ")", "" } + }); + + test_normalization({ + { "decodeURI('%u", "'%u" }, + { "0062%u0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURI('%u00", "'%u00" }, + { "62%u0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURI('%u0062%u", "'b%u" }, + { "0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURI('%u0062%u00", "'b%u00" }, + { "61%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURI('%u0062%u0061", "'ba" }, + { "%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURI('%u0062%u0061%u", "'ba%u" }, + { "0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURI('%u0062%u0061%u00", "'ba%u00" }, + { "72')", "'bar'" } + }); + + test_normalization({ + { 
"decodeURI('%u0062%u0061%u0072", "'bar" }, + { "')", "'bar'" } + }); + + test_normalization({ + { "decodeURI('%u0062", "'b" }, + { "%u0061", "'ba" }, + { "%u0072')", "'bar'" } + }); + } + + SECTION("decodeURIComponent()") + { + /* decodeURIComponent(): same chunk splits as the decodeURI() section. */ test_normalization({ + { "decodeURIComponent(", "" }, + { ")", "" } + }); + + test_normalization({ + { "decodeURIComponent('%u", "'%u" }, + { "0062%u0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u00", "'%u00" }, + { "62%u0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u0062%u", "'b%u" }, + { "0061%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u0062%u00", "'b%u00" }, + { "61%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u0062%u0061", "'ba" }, + { "%u0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u0062%u0061%u", "'ba%u" }, + { "0072')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u0062%u0061%u00", "'ba%u00" }, + { "72')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u0062%u0061%u0072", "'bar" }, + { "')", "'bar'" } + }); + + test_normalization({ + { "decodeURIComponent('%u0062", "'b" }, + { "%u0061", "'ba" }, + { "%u0072')", "'bar'" } + }); + } + + SECTION("String.fromCharCode()") + { + /* With no arguments the expectation differs from the sections above: an opening quote after the first chunk and an empty quoted string at the end. */ test_normalization({ + { "String.fromCharCode(", "'" }, + { ")", "''" } + }); + + /* The number 98 is cut after its first digit, so the first chunk is expected to yield the character with code 9; the complete output is expected only after the second chunk. */ test_normalization({ + { "String.fromCharCode(9", "'\u0009" }, + { "8, 97, 114)", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(98,", "'b" }, + { "97, 114)", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(98, 97", "'ba" }, + { ",114)", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(98, 97, 114", "'bar" }, + { ")", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(0x0062", "'b" }, + { ",0x0061, 0x0072)", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(0x0062, 0x0061", "'ba" }, + { ", 
0x0072)", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(0x0062, 0x0061, 0x0072", "'bar" }, + { ")", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(0x0062,", "'b" }, + { "0x0061,", "'ba" }, + { "0x72)", "'bar'" } + }); + + test_normalization({ + { "String.fromCharCode(98,", "'b" }, + { "97,", "'ba" }, + { "114)", "'bar'" } + }); + } +} + +TEST_CASE("Mixed input", "[JSNormalizer]") +{ + SECTION("string") + { + test_normalization( + "unescape ( ' A \\x62 B \\x61 C \\x72 ' ) ;", + "' A b B a C r ';" + ); + test_normalization( + "unescape ( ' \\x62ar b\\x61r ba\\x72 ' ) ;", + "' bar bar bar ';" + ); + test_normalization( + "unescape ( '\\x62\\x61\\x72', '\\x62\\x61\\x72' ) ;", + "'bar','bar';" + ); + test_normalization( + "unescape ( '\\x62\\x61\\x72' + '\\x62\\x61\\x72' ) ;", + "'bar'+'bar';" + ); + test_normalization_mixed_encoding( + "unescape ( '\\x62\\x61\\x72' + '\\u62\\u61\\u72' ) ;", + "'bar'+'bar';" + ); + } + + SECTION("literal") + { + test_normalization( + "unescape ( 2, '\\x62\\x61\\x72', 2 ) ;", + "2,'bar',2;" + ); + } + + SECTION("identifier") + { + test_normalization( + "unescape ( f(\"A\\u20B\\u20C\"), eval(\"\\u66\\u6f\\u6f\"), \"\\u66\\u6f\\u6f\" ) ;", + "var_0000(\"A\\u20B\\u20C\"),eval(\"\\u66\\u6f\\u6f\"),\"foo\";" + ); + test_normalization_mixed_encoding( + "String.fromCharCode (114, 0x72, eval('123'), 114, 0x72) ;", + "'rr' eval('123'),114,0x72;" + ); + } + + SECTION("comment") + { + test_normalization( + "String.fromCharCode(0x62, \n 0x61, // comment \n 0x72) ;", + "'bar';" + ); + test_normalization( + "String.fromCharCode(0x62, \t 0x61, /* comment */ 0x72) ;", + "'bar';" + ); + test_normalization( + "String.fromCharCode(0x62, \r 0x61,