It supports scripts over multiple PDUs. It is a stateful JavaScript whitespace
and identifiers normalizer. All JavaScript identifier names, except those from
the ignore list, will be substituted with unified names in the following
-format: var_0000 -> var_ffff. Moreover, Normalizer validates the syntax
-concerning ECMA-262 Standard, including scope tracking and restrictions
+format: var_0000 -> var_ffff. The Normalizer tries to expand escaped text,
+so that it appears in its usual form in the output. Moreover, the Normalizer validates
+the syntax against the ECMA-262 Standard, including scope tracking and restrictions
for script elements. For more information on how to additionally configure the
Enhanced Normalizer, see the following configuration options:
js_norm_bytes_depth, js_norm_identifier_depth, js_norm_max_tmpl_nest,
a("hello") // will be substituted to 'console.log("hello")'
In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape
-functions(unescape, decodeURI, decodeURIComponent). This allows detection of
+functions(unescape, decodeURI, decodeURIComponent, String.fromCharCode). This allows detection of
unescape functions nested within other unescape functions, which is a potential
indicator of a multilevel obfuscation. The definition of a function call depends on
identifier substitution, so such identifiers must be included in the ignore list in
-order to use this feature.
+order to use this feature. Once an unescape sequence is identified, it is decoded into the
+corresponding string.
+
+For example:
+
+ unescape('\u0062\u0061\u0072') -> 'bar'
+ decodeURI('%62%61%72') -> 'bar'
+ decodeURIComponent('\x62\x61\x72') -> 'bar'
+ String.fromCharCode(98, 0x0061, 0x72) -> 'bar'
+
+The following formats are supported:
+
+ \xXX
+ \uXXXX
+ \u{XXXX}
+ %XX
+ \uXX
+ %uXXXX
+ decimal code point
+ hexadecimal code point
+
+JS Normalizer is able to decode mixed encoding sequences. However, a built-in alert is raised
+in such a case.
JS Normalizer's syntax parser follows ECMA-262 standard. For various features,
tracking of variable scope and individual brackets is done in accordance to the standard.
*infractions += INF_JS_UNESCAPE_NEST;
events->create_event(EVENT_JS_UNESCAPE_NEST);
}
+ if (js_ctx.is_mixed_encoding_seen())
+ {
+ *infractions += INF_JS_MULTIPLE_ENC;
+ events->create_event(EVENT_JS_MULTIPLE_ENC);
+ }
if (ssn->js_built_in_event)
break;
*infractions += INF_JS_UNESCAPE_NEST;
events->create_event(EVENT_JS_UNESCAPE_NEST);
}
+ if (js_ctx.is_mixed_encoding_seen())
+ {
+ *infractions += INF_JS_MULTIPLE_ENC;
+ events->create_event(EVENT_JS_MULTIPLE_ENC);
+ }
script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
}
// advanced program scope access for testing
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
bool JSIdentifierCtx::scope_check(const std::list<JSProgramScopeType>& compare) const
{
return return_list;
}
-#endif // CATCH_TEST_BUILD
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
bool is_unescape_nesting_seen() const
{ return tokenizer.is_unescape_nesting_seen(); }
-#ifdef CATCH_TEST_BUILD
+ bool is_mixed_encoding_seen() const
+ { return tokenizer.is_mixed_encoding_seen(); }
+
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
const char* get_tmp_buf() const
{ return tmp_buf; }
size_t get_tmp_buf_size() const
{ return tmp_buf_size; }
const JSTokenizer& get_tokenizer() const
{ return tokenizer; }
-#endif
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
#ifdef BENCHMARK_TEST
void rewind_output()
enum JSProgramScopeType : unsigned int;
class JSIdentifierCtxBase;
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
class JSTokenizerTester;
-#endif
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
+
class JSTokenizer : public yyFlexLexer
{
private:
{
Scope(ScopeType t) :
type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC),
- ident_norm(true), block_param(false), do_loop(false)
+ ident_norm(true), block_param(false), do_loop(false), encoding(0), char_code_str(false)
{}
ScopeType type;
bool ident_norm;
bool block_param;
bool do_loop;
+ uint32_t encoding;
+ bool char_code_str;
};
enum ASIGroup
JSRet process(size_t& bytes_in);
bool is_unescape_nesting_seen() const;
+ bool is_mixed_encoding_seen() const;
protected:
[[noreturn]] void LexerError(const char* msg) override
{ snort::FatalError("%s", msg); }
JSRet push_identifier(const char* ident);
bool unescape(const char* lexeme);
void process_punctuator(JSToken tok = PUNCTUATOR);
+ void skip_punctuator();
void process_closing_brace();
JSRet process_subst_open();
FuncType func_call_type();
FuncType detect_func_type();
void check_function_nesting(FuncType);
+ void check_mixed_encoding(uint32_t);
void set_block_param(bool);
bool block_param();
void set_do_loop(bool);
bool do_loop();
+    // Adds the given encoding flag(s) to the mask kept by the current scope.
+    void set_encoding(uint32_t f)
+    {
+        auto& scope = scope_cur();
+        scope.encoding = scope.encoding | f;
+    }
+
+    // Returns the encoding mask accumulated so far in the current scope.
+    uint32_t encoding()
+    {
+        const uint32_t mask = scope_cur().encoding;
+        return mask;
+    }
+
+    // Sets or clears the current scope's char_code_str marker.
+    void set_char_code_str(bool f)
+    {
+        auto& scope = scope_cur();
+        scope.char_code_str = f;
+    }
+
+    // Returns the current scope's char_code_str marker.
+    bool char_code_str()
+    {
+        const bool marker = scope_cur().char_code_str;
+        return marker;
+    }
+
static JSProgramScopeType m2p(ScopeMetaType);
static const char* m2str(ScopeMetaType);
static bool is_operator(JSToken);
JSRet general_literal();
JSRet general_identifier();
void general_unicode();
+ void escaped_unicode();
+ void escaped_code_point();
+ void escaped_url_sequence();
+ void dec_code_point();
+ void hex_code_point();
+ void char_code_no_match();
static const char* p_scope_codes[];
bool prefix_increment = false;
bool dealias_stored = false;
bool unescape_nest_seen = false;
+ bool mixed_encoding_seen = false;
uint8_t max_template_nesting;
std::stack<uint16_t, std::vector<uint16_t>> brace_depth;
const uint32_t max_bracket_depth;
std::stack<Scope> scope_stack;
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
friend JSTokenizerTester;
-#endif // CATCH_TEST_BUILD
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
};
#endif // JS_TOKENIZER_H
constexpr bool JSTokenizer::insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX];
+// encoding flags
+
+// Each enumerator is a distinct bit, so the formats seen while decoding one
+// call can be OR-ed together into a single uint32_t mask (see set_encoding()).
+enum EncodingType
+{
+ IS_HEX = 1 << 0, // hexadecimal char code argument: 0xXXXX
+ IS_DEC = 1 << 1, // decimal char code argument: XXXX
+ IS_XBACKSLASH = 1 << 2, // hex escape sequence: \xXX
+ IS_UBACKSLASH_1 = 1 << 3, // two-digit unicode escape: \uXX
+ IS_UBACKSLASH_2 = 1 << 4, // four-digit unicode escape: \uXXXX
+ IS_UPERCENT = 1 << 5, // percent-u escape: %uXXXX
+ IS_PERCENT = 1 << 6, // URL (percent) escape: %XX
+ IS_UCODEPOINT = 1 << 7 // code point escape: \u{XXXX} (hex digits only, no 0x prefix)
+};
+
%}
/* The following grammar was created based on ECMAScript specification */
UNICODE_ZWNJ \xE2\x80\x8C
UNICODE_ZWJ \xE2\x80\x8D
-/* Unicode escape sequence */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 (escape sequence) */
+/* according to https://262.ecma-international.org/12.0/#prod-UnicodeEscapeSequence */
UNICODE_ESCAPE_SEQUENCE \\u[0-9a-fA-F]{4}
+ESCAPED_CODE_POINT \\u\{[0-9a-fA-F]+\}
+
+/* according to https://262.ecma-international.org/12.0/#prod-HexEscapeSequence */
+HEX_ESCAPE_SEQUENCE \\x[0-9a-fA-F]{2}
+
+/* according to https://tc39.es/ecma262/multipage/additional-ecmascript-features-for-web-browsers.html#sec-unescape-string */
+BYTE_ESCAPE_SEQUENCE \\u[0-9a-fA-F]{2}
+PERCENT_ESCAPE_SEQUENCE %u[0-9a-fA-F]{4}
+
+/* according to https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */
+URL_ESCAPE_SEQUENCE %[0-9a-fA-F]{2}
/* whitespaces */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.2 */
/* identifiers */
/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6 */
-IDENTIFIER_START [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}
+IDENTIFIER_START [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}|{ESCAPED_CODE_POINT}
IDENTIFIER_PART (({IDENTIFIER_START})|({UNICODE_COMBINING_MARK})|({UNICODE_DIGIT})|({UNICODE_CONNECTOR_PUNCTUATION})|{UNICODE_ZWNJ}|{UNICODE_ZWJ})*
IDENTIFIER ({IDENTIFIER_START}{IDENTIFIER_PART})*
LITERAL_THIS this
LITERAL_BOOLEAN true|false
LITERAL_DECIMAL [.]?[0-9]+[\.]?[0-9]*[eE]?[0-9]*
+LITERAL_INTEGER [0-9]*
LITERAL_HEX_INTEGER 0x[0-9a-fA-F]*|0X[0-9a-fA-F]*
LITERAL_DQ_STRING_START \"
LITERAL_DQ_STRING_END \"
LITERAL_DQ_STRING_SKIP \\\"
-LITERAL_DQ_STRING_TEXT [^\"\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
+LITERAL_DQ_STRING_TEXT [^\"\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
LITERAL_SQ_STRING_START \'
LITERAL_SQ_STRING_END \'
LITERAL_SQ_STRING_SKIP \\\'
-LITERAL_SQ_STRING_TEXT [^\'\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
+LITERAL_SQ_STRING_TEXT [^\'\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
LITERAL_TEMPLATE_START \`
LITERAL_TEMPLATE_END \`
LITERAL_TEMPLATE_SUBST_START \$\{
-LITERAL_TEMPLATE_OTHER [^\\\`(\$\{)("<"+(?i:\/script>))]{1,32}
+LITERAL_TEMPLATE_OTHER [^\\%\`(\$\{)("<"+(?i:\/script>))]{1,32}
LITERAL_REGEX_START \/[^*\/]
LITERAL_REGEX_END \/[gimsuy]*
LITERAL_REGEX_SKIP \\\/
/* in a regular expression */
%x regex
+/* in a single-quoted string within unescape function */
+%x unesc_sqstr
+
+/* in a double-quoted string within unescape function */
+%x unesc_dqstr
+
+/* in a template literal within unescape function */
+%x unesc_tmpll
+
+/* to process code units within char code unescape function */
+%x char_code
+%x char_code_lcomm
+%x char_code_bcomm
+
%%
-{WHITESPACES} { }
-{CHAR_ESCAPE_SEQUENCES} { }
-{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; }
+<INITIAL,divop,regst,char_code>{WHITESPACES} { /* skip */ }
+<INITIAL,divop,regst,char_code>{CHAR_ESCAPE_SEQUENCES} { /* skip */ }
+
+{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; }
+<char_code>{LINE_TERMINATORS} { newline_found = true; }
-<INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
+<INITIAL,regex,dqstr,regst,sqstr,divop,char_code>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
{HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) }
- {HTML_COMMENT_OPEN} { BEGIN(lcomm); }
- {LINE_COMMENT_START} { BEGIN(lcomm); }
-<lcomm>{LINE_COMMENT_END1} { BEGIN(regst); newline_found = true; }
-<lcomm>{LINE_COMMENT_END2} { BEGIN(regst); newline_found = true; }
-<lcomm>{LINE_COMMENT_END3} { BEGIN(regst); RETURN(OPENING_TAG) }
-<lcomm>{LINE_COMMENT_END4} { BEGIN(regst); RETURN(CLOSING_TAG) }
-<lcomm>{LINE_COMMENT_SKIP} { }
-<lcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
-
- {BLOCK_COMMENT_START} { BEGIN(bcomm); }
-<bcomm>{BLOCK_COMMENT_END1} { BEGIN(regst); }
-<bcomm>{BLOCK_COMMENT_END2} { BEGIN(regst); RETURN(OPENING_TAG) }
-<bcomm>{BLOCK_COMMENT_END3} { BEGIN(regst); RETURN(CLOSING_TAG) }
-<bcomm>{BLOCK_COMMENT_LINE1} |
-<bcomm>{BLOCK_COMMENT_LINE2} { newline_found = true;}
-<bcomm>{BLOCK_COMMENT_SKIP} { }
-<bcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
-
- {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) }
-<dqstr>{LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
-<dqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
-<dqstr>\\{CR}{LF} { }
-<dqstr>\\{LF} { }
-<dqstr>\\{CR} { }
-<dqstr>{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) }
-<dqstr>{LITERAL_DQ_STRING_SKIP} { dealias_append(); ECHO; }
-<dqstr>{LITERAL_DQ_STRING_TEXT} { dealias_append(); ECHO; }
-<dqstr><<EOF>> { RETURN(SCRIPT_CONTINUE) }
-
- {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) }
-<sqstr>{LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
-<sqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
-<sqstr>\\{CR}{LF} { }
-<sqstr>\\{LF} { }
-<sqstr>\\{CR} { }
-<sqstr>{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) }
-<sqstr>{LITERAL_SQ_STRING_SKIP} { dealias_append(); ECHO; }
-<sqstr>{LITERAL_SQ_STRING_TEXT} { dealias_append(); ECHO; }
-<sqstr><<EOF>> { RETURN(SCRIPT_CONTINUE) }
-
- {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) }
-<tmpll>(\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); }
-<tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); }
-<tmpll>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
-<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START} | /* escaped template substitution */
-<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_END} | /* escaped backtick */
-<tmpll>{LITERAL_TEMPLATE_OTHER} { dealias_append(); ECHO; }
-<tmpll><<EOF>> { RETURN(SCRIPT_CONTINUE) }
+ {HTML_COMMENT_OPEN} { BEGIN(lcomm); }
+ {LINE_COMMENT_START} { BEGIN(lcomm); }
+<char_code>{HTML_COMMENT_OPEN} { BEGIN(char_code_lcomm); }
+<char_code>{LINE_COMMENT_START} { BEGIN(char_code_lcomm); }
+<lcomm>{LINE_COMMENT_END1} { BEGIN(regst); newline_found = true; }
+<lcomm>{LINE_COMMENT_END2} { BEGIN(regst); newline_found = true; }
+<char_code_lcomm>{LINE_COMMENT_END1} { BEGIN(char_code); newline_found = true; }
+<char_code_lcomm>{LINE_COMMENT_END2} { BEGIN(char_code); newline_found = true; }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END3} { BEGIN(regst); RETURN(OPENING_TAG) }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END4} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<lcomm,char_code_lcomm>{LINE_COMMENT_SKIP} { /* skip */ }
+<lcomm,char_code_lcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
+
+ {BLOCK_COMMENT_START} { BEGIN(bcomm); }
+<char_code>{BLOCK_COMMENT_START} { BEGIN(char_code_bcomm); }
+<bcomm>{BLOCK_COMMENT_END1} { BEGIN(regst); }
+<char_code_bcomm>{BLOCK_COMMENT_END1} { BEGIN(char_code); }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END2} { BEGIN(regst); RETURN(OPENING_TAG) }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END3} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE1} |
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE2} { newline_found = true; }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_SKIP} { /* skip */ }
+<bcomm,char_code_bcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
+
+ {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
+<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<dqstr,unesc_dqstr>\\{CR}{LF} { /* skip */ }
+<dqstr,unesc_dqstr>\\{LF} { /* skip */ }
+<dqstr,unesc_dqstr>\\{CR} { /* skip */ }
+<dqstr,unesc_dqstr>{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_SKIP} { dealias_append(); ECHO; }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_TEXT} { dealias_append(); ECHO; }
+<dqstr,unesc_dqstr><<EOF>> { RETURN(SCRIPT_CONTINUE) }
+<dqstr>{UNICODE_ESCAPE_SEQUENCE} |
+<dqstr>{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); }
+<dqstr>{ESCAPED_CODE_POINT} { escaped_code_point(); }
+<unesc_dqstr>{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_dqstr>{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_dqstr>{ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_dqstr>{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_dqstr>{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_dqstr>{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); }
+
+ {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
+<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<sqstr,unesc_sqstr>\\{CR}{LF} { /* skip */ }
+<sqstr,unesc_sqstr>\\{LF} { /* skip */ }
+<sqstr,unesc_sqstr>\\{CR} { /* skip */ }
+<sqstr,unesc_sqstr>{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_SKIP} { dealias_append(); ECHO; }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_TEXT} { dealias_append(); ECHO; }
+<sqstr,unesc_sqstr><<EOF>> { RETURN(SCRIPT_CONTINUE) }
+<sqstr>{UNICODE_ESCAPE_SEQUENCE} |
+<sqstr>{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); }
+<sqstr>{ESCAPED_CODE_POINT} { escaped_code_point(); }
+<unesc_sqstr>{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_sqstr>{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_sqstr>{ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_sqstr>{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_sqstr>{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_sqstr>{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); }
+
+ {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) }
+<tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); }
+<tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); }
+<tmpll,unesc_tmpll>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
+<tmpll,unesc_tmpll>(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START} | /* escaped template substitution */
+<tmpll,unesc_tmpll>(\\\\)*\\{LITERAL_TEMPLATE_END} | /* escaped backtick */
+<tmpll,unesc_tmpll>{LITERAL_TEMPLATE_OTHER} { dealias_append(); ECHO; }
+<tmpll,unesc_tmpll><<EOF>> { RETURN(SCRIPT_CONTINUE) }
+<tmpll>{UNICODE_ESCAPE_SEQUENCE} |
+<tmpll>{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); }
+<tmpll>{ESCAPED_CODE_POINT} { escaped_code_point(); }
+<unesc_tmpll>{UNICODE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_tmpll>{HEX_ESCAPE_SEQUENCE} { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_tmpll>{ESCAPED_CODE_POINT} { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_tmpll>{BYTE_ESCAPE_SEQUENCE} { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_tmpll>{PERCENT_ESCAPE_SEQUENCE} { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_tmpll>{URL_ESCAPE_SEQUENCE} { set_encoding(IS_PERCENT); escaped_url_sequence(); }
<regst>{LITERAL_REGEX_START} { EXEC(literal_regex_start()) }
<regex>{LITERAL_REGEX_END} { ECHO; BEGIN(divop); }
<regex>\\{CR} |
<regex>{LINE_TERMINATORS} { BEGIN(regst); RETURN(BAD_TOKEN) }
<regex>[^<{LF}{CR}{LS}{PS}\\\/]+ { ECHO; }
+<regex>{UNICODE_ESCAPE_SEQUENCE} |
+<regex>{HEX_ESCAPE_SEQUENCE} { escaped_unicode(); }
<regex><<EOF>> { RETURN(SCRIPT_CONTINUE) }
<divop>{DIV_OPERATOR} |
<divop>{DIV_ASSIGNMENT_OPERATOR} { div_assignment_operator(); }
-{OPEN_BRACE} { EXEC(open_brace()) }
-{CLOSE_BRACE} { EXEC(close_brace()) }
-{OPEN_PARENTHESIS} { EXEC(open_parenthesis()) }
-{CLOSE_PARENTHESIS} { EXEC(close_parenthesis()) }
-{OPEN_BRACKET} { EXEC(open_bracket()) }
-{CLOSE_BRACKET} { EXEC(close_bracket()) }
+{OPEN_BRACE} { EXEC(open_brace()) }
+{CLOSE_BRACE} { EXEC(close_brace()) }
+{OPEN_PARENTHESIS} { EXEC(open_parenthesis()) }
+<INITIAL,divop,regst,char_code>{CLOSE_PARENTHESIS} { EXEC(close_parenthesis()) }
+{OPEN_BRACKET} { EXEC(open_bracket()) }
+{CLOSE_BRACKET} { EXEC(close_bracket()) }
{PUNCTUATOR_PREFIX} { EXEC(punctuator_prefix()) }
{DOT_ACCESSOR} { dot_accessor(); }
{OPERATOR_COMPLEX_ASSIGNMENT} { operator_complex_assignment(); }
{OPERATOR_LOGICAL} { operator_logical(); }
{OPERATOR_SHIFT} { operator_shift(); }
+
{PUNCTUATOR_COMMA} { punctuator_comma(); }
+<char_code>{PUNCTUATOR_COMMA} { /* skip */ }
{USE_STRICT_DIRECTIVE} { EXEC(use_strict_directive()) }
{USE_STRICT_DIRECTIVE_SC} { EXEC(use_strict_directive_sc()) }
{LITERAL} { EXEC(general_literal()) }
{IDENTIFIER} { EXEC(general_identifier()) }
+<char_code>{LITERAL_INTEGER} { set_encoding(IS_DEC); dec_code_point(); }
+<char_code>{LITERAL_HEX_INTEGER} { set_encoding(IS_HEX); hex_code_point(); }
+
.|{ALL_UNICODE} { general_unicode(); }
-<<EOF>> { EEOF(eval_eof()) }
+
+<char_code>.|{ALL_UNICODE} { char_code_no_match(); }
+<INITIAL,divop,regst,char_code><<EOF>> { EEOF(eval_eof()) }
%%
res += 0x80 | ((code >> 6) & 0x3f);
res += 0x80 | (code & 0x3f);
}
+ else
+ res += "\uffff";
return res;
}
bool is_unescape = false;
bool is_unicode = false;
+ bool is_code_point = false;
short digits_left = 4;
std::string unicode_str;
is_unescape = false;
}
+ if (is_unicode and ch == '{')
+ {
+ is_unicode = false;
+ is_code_point = true;
+ continue;
+ }
+
if (is_unicode)
{
unicode_str += ch;
continue;
}
+ if (is_code_point)
+ {
+ if (ch == '}')
+ {
+ const unsigned int code_point = std::stoi(unicode_str, nullptr, 16);
+ res += unicode_to_utf8(code_point);
+
+ unicode_str = "";
+ is_code_point = false;
+ }
+ else
+ unicode_str += ch;
+
+ continue;
+ }
+
res += ch;
}
BEGIN(regst);
}
+// Treats the current lexeme as a punctuator without writing anything to yyout:
+// the scanner moves to the regst start condition and the last-token marker
+// becomes PUNCTUATOR.
+void JSTokenizer::skip_punctuator()
+{
+    BEGIN(regst);
+    token = PUNCTUATOR;
+}
+
void JSTokenizer::process_closing_brace()
{
if (!brace_depth.empty())
case IDENTIFIER:
{
FuncType ret = FuncType::GENERAL;
+
+ if (meta_type() == ScopeMetaType::FUNCTION)
+ return ret;
+
if (ignored_id_pos >= 0)
{
std::streambuf* pbuf = yyout.rdbuf();
memcmp(tail, id.identifier.data(), size) == 0)
{
ret = id.type;
+ pbuf->pubseekoff(-size, yyout.cur, yyout.out);
+
break;
}
}
}
}
+// Records that a decoded call mixed several encoding formats.
+//
+// flags: OR-ed EncodingType bits collected for the scope of one call.
+//
+// The flag is only ever raised here; process() is what clears it before each
+// yylex() run. Assigning the result directly would let a later call that uses
+// a single encoding (or none) erase a detection made earlier in the same run.
+void JSTokenizer::check_mixed_encoding(uint32_t flags)
+{
+    // flags & (flags - 1) drops the lowest set bit; anything left over means
+    // at least two different encoding bits were set (0 stays 0)
+    if ((flags & (flags - 1)) != 0)
+        mixed_encoding_seen = true;
+}
+
// Returns the unescape_nest_seen flag. process() clears the flag before each
// yylex() run, so the value reflects the most recent run only.
bool JSTokenizer::is_unescape_nesting_seen() const
{
return unescape_nest_seen;
}
+// Returns the mixed_encoding_seen flag. process() clears the flag before each
+// yylex() run, so the value reflects the most recent run only.
+bool JSTokenizer::is_mixed_encoding_seen() const
+{
+ return mixed_encoding_seen;
+}
+
void JSTokenizer::set_block_param(bool f)
{
scope_cur().block_param = f;
ECHO;
BEGIN(dqstr);
set_ident_norm(true);
+
+ switch (func_call_type())
+ {
+ case FuncType::UNESCAPE:
+ BEGIN(unesc_dqstr);
+ break;
+ case FuncType::NOT_FUNC:
+ case FuncType::GENERAL:
+ case FuncType::CHAR_CODE:
+ BEGIN(dqstr);
+ break;
+ default:
+ assert(false);
+ return BAD_TOKEN;
+ }
+
return EOS;
}
EXEC(do_semicolon_insertion(ASI_GROUP_7))
EXEC(do_spacing(LITERAL))
ECHO;
- BEGIN(sqstr);
set_ident_norm(true);
+
+ switch (func_call_type())
+ {
+ case FuncType::UNESCAPE:
+ BEGIN(unesc_sqstr);
+ break;
+ case FuncType::NOT_FUNC:
+ case FuncType::GENERAL:
+ case FuncType::CHAR_CODE:
+ BEGIN(sqstr);
+ break;
+ default:
+ assert(false);
+ return BAD_TOKEN;
+ }
+
return EOS;
}
EXEC(do_semicolon_insertion(ASI_GROUP_7))
EXEC(do_spacing(LITERAL))
ECHO;
- BEGIN(tmpll);
set_ident_norm(true);
+
+ switch (func_call_type())
+ {
+ case FuncType::UNESCAPE:
+ BEGIN(unesc_tmpll);
+ break;
+ case FuncType::NOT_FUNC:
+ case FuncType::GENERAL:
+ case FuncType::CHAR_CODE:
+ BEGIN(tmpll);
+ break;
+ default:
+ assert(false);
+ return BAD_TOKEN;
+ }
+
return EOS;
}
check_function_nesting(f_call);
EXEC(scope_push(PARENTHESES))
set_func_call_type(f_call);
- process_punctuator();
+
+ switch (f_call)
+ {
+ case FuncType::CHAR_CODE:
+ token = LITERAL;
+ BEGIN(char_code);
+ set_char_code_str(true);
+ yyout << '\'';
+ break;
+ case FuncType::UNESCAPE:
+ skip_punctuator();
+ break;
+ case FuncType::NOT_FUNC:
+ case FuncType::GENERAL:
+ process_punctuator();
+ break;
+ default:
+ assert(false);
+ return BAD_TOKEN;
+ }
+
return EOS;
}
{
dealias_clear_mutated(false);
dealias_reset();
+
FuncType f_call = func_call_type();
+ uint32_t flags = encoding();
+ bool ch_code_str = char_code_str();
bool id_norm = ident_norm();
+
if (meta_type() != ScopeMetaType::NOT_SET)
EXEC(p_scope_pop(meta_type()))
EXEC(scope_pop(PARENTHESES))
+
if (f_call == FuncType::NOT_FUNC)
set_ident_norm(id_norm);
if (block_param())
{
EXEC(do_semicolon_insertion(ASI_GROUP_5))
}
- ECHO;
+
+ switch (f_call)
+ {
+ case FuncType::NOT_FUNC:
+ case FuncType::GENERAL:
+ ECHO;
+ break;
+ case FuncType::UNESCAPE:
+ check_mixed_encoding(flags);
+ break;
+ case FuncType::CHAR_CODE:
+ check_mixed_encoding(flags);
+ if (ch_code_str)
+ yyout << '\'';
+ break;
+ default:
+ assert(false);
+ return BAD_TOKEN;
+ }
+
token = PUNCTUATOR;
BEGIN(divop);
return EOS;
set_ident_norm(true);
}
+// Decodes an escape whose prefix is exactly two characters long ("\u", "\x"
+// or "%u") followed by hex digits, and writes the UTF-8 form to the output.
+void JSTokenizer::escaped_unicode()
+{
+    // skip the two-character escape prefix; what remains is the hex number
+    const std::string hex_digits(YYText() + 2);
+    const int code_unit = std::stoi(hex_digits, nullptr, 16);
+
+    yyout << unicode_to_utf8(code_unit);
+}
+
+// Converts a run of digits in the given base (10 or 16) to a number without
+// ever throwing.
+//
+// The lexer patterns feeding this helper accept digit runs of any length, and
+// the text comes from the inspected script, so std::stoi alone could throw
+// std::out_of_range (too many digits) or std::invalid_argument (no digits).
+//
+// digits: digits only, no prefix; may be empty.
+// base:   10 or 16.
+// Returns 0 for an empty or all-zero run, the exact value for runs short
+// enough to always fit in int, and 0x7FFFFFFF (the largest value std::stoi
+// could produce) for anything longer.
+static unsigned int js_digits_to_code(const std::string& digits, int base)
+{
+    const unsigned int saturated = 0x7FFFFFFF;
+    // 7 hex digits (0xFFFFFFF) and 9 decimal digits (999999999) always fit in
+    // a 32-bit int; both limits are far beyond U+10FFFF
+    const std::string::size_type max_digits = (base == 16) ? 7 : 9;
+
+    const std::string::size_type first = digits.find_first_not_of('0');
+    if (first == std::string::npos)
+        return 0;
+
+    if (digits.size() - first > max_digits)
+        return saturated;
+
+    return static_cast<unsigned int>(std::stoi(digits.substr(first), nullptr, base));
+}
+
+// Decodes a \u{XXXX} code point escape and writes its UTF-8 form to the output.
+void JSTokenizer::escaped_code_point()
+{
+    // truncate the leading "\u{" and the trailing "}", keep the hex number only
+    std::string code(YYText() + 3);
+    code.resize(code.size() - 1);
+    yyout << unicode_to_utf8(js_digits_to_code(code, 16));
+}
+
+// Decodes a %XX URL escape and writes its UTF-8 form to the output.
+void JSTokenizer::escaped_url_sequence()
+{
+    // truncate the leading "%", keep the two hex digits only
+    std::string code(YYText() + 1);
+    yyout << unicode_to_utf8(js_digits_to_code(code, 16));
+}
+
+// Decodes a decimal char code argument and writes its UTF-8 form to the output.
+void JSTokenizer::dec_code_point()
+{
+    std::string code(YYText());
+    yyout << unicode_to_utf8(js_digits_to_code(code, 10));
+}
+
+// Decodes a hexadecimal char code argument (0x... or 0X...) and writes its
+// UTF-8 form to the output.
+void JSTokenizer::hex_code_point()
+{
+    // the lexeme always starts with "0x" or "0X"; a bare prefix decodes to 0
+    std::string code(YYText() + 2);
+    yyout << unicode_to_utf8(js_digits_to_code(code, 16));
+}
+
+// Fallback for input seen in the char_code start condition that is not a
+// char code argument: leaves char-code decoding and hands the same input back
+// to the scanner so it is tokenized again outside char_code.
+void JSTokenizer::char_code_no_match()
+{
+ // continue scanning in the regst start condition
+ BEGIN(regst);
+ // emit a single quote; presumably this closes the quote written when the
+ // char-code call was opened
+ yyout << '\'';
+ // clear the marker so the closing parenthesis does not emit another quote
+ set_char_code_str(false);
+ // push the whole matched text back to the input to be rescanned
+ yyless(0);
+ // zero the states[sp] element
+ // NOTE(review): states and sp are declared outside this view; presumably
+ // this resets the saved tokenizer state for the rescan - confirm
+ memset((void*)(states + sp), 0, sizeof(states[0]));
+}
+
JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in)
{
yy_flush_buffer(YY_CURRENT_BUFFER);
unescape_nest_seen = false;
+ mixed_encoding_seen = false;
auto r = yylex();
js_test_utils.cc
)
+add_catch_test( js_unescape_test
+ SOURCES
+ ${FLEX_js_tokenizer_OUTPUTS}
+ ../js_identifier_ctx.cc
+ ../js_normalizer.cc
+ ../streambuf.cc
+ ../util_cstring.cc
+ js_test_utils.cc
+)
+
add_catch_test( js_identifier_ctx_test
SOURCES
../js_identifier_ctx.cc
"var x='test\\u0000test';";
static const char clamav_expected12[] =
- "var x='test\\u0000test';";
+ "var x='test\u0000test';";
static const char clamav_buf13[] =
"var x\\s12345";
"document.write(unescape('test%20test";
static const char clamav_expected14[] =
- "document.write(unescape('test%20test";
+ "document.write('test test";
TEST_CASE("clamav tests", "[JSNormalizer]")
{
const char dat1[] = "var str =\"any\\";
const char dat2[] = "u1234tx\";";
const char exp1[] = "var str=\"any\\";
- const char exp2[] = "u1234tx\";";
- const char exp[] = "var str=\"any\\u1234tx\";";
+ const char exp2[] = "\u1234tx\";";
+ const char exp[] = "var str=\"any\u1234tx\";";
NORMALIZE_2(dat1, dat2, exp1, exp2);
NORM_COMBINED_2(dat1, dat2, exp);
const char dat1[] = "var str =\"any\\u";
const char dat2[] = "1234tx\";";
const char exp1[] = "var str=\"any\\u";
- const char exp2[] = "1234tx\";";
- const char exp[] = "var str=\"any\\u1234tx\";";
+ const char exp2[] = "\u1234tx\";";
+ const char exp[] = "var str=\"any\u1234tx\";";
NORMALIZE_2(dat1, dat2, exp1, exp2);
NORM_COMBINED_2(dat1, dat2, exp);
SECTION("in arguments")
{
tester.test_function_scopes({
- {"unescape(", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ {"unescape(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("separated identifier and call")
{
tester.test_function_scopes({
- {"unescape /*comment*/ (", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ {"unescape /*comment*/ (", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("complete call")
{
tester.test_function_scopes({
- {"unescape('%62%61%72')", "unescape('%62%61%72')", {FuncType::NOT_FUNC}}
+ {"unescape('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}}
});
}
SECTION("as named function definition")
{
tester.test_function_scopes({
- {"unescape(){", "unescape(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+ {"function unescape(){", "function unescape(){",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
});
}
SECTION("after assignment substitution")
{
tester.test_function_scopes({
- {"var a = unescape; a(", "var var_0000=unescape;unescape(", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE}}
+ {"var a = unescape; a(", "var var_0000=unescape;",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("literal")
SECTION("as a template literal substitution")
{
tester.test_function_scopes({
- {"`literal ${unescape(", "`literal ${unescape(",
+ {"`literal ${unescape(", "`literal ${",
{FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("in arguments")
{
tester.test_function_scopes({
- {"decodeURI(", "decodeURI(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ {"decodeURI(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("separated identifier and call")
{
tester.test_function_scopes({
- {"decodeURI /*comment*/ (", "decodeURI(", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE}}
+ {"decodeURI /*comment*/ (", "",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("complete call")
{
tester.test_function_scopes({
- {"decodeURI('%62%61%72')", "decodeURI('%62%61%72')", {FuncType::NOT_FUNC}}
+ {"decodeURI('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}}
});
}
SECTION("as named function definition")
{
tester.test_function_scopes({
- {"decodeURI(){", "decodeURI(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+ {"function decodeURI(){", "function decodeURI(){",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
});
}
SECTION("after assignment substitution")
{
tester.test_function_scopes({
- {"var a = decodeURI; a(", "var var_0000=decodeURI;decodeURI(", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE}}
+ {"var a = decodeURI; a(", "var var_0000=decodeURI;",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("literal")
SECTION("as a template literal substitution")
{
tester.test_function_scopes({
- {"`literal ${decodeURI(", "`literal ${decodeURI(",
+ {"`literal ${decodeURI(", "`literal ${",
{FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("in arguments")
{
tester.test_function_scopes({
- {"decodeURIComponent(", "decodeURIComponent(", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE}}
+ {"decodeURIComponent(", "",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("separated identifier and call")
{
tester.test_function_scopes({
- {"decodeURIComponent /*comment*/ (", "decodeURIComponent(", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE}}
+ {"decodeURIComponent /*comment*/ (", "",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("complete call")
{
tester.test_function_scopes({
- {"decodeURIComponent('%62%61%72')", "decodeURIComponent('%62%61%72')",
+ {"decodeURIComponent('%62%61%72')", "'bar'",
{FuncType::NOT_FUNC}}
});
}
SECTION("as named function definition")
{
tester.test_function_scopes({
- {"decodeURIComponent(){", "decodeURIComponent(){", {FuncType::NOT_FUNC,
- FuncType::NOT_FUNC}}
+ {"function decodeURIComponent(){", "function decodeURIComponent(){",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
});
}
SECTION("after assignment substitution")
{
tester.test_function_scopes({
- {"var a = decodeURIComponent; a(",
- "var var_0000=decodeURIComponent;decodeURIComponent(", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE}}
+ {"var a = decodeURIComponent; a(", "var var_0000=decodeURIComponent;",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
SECTION("literal")
SECTION("as a template literal substitution")
{
tester.test_function_scopes({
- {"`literal ${decodeURIComponent(", "`literal ${decodeURIComponent(",
- {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ {"`literal ${decodeURIComponent(", "`literal ${",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
});
}
}
SECTION("in arguments")
{
tester.test_function_scopes({
- {"String.fromCharCode(", "String.fromCharCode(",
+ {"String.fromCharCode(", "'",
{FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
});
}
SECTION("separated identifier and call")
{
tester.test_function_scopes({
- {"String.fromCharCode /*comment*/ (", "String.fromCharCode(",
+ {"String.fromCharCode /*comment*/ (", "'",
{FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
});
}
SECTION("complete call")
{
tester.test_function_scopes({
- {"String.fromCharCode( 65, 0x42 )", "String.fromCharCode(65,0x42)",
+ {"String.fromCharCode( 65, 0x42 )", "'AB'",
{FuncType::NOT_FUNC}}
});
}
SECTION("as named function definition")
{
tester.test_function_scopes({
- {"String.fromCharCode(){", "String.fromCharCode(){",
+ {"function String.fromCharCode(){", "function String.fromCharCode(){",
{FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
});
}
SECTION("after class name assignment substitution")
{
tester.test_function_scopes({
- {"var a = String; a.fromCharCode(", "var var_0000=String;String.fromCharCode(",
+ {"var a = String; a.fromCharCode(", "var var_0000=String;'",
{FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
});
}
{
tester.test_function_scopes({
{"var a = String.fromCharCode; a(",
- "var var_0000=String.fromCharCode;String.fromCharCode(",
+ "var var_0000=String.fromCharCode;'",
{FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
});
}
SECTION("as a template literal substitution")
{
tester.test_function_scopes({
- {"`literal ${String.fromCharCode(", "`literal ${String.fromCharCode(",
+ {"`literal ${String.fromCharCode(", "`literal ${'",
{FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
});
}
SECTION("Multiple unescape functions")
{
tester.test_function_scopes({
- {"unescape( unescape( unescape(", "unescape(unescape(unescape(",
+ {"unescape( unescape( unescape(", "",
{FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
});
CHECK(tester.is_unescape_nesting_seen());
SECTION("Multiple different unescape functions")
{
tester.test_function_scopes({
- {"unescape( decodeURI( decodeURIComponent(",
- "unescape(decodeURI(decodeURIComponent(", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE,
- FuncType::UNESCAPE,
- FuncType::UNESCAPE}}
+ {"unescape( decodeURI( decodeURIComponent(", "",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
});
CHECK(tester.is_unescape_nesting_seen());
}
SECTION("Multiple String.fromCharCode functions")
{
tester.test_function_scopes({
- {"String.fromCharCode( String.fromCharCode( String.fromCharCode(",
- "String.fromCharCode(String.fromCharCode(String.fromCharCode(",
- {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE,
- FuncType::CHAR_CODE}}
+ {"String.fromCharCode( String.fromCharCode( String.fromCharCode(", "'' '' '",
+ {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE, FuncType::CHAR_CODE}}
});
CHECK(!tester.is_unescape_nesting_seen());
}
SECTION("Mixed function calls")
{
tester.test_function_scopes({
- {"general( unescape( String.fromCharCode(",
- "var_0000(unescape(String.fromCharCode(",
- {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE,
- FuncType::CHAR_CODE}}
+ {"general( unescape( String.fromCharCode(", "var_0000('",
+ {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE, FuncType::CHAR_CODE}}
});
CHECK(!tester.is_unescape_nesting_seen());
}
SECTION("Multiple unescape functions")
{
tester.test_function_scopes({
- {"unescape( unescape( unescape( '%62%61%72' ) )",
- "unescape(unescape(unescape('%62%61%72'))", {FuncType::NOT_FUNC,
- FuncType::UNESCAPE }}
+ {"unescape( unescape( unescape( '%62%61%72' ) )", "'bar'",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE }}
});
}
SECTION("Multiple different unescape functions")
{
tester.test_function_scopes({
- {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )",
- "unescape(decodeURI(decodeURIComponent('%62%61%72'))",
+ {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )", "'bar'",
{FuncType::NOT_FUNC, FuncType::UNESCAPE }}
});
}
{
tester.test_function_scopes({
{"String.fromCharCode( String.fromCharCode( String.fromCharCode( 65, 0x42 ) )",
- "String.fromCharCode(String.fromCharCode(String.fromCharCode(65,0x42))",
+ "'' '' 'AB'",
{FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
});
}
{
tester.test_function_scopes({
{"general( unescape( String.fromCharCode( 65, 0x42 ) )",
- "var_0000(unescape(String.fromCharCode(65,0x42))", {FuncType::NOT_FUNC,
- FuncType::GENERAL}}
+ "var_0000('AB'",
+ {FuncType::NOT_FUNC, FuncType::GENERAL}}
});
}
}
tester.test_function_scopes({
{"un", "var_0000", {FuncType::NOT_FUNC}},
{"escape", "unescape", {FuncType::NOT_FUNC}},
- {"(", "unescape(", {FuncType::NOT_FUNC,
+ {"(", "", {FuncType::NOT_FUNC,
FuncType::UNESCAPE}},
- {")", "unescape()", {FuncType::NOT_FUNC}},
+ {")", "", {FuncType::NOT_FUNC}},
});
}
SECTION("split between identifier and parenthesis")
{
tester.test_function_scopes({
{"decodeURI", "decodeURI", {FuncType::NOT_FUNC}},
- {"(", "decodeURI(", {FuncType::NOT_FUNC,
+ {"(", "", {FuncType::NOT_FUNC,
FuncType::UNESCAPE}},
- {")", "decodeURI()", {FuncType::NOT_FUNC}},
+ {")", "", {FuncType::NOT_FUNC}},
});
}
SECTION("comment between identifier and parenthesis")
tester.test_function_scopes({
{"unescape", "unescape", {FuncType::NOT_FUNC}},
{"//String.fromCharCode\n", "unescape", {FuncType::NOT_FUNC}},
- {"(", "unescape(", {FuncType::NOT_FUNC,
+ {"(", "", {FuncType::NOT_FUNC,
FuncType::UNESCAPE}},
- {")", "unescape()", {FuncType::NOT_FUNC}},
+ {")", "", {FuncType::NOT_FUNC}},
});
}
SECTION("split in arguments")
tester.test_function_scopes({
{"String", "String", {FuncType::NOT_FUNC}},
{".fromCharCode", "String.fromCharCode", {FuncType::NOT_FUNC}},
- {"(`", "String.fromCharCode(`", {FuncType::NOT_FUNC,
+ {"(`", "'' `", {FuncType::NOT_FUNC,
FuncType::CHAR_CODE}},
- {"un", "String.fromCharCode(`un", {FuncType::NOT_FUNC,
+ {"un", "'' `un", {FuncType::NOT_FUNC,
FuncType::CHAR_CODE}},
- {"escape(", "String.fromCharCode(`unescape(", {FuncType::NOT_FUNC,
+ {"escape(", "'' `unescape(", {FuncType::NOT_FUNC,
FuncType::CHAR_CODE}},
- {"`)", "String.fromCharCode(`unescape(`)", {FuncType::NOT_FUNC}},
+ {"`)", "'' `unescape(`", {FuncType::NOT_FUNC}},
});
}
SECTION("Nesting - Mixed function calls")
{"decode", "var_0000", {FuncType::NOT_FUNC}},
{"URI", "decodeURI", {FuncType::NOT_FUNC}},
{"Component", "decodeURIComponent", {FuncType::NOT_FUNC}},
- {"(", "decodeURIComponent(", {FuncType::NOT_FUNC,
+ {"(", "", {FuncType::NOT_FUNC,
FuncType::UNESCAPE}},
- {" a, ", "decodeURIComponent(var_0001,",
+ {" a, ", "var_0001,",
{FuncType::NOT_FUNC,
FuncType::UNESCAPE}},
- {" String.fromCharCode( ar",
- "decodeURIComponent(var_0001,String.fromCharCode(var_0002",
+ {" String.fromCharCode( ar", "var_0001,'' var_0002",
{FuncType::NOT_FUNC,
FuncType::UNESCAPE,
FuncType::CHAR_CODE}},
- {"g ), b, foo",
- "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0005",
+
+ {"g ), b, foo", "var_0001,'' var_0003,var_0004,var_0005",
{FuncType::NOT_FUNC,
FuncType::UNESCAPE}},
- {"bar( ",
- "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006(",
+
+ {"bar( ", "var_0001,'' var_0003,var_0004,var_0006(",
{FuncType::NOT_FUNC,
FuncType::UNESCAPE,
FuncType::GENERAL}},
- {"))",
- "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006())",
+
+ {"))", "var_0001,'' var_0003,var_0004,var_0006()",
{FuncType::NOT_FUNC}}
});
}
return normalizer_wo_ident.normalize(src_wo_semicolons, src_len);
};
}
+
+// Benchmarks two unescape-related inputs: an escaped sequence placed inside a
+// string literal, and a stream of empty unescape() calls.
+// NOTE(review): make_input / make_input_repeat presumably expand their pattern
+// up to norm_depth bytes - confirm against the helpers.
+TEST_CASE("JS Normalizer, unescape", "[JSNormalizer]")
+{
+    auto str_unescape = make_input("'", "\\u0061", "'", norm_depth);
+    const char* const escaped_literal_src = str_unescape.c_str();
+
+    auto f_unescape = make_input_repeat("unescape('')", norm_depth);
+    const char* const unescape_calls_src = f_unescape.c_str();
+
+    const size_t input_len = norm_depth;
+
+    JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
+    JSNormalizer norm(ident_ctx, unlim_depth, max_template_nesting, norm_depth);
+
+    // Each input is first checked to normalize to SCRIPT_ENDED, then measured
+    // with the output rewound before every run.
+    REQUIRE(norm_ret(norm, str_unescape) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("unescape sequence")
+    {
+        norm.rewind_output();
+        return norm.normalize(escaped_literal_src, input_len);
+    };
+
+    REQUIRE(norm_ret(norm, f_unescape) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("unescape function tracking")
+    {
+        norm.rewind_output();
+        return norm.normalize(unescape_calls_src, input_len);
+    };
+}
+
#endif // BENCHMARK_TEST
return normalizer.is_unescape_nesting_seen();
}
-void test_scope(const char* context, std::list<JSProgramScopeType> stack)
+void test_scope(const char* context, const std::list<JSProgramScopeType>& stack)
{
std::string buf(context);
buf += "</script>";
CHECK(result_buf == expected);
}
+// Normalizes 'source' in one call on a fresh normalizer and checks that:
+//  - the return code is SCRIPT_CONTINUE,
+//  - the normalizer reports that mixed encoding was seen,
+//  - the produced script equals 'expected'.
+void test_normalization_mixed_encoding(const char* source, const char* expected)
+{
+    JSIdentifierCtx id_ctx(norm_depth, max_scope_depth, s_ignored_ids);
+    JSNormalizer normalizer(id_ctx, norm_depth, max_template_nesting, max_bracket_depth);
+
+    const auto ret = normalizer.normalize(source, strlen(source));
+    const std::string result_buf(normalizer.get_script(), normalizer.script_size());
+
+    CHECK(ret == JSTokenizer::JSRet::SCRIPT_CONTINUE);
+    CHECK(normalizer.is_mixed_encoding_seen());
+    CHECK(result_buf == expected);
+}
+
void test_normalization(const std::vector<PduCase>& pdus)
{
JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
}
}
-void test_normalization(std::list<ScopedPduCase> pdus)
+void test_normalization(const std::list<ScopedPduCase>& pdus)
{
JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
snort::JSNormalizer normalizer;
};
-void test_scope(const char* context, std::list<JSProgramScopeType> stack);
+void test_scope(const char* context, const std::list<JSProgramScopeType>& stack);
void test_normalization(const char* source, const char* expected);
void test_normalization_bad(const char* source, const char* expected, JSTokenizer::JSRet eret);
+void test_normalization_mixed_encoding(const char* source, const char* expected);
typedef std::pair<const char*, const char*> PduCase;
// source, expected for a single PDU
void test_normalization(const std::vector<PduCase>& pdus);
typedef std::tuple<const char*,const char*, std::list<JSProgramScopeType>> ScopedPduCase;
// source, expected, and current scope type stack for a single PDU
-void test_normalization(std::list<ScopedPduCase> pdus);
+void test_normalization(const std::list<ScopedPduCase>& pdus);
#endif // JS_TEST_UTILS_H
--- /dev/null
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation. You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_unescape_test.cc author Volodymyr Horban <vhorban@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "catch/catch.hpp"
+
+#include "utils/js_identifier_ctx.h"
+#include "utils/js_normalizer.h"
+
+#include "js_test_utils.h"
+
+#ifdef CATCH_TEST_BUILD
+
+TEST_CASE("Sequence parsing", "[JSNormalizer]")
+{
+ SECTION("\\xXX")
+ {
+ test_normalization(
+ "'\\x01'",
+ "'\u0001'"
+ );
+ test_normalization(
+ "'\\x23'",
+ "'\u0023'"
+ );
+ test_normalization(
+ "'\\x45'",
+ "'\u0045'"
+ );
+ test_normalization(
+ "'\\x67'",
+ "'\u0067'"
+ );
+ test_normalization(
+ "'\\x89'",
+ "'\u0089'"
+ );
+ test_normalization(
+ "'\\xaA'",
+ "'\u00aA'"
+ );
+ test_normalization(
+ "'\\xbB'",
+ "'\u00bB'"
+ );
+ test_normalization(
+ "'\\xcC'",
+ "'\u00cC'"
+ );
+ test_normalization(
+ "'\\xdD'",
+ "'\u00dD'"
+ );
+ test_normalization(
+ "'\\xeE'",
+ "'\u00eE'"
+ );
+ test_normalization(
+ "'\\xfF'",
+ "'\u00fF'"
+ );
+ }
+
+ SECTION("\\uXXXX")
+ {
+ test_normalization(
+ "'\\u0123'",
+ "'\u0123'"
+ );
+ test_normalization(
+ "'\\u4567'",
+ "'\u4567'"
+ );
+ test_normalization(
+ "'\\u89aA'",
+ "'\u89aA'"
+ );
+ test_normalization(
+ "'\\ubBcC'",
+ "'\ubBcC'"
+ );
+ test_normalization(
+ "'\\ueEfF'",
+ "'\ueEfF'"
+ );
+ }
+
+ SECTION("\\u{XXXX}")
+ {
+ test_normalization(
+ "'\\u{0123}'",
+ "'\u0123'"
+ );
+ test_normalization(
+ "'\\u{4567}'",
+ "'\u4567'"
+ );
+ test_normalization(
+ "'\\u{89aA}'",
+ "'\u89aA'"
+ );
+ test_normalization(
+ "'\\u{bBcC}'",
+ "'\ubBcC'"
+ );
+ test_normalization(
+ "'\\u{eEfF}'",
+ "'\ueEfF'"
+ );
+ }
+
+ SECTION("%XX")
+ {
+ test_normalization(
+ "unescape('%01')",
+ "'\u0001'"
+ );
+ test_normalization(
+ "unescape('%23')",
+ "'\u0023'"
+ );
+ test_normalization(
+ "unescape('%45')",
+ "'\u0045'"
+ );
+ test_normalization(
+ "unescape('%67')",
+ "'\u0067'"
+ );
+ test_normalization(
+ "unescape('%89')",
+ "'\u0089'"
+ );
+ test_normalization(
+ "unescape('%aA')",
+ "'\u00aA'"
+ );
+ test_normalization(
+ "unescape('%bB')",
+ "'\u00bB'"
+ );
+ test_normalization(
+ "unescape('%cC')",
+ "'\u00cC'"
+ );
+ test_normalization(
+ "unescape('%dD')",
+ "'\u00dD'"
+ );
+ test_normalization(
+ "unescape('%eE')",
+ "'\u00eE'"
+ );
+ test_normalization(
+ "unescape('%fF')",
+ "'\u00fF'"
+ );
+ }
+
+    // Two-hex-digit backslash-u escapes passed to unescape(): every case is
+    // expected to be decoded to the code point with the same value. The cases
+    // walk through the decimal digits and both letter cases of a-f.
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "unescape('\\u01')",
+            "'\u0001'"
+        );
+        test_normalization(
+            "unescape('\\u23')",
+            "'\u0023'"
+        );
+        test_normalization(
+            "unescape('\\u45')",
+            "'\u0045'"
+        );
+        test_normalization(
+            "unescape('\\u67')",
+            "'\u0067'"
+        );
+        test_normalization(
+            "unescape('\\u89')",
+            "'\u0089'"
+        );
+        test_normalization(
+            "unescape('\\uaA')",
+            "'\u00aA'"
+        );
+        test_normalization(
+            "unescape('\\ubB')",
+            "'\u00bB'"
+        );
+        test_normalization(
+            "unescape('\\ucC')",
+            "'\u00cC'"
+        );
+        test_normalization(
+            "unescape('\\udD')",
+            "'\u00dD'"
+        );
+        test_normalization(
+            "unescape('\\ueE')",
+            "'\u00eE'"
+        );
+        test_normalization(
+            "unescape('\\ufF')",
+            "'\u00fF'"
+        );
+    }
+
+ SECTION("%uXXXX")
+ {
+ test_normalization(
+ "unescape('%u0123')",
+ "'\u0123'"
+ );
+ test_normalization(
+ "unescape('%u4567')",
+ "'\u4567'"
+ );
+ test_normalization(
+ "unescape('%u89aA')",
+ "'\u89aA'"
+ );
+ test_normalization(
+ "unescape('%ubBcC')",
+ "'\ubBcC'"
+ );
+ test_normalization(
+ "unescape('%ueEfF')",
+ "'\ueEfF'"
+ );
+ }
+
+ SECTION("decimal")
+ {
+ test_normalization(
+ "String.fromCharCode(1)",
+ "'\u0001'"
+ );
+ test_normalization(
+ "String.fromCharCode(12)",
+ "'\u000c'"
+ );
+ test_normalization(
+ "String.fromCharCode(345)",
+ "'\u0159'"
+ );
+ test_normalization(
+ "String.fromCharCode(6789)",
+ "'\u1a85'"
+ );
+ test_normalization(
+ "String.fromCharCode(1000)",
+ "'\u03e8'"
+ );
+ test_normalization(
+ "String.fromCharCode(0001)",
+ "'\x01'"
+ );
+ test_normalization(
+ "String.fromCharCode(65536)",
+ "'\uffff'"
+ );
+ }
+
+    // Hexadecimal arguments of String.fromCharCode(): both the lower-case and
+    // upper-case prefix are exercised, with mixed-case digits. The last two
+    // cases pin the expected output for a value above 0xffff, which is U+FFFF.
+    SECTION("hexadecimal")
+    {
+        test_normalization(
+            "String.fromCharCode(0x0001)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X0001)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x1234)",
+            "'\u1234'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X5678)",
+            "'\u5678'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x9aAb)",
+            "'\u9aAb'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X9aAb)",
+            "'\u9aAb'"
+        );
+        test_normalization(
+            "String.fromCharCode(0xBcCd)",
+            "'\uBcCd'"
+        );
+        test_normalization(
+            "String.fromCharCode(0XeEfF)",
+            "'\ueEfF'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x10000)",
+            "'\uffff'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X10000)",
+            "'\uffff'"
+        );
+    }
+}
+
+TEST_CASE("Universal sequences", "[JSNormalizer]")
+{
+ SECTION("\\uXXXX")
+ {
+ test_normalization(
+ "\\u0065\\u0076\\u0061\\u006C () ;",
+ "eval();"
+ );
+ test_normalization(
+ "'\\u0062\\u0061\\u0072'",
+ "'bar'"
+ );
+ test_normalization(
+ "\"\\u0062\\u0061\\u0072\"",
+ "\"bar\""
+ );
+ test_normalization(
+ "`\\u0062\\u0061\\u0072`",
+ "`bar`"
+ );
+ test_normalization(
+ "/\\u0062\\u0061\\u0072/",
+ "/bar/"
+ );
+ }
+
+ SECTION("\\xXX")
+ {
+ test_normalization(
+ "'\\x62\\x61\\x72'",
+ "'bar'"
+ );
+ test_normalization(
+ "\"\\x62\\x61\\x72\"",
+ "\"bar\""
+ );
+ test_normalization(
+ "`\\x62\\x61\\x72`",
+ "`bar`"
+ );
+ test_normalization(
+ "/\\x62\\x61\\x72/",
+ "/bar/"
+ );
+ }
+
+ SECTION("\\u{XXXX}")
+ {
+ test_normalization(
+ "\\u{0065}\\u{0076}\\u{0061}\\u{006C} () ;",
+ "eval();"
+ );
+ test_normalization(
+ "'\\u{0062}\\u{0061}\\u{0072}'",
+ "'bar'"
+ );
+ test_normalization(
+ "\"\\u{0062}\\u{0061}\\u{0072}\"",
+ "\"bar\""
+ );
+ test_normalization(
+ "`\\u{0062}\\u{0061}\\u{0072}`",
+ "`bar`"
+ );
+ }
+}
+
+TEST_CASE("unescape()", "[JSNormalizer]")
+{
+ SECTION("%XX")
+ {
+ test_normalization(
+ "unescape('%62%61%72')",
+ "'bar'"
+ );
+ test_normalization(
+ "unescape(\"%62%61%72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "unescape(`%62%61%72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("%uXXXX")
+ {
+ test_normalization(
+ "unescape('%u0062%u0061%u0072')",
+ "'bar'"
+ );
+ test_normalization(
+ "unescape(\"%u0062%u0061%u0072\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "unescape(`%u0062%u0061%u0072`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\uXX")
+ {
+ test_normalization(
+ "unescape('\\u62\\u61\\u72')",
+ "'bar'"
+ );
+ test_normalization(
+ "unescape(\"\\u62\\u61\\u72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "unescape(`\\u62\\u61\\u72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\uXXXX")
+ {
+ test_normalization(
+ "unescape('\\u0062\\u0061\\u0072')",
+ "'bar'"
+ );
+ test_normalization(
+ "unescape(\"\\u0062\\u0061\\u0072\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "unescape(`\\u0062\\u0061\\u0072`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\xXX")
+ {
+ test_normalization(
+ "unescape('\\x62\\x61\\x72')",
+ "'bar'"
+ );
+ test_normalization(
+ "unescape(\"\\x62\\x61\\x72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "unescape(`\\x62\\x61\\x72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\u{XXXX}")
+ {
+ test_normalization(
+ "unescape('\\u{0062}\\u{0061}\\u{0072}')",
+ "'bar'"
+ );
+ test_normalization(
+ "unescape(\"\\u{0062}\\u{0061}\\u{0072}\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "unescape(`\\u{0062}\\u{0061}\\u{0072}`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("mixed sequence")
+ {
+ test_normalization_mixed_encoding(
+ "unescape('\\u62%61%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "unescape('\\x62%u0061%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "unescape('\\x62\\u61\\u72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "unescape('%u0062\\u0061%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "unescape('\\u0062\\x61%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "unescape('\\u0062\\u{0061}\\x72')",
+ "'bar'"
+ );
+ }
+}
+
+TEST_CASE("decodeURI()", "[JSNormalizer]")
+{
+ SECTION("%XX")
+ {
+ test_normalization(
+ "decodeURI('%62%61%72')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURI(\"%62%61%72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURI(`%62%61%72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("%uXXXX")
+ {
+ test_normalization(
+ "decodeURI('%u0062%u0061%u0072')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURI(\"%u0062%u0061%u0072\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURI(`%u0062%u0061%u0072`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\uXX")
+ {
+ test_normalization(
+ "decodeURI('\\u62\\u61\\u72')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURI(\"\\u62\\u61\\u72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURI(`\\u62\\u61\\u72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\uXXXX")
+ {
+ test_normalization(
+ "decodeURI('\\u0062\\u0061\\u0072')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURI(\"\\u0062\\u0061\\u0072\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURI(`\\u0062\\u0061\\u0072`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\xXX")
+ {
+ test_normalization(
+ "decodeURI('\\x62\\x61\\x72')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURI(\"\\x62\\x61\\x72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURI(`\\x62\\x61\\x72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\u{XXXX}")
+ {
+ test_normalization(
+ "decodeURI('\\u{0062}\\u{0061}\\u{0072}')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURI(\"\\u{0062}\\u{0061}\\u{0072}\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURI(`\\u{0062}\\u{0061}\\u{0072}`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("mixed sequence")
+ {
+ test_normalization_mixed_encoding(
+ "decodeURI('\\u62%61%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURI('\\x62%u0061%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURI('\\x62\\u61\\u72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURI('%u0062\\u0061%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURI('\\u0062\\x61%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURI('\\u0062\\u{0061}\\x72')",
+ "'bar'"
+ );
+ }
+}
+
+TEST_CASE("decodeURIComponent()", "[JSNormalizer]")
+{
+ SECTION("%XX")
+ {
+ test_normalization(
+ "decodeURIComponent('%62%61%72')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURIComponent(\"%62%61%72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURIComponent(`%62%61%72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("%uXXXX")
+ {
+ test_normalization(
+ "decodeURIComponent('%u0062%u0061%u0072')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURIComponent(\"%u0062%u0061%u0072\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURIComponent(`%u0062%u0061%u0072`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\uXX")
+ {
+ test_normalization(
+ "decodeURIComponent('\\u62\\u61\\u72')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURIComponent(\"\\u62\\u61\\u72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURIComponent(`\\u62\\u61\\u72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\uXXXX")
+ {
+ test_normalization(
+ "decodeURIComponent('\\u0062\\u0061\\u0072')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURIComponent(\"\\u0062\\u0061\\u0072\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURIComponent(`\\u0062\\u0061\\u0072`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\xXX")
+ {
+ test_normalization(
+ "decodeURIComponent('\\x62\\x61\\x72')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURIComponent(\"\\x62\\x61\\x72\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURIComponent(`\\x62\\x61\\x72`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("\\u{XXXX}")
+ {
+ test_normalization(
+ "decodeURIComponent('\\u{0062}\\u{0061}\\u{0072}')",
+ "'bar'"
+ );
+ test_normalization(
+ "decodeURIComponent(\"\\u{0062}\\u{0061}\\u{0072}\")",
+ "\"bar\""
+ );
+ test_normalization(
+ "decodeURIComponent(`\\u{0062}\\u{0061}\\u{0072}`)",
+ "`bar`"
+ );
+ }
+
+ SECTION("mixed sequence")
+ {
+ test_normalization_mixed_encoding(
+ "decodeURIComponent('\\u62%61%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURIComponent('\\x62%u0061%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURIComponent('\\x62\\u61\\u72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURIComponent('%u0062\\u0061%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURIComponent('\\u0062\\x61%72')",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "decodeURIComponent('\\u0062\\u{0061}\\x72')",
+ "'bar'"
+ );
+ }
+}
+
+TEST_CASE("String.fromCharCode()", "[JSNormalizer]")
+{
+ SECTION("decimal")
+ {
+ test_normalization(
+ "String.fromCharCode(98, 97, 114)",
+ "'bar'"
+ );
+ }
+
+ SECTION("hexadecimal")
+ {
+ test_normalization(
+ "String.fromCharCode(0x62, 0x61, 0x72)",
+ "'bar'"
+ );
+
+ test_normalization(
+ "String.fromCharCode(0x0062, 0x0061, 0x0072)",
+ "'bar'"
+ );
+ }
+
+ SECTION("mixed sequence")
+ {
+ test_normalization_mixed_encoding(
+ "String.fromCharCode(98, 97, 0x72)",
+ "'bar'"
+ );
+
+ test_normalization_mixed_encoding(
+ "String.fromCharCode(0x62, 97, 114)",
+ "'bar'"
+ );
+ }
+}
+
+TEST_CASE("Split", "[JSNormalizer]")
+{
+ SECTION("unescape()")
+ {
+ test_normalization({
+ { "unescape(", "" },
+ { ")", "" }
+ });
+
+ test_normalization({
+ { "unescape('%62", "'b" },
+ { "%61%72')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%62%61", "'ba" },
+ { "%72')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%62%61%72", "'bar" },
+ { "')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u", "'%u" },
+ { "0062%u0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u00", "'%u00" },
+ { "62%u0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062", "'b" },
+ { "%u0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062%u", "'b%u" },
+ { "0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062%u00", "'b%u00" },
+ { "61%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062%u0061", "'ba" },
+ { "%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062%u0061%u", "'ba%u" },
+ { "0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062%u0061%u00", "'ba%u00" },
+ { "72')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062%u0061%u0072", "'bar" },
+ { "')", "'bar'" }
+ });
+
+ test_normalization({
+ { "unescape('%u0062", "'b" },
+ { "%u0061", "'ba" },
+ { "%u0072')", "'bar'" }
+ });
+ }
+
+ SECTION("decodeURI()")
+ {
+ test_normalization({
+ { "decodeURI(", "" },
+ { ")", "" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u", "'%u" },
+ { "0062%u0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u00", "'%u00" },
+ { "62%u0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u0062%u", "'b%u" },
+ { "0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u0062%u00", "'b%u00" },
+ { "61%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u0062%u0061", "'ba" },
+ { "%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u0062%u0061%u", "'ba%u" },
+ { "0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u0062%u0061%u00", "'ba%u00" },
+ { "72')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u0062%u0061%u0072", "'bar" },
+ { "')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURI('%u0062", "'b" },
+ { "%u0061", "'ba" },
+ { "%u0072')", "'bar'" }
+ });
+ }
+
+ SECTION("decodeURIComponent()")
+ {
+ test_normalization({
+ { "decodeURIComponent(", "" },
+ { ")", "" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u", "'%u" },
+ { "0062%u0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u00", "'%u00" },
+ { "62%u0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u0062%u", "'b%u" },
+ { "0061%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u0062%u00", "'b%u00" },
+ { "61%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u0062%u0061", "'ba" },
+ { "%u0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u0062%u0061%u", "'ba%u" },
+ { "0072')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u0062%u0061%u00", "'ba%u00" },
+ { "72')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u0062%u0061%u0072", "'bar" },
+ { "')", "'bar'" }
+ });
+
+ test_normalization({
+ { "decodeURIComponent('%u0062", "'b" },
+ { "%u0061", "'ba" },
+ { "%u0072')", "'bar'" }
+ });
+ }
+
+ SECTION("String.fromCharCode()")
+ {
+ test_normalization({
+ { "String.fromCharCode(", "'" },
+ { ")", "''" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(9", "'\u0009" },
+ { "8, 97, 114)", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(98,", "'b" },
+ { "97, 114)", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(98, 97", "'ba" },
+ { ",114)", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(98, 97, 114", "'bar" },
+ { ")", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(0x0062", "'b" },
+ { ",0x0061, 0x0072)", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(0x0062, 0x0061", "'ba" },
+ { ", 0x0072)", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(0x0062, 0x0061, 0x0072", "'bar" },
+ { ")", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(0x0062,", "'b" },
+ { "0x0061,", "'ba" },
+ { "0x72)", "'bar'" }
+ });
+
+ test_normalization({
+ { "String.fromCharCode(98,", "'b" },
+ { "97,", "'ba" },
+ { "114)", "'bar'" }
+ });
+ }
+}
+
+TEST_CASE("Mixed input", "[JSNormalizer]")
+{
+ SECTION("string")
+ {
+ test_normalization(
+ "unescape ( ' A \\x62 B \\x61 C \\x72 ' ) ;",
+ "' A b B a C r ';"
+ );
+ test_normalization(
+ "unescape ( ' \\x62ar b\\x61r ba\\x72 ' ) ;",
+ "' bar bar bar ';"
+ );
+ test_normalization(
+ "unescape ( '\\x62\\x61\\x72', '\\x62\\x61\\x72' ) ;",
+ "'bar','bar';"
+ );
+ test_normalization(
+ "unescape ( '\\x62\\x61\\x72' + '\\x62\\x61\\x72' ) ;",
+ "'bar'+'bar';"
+ );
+ test_normalization_mixed_encoding(
+ "unescape ( '\\x62\\x61\\x72' + '\\u62\\u61\\u72' ) ;",
+ "'bar'+'bar';"
+ );
+ }
+
+ SECTION("literal")
+ {
+ test_normalization(
+ "unescape ( 2, '\\x62\\x61\\x72', 2 ) ;",
+ "2,'bar',2;"
+ );
+ }
+
+ SECTION("identifier")
+ {
+ test_normalization(
+ "unescape ( f(\"A\\u20B\\u20C\"), eval(\"\\u66\\u6f\\u6f\"), \"\\u66\\u6f\\u6f\" ) ;",
+ "var_0000(\"A\\u20B\\u20C\"),eval(\"\\u66\\u6f\\u6f\"),\"foo\";"
+ );
+ test_normalization_mixed_encoding(
+ "String.fromCharCode (114, 0x72, eval('123'), 114, 0x72) ;",
+ "'rr' eval('123'),114,0x72;"
+ );
+ }
+
+ SECTION("comment")
+ {
+ test_normalization(
+ "String.fromCharCode(0x62, \n 0x61, // comment \n 0x72) ;",
+ "'bar';"
+ );
+ test_normalization(
+ "String.fromCharCode(0x62, \t 0x61, /* comment */ 0x72) ;",
+ "'bar';"
+ );
+ test_normalization(
+ "String.fromCharCode(0x62, \r 0x61, <!-- HTML comment \r 0x72) ;",
+ "'bar';"
+ );
+ }
+
+ SECTION("nested")
+ {
+ test_normalization(
+ "unescape('\\x62\\x61\\x72'+unescape('\\x62\\x61\\x72')+decodeURI('\\u62\\u61\\u72')) ;",
+ "'bar'+'bar'+'bar';"
+ );
+ test_normalization(
+ "document.write(unescape('%62%61%72')) ;",
+ "document.write('bar');"
+ );
+ }
+}
+
+#endif // CATCH_TEST_BUILD
+