The HTTP message body is gzip encoded and the FEXTRA flag is set in the gzip header.
+119:279
+
+Detected nesting of unescape functions (unescape, decodeURI, decodeURIComponent) in JavaScript code.
+This indicates that the code most likely has more than one level of obfuscation. This alert is raised
+by the enhanced JavaScript normalizer.
+
+119:280
+
+Detected more than one encoding within the arguments of an unescape function call in JavaScript code.
+This alert is raised by the enhanced JavaScript normalizer.
+
121:1
Invalid flag set on HTTP/2 frame header
The default list of ignore-identifiers is present in "snort_defaults.lua".
+Unescape function names should remain intact in the output, so they ought to be
+included in the ignore list. If for some reason the user wants to disable
+unescape-related features, removing the function's name from the ignore list does the trick.
+
===== xff_headers
This configuration supports defining custom x-forwarded-for type headers. In a
var a = console.log
a("hello") // will be substituted to 'console.log("hello")'
+In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape
+functions (unescape, decodeURI, decodeURIComponent). This allows detection of
+unescape functions nested within other unescape functions, which is a potential
+indicator of multilevel obfuscation. Recognition of such a function call depends on
+identifier substitution, so these identifiers must be included in the ignore list in
+order to use this feature.
+
JS Normalizer's syntax parser follows ECMA-262 standard. For various features,
tracking of variable scope and individual brackets is done in accordance to the standard.
Additionally, Normalizer enforces standard limits on HTML content in JavaScript:
INF_INVALID_SUBVERSION = 133,
INF_VERSION_0 = 134,
INF_GZIP_FEXTRA = 135,
+ INF_JS_UNESCAPE_NEST = 136,
+ INF_JS_MULTIPLE_ENC = 137,
INF__MAX_VALUE
};
EVENT_VERSION_0 = 276,
EVENT_VERSION_HIGHER_THAN_1 = 277,
EVENT_GZIP_FEXTRA = 278,
+ EVENT_JS_UNESCAPE_NEST = 279,
+ EVENT_JS_MULTIPLE_ENC = 280,
EVENT__MAX_VALUE
};
break;
}
+ if (js_ctx.is_unescape_nesting_seen())
+ {
+ *infractions += INF_JS_UNESCAPE_NEST;
+ events->create_event(EVENT_JS_UNESCAPE_NEST);
+ }
+
if (ssn->js_built_in_event)
break;
}
*infractions += INF_JS_CODE_IN_EXTERNAL;
events->create_event(EVENT_JS_CODE_IN_EXTERNAL);
}
+ if (js_ctx.is_unescape_nesting_seen())
+ {
+ *infractions += INF_JS_UNESCAPE_NEST;
+ events->create_event(EVENT_JS_UNESCAPE_NEST);
+ }
script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
}
{ EVENT_VERSION_0, "HTTP version in start line is 0" },
{ EVENT_VERSION_HIGHER_THAN_1, "HTTP version in start line is higher than 1" },
{ EVENT_GZIP_FEXTRA, "HTTP gzip body with the FEXTRA flag set" },
+ { EVENT_JS_UNESCAPE_NEST, "nested unescape functions in JavaScript code" },
+ { EVENT_JS_MULTIPLE_ENC, "mixing of escape formats in JavaScript code" },
{ 0, nullptr }
};
static size_t size()
{ return sizeof(JSNormalizer) + 16834; /* YY_BUF_SIZE */ }
+ bool is_unescape_nesting_seen() const // forwards the tokenizer's nested-unescape flag
+ { return tokenizer.is_unescape_nesting_seen(); }
+
#ifdef CATCH_TEST_BUILD
const char* get_tmp_buf() const
{ return tmp_buf; }
size_t get_tmp_buf_size() const
{ return tmp_buf_size; }
+ const JSTokenizer& get_tokenizer() const
+ { return tokenizer; }
#endif
#ifdef BENCHMARK_TEST
enum JSProgramScopeType : unsigned int;
class JSIdentifierCtxBase;
-
+#ifdef CATCH_TEST_BUILD
+class JSTokenizerTester;
+#endif
class JSTokenizer : public yyFlexLexer
{
private:
OBJECT, // object definition, class definition
SCOPE_META_TYPE_MAX
};
+ enum FuncType // call kind recorded for each Scope (see Scope::func_call_type)
+ {
+ NOT_FUNC = 0, // not a function call; the value every Scope is constructed with
+ GENERAL, // a call whose name is not one of function_identifiers
+ UNESCAPE, // unescape, decodeURI or decodeURIComponent
+ CHAR_CODE // String.fromCharCode
+ };
struct Scope
{
Scope(ScopeType t) :
- type(t), meta_type(ScopeMetaType::NOT_SET), ident_norm(true), func_call(false),
- block_param(false), do_loop(false)
+ type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC),
+ ident_norm(true), block_param(false), do_loop(false)
{}
ScopeType type;
ScopeMetaType meta_type;
+ FuncType func_call_type; // NOT_FUNC unless set through set_func_call_type(); replaces the former bool func_call
bool ident_norm;
- bool func_call;
bool block_param;
bool do_loop;
};
JSRet process(size_t& bytes_in);
+ bool is_unescape_nesting_seen() const;
protected:
[[noreturn]] void LexerError(const char* msg) override
{ snort::FatalError("%s", msg); }
ScopeMetaType meta_type();
void set_ident_norm(bool);
bool ident_norm();
- void set_func_call(bool);
- bool func_call();
+ void set_func_call_type(FuncType);
+ FuncType func_call_type();
+ FuncType detect_func_type();
+ void check_function_nesting(FuncType);
void set_block_param(bool);
bool block_param();
void set_do_loop(bool);
void dealias_append();
void dealias_finalize();
+ //rule handlers
+ JSRet html_closing_script_tag();
+ JSRet literal_dq_string_start();
+ JSRet literal_sq_string_start();
+ JSRet literal_template_start();
+ JSRet literal_regex_start();
+ void div_assignment_operator();
+ JSRet open_brace();
+ JSRet close_brace();
+ JSRet open_parenthesis();
+ JSRet close_parenthesis();
+ JSRet open_bracket();
+ JSRet close_bracket();
+ JSRet punctuator_prefix();
+ void dot_accessor();
+ JSRet punctuator_arrow();
+ JSRet punctuator_semicolon();
+ void punctuator_colon();
+ void operator_comparison();
+ void operator_complex_assignment();
+ void operator_logical();
+ void operator_shift();
+ void punctuator_comma();
+ JSRet use_strict_directive();
+ JSRet use_strict_directive_sc();
+ JSRet keyword_var_decl();
+ JSRet keyword_function();
+ JSRet keyword_catch();
+ JSRet keyword_while();
+ JSRet keyword_B();
+ JSRet keyword_BA();
+ JSRet keyword_finally();
+ JSRet keyword_do();
+ JSRet keyword_class();
+ JSRet keyword_other();
+ void operator_assignment();
+ JSRet operator_prefix();
+ JSRet operator_incr_decr();
+ JSRet general_operator();
+ JSRet general_literal();
+ JSRet general_identifier();
+ void general_unicode();
+
static const char* p_scope_codes[];
void* cur_buffer;
AliasState alias_state = ALIAS_NONE;
bool prefix_increment = false;
bool dealias_stored = false;
+ bool unescape_nest_seen = false;
uint8_t max_template_nesting;
std::stack<uint16_t, std::vector<uint16_t>> brace_depth;
{false, false, false, false, false, false, false, false, false, false, false,}
};
+ std::streampos ignored_id_pos = -1;
+ struct FunctionIdentifier // a function name and the FuncType assigned to calls of it
+ {
+ bool operator< (const FunctionIdentifier& other) const // orders by name length only, not by content
+ { return identifier.size() < other.identifier.size(); }
+
+ std::string identifier; // compared byte-for-byte against emitted output in detect_func_type()
+ FuncType type;
+ };
+
+ const std::array<FunctionIdentifier, 4> function_identifiers // names detect_func_type() looks for; matching is exact
+ {{
+ {"unescape", FuncType::UNESCAPE },
+ {"decodeURI", FuncType::UNESCAPE },
+ {"decodeURIComponent", FuncType::UNESCAPE },
+ {"String.fromCharCode", FuncType::CHAR_CODE }
+ }};
+
const uint32_t max_bracket_depth;
std::stack<Scope> scope_stack;
+
+#ifdef CATCH_TEST_BUILD
+ friend JSTokenizerTester;
+#endif // CATCH_TEST_BUILD
};
#endif // JS_TOKENIZER_H
{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; }
<INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
-{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); if (!global_scope()) RETURN(ENDED_IN_INNER_SCOPE) else RETURN(SCRIPT_ENDED) }
+{HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) }
{HTML_COMMENT_OPEN} { BEGIN(lcomm); }
{LINE_COMMENT_START} { BEGIN(lcomm); }
<bcomm>{BLOCK_COMMENT_SKIP} { }
<bcomm><<EOF>> { RETURN(SCRIPT_CONTINUE) }
- {LITERAL_DQ_STRING_START} { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); set_ident_norm(true); }
+ {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) }
<dqstr>{LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
<dqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
<dqstr>\\{CR}{LF} { }
<dqstr>{LITERAL_DQ_STRING_TEXT} { dealias_append(); ECHO; }
<dqstr><<EOF>> { RETURN(SCRIPT_CONTINUE) }
- {LITERAL_SQ_STRING_START} { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); set_ident_norm(true); }
+ {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) }
<sqstr>{LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); }
<sqstr>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
<sqstr>\\{CR}{LF} { }
<sqstr>{LITERAL_SQ_STRING_TEXT} { dealias_append(); ECHO; }
<sqstr><<EOF>> { RETURN(SCRIPT_CONTINUE) }
- {LITERAL_TEMPLATE_START} { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); set_ident_norm(true); }
+ {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) }
<tmpll>(\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); }
<tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); }
<tmpll>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
<tmpll>{LITERAL_TEMPLATE_OTHER} { dealias_append(); ECHO; }
<tmpll><<EOF>> { RETURN(SCRIPT_CONTINUE) }
-<regst>{LITERAL_REGEX_START} { dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); set_ident_norm(true); }
+<regst>{LITERAL_REGEX_START} { EXEC(literal_regex_start()) }
<regex>{LITERAL_REGEX_END} { ECHO; BEGIN(divop); }
<regex>{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) }
<regex>{LITERAL_REGEX_SKIP} { ECHO; }
<regex><<EOF>> { RETURN(SCRIPT_CONTINUE) }
<divop>{DIV_OPERATOR} |
-<divop>{DIV_ASSIGNMENT_OPERATOR} { dealias_equals(true); previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); set_ident_norm(true); }
-
-{OPEN_BRACE} { dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_1)) if (meta_type() == ScopeMetaType::NOT_SET) { if (is_operator(token) || token == COLON || func_call()) set_meta_type(ScopeMetaType::OBJECT); else { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } } EXEC(scope_push(BRACES)) if (!brace_depth.empty()) brace_depth.top()++; process_punctuator(); }
-{CLOSE_BRACE} { dealias_clear_mutated(false); EXEC(do_semicolon_insertion(ASI_GROUP_2)) if (meta_type() != ScopeMetaType::NOT_SET) EXEC(p_scope_pop(meta_type())) EXEC(scope_pop(BRACES)) process_closing_brace(); set_ident_norm(true); }
-{OPEN_PARENTHESIS} { dealias_clear_mutated(true); dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_3)) EXEC(scope_push(PARENTHESES)) if (token == IDENTIFIER || token == CLOSING_BRACKET || token == KEYWORD) set_func_call(true); process_punctuator(); }
-{CLOSE_PARENTHESIS} { dealias_clear_mutated(false); dealias_reset(); bool f_call = func_call(); bool id_norm = ident_norm(); if (meta_type() != ScopeMetaType::NOT_SET) EXEC(p_scope_pop(meta_type())) EXEC(scope_pop(PARENTHESES)) if (!f_call) set_ident_norm(id_norm); if (block_param()) { previous_group = ASI_OTHER; set_block_param(false); } else { EXEC(do_semicolon_insertion(ASI_GROUP_5)) } ECHO; token = PUNCTUATOR; BEGIN(divop); }
-{OPEN_BRACKET} { dealias_clear_mutated(true); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_3)) EXEC(do_semicolon_insertion(ASI_GROUP_4)) EXEC(scope_push(BRACKETS)) process_punctuator(); }
-{CLOSE_BRACKET} { dealias_clear_mutated(false); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_4)) EXEC(scope_pop(BRACKETS)) ECHO; token = CLOSING_BRACKET; BEGIN(divop); }
-
-{PUNCTUATOR_PREFIX} { process_punctuator(); EXEC(do_semicolon_insertion(ASI_GROUP_10)) set_ident_norm(true); }
-{DOT_ACCESSOR} { dealias_clear_mutated(true); previous_group = ASI_OTHER; dealias_append(); ECHO; token = DOT; BEGIN(regst); }
-{PUNCTUATOR_ARROW} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_reset(); process_punctuator(); set_ident_norm(true); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::FUNCTION); EXEC(p_scope_push(meta_type())) } }
-{PUNCTUATOR_SEMICOLON} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_finalize(); process_punctuator(); set_ident_norm(true); if (meta_type() != ScopeMetaType::NOT_SET) { EXEC(p_scope_pop(meta_type())) set_meta_type(ScopeMetaType::NOT_SET); } }
-{PUNCTUATOR_COLON} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_reset(); process_punctuator(COLON); set_ident_norm(true); }
-{OPERATOR_COMPARISON} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_COMPARISON); set_ident_norm(true); }
-{OPERATOR_COMPLEX_ASSIGNMENT} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_equals(true); process_punctuator(OPERATOR_COMPLEX_ASSIGNMENT); set_ident_norm(true); }
-{OPERATOR_LOGICAL} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_LOGICAL); set_ident_norm(true); }
-{OPERATOR_SHIFT} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_SHIFT); set_ident_norm(true); }
-{PUNCTUATOR_COMMA} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_finalize(); process_punctuator(); set_ident_norm(true); }
-
-{USE_STRICT_DIRECTIVE} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; set_ident_norm(true); }
-{USE_STRICT_DIRECTIVE_SC} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); set_ident_norm(true); }
-
-{KEYWORD_VAR_DECL} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); alias_state = ALIAS_NONE; EXEC(do_spacing(KEYWORD_VAR_DECL)) ECHO; BEGIN(regst); }
-{KEYWORD_FUNCTION} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_FUNCTION)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) set_meta_type(ScopeMetaType::FUNCTION); }
+<divop>{DIV_ASSIGNMENT_OPERATOR} { div_assignment_operator(); }
+
+{OPEN_BRACE} { EXEC(open_brace()) }
+{CLOSE_BRACE} { EXEC(close_brace()) }
+{OPEN_PARENTHESIS} { EXEC(open_parenthesis()) }
+{CLOSE_PARENTHESIS} { EXEC(close_parenthesis()) }
+{OPEN_BRACKET} { EXEC(open_bracket()) }
+{CLOSE_BRACKET} { EXEC(close_bracket()) }
+
+{PUNCTUATOR_PREFIX} { EXEC(punctuator_prefix()) }
+{DOT_ACCESSOR} { dot_accessor(); }
+{PUNCTUATOR_ARROW} { EXEC(punctuator_arrow()) }
+{PUNCTUATOR_SEMICOLON} { EXEC(punctuator_semicolon()) }
+{PUNCTUATOR_COLON} { punctuator_colon(); }
+{OPERATOR_COMPARISON} { operator_comparison(); }
+{OPERATOR_COMPLEX_ASSIGNMENT} { operator_complex_assignment(); }
+{OPERATOR_LOGICAL} { operator_logical(); }
+{OPERATOR_SHIFT} { operator_shift(); }
+{PUNCTUATOR_COMMA} { punctuator_comma(); }
+
+{USE_STRICT_DIRECTIVE} { EXEC(use_strict_directive()) }
+{USE_STRICT_DIRECTIVE_SC} { EXEC(use_strict_directive_sc()) }
+
+{KEYWORD_VAR_DECL} { EXEC(keyword_var_decl()) }
+{KEYWORD_FUNCTION} { EXEC(keyword_function()) }
{KEYWORD_IF} |
{KEYWORD_FOR} |
{KEYWORD_WITH} |
{KEYWORD_SWITCH} |
-{KEYWORD_CATCH} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } set_block_param(true); }
-{KEYWORD_WHILE} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } if (do_loop()) set_do_loop(false); else set_block_param(true); }
-{KEYWORD_B} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-{KEYWORD_BA} { EXEC(do_semicolon_insertion(ASI_GROUP_9)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
+{KEYWORD_CATCH} { EXEC(keyword_catch()) }
+{KEYWORD_WHILE} { EXEC(keyword_while()) }
+{KEYWORD_B} { EXEC(keyword_B()) }
+{KEYWORD_BA} { EXEC(keyword_BA()) }
{KEYWORD_TRY} |
{KEYWORD_ELSE} |
-{KEYWORD_FINALLY} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } }
-{KEYWORD_DO} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } set_do_loop(true); }
-{KEYWORD_CLASS} { previous_group = ASI_OTHER; dealias_reset(); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_CLASS)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) set_meta_type(ScopeMetaType::OBJECT); }
-{KEYWORD_OTHER} { previous_group = ASI_OTHER; dealias_reset(); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-
-{OPERATOR_ASSIGNMENT} { previous_group = ASI_OTHER; dealias_equals(false); process_punctuator(OPERATOR_ASSIGNMENT); set_ident_norm(true); }
-{OPERATOR_PREFIX} { dealias_prefix_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_6)) EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true); }
-{OPERATOR_INCR_DECR} { dealias_increment(); dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_8)) EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true); }
-{OPERATOR} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true);}
-{LITERAL} { dealias_clear_mutated(false); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); set_ident_norm(true); }
-{IDENTIFIER} {
- if (unescape(YYText())) {
- bool id_part = (token == DOT);
- bool assignment_start = token == KEYWORD_VAR_DECL ||
- token == PUNCTUATOR ||
- token == UNDEFINED;
- EXEC(do_semicolon_insertion(ASI_GROUP_7))
- EXEC(do_spacing(IDENTIFIER))
- EXEC(do_identifier_substitution(YYText(), id_part))
- dealias_identifier(id_part, assignment_start);
- }
- else
- EXEC(do_semicolon_insertion(ASI_GROUP_7))
- BEGIN(divop);
- }
-
-.|{ALL_UNICODE} { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); set_ident_norm(true); }
+{KEYWORD_FINALLY} { EXEC(keyword_finally()) }
+{KEYWORD_DO} { EXEC(keyword_do()) }
+{KEYWORD_CLASS} { EXEC(keyword_class()) }
+{KEYWORD_OTHER} { EXEC(keyword_other()) }
+
+{OPERATOR_ASSIGNMENT} { operator_assignment(); }
+{OPERATOR_PREFIX} { EXEC(operator_prefix()) }
+{OPERATOR_INCR_DECR} { EXEC(operator_incr_decr()) }
+{OPERATOR} { EXEC(general_operator()) }
+{LITERAL} { EXEC(general_literal()) }
+{IDENTIFIER} { EXEC(general_identifier()) }
+
+.|{ALL_UNICODE} { general_unicode(); }
<<EOF>> { EEOF(eval_eof()) }
%%
if (ident_ctx.is_ignored(lexeme) && !id_part)
{
+ ignored_id_pos = yyout.rdbuf()->pubseekoff(0, yyout.cur, std::ios_base::out);
set_ident_norm(false);
yyout << lexeme;
return EOS;
if (ident)
{
set_ident_norm(false);
+ ignored_id_pos = yyout.rdbuf()->pubseekoff(0, yyout.cur, std::ios_base::out);
last_dealiased = std::string(YYText());
dealias_stored = true;
}
else
+ {
+ ignored_id_pos = -1;
ident = ident_ctx.substitute(lexeme);
+ }
if (!ident)
{
return scope_cur().ident_norm;
}
-void JSTokenizer::set_func_call(bool f)
+void JSTokenizer::set_func_call_type(JSTokenizer::FuncType f) // stores f on the current scope
+{
+ scope_cur().func_call_type = f;
+}
+
+JSTokenizer::FuncType JSTokenizer::func_call_type() // call type recorded on the current scope
{
- scope_cur().func_call = f;
+ return scope_cur().func_call_type;
}
-bool JSTokenizer::func_call()
+// Classifies the call opened by the '(' being processed, from the token that
+// precedes it:
+//  - CLOSING_BRACKET or KEYWORD: a call of unknown kind (GENERAL);
+//  - IDENTIFIER: GENERAL, unless the text emitted since ignored_id_pos is
+//    exactly one of function_identifiers, in which case that entry's type is
+//    returned;
+//  - anything else: not a call (NOT_FUNC).
+//
+// The emitted text is inspected through the output streambuf: the put position
+// is moved back to ignored_id_pos, the tail is read with sgetn(), and that read
+// is what moves the position forward again. Every step is therefore checked at
+// run time; on any inconsistency (stale or out-of-range position, failed seek,
+// short read) the call is reported as GENERAL and the put position is left
+// where it was.
+JSTokenizer::FuncType JSTokenizer::detect_func_type()
{
- return scope_cur().func_call;
+    switch (token)
+    {
+    case CLOSING_BRACKET:
+    case KEYWORD:
+        return FuncType::GENERAL;
+
+    case IDENTIFIER:
+    {
+        // No ignored identifier is being tracked, nothing to match against.
+        if (ignored_id_pos < 0)
+            return FuncType::GENERAL;
+
+        std::streambuf* pbuf = yyout.rdbuf();
+        const std::streamsize size =
+            pbuf->pubseekoff(0, yyout.cur, yyout.out) - ignored_id_pos;
+
+        char tail[256];
+
+        // A non-positive size means the saved position does not precede the
+        // current one; an oversized tail cannot equal any tracked name.
+        // Neither case may reach the seek/read below.
+        if (size <= 0 || size > static_cast<std::streamsize>(sizeof(tail)))
+            return FuncType::GENERAL;
+
+        // Step back to the start of the identifier; a failed seek reports -1.
+        const std::streamoff start = pbuf->pubseekoff(-size, yyout.cur, yyout.out);
+        if (start < 0)
+            return FuncType::GENERAL;
+
+        const std::streamsize got = pbuf->sgetn(tail, size);
+        if (got != size)
+        {
+            // Short read: put the write position back where it was.
+            pbuf->pubseekoff(size - got, yyout.cur, yyout.out);
+            return FuncType::GENERAL;
+        }
+
+        for (const auto& id : function_identifiers)
+        {
+            if (size == static_cast<std::streamsize>(id.identifier.size()) &&
+                memcmp(tail, id.identifier.data(), id.identifier.size()) == 0)
+                return id.type;
+        }
+
+        return FuncType::GENERAL;
+    }
+
+    default:
+        return FuncType::NOT_FUNC;
+    }
+}
+
+// Raises the nested-unescape flag when an UNESCAPE call is being opened while
+// the current scope is itself marked as an UNESCAPE call.
+// Any other call type leaves the flag untouched.
+void JSTokenizer::check_function_nesting(JSTokenizer::FuncType type)
+{
+    if (type != JSTokenizer::FuncType::UNESCAPE)
+        return;
+
+    if (func_call_type() == JSTokenizer::FuncType::UNESCAPE)
+        unescape_nest_seen = true;
+}
+
+bool JSTokenizer::is_unescape_nesting_seen() const // true once check_function_nesting() has set the flag; process() clears it
+{
+ return unescape_nest_seen;
}
void JSTokenizer::set_block_param(bool f)
dealias_reset();
}
+// Rule handler for HTML_TAG_SCRIPT_CLOSE.
+// Returns SCRIPT_ENDED when global_scope() holds, ENDED_IN_INNER_SCOPE otherwise.
+// NOTE(review): the inline rule action this handler replaced also executed
+// BEGIN(regst) before returning; confirm that the EXEC wrapper at the call
+// site still resets the start condition.
+JSTokenizer::JSRet JSTokenizer::html_closing_script_tag()
+{
+    if (!global_scope())
+        return ENDED_IN_INNER_SCOPE;
+
+    return SCRIPT_ENDED;
+}
+
+JSTokenizer::JSRet JSTokenizer::literal_dq_string_start()
+{
+ dealias_append();
+ EXEC(do_semicolon_insertion(ASI_GROUP_7))
+ EXEC(do_spacing(LITERAL))
+ ECHO;
+ BEGIN(dqstr);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::literal_sq_string_start()
+{
+ dealias_append();
+ EXEC(do_semicolon_insertion(ASI_GROUP_7))
+ EXEC(do_spacing(LITERAL))
+ ECHO;
+ BEGIN(sqstr);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::literal_template_start()
+{
+ dealias_append();
+ EXEC(do_semicolon_insertion(ASI_GROUP_7))
+ EXEC(do_spacing(LITERAL))
+ ECHO;
+ BEGIN(tmpll);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::literal_regex_start()
+{
+ dealias_reset();
+ EXEC(do_semicolon_insertion(ASI_GROUP_7))
+ EXEC(do_spacing(LITERAL))
+ yyout << '/';
+ states_correct(1);
+ yyless(1);
+ BEGIN(regex);
+ set_ident_norm(true);
+ return EOS;
+}
+
+void JSTokenizer::div_assignment_operator()
+{
+ dealias_equals(true);
+ previous_group = ASI_OTHER;
+ ECHO;
+ token = PUNCTUATOR;
+ BEGIN(INITIAL);
+ set_ident_norm(true);
+}
+
+JSTokenizer::JSRet JSTokenizer::open_brace() // rule handler for OPEN_BRACE
+{
+ dealias_reset();
+ EXEC(do_semicolon_insertion(ASI_GROUP_1))
+ if (meta_type() == ScopeMetaType::NOT_SET) // no keyword/arrow has already decided what this brace opens
+ {
+ if (is_operator(token) || token == COLON || func_call_type() != FuncType::NOT_FUNC) // after an operator or colon, or while the current scope is marked as a call
+ set_meta_type(ScopeMetaType::OBJECT);
+ else
+ {
+ set_meta_type(ScopeMetaType::BLOCK);
+ EXEC(p_scope_push(meta_type()))
+ }
+ }
+ EXEC(scope_push(BRACES))
+ if (!brace_depth.empty())
+ brace_depth.top()++; // count this brace on the innermost brace_depth entry
+ process_punctuator();
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::close_brace()
+{
+ dealias_clear_mutated(false);
+ EXEC(do_semicolon_insertion(ASI_GROUP_2))
+ if (meta_type() != ScopeMetaType::NOT_SET)
+ EXEC(p_scope_pop(meta_type()))
+ EXEC(scope_pop(BRACES))
+ process_closing_brace();
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::open_parenthesis() // rule handler for OPEN_PARENTHESIS
+{
+ dealias_clear_mutated(true);
+ dealias_reset();
+ EXEC(do_semicolon_insertion(ASI_GROUP_3))
+ FuncType f_call = detect_func_type(); // classify the call from the token preceding '('
+ check_function_nesting(f_call); // runs before scope_push, so it sees the func_call_type of the still-current scope
+ EXEC(scope_push(PARENTHESES))
+ set_func_call_type(f_call); // written to scope_cur(), presumably the scope just pushed
+ process_punctuator();
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::close_parenthesis() // rule handler for CLOSE_PARENTHESIS
+{
+ dealias_clear_mutated(false);
+ dealias_reset();
+ FuncType f_call = func_call_type(); // read from the current scope before scope_pop below
+ bool id_norm = ident_norm(); // read from the current scope before scope_pop below
+ if (meta_type() != ScopeMetaType::NOT_SET)
+ EXEC(p_scope_pop(meta_type()))
+ EXEC(scope_pop(PARENTHESES))
+ if (f_call == FuncType::NOT_FUNC) // the '(' was not classified as a call
+ set_ident_norm(id_norm); // carry the saved ident_norm over to the scope that is current after the pop
+ if (block_param())
+ {
+ previous_group = ASI_OTHER;
+ set_block_param(false);
+ }
+ else
+ {
+ EXEC(do_semicolon_insertion(ASI_GROUP_5))
+ }
+ ECHO;
+ token = PUNCTUATOR;
+ BEGIN(divop);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::open_bracket()
+{
+ dealias_clear_mutated(true);
+ dealias_append();
+ EXEC(do_semicolon_insertion(ASI_GROUP_3))
+ EXEC(do_semicolon_insertion(ASI_GROUP_4))
+ EXEC(scope_push(BRACKETS))
+ process_punctuator();
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::close_bracket()
+{
+ dealias_clear_mutated(false);
+ dealias_append();
+ EXEC(do_semicolon_insertion(ASI_GROUP_4))
+ EXEC(scope_pop(BRACKETS))
+ ECHO;
+ token = CLOSING_BRACKET;
+ BEGIN(divop);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::punctuator_prefix()
+{
+ process_punctuator();
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ set_ident_norm(true);
+ return EOS;
+}
+
+void JSTokenizer::dot_accessor()
+{
+ dealias_clear_mutated(true);
+ previous_group = ASI_OTHER;
+ dealias_append();
+ ECHO;
+ token = DOT;
+ BEGIN(regst);
+}
+
+JSTokenizer::JSRet JSTokenizer::punctuator_arrow()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_reset();
+ process_punctuator();
+ set_ident_norm(true);
+ if (meta_type() == ScopeMetaType::NOT_SET)
+ {
+ set_meta_type(ScopeMetaType::FUNCTION);
+ EXEC(p_scope_push(meta_type()))
+ }
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::punctuator_semicolon()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_finalize();
+ process_punctuator();
+ set_ident_norm(true);
+ if (meta_type() != ScopeMetaType::NOT_SET)
+ {
+ EXEC(p_scope_pop(meta_type()))
+ set_meta_type(ScopeMetaType::NOT_SET);
+ }
+ return EOS;
+}
+
+void JSTokenizer::punctuator_colon()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_reset();
+ process_punctuator(COLON);
+ set_ident_norm(true);
+}
+
+void JSTokenizer::operator_comparison()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_prefix_reset();
+ process_punctuator(OPERATOR_COMPARISON);
+ set_ident_norm(true);
+}
+
+void JSTokenizer::operator_complex_assignment()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_equals(true);
+ process_punctuator(OPERATOR_COMPLEX_ASSIGNMENT);
+ set_ident_norm(true);
+}
+
+void JSTokenizer::operator_logical()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_prefix_reset();
+ process_punctuator(OPERATOR_LOGICAL);
+ set_ident_norm(true);
+}
+
+void JSTokenizer::operator_shift()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_prefix_reset();
+ process_punctuator(OPERATOR_SHIFT);
+ set_ident_norm(true);
+}
+
+void JSTokenizer::punctuator_comma()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_finalize();
+ process_punctuator();
+ set_ident_norm(true);
+}
+
+JSTokenizer::JSRet JSTokenizer::use_strict_directive()
+{
+ previous_group = ASI_OTHER;
+ EXEC(do_spacing(DIRECTIVE))
+ ECHO;
+ BEGIN(INITIAL);
+ yyout << ';';
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::use_strict_directive_sc()
+{
+ previous_group = ASI_OTHER;
+ EXEC(do_spacing(DIRECTIVE))
+ ECHO;
+ BEGIN(INITIAL);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_var_decl()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ if (token != DOT)
+ set_ident_norm(true);
+ alias_state = ALIAS_NONE;
+ EXEC(do_spacing(KEYWORD_VAR_DECL))
+ ECHO;
+ BEGIN(regst);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_function()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD_FUNCTION))
+ ECHO;
+ BEGIN(regst);
+ if (meta_type() == ScopeMetaType::NOT_SET)
+ set_meta_type(ScopeMetaType::FUNCTION);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_catch()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD_BLOCK))
+ ECHO;
+ BEGIN(regst);
+ if (meta_type() == ScopeMetaType::NOT_SET)
+ {
+ set_meta_type(ScopeMetaType::BLOCK);
+ EXEC(p_scope_push(meta_type()))
+ }
+ set_block_param(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_while()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD_BLOCK))
+ ECHO;
+ BEGIN(regst);
+ if (meta_type() == ScopeMetaType::NOT_SET)
+ {
+ set_meta_type(ScopeMetaType::BLOCK);
+ EXEC(p_scope_push(meta_type()))
+ }
+ if (do_loop())
+ set_do_loop(false);
+ else
+ set_block_param(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_B()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD))
+ ECHO;
+ BEGIN(regst);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_BA()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_9))
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD))
+ ECHO;
+ BEGIN(regst);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_finally()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD_BLOCK))
+ ECHO;
+ BEGIN(regst);
+ if (meta_type() == ScopeMetaType::NOT_SET)
+ {
+ set_meta_type(ScopeMetaType::BLOCK);
+ EXEC(p_scope_push(meta_type()))
+ }
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_do()
+{
+ EXEC(do_semicolon_insertion(ASI_GROUP_10))
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD_BLOCK))
+ ECHO;
+ BEGIN(regst);
+ if (meta_type() == ScopeMetaType::NOT_SET)
+ {
+ set_meta_type(ScopeMetaType::BLOCK);
+ EXEC(p_scope_push(meta_type()))
+ }
+ set_do_loop(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_class()
+{
+ previous_group = ASI_OTHER;
+ dealias_reset();
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD_CLASS))
+ ECHO;
+ BEGIN(regst);
+ if (meta_type() == ScopeMetaType::NOT_SET)
+ set_meta_type(ScopeMetaType::OBJECT);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::keyword_other()
+{
+ previous_group = ASI_OTHER;
+ dealias_reset();
+ if (token != DOT)
+ set_ident_norm(true);
+ EXEC(do_spacing(KEYWORD))
+ ECHO;
+ BEGIN(regst);
+ return EOS;
+}
+
+void JSTokenizer::operator_assignment()
+{
+ previous_group = ASI_OTHER;
+ dealias_equals(false);
+ process_punctuator(OPERATOR_ASSIGNMENT);
+ set_ident_norm(true);
+}
+
+JSTokenizer::JSRet JSTokenizer::operator_prefix()
+{
+ dealias_prefix_reset();
+ EXEC(do_semicolon_insertion(ASI_GROUP_6))
+ EXEC(do_operator_spacing())
+ ECHO;
+ BEGIN(divop);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::operator_incr_decr()
+{
+ dealias_increment();
+ dealias_reset();
+ EXEC(do_semicolon_insertion(ASI_GROUP_8))
+ EXEC(do_operator_spacing())
+ ECHO;
+ BEGIN(divop);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::general_operator()
+{
+ dealias_clear_mutated(false);
+ previous_group = ASI_OTHER;
+ dealias_prefix_reset();
+ EXEC(do_operator_spacing())
+ ECHO;
+ BEGIN(divop);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::general_literal()
+{
+ dealias_clear_mutated(false);
+ dealias_append();
+ EXEC(do_semicolon_insertion(ASI_GROUP_7))
+ EXEC(do_spacing(LITERAL))
+ ECHO;
+ BEGIN(divop);
+ set_ident_norm(true);
+ return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::general_identifier() // rule handler for IDENTIFIER
+{
+ if (unescape(YYText()))
+ {
+ bool id_part = (token == DOT); // identifier directly follows a '.' accessor
+ bool assignment_start = token == KEYWORD_VAR_DECL || token == PUNCTUATOR ||
+ token == UNDEFINED;
+ EXEC(do_semicolon_insertion(ASI_GROUP_7))
+ EXEC(do_spacing(IDENTIFIER))
+ EXEC(do_identifier_substitution(YYText(), id_part))
+ dealias_identifier(id_part, assignment_start);
+ }
+ else // NOTE(review): only the EXEC on the next line belongs to this else, assuming EXEC expands to a braced block -- confirm
+ EXEC(do_semicolon_insertion(ASI_GROUP_7))
+ BEGIN(divop); // reached on both paths under the assumption noted above
+ return EOS;
+}
+
+void JSTokenizer::general_unicode()
+{
+ previous_group = ASI_OTHER;
+ ECHO;
+ token = UNDEFINED;
+ BEGIN(INITIAL);
+ set_ident_norm(true);
+}
+
JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in)
{
yy_flush_buffer(YY_CURRENT_BUFFER);
+ unescape_nest_seen = false; // cleared on every call, so the flag only reflects the yylex() run below
auto r = yylex();
bytes_read = 0;
return static_cast<JSTokenizer::JSRet>(r);
-}
+}
\ No newline at end of file
return n;
}
+// Copies up to n characters into s, starting at the current put pointer, and
+// advances the put pointer past them. The count is capped by the distance from
+// pptr() to epptr(). Returns the number of characters copied, or 0 when the
+// put pointer is already at epptr().
+// cppcheck-suppress unusedFunction
+streamsize ostreambuf_infl::xsgetn(char* s, streamsize n)
+{
+    assert(n >= 0);
+
+    if (pptr() == epptr())
+        return 0;
+
+    n = max(0, n);
+    n = min(epptr() - pptr(), n);
+
+    memcpy(s, pptr(), n);
+    pbump(n);
+
+    return n;
+}
+
// cppcheck-suppress unusedFunction
int ostreambuf_infl::overflow(int c)
{
std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override;
virtual int sync() override;
virtual std::streamsize xsputn(const char* s, std::streamsize n) override;
+ virtual std::streamsize xsgetn(char* s, std::streamsize n) override; // Bulk read: copies up to n chars into s and returns the number copied.
virtual int overflow(int c = EOF) override;
bool enlarge();
}
}
+// Each case is {input, expected normalized script, expected function-call
+// scope stack listed from the global scope up to the innermost open call}.
+// A call that is closed within the input leaves only the global NOT_FUNC
+// entry on the expected stack.
+TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
+{
+ JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting,
+ max_bracket_depth);
+
+ using FuncType = JSTokenizerTester::FuncType;
+
+ SECTION("Global only")
+ {
+ tester.test_function_scopes({{ "", "", {FuncType::NOT_FUNC}}});
+ }
+ // Identifiers outside the ignore list are substituted (var_0000) and
+ // their calls are tracked as GENERAL.
+ SECTION("General function call")
+ {
+ SECTION("in arguments")
+ {
+ tester.test_function_scopes({
+ {"general(", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ SECTION("separated identifier and call")
+ {
+ tester.test_function_scopes({
+ {"general /*comment*/ (", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ SECTION("complete call")
+ {
+ tester.test_function_scopes({
+ {"general('%62%61%72')", "var_0000('%62%61%72')", {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as named function definition")
+ {
+ tester.test_function_scopes({
+ {"general(){", "var_0000(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("after defined function identifier")
+ {
+ tester.test_function_scopes({
+ {"unescape;hello(", "unescape;var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ SECTION("fake defined function identifier")
+ {
+ tester.test_function_scopes({
+ {"fake_unescape(", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ SECTION("ignored fake defined function identifier")
+ {
+ const std::unordered_set<std::string> s_ignored_ids_fake {"fake_unescape"};
+ JSTokenizerTester tester_fake(norm_depth, max_scope_depth, s_ignored_ids_fake,
+ max_template_nesting, max_bracket_depth);
+ tester_fake.test_function_scopes({
+ {"fake_unescape(", "fake_unescape(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ SECTION("as a template literal substitution")
+ {
+ tester.test_function_scopes({
+ {"`unescape ${general(", "`unescape ${var_0000(",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ }
+ // The three sections below repeat the same scenarios for each function
+ // that is tracked as UNESCAPE.
+ SECTION("unescape function call")
+ {
+ SECTION("in arguments")
+ {
+ tester.test_function_scopes({
+ {"unescape(", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("separated identifier and call")
+ {
+ tester.test_function_scopes({
+ {"unescape /*comment*/ (", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("complete call")
+ {
+ tester.test_function_scopes({
+ {"unescape('%62%61%72')", "unescape('%62%61%72')", {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as named function definition")
+ {
+ tester.test_function_scopes({
+ {"unescape(){", "unescape(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("after assignment substitution")
+ {
+ tester.test_function_scopes({
+ {"var a = unescape; a(", "var var_0000=unescape;unescape(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("literal")
+ {
+ tester.test_function_scopes({
+ {"`unescape(", "`unescape(", {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as a template literal substitution")
+ {
+ tester.test_function_scopes({
+ {"`literal ${unescape(", "`literal ${unescape(",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ });
+ }
+ }
+ SECTION("decodeURI function call")
+ {
+ SECTION("in arguments")
+ {
+ tester.test_function_scopes({
+ {"decodeURI(", "decodeURI(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("separated identifier and call")
+ {
+ tester.test_function_scopes({
+ {"decodeURI /*comment*/ (", "decodeURI(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("complete call")
+ {
+ tester.test_function_scopes({
+ {"decodeURI('%62%61%72')", "decodeURI('%62%61%72')", {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as named function definition")
+ {
+ tester.test_function_scopes({
+ {"decodeURI(){", "decodeURI(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("after assignment substitution")
+ {
+ tester.test_function_scopes({
+ {"var a = decodeURI; a(", "var var_0000=decodeURI;decodeURI(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("literal")
+ {
+ tester.test_function_scopes({
+ {"`decodeURI(", "`decodeURI(", {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as a template literal substitution")
+ {
+ tester.test_function_scopes({
+ {"`literal ${decodeURI(", "`literal ${decodeURI(",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ });
+ }
+ }
+ SECTION("decodeURIComponent function call")
+ {
+ SECTION("in arguments")
+ {
+ tester.test_function_scopes({
+ {"decodeURIComponent(", "decodeURIComponent(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("separated identifier and call")
+ {
+ tester.test_function_scopes({
+ {"decodeURIComponent /*comment*/ (", "decodeURIComponent(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("complete call")
+ {
+ tester.test_function_scopes({
+ {"decodeURIComponent('%62%61%72')", "decodeURIComponent('%62%61%72')",
+ {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as named function definition")
+ {
+ tester.test_function_scopes({
+ {"decodeURIComponent(){", "decodeURIComponent(){", {FuncType::NOT_FUNC,
+ FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("after assignment substitution")
+ {
+ tester.test_function_scopes({
+ {"var a = decodeURIComponent; a(",
+ "var var_0000=decodeURIComponent;decodeURIComponent(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}}
+ });
+ }
+ SECTION("literal")
+ {
+ tester.test_function_scopes({
+ {"`decodeURIComponent(", "`decodeURIComponent(", {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as a template literal substitution")
+ {
+ tester.test_function_scopes({
+ {"`literal ${decodeURIComponent(", "`literal ${decodeURIComponent(",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+ });
+ }
+ }
+ // String.fromCharCode is tracked as CHAR_CODE only when reached through
+ // String; a bare fromCharCode( is an ordinary GENERAL call.
+ SECTION("String.fromCharCode method call")
+ {
+ SECTION("in arguments")
+ {
+ tester.test_function_scopes({
+ {"String.fromCharCode(", "String.fromCharCode(",
+ {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+ });
+ }
+ SECTION("separated identifier and call")
+ {
+ tester.test_function_scopes({
+ {"String.fromCharCode /*comment*/ (", "String.fromCharCode(",
+ {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+ });
+ }
+ SECTION("complete call")
+ {
+ tester.test_function_scopes({
+ {"String.fromCharCode( 65, 0x42 )", "String.fromCharCode(65,0x42)",
+ {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as named function definition")
+ {
+ tester.test_function_scopes({
+ {"String.fromCharCode(){", "String.fromCharCode(){",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("after class name assignment substitution")
+ {
+ tester.test_function_scopes({
+ {"var a = String; a.fromCharCode(", "var var_0000=String;String.fromCharCode(",
+ {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+ });
+ }
+ SECTION("after assignment substitution")
+ {
+ tester.test_function_scopes({
+ {"var a = String.fromCharCode; a(",
+ "var var_0000=String.fromCharCode;String.fromCharCode(",
+ {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+ });
+ }
+ SECTION("not a String class member call")
+ {
+ tester.test_function_scopes({
+ {"fromCharCode(",
+ "var_0000(",
+ {FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ SECTION("literal")
+ {
+ tester.test_function_scopes({
+ {"`String.fromCharCode(", "`String.fromCharCode(", {FuncType::NOT_FUNC}}
+ });
+ }
+ SECTION("as a template literal substitution")
+ {
+ tester.test_function_scopes({
+ {"`literal ${String.fromCharCode(", "`literal ${String.fromCharCode(",
+ {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+ });
+ }
+ }
+}
+
+TEST_CASE("Function call tracking - nesting", "[JSNormalizer]") // Checks the scope stack for nested calls and whether nesting of unescape-type calls is reported.
+{
+ JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting,
+ max_bracket_depth);
+
+ using FuncType = JSTokenizerTester::FuncType;
+
+ SECTION("Opening") // Calls are left open, so every opened call stays on the expected stack.
+ {
+ SECTION("Multiple general functions")
+ {
+ tester.test_function_scopes({
+ { "general( general( general(", "var_0000(var_0000(var_0000(",
+ { FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::GENERAL, FuncType::GENERAL}}
+ });
+ CHECK(!tester.is_unescape_nesting_seen());
+ }
+ SECTION("Multiple unescape functions")
+ {
+ tester.test_function_scopes({
+ {"unescape( unescape( unescape(", "unescape(unescape(unescape(",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
+ });
+ CHECK(tester.is_unescape_nesting_seen()); // An unescape call opened directly inside another one must be reported.
+ }
+ SECTION("Multiple different unescape functions")
+ {
+ tester.test_function_scopes({
+ {"unescape( decodeURI( decodeURIComponent(",
+ "unescape(decodeURI(decodeURIComponent(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE,
+ FuncType::UNESCAPE,
+ FuncType::UNESCAPE}}
+ });
+ CHECK(tester.is_unescape_nesting_seen()); // Mixing the three UNESCAPE-type functions counts as nesting as well.
+ }
+ SECTION("Multiple String.fromCharCode functions")
+ {
+ tester.test_function_scopes({
+ {"String.fromCharCode( String.fromCharCode( String.fromCharCode(",
+ "String.fromCharCode(String.fromCharCode(String.fromCharCode(",
+ {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE,
+ FuncType::CHAR_CODE}}
+ });
+ CHECK(!tester.is_unescape_nesting_seen()); // Nested CHAR_CODE calls are not expected to raise the unescape-nesting flag.
+ }
+ SECTION("Mixed function calls")
+ {
+ tester.test_function_scopes({
+ {"general( unescape( String.fromCharCode(",
+ "var_0000(unescape(String.fromCharCode(",
+ {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE,
+ FuncType::CHAR_CODE}}
+ });
+ CHECK(!tester.is_unescape_nesting_seen()); // Only one UNESCAPE call is open here, so no nesting is expected.
+ }
+ }
+ SECTION("Closing") // Two of the three opened calls are closed in each input; only the outermost call remains on the stack.
+ {
+ SECTION("Multiple general functions")
+ {
+ tester.test_function_scopes({
+ {"general( general( general( a ) )", "var_0000(var_0000(var_0000(var_0001))",
+ {FuncType::NOT_FUNC, FuncType::GENERAL}}
+ });
+ }
+ SECTION("Multiple unescape functions")
+ {
+ tester.test_function_scopes({
+ {"unescape( unescape( unescape( '%62%61%72' ) )",
+ "unescape(unescape(unescape('%62%61%72'))", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE }}
+ });
+ }
+ SECTION("Multiple different unescape functions")
+ {
+ tester.test_function_scopes({
+ {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )",
+ "unescape(decodeURI(decodeURIComponent('%62%61%72'))",
+ {FuncType::NOT_FUNC, FuncType::UNESCAPE }}
+ });
+ }
+ SECTION("Multiple String.fromCharCode methods")
+ {
+ tester.test_function_scopes({
+ {"String.fromCharCode( String.fromCharCode( String.fromCharCode( 65, 0x42 ) )",
+ "String.fromCharCode(String.fromCharCode(String.fromCharCode(65,0x42))",
+ {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+ });
+ }
+ SECTION("Mixed function calls")
+ {
+ tester.test_function_scopes({
+ {"general( unescape( String.fromCharCode( 65, 0x42 ) )",
+ "var_0000(unescape(String.fromCharCode(65,0x42))", {FuncType::NOT_FUNC,
+ FuncType::GENERAL}}
+ });
+ }
+ }
+}
+
+TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]") // Each list entry is one PDU fed to the same tester; the expected script is cumulative across the PDUs of a list.
+{
+ JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting,
+ max_bracket_depth);
+
+ using FuncType = JSTokenizerTester::FuncType;
+
+ SECTION("split in the middle of the identifier")
+ {
+ tester.test_function_scopes({
+ {"un", "var_0000", {FuncType::NOT_FUNC}}, // The partial name is substituted like any other identifier...
+ {"escape", "unescape", {FuncType::NOT_FUNC}}, // ...and is emitted intact once the next PDU completes it.
+ {"(", "unescape(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}},
+ {")", "unescape()", {FuncType::NOT_FUNC}},
+ });
+ }
+ SECTION("split between identifier and parenthesis")
+ {
+ tester.test_function_scopes({
+ {"decodeURI", "decodeURI", {FuncType::NOT_FUNC}},
+ {"(", "decodeURI(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}},
+ {")", "decodeURI()", {FuncType::NOT_FUNC}},
+ });
+ }
+ SECTION("comment between identifier and parenthesis") // The comment PDU names another tracked function; the call must still be typed by the identifier before it.
+ {
+ tester.test_function_scopes({
+ {"unescape", "unescape", {FuncType::NOT_FUNC}},
+ {"//String.fromCharCode\n", "unescape", {FuncType::NOT_FUNC}},
+ {"(", "unescape(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}},
+ {")", "unescape()", {FuncType::NOT_FUNC}},
+ });
+ }
+ SECTION("split in arguments")
+ {
+ tester.test_function_scopes({
+ {"general", "var_0000", {FuncType::NOT_FUNC}},
+ {"(", "var_0000(", {FuncType::NOT_FUNC,
+ FuncType::GENERAL}},
+ {"a", "var_0000(var_0001", {FuncType::NOT_FUNC,
+ FuncType::GENERAL}},
+ {"+ b", "var_0000(var_0001+var_0002", {FuncType::NOT_FUNC,
+ FuncType::GENERAL}},
+ {")", "var_0000(var_0001+var_0002)", {FuncType::NOT_FUNC}},
+ });
+ }
+ SECTION("literal in arguments") // Text inside the template literal must not open a new call scope.
+ {
+ tester.test_function_scopes({
+ {"String", "String", {FuncType::NOT_FUNC}},
+ {".fromCharCode", "String.fromCharCode", {FuncType::NOT_FUNC}},
+ {"(`", "String.fromCharCode(`", {FuncType::NOT_FUNC,
+ FuncType::CHAR_CODE}},
+ {"un", "String.fromCharCode(`un", {FuncType::NOT_FUNC,
+ FuncType::CHAR_CODE}},
+ {"escape(", "String.fromCharCode(`unescape(", {FuncType::NOT_FUNC,
+ FuncType::CHAR_CODE}},
+ {"`)", "String.fromCharCode(`unescape(`)", {FuncType::NOT_FUNC}},
+ });
+ }
+ SECTION("Nesting - Mixed function calls")
+ {
+ tester.test_function_scopes({
+ {"decode", "var_0000", {FuncType::NOT_FUNC}},
+ {"URI", "decodeURI", {FuncType::NOT_FUNC}},
+ {"Component", "decodeURIComponent", {FuncType::NOT_FUNC}},
+ {"(", "decodeURIComponent(", {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}},
+ {" a, ", "decodeURIComponent(var_0001,",
+ {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}},
+ {" String.fromCharCode( ar",
+ "decodeURIComponent(var_0001,String.fromCharCode(var_0002",
+ {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE,
+ FuncType::CHAR_CODE}},
+ {"g ), b, foo", // "ar" + "g" completes a different identifier, so var_0003 replaces var_0002 in the expected script.
+ "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0005",
+ {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE}},
+ {"bar( ", // Likewise "foo" + "bar" becomes var_0006 in place of var_0005.
+ "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006(",
+ {FuncType::NOT_FUNC,
+ FuncType::UNESCAPE,
+ FuncType::GENERAL}},
+ {"))",
+ "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006())",
+ {FuncType::NOT_FUNC}}
+ });
+ }
+}
+
#endif // CATCH_TEST_BUILD
// Benchmark tests
{
JSIdentifierCtxStub ident_ctx;
JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_bracket_depth);
- char dst[DEPTH];
+ char dst[norm_depth];
constexpr size_t size = 1 << 13;
{
JSIdentifierCtxStub ident_ctx;
JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_scope_depth);
- char dst[DEPTH];
+ char dst[norm_depth];
constexpr size_t size = 1 << 16;
{
// around 11 000 identifiers
std::string input;
- for (int it = 0; it < DEPTH; ++it)
+ for (int it = 0; it < norm_depth; ++it)
input.append("n" + std::to_string(it) + " ");
- input.resize(DEPTH - strlen(s_closing_tag));
+ input.resize(norm_depth - strlen(s_closing_tag));
input.append(s_closing_tag, strlen(s_closing_tag));
JSIdentifierCtxStub ident_ctx_mock;
TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]")
{
- auto w_semicolons = make_input("", "a;\n", "", depth);
- auto wo_semicolons = make_input("", "a \n", "", depth);
+ auto w_semicolons = make_input("", "a;\n", "", norm_depth);
+ auto wo_semicolons = make_input("", "a \n", "", norm_depth);
const char* src_w_semicolons = w_semicolons.c_str();
const char* src_wo_semicolons = wo_semicolons.c_str();
size_t src_len = w_semicolons.size();
JSIdentifierCtxStub ident_ctx_mock;
- JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, depth);
+ JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, norm_depth);
REQUIRE(norm_ret(normalizer_wo_ident, w_semicolons) == JSTokenizer::SCRIPT_ENDED);
BENCHMARK("without semicolon insertion")
{
[[noreturn]] void FatalError(const char*, ...)
{ exit(EXIT_FAILURE); }
-void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) {}
+void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) { } // Empty stub: all arguments are ignored.
uint8_t TraceApi::get_constraints_generation() { return 0; }
-void TraceApi::filter(const Packet&) {}
+void TraceApi::filter(const Packet&) { } // Empty stub: the packet is ignored.
}
THREAD_LOCAL const snort::Trace* http_trace = nullptr;
using namespace snort;
+// Feeds every PDU to the normalizer in order and, after each one, checks the
+// script produced so far and the tokenizer's function-call scope stack.
+// pdus: {input chunk, expected script, expected stack listed bottom-to-top}.
+void JSTokenizerTester::test_function_scopes(const std::list<ScopeCase>& pdus)
+{
+ // Bind by reference: each ScopeCase carries a std::list, so a by-value
+ // loop variable would copy that list on every iteration.
+ for (const auto& pdu : pdus)
+ {
+ const char* source = std::get<0>(pdu);
+ const char* expected = std::get<1>(pdu);
+ const std::list<FuncType>& exp_stack = std::get<2>(pdu);
+
+ normalizer.normalize(source, strlen(source));
+ std::string result_buf(normalizer.get_script(), normalizer.script_size());
+ CHECK(result_buf == expected);
+
+ // Inspect a copy so that popping does not disturb the tokenizer's stack.
+ auto tmp_stack(normalizer.get_tokenizer().scope_stack);
+ CHECK(tmp_stack.size() == exp_stack.size());
+ // The expectation lists the bottom scope first, so walk it in reverse
+ // while popping from the top of the copy.
+ for (auto func_it = exp_stack.rbegin(); func_it != exp_stack.rend() and !tmp_stack.empty();
+ ++func_it)
+ {
+ CHECK(tmp_stack.top().func_call_type == *func_it);
+ tmp_stack.pop();
+ }
+ }
+}
+
+// Forwards the normalizer's unescape-nesting indicator to the test code.
+bool JSTokenizerTester::is_unescape_nesting_seen() const
+{
+ const bool nesting_seen = normalizer.is_unescape_nesting_seen();
+ return nesting_seen;
+}
+
void test_scope(const char* context, std::list<JSProgramScopeType> stack)
{
std::string buf(context);
constexpr int max_template_nesting = 4;
constexpr int max_bracket_depth = 256;
constexpr int max_scope_depth = 256;
-static const std::unordered_set<std::string> s_ignored_ids { "console", "eval", "document" };
+static const std::unordered_set<std::string> s_ignored_ids { // Ignore list handed to the testers: these names are expected to appear unsubstituted in the normalized output.
+ "console", "eval", "document", "unescape", "decodeURI", "decodeURIComponent", "String"
+};
namespace snort
{
size_t size() const override { return 0; }
};
+// Test helper that owns an identifier context plus a normalizer built on it,
+// and checks the tokenizer's function-call scope stack after each chunk.
+class JSTokenizerTester
+{
+public:
+ using FuncType = JSTokenizer::FuncType;
+ // One test step: input chunk, expected script, expected scope stack
+ // (listed from the bottom entry to the top one).
+ using ScopeCase = std::tuple<const char*, const char*, std::list<FuncType>>;
+
+ JSTokenizerTester(int32_t depth, uint32_t max_scope_depth,
+ const std::unordered_set<std::string>& ignored_ids,
+ uint8_t max_template_nesting, uint32_t max_bracket_depth)
+ : ident_ctx(depth, max_scope_depth, ignored_ids),
+ normalizer(ident_ctx, depth, max_template_nesting, max_bracket_depth)
+ { }
+
+ void test_function_scopes(const std::list<ScopeCase>& pdus);
+ bool is_unescape_nesting_seen() const;
+
+private:
+ // Declared before the normalizer, which is constructed from it.
+ JSIdentifierCtx ident_ctx;
+ snort::JSNormalizer normalizer;
+};
+
void test_scope(const char* context, std::list<JSProgramScopeType> stack);
void test_normalization(const char* source, const char* expected);
void test_normalization_bad(const char* source, const char* expected, JSTokenizer::JSRet eret);
CHECK(off_c == len + 2);
CHECK(off_e == 4096 + 2048);
}
+
+ // Writes exp, rewinds the put position to 0 and reads the same bytes back
+ // through sgetn(); the position must end up at exp_len again.
+ SECTION("get char sequence")
+ {
+ ostreambuf_infl b;
+ const int exp_len = strlen(exp);
+ b.sputn(exp, exp_len);
+
+ int off_c = b.pubseekoff(-exp_len, ios_base::cur, ios_base::out);
+ CHECK(off_c == 0);
+
+ // Scratch storage owned by a vector: released on every exit path.
+ vector<char> act_seq(exp_len);
+ CHECK(b.sgetn(act_seq.data(), exp_len) == exp_len);
+ CHECK(!memcmp(exp, act_seq.data(), exp_len));
+
+ int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out);
+ CHECK(new_off == exp_len);
+ }
+
+ // With the position already at the end of the buffer, sgetn() must read
+ // nothing, leave the destination untouched and keep the position.
+ SECTION("get char sequence from the end")
+ {
+ ostreambuf_infl b;
+ const int exp_len = strlen(exp);
+ // NOTE(review): buf is not freed here; presumably pubsetbuf() hands
+ // ownership to the stream buffer - confirm against setbuf().
+ char* buf = new char[exp_len];
+ memcpy(buf, exp, exp_len);
+ b.pubsetbuf(buf, exp_len);
+
+ int data_off = b.pubseekoff(exp_len, ios_base::beg, ios_base::out);
+ CHECK(data_off == exp_len);
+
+ // Zero-filled scratch storage owned by a vector.
+ vector<char> act_seq(exp_len, '\0');
+ CHECK(b.sgetn(act_seq.data(), exp_len) == 0);
+ CHECK(strlen(act_seq.data()) == 0);
+
+ int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out);
+ CHECK(new_off == exp_len);
+ }
+
+ // Asking for one char more than the buffer holds must return only the
+ // exp_len chars that are available.
+ SECTION("get char sequence more than available")
+ {
+ ostreambuf_infl b;
+ const int exp_len = strlen(exp);
+ // NOTE(review): buf is not freed here; presumably pubsetbuf() hands
+ // ownership to the stream buffer - confirm against setbuf().
+ char* buf = new char[exp_len];
+ memcpy(buf, exp, exp_len);
+ b.pubsetbuf(buf, exp_len);
+
+ // Scratch storage owned by a vector, sized for the oversized request.
+ vector<char> act_seq(exp_len + 1);
+ CHECK(b.sgetn(act_seq.data(), exp_len + 1) == exp_len);
+ CHECK(!memcmp(exp, act_seq.data(), exp_len));
+
+ int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out);
+ CHECK(new_off == exp_len);
+ }
}
TEST_CASE("output buffer - buffer management", "[Stream buffers]")
{
const int len = 1 << 21;
const int plen = 1 << 12;
- vector<char> chars;
-
- chars.reserve(len);
- for (char& c : chars)
- c = rand();
+ vector<char> chars(len, '\0');
+ generate_n(chars.begin(), len, rand);
SECTION("0 bytes reserved")
{