From: Mike Stepanek (mstepane) Date: Thu, 3 Mar 2022 20:45:40 +0000 (+0000) Subject: Pull request #3282: http_inspect: add function state tracking for Enchanced javascrip... X-Git-Tag: 3.1.25.0~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=052ff4e2289f1b0a4c78521e9074c1db6c80936d;p=thirdparty%2Fsnort3.git Pull request #3282: http_inspect: add function state tracking for Enchanced javascript normalization Merge in SNORT/snort3 from ~VHORBATO/snort3:js_unesc_track to master Squashed commit of the following: commit 18222154a76c7b9377a1080e4a146dbdfa3964de Author: Vitalii Date: Wed Feb 16 16:15:25 2022 +0200 http_inspect: add unescape function tracking for Enhanced JS Normalizer --- diff --git a/doc/reference/builtin_stubs.txt b/doc/reference/builtin_stubs.txt index 08b27fd4e..eaae05481 100644 --- a/doc/reference/builtin_stubs.txt +++ b/doc/reference/builtin_stubs.txt @@ -1299,6 +1299,17 @@ does not apply to HTTP/2 or HTTP/3 traffic. The HTTP message body is gzip encoded and the FEXTRA flag is set in the gzip header. +119:279 + +Detected nesting of unescape functions(unescape, decodeURI, decodeURIComponent) in JavaScript code. +Indicates that this code most likely has more than one level of obfuscation. This alert is raised +by the enhanced JavaScript normalizer. + +119:280 + +Detected more than one encoding within unescape function call arguments in JavaScript code. +This alert is raised by the enhanced JavaScript normalizer. + 121:1 Invalid flag set on HTTP/2 frame header diff --git a/doc/user/http_inspect.txt b/doc/user/http_inspect.txt index 4c3f6176a..cab6882d7 100755 --- a/doc/user/http_inspect.txt +++ b/doc/user/http_inspect.txt @@ -271,6 +271,10 @@ For example: The default list of ignore-identifiers is present in "snort_defaults.lua". +Unescape function names should remain intact in the output. They ought to be +included in the ignore list. 
If for some reason the user wants to disable unescape +related features, then removing function's name from the ignore list does the trick. + ===== xff_headers This configuration supports defining custom x-forwarded-for type headers. In a diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index aa451bae3..5e785eda6 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -255,6 +255,13 @@ For example: var a = console.log a("hello") // will be substituted to 'console.log("hello")' +In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape +functions(unescape, decodeURI, decodeURIComponent). This allows detection of +unescape functions nested within other unescape functions, which is a potential +indicator of a multilevel obfuscation. The definition of a function call depends on +identifier substitution, so such identifiers must be included in the ignore list in +order to use this feature. + JS Normalizer's syntax parser follows ECMA-262 standard. For various features, tracking of variable scope and individual brackets is done in accordance to the standard. 
Additionally, Normalizer enforces standard limits on HTML content in JavaScript: diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 7e28b3fcd..d6821dbbf 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -294,6 +294,8 @@ enum Infraction INF_INVALID_SUBVERSION = 133, INF_VERSION_0 = 134, INF_GZIP_FEXTRA = 135, + INF_JS_UNESCAPE_NEST = 136, + INF_JS_MULTIPLE_ENC = 137, INF__MAX_VALUE }; @@ -432,6 +434,8 @@ enum EventSid EVENT_VERSION_0 = 276, EVENT_VERSION_HIGHER_THAN_1 = 277, EVENT_GZIP_FEXTRA = 278, + EVENT_JS_UNESCAPE_NEST = 279, + EVENT_JS_MULTIPLE_ENC = 280, EVENT__MAX_VALUE }; diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index a8e332eb2..d73de30a6 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -214,6 +214,12 @@ void HttpJsNorm::do_external(const Field& input, Field& output, break; } + if (js_ctx.is_unescape_nesting_seen()) + { + *infractions += INF_JS_UNESCAPE_NEST; + events->create_event(EVENT_JS_UNESCAPE_NEST); + } + if (ssn->js_built_in_event) break; } @@ -347,6 +353,11 @@ void HttpJsNorm::do_inline(const Field& input, Field& output, *infractions += INF_JS_CODE_IN_EXTERNAL; events->create_event(EVENT_JS_CODE_IN_EXTERNAL); } + if (js_ctx.is_unescape_nesting_seen()) + { + *infractions += INF_JS_UNESCAPE_NEST; + events->create_event(EVENT_JS_UNESCAPE_NEST); + } script_continue = ret == JSTokenizer::SCRIPT_CONTINUE; } diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index 39f1dda20..d68503d90 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -338,6 +338,8 @@ const RuleMap HttpModule::http_events[] = { EVENT_VERSION_0, "HTTP version in start 
line is 0" }, { EVENT_VERSION_HIGHER_THAN_1, "HTTP version in start line is higher than 1" }, { EVENT_GZIP_FEXTRA, "HTTP gzip body with the FEXTRA flag set" }, + { EVENT_JS_UNESCAPE_NEST, "nested unescape functions in JavaScript code" }, + { EVENT_JS_MULTIPLE_ENC, "mixing of escape formats in JavaScript code" }, { 0, nullptr } }; diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index 5e40a6d64..4a694d3c7 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -58,11 +58,16 @@ public: static size_t size() { return sizeof(JSNormalizer) + 16834; /* YY_BUF_SIZE */ } + bool is_unescape_nesting_seen() const + { return tokenizer.is_unescape_nesting_seen(); } + #ifdef CATCH_TEST_BUILD const char* get_tmp_buf() const { return tmp_buf; } size_t get_tmp_buf_size() const { return tmp_buf_size; } + const JSTokenizer& get_tokenizer() const + { return tokenizer; } #endif #ifdef BENCHMARK_TEST diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 8908de56d..2e8cc6c8a 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -42,7 +42,9 @@ extern THREAD_LOCAL const snort::Trace* http_trace; enum JSProgramScopeType : unsigned int; class JSIdentifierCtxBase; - +#ifdef CATCH_TEST_BUILD +class JSTokenizerTester; +#endif class JSTokenizer : public yyFlexLexer { private: @@ -84,17 +86,24 @@ private: OBJECT, // object definition, class definition SCOPE_META_TYPE_MAX }; + enum FuncType + { + NOT_FUNC = 0, + GENERAL, + UNESCAPE, + CHAR_CODE + }; struct Scope { Scope(ScopeType t) : - type(t), meta_type(ScopeMetaType::NOT_SET), ident_norm(true), func_call(false), - block_param(false), do_loop(false) + type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC), + ident_norm(true), block_param(false), do_loop(false) {} ScopeType type; ScopeMetaType meta_type; + FuncType func_call_type; bool ident_norm; - bool func_call; bool block_param; bool do_loop; }; @@ -153,6 +162,7 @@ public: JSRet process(size_t& 
bytes_in); + bool is_unescape_nesting_seen() const; protected: [[noreturn]] void LexerError(const char* msg) override { snort::FatalError("%s", msg); } @@ -194,8 +204,10 @@ private: ScopeMetaType meta_type(); void set_ident_norm(bool); bool ident_norm(); - void set_func_call(bool); - bool func_call(); + void set_func_call_type(FuncType); + FuncType func_call_type(); + FuncType detect_func_type(); + void check_function_nesting(FuncType); void set_block_param(bool); bool block_param(); void set_do_loop(bool); @@ -214,6 +226,49 @@ private: void dealias_append(); void dealias_finalize(); + //rule handlers + JSRet html_closing_script_tag(); + JSRet literal_dq_string_start(); + JSRet literal_sq_string_start(); + JSRet literal_template_start(); + JSRet literal_regex_start(); + void div_assignment_operator(); + JSRet open_brace(); + JSRet close_brace(); + JSRet open_parenthesis(); + JSRet close_parenthesis(); + JSRet open_bracket(); + JSRet close_bracket(); + JSRet punctuator_prefix(); + void dot_accessor(); + JSRet punctuator_arrow(); + JSRet punctuator_semicolon(); + void punctuator_colon(); + void operator_comparison(); + void operator_complex_assignment(); + void operator_logical(); + void operator_shift(); + void punctuator_comma(); + JSRet use_strict_directive(); + JSRet use_strict_directive_sc(); + JSRet keyword_var_decl(); + JSRet keyword_function(); + JSRet keyword_catch(); + JSRet keyword_while(); + JSRet keyword_B(); + JSRet keyword_BA(); + JSRet keyword_finally(); + JSRet keyword_do(); + JSRet keyword_class(); + JSRet keyword_other(); + void operator_assignment(); + JSRet operator_prefix(); + JSRet operator_incr_decr(); + JSRet general_operator(); + JSRet general_literal(); + JSRet general_identifier(); + void general_unicode(); + static const char* p_scope_codes[]; void* cur_buffer; @@ -226,6 +281,7 @@ private: AliasState alias_state = ALIAS_NONE; bool prefix_increment = false; bool dealias_stored = false; + bool unescape_nest_seen = false; uint8_t 
max_template_nesting; std::stack> brace_depth; @@ -268,8 +324,30 @@ private: {false, false, false, false, false, false, false, false, false, false, false,} }; + std::streampos ignored_id_pos = -1; + struct FunctionIdentifier + { + bool operator< (const FunctionIdentifier& other) const + { return identifier.size() < other.identifier.size(); } + + std::string identifier; + FuncType type; + }; + + const std::array function_identifiers + {{ + {"unescape", FuncType::UNESCAPE }, + {"decodeURI", FuncType::UNESCAPE }, + {"decodeURIComponent", FuncType::UNESCAPE }, + {"String.fromCharCode", FuncType::CHAR_CODE } + }}; + const uint32_t max_bracket_depth; std::stack scope_stack; + +#ifdef CATCH_TEST_BUILD + friend JSTokenizerTester; +#endif // CATCH_TEST_BUILD }; #endif // JS_TOKENIZER_H diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index 18595f45d..4439e496e 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -1021,7 +1021,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LINE_TERMINATORS} { BEGIN(regst); newline_found = true; } {HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); if (!global_scope()) RETURN(ENDED_IN_INNER_SCOPE) else RETURN(SCRIPT_ENDED) } +{HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) } {HTML_COMMENT_OPEN} { BEGIN(lcomm); } {LINE_COMMENT_START} { BEGIN(lcomm); } @@ -1041,7 +1041,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {BLOCK_COMMENT_SKIP} { } <> { RETURN(SCRIPT_CONTINUE) } - {LITERAL_DQ_STRING_START} { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); set_ident_norm(true); } + {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) } {LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } \\{CR}{LF} { } @@ -1052,7 +1052,7 @@ ALL_UNICODE 
[\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_DQ_STRING_TEXT} { dealias_append(); ECHO; } <> { RETURN(SCRIPT_CONTINUE) } - {LITERAL_SQ_STRING_START} { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); set_ident_norm(true); } + {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) } {LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } \\{CR}{LF} { } @@ -1063,7 +1063,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_SQ_STRING_TEXT} { dealias_append(); ECHO; } <> { RETURN(SCRIPT_CONTINUE) } - {LITERAL_TEMPLATE_START} { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); set_ident_norm(true); } + {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) } (\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); } (\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } @@ -1072,7 +1072,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_TEMPLATE_OTHER} { dealias_append(); ECHO; } <> { RETURN(SCRIPT_CONTINUE) } -{LITERAL_REGEX_START} { dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); set_ident_norm(true); } +{LITERAL_REGEX_START} { EXEC(literal_regex_start()) } {LITERAL_REGEX_END} { ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } {LITERAL_REGEX_SKIP} { ECHO; } @@ -1083,68 +1083,54 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 <> { RETURN(SCRIPT_CONTINUE) } {DIV_OPERATOR} | -{DIV_ASSIGNMENT_OPERATOR} { dealias_equals(true); previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); 
set_ident_norm(true); } - -{OPEN_BRACE} { dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_1)) if (meta_type() == ScopeMetaType::NOT_SET) { if (is_operator(token) || token == COLON || func_call()) set_meta_type(ScopeMetaType::OBJECT); else { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } } EXEC(scope_push(BRACES)) if (!brace_depth.empty()) brace_depth.top()++; process_punctuator(); } -{CLOSE_BRACE} { dealias_clear_mutated(false); EXEC(do_semicolon_insertion(ASI_GROUP_2)) if (meta_type() != ScopeMetaType::NOT_SET) EXEC(p_scope_pop(meta_type())) EXEC(scope_pop(BRACES)) process_closing_brace(); set_ident_norm(true); } -{OPEN_PARENTHESIS} { dealias_clear_mutated(true); dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_3)) EXEC(scope_push(PARENTHESES)) if (token == IDENTIFIER || token == CLOSING_BRACKET || token == KEYWORD) set_func_call(true); process_punctuator(); } -{CLOSE_PARENTHESIS} { dealias_clear_mutated(false); dealias_reset(); bool f_call = func_call(); bool id_norm = ident_norm(); if (meta_type() != ScopeMetaType::NOT_SET) EXEC(p_scope_pop(meta_type())) EXEC(scope_pop(PARENTHESES)) if (!f_call) set_ident_norm(id_norm); if (block_param()) { previous_group = ASI_OTHER; set_block_param(false); } else { EXEC(do_semicolon_insertion(ASI_GROUP_5)) } ECHO; token = PUNCTUATOR; BEGIN(divop); } -{OPEN_BRACKET} { dealias_clear_mutated(true); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_3)) EXEC(do_semicolon_insertion(ASI_GROUP_4)) EXEC(scope_push(BRACKETS)) process_punctuator(); } -{CLOSE_BRACKET} { dealias_clear_mutated(false); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_4)) EXEC(scope_pop(BRACKETS)) ECHO; token = CLOSING_BRACKET; BEGIN(divop); } - -{PUNCTUATOR_PREFIX} { process_punctuator(); EXEC(do_semicolon_insertion(ASI_GROUP_10)) set_ident_norm(true); } -{DOT_ACCESSOR} { dealias_clear_mutated(true); previous_group = ASI_OTHER; dealias_append(); ECHO; token = DOT; BEGIN(regst); } -{PUNCTUATOR_ARROW} { 
dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_reset(); process_punctuator(); set_ident_norm(true); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::FUNCTION); EXEC(p_scope_push(meta_type())) } } -{PUNCTUATOR_SEMICOLON} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_finalize(); process_punctuator(); set_ident_norm(true); if (meta_type() != ScopeMetaType::NOT_SET) { EXEC(p_scope_pop(meta_type())) set_meta_type(ScopeMetaType::NOT_SET); } } -{PUNCTUATOR_COLON} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_reset(); process_punctuator(COLON); set_ident_norm(true); } -{OPERATOR_COMPARISON} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_COMPARISON); set_ident_norm(true); } -{OPERATOR_COMPLEX_ASSIGNMENT} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_equals(true); process_punctuator(OPERATOR_COMPLEX_ASSIGNMENT); set_ident_norm(true); } -{OPERATOR_LOGICAL} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_LOGICAL); set_ident_norm(true); } -{OPERATOR_SHIFT} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_SHIFT); set_ident_norm(true); } -{PUNCTUATOR_COMMA} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_finalize(); process_punctuator(); set_ident_norm(true); } - -{USE_STRICT_DIRECTIVE} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; set_ident_norm(true); } -{USE_STRICT_DIRECTIVE_SC} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); set_ident_norm(true); } - -{KEYWORD_VAR_DECL} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); alias_state = ALIAS_NONE; EXEC(do_spacing(KEYWORD_VAR_DECL)) ECHO; BEGIN(regst); } -{KEYWORD_FUNCTION} { 
EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_FUNCTION)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) set_meta_type(ScopeMetaType::FUNCTION); } +{DIV_ASSIGNMENT_OPERATOR} { div_assignment_operator(); } + +{OPEN_BRACE} { EXEC(open_brace()) } +{CLOSE_BRACE} { EXEC(close_brace()) } +{OPEN_PARENTHESIS} { EXEC(open_parenthesis()) } +{CLOSE_PARENTHESIS} { EXEC(close_parenthesis()) } +{OPEN_BRACKET} { EXEC(open_bracket()) } +{CLOSE_BRACKET} { EXEC(close_bracket()) } + +{PUNCTUATOR_PREFIX} { EXEC(punctuator_prefix()) } +{DOT_ACCESSOR} { dot_accessor(); } +{PUNCTUATOR_ARROW} { EXEC(punctuator_arrow()) } +{PUNCTUATOR_SEMICOLON} { EXEC(punctuator_semicolon()) } +{PUNCTUATOR_COLON} { punctuator_colon(); } +{OPERATOR_COMPARISON} { operator_comparison(); } +{OPERATOR_COMPLEX_ASSIGNMENT} { operator_complex_assignment(); } +{OPERATOR_LOGICAL} { operator_logical(); } +{OPERATOR_SHIFT} { operator_shift(); } +{PUNCTUATOR_COMMA} { punctuator_comma(); } + +{USE_STRICT_DIRECTIVE} { EXEC(use_strict_directive()) } +{USE_STRICT_DIRECTIVE_SC} { EXEC(use_strict_directive_sc()) } + +{KEYWORD_VAR_DECL} { EXEC(keyword_var_decl()) } +{KEYWORD_FUNCTION} { EXEC(keyword_function()) } {KEYWORD_IF} | {KEYWORD_FOR} | {KEYWORD_WITH} | {KEYWORD_SWITCH} | -{KEYWORD_CATCH} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } set_block_param(true); } -{KEYWORD_WHILE} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } if (do_loop()) set_do_loop(false); else set_block_param(true); } -{KEYWORD_B} { 
EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } -{KEYWORD_BA} { EXEC(do_semicolon_insertion(ASI_GROUP_9)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } +{KEYWORD_CATCH} { EXEC(keyword_catch()) } +{KEYWORD_WHILE} { EXEC(keyword_while()) } +{KEYWORD_B} { EXEC(keyword_B()) } +{KEYWORD_BA} { EXEC(keyword_BA()) } {KEYWORD_TRY} | {KEYWORD_ELSE} | -{KEYWORD_FINALLY} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } } -{KEYWORD_DO} { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } set_do_loop(true); } -{KEYWORD_CLASS} { previous_group = ASI_OTHER; dealias_reset(); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_CLASS)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) set_meta_type(ScopeMetaType::OBJECT); } -{KEYWORD_OTHER} { previous_group = ASI_OTHER; dealias_reset(); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } - -{OPERATOR_ASSIGNMENT} { previous_group = ASI_OTHER; dealias_equals(false); process_punctuator(OPERATOR_ASSIGNMENT); set_ident_norm(true); } -{OPERATOR_PREFIX} { dealias_prefix_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_6)) EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true); } -{OPERATOR_INCR_DECR} { dealias_increment(); dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_8)) EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true); } -{OPERATOR} { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); 
EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true);} -{LITERAL} { dealias_clear_mutated(false); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); set_ident_norm(true); } -{IDENTIFIER} { - if (unescape(YYText())) { - bool id_part = (token == DOT); - bool assignment_start = token == KEYWORD_VAR_DECL || - token == PUNCTUATOR || - token == UNDEFINED; - EXEC(do_semicolon_insertion(ASI_GROUP_7)) - EXEC(do_spacing(IDENTIFIER)) - EXEC(do_identifier_substitution(YYText(), id_part)) - dealias_identifier(id_part, assignment_start); - } - else - EXEC(do_semicolon_insertion(ASI_GROUP_7)) - BEGIN(divop); - } - -.|{ALL_UNICODE} { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); set_ident_norm(true); } +{KEYWORD_FINALLY} { EXEC(keyword_finally()) } +{KEYWORD_DO} { EXEC(keyword_do()) } +{KEYWORD_CLASS} { EXEC(keyword_class()) } +{KEYWORD_OTHER} { EXEC(keyword_other()) } + +{OPERATOR_ASSIGNMENT} { operator_assignment(); } +{OPERATOR_PREFIX} { EXEC(operator_prefix()) } +{OPERATOR_INCR_DECR} { EXEC(operator_incr_decr()) } +{OPERATOR} { EXEC(general_operator()) } +{LITERAL} { EXEC(general_literal()) } +{IDENTIFIER} { EXEC(general_identifier()) } + +.|{ALL_UNICODE} { general_unicode(); } <> { EEOF(eval_eof()) } %% @@ -1392,6 +1378,7 @@ JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b if (ident_ctx.is_ignored(lexeme) && !id_part) { + ignored_id_pos = yyout.rdbuf()->pubseekoff(0, yyout.cur, std::ios_base::out); set_ident_norm(false); yyout << lexeme; return EOS; @@ -1403,11 +1390,15 @@ JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b if (ident) { set_ident_norm(false); + ignored_id_pos = yyout.rdbuf()->pubseekoff(0, yyout.cur, std::ios_base::out); last_dealiased = std::string(YYText()); dealias_stored = true; } else + { + ignored_id_pos = -1; ident = ident_ctx.substitute(lexeme); + } if (!ident) { @@ -1764,14 +1755,74 @@ bool 
JSTokenizer::ident_norm() return scope_cur().ident_norm; } -void JSTokenizer::set_func_call(bool f) +void JSTokenizer::set_func_call_type(JSTokenizer::FuncType f) +{ + scope_cur().func_call_type = f; +} + +JSTokenizer::FuncType JSTokenizer::func_call_type() { - scope_cur().func_call = f; + return scope_cur().func_call_type; } -bool JSTokenizer::func_call() +JSTokenizer::FuncType JSTokenizer::detect_func_type() { - return scope_cur().func_call; + switch(token) + { + case CLOSING_BRACKET: + case KEYWORD: + return FuncType::GENERAL; + + case IDENTIFIER: + { + FuncType ret = FuncType::GENERAL; + if (ignored_id_pos >= 0) + { + std::streambuf* pbuf = yyout.rdbuf(); + std::streamsize size = pbuf->pubseekoff(0, yyout.cur, yyout.out) - ignored_id_pos; + assert(size >= 0); + + char tail[256]; + assert((long unsigned int)size <= sizeof(tail)); + size = std::min((long unsigned int)size, sizeof(tail)); + + pbuf->pubseekoff(-size, yyout.cur, yyout.out); + pbuf->sgetn(tail, size); + + for (const auto& id : function_identifiers) + { + if ((unsigned)size == (unsigned)id.identifier.size() && + memcmp(tail, id.identifier.data(), size) == 0) + { + ret = id.type; + break; + } + } + } + return ret; + } + + default: + return FuncType::NOT_FUNC; + } +} + +void JSTokenizer::check_function_nesting(JSTokenizer::FuncType type) +{ + switch (type) + { + case (JSTokenizer::FuncType::UNESCAPE): + if (func_call_type() == JSTokenizer::FuncType::UNESCAPE) + unescape_nest_seen = true; + break; + default: + break; + } +} + +bool JSTokenizer::is_unescape_nesting_seen() const +{ + return unescape_nest_seen; } void JSTokenizer::set_block_param(bool f) @@ -1965,9 +2016,512 @@ void JSTokenizer::dealias_finalize() dealias_reset(); } +JSTokenizer::JSRet JSTokenizer::html_closing_script_tag() +{ return global_scope() ? 
SCRIPT_ENDED : ENDED_IN_INNER_SCOPE; } + +JSTokenizer::JSRet JSTokenizer::literal_dq_string_start() +{ + dealias_append(); + EXEC(do_semicolon_insertion(ASI_GROUP_7)) + EXEC(do_spacing(LITERAL)) + ECHO; + BEGIN(dqstr); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::literal_sq_string_start() +{ + dealias_append(); + EXEC(do_semicolon_insertion(ASI_GROUP_7)) + EXEC(do_spacing(LITERAL)) + ECHO; + BEGIN(sqstr); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::literal_template_start() +{ + dealias_append(); + EXEC(do_semicolon_insertion(ASI_GROUP_7)) + EXEC(do_spacing(LITERAL)) + ECHO; + BEGIN(tmpll); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::literal_regex_start() +{ + dealias_reset(); + EXEC(do_semicolon_insertion(ASI_GROUP_7)) + EXEC(do_spacing(LITERAL)) + yyout << '/'; + states_correct(1); + yyless(1); + BEGIN(regex); + set_ident_norm(true); + return EOS; +} + +void JSTokenizer::div_assignment_operator() +{ + dealias_equals(true); + previous_group = ASI_OTHER; + ECHO; + token = PUNCTUATOR; + BEGIN(INITIAL); + set_ident_norm(true); +} + +JSTokenizer::JSRet JSTokenizer::open_brace() +{ + dealias_reset(); + EXEC(do_semicolon_insertion(ASI_GROUP_1)) + if (meta_type() == ScopeMetaType::NOT_SET) + { + if (is_operator(token) || token == COLON || func_call_type() != FuncType::NOT_FUNC) + set_meta_type(ScopeMetaType::OBJECT); + else + { + set_meta_type(ScopeMetaType::BLOCK); + EXEC(p_scope_push(meta_type())) + } + } + EXEC(scope_push(BRACES)) + if (!brace_depth.empty()) + brace_depth.top()++; + process_punctuator(); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::close_brace() +{ + dealias_clear_mutated(false); + EXEC(do_semicolon_insertion(ASI_GROUP_2)) + if (meta_type() != ScopeMetaType::NOT_SET) + EXEC(p_scope_pop(meta_type())) + EXEC(scope_pop(BRACES)) + process_closing_brace(); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::open_parenthesis() +{ + 
dealias_clear_mutated(true); + dealias_reset(); + EXEC(do_semicolon_insertion(ASI_GROUP_3)) + FuncType f_call = detect_func_type(); + check_function_nesting(f_call); + EXEC(scope_push(PARENTHESES)) + set_func_call_type(f_call); + process_punctuator(); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::close_parenthesis() +{ + dealias_clear_mutated(false); + dealias_reset(); + FuncType f_call = func_call_type(); + bool id_norm = ident_norm(); + if (meta_type() != ScopeMetaType::NOT_SET) + EXEC(p_scope_pop(meta_type())) + EXEC(scope_pop(PARENTHESES)) + if (f_call == FuncType::NOT_FUNC) + set_ident_norm(id_norm); + if (block_param()) + { + previous_group = ASI_OTHER; + set_block_param(false); + } + else + { + EXEC(do_semicolon_insertion(ASI_GROUP_5)) + } + ECHO; + token = PUNCTUATOR; + BEGIN(divop); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::open_bracket() +{ + dealias_clear_mutated(true); + dealias_append(); + EXEC(do_semicolon_insertion(ASI_GROUP_3)) + EXEC(do_semicolon_insertion(ASI_GROUP_4)) + EXEC(scope_push(BRACKETS)) + process_punctuator(); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::close_bracket() +{ + dealias_clear_mutated(false); + dealias_append(); + EXEC(do_semicolon_insertion(ASI_GROUP_4)) + EXEC(scope_pop(BRACKETS)) + ECHO; + token = CLOSING_BRACKET; + BEGIN(divop); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::punctuator_prefix() +{ + process_punctuator(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) + set_ident_norm(true); + return EOS; +} + +void JSTokenizer::dot_accessor() +{ + dealias_clear_mutated(true); + previous_group = ASI_OTHER; + dealias_append(); + ECHO; + token = DOT; + BEGIN(regst); +} + +JSTokenizer::JSRet JSTokenizer::punctuator_arrow() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_reset(); + process_punctuator(); + set_ident_norm(true); + if (meta_type() == ScopeMetaType::NOT_SET) + { + set_meta_type(ScopeMetaType::FUNCTION); + EXEC(p_scope_push(meta_type())) + } + return EOS; +} + 
+JSTokenizer::JSRet JSTokenizer::punctuator_semicolon() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_finalize(); + process_punctuator(); + set_ident_norm(true); + if (meta_type() != ScopeMetaType::NOT_SET) + { + EXEC(p_scope_pop(meta_type())) + set_meta_type(ScopeMetaType::NOT_SET); + } + return EOS; +} + +void JSTokenizer::punctuator_colon() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_reset(); + process_punctuator(COLON); + set_ident_norm(true); +} + +void JSTokenizer::operator_comparison() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_prefix_reset(); + process_punctuator(OPERATOR_COMPARISON); + set_ident_norm(true); +} + +void JSTokenizer::operator_complex_assignment() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_equals(true); + process_punctuator(OPERATOR_COMPLEX_ASSIGNMENT); + set_ident_norm(true); +} + +void JSTokenizer::operator_logical() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_prefix_reset(); + process_punctuator(OPERATOR_LOGICAL); + set_ident_norm(true); +} + +void JSTokenizer::operator_shift() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_prefix_reset(); + process_punctuator(OPERATOR_SHIFT); + set_ident_norm(true); +} + +void JSTokenizer::punctuator_comma() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_finalize(); + process_punctuator(); + set_ident_norm(true); +} + +JSTokenizer::JSRet JSTokenizer::use_strict_directive() +{ + previous_group = ASI_OTHER; + EXEC(do_spacing(DIRECTIVE)) + ECHO; + BEGIN(INITIAL); + yyout << ';'; + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::use_strict_directive_sc() +{ + previous_group = ASI_OTHER; + EXEC(do_spacing(DIRECTIVE)) + ECHO; + BEGIN(INITIAL); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_var_decl() +{ + 
EXEC(do_semicolon_insertion(ASI_GROUP_10)) + if (token != DOT) + set_ident_norm(true); + alias_state = ALIAS_NONE; + EXEC(do_spacing(KEYWORD_VAR_DECL)) + ECHO; + BEGIN(regst); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_function() +{ + EXEC(do_semicolon_insertion(ASI_GROUP_10)) + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD_FUNCTION)) + ECHO; + BEGIN(regst); + if (meta_type() == ScopeMetaType::NOT_SET) + set_meta_type(ScopeMetaType::FUNCTION); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_catch() +{ + EXEC(do_semicolon_insertion(ASI_GROUP_10)) + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD_BLOCK)) + ECHO; + BEGIN(regst); + if (meta_type() == ScopeMetaType::NOT_SET) + { + set_meta_type(ScopeMetaType::BLOCK); + EXEC(p_scope_push(meta_type())) + } + set_block_param(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_while() +{ + EXEC(do_semicolon_insertion(ASI_GROUP_10)) + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD_BLOCK)) + ECHO; + BEGIN(regst); + if (meta_type() == ScopeMetaType::NOT_SET) + { + set_meta_type(ScopeMetaType::BLOCK); + EXEC(p_scope_push(meta_type())) + } + if (do_loop()) + set_do_loop(false); + else + set_block_param(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_B() +{ + EXEC(do_semicolon_insertion(ASI_GROUP_10)) + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD)) + ECHO; + BEGIN(regst); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_BA() +{ + EXEC(do_semicolon_insertion(ASI_GROUP_9)) + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD)) + ECHO; + BEGIN(regst); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_finally() +{ + EXEC(do_semicolon_insertion(ASI_GROUP_10)) + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD_BLOCK)) + ECHO; + BEGIN(regst); + if (meta_type() == ScopeMetaType::NOT_SET) + { + set_meta_type(ScopeMetaType::BLOCK); + 
EXEC(p_scope_push(meta_type())) + } + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_do() +{ + EXEC(do_semicolon_insertion(ASI_GROUP_10)) + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD_BLOCK)) + ECHO; + BEGIN(regst); + if (meta_type() == ScopeMetaType::NOT_SET) + { + set_meta_type(ScopeMetaType::BLOCK); + EXEC(p_scope_push(meta_type())) + } + set_do_loop(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_class() +{ + previous_group = ASI_OTHER; + dealias_reset(); + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD_CLASS)) + ECHO; + BEGIN(regst); + if (meta_type() == ScopeMetaType::NOT_SET) + set_meta_type(ScopeMetaType::OBJECT); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::keyword_other() +{ + previous_group = ASI_OTHER; + dealias_reset(); + if (token != DOT) + set_ident_norm(true); + EXEC(do_spacing(KEYWORD)) + ECHO; + BEGIN(regst); + return EOS; +} + +void JSTokenizer::operator_assignment() +{ + previous_group = ASI_OTHER; + dealias_equals(false); + process_punctuator(OPERATOR_ASSIGNMENT); + set_ident_norm(true); +} + +JSTokenizer::JSRet JSTokenizer::operator_prefix() +{ + dealias_prefix_reset(); + EXEC(do_semicolon_insertion(ASI_GROUP_6)) + EXEC(do_operator_spacing()) + ECHO; + BEGIN(divop); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::operator_incr_decr() +{ + dealias_increment(); + dealias_reset(); + EXEC(do_semicolon_insertion(ASI_GROUP_8)) + EXEC(do_operator_spacing()) + ECHO; + BEGIN(divop); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::general_operator() +{ + dealias_clear_mutated(false); + previous_group = ASI_OTHER; + dealias_prefix_reset(); + EXEC(do_operator_spacing()) + ECHO; + BEGIN(divop); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::general_literal() +{ + dealias_clear_mutated(false); + dealias_append(); + EXEC(do_semicolon_insertion(ASI_GROUP_7)) + EXEC(do_spacing(LITERAL)) + ECHO; + 
BEGIN(divop); + set_ident_norm(true); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::general_identifier() +{ + if (unescape(YYText())) + { + bool id_part = (token == DOT); + bool assignment_start = token == KEYWORD_VAR_DECL || token == PUNCTUATOR || + token == UNDEFINED; + EXEC(do_semicolon_insertion(ASI_GROUP_7)) + EXEC(do_spacing(IDENTIFIER)) + EXEC(do_identifier_substitution(YYText(), id_part)) + dealias_identifier(id_part, assignment_start); + } + else + EXEC(do_semicolon_insertion(ASI_GROUP_7)) + BEGIN(divop); + return EOS; +} + +void JSTokenizer::general_unicode() +{ + previous_group = ASI_OTHER; + ECHO; + token = UNDEFINED; + BEGIN(INITIAL); + set_ident_norm(true); +} + JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in) { yy_flush_buffer(YY_CURRENT_BUFFER); + unescape_nest_seen = false; auto r = yylex(); @@ -1978,4 +2532,4 @@ JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in) bytes_read = 0; return static_cast<JSTokenizer::JSRet>(r); -} +} \ No newline at end of file diff --git a/src/utils/streambuf.cc b/src/utils/streambuf.cc index e5ba9e81f..08c4684b3 100644 --- a/src/utils/streambuf.cc +++ b/src/utils/streambuf.cc @@ -369,6 +369,26 @@ streamsize ostreambuf_infl::xsputn(const char* s, streamsize n) return n; } +// cppcheck-suppress unusedFunction +streamsize ostreambuf_infl::xsgetn(char* s, streamsize n) +{ + assert(n >= 0); + + if (pptr() != epptr()) + { + n = max<streamsize>(0, n); + auto c_avail = epptr() - pptr(); + n = min(c_avail, n); + + memcpy(s, pptr(), n); + pbump(n); + + return n; + } + + return 0; +} + // cppcheck-suppress unusedFunction int ostreambuf_infl::overflow(int c) { diff --git a/src/utils/streambuf.h b/src/utils/streambuf.h index 8cd7108f8..acf81b47c 100644 --- a/src/utils/streambuf.h +++ b/src/utils/streambuf.h @@ -95,6 +95,7 @@ protected: std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override; virtual int sync() override; virtual std::streamsize xsputn(const char* s, std::streamsize n) override; + virtual
std::streamsize xsgetn(char* s, std::streamsize n) override; virtual int overflow(int c = EOF) override; bool enlarge(); diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index c80dc1096..dedae02ab 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -4174,6 +4174,484 @@ TEST_CASE("Scope tracking - error handling", "[JSNormalizer]") } } +TEST_CASE("Function call tracking - basic", "[JSNormalizer]") +{ + JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting, + max_bracket_depth); + + using FuncType = JSTokenizerTester::FuncType; + + SECTION("Global only") + { + tester.test_function_scopes({{ "", "", {FuncType::NOT_FUNC}}}); + } + SECTION("General function call") + { + SECTION("in arguments") + { + tester.test_function_scopes({ + {"general(", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}} + }); + } + SECTION("separated identifier and call") + { + tester.test_function_scopes({ + {"general /*comment*/ (", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}} + }); + } + SECTION("complete call") + { + tester.test_function_scopes({ + {"general('%62%61%72')", "var_0000('%62%61%72')", {FuncType::NOT_FUNC}} + }); + } + SECTION("as named function definition") + { + tester.test_function_scopes({ + {"general(){", "var_0000(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} + }); + } + SECTION("after defined function identifier") + { + tester.test_function_scopes({ + {"unescape;hello(", "unescape;var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}} + }); + } + SECTION("fake defined function identifier") + { + tester.test_function_scopes({ + {"fake_unescape(", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}} + }); + } + SECTION("ignored fake defined function identifier") + { + const std::unordered_set<std::string> s_ignored_ids_fake {"fake_unescape"}; + JSTokenizerTester tester_fake(norm_depth, max_scope_depth, s_ignored_ids_fake, + max_template_nesting,
max_bracket_depth); + tester_fake.test_function_scopes({ + {"fake_unescape(", "fake_unescape(", {FuncType::NOT_FUNC, FuncType::GENERAL}} + }); + } + SECTION("as a template literal substitution") + { + tester.test_function_scopes({ + {"`unescape ${general(", "`unescape ${var_0000(", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::GENERAL}} + }); + } + } + SECTION("unescape function call") + { + SECTION("in arguments") + { + tester.test_function_scopes({ + {"unescape(", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} + }); + } + SECTION("separated identifier and call") + { + tester.test_function_scopes({ + {"unescape /*comment*/ (", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} + }); + } + SECTION("complete call") + { + tester.test_function_scopes({ + {"unescape('%62%61%72')", "unescape('%62%61%72')", {FuncType::NOT_FUNC}} + }); + } + SECTION("as named function definition") + { + tester.test_function_scopes({ + {"unescape(){", "unescape(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} + }); + } + SECTION("after assignment substitution") + { + tester.test_function_scopes({ + {"var a = unescape; a(", "var var_0000=unescape;unescape(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}} + }); + } + SECTION("literal") + { + tester.test_function_scopes({ + {"`unescape(", "`unescape(", {FuncType::NOT_FUNC}} + }); + } + SECTION("as a template literal substitution") + { + tester.test_function_scopes({ + {"`literal ${unescape(", "`literal ${unescape(", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}} + }); + } + } + SECTION("decodeURI function call") + { + SECTION("in arguments") + { + tester.test_function_scopes({ + {"decodeURI(", "decodeURI(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}} + }); + } + SECTION("separated identifier and call") + { + tester.test_function_scopes({ + {"decodeURI /*comment*/ (", "decodeURI(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}} + }); + } + SECTION("complete call") + { + tester.test_function_scopes({ + 
{"decodeURI('%62%61%72')", "decodeURI('%62%61%72')", {FuncType::NOT_FUNC}} + }); + } + SECTION("as named function definition") + { + tester.test_function_scopes({ + {"decodeURI(){", "decodeURI(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} + }); + } + SECTION("after assignment substitution") + { + tester.test_function_scopes({ + {"var a = decodeURI; a(", "var var_0000=decodeURI;decodeURI(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}} + }); + } + SECTION("literal") + { + tester.test_function_scopes({ + {"`decodeURI(", "`decodeURI(", {FuncType::NOT_FUNC}} + }); + } + SECTION("as a template literal substitution") + { + tester.test_function_scopes({ + {"`literal ${decodeURI(", "`literal ${decodeURI(", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}} + }); + } + } + SECTION("decodeURIComponent function call") + { + SECTION("in arguments") + { + tester.test_function_scopes({ + {"decodeURIComponent(", "decodeURIComponent(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}} + }); + } + SECTION("separated identifier and call") + { + tester.test_function_scopes({ + {"decodeURIComponent /*comment*/ (", "decodeURIComponent(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}} + }); + } + SECTION("complete call") + { + tester.test_function_scopes({ + {"decodeURIComponent('%62%61%72')", "decodeURIComponent('%62%61%72')", + {FuncType::NOT_FUNC}} + }); + } + SECTION("as named function definition") + { + tester.test_function_scopes({ + {"decodeURIComponent(){", "decodeURIComponent(){", {FuncType::NOT_FUNC, + FuncType::NOT_FUNC}} + }); + } + SECTION("after assignment substitution") + { + tester.test_function_scopes({ + {"var a = decodeURIComponent; a(", + "var var_0000=decodeURIComponent;decodeURIComponent(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}} + }); + } + SECTION("literal") + { + tester.test_function_scopes({ + {"`decodeURIComponent(", "`decodeURIComponent(", {FuncType::NOT_FUNC}} + }); + } + SECTION("as a template literal substitution") + { + 
tester.test_function_scopes({ + {"`literal ${decodeURIComponent(", "`literal ${decodeURIComponent(", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}} + }); + } + } + SECTION("String.fromCharCode method call") + { + SECTION("in arguments") + { + tester.test_function_scopes({ + {"String.fromCharCode(", "String.fromCharCode(", + {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} + }); + } + SECTION("separated identifier and call") + { + tester.test_function_scopes({ + {"String.fromCharCode /*comment*/ (", "String.fromCharCode(", + {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} + }); + } + SECTION("complete call") + { + tester.test_function_scopes({ + {"String.fromCharCode( 65, 0x42 )", "String.fromCharCode(65,0x42)", + {FuncType::NOT_FUNC}} + }); + } + SECTION("as named function definition") + { + tester.test_function_scopes({ + {"String.fromCharCode(){", "String.fromCharCode(){", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC}} + }); + } + SECTION("after class name assignment substitution") + { + tester.test_function_scopes({ + {"var a = String; a.fromCharCode(", "var var_0000=String;String.fromCharCode(", + {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} + }); + } + SECTION("after assignment substitution") + { + tester.test_function_scopes({ + {"var a = String.fromCharCode; a(", + "var var_0000=String.fromCharCode;String.fromCharCode(", + {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} + }); + } + SECTION("not a Sting class member call") + { + tester.test_function_scopes({ + {"fromCharCode(", + "var_0000(", + {FuncType::NOT_FUNC, FuncType::GENERAL}} + }); + } + SECTION("literal") + { + tester.test_function_scopes({ + {"`String.fromCharCode(", "`String.fromCharCode(", {FuncType::NOT_FUNC}} + }); + } + SECTION("as a template literal substitution") + { + tester.test_function_scopes({ + {"`literal ${String.fromCharCode(", "`literal ${String.fromCharCode(", + {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::CHAR_CODE}} + }); + } + } +} + +TEST_CASE("Function call tracking - 
nesting", "[JSNormalizer]") +{ + JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting, + max_bracket_depth); + + using FuncType = JSTokenizerTester::FuncType; + + SECTION("Opening") + { + SECTION("Multiple general functions") + { + tester.test_function_scopes({ + { "general( general( general(", "var_0000(var_0000(var_0000(", + { FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::GENERAL, FuncType::GENERAL}} + }); + CHECK(!tester.is_unescape_nesting_seen()); + } + SECTION("Multiple unescape functions") + { + tester.test_function_scopes({ + {"unescape( unescape( unescape(", "unescape(unescape(unescape(", + {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}} + }); + CHECK(tester.is_unescape_nesting_seen()); + } + SECTION("Multiple different unescape functions") + { + tester.test_function_scopes({ + {"unescape( decodeURI( decodeURIComponent(", + "unescape(decodeURI(decodeURIComponent(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE, + FuncType::UNESCAPE, + FuncType::UNESCAPE}} + }); + CHECK(tester.is_unescape_nesting_seen()); + } + SECTION("Multiple String.fromCharCode functions") + { + tester.test_function_scopes({ + {"String.fromCharCode( String.fromCharCode( String.fromCharCode(", + "String.fromCharCode(String.fromCharCode(String.fromCharCode(", + {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE, + FuncType::CHAR_CODE}} + }); + CHECK(!tester.is_unescape_nesting_seen()); + } + SECTION("Mixed function calls") + { + tester.test_function_scopes({ + {"general( unescape( String.fromCharCode(", + "var_0000(unescape(String.fromCharCode(", + {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE, + FuncType::CHAR_CODE}} + }); + CHECK(!tester.is_unescape_nesting_seen()); + } + } + SECTION("Closing") + { + SECTION("Multiple general functions") + { + tester.test_function_scopes({ + {"general( general( general( a ) )", "var_0000(var_0000(var_0000(var_0001))", + {FuncType::NOT_FUNC, FuncType::GENERAL}} 
+ }); + } + SECTION("Multiple unescape functions") + { + tester.test_function_scopes({ + {"unescape( unescape( unescape( '%62%61%72' ) )", + "unescape(unescape(unescape('%62%61%72'))", {FuncType::NOT_FUNC, + FuncType::UNESCAPE }} + }); + } + SECTION("Multiple different unescape functions") + { + tester.test_function_scopes({ + {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )", + "unescape(decodeURI(decodeURIComponent('%62%61%72'))", + {FuncType::NOT_FUNC, FuncType::UNESCAPE }} + }); + } + SECTION("Multiple String.fromCharCode methods") + { + tester.test_function_scopes({ + {"String.fromCharCode( String.fromCharCode( String.fromCharCode( 65, 0x42 ) )", + "String.fromCharCode(String.fromCharCode(String.fromCharCode(65,0x42))", + {FuncType::NOT_FUNC, FuncType::CHAR_CODE}} + }); + } + SECTION("Mixed function calls") + { + tester.test_function_scopes({ + {"general( unescape( String.fromCharCode( 65, 0x42 ) )", + "var_0000(unescape(String.fromCharCode(65,0x42))", {FuncType::NOT_FUNC, + FuncType::GENERAL}} + }); + } + } +} + +TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]") +{ + JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting, + max_bracket_depth); + + using FuncType = JSTokenizerTester::FuncType; + + SECTION("split in the middle of the identifier") + { + tester.test_function_scopes({ + {"un", "var_0000", {FuncType::NOT_FUNC}}, + {"escape", "unescape", {FuncType::NOT_FUNC}}, + {"(", "unescape(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}}, + {")", "unescape()", {FuncType::NOT_FUNC}}, + }); + } + SECTION("split between identifier and parenthesis") + { + tester.test_function_scopes({ + {"decodeURI", "decodeURI", {FuncType::NOT_FUNC}}, + {"(", "decodeURI(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}}, + {")", "decodeURI()", {FuncType::NOT_FUNC}}, + }); + } + SECTION("comment between identifier and parenthesis") + { + tester.test_function_scopes({ + {"unescape", "unescape", {FuncType::NOT_FUNC}}, + 
{"//String.fromCharCode\n", "unescape", {FuncType::NOT_FUNC}}, + {"(", "unescape(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}}, + {")", "unescape()", {FuncType::NOT_FUNC}}, + }); + } + SECTION("split in arguments") + { + tester.test_function_scopes({ + {"general", "var_0000", {FuncType::NOT_FUNC}}, + {"(", "var_0000(", {FuncType::NOT_FUNC, + FuncType::GENERAL}}, + {"a", "var_0000(var_0001", {FuncType::NOT_FUNC, + FuncType::GENERAL}}, + {"+ b", "var_0000(var_0001+var_0002", {FuncType::NOT_FUNC, + FuncType::GENERAL}}, + {")", "var_0000(var_0001+var_0002)", {FuncType::NOT_FUNC}}, + }); + } + SECTION("literal in arguments") + { + tester.test_function_scopes({ + {"String", "String", {FuncType::NOT_FUNC}}, + {".fromCharCode", "String.fromCharCode", {FuncType::NOT_FUNC}}, + {"(`", "String.fromCharCode(`", {FuncType::NOT_FUNC, + FuncType::CHAR_CODE}}, + {"un", "String.fromCharCode(`un", {FuncType::NOT_FUNC, + FuncType::CHAR_CODE}}, + {"escape(", "String.fromCharCode(`unescape(", {FuncType::NOT_FUNC, + FuncType::CHAR_CODE}}, + {"`)", "String.fromCharCode(`unescape(`)", {FuncType::NOT_FUNC}}, + }); + } + SECTION("Nesting - Mixed function calls") + { + tester.test_function_scopes({ + {"decode", "var_0000", {FuncType::NOT_FUNC}}, + {"URI", "decodeURI", {FuncType::NOT_FUNC}}, + {"Component", "decodeURIComponent", {FuncType::NOT_FUNC}}, + {"(", "decodeURIComponent(", {FuncType::NOT_FUNC, + FuncType::UNESCAPE}}, + {" a, ", "decodeURIComponent(var_0001,", + {FuncType::NOT_FUNC, + FuncType::UNESCAPE}}, + {" String.fromCharCode( ar", + "decodeURIComponent(var_0001,String.fromCharCode(var_0002", + {FuncType::NOT_FUNC, + FuncType::UNESCAPE, + FuncType::CHAR_CODE}}, + {"g ), b, foo", + "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0005", + {FuncType::NOT_FUNC, + FuncType::UNESCAPE}}, + {"bar( ", + "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006(", + {FuncType::NOT_FUNC, + FuncType::UNESCAPE, + FuncType::GENERAL}}, + {"))", + 
"decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006())", + {FuncType::NOT_FUNC}} + }); + } +} + #endif // CATCH_TEST_BUILD // Benchmark tests @@ -4215,7 +4693,7 @@ TEST_CASE("JS Normalizer, literals by 8 K", "[JSNormalizer]") { JSIdentifierCtxStub ident_ctx; JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_bracket_depth); - char dst[DEPTH]; + char dst[norm_depth]; constexpr size_t size = 1 << 13; @@ -4255,7 +4733,7 @@ TEST_CASE("JS Normalizer, literals by 64 K", "[JSNormalizer]") { JSIdentifierCtxStub ident_ctx; JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_scope_depth); - char dst[DEPTH]; + char dst[norm_depth]; constexpr size_t size = 1 << 16; @@ -4295,10 +4773,10 @@ TEST_CASE("JS Normalizer, id normalization", "[JSNormalizer]") { // around 11 000 identifiers std::string input; - for (int it = 0; it < DEPTH; ++it) + for (int it = 0; it < norm_depth; ++it) input.append("n" + std::to_string(it) + " "); - input.resize(DEPTH - strlen(s_closing_tag)); + input.resize(norm_depth - strlen(s_closing_tag)); input.append(s_closing_tag, strlen(s_closing_tag)); JSIdentifierCtxStub ident_ctx_mock; @@ -4378,14 +4856,14 @@ TEST_CASE("JS Normalizer, scope tracking", "[JSNormalizer]") TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]") { - auto w_semicolons = make_input("", "a;\n", "", depth); - auto wo_semicolons = make_input("", "a \n", "", depth); + auto w_semicolons = make_input("", "a;\n", "", norm_depth); + auto wo_semicolons = make_input("", "a \n", "", norm_depth); const char* src_w_semicolons = w_semicolons.c_str(); const char* src_wo_semicolons = wo_semicolons.c_str(); size_t src_len = w_semicolons.size(); JSIdentifierCtxStub ident_ctx_mock; - JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, depth); + JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, norm_depth); REQUIRE(norm_ret(normalizer_wo_ident, 
w_semicolons) == JSTokenizer::SCRIPT_ENDED); BENCHMARK("without semicolon insertion") diff --git a/src/utils/test/js_test_utils.cc b/src/utils/test/js_test_utils.cc index a57552818..5083dbcba 100644 --- a/src/utils/test/js_test_utils.cc +++ b/src/utils/test/js_test_utils.cc @@ -29,15 +29,44 @@ namespace snort { [[noreturn]] void FatalError(const char*, ...) { exit(EXIT_FAILURE); } -void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) {} +void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) { } uint8_t TraceApi::get_constraints_generation() { return 0; } -void TraceApi::filter(const Packet&) {} +void TraceApi::filter(const Packet&) { } } THREAD_LOCAL const snort::Trace* http_trace = nullptr; using namespace snort; +void JSTokenizerTester::test_function_scopes(const std::list<ScopeCase>& pdus) +{ + for (auto pdu : pdus) + { + const char* source; + const char* expected; + std::list<FuncType> exp_stack; + std::tie(source, expected, exp_stack) = pdu; + + normalizer.normalize(source, strlen(source)); + std::string result_buf(normalizer.get_script(), normalizer.script_size()); + CHECK(result_buf == expected); + + auto tmp_stack(normalizer.get_tokenizer().scope_stack); + CHECK(tmp_stack.size() == exp_stack.size()); + for (auto func_it = exp_stack.rbegin(); func_it != exp_stack.rend() and !tmp_stack.empty(); + func_it++) + { + CHECK(tmp_stack.top().func_call_type == *func_it); + tmp_stack.pop(); + } + } +} + +bool JSTokenizerTester::is_unescape_nesting_seen() const +{ + return normalizer.is_unescape_nesting_seen(); +} + void test_scope(const char* context, std::list stack) { std::string buf(context); diff --git a/src/utils/test/js_test_utils.h b/src/utils/test/js_test_utils.h index cce716282..10f5b0a20 100644 --- a/src/utils/test/js_test_utils.h +++ b/src/utils/test/js_test_utils.h @@ -34,7 +34,9 @@ constexpr int norm_depth = 65535; constexpr int max_template_nesting = 4; constexpr int max_bracket_depth = 256;
constexpr int max_scope_depth = 256; -static const std::unordered_set<std::string> s_ignored_ids { "console", "eval", "document" }; +static const std::unordered_set<std::string> s_ignored_ids { + "console", "eval", "document", "unescape", "decodeURI", "decodeURIComponent", "String" +}; namespace snort { @@ -60,6 +62,27 @@ public: size_t size() const override { return 0; } }; +class JSTokenizerTester +{ +public: + JSTokenizerTester(int32_t depth, uint32_t max_scope_depth, + const std::unordered_set<std::string>& ignored_ids, + uint8_t max_template_nesting, uint32_t max_bracket_depth) + : + ident_ctx(depth, max_scope_depth, ignored_ids), + normalizer(ident_ctx, depth, max_template_nesting, max_bracket_depth) + { } + + typedef JSTokenizer::FuncType FuncType; + typedef std::tuple<const char*, const char*, std::list<FuncType>> ScopeCase; + void test_function_scopes(const std::list<ScopeCase>& pdus); + bool is_unescape_nesting_seen() const; + +private: + JSIdentifierCtx ident_ctx; + snort::JSNormalizer normalizer; +}; + void test_scope(const char* context, std::list stack); void test_normalization(const char* source, const char* expected); void test_normalization_bad(const char* source, const char* expected, JSTokenizer::JSRet eret); diff --git a/src/utils/test/streambuf_test.cc b/src/utils/test/streambuf_test.cc index 61dfc483a..5129804ee 100644 --- a/src/utils/test/streambuf_test.cc +++ b/src/utils/test/streambuf_test.cc @@ -1555,6 +1555,62 @@ TEST_CASE("output buffer - basic", "[Stream buffers]") CHECK(off_c == len + 2); CHECK(off_e == 4096 + 2048); } + + SECTION("get char sequence") + { + ostreambuf_infl b; + const int exp_len = strlen(exp); + b.sputn(exp, exp_len); + + int off_c = b.pubseekoff(-exp_len, ios_base::cur, ios_base::out); + CHECK(off_c == 0); + + char* act_seq = new char[exp_len]; + CHECK(b.sgetn(act_seq, exp_len) == exp_len); + CHECK(!memcmp(exp, act_seq, exp_len)); + delete[] act_seq; + + int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out); + CHECK(new_off == exp_len); + } + + SECTION("get char sequence from the end") + { +
ostreambuf_infl b; + const int exp_len = strlen(exp); + char* buf = new char[exp_len]; + memcpy(buf, exp, exp_len); + b.pubsetbuf(buf, exp_len); + + int data_off = b.pubseekoff(exp_len, ios_base::beg, ios_base::out); + CHECK(data_off == exp_len); + + char* act_seq = new char[exp_len]; + memset(act_seq, '\0', exp_len); + CHECK(b.sgetn(act_seq, exp_len) == 0); + CHECK(strlen(act_seq) == 0); + delete[] act_seq; + + int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out); + CHECK(new_off == exp_len); + } + + SECTION("get char sequence more than available") + { + ostreambuf_infl b; + const int exp_len = strlen(exp); + char* buf = new char[exp_len]; + memcpy(buf, exp, exp_len); + b.pubsetbuf(buf, exp_len); + + char* act_seq = new char[exp_len + 1]; + CHECK(b.sgetn(act_seq, exp_len + 1) == exp_len); + CHECK(!memcmp(exp, act_seq, exp_len)); + delete[] act_seq; + + int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out); + CHECK(new_off == exp_len); + } } TEST_CASE("output buffer - buffer management", "[Stream buffers]") @@ -2008,11 +2064,8 @@ TEST_CASE("output stream - large data", "[Stream buffers]") { const int len = 1 << 21; const int plen = 1 << 12; - vector<char> chars; - - chars.reserve(len); - for (char& c : chars) - c = rand(); + vector<char> chars(len, '\0'); + generate_n(chars.begin(), len, rand); SECTION("0 bytes reserved") {