From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Tue, 18 Oct 2022 17:37:13 +0000 (+0000) Subject: Pull request #3623: utils: Add possibility to process keywords as identifiers X-Git-Tag: 3.1.45.0~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8ceeed2b9ca8d7013f9c205ad8a30e89e9fca0b4;p=thirdparty%2Fsnort3.git Pull request #3623: utils: Add possibility to process keywords as identifiers Merge in SNORT/snort3 from ~ANOROKH/snort3:js_bracket_mismatch to master Squashed commit of the following: commit 5e2066e75b0a7e8db2e148e356638ec4060fc84d Author: AnnaNorokh Date: Thu Oct 13 14:11:33 2022 +0300 utils: add possibility to process keywords as identifiers * added JavaScript scope property to track an object body, * process keywords as identifiers, if they were used as a function name or object member, * 'catch' and 'finally' were added to the ignore list, so they would not be normalized as function identifiers, * added unit tests to cover changes * 'function' isn't supported as an object member because of anonymous function peculiarities --- diff --git a/lua/snort_defaults.lua b/lua/snort_defaults.lua index a1c70f003..721198ec1 100644 --- a/lua/snort_defaults.lua +++ b/lua/snort_defaults.lua @@ -1341,6 +1341,9 @@ default_js_norm_prop_ignore = 'innerText', 'lang', 'nonce', 'outerText', 'style', 'tabIndex', 'title', 'attachInternals', + -- Promise + 'catch', 'finally', + -- Misc 'ExportStyle', 'callee' } diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index e4910a744..bd46004d8 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -101,7 +101,8 @@ private: { Scope(ScopeType t) : type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC), - ident_norm(true), block_param(false), do_loop(false), encoding(0), char_code_str(false) + ident_norm(true), block_param(false), do_loop(false), encoding(0), char_code_str(false), + in_object(false) {} ScopeType type; @@ -112,6 +113,7 @@ private: bool 
do_loop; uint32_t encoding; bool char_code_str; + bool in_object; }; enum ASIGroup @@ -244,6 +246,12 @@ private: bool char_code_str() { return scope_cur().char_code_str; } + void set_in_object(bool f) + { scope_cur().in_object = f; } + + bool in_object() + { return scope_cur().in_object; } + static JSProgramScopeType m2p(ScopeMetaType); static const char* m2str(ScopeMetaType); static bool is_operator(JSToken); diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index d35e4ca92..f75731ae4 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -1876,6 +1876,8 @@ JSTokenizer::JSRet JSTokenizer::scope_push(ScopeType t) return BRACKET_NESTING_OVERFLOW; JSRet ret = EOS; + bool inside_object = false; + switch (meta_type()) { case ScopeMetaType::FUNCTION: @@ -1890,6 +1892,7 @@ JSTokenizer::JSRet JSTokenizer::scope_push(ScopeType t) if (t == BRACES) ret = p_scope_push(meta_type()); + inside_object = true; break; } case ScopeMetaType::ARROW_FUNCTION: break; @@ -1899,6 +1902,7 @@ JSTokenizer::JSRet JSTokenizer::scope_push(ScopeType t) } scope_stack.emplace(t); + set_in_object(inside_object); return ret; } @@ -2697,9 +2701,11 @@ JSTokenizer::JSRet JSTokenizer::use_strict_directive_sc() JSTokenizer::JSRet JSTokenizer::keyword_var_decl() { + if (token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); alias_state = ALIAS_NONE; EXEC(do_spacing(KEYWORD_VAR_DECL)) ECHO; @@ -2709,9 +2715,11 @@ JSTokenizer::JSRet JSTokenizer::keyword_var_decl() JSTokenizer::JSRet JSTokenizer::keyword_function() { + if (token == DOT) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD_FUNCTION)) ECHO; BEGIN(regst); @@ -2722,9 +2730,11 @@ JSTokenizer::JSRet JSTokenizer::keyword_function() JSTokenizer::JSRet JSTokenizer::keyword_catch() { + if 
(token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); @@ -2739,9 +2749,11 @@ JSTokenizer::JSRet JSTokenizer::keyword_catch() JSTokenizer::JSRet JSTokenizer::keyword_while() { + if (token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); @@ -2759,9 +2771,11 @@ JSTokenizer::JSRet JSTokenizer::keyword_while() JSTokenizer::JSRet JSTokenizer::keyword_B() { + if (token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); @@ -2770,9 +2784,11 @@ JSTokenizer::JSRet JSTokenizer::keyword_B() JSTokenizer::JSRet JSTokenizer::keyword_new() { + if (token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); @@ -2784,9 +2800,11 @@ JSTokenizer::JSRet JSTokenizer::keyword_new() JSTokenizer::JSRet JSTokenizer::keyword_BA() { + if (token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_9)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); @@ -2795,9 +2813,11 @@ JSTokenizer::JSRet JSTokenizer::keyword_BA() JSTokenizer::JSRet JSTokenizer::keyword_finally() { + if (token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); @@ -2811,9 +2831,11 @@ JSTokenizer::JSRet 
JSTokenizer::keyword_finally() JSTokenizer::JSRet JSTokenizer::keyword_do() { + if (token == DOT or in_object()) + return general_identifier(); + EXEC(do_semicolon_insertion(ASI_GROUP_10)) - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); @@ -2827,11 +2849,13 @@ JSTokenizer::JSRet JSTokenizer::keyword_do() } JSTokenizer::JSRet JSTokenizer::keyword_class() -{ +{ + if (token == DOT or in_object()) + return general_identifier(); + previous_group = ASI_OTHER; dealias_reset(); - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD_CLASS)) ECHO; BEGIN(regst); @@ -2842,10 +2866,12 @@ JSTokenizer::JSRet JSTokenizer::keyword_class() JSTokenizer::JSRet JSTokenizer::keyword_other() { + if (token == DOT or in_object()) + return general_identifier(); + previous_group = ASI_OTHER; dealias_reset(); - if (token != DOT) - set_ident_norm(true); + set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index fb2345978..1550cd3b7 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -6650,5 +6650,199 @@ TEST_CASE("String Concatenation - Multiple PDU", "[JSNormalizer]") } } +TEST_CASE("keywords as identifiers", "[JSNormalizer]") +{ + SECTION("catch as identifier") + { + test_normalization( + "function() { a.catch() }; function() { [a.catch() ] }", + "function(){var_0000.catch()};function(){[var_0000.catch()]}" + ); + test_normalization("var a = A ({catch: 1})","var var_0000=var_0001({var_0002:1})"); + + } + SECTION("finally as identifier") + { + test_normalization( + "function() { a.finally() }; function() { [a.finally() ] }", + "function(){var_0000.finally()};function(){[var_0000.finally()]}" + ); + test_normalization("var a = A ({finally: 1})","var var_0000=var_0001({var_0002:1})"); + + } + SECTION("while as identifier") + { + 
test_normalization( + "function() { a.while() }; function() { [a.while() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization("var a = A ({while: 1})","var var_0000=var_0001({var_0002:1})"); + + } + SECTION("do as identifier") + { + test_normalization( + "function() { a.do() }; function() { [a.do() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization("var a = A ({do: 1})","var var_0000=var_0001({var_0002:1})"); + } + SECTION("new as identifier") + { + test_normalization( + "function() { a.new() }; function() { [a.new() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization("var a = A ({new: 1})","var var_0000=var_0001({var_0002:1})"); + + } + SECTION("class as identifier") + { + test_normalization( + "function() { a.class() }; function() { [a.class() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization("var a = A ({class: 1})","var var_0000=var_0001({var_0002:1})"); + test_normalization("class A { class : 1 }", "class var_0000{var_0001:1}"); + test_normalization("var obj = { class : 1 }", "var var_0000={var_0001:1}"); + } + SECTION("function as identifier") + { + test_normalization( + "function() { a.function() }; function() { [a.function() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + } + SECTION("const|var|let as identifier") + { + test_normalization( + "function() { a.const() }; function() { [a.const() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.var() }; function() { [a.var() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.let() }; function() { [a.let() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "var a = A ({const: 1, var: 0, 
let:1})", + "var var_0000=var_0001({var_0002:1,var_0003:0,var_0004:1})" + ); + } + SECTION("delete|throw|typeof|void as identifier") + { + test_normalization( + "function() { a.delete() }; function() { [a.delete() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.throw() }; function() { [a.throw() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.typeof() }; function() { [a.typeof() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.void() }; function() { [a.void() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "var a = A ({delete: 1, throw: 0, typeof:1, void: 0})", + "var var_0000=var_0001({var_0002:1,var_0003:0,var_0004:1,var_0005:0})" + ); + } + SECTION("other keywords as identifiers") + { + test_normalization( + "function() { a.case() }; function() { [a.case() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.default() }; function() { [a.default() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.enum() }; function() { [a.enum() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.export() }; function() { [a.export() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.extends() }; function() { [a.extends() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.implements() }; function() { [a.implements() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.import() }; function() { 
[a.import() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.in() }; function() { [a.in() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.instanceof() }; function() { [a.instanceof() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.interface() }; function() { [a.interface() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.package() }; function() { [a.package() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.private() }; function() { [a.private() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.protected() }; function() { [a.protected() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.public() }; function() { [a.public() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.static() }; function() { [a.static() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.super() }; function() { [a.super() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "function() { a.yield() }; function() { [a.yield() ] }", + "function(){var_0000.var_0001()};function(){[var_0000.var_0001()]}" + ); + test_normalization( + "var a = A ({case: 1, default: 0, enum:1, export: 0})", + "var var_0000=var_0001({var_0002:1,var_0003:0,var_0004:1,var_0005:0})" + ); + test_normalization( + "var a = A ({extends: 1, implements: 0, import:1, in: 0})", + "var var_0000=var_0001({var_0002:1,var_0003:0,var_0004:1,var_0005:0})" 
+ ); + test_normalization( + "var a = A ({instanceof: 1, interface: 0, package:1, private: 0})", + "var var_0000=var_0001({var_0002:1,var_0003:0,var_0004:1,var_0005:0})" + ); + test_normalization( + "var a = A ({protected: 1, public: 0, static:1, super: 0, yield: 1})", + "var var_0000=var_0001({var_0002:1,var_0003:0,var_0004:1,var_0005:0,var_0006:1})" + ); + } +} + #endif diff --git a/src/utils/test/js_test_utils.h b/src/utils/test/js_test_utils.h index f407fa55d..1f99ec7a8 100644 --- a/src/utils/test/js_test_utils.h +++ b/src/utils/test/js_test_utils.h @@ -130,7 +130,7 @@ static const JSTestConfig default_config({ "console", "eval", "document", "unescape", "decodeURI", "decodeURIComponent", "String", "name", "u"}), ignored_properties_list({ - "watch", "unwatch", "split", "reverse", "join", "name", "w"}), + "watch", "unwatch", "split", "reverse", "join", "name", "w", "catch", "finally"}), normalize_identifiers(true) });