]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #3282: http_inspect: add function state tracking for Enhanced javascrip...
authorMike Stepanek (mstepane) <mstepane@cisco.com>
Thu, 3 Mar 2022 20:45:40 +0000 (20:45 +0000)
committerMike Stepanek (mstepane) <mstepane@cisco.com>
Thu, 3 Mar 2022 20:45:40 +0000 (20:45 +0000)
Merge in SNORT/snort3 from ~VHORBATO/snort3:js_unesc_track to master

Squashed commit of the following:

commit 18222154a76c7b9377a1080e4a146dbdfa3964de
Author: Vitalii <vhorbato@cisco.com>
Date:   Wed Feb 16 16:15:25 2022 +0200

    http_inspect: add unescape function tracking for Enhanced JS Normalizer

15 files changed:
doc/reference/builtin_stubs.txt
doc/user/http_inspect.txt
src/service_inspectors/http_inspect/dev_notes.txt
src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_js_norm.cc
src/service_inspectors/http_inspect/http_tables.cc
src/utils/js_normalizer.h
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/streambuf.cc
src/utils/streambuf.h
src/utils/test/js_normalizer_test.cc
src/utils/test/js_test_utils.cc
src/utils/test/js_test_utils.h
src/utils/test/streambuf_test.cc

index 08b27fd4e3d09da345ab1ddb75c55cb701d9b539..eaae05481ebefea3c4b85cf25c75aba4bf965587 100644 (file)
@@ -1299,6 +1299,17 @@ does not apply to HTTP/2 or HTTP/3 traffic.
 
 The HTTP message body is gzip encoded and the FEXTRA flag is set in the gzip header.
 
+119:279
+
+Detected nesting of unescape functions (unescape, decodeURI, decodeURIComponent) in JavaScript code.
+This indicates that the code most likely has more than one level of obfuscation. This alert is raised
+by the enhanced JavaScript normalizer.
+
+119:280
+
+Detected more than one encoding within unescape function call arguments in JavaScript code.
+This alert is raised by the enhanced JavaScript normalizer.
+
 121:1
 
 Invalid flag set on HTTP/2 frame header
index 4c3f6176aebd55b8c5e60ecf48fea3ce9f3b4ae9..cab6882d7d023405e757b88801d3c0ca8a0eb736 100755 (executable)
@@ -271,6 +271,10 @@ For example:
 
 The default list of ignore-identifiers is present in "snort_defaults.lua".
 
+Unescape function names should remain intact in the output, so they ought to be
+included in the ignore list. If for some reason the user wants to disable the
+unescape-related features, removing the function names from the ignore list does the trick.
+
 ===== xff_headers
 
 This configuration supports defining custom x-forwarded-for type headers. In a
index aa451bae39da7d60b47dfea56967fd7eccf66cf9..5e785eda6aacf48a6e1a2a190126ff2d76094e5e 100755 (executable)
@@ -255,6 +255,13 @@ For example:
     var a = console.log
     a("hello") // will be substituted to 'console.log("hello")'
 
+In addition to scope tracking, JS Normalizer specifically tracks the Unicode unescape
+functions (unescape, decodeURI, decodeURIComponent). This allows detection of
+unescape functions nested within other unescape functions, which is a potential
+indicator of multilevel obfuscation. The definition of a function call depends on
+identifier substitution, so such identifiers must be included in the ignore list in
+order to use this feature.
+
 JS Normalizer's syntax parser follows ECMA-262 standard. For various features,
 tracking of variable scope and individual brackets is done in accordance to the standard.
 Additionally, Normalizer enforces standard limits on HTML content in JavaScript:
index 7e28b3fcdd191d73c4a1b529dcf0f0fdae12c54d..d6821dbbff242c82dab984d4810f5973de425b24 100755 (executable)
@@ -294,6 +294,8 @@ enum Infraction
     INF_INVALID_SUBVERSION = 133,
     INF_VERSION_0 = 134,
     INF_GZIP_FEXTRA = 135,
+    INF_JS_UNESCAPE_NEST = 136,
+    INF_JS_MULTIPLE_ENC = 137,
     INF__MAX_VALUE
 };
 
@@ -432,6 +434,8 @@ enum EventSid
     EVENT_VERSION_0 = 276,
     EVENT_VERSION_HIGHER_THAN_1 = 277,
     EVENT_GZIP_FEXTRA = 278,
+    EVENT_JS_UNESCAPE_NEST = 279,
+    EVENT_JS_MULTIPLE_ENC = 280,
     EVENT__MAX_VALUE
 };
 
index a8e332eb244dd374e0ae284dbeb7ff3c8d2ba8a4..d73de30a633b62d8f58356c52a37040eb9b14f30 100644 (file)
@@ -214,6 +214,12 @@ void HttpJsNorm::do_external(const Field& input, Field& output,
             break;
         }
 
+        if (js_ctx.is_unescape_nesting_seen())
+        {
+            *infractions += INF_JS_UNESCAPE_NEST;
+            events->create_event(EVENT_JS_UNESCAPE_NEST);
+        }
+
         if (ssn->js_built_in_event)
             break;
     }
@@ -347,6 +353,11 @@ void HttpJsNorm::do_inline(const Field& input, Field& output,
             *infractions += INF_JS_CODE_IN_EXTERNAL;
             events->create_event(EVENT_JS_CODE_IN_EXTERNAL);
         }
+        if (js_ctx.is_unescape_nesting_seen())
+        {
+            *infractions += INF_JS_UNESCAPE_NEST;
+            events->create_event(EVENT_JS_UNESCAPE_NEST);
+        }
 
         script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
     }
index 39f1dda20b784d6f68f17544d6526e7c784bd487..d68503d9081db2b47c3c23e4affa9ac01e16b2ff 100755 (executable)
@@ -338,6 +338,8 @@ const RuleMap HttpModule::http_events[] =
     { EVENT_VERSION_0,                  "HTTP version in start line is 0" },
     { EVENT_VERSION_HIGHER_THAN_1,      "HTTP version in start line is higher than 1" },
     { EVENT_GZIP_FEXTRA,                "HTTP gzip body with the FEXTRA flag set" },
+    { EVENT_JS_UNESCAPE_NEST,           "nested unescape functions in JavaScript code" },
+    { EVENT_JS_MULTIPLE_ENC,            "mixing of escape formats in JavaScript code" },
     { 0, nullptr }
 };
 
index 5e40a6d648e95f86179e79ad1400b4482078cbae..4a694d3c7a4c5096d8cac31de27c6e756e7324f3 100644 (file)
@@ -58,11 +58,16 @@ public:
     static size_t size()
     { return sizeof(JSNormalizer) + 16834; /* YY_BUF_SIZE */ }
 
+    // Forwards the tokenizer's "nested unescape call seen" flag to the caller.
+    bool is_unescape_nesting_seen() const
+    {
+        return tokenizer.is_unescape_nesting_seen();
+    }
+
 #ifdef CATCH_TEST_BUILD
     const char* get_tmp_buf() const
     { return tmp_buf; }
     size_t get_tmp_buf_size() const
     { return tmp_buf_size; }
+    // Test-only accessor: exposes the owned tokenizer read-only.
+    const JSTokenizer& get_tokenizer() const
+    {
+        return tokenizer;
+    }
 #endif
 
 #ifdef BENCHMARK_TEST
index 8908de56d868f7e444930d77d9afff1933a159e4..2e8cc6c8a6cbd0231418ebb44881ca645b4f263d 100644 (file)
@@ -42,7 +42,9 @@ extern THREAD_LOCAL const snort::Trace* http_trace;
 enum JSProgramScopeType : unsigned int;
 
 class JSIdentifierCtxBase;
-
+#ifdef CATCH_TEST_BUILD
+class JSTokenizerTester;
+#endif
 class JSTokenizer : public yyFlexLexer
 {
 private:
@@ -84,17 +86,24 @@ private:
         OBJECT,     // object definition, class definition
         SCOPE_META_TYPE_MAX
     };
+    enum FuncType // kind of function call recorded on a Scope
+    {
+        NOT_FUNC = 0, // initial value set by Scope's constructor; also detect_func_type()'s fallback
+        GENERAL, // a call that did not match any entry of function_identifiers
+        UNESCAPE, // unescape, decodeURI, decodeURIComponent
+        CHAR_CODE // String.fromCharCode
+    };
     struct Scope // element type of scope_stack
     {
         Scope(ScopeType t) :
-            type(t), meta_type(ScopeMetaType::NOT_SET), ident_norm(true), func_call(false),
-            block_param(false), do_loop(false)
+            type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC),
+            ident_norm(true), block_param(false), do_loop(false)
         {}
 
         ScopeType type;
         ScopeMetaType meta_type;
+        FuncType func_call_type; // replaces the former bool func_call; NOT_FUNC plays the role of "false"
         bool ident_norm;
-        bool func_call;
         bool block_param;
         bool do_loop;
     };
@@ -153,6 +162,7 @@ public:
 
     JSRet process(size_t& bytes_in);
 
+    bool is_unescape_nesting_seen() const;
 protected:
     [[noreturn]] void LexerError(const char* msg) override
     { snort::FatalError("%s", msg); }
@@ -194,8 +204,10 @@ private:
     ScopeMetaType meta_type();
     void set_ident_norm(bool);
     bool ident_norm();
-    void set_func_call(bool);
-    bool func_call();
+    void set_func_call_type(FuncType);
+    FuncType func_call_type();
+    FuncType detect_func_type();
+    void check_function_nesting(FuncType);
     void set_block_param(bool);
     bool block_param();
     void set_do_loop(bool);
@@ -214,6 +226,49 @@ private:
     void dealias_append();
     void dealias_finalize();
 
+    //rule handlers
+    JSRet html_closing_script_tag();
+    JSRet literal_dq_string_start();
+    JSRet literal_sq_string_start();
+    JSRet literal_template_start();
+    JSRet literal_regex_start();
+    void div_assignment_operator();
+    JSRet open_brace();
+    JSRet close_brace();
+    JSRet open_parenthesis();
+    JSRet close_parenthesis();
+    JSRet open_bracket();
+    JSRet close_bracket();
+    JSRet punctuator_prefix();
+    void dot_accessor();
+    JSRet punctuator_arrow();
+    JSRet punctuator_semicolon();
+    void punctuator_colon();
+    void operator_comparison();
+    void operator_complex_assignment();
+    void operator_logical();
+    void operator_shift();
+    void punctuator_comma();
+    JSRet use_strict_directive();
+    JSRet use_strict_directive_sc();
+    JSRet keyword_var_decl();
+    JSRet keyword_function();
+    JSRet keyword_catch();
+    JSRet keyword_while();
+    JSRet keyword_B();
+    JSRet keyword_BA();
+    JSRet keyword_finally();
+    JSRet keyword_do();
+    JSRet keyword_class();
+    JSRet keyword_other();
+    void operator_assignment();
+    JSRet operator_prefix();
+    JSRet operator_incr_decr();
+    JSRet general_operator();
+    JSRet general_literal();
+    JSRet general_identifier();
+    void general_unicode();
+
     static const char* p_scope_codes[];
 
     void* cur_buffer;
@@ -226,6 +281,7 @@ private:
     AliasState alias_state = ALIAS_NONE;
     bool prefix_increment = false;
     bool dealias_stored = false;
+    bool unescape_nest_seen = false;
 
     uint8_t max_template_nesting;
     std::stack<uint16_t, std::vector<uint16_t>> brace_depth;
@@ -268,8 +324,30 @@ private:
         {false, false, false, false, false, false, false, false, false, false, false,}
     };
 
+    std::streampos ignored_id_pos = -1;
+    struct FunctionIdentifier // pairs a function name with the FuncType it is classified as
+    {
+        bool operator< (const FunctionIdentifier& other) const // orders by name length only, not by name content
+        { return identifier.size() < other.identifier.size(); }
+
+        std::string identifier; // name as it appears in the normalized output
+        FuncType type;
+    };
+
+    const std::array<FunctionIdentifier, 4> function_identifiers
+    {{
+        {"unescape",            FuncType::UNESCAPE  },
+        {"decodeURI",           FuncType::UNESCAPE  },
+        {"decodeURIComponent",  FuncType::UNESCAPE  },
+        {"String.fromCharCode", FuncType::CHAR_CODE }        
+    }};
+
     const uint32_t max_bracket_depth;
     std::stack<Scope> scope_stack;
+
+#ifdef CATCH_TEST_BUILD
+    friend JSTokenizerTester;
+#endif // CATCH_TEST_BUILD
 };
 
 #endif // JS_TOKENIZER_H
index 18595f45d0b21246a0347ca9a0a8d835c62076e6..4439e496e0ddfb58f1c52ff517badeabe938b7b5 100644 (file)
@@ -1021,7 +1021,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 {LINE_TERMINATORS}                  { BEGIN(regst); newline_found = true; }
 
 <INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
-{HTML_TAG_SCRIPT_CLOSE}             { BEGIN(regst); if (!global_scope()) RETURN(ENDED_IN_INNER_SCOPE) else RETURN(SCRIPT_ENDED) }
+{HTML_TAG_SCRIPT_CLOSE}             { EXEC(html_closing_script_tag()) }
 
        {HTML_COMMENT_OPEN}          { BEGIN(lcomm); }
        {LINE_COMMENT_START}         { BEGIN(lcomm); }
@@ -1041,7 +1041,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <bcomm>{BLOCK_COMMENT_SKIP}         { }
 <bcomm><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
 
-       {LITERAL_DQ_STRING_START}    { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); set_ident_norm(true); }
+       {LITERAL_DQ_STRING_START}    { EXEC(literal_dq_string_start()) }
 <dqstr>{LITERAL_DQ_STRING_END}      { dealias_append(); ECHO; BEGIN(divop); }
 <dqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
 <dqstr>\\{CR}{LF}                   { }
@@ -1052,7 +1052,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <dqstr>{LITERAL_DQ_STRING_TEXT}     { dealias_append(); ECHO; }
 <dqstr><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
 
-       {LITERAL_SQ_STRING_START}    { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); set_ident_norm(true); }
+       {LITERAL_SQ_STRING_START}    { EXEC(literal_sq_string_start()) }
 <sqstr>{LITERAL_SQ_STRING_END}      { dealias_append(); ECHO; BEGIN(divop); }
 <sqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
 <sqstr>\\{CR}{LF}                   { }
@@ -1063,7 +1063,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <sqstr>{LITERAL_SQ_STRING_TEXT}     { dealias_append(); ECHO; }
 <sqstr><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
 
-       {LITERAL_TEMPLATE_START}                  { dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); set_ident_norm(true); }
+       {LITERAL_TEMPLATE_START}                  { EXEC(literal_template_start()) }
 <tmpll>(\\\\)*{LITERAL_TEMPLATE_END}             { dealias_append(); ECHO; BEGIN(divop); }
 <tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START}     { EXEC(process_subst_open()) dealias_reset(); }
 <tmpll>{HTML_TAG_SCRIPT_CLOSE}                   { BEGIN(regst); RETURN(CLOSING_TAG) }
@@ -1072,7 +1072,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <tmpll>{LITERAL_TEMPLATE_OTHER}                  { dealias_append(); ECHO; }
 <tmpll><<EOF>>                                   { RETURN(SCRIPT_CONTINUE) }
 
-<regst>{LITERAL_REGEX_START}        { dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); set_ident_norm(true); }
+<regst>{LITERAL_REGEX_START}        { EXEC(literal_regex_start()) }
 <regex>{LITERAL_REGEX_END}          { ECHO; BEGIN(divop); }
 <regex>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
 <regex>{LITERAL_REGEX_SKIP}         { ECHO; }
@@ -1083,68 +1083,54 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <regex><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
 
 <divop>{DIV_OPERATOR}               |
-<divop>{DIV_ASSIGNMENT_OPERATOR}    { dealias_equals(true); previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); set_ident_norm(true); }
-
-{OPEN_BRACE}                        { dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_1)) if (meta_type() == ScopeMetaType::NOT_SET) { if (is_operator(token) || token == COLON || func_call()) set_meta_type(ScopeMetaType::OBJECT); else { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } } EXEC(scope_push(BRACES)) if (!brace_depth.empty()) brace_depth.top()++; process_punctuator(); }
-{CLOSE_BRACE}                       { dealias_clear_mutated(false); EXEC(do_semicolon_insertion(ASI_GROUP_2)) if (meta_type() != ScopeMetaType::NOT_SET) EXEC(p_scope_pop(meta_type())) EXEC(scope_pop(BRACES)) process_closing_brace(); set_ident_norm(true); }
-{OPEN_PARENTHESIS}                  { dealias_clear_mutated(true); dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_3)) EXEC(scope_push(PARENTHESES)) if (token == IDENTIFIER || token == CLOSING_BRACKET || token == KEYWORD) set_func_call(true); process_punctuator(); }
-{CLOSE_PARENTHESIS}                 { dealias_clear_mutated(false); dealias_reset(); bool f_call = func_call(); bool id_norm = ident_norm(); if (meta_type() != ScopeMetaType::NOT_SET) EXEC(p_scope_pop(meta_type())) EXEC(scope_pop(PARENTHESES)) if (!f_call) set_ident_norm(id_norm); if (block_param()) { previous_group = ASI_OTHER; set_block_param(false); } else { EXEC(do_semicolon_insertion(ASI_GROUP_5)) } ECHO; token = PUNCTUATOR; BEGIN(divop); }
-{OPEN_BRACKET}                      { dealias_clear_mutated(true); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_3)) EXEC(do_semicolon_insertion(ASI_GROUP_4)) EXEC(scope_push(BRACKETS)) process_punctuator(); }
-{CLOSE_BRACKET}                     { dealias_clear_mutated(false); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_4)) EXEC(scope_pop(BRACKETS)) ECHO; token = CLOSING_BRACKET; BEGIN(divop); }
-
-{PUNCTUATOR_PREFIX}                 { process_punctuator(); EXEC(do_semicolon_insertion(ASI_GROUP_10)) set_ident_norm(true); }
-{DOT_ACCESSOR}                      { dealias_clear_mutated(true); previous_group = ASI_OTHER; dealias_append(); ECHO; token = DOT; BEGIN(regst); }
-{PUNCTUATOR_ARROW}                  { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_reset(); process_punctuator(); set_ident_norm(true); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::FUNCTION); EXEC(p_scope_push(meta_type())) } }
-{PUNCTUATOR_SEMICOLON}              { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_finalize(); process_punctuator(); set_ident_norm(true); if (meta_type() != ScopeMetaType::NOT_SET) { EXEC(p_scope_pop(meta_type())) set_meta_type(ScopeMetaType::NOT_SET); } }
-{PUNCTUATOR_COLON}                  { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_reset(); process_punctuator(COLON); set_ident_norm(true); }
-{OPERATOR_COMPARISON}               { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_COMPARISON); set_ident_norm(true); }
-{OPERATOR_COMPLEX_ASSIGNMENT}       { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_equals(true); process_punctuator(OPERATOR_COMPLEX_ASSIGNMENT); set_ident_norm(true); }
-{OPERATOR_LOGICAL}                  { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_LOGICAL); set_ident_norm(true); }
-{OPERATOR_SHIFT}                    { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); process_punctuator(OPERATOR_SHIFT); set_ident_norm(true); }
-{PUNCTUATOR_COMMA}                  { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_finalize(); process_punctuator(); set_ident_norm(true); }
-
-{USE_STRICT_DIRECTIVE}              { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; set_ident_norm(true); }
-{USE_STRICT_DIRECTIVE_SC}           { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); set_ident_norm(true); }
-
-{KEYWORD_VAR_DECL}                  { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); alias_state = ALIAS_NONE; EXEC(do_spacing(KEYWORD_VAR_DECL)) ECHO; BEGIN(regst); }
-{KEYWORD_FUNCTION}                  { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_FUNCTION)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) set_meta_type(ScopeMetaType::FUNCTION); }
+<divop>{DIV_ASSIGNMENT_OPERATOR}    { div_assignment_operator(); }
+
+{OPEN_BRACE}                        { EXEC(open_brace()) }
+{CLOSE_BRACE}                       { EXEC(close_brace()) }
+{OPEN_PARENTHESIS}                  { EXEC(open_parenthesis()) }
+{CLOSE_PARENTHESIS}                 { EXEC(close_parenthesis()) }
+{OPEN_BRACKET}                      { EXEC(open_bracket()) }
+{CLOSE_BRACKET}                     { EXEC(close_bracket()) }
+
+{PUNCTUATOR_PREFIX}                 { EXEC(punctuator_prefix()) }
+{DOT_ACCESSOR}                      { dot_accessor(); }
+{PUNCTUATOR_ARROW}                  { EXEC(punctuator_arrow()) }
+{PUNCTUATOR_SEMICOLON}              { EXEC(punctuator_semicolon()) }
+{PUNCTUATOR_COLON}                  { punctuator_colon(); }
+{OPERATOR_COMPARISON}               { operator_comparison(); }
+{OPERATOR_COMPLEX_ASSIGNMENT}       { operator_complex_assignment(); }
+{OPERATOR_LOGICAL}                  { operator_logical(); }
+{OPERATOR_SHIFT}                    { operator_shift(); }
+{PUNCTUATOR_COMMA}                  { punctuator_comma(); }
+
+{USE_STRICT_DIRECTIVE}              { EXEC(use_strict_directive()) }
+{USE_STRICT_DIRECTIVE_SC}           { EXEC(use_strict_directive_sc()) }
+
+{KEYWORD_VAR_DECL}                  { EXEC(keyword_var_decl()) }
+{KEYWORD_FUNCTION}                  { EXEC(keyword_function()) }
 {KEYWORD_IF}                        |
 {KEYWORD_FOR}                       |
 {KEYWORD_WITH}                      |
 {KEYWORD_SWITCH}                    |
-{KEYWORD_CATCH}                     { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } set_block_param(true); }
-{KEYWORD_WHILE}                     { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } if (do_loop()) set_do_loop(false); else set_block_param(true); }
-{KEYWORD_B}                         { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-{KEYWORD_BA}                        { EXEC(do_semicolon_insertion(ASI_GROUP_9)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
+{KEYWORD_CATCH}                     { EXEC(keyword_catch()) }
+{KEYWORD_WHILE}                     { EXEC(keyword_while()) }
+{KEYWORD_B}                         { EXEC(keyword_B()) }
+{KEYWORD_BA}                        { EXEC(keyword_BA()) }
 {KEYWORD_TRY}                       |
 {KEYWORD_ELSE}                      |
-{KEYWORD_FINALLY}                   { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } }
-{KEYWORD_DO}                        { EXEC(do_semicolon_insertion(ASI_GROUP_10)) if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_BLOCK)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) { set_meta_type(ScopeMetaType::BLOCK); EXEC(p_scope_push(meta_type())) } set_do_loop(true); }
-{KEYWORD_CLASS}                     { previous_group = ASI_OTHER; dealias_reset(); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD_CLASS)) ECHO; BEGIN(regst); if (meta_type() == ScopeMetaType::NOT_SET) set_meta_type(ScopeMetaType::OBJECT); }
-{KEYWORD_OTHER}                     { previous_group = ASI_OTHER; dealias_reset(); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-
-{OPERATOR_ASSIGNMENT}               { previous_group = ASI_OTHER; dealias_equals(false); process_punctuator(OPERATOR_ASSIGNMENT); set_ident_norm(true); }
-{OPERATOR_PREFIX}                   { dealias_prefix_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_6)) EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true); }
-{OPERATOR_INCR_DECR}                { dealias_increment(); dealias_reset(); EXEC(do_semicolon_insertion(ASI_GROUP_8)) EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true); }
-{OPERATOR}                          { dealias_clear_mutated(false); previous_group = ASI_OTHER; dealias_prefix_reset(); EXEC(do_operator_spacing()) ECHO; BEGIN(divop); set_ident_norm(true);}
-{LITERAL}                           { dealias_clear_mutated(false); dealias_append(); EXEC(do_semicolon_insertion(ASI_GROUP_7)) EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); set_ident_norm(true); }
-{IDENTIFIER}                        { 
-                                        if (unescape(YYText())) {
-                                            bool id_part = (token == DOT);
-                                            bool assignment_start = token == KEYWORD_VAR_DECL ||
-                                                                    token == PUNCTUATOR ||
-                                                                    token == UNDEFINED;
-                                            EXEC(do_semicolon_insertion(ASI_GROUP_7)) 
-                                            EXEC(do_spacing(IDENTIFIER))
-                                            EXEC(do_identifier_substitution(YYText(), id_part))
-                                            dealias_identifier(id_part, assignment_start);
-                                        }
-                                        else
-                                            EXEC(do_semicolon_insertion(ASI_GROUP_7)) 
-                                        BEGIN(divop);
-                                    }
-
-.|{ALL_UNICODE}                     { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); set_ident_norm(true); }
+{KEYWORD_FINALLY}                   { EXEC(keyword_finally()) }
+{KEYWORD_DO}                        { EXEC(keyword_do()) }
+{KEYWORD_CLASS}                     { EXEC(keyword_class()) }
+{KEYWORD_OTHER}                     { EXEC(keyword_other()) }
+
+{OPERATOR_ASSIGNMENT}               { operator_assignment(); }
+{OPERATOR_PREFIX}                   { EXEC(operator_prefix()) }
+{OPERATOR_INCR_DECR}                { EXEC(operator_incr_decr()) }
+{OPERATOR}                          { EXEC(general_operator()) }
+{LITERAL}                           { EXEC(general_literal()) }
+{IDENTIFIER}                        { EXEC(general_identifier()) }
+
+.|{ALL_UNICODE}                     { general_unicode(); }
 <<EOF>>                             { EEOF(eval_eof()) }
 
 %%
@@ -1392,6 +1378,7 @@ JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b
 
     if (ident_ctx.is_ignored(lexeme) && !id_part)
     {
+        ignored_id_pos = yyout.rdbuf()->pubseekoff(0, yyout.cur, std::ios_base::out);
         set_ident_norm(false);
         yyout << lexeme;
         return EOS;
@@ -1403,11 +1390,15 @@ JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b
     if (ident)
     {
         set_ident_norm(false);
+        ignored_id_pos = yyout.rdbuf()->pubseekoff(0, yyout.cur, std::ios_base::out);
         last_dealiased = std::string(YYText());
         dealias_stored = true;
     }
     else
+    {
+        ignored_id_pos = -1;
         ident = ident_ctx.substitute(lexeme);
+    }
 
     if (!ident)
     {
@@ -1764,14 +1755,74 @@ bool JSTokenizer::ident_norm()
     return scope_cur().ident_norm;
 }
 
-void JSTokenizer::set_func_call(bool f)
+void JSTokenizer::set_func_call_type(JSTokenizer::FuncType f)
+{
+    // Store the call type on the current scope.
+    auto& current = scope_cur();
+    current.func_call_type = f;
+}
+
+JSTokenizer::FuncType JSTokenizer::func_call_type() // returns the call type stored on the current scope
 {
-    scope_cur().func_call = f;
+    return scope_cur().func_call_type;
 }
 
-bool JSTokenizer::func_call()
+JSTokenizer::FuncType JSTokenizer::detect_func_type()
 {
-    return scope_cur().func_call;
+    switch(token)
+    {
+    case CLOSING_BRACKET:
+    case KEYWORD:
+        return FuncType::GENERAL;
+
+    case IDENTIFIER:
+    {
+        FuncType ret = FuncType::GENERAL;
+        if (ignored_id_pos >= 0)      
+        {
+            std::streambuf* pbuf = yyout.rdbuf();
+            std::streamsize size = pbuf->pubseekoff(0, yyout.cur, yyout.out) - ignored_id_pos;
+            assert(size >= 0);
+            
+            char tail[256];
+            assert((long unsigned int)size <= sizeof(tail));
+            size = std::min((long unsigned int)size, sizeof(tail));
+
+            pbuf->pubseekoff(-size, yyout.cur, yyout.out);
+            pbuf->sgetn(tail, size);
+
+            for (const auto& id : function_identifiers)
+            {
+                if ((unsigned)size == (unsigned)id.identifier.size() &&
+                    memcmp(tail, id.identifier.data(), size) == 0)
+                {
+                    ret = id.type;
+                    break;
+                }
+            }
+        }
+        return ret;
+    }
+
+    default:
+        return FuncType::NOT_FUNC;
+    }
+}
+
+void JSTokenizer::check_function_nesting(JSTokenizer::FuncType type)
+{
+    // Only unescape-type calls take part in nesting detection;
+    // every other call type leaves the flag untouched.
+    if (type != JSTokenizer::FuncType::UNESCAPE)
+        return;
+
+    // The new call is an unescape; flag nesting when the current scope
+    // is already recorded as an unescape call.
+    if (func_call_type() == JSTokenizer::FuncType::UNESCAPE)
+        unescape_nest_seen = true;
+}
+
+// Reports whether check_function_nesting() has flagged an unescape call inside another one.
+bool JSTokenizer::is_unescape_nesting_seen() const
+{ return unescape_nest_seen; }
 
 void JSTokenizer::set_block_param(bool f)
@@ -1965,9 +2016,512 @@ void JSTokenizer::dealias_finalize()
         dealias_reset();
 }
 
+// Result for a closing script tag (per the function name): SCRIPT_ENDED when
+// global_scope() holds, ENDED_IN_INNER_SCOPE otherwise.
+JSTokenizer::JSRet JSTokenizer::html_closing_script_tag()
+{
+    if (global_scope())
+        return SCRIPT_ENDED;
+
+    return ENDED_IN_INNER_SCOPE;
+}
+
+// Action for the start of a double-quoted string literal: calls dealias_append(),
+// runs semicolon insertion for ASI_GROUP_7 and spacing for a LITERAL, echoes the
+// match, switches the scanner to the dqstr start condition and sets ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::literal_dq_string_start()
+{
+    dealias_append();
+    EXEC(do_semicolon_insertion(ASI_GROUP_7))
+    EXEC(do_spacing(LITERAL))
+    ECHO;
+    BEGIN(dqstr);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for the start of a single-quoted string literal: calls dealias_append(),
+// runs semicolon insertion for ASI_GROUP_7 and spacing for a LITERAL, echoes the
+// match, switches the scanner to the sqstr start condition and sets ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::literal_sq_string_start()
+{
+    dealias_append();
+    EXEC(do_semicolon_insertion(ASI_GROUP_7))
+    EXEC(do_spacing(LITERAL))
+    ECHO;
+    BEGIN(sqstr);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for the start of a template literal: calls dealias_append(), runs
+// semicolon insertion for ASI_GROUP_7 and spacing for a LITERAL, echoes the
+// match, switches the scanner to the tmpll start condition and sets ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::literal_template_start()
+{
+    dealias_append();
+    EXEC(do_semicolon_insertion(ASI_GROUP_7))
+    EXEC(do_spacing(LITERAL))
+    ECHO;
+    BEGIN(tmpll);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for the start of a regular-expression literal. Unlike the string
+// starters it calls dealias_reset(), writes the '/' itself instead of echoing
+// the match, and hands everything after the first matched character back to the
+// scanner (yyless(1)) before entering the regex start condition.
+// NOTE(review): states_correct() is defined elsewhere; presumably it adjusts
+// saved scanner state for the yyless() push-back - confirm.
+// Returns EOS on the normal path; EXEC presumably returns early on failure.
+JSTokenizer::JSRet JSTokenizer::literal_regex_start()
+{
+    dealias_reset();
+    EXEC(do_semicolon_insertion(ASI_GROUP_7))
+    EXEC(do_spacing(LITERAL))
+    yyout << '/';
+    states_correct(1);
+    yyless(1);
+    BEGIN(regex);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for the division-assignment operator (per the function name): calls
+// dealias_equals(true), sets previous_group to ASI_OTHER, echoes the match,
+// records PUNCTUATOR as the last token, returns the scanner to the INITIAL
+// start condition and sets ident_norm.
+void JSTokenizer::div_assignment_operator()
+{
+    dealias_equals(true);
+    previous_group = ASI_OTHER;
+    ECHO;
+    token = PUNCTUATOR;
+    BEGIN(INITIAL);
+    set_ident_norm(true);
+}
+
+// Action for an opening brace. When no meta type is pending, the brace is taken
+// as an OBJECT if the previous token is an operator or COLON, or if the current
+// scope is a function call; otherwise it is taken as a BLOCK and a program scope
+// is pushed for it. A BRACES scope is then pushed, the innermost brace_depth
+// counter (if any) is incremented and the punctuator is processed.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::open_brace()
+{
+    dealias_reset();
+    EXEC(do_semicolon_insertion(ASI_GROUP_1))
+
+    if (meta_type() == ScopeMetaType::NOT_SET)
+    {
+        const bool opens_object = is_operator(token) || token == COLON ||
+            func_call_type() != FuncType::NOT_FUNC;
+
+        if (!opens_object)
+        {
+            set_meta_type(ScopeMetaType::BLOCK);
+            EXEC(p_scope_push(meta_type()))
+        }
+        else
+        {
+            set_meta_type(ScopeMetaType::OBJECT);
+        }
+    }
+
+    EXEC(scope_push(BRACES))
+
+    if (!brace_depth.empty())
+        ++brace_depth.top();
+
+    process_punctuator();
+    return EOS;
+}
+
+// Action for a closing brace: calls dealias_clear_mutated(false), runs semicolon
+// insertion for ASI_GROUP_2, pops the program scope when a meta type is set,
+// pops the BRACES scope, then calls process_closing_brace() and sets ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::close_brace()
+{
+    dealias_clear_mutated(false);
+    EXEC(do_semicolon_insertion(ASI_GROUP_2))
+    // The if guards only the p_scope_pop step; scope_pop below always runs.
+    if (meta_type() != ScopeMetaType::NOT_SET)
+        EXEC(p_scope_pop(meta_type()))
+    EXEC(scope_pop(BRACES))
+    process_closing_brace();
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for an opening parenthesis. The call type is detected and checked for
+// unescape nesting before the PARENTHESES scope is pushed, so that
+// check_function_nesting() still sees the enclosing scope's call type; the
+// detected type is stored afterwards (presumably in the newly pushed scope).
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::open_parenthesis()
+{
+    dealias_clear_mutated(true);
+    dealias_reset();
+    EXEC(do_semicolon_insertion(ASI_GROUP_3))
+    FuncType f_call = detect_func_type();
+    check_function_nesting(f_call);
+    EXEC(scope_push(PARENTHESES))
+    set_func_call_type(f_call);
+    process_punctuator();
+    return EOS;
+}
+
+// Action for a closing parenthesis. The call type and ident_norm flag are read
+// before the scopes are popped; when the closed parentheses were not a function
+// call (FuncType::NOT_FUNC), that ident_norm value is re-applied after the pop.
+// If block_param() is set, previous_group becomes ASI_OTHER and the flag is
+// cleared; otherwise semicolon insertion runs for ASI_GROUP_5. Finally the match
+// is echoed, the last token becomes PUNCTUATOR and the scanner enters divop.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::close_parenthesis()
+{
+    dealias_clear_mutated(false);
+    dealias_reset();
+    // Snapshot taken before the pops below.
+    FuncType f_call = func_call_type();
+    bool id_norm = ident_norm();
+    if (meta_type() != ScopeMetaType::NOT_SET)
+        EXEC(p_scope_pop(meta_type()))
+    EXEC(scope_pop(PARENTHESES))
+    if (f_call == FuncType::NOT_FUNC)
+        set_ident_norm(id_norm);
+    if (block_param())
+    {
+        previous_group = ASI_OTHER;
+        set_block_param(false);
+    }
+    else
+    {
+        EXEC(do_semicolon_insertion(ASI_GROUP_5))
+    }
+    ECHO;
+    token = PUNCTUATOR;
+    BEGIN(divop);
+    return EOS;
+}
+
+// Action for an opening bracket: calls dealias_clear_mutated(true) and
+// dealias_append(), runs semicolon insertion, pushes a BRACKETS scope and
+// processes the punctuator.
+// NOTE(review): semicolon insertion runs back to back for ASI_GROUP_3 and
+// ASI_GROUP_4 here, while close_bracket() uses ASI_GROUP_4 only - confirm that
+// the second call is intended.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::open_bracket()
+{
+    dealias_clear_mutated(true);
+    dealias_append();
+    EXEC(do_semicolon_insertion(ASI_GROUP_3))
+    EXEC(do_semicolon_insertion(ASI_GROUP_4))
+    EXEC(scope_push(BRACKETS))
+    process_punctuator();
+    return EOS;
+}
+
+// Action for a closing bracket: calls dealias_clear_mutated(false) and
+// dealias_append(), runs semicolon insertion for ASI_GROUP_4, pops the BRACKETS
+// scope, echoes the match, records CLOSING_BRACKET as the last token and
+// switches the scanner to the divop start condition.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::close_bracket()
+{
+    dealias_clear_mutated(false);
+    dealias_append();
+    EXEC(do_semicolon_insertion(ASI_GROUP_4))
+    EXEC(scope_pop(BRACKETS))
+    ECHO;
+    token = CLOSING_BRACKET;
+    BEGIN(divop);
+    return EOS;
+}
+
+// Action for a prefix punctuator (per the function name): processes the
+// punctuator first, then runs semicolon insertion for ASI_GROUP_10 and sets
+// ident_norm. Returns EOS on the normal path; EXEC is defined elsewhere and
+// presumably returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::punctuator_prefix()
+{
+    process_punctuator();
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for the member-access dot: calls dealias_clear_mutated(true), sets
+// previous_group to ASI_OTHER, calls dealias_append(), echoes the match, records
+// DOT as the last token and switches the scanner to the regst start condition.
+// ident_norm is left untouched here.
+void JSTokenizer::dot_accessor()
+{
+    dealias_clear_mutated(true);
+    previous_group = ASI_OTHER;
+    dealias_append();
+    ECHO;
+    token = DOT;
+    BEGIN(regst);
+}
+
+// Action for the arrow punctuator (per the function name): resets alias
+// tracking, processes the punctuator and sets ident_norm. If no meta type is
+// pending, the meta type becomes FUNCTION and a program scope is pushed for it.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::punctuator_arrow()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_reset();
+    process_punctuator();
+    set_ident_norm(true);
+
+    // A meta type is already pending: nothing more to set up.
+    if (meta_type() != ScopeMetaType::NOT_SET)
+        return EOS;
+
+    set_meta_type(ScopeMetaType::FUNCTION);
+    EXEC(p_scope_push(meta_type()))
+    return EOS;
+}
+
+// Action for a semicolon: calls dealias_clear_mutated(false), sets
+// previous_group to ASI_OTHER, calls dealias_finalize(), processes the
+// punctuator and sets ident_norm. A pending meta type is popped as a program
+// scope and then reset to NOT_SET.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::punctuator_semicolon()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_finalize();
+    process_punctuator();
+    set_ident_norm(true);
+    if (meta_type() != ScopeMetaType::NOT_SET)
+    {
+        EXEC(p_scope_pop(meta_type()))
+        set_meta_type(ScopeMetaType::NOT_SET);
+    }
+    return EOS;
+}
+
+// Action for a colon: calls dealias_clear_mutated(false), sets previous_group to
+// ASI_OTHER, calls dealias_reset(), processes the punctuator with the COLON
+// token type and sets ident_norm.
+void JSTokenizer::punctuator_colon()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_reset();
+    process_punctuator(COLON);
+    set_ident_norm(true);
+}
+
+// Action for a comparison operator: calls dealias_clear_mutated(false), sets
+// previous_group to ASI_OTHER, calls dealias_prefix_reset(), processes the
+// punctuator as OPERATOR_COMPARISON and sets ident_norm.
+void JSTokenizer::operator_comparison()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_prefix_reset();
+    process_punctuator(OPERATOR_COMPARISON);
+    set_ident_norm(true);
+}
+
+// Action for a compound assignment operator (per the function name): calls
+// dealias_clear_mutated(false), sets previous_group to ASI_OTHER, calls
+// dealias_equals(true), processes the punctuator as OPERATOR_COMPLEX_ASSIGNMENT
+// and sets ident_norm.
+void JSTokenizer::operator_complex_assignment()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_equals(true);
+    process_punctuator(OPERATOR_COMPLEX_ASSIGNMENT);
+    set_ident_norm(true);
+}
+
+// Action for a logical operator: calls dealias_clear_mutated(false), sets
+// previous_group to ASI_OTHER, calls dealias_prefix_reset(), processes the
+// punctuator as OPERATOR_LOGICAL and sets ident_norm.
+void JSTokenizer::operator_logical()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_prefix_reset();
+    process_punctuator(OPERATOR_LOGICAL);
+    set_ident_norm(true);
+}
+
+// Action for a shift operator: calls dealias_clear_mutated(false), sets
+// previous_group to ASI_OTHER, calls dealias_prefix_reset(), processes the
+// punctuator as OPERATOR_SHIFT and sets ident_norm.
+void JSTokenizer::operator_shift()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_prefix_reset();
+    process_punctuator(OPERATOR_SHIFT);
+    set_ident_norm(true);
+}
+
+// Action for a comma: calls dealias_clear_mutated(false), sets previous_group to
+// ASI_OTHER, calls dealias_finalize() (as the semicolon action does), processes
+// the punctuator and sets ident_norm.
+void JSTokenizer::punctuator_comma()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_finalize();
+    process_punctuator();
+    set_ident_norm(true);
+}
+
+// Action for a "use strict" directive (per the function name): sets
+// previous_group to ASI_OTHER, runs spacing for a DIRECTIVE, echoes the match,
+// returns the scanner to INITIAL and writes a ';' after the directive.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::use_strict_directive()
+{
+    previous_group = ASI_OTHER;
+    EXEC(do_spacing(DIRECTIVE))
+    ECHO;
+    BEGIN(INITIAL);
+    yyout << ';';
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Same steps as use_strict_directive() except that no ';' is written after the
+// echoed match (presumably the matched text already carries its semicolon -
+// confirm against the scanner rule).
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::use_strict_directive_sc()
+{
+    previous_group = ASI_OTHER;
+    EXEC(do_spacing(DIRECTIVE))
+    ECHO;
+    BEGIN(INITIAL);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for a variable-declaration keyword (per the function name): runs
+// semicolon insertion for ASI_GROUP_10, sets ident_norm unless the previous
+// token is DOT, resets alias_state to ALIAS_NONE, runs spacing for
+// KEYWORD_VAR_DECL, echoes the match and enters the regst start condition.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_var_decl()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+    if (token != DOT)
+        set_ident_norm(true);
+    alias_state = ALIAS_NONE;
+    EXEC(do_spacing(KEYWORD_VAR_DECL))
+    ECHO;
+    BEGIN(regst);
+    return EOS;
+}
+
+// Action for the function keyword (per the function name): runs semicolon
+// insertion for ASI_GROUP_10, sets ident_norm unless the previous token is DOT,
+// runs spacing for KEYWORD_FUNCTION, echoes the match and enters regst. If no
+// meta type is pending it becomes FUNCTION; no program scope is pushed here.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_function()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD_FUNCTION))
+    ECHO;
+    BEGIN(regst);
+    if (meta_type() == ScopeMetaType::NOT_SET)
+        set_meta_type(ScopeMetaType::FUNCTION);
+    return EOS;
+}
+
+// Action for the catch keyword (per the function name): runs semicolon insertion
+// for ASI_GROUP_10, sets ident_norm unless the previous token is DOT, runs
+// spacing for KEYWORD_BLOCK, echoes the match and enters regst. If no meta type
+// is pending it becomes BLOCK and a program scope is pushed. block_param is
+// always set, which close_parenthesis() later consumes.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_catch()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD_BLOCK))
+    ECHO;
+    BEGIN(regst);
+    if (meta_type() == ScopeMetaType::NOT_SET)
+    {
+        set_meta_type(ScopeMetaType::BLOCK);
+        EXEC(p_scope_push(meta_type()))
+    }
+    set_block_param(true);
+    return EOS;
+}
+
+// Action for the while keyword (per the function name). After the usual keyword
+// steps (ASI_GROUP_10, ident_norm unless after DOT, KEYWORD_BLOCK spacing, echo,
+// regst) a pending-free meta type becomes BLOCK with a program scope pushed.
+// When do_loop() is set, this call clears that flag; otherwise block_param is
+// set, as for any keyword followed by a parenthesised block parameter.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_while()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+
+    if (token != DOT)
+    {
+        set_ident_norm(true);
+    }
+
+    EXEC(do_spacing(KEYWORD_BLOCK))
+    ECHO;
+    BEGIN(regst);
+
+    if (meta_type() == ScopeMetaType::NOT_SET)
+    {
+        set_meta_type(ScopeMetaType::BLOCK);
+        EXEC(p_scope_push(meta_type()))
+    }
+
+    if (!do_loop())
+    {
+        set_block_param(true);
+    }
+    else
+    {
+        set_do_loop(false);
+    }
+
+    return EOS;
+}
+
+// Action for a keyword of the "B" class (class meaning defined by the scanner
+// rules, not visible here): runs semicolon insertion for ASI_GROUP_10, sets
+// ident_norm unless the previous token is DOT, runs spacing for KEYWORD, echoes
+// the match and enters regst. Differs from keyword_BA() only in the ASI group.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_B()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD))
+    ECHO;
+    BEGIN(regst);
+    return EOS;
+}
+
+// Action for a keyword of the "BA" class (class meaning defined by the scanner
+// rules, not visible here): runs semicolon insertion for ASI_GROUP_9, sets
+// ident_norm unless the previous token is DOT, runs spacing for KEYWORD, echoes
+// the match and enters regst. Differs from keyword_B() only in the ASI group.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_BA()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_9))
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD))
+    ECHO;
+    BEGIN(regst);
+    return EOS;
+}
+
+// Action for the finally keyword (per the function name): runs semicolon
+// insertion for ASI_GROUP_10, sets ident_norm unless the previous token is DOT,
+// runs spacing for KEYWORD_BLOCK, echoes the match and enters regst. If no meta
+// type is pending it becomes BLOCK and a program scope is pushed. Unlike
+// keyword_catch(), block_param is not set.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_finally()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD_BLOCK))
+    ECHO;
+    BEGIN(regst);
+    if (meta_type() == ScopeMetaType::NOT_SET)
+    {
+        set_meta_type(ScopeMetaType::BLOCK);
+        EXEC(p_scope_push(meta_type()))
+    }
+    return EOS;
+}
+
+// Action for the do keyword (per the function name): runs semicolon insertion
+// for ASI_GROUP_10, sets ident_norm unless the previous token is DOT, runs
+// spacing for KEYWORD_BLOCK, echoes the match and enters regst. If no meta type
+// is pending it becomes BLOCK and a program scope is pushed. The do_loop flag is
+// always set; keyword_while() clears it when it finds it set.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_do()
+{
+    EXEC(do_semicolon_insertion(ASI_GROUP_10))
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD_BLOCK))
+    ECHO;
+    BEGIN(regst);
+    if (meta_type() == ScopeMetaType::NOT_SET)
+    {
+        set_meta_type(ScopeMetaType::BLOCK);
+        EXEC(p_scope_push(meta_type()))
+    }
+    set_do_loop(true);
+    return EOS;
+}
+
+// Action for the class keyword (per the function name): sets previous_group to
+// ASI_OTHER (no semicolon insertion step), calls dealias_reset(), sets
+// ident_norm unless the previous token is DOT, runs spacing for KEYWORD_CLASS,
+// echoes the match and enters regst. If no meta type is pending it becomes
+// OBJECT; no program scope is pushed here.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_class()
+{
+    previous_group = ASI_OTHER;
+    dealias_reset();
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD_CLASS))
+    ECHO;
+    BEGIN(regst);
+    if (meta_type() == ScopeMetaType::NOT_SET)
+        set_meta_type(ScopeMetaType::OBJECT);
+    return EOS;
+}
+
+// Action for keywords without a dedicated handler (per the function name): sets
+// previous_group to ASI_OTHER (no semicolon insertion step), calls
+// dealias_reset(), sets ident_norm unless the previous token is DOT, runs
+// spacing for KEYWORD, echoes the match and enters regst.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::keyword_other()
+{
+    previous_group = ASI_OTHER;
+    dealias_reset();
+    if (token != DOT)
+        set_ident_norm(true);
+    EXEC(do_spacing(KEYWORD))
+    ECHO;
+    BEGIN(regst);
+    return EOS;
+}
+
+// Action for the plain assignment operator: sets previous_group to ASI_OTHER,
+// calls dealias_equals(false) (the compound and division assignments pass true),
+// processes the punctuator as OPERATOR_ASSIGNMENT and sets ident_norm.
+void JSTokenizer::operator_assignment()
+{
+    previous_group = ASI_OTHER;
+    dealias_equals(false);
+    process_punctuator(OPERATOR_ASSIGNMENT);
+    set_ident_norm(true);
+}
+
+// Action for a prefix operator (per the function name): calls
+// dealias_prefix_reset(), runs semicolon insertion for ASI_GROUP_6 and operator
+// spacing, echoes the match, enters the divop start condition and sets
+// ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::operator_prefix()
+{
+    dealias_prefix_reset();
+    EXEC(do_semicolon_insertion(ASI_GROUP_6))
+    EXEC(do_operator_spacing())
+    ECHO;
+    BEGIN(divop);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for an increment/decrement operator (per the function name): calls
+// dealias_increment() followed by dealias_reset(), runs semicolon insertion for
+// ASI_GROUP_8 and operator spacing, echoes the match, enters the divop start
+// condition and sets ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::operator_incr_decr()
+{
+    dealias_increment();
+    dealias_reset();
+    EXEC(do_semicolon_insertion(ASI_GROUP_8))
+    EXEC(do_operator_spacing())
+    ECHO;
+    BEGIN(divop);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for operators without a dedicated handler (per the function name):
+// calls dealias_clear_mutated(false), sets previous_group to ASI_OTHER, calls
+// dealias_prefix_reset(), runs operator spacing, echoes the match, enters the
+// divop start condition and sets ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::general_operator()
+{
+    dealias_clear_mutated(false);
+    previous_group = ASI_OTHER;
+    dealias_prefix_reset();
+    EXEC(do_operator_spacing())
+    ECHO;
+    BEGIN(divop);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for literals without a dedicated handler (per the function name):
+// calls dealias_clear_mutated(false) and dealias_append(), runs semicolon
+// insertion for ASI_GROUP_7 and spacing for a LITERAL, echoes the match, enters
+// the divop start condition and sets ident_norm.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::general_literal()
+{
+    dealias_clear_mutated(false);
+    dealias_append();
+    EXEC(do_semicolon_insertion(ASI_GROUP_7))
+    EXEC(do_spacing(LITERAL))
+    ECHO;
+    BEGIN(divop);
+    set_ident_norm(true);
+    return EOS;
+}
+
+// Action for an identifier. When unescape(YYText()) returns true, the identifier
+// goes through semicolon insertion (ASI_GROUP_7), IDENTIFIER spacing,
+// substitution and alias tracking; otherwise only the semicolon insertion step
+// runs. Either way the scanner ends up in the divop start condition.
+// NOTE(review): unescape() is defined elsewhere; what a false result means is
+// not visible here.
+// Returns EOS on the normal path; EXEC is defined elsewhere and presumably
+// returns early with the step's result when that step fails (confirm).
+JSTokenizer::JSRet JSTokenizer::general_identifier()
+{
+    if (unescape(YYText()))
+    {
+        // Both flags are derived from the previous token before any helper runs.
+        bool id_part = (token == DOT);
+        bool assignment_start = token == KEYWORD_VAR_DECL || token == PUNCTUATOR ||
+            token == UNDEFINED;
+        EXEC(do_semicolon_insertion(ASI_GROUP_7))
+        EXEC(do_spacing(IDENTIFIER))
+        EXEC(do_identifier_substitution(YYText(), id_part))
+        dealias_identifier(id_part, assignment_start);
+    }
+    else
+        EXEC(do_semicolon_insertion(ASI_GROUP_7))
+    BEGIN(divop);
+    return EOS;
+}
+
+// Action for otherwise unhandled Unicode input (per the function name): sets
+// previous_group to ASI_OTHER, echoes the match unchanged, records UNDEFINED as
+// the last token, returns the scanner to INITIAL and sets ident_norm.
+void JSTokenizer::general_unicode()
+{
+    previous_group = ASI_OTHER;
+    ECHO;
+    token = UNDEFINED;
+    BEGIN(INITIAL);
+    set_ident_norm(true);
+}
+
 JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in)
 {
     yy_flush_buffer(YY_CURRENT_BUFFER);
+    unescape_nest_seen = false;
 
     auto r = yylex();
 
@@ -1978,4 +2532,4 @@ JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in)
     bytes_read = 0;
 
     return static_cast<JSTokenizer::JSRet>(r);
-}
+}
\ No newline at end of file
index e5ba9e81feb77dbed2d5368dcffa09a7f2733759..08c4684b3dcaa59b1e8d3d0dc4594f8ca732bfea 100644 (file)
@@ -369,6 +369,26 @@ streamsize ostreambuf_infl::xsputn(const char* s, streamsize n)
     return n;
 }
 
+// Copies up to n characters from the put area, starting at the current put
+// position, into s and advances the put position past them. Returns the number
+// of characters copied: 0 when n is not positive or when nothing lies between
+// the put position and the end of the put area.
+// cppcheck-suppress unusedFunction
+streamsize ostreambuf_infl::xsgetn(char* s, streamsize n)
+{
+    assert(n >= 0);
+
+    // Explicit template arguments: the literal 0 is an int and the pointer
+    // difference is a ptrdiff_t, and neither has to be the same type as
+    // streamsize, which would make the argument deduction ambiguous.
+    n = max<streamsize>(0, n);
+    n = min<streamsize>(epptr() - pptr(), n);
+
+    if (n == 0)
+        return 0;
+
+    memcpy(s, pptr(), n);
+    pbump(n);
+
+    return n;
+}
+
 // cppcheck-suppress unusedFunction
 int ostreambuf_infl::overflow(int c)
 {
index 8cd7108f83d2a902498db47849bae7c082dbb49b..acf81b47cd8a67fbd703bedbc6bc9d7781de151d 100644 (file)
@@ -95,6 +95,7 @@ protected:
         std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override;
     virtual int sync() override;
     virtual std::streamsize xsputn(const char* s, std::streamsize n) override;
+    virtual std::streamsize xsgetn(char* s, std::streamsize n) override;
     virtual int overflow(int c = EOF) override;
 
     bool enlarge();
index c80dc10966bf37697b6a0bf5d6945743595c1942..dedae02ab1127816dbb6bda5ed1e42f28ecebd35 100644 (file)
@@ -4174,6 +4174,484 @@ TEST_CASE("Scope tracking - error handling", "[JSNormalizer]")
     }
 }
 
+TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
+{
+    JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting,
+        max_bracket_depth);
+
+    using FuncType = JSTokenizerTester::FuncType;
+
+    SECTION("Global only")
+    {
+        tester.test_function_scopes({{ "", "", {FuncType::NOT_FUNC}}});
+    }
+    SECTION("General function call")
+    {
+        SECTION("in arguments")
+        {
+            tester.test_function_scopes({
+                {"general(", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+        }
+        SECTION("separated identifier and call")
+        {
+            tester.test_function_scopes({
+                {"general  /*comment*/  (", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+        }
+        SECTION("complete call")
+        {
+            tester.test_function_scopes({
+                {"general('%62%61%72')", "var_0000('%62%61%72')", {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as named function definition")
+        {
+            tester.test_function_scopes({
+                {"general(){", "var_0000(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("after defined function identifier")
+        {
+            tester.test_function_scopes({
+                {"unescape;hello(", "unescape;var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+        }
+        SECTION("fake defined function identifier")
+        {
+            tester.test_function_scopes({
+                {"fake_unescape(", "var_0000(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+        }
+        // An ignored identifier that merely contains "unescape" is expected to be
+        // left unchanged in the output and classified as a general call.
+        SECTION("ignored fake defined function identifier")
+        {
+            const std::unordered_set<std::string> s_ignored_ids_fake {"fake_unescape"};
+            JSTokenizerTester tester_fake(norm_depth, max_scope_depth, s_ignored_ids_fake,
+                max_template_nesting, max_bracket_depth);
+            tester_fake.test_function_scopes({
+                {"fake_unescape(", "fake_unescape(", {FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+        }
+        SECTION("as a template literal substitution")
+        {
+            tester.test_function_scopes({
+                {"`unescape ${general(", "`unescape ${var_0000(",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+        }
+    }
+    SECTION("unescape function call")
+    {
+        SECTION("in arguments")
+        {
+            tester.test_function_scopes({
+                {"unescape(", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("separated identifier and call")
+        {
+            tester.test_function_scopes({
+                {"unescape  /*comment*/  (", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("complete call")
+        {
+            tester.test_function_scopes({
+                {"unescape('%62%61%72')", "unescape('%62%61%72')", {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as named function definition")
+        {
+            tester.test_function_scopes({
+                {"unescape(){", "unescape(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("after assignment substitution")
+        {
+            tester.test_function_scopes({
+                {"var a = unescape; a(", "var var_0000=unescape;unescape(", {FuncType::NOT_FUNC,
+                                                                             FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("literal")
+        {
+            tester.test_function_scopes({
+                {"`unescape(", "`unescape(", {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as a template literal substitution")
+        {
+            tester.test_function_scopes({
+                {"`literal ${unescape(", "`literal ${unescape(",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+            });
+        }
+    }
+    SECTION("decodeURI function call")
+    {
+        SECTION("in arguments")
+        {
+            tester.test_function_scopes({
+                {"decodeURI(", "decodeURI(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("separated identifier and call")
+        {
+            tester.test_function_scopes({
+                {"decodeURI  /*comment*/  (", "decodeURI(", {FuncType::NOT_FUNC,
+                                                             FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("complete call")
+        {
+            tester.test_function_scopes({
+                {"decodeURI('%62%61%72')", "decodeURI('%62%61%72')", {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as named function definition")
+        {
+            tester.test_function_scopes({
+                {"decodeURI(){", "decodeURI(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("after assignment substitution")
+        {
+            tester.test_function_scopes({
+                {"var a = decodeURI; a(", "var var_0000=decodeURI;decodeURI(", {FuncType::NOT_FUNC,
+                                                                                FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("literal")
+        {
+            tester.test_function_scopes({
+                {"`decodeURI(", "`decodeURI(", {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as a template literal substitution")
+        {
+            tester.test_function_scopes({
+                {"`literal ${decodeURI(", "`literal ${decodeURI(",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+            });
+        }
+    }
+    SECTION("decodeURIComponent function call")
+    {
+        SECTION("in arguments")
+        {
+            tester.test_function_scopes({
+                {"decodeURIComponent(", "decodeURIComponent(", {FuncType::NOT_FUNC,
+                                                                FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("separated identifier and call")
+        {
+            tester.test_function_scopes({
+                {"decodeURIComponent  /*comment*/  (", "decodeURIComponent(", {FuncType::NOT_FUNC,
+                                                                               FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("complete call")
+        {
+            tester.test_function_scopes({
+                {"decodeURIComponent('%62%61%72')", "decodeURIComponent('%62%61%72')",
+                {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as named function definition")
+        {
+            tester.test_function_scopes({
+                {"decodeURIComponent(){", "decodeURIComponent(){", {FuncType::NOT_FUNC,
+                                                                    FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("after assignment substitution")
+        {
+            tester.test_function_scopes({
+                {"var a = decodeURIComponent; a(",
+                "var var_0000=decodeURIComponent;decodeURIComponent(", {FuncType::NOT_FUNC,
+                                                                         FuncType::UNESCAPE}}
+            });
+        }
+        SECTION("literal")
+        {
+            tester.test_function_scopes({
+                {"`decodeURIComponent(", "`decodeURIComponent(", {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as a template literal substitution")
+        {
+            tester.test_function_scopes({
+                {"`literal ${decodeURIComponent(", "`literal ${decodeURIComponent(",
+                 {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+            });
+        }
+    }
+    SECTION("String.fromCharCode method call")
+    {
+        SECTION("in arguments")
+        {
+            tester.test_function_scopes({
+                {"String.fromCharCode(", "String.fromCharCode(",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+            });
+        }
+        SECTION("separated identifier and call")
+        {
+            tester.test_function_scopes({
+                {"String.fromCharCode  /*comment*/  (", "String.fromCharCode(",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+            });
+        }
+        SECTION("complete call")
+        {
+            tester.test_function_scopes({
+                {"String.fromCharCode( 65, 0x42 )", "String.fromCharCode(65,0x42)",
+                {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as named function definition")
+        {
+            tester.test_function_scopes({
+                {"String.fromCharCode(){", "String.fromCharCode(){",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("after class name assignment substitution")
+        {
+            tester.test_function_scopes({
+                {"var a = String; a.fromCharCode(", "var var_0000=String;String.fromCharCode(",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+            });
+        }
+        SECTION("after assignment substitution")
+        {
+            tester.test_function_scopes({
+                {"var a = String.fromCharCode; a(",
+                "var var_0000=String.fromCharCode;String.fromCharCode(",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+            });
+        }
+        // A bare fromCharCode, without the String object in front of it, is an
+        // ordinary identifier: it is renamed and classified as a general call.
+        SECTION("not a String class member call")
+        {
+            tester.test_function_scopes({
+                {"fromCharCode(",
+                "var_0000(",
+                {FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+        }
+        SECTION("literal")
+        {
+            tester.test_function_scopes({
+                {"`String.fromCharCode(", "`String.fromCharCode(", {FuncType::NOT_FUNC}}
+            });
+        }
+        SECTION("as a template literal substitution")
+        {
+            tester.test_function_scopes({
+                {"`literal ${String.fromCharCode(", "`literal ${String.fromCharCode(",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+            });
+        }
+    }
+}
+
+TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
+{
+    JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting,
+        max_bracket_depth);
+
+    using FuncType = JSTokenizerTester::FuncType;
+
+    SECTION("Opening")
+    {
+        SECTION("Multiple general functions")
+        {
+            tester.test_function_scopes({
+                { "general( general( general(", "var_0000(var_0000(var_0000(",
+                { FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::GENERAL, FuncType::GENERAL}}
+            });
+            CHECK(!tester.is_unescape_nesting_seen());
+        }
+        SECTION("Multiple unescape functions")
+        {
+            tester.test_function_scopes({
+                {"unescape( unescape( unescape(", "unescape(unescape(unescape(",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
+            });
+            CHECK(tester.is_unescape_nesting_seen());
+        }
+        SECTION("Multiple different unescape functions")
+        {
+            tester.test_function_scopes({
+                {"unescape( decodeURI( decodeURIComponent(",
+                "unescape(decodeURI(decodeURIComponent(", {FuncType::NOT_FUNC,
+                                                           FuncType::UNESCAPE,
+                                                           FuncType::UNESCAPE,
+                                                           FuncType::UNESCAPE}}
+            });
+            CHECK(tester.is_unescape_nesting_seen());
+        }
+        SECTION("Multiple String.fromCharCode functions")
+        {
+            tester.test_function_scopes({
+                {"String.fromCharCode( String.fromCharCode( String.fromCharCode(",
+                "String.fromCharCode(String.fromCharCode(String.fromCharCode(",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE,
+                FuncType::CHAR_CODE}}
+            });
+            CHECK(!tester.is_unescape_nesting_seen());
+        }
+        SECTION("Mixed function calls")
+        {
+            tester.test_function_scopes({
+                {"general( unescape( String.fromCharCode(",
+                "var_0000(unescape(String.fromCharCode(",
+                {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE,
+                FuncType::CHAR_CODE}}
+            });
+            CHECK(!tester.is_unescape_nesting_seen());
+        }
+    }
+    // After the inner calls are closed only the outermost call scope is left on
+    // the stack. The nesting flag is checked as well, mirroring the "Opening"
+    // section: it is raised while the nested calls are being opened and is only
+    // cleared at the start of the next process() call.
+    SECTION("Closing")
+    {
+        SECTION("Multiple general functions")
+        {
+            tester.test_function_scopes({
+                {"general( general( general( a ) )", "var_0000(var_0000(var_0000(var_0001))",
+                {FuncType::NOT_FUNC, FuncType::GENERAL}}
+            });
+            CHECK(!tester.is_unescape_nesting_seen());
+        }
+        SECTION("Multiple unescape functions")
+        {
+            tester.test_function_scopes({
+                {"unescape( unescape( unescape( '%62%61%72' ) )",
+                "unescape(unescape(unescape('%62%61%72'))", {FuncType::NOT_FUNC,
+                                                             FuncType::UNESCAPE }}
+            });
+            CHECK(tester.is_unescape_nesting_seen());
+        }
+        SECTION("Multiple different unescape functions")
+        {
+            tester.test_function_scopes({
+                {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )",
+                "unescape(decodeURI(decodeURIComponent('%62%61%72'))",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE }}
+            });
+            CHECK(tester.is_unescape_nesting_seen());
+        }
+        SECTION("Multiple String.fromCharCode methods")
+        {
+            tester.test_function_scopes({
+                {"String.fromCharCode( String.fromCharCode( String.fromCharCode( 65, 0x42 ) )",
+                "String.fromCharCode(String.fromCharCode(String.fromCharCode(65,0x42))",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
+            });
+            CHECK(!tester.is_unescape_nesting_seen());
+        }
+        SECTION("Mixed function calls")
+        {
+            tester.test_function_scopes({
+                {"general( unescape( String.fromCharCode( 65, 0x42 ) )",
+                "var_0000(unescape(String.fromCharCode(65,0x42))", {FuncType::NOT_FUNC,
+                                                                    FuncType::GENERAL}}
+            });
+            CHECK(!tester.is_unescape_nesting_seen());
+        }
+    }
+}
+
+TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
+{
+    JSTokenizerTester tester(norm_depth, max_scope_depth, s_ignored_ids, max_template_nesting,
+        max_bracket_depth);
+
+    using FuncType = JSTokenizerTester::FuncType;
+
+    SECTION("split in the middle of the identifier")
+    {
+        tester.test_function_scopes({
+            {"un",          "var_0000",     {FuncType::NOT_FUNC}},
+            {"escape",      "unescape",     {FuncType::NOT_FUNC}},
+            {"(",           "unescape(",    {FuncType::NOT_FUNC,
+                                             FuncType::UNESCAPE}},
+            {")",           "unescape()",   {FuncType::NOT_FUNC}},
+        });
+    }
+    SECTION("split between identifier and parenthesis")
+    {
+        tester.test_function_scopes({
+            {"decodeURI",   "decodeURI",    {FuncType::NOT_FUNC}},
+            {"(",           "decodeURI(",   {FuncType::NOT_FUNC,
+                                             FuncType::UNESCAPE}},
+            {")",           "decodeURI()",  {FuncType::NOT_FUNC}},
+        });
+    }
+    SECTION("comment between identifier and parenthesis")
+    {
+        tester.test_function_scopes({
+            {"unescape",                "unescape",     {FuncType::NOT_FUNC}},
+            {"//String.fromCharCode\n", "unescape",     {FuncType::NOT_FUNC}},
+            {"(",                       "unescape(",    {FuncType::NOT_FUNC,
+                                                         FuncType::UNESCAPE}},
+            {")",                       "unescape()",   {FuncType::NOT_FUNC}},
+        });
+    }
+    SECTION("split in arguments")
+    {
+        tester.test_function_scopes({
+            {"general",         "var_0000",                         {FuncType::NOT_FUNC}},
+            {"(",               "var_0000(",                        {FuncType::NOT_FUNC,
+                                                                     FuncType::GENERAL}},
+            {"a",               "var_0000(var_0001",                {FuncType::NOT_FUNC,
+                                                                     FuncType::GENERAL}},
+            {"+ b",             "var_0000(var_0001+var_0002",       {FuncType::NOT_FUNC,
+                                                                     FuncType::GENERAL}},
+            {")",               "var_0000(var_0001+var_0002)",      {FuncType::NOT_FUNC}},
+        });
+    }
+    SECTION("literal in arguments")
+    {
+        tester.test_function_scopes({
+            {"String",          "String",                               {FuncType::NOT_FUNC}},
+            {".fromCharCode",   "String.fromCharCode",                  {FuncType::NOT_FUNC}},
+            {"(`",              "String.fromCharCode(`",                {FuncType::NOT_FUNC,
+                                                                         FuncType::CHAR_CODE}},
+            {"un",              "String.fromCharCode(`un",              {FuncType::NOT_FUNC,
+                                                                         FuncType::CHAR_CODE}},
+            {"escape(",         "String.fromCharCode(`unescape(",       {FuncType::NOT_FUNC,
+                                                                         FuncType::CHAR_CODE}},
+            {"`)",              "String.fromCharCode(`unescape(`)",     {FuncType::NOT_FUNC}},
+        });
+    }
+    SECTION("Nesting - Mixed function calls")
+    {
+        tester.test_function_scopes({
+            {"decode",                      "var_0000",                 {FuncType::NOT_FUNC}},
+            {"URI",                         "decodeURI",                {FuncType::NOT_FUNC}},
+            {"Component",                   "decodeURIComponent",       {FuncType::NOT_FUNC}},
+            {"(",                           "decodeURIComponent(",      {FuncType::NOT_FUNC,
+                                                                         FuncType::UNESCAPE}},
+            {" a, ",                        "decodeURIComponent(var_0001,",
+                                                                        {FuncType::NOT_FUNC,
+                                                                         FuncType::UNESCAPE}},
+            {" String.fromCharCode( ar",
+            "decodeURIComponent(var_0001,String.fromCharCode(var_0002",
+                                                                        {FuncType::NOT_FUNC,
+                                                                         FuncType::UNESCAPE,
+                                                                         FuncType::CHAR_CODE}},
+            {"g ), b, foo",
+            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0005",
+                                                                        {FuncType::NOT_FUNC,
+                                                                         FuncType::UNESCAPE}},
+            {"bar( ",
+            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006(",
+                                                                        {FuncType::NOT_FUNC,
+                                                                         FuncType::UNESCAPE,
+                                                                         FuncType::GENERAL}},
+            {"))",
+            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006())",
+                                                                        {FuncType::NOT_FUNC}}
+        });
+    }
+}
+
 #endif // CATCH_TEST_BUILD
 
 // Benchmark tests
@@ -4215,7 +4693,7 @@ TEST_CASE("JS Normalizer, literals by 8 K", "[JSNormalizer]")
 {
     JSIdentifierCtxStub ident_ctx;
     JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_bracket_depth);
-    char dst[DEPTH];
+    char dst[norm_depth];
 
     constexpr size_t size = 1 << 13;
 
@@ -4255,7 +4733,7 @@ TEST_CASE("JS Normalizer, literals by 64 K", "[JSNormalizer]")
 {
     JSIdentifierCtxStub ident_ctx;
     JSNormalizer normalizer(ident_ctx, unlim_depth, max_template_nesting, max_scope_depth);
-    char dst[DEPTH];
+    char dst[norm_depth];
 
     constexpr size_t size = 1 << 16;
 
@@ -4295,10 +4773,10 @@ TEST_CASE("JS Normalizer, id normalization", "[JSNormalizer]")
 {
     // around 11 000 identifiers
     std::string input;
-    for (int it = 0; it < DEPTH; ++it)
+    for (int it = 0; it < norm_depth; ++it)
         input.append("n" + std::to_string(it) + " ");
 
-    input.resize(DEPTH - strlen(s_closing_tag));
+    input.resize(norm_depth - strlen(s_closing_tag));
     input.append(s_closing_tag, strlen(s_closing_tag));
 
     JSIdentifierCtxStub ident_ctx_mock;
@@ -4378,14 +4856,14 @@ TEST_CASE("JS Normalizer, scope tracking", "[JSNormalizer]")
 
 TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]")
 {
-    auto w_semicolons = make_input("", "a;\n", "", depth);
-    auto wo_semicolons = make_input("", "a \n", "", depth);
+    auto w_semicolons = make_input("", "a;\n", "", norm_depth);
+    auto wo_semicolons = make_input("", "a \n", "", norm_depth);
     const char* src_w_semicolons = w_semicolons.c_str();
     const char* src_wo_semicolons = wo_semicolons.c_str();
     size_t src_len = w_semicolons.size();
 
     JSIdentifierCtxStub ident_ctx_mock;
-    JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, depth);
+    JSNormalizer normalizer_wo_ident(ident_ctx_mock, unlim_depth, max_template_nesting, norm_depth);
 
     REQUIRE(norm_ret(normalizer_wo_ident, w_semicolons) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("without semicolon insertion")
index a57552818b4b3a2e621f2eb1283cda7003589261..5083dbcba948a8200d5a40dee61123dc2458fd1f 100644 (file)
@@ -29,15 +29,44 @@ namespace snort
 {
 [[noreturn]] void FatalError(const char*, ...)
 { exit(EXIT_FAILURE); }
-void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) {}
+void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) { }
 uint8_t TraceApi::get_constraints_generation() { return 0; }
-void TraceApi::filter(const Packet&) {}
+void TraceApi::filter(const Packet&) { }
 }
 
 THREAD_LOCAL const snort::Trace* http_trace = nullptr;
 
 using namespace snort;
 
+// Normalizes each case's source, then checks the script text and the function-scope stack.
+void JSTokenizerTester::test_function_scopes(const std::list<ScopeCase>& pdus)
+{
+    for (const auto& pdu : pdus)  // by reference: copying a case would deep-copy its std::list
+    {
+        const char* const source = std::get<0>(pdu);
+        const char* const expected = std::get<1>(pdu);
+        const std::list<FuncType>& exp_stack = std::get<2>(pdu);
+
+        normalizer.normalize(source, strlen(source));
+        std::string result_buf(normalizer.get_script(), normalizer.script_size());
+        CHECK(result_buf == expected);
+        // Pop from a copy; exp_stack is walked in reverse so its last entry is matched with top().
+        auto tmp_stack(normalizer.get_tokenizer().scope_stack);
+        CHECK(tmp_stack.size() == exp_stack.size());
+        for (auto func_it = exp_stack.rbegin(); func_it != exp_stack.rend() and !tmp_stack.empty();
+            ++func_it)
+        {
+            CHECK(tmp_stack.top().func_call_type == *func_it);
+            tmp_stack.pop();
+        }
+    }
+}
+
+// Forwards to the wrapped normalizer's is_unescape_nesting_seen(), so tests can
+// query the flag without direct access to the private normalizer member.
+bool JSTokenizerTester::is_unescape_nesting_seen() const
+{ return normalizer.is_unescape_nesting_seen(); }
+
 void test_scope(const char* context, std::list<JSProgramScopeType> stack)
 {
     std::string buf(context);
index cce7162829047fb55c28d27d0143d8948681a41d..10f5b0a20ea56e6da2a8c20e16ac1f1f22b9ba56 100644 (file)
@@ -34,7 +34,9 @@ constexpr int norm_depth = 65535;
 constexpr int max_template_nesting = 4;
 constexpr int max_bracket_depth = 256;
 constexpr int max_scope_depth = 256;
-static const std::unordered_set<std::string> s_ignored_ids { "console", "eval", "document" };
+static const std::unordered_set<std::string> s_ignored_ids {
+    "console", "eval", "document", "unescape", "decodeURI", "decodeURIComponent", "String"
+};
 
 namespace snort
 {
@@ -60,6 +62,27 @@ public:
     size_t size() const override { return 0; }
 };
 
+// Test fixture owning a JSIdentifierCtx and the JSNormalizer constructed from it.
+class JSTokenizerTester
+{
+public:
+    using FuncType = JSTokenizer::FuncType;
+    // One case: source chunk, expected script text, expected function-scope stack.
+    using ScopeCase = std::tuple<const char*, const char*, std::list<FuncType>>;
+
+    JSTokenizerTester(int32_t depth, uint32_t max_scope_depth,
+        const std::unordered_set<std::string>& ignored_ids,
+        uint8_t max_template_nesting, uint32_t max_bracket_depth) :
+        ident_ctx(depth, max_scope_depth, ignored_ids),
+        normalizer(ident_ctx, depth, max_template_nesting, max_bracket_depth)
+    { }
+    void test_function_scopes(const std::list<ScopeCase>& pdus);
+    bool is_unescape_nesting_seen() const;
+
+private:
+    JSIdentifierCtx ident_ctx;  // declared first: normalizer is constructed from it
+    snort::JSNormalizer normalizer;
+};
+
 void test_scope(const char* context, std::list<JSProgramScopeType> stack);
 void test_normalization(const char* source, const char* expected);
 void test_normalization_bad(const char* source, const char* expected, JSTokenizer::JSRet eret);
index 61dfc483a89210a643d19e0190288082683d3b00..5129804ee342cfbb29b7f8d16e39762bdeac973b 100644 (file)
@@ -1555,6 +1555,62 @@ TEST_CASE("output buffer - basic", "[Stream buffers]")
         CHECK(off_c == len + 2);
         CHECK(off_e == 4096 + 2048);
     }
+
+    // Write exp, seek back by its length to offset 0, then read it all back with sgetn.
+    SECTION("get char sequence")
+    {
+        ostreambuf_infl b;
+        const int exp_len = strlen(exp);
+        b.sputn(exp, exp_len);
+
+        int off_c = b.pubseekoff(-exp_len, ios_base::cur, ios_base::out);
+        CHECK(off_c == 0);
+
+        vector<char> act_seq(exp_len);  // RAII scratch buffer instead of new[]/delete[]
+        CHECK(b.sgetn(act_seq.data(), exp_len) == exp_len);
+        CHECK(!memcmp(exp, act_seq.data(), exp_len));
+
+        int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out);
+        CHECK(new_off == exp_len);
+    }
+
+    // With the position moved to the end of the data, sgetn is expected to read nothing.
+    SECTION("get char sequence from the end")
+    {
+        ostreambuf_infl b;
+        const int exp_len = strlen(exp);
+        char* buf = new char[exp_len];  // NOTE(review): b presumably takes ownership - confirm
+        memcpy(buf, exp, exp_len);
+        b.pubsetbuf(buf, exp_len);
+
+        int data_off = b.pubseekoff(exp_len, ios_base::beg, ios_base::out);
+        CHECK(data_off == exp_len);
+
+        // Zero-filled with one spare byte so strlen stays in bounds even if sgetn misbehaves.
+        vector<char> act_seq(exp_len + 1, '\0');
+        CHECK(b.sgetn(act_seq.data(), exp_len) == 0);
+        CHECK(strlen(act_seq.data()) == 0);
+
+        int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out);
+        CHECK(new_off == exp_len);
+    }
+    
+    // Asking for one byte more than the buffer holds is expected to return only exp_len bytes.
+    SECTION("get char sequence more than available")
+    {
+        ostreambuf_infl b;
+        const int exp_len = strlen(exp);
+        char* buf = new char[exp_len];  // NOTE(review): b presumably takes ownership - confirm
+        memcpy(buf, exp, exp_len);
+        b.pubsetbuf(buf, exp_len);
+
+        vector<char> act_seq(exp_len + 1);  // RAII scratch buffer instead of new[]/delete[]
+        CHECK(b.sgetn(act_seq.data(), exp_len + 1) == exp_len);
+        CHECK(!memcmp(exp, act_seq.data(), exp_len));
+
+        int new_off = b.pubseekoff(0, ios_base::cur, ios_base::out);
+        CHECK(new_off == exp_len);
+    }
 }
 
 TEST_CASE("output buffer - buffer management", "[Stream buffers]")
@@ -2008,11 +2064,8 @@ TEST_CASE("output stream - large data", "[Stream buffers]")
 {
     const int len = 1 << 21;
     const int plen = 1 << 12;
-    vector<char> chars;
-
-    chars.reserve(len);
-    for (char& c : chars)
-        c = rand();
+    vector<char> chars(len, '\0');
+    generate_n(chars.begin(), len, rand);
 
     SECTION("0 bytes reserved")
     {