]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #3312: JSN: Unescape Text Processing
authorMike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)
committerMike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)
Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_unescape to master

Squashed commit of the following:

commit 5e79a2a365a4b5b74670d4bfc6f94bcc35f3b2d6
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Fri Mar 18 20:39:48 2022 +0200

    utils: fix JS Normalizer benchmark build

commit 8b79a4adbc538ea1b6400486cbe1b82a5369d1af
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Fri Mar 4 22:05:17 2022 +0200

    http_inspect: add unescape text processing for Enhanced JS Normalizer

        * utils: decode %XX, %uXXXX, \uXX, \uXXXX, \xXX, \u{CHAR_CODE} escape sequences
        * utils: decode hexadecimal and decimal code points
        * utils: add support for unescape of universal sequences in identifiers,
          strings, template literals and regular expressions
        * utils: add support for unescape(), decodeURI(), decodeURIComponent() JavaScript
          functions
        * utils: add support for String.fromCharCode() JavaScript function
        * utils: add unit test coverage
        * utils: add benchmark test
        * http_inspect: enable alert 119:280 - mixed encoding
        * http_inspect: update dev notes
        * doc: update user manual

12 files changed:
doc/user/http_inspect.txt
src/service_inspectors/http_inspect/dev_notes.txt
src/service_inspectors/http_inspect/http_js_norm.cc
src/utils/js_identifier_ctx.cc
src/utils/js_normalizer.h
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/test/CMakeLists.txt
src/utils/test/js_normalizer_test.cc
src/utils/test/js_test_utils.cc
src/utils/test/js_test_utils.h
src/utils/test/js_unescape_test.cc [new file with mode: 0644]

index 98dec3b9441ab026f1f32349ce75c198c20c9346..2d583e0cdd68fc79078e562f691722f0347878c8 100755 (executable)
@@ -78,8 +78,9 @@ Normalizer. The Enhanced Normalizer can normalize inline/external scripts.
 It supports scripts over multiple PDUs. It is a stateful JavaScript whitespace
 and identifiers normalizer. All JavaScript identifier names, except those from
 the ignore list, will be substituted with unified names in the following
-format: var_0000 -> var_ffff. Moreover, Normalizer validates the syntax
-concerning ECMA-262 Standard, including scope tracking and restrictions
+format: var_0000 -> var_ffff. The Normalizer tries to expand escaped text,
+so that it appears in its usual form in the output. Moreover, the Normalizer validates
+the syntax against the ECMA-262 Standard, including scope tracking and restrictions
 for script elements. For more information on how additionally configure
 Enhanced Normalizer check with the following configuration options:
 js_norm_bytes_depth, js_norm_identifier_depth, js_norm_max_tmpl_nest,
index 5e785eda6aacf48a6e1a2a190126ff2d76094e5e..83c5970be967c5721391a23831a0f7bb06ad71dc 100755 (executable)
@@ -256,11 +256,33 @@ For example:
     a("hello") // will be substituted to 'console.log("hello")'
 
 In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape
-functions(unescape, decodeURI, decodeURIComponent). This allows detection of
+functions(unescape, decodeURI, decodeURIComponent, String.fromCharCode). This allows detection of
 unescape functions nested within other unescape functions, which is a potential
 indicator of a multilevel obfuscation. The definition of a function call depends on
 identifier substitution, so such identifiers must be included in the ignore list in
-order to use this feature.
+order to use this feature. After determining the unescape sequence, it is decoded into the
+corresponding string.
+
+For example:
+
+   unescape('\u0062\u0061\u0072')        -> 'bar'
+   decodeURI('%62%61%72')                -> 'bar'
+   decodeURIComponent('\x62\x61\x72')    -> 'bar'
+   String.fromCharCode(98, 0x0061, 0x72) -> 'bar'
+
+The following formats are supported:
+
+   \xXX
+   \uXXXX
+   \u{XXXX}
+   %XX
+   \uXX
+   %uXXXX
+   decimal code point
+   hexadecimal code point
+
+JS Normalizer is able to decode mixed encoding sequences. However, a built-in alert is raised
+in such a case.
 
 JS Normalizer's syntax parser follows ECMA-262 standard. For various features,
 tracking of variable scope and individual brackets is done in accordance to the standard.
index d73de30a633b62d8f58356c52a37040eb9b14f30..2fd94b7a0afefacd08c0b5a3fb5c1a56fc26c3fd 100644 (file)
@@ -219,6 +219,11 @@ void HttpJsNorm::do_external(const Field& input, Field& output,
             *infractions += INF_JS_UNESCAPE_NEST;
             events->create_event(EVENT_JS_UNESCAPE_NEST);
         }
+        if (js_ctx.is_mixed_encoding_seen())
+        {
+            *infractions += INF_JS_MULTIPLE_ENC;
+            events->create_event(EVENT_JS_MULTIPLE_ENC);
+        }
 
         if (ssn->js_built_in_event)
             break;
@@ -358,6 +363,11 @@ void HttpJsNorm::do_inline(const Field& input, Field& output,
             *infractions += INF_JS_UNESCAPE_NEST;
             events->create_event(EVENT_JS_UNESCAPE_NEST);
         }
+        if (js_ctx.is_mixed_encoding_seen())
+        {
+            *infractions += INF_JS_MULTIPLE_ENC;
+            events->create_event(EVENT_JS_MULTIPLE_ENC);
+        }
 
         script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
     }
index 49274c8340f776d7a99c36cc61692be6ea986b1a..e6900c8d79804ffe6088ecba7f62a10bf3605a3d 100644 (file)
@@ -200,7 +200,7 @@ const char* JSIdentifierCtx::alias_lookup(const char* alias) const
 
 // advanced program scope access for testing
 
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
 
 bool JSIdentifierCtx::scope_check(const std::list<JSProgramScopeType>& compare) const
 {
@@ -226,4 +226,4 @@ const std::list<JSProgramScopeType> JSIdentifierCtx::get_types() const
     return return_list;
 }
 
-#endif // CATCH_TEST_BUILD
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
index 4a694d3c7a4c5096d8cac31de27c6e756e7324f3..8508cd7d46e8a34c06faead827b44008f17af212 100644 (file)
@@ -61,14 +61,17 @@ public:
     bool is_unescape_nesting_seen() const
     { return tokenizer.is_unescape_nesting_seen(); }
 
-#ifdef CATCH_TEST_BUILD
+    bool is_mixed_encoding_seen() const
+    { return tokenizer.is_mixed_encoding_seen(); }
+
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
     const char* get_tmp_buf() const
     { return tmp_buf; }
     size_t get_tmp_buf_size() const
     { return tmp_buf_size; }
     const JSTokenizer& get_tokenizer() const
     { return tokenizer; }
-#endif
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
 
 #ifdef BENCHMARK_TEST
     void rewind_output()
index 2dcdc1fe74cc7d8a0d9463b04de623f81e490b78..0747af6d3258c3e6817486729814c09ca9708ba9 100644 (file)
@@ -43,9 +43,10 @@ extern THREAD_LOCAL const snort::Trace* http_trace;
 enum JSProgramScopeType : unsigned int;
 
 class JSIdentifierCtxBase;
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
 class JSTokenizerTester;
-#endif
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
+
 class JSTokenizer : public yyFlexLexer
 {
 private:
@@ -98,7 +99,7 @@ private:
     {
         Scope(ScopeType t) :
             type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC),
-            ident_norm(true), block_param(false), do_loop(false)
+            ident_norm(true), block_param(false), do_loop(false), encoding(0), char_code_str(false)
         {}
 
         ScopeType type;
@@ -107,6 +108,8 @@ private:
         bool ident_norm;
         bool block_param;
         bool do_loop;
+        uint32_t encoding;
+        bool char_code_str;
     };
 
     enum ASIGroup
@@ -164,6 +167,7 @@ public:
     JSRet process(size_t& bytes_in);
 
     bool is_unescape_nesting_seen() const;
+    bool is_mixed_encoding_seen() const;
 protected:
     [[noreturn]] void LexerError(const char* msg) override
     { snort::FatalError("%s", msg); }
@@ -181,6 +185,7 @@ private:
     JSRet push_identifier(const char* ident);
     bool unescape(const char* lexeme);
     void process_punctuator(JSToken tok = PUNCTUATOR);
+    void skip_punctuator();
     void process_closing_brace();
     JSRet process_subst_open();
 
@@ -209,11 +214,24 @@ private:
     FuncType func_call_type();
     FuncType detect_func_type();
     void check_function_nesting(FuncType);
+    void check_mixed_encoding(uint32_t);
     void set_block_param(bool);
     bool block_param();
     void set_do_loop(bool);
     bool do_loop();
 
+    void set_encoding(uint32_t f)    // OR the given flag(s) into the current scope's encoding mask
+    { scope_cur().encoding |= f; }
+
+    uint32_t encoding()              // encoding flags accumulated so far in the current scope
+    { return scope_cur().encoding; }
+
+    void set_char_code_str(bool f)   // overwrite the char_code_str marker of the current scope
+    { scope_cur().char_code_str = f; }
+
+    bool char_code_str()             // char_code_str marker of the current scope
+    { return scope_cur().char_code_str; }
+
     static JSProgramScopeType m2p(ScopeMetaType);
     static const char* m2str(ScopeMetaType);
     static bool is_operator(JSToken);
@@ -269,6 +287,12 @@ private:
     JSRet general_literal();
     JSRet general_identifier();
     void general_unicode();
+    void escaped_unicode();
+    void escaped_code_point();
+    void escaped_url_sequence();
+    void dec_code_point();
+    void hex_code_point();
+    void char_code_no_match();
 
     static const char* p_scope_codes[];
 
@@ -283,6 +307,7 @@ private:
     bool prefix_increment = false;
     bool dealias_stored = false;
     bool unescape_nest_seen = false;
+    bool mixed_encoding_seen = false;
 
     uint8_t max_template_nesting;
     std::stack<uint16_t, std::vector<uint16_t>> brace_depth;
@@ -346,9 +371,9 @@ private:
     const uint32_t max_bracket_depth;
     std::stack<Scope> scope_stack;
 
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
     friend JSTokenizerTester;
-#endif // CATCH_TEST_BUILD
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
 };
 
 #endif // JS_TOKENIZER_H
index b7f93df12f9ebb2a07cf2f30e0c60c85f3ce4c2b..074dd45e98f35dd7657b41575020c61393f1a4eb 100644 (file)
 
 constexpr bool JSTokenizer::insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX];
 
+// encoding flags
+
+enum EncodingType   // bit flags, one per escape notation; OR-ed into Scope::encoding by set_encoding()
+{
+    IS_HEX          = 1 << 0,   // hex code unit: 0xXXXX
+    IS_DEC          = 1 << 1,   // dec code unit: XXXX
+    IS_XBACKSLASH   = 1 << 2,   // \xXX
+    IS_UBACKSLASH_1 = 1 << 3,   // \uXX
+    IS_UBACKSLASH_2 = 1 << 4,   // \uXXXX
+    IS_UPERCENT     = 1 << 5,   // %uXXXX
+    IS_PERCENT      = 1 << 6,   // %XX
+    IS_UCODEPOINT   = 1 << 7    // \u{XXXX}, hex digits only (no 0x prefix inside the braces)
+};
+
 %}
 
 /* The following grammar was created based on ECMAScript specification */
@@ -848,9 +862,19 @@ UNICODE_CONNECTOR_PUNCTUATION    {CONNECTOR_PUNCT_RNG_1}|{CONNECTOR_PUNCT_RNG_2}
 UNICODE_ZWNJ    \xE2\x80\x8C
 UNICODE_ZWJ     \xE2\x80\x8D
 
-/* Unicode escape sequence */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 (escape sequence) */
+/* according to https://262.ecma-international.org/12.0/#prod-UnicodeEscapeSequence */
 UNICODE_ESCAPE_SEQUENCE    \\u[0-9a-fA-F]{4}
+ESCAPED_CODE_POINT         \\u\{[0-9a-fA-F]+\}
+
+/* according to https://262.ecma-international.org/12.0/#prod-HexEscapeSequence */
+HEX_ESCAPE_SEQUENCE        \\x[0-9a-fA-F]{2}
+
+/* according to https://tc39.es/ecma262/multipage/additional-ecmascript-features-for-web-browsers.html#sec-unescape-string */
+BYTE_ESCAPE_SEQUENCE       \\u[0-9a-fA-F]{2}
+PERCENT_ESCAPE_SEQUENCE    %u[0-9a-fA-F]{4}
+
+/* according to https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */
+URL_ESCAPE_SEQUENCE        %[0-9a-fA-F]{2}
 
 /* whitespaces */
 /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.2 */
@@ -947,7 +971,7 @@ PUNCTUATOR_ARROW               "=>"
 
 /* identifiers */
 /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6 */
-IDENTIFIER_START    [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}
+IDENTIFIER_START    [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}|{ESCAPED_CODE_POINT}
 IDENTIFIER_PART     (({IDENTIFIER_START})|({UNICODE_COMBINING_MARK})|({UNICODE_DIGIT})|({UNICODE_CONNECTOR_PUNCTUATION})|{UNICODE_ZWNJ}|{UNICODE_ZWJ})*
 IDENTIFIER          ({IDENTIFIER_START}{IDENTIFIER_PART})*
 
@@ -957,19 +981,20 @@ LITERAL_NULL                  null
 LITERAL_THIS                  this
 LITERAL_BOOLEAN               true|false
 LITERAL_DECIMAL               [.]?[0-9]+[\.]?[0-9]*[eE]?[0-9]*
+LITERAL_INTEGER               [0-9]*
 LITERAL_HEX_INTEGER           0x[0-9a-fA-F]*|0X[0-9a-fA-F]*
 LITERAL_DQ_STRING_START       \"
 LITERAL_DQ_STRING_END         \"
 LITERAL_DQ_STRING_SKIP        \\\"
-LITERAL_DQ_STRING_TEXT        [^\"\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
+LITERAL_DQ_STRING_TEXT        [^\"\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
 LITERAL_SQ_STRING_START       \'
 LITERAL_SQ_STRING_END         \'
 LITERAL_SQ_STRING_SKIP        \\\'
-LITERAL_SQ_STRING_TEXT        [^\'\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
+LITERAL_SQ_STRING_TEXT        [^\'\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
 LITERAL_TEMPLATE_START        \`
 LITERAL_TEMPLATE_END          \`
 LITERAL_TEMPLATE_SUBST_START  \$\{
-LITERAL_TEMPLATE_OTHER        [^\\\`(\$\{)("<"+(?i:\/script>))]{1,32}
+LITERAL_TEMPLATE_OTHER        [^\\%\`(\$\{)("<"+(?i:\/script>))]{1,32}
 LITERAL_REGEX_START           \/[^*\/]
 LITERAL_REGEX_END             \/[gimsuy]*
 LITERAL_REGEX_SKIP            \\\/
@@ -1014,63 +1039,112 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 /* in a regular expression */
 %x regex
 
+/* in a single-quoted string within unescape function */
+%x unesc_sqstr
+
+/* in a double-quoted string within unescape function */
+%x unesc_dqstr
+
+/* in a template literal within unescape function */
+%x unesc_tmpll
+
+/* to process code units within char code unescape function */
+%x char_code
+%x char_code_lcomm
+%x char_code_bcomm
+
 %%
 
-{WHITESPACES}                       { }
-{CHAR_ESCAPE_SEQUENCES}             { }
-{LINE_TERMINATORS}                  { BEGIN(regst); newline_found = true; }
+<INITIAL,divop,regst,char_code>{WHITESPACES}              { /* skip */ }
+<INITIAL,divop,regst,char_code>{CHAR_ESCAPE_SEQUENCES}    { /* skip */ }
+
+{LINE_TERMINATORS}                    { BEGIN(regst); newline_found = true; }
+<char_code>{LINE_TERMINATORS}         { newline_found = true; }
 
-<INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
+<INITIAL,regex,dqstr,regst,sqstr,divop,char_code>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
 {HTML_TAG_SCRIPT_CLOSE}             { EXEC(html_closing_script_tag()) }
 
-       {HTML_COMMENT_OPEN}          { BEGIN(lcomm); }
-       {LINE_COMMENT_START}         { BEGIN(lcomm); }
-<lcomm>{LINE_COMMENT_END1}          { BEGIN(regst); newline_found = true; }
-<lcomm>{LINE_COMMENT_END2}          { BEGIN(regst); newline_found = true; }
-<lcomm>{LINE_COMMENT_END3}          { BEGIN(regst); RETURN(OPENING_TAG) }
-<lcomm>{LINE_COMMENT_END4}          { BEGIN(regst); RETURN(CLOSING_TAG) }
-<lcomm>{LINE_COMMENT_SKIP}          { }
-<lcomm><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {BLOCK_COMMENT_START}        { BEGIN(bcomm); }
-<bcomm>{BLOCK_COMMENT_END1}         { BEGIN(regst); }
-<bcomm>{BLOCK_COMMENT_END2}         { BEGIN(regst); RETURN(OPENING_TAG) }
-<bcomm>{BLOCK_COMMENT_END3}         { BEGIN(regst); RETURN(CLOSING_TAG) }
-<bcomm>{BLOCK_COMMENT_LINE1}        |
-<bcomm>{BLOCK_COMMENT_LINE2}        { newline_found = true;}
-<bcomm>{BLOCK_COMMENT_SKIP}         { }
-<bcomm><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {LITERAL_DQ_STRING_START}    { EXEC(literal_dq_string_start()) }
-<dqstr>{LITERAL_DQ_STRING_END}      { dealias_append(); ECHO; BEGIN(divop); }
-<dqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
-<dqstr>\\{CR}{LF}                   { }
-<dqstr>\\{LF}                       { }
-<dqstr>\\{CR}                       { }
-<dqstr>{LINE_TERMINATORS}           { BEGIN(regst); RETURN(BAD_TOKEN) }
-<dqstr>{LITERAL_DQ_STRING_SKIP}     { dealias_append(); ECHO; }
-<dqstr>{LITERAL_DQ_STRING_TEXT}     { dealias_append(); ECHO; }
-<dqstr><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {LITERAL_SQ_STRING_START}    { EXEC(literal_sq_string_start()) }
-<sqstr>{LITERAL_SQ_STRING_END}      { dealias_append(); ECHO; BEGIN(divop); }
-<sqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
-<sqstr>\\{CR}{LF}                   { }
-<sqstr>\\{LF}                       { }
-<sqstr>\\{CR}                       { }
-<sqstr>{LINE_TERMINATORS}           { BEGIN(regst); RETURN(BAD_TOKEN) }
-<sqstr>{LITERAL_SQ_STRING_SKIP}     { dealias_append(); ECHO; }
-<sqstr>{LITERAL_SQ_STRING_TEXT}     { dealias_append(); ECHO; }
-<sqstr><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {LITERAL_TEMPLATE_START}                  { EXEC(literal_template_start()) }
-<tmpll>(\\\\)*{LITERAL_TEMPLATE_END}             { dealias_append(); ECHO; BEGIN(divop); }
-<tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START}     { EXEC(process_subst_open()) dealias_reset(); }
-<tmpll>{HTML_TAG_SCRIPT_CLOSE}                   { BEGIN(regst); RETURN(CLOSING_TAG) }
-<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START}   | /* escaped template substitution */
-<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_END}           | /* escaped backtick */
-<tmpll>{LITERAL_TEMPLATE_OTHER}                  { dealias_append(); ECHO; }
-<tmpll><<EOF>>                                   { RETURN(SCRIPT_CONTINUE) }
+    {HTML_COMMENT_OPEN}                       { BEGIN(lcomm); }
+    {LINE_COMMENT_START}                      { BEGIN(lcomm); }
+<char_code>{HTML_COMMENT_OPEN}                { BEGIN(char_code_lcomm); }
+<char_code>{LINE_COMMENT_START}               { BEGIN(char_code_lcomm); }
+<lcomm>{LINE_COMMENT_END1}                    { BEGIN(regst); newline_found = true; }
+<lcomm>{LINE_COMMENT_END2}                    { BEGIN(regst); newline_found = true; }
+<char_code_lcomm>{LINE_COMMENT_END1}          { BEGIN(char_code); newline_found = true; }
+<char_code_lcomm>{LINE_COMMENT_END2}          { BEGIN(char_code); newline_found = true; }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END3}    { BEGIN(regst); RETURN(OPENING_TAG) }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END4}    { BEGIN(regst); RETURN(CLOSING_TAG) }
+<lcomm,char_code_lcomm>{LINE_COMMENT_SKIP}    { /* skip */ }
+<lcomm,char_code_lcomm><<EOF>>                { RETURN(SCRIPT_CONTINUE) }
+
+    {BLOCK_COMMENT_START}                       { BEGIN(bcomm); }
+<char_code>{BLOCK_COMMENT_START}                { BEGIN(char_code_bcomm); }
+<bcomm>{BLOCK_COMMENT_END1}                     { BEGIN(regst); }
+<char_code_bcomm>{BLOCK_COMMENT_END1}           { BEGIN(char_code); }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END2}     { BEGIN(regst); RETURN(OPENING_TAG) }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END3}     { BEGIN(regst); RETURN(CLOSING_TAG) }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE1}    |
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE2}    { newline_found = true; }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_SKIP}     { /* skip */ }
+<bcomm,char_code_bcomm><<EOF>>                  { RETURN(SCRIPT_CONTINUE) }
+
+    {LITERAL_DQ_STRING_START}                 { EXEC(literal_dq_string_start()) }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_END}    { dealias_append(); ECHO; BEGIN(divop); }
+<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_CLOSE}    { BEGIN(regst); RETURN(CLOSING_TAG) }
+<dqstr,unesc_dqstr>\\{CR}{LF}                 { /* skip */ }
+<dqstr,unesc_dqstr>\\{LF}                     { /* skip */ }
+<dqstr,unesc_dqstr>\\{CR}                     { /* skip */ }
+<dqstr,unesc_dqstr>{LINE_TERMINATORS}         { BEGIN(regst); RETURN(BAD_TOKEN) }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_SKIP}   { dealias_append(); ECHO; }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_TEXT}   { dealias_append(); ECHO; }
+<dqstr,unesc_dqstr><<EOF>>                    { RETURN(SCRIPT_CONTINUE) }
+<dqstr>{UNICODE_ESCAPE_SEQUENCE}              |
+<dqstr>{HEX_ESCAPE_SEQUENCE}                  { escaped_unicode(); }
+<dqstr>{ESCAPED_CODE_POINT}                   { escaped_code_point(); }
+<unesc_dqstr>{UNICODE_ESCAPE_SEQUENCE}        { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_dqstr>{HEX_ESCAPE_SEQUENCE}            { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_dqstr>{ESCAPED_CODE_POINT}             { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_dqstr>{BYTE_ESCAPE_SEQUENCE}           { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_dqstr>{PERCENT_ESCAPE_SEQUENCE}        { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_dqstr>{URL_ESCAPE_SEQUENCE}            { set_encoding(IS_PERCENT); escaped_url_sequence(); }
+
+    {LITERAL_SQ_STRING_START}                 { EXEC(literal_sq_string_start()) }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_END}    { dealias_append(); ECHO; BEGIN(divop); }
+<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_CLOSE}    { BEGIN(regst); RETURN(CLOSING_TAG) }
+<sqstr,unesc_sqstr>\\{CR}{LF}                 { /* skip */ }
+<sqstr,unesc_sqstr>\\{LF}                     { /* skip */ }
+<sqstr,unesc_sqstr>\\{CR}                     { /* skip */ }
+<sqstr,unesc_sqstr>{LINE_TERMINATORS}         { BEGIN(regst); RETURN(BAD_TOKEN) }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_SKIP}   { dealias_append(); ECHO; }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_TEXT}   { dealias_append(); ECHO; }
+<sqstr,unesc_sqstr><<EOF>>                    { RETURN(SCRIPT_CONTINUE) }
+<sqstr>{UNICODE_ESCAPE_SEQUENCE}              |
+<sqstr>{HEX_ESCAPE_SEQUENCE}                  { escaped_unicode(); }
+<sqstr>{ESCAPED_CODE_POINT}                   { escaped_code_point(); }
+<unesc_sqstr>{UNICODE_ESCAPE_SEQUENCE}        { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_sqstr>{HEX_ESCAPE_SEQUENCE}            { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_sqstr>{ESCAPED_CODE_POINT}             { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_sqstr>{BYTE_ESCAPE_SEQUENCE}           { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_sqstr>{PERCENT_ESCAPE_SEQUENCE}        { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_sqstr>{URL_ESCAPE_SEQUENCE}            { set_encoding(IS_PERCENT); escaped_url_sequence(); }
+
+    {LITERAL_TEMPLATE_START}                                 { EXEC(literal_template_start()) }
+<tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_END}             { dealias_append(); ECHO; BEGIN(divop); }
+<tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START}     { EXEC(process_subst_open()) dealias_reset(); }
+<tmpll,unesc_tmpll>{HTML_TAG_SCRIPT_CLOSE}                   { BEGIN(regst); RETURN(CLOSING_TAG) }
+<tmpll,unesc_tmpll>(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START}   | /* escaped template substitution */
+<tmpll,unesc_tmpll>(\\\\)*\\{LITERAL_TEMPLATE_END}           | /* escaped backtick */
+<tmpll,unesc_tmpll>{LITERAL_TEMPLATE_OTHER}                  { dealias_append(); ECHO; }
+<tmpll,unesc_tmpll><<EOF>>                                   { RETURN(SCRIPT_CONTINUE) }
+<tmpll>{UNICODE_ESCAPE_SEQUENCE}                             |
+<tmpll>{HEX_ESCAPE_SEQUENCE}                                 { escaped_unicode(); }
+<tmpll>{ESCAPED_CODE_POINT}                                  { escaped_code_point(); }
+<unesc_tmpll>{UNICODE_ESCAPE_SEQUENCE}                       { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_tmpll>{HEX_ESCAPE_SEQUENCE}                           { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_tmpll>{ESCAPED_CODE_POINT}                            { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_tmpll>{BYTE_ESCAPE_SEQUENCE}                          { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_tmpll>{PERCENT_ESCAPE_SEQUENCE}                       { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_tmpll>{URL_ESCAPE_SEQUENCE}                           { set_encoding(IS_PERCENT); escaped_url_sequence(); }
 
 <regst>{LITERAL_REGEX_START}        { EXEC(literal_regex_start()) }
 <regex>{LITERAL_REGEX_END}          { ECHO; BEGIN(divop); }
@@ -1080,17 +1154,19 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <regex>\\{CR}                       |
 <regex>{LINE_TERMINATORS}           { BEGIN(regst); RETURN(BAD_TOKEN) }
 <regex>[^<{LF}{CR}{LS}{PS}\\\/]+    { ECHO; }
+<regex>{UNICODE_ESCAPE_SEQUENCE}    |
+<regex>{HEX_ESCAPE_SEQUENCE}        { escaped_unicode(); }
 <regex><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
 
 <divop>{DIV_OPERATOR}               |
 <divop>{DIV_ASSIGNMENT_OPERATOR}    { div_assignment_operator(); }
 
-{OPEN_BRACE}                        { EXEC(open_brace()) }
-{CLOSE_BRACE}                       { EXEC(close_brace()) }
-{OPEN_PARENTHESIS}                  { EXEC(open_parenthesis()) }
-{CLOSE_PARENTHESIS}                 { EXEC(close_parenthesis()) }
-{OPEN_BRACKET}                      { EXEC(open_bracket()) }
-{CLOSE_BRACKET}                     { EXEC(close_bracket()) }
+{OPEN_BRACE}                                          { EXEC(open_brace()) }
+{CLOSE_BRACE}                                         { EXEC(close_brace()) }
+{OPEN_PARENTHESIS}                                    { EXEC(open_parenthesis()) }
+<INITIAL,divop,regst,char_code>{CLOSE_PARENTHESIS}    { EXEC(close_parenthesis()) }
+{OPEN_BRACKET}                                        { EXEC(open_bracket()) }
+{CLOSE_BRACKET}                                       { EXEC(close_bracket()) }
 
 {PUNCTUATOR_PREFIX}                 { EXEC(punctuator_prefix()) }
 {DOT_ACCESSOR}                      { dot_accessor(); }
@@ -1101,7 +1177,9 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 {OPERATOR_COMPLEX_ASSIGNMENT}       { operator_complex_assignment(); }
 {OPERATOR_LOGICAL}                  { operator_logical(); }
 {OPERATOR_SHIFT}                    { operator_shift(); }
+
 {PUNCTUATOR_COMMA}                  { punctuator_comma(); }
+<char_code>{PUNCTUATOR_COMMA}       { /* skip */ }
 
 {USE_STRICT_DIRECTIVE}              { EXEC(use_strict_directive()) }
 {USE_STRICT_DIRECTIVE_SC}           { EXEC(use_strict_directive_sc()) }
@@ -1130,8 +1208,13 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 {LITERAL}                           { EXEC(general_literal()) }
 {IDENTIFIER}                        { EXEC(general_identifier()) }
 
+<char_code>{LITERAL_INTEGER}       { set_encoding(IS_DEC); dec_code_point(); }
+<char_code>{LITERAL_HEX_INTEGER}   { set_encoding(IS_HEX); hex_code_point(); }
+
 .|{ALL_UNICODE}                     { general_unicode(); }
-<<EOF>>                             { EEOF(eval_eof()) }
+
+<char_code>.|{ALL_UNICODE}                { char_code_no_match(); }
+<INITIAL,divop,regst,char_code><<EOF>>    { EEOF(eval_eof()) }
 
 %%
 
@@ -1154,6 +1237,8 @@ static std::string unicode_to_utf8(const unsigned int code)
         res += 0x80 | ((code >> 6) & 0x3f);
         res += 0x80 | (code & 0x3f);
     }
+    else
+        res += "\uffff";
 
     return res;
 }
@@ -1167,6 +1252,7 @@ static std::string unescape_unicode(const char* lexeme)
 
     bool is_unescape = false;
     bool is_unicode = false;
+    bool is_code_point = false;
     short digits_left = 4;
     std::string unicode_str;
 
@@ -1188,6 +1274,13 @@ static std::string unescape_unicode(const char* lexeme)
             is_unescape = false;
         }
 
+        if (is_unicode and ch == '{')
+        {
+            is_unicode = false;
+            is_code_point = true;
+            continue;
+        }
+
         if (is_unicode)
         {
             unicode_str += ch;
@@ -1203,6 +1296,22 @@ static std::string unescape_unicode(const char* lexeme)
             continue;
         }
 
+        if (is_code_point)
+        {
+            if (ch == '}')
+            {
+                const unsigned int code_point = std::stoi(unicode_str, nullptr, 16);
+                res += unicode_to_utf8(code_point);
+
+                unicode_str = "";
+                is_code_point = false;
+            }
+            else
+                unicode_str += ch;
+
+            continue;
+        }
+
         res += ch;
     }
 
@@ -1471,6 +1580,12 @@ void JSTokenizer::process_punctuator(JSToken tok)
     BEGIN(regst);
 }
 
+void JSTokenizer::skip_punctuator()   // same state change as a punctuator, but writes no output
+{
+    token = PUNCTUATOR;               // remember a punctuator as the last seen token
+    BEGIN(regst);                     // switch the scanner to the regst start condition
+}
+
 void JSTokenizer::process_closing_brace()
 {
     if (!brace_depth.empty())
@@ -1782,6 +1897,10 @@ JSTokenizer::FuncType JSTokenizer::detect_func_type()
     case IDENTIFIER:
     {
         FuncType ret = FuncType::GENERAL;
+
+        if (meta_type() == ScopeMetaType::FUNCTION)
+            return ret;
+
         if (ignored_id_pos >= 0)
         {
             std::streambuf* pbuf = yyout.rdbuf();
@@ -1801,6 +1920,8 @@ JSTokenizer::FuncType JSTokenizer::detect_func_type()
                     memcmp(tail, id.identifier.data(), size) == 0)
                 {
                     ret = id.type;
+                    pbuf->pubseekoff(-size, yyout.cur, yyout.out);
+
                     break;
                 }
             }
@@ -1826,11 +1947,21 @@ void JSTokenizer::check_function_nesting(JSTokenizer::FuncType type)
     }
 }
 
+void JSTokenizer::check_mixed_encoding(uint32_t flags)   // flags: EncodingType bits of one call
+{
+    mixed_encoding_seen |= (flags & (flags - 1)) != 0;    // >1 bit set => mixed; sticky once seen
+}
+
 bool JSTokenizer::is_unescape_nesting_seen() const
 {
     return unescape_nest_seen;
 }
 
+bool JSTokenizer::is_mixed_encoding_seen() const   // reports the flag set by check_mixed_encoding()
+{
+    return mixed_encoding_seen;
+}
+
 void JSTokenizer::set_block_param(bool f)
 {
     scope_cur().block_param = f;
@@ -2031,6 +2162,22 @@ JSTokenizer::JSRet JSTokenizer::literal_dq_string_start()
     ECHO;
     BEGIN(dqstr);
     set_ident_norm(true);
+
+    switch (func_call_type())
+    {
+    case FuncType::UNESCAPE:
+        BEGIN(unesc_dqstr);
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+    case FuncType::CHAR_CODE:
+        BEGIN(dqstr);
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
     return EOS;
 }
 
@@ -2040,8 +2187,23 @@ JSTokenizer::JSRet JSTokenizer::literal_sq_string_start()
     EXEC(do_semicolon_insertion(ASI_GROUP_7))
     EXEC(do_spacing(LITERAL))
     ECHO;
-    BEGIN(sqstr);
     set_ident_norm(true);
+
+    switch (func_call_type())
+    {
+    case FuncType::UNESCAPE:
+        BEGIN(unesc_sqstr);
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+    case FuncType::CHAR_CODE:
+        BEGIN(sqstr);
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
     return EOS;
 }
 
@@ -2051,8 +2213,23 @@ JSTokenizer::JSRet JSTokenizer::literal_template_start()
     EXEC(do_semicolon_insertion(ASI_GROUP_7))
     EXEC(do_spacing(LITERAL))
     ECHO;
-    BEGIN(tmpll);
     set_ident_norm(true);
+
+    switch (func_call_type())
+    {
+    case FuncType::UNESCAPE:
+        BEGIN(unesc_tmpll);
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+    case FuncType::CHAR_CODE:
+        BEGIN(tmpll);
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
     return EOS;
 }
 
@@ -2121,7 +2298,27 @@ JSTokenizer::JSRet JSTokenizer::open_parenthesis()
     check_function_nesting(f_call);
     EXEC(scope_push(PARENTHESES))
     set_func_call_type(f_call);
-    process_punctuator();
+
+    switch (f_call)
+    {
+    case FuncType::CHAR_CODE:
+        token = LITERAL;
+        BEGIN(char_code);
+        set_char_code_str(true);
+        yyout << '\'';
+        break;
+    case FuncType::UNESCAPE:
+        skip_punctuator();
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+        process_punctuator();
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
     return EOS;
 }
 
@@ -2129,11 +2326,16 @@ JSTokenizer::JSRet JSTokenizer::close_parenthesis()
 {
     dealias_clear_mutated(false);
     dealias_reset();
+
     FuncType f_call = func_call_type();
+    uint32_t flags = encoding();
+    bool ch_code_str = char_code_str();
     bool id_norm = ident_norm();
+
     if (meta_type() != ScopeMetaType::NOT_SET)
         EXEC(p_scope_pop(meta_type()))
     EXEC(scope_pop(PARENTHESES))
+
     if (f_call == FuncType::NOT_FUNC)
         set_ident_norm(id_norm);
     if (block_param())
@@ -2145,7 +2347,26 @@ JSTokenizer::JSRet JSTokenizer::close_parenthesis()
     {
         EXEC(do_semicolon_insertion(ASI_GROUP_5))
     }
-    ECHO;
+
+    switch (f_call)
+    {
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+        ECHO;
+        break;
+    case FuncType::UNESCAPE:
+        check_mixed_encoding(flags);
+        break;
+    case FuncType::CHAR_CODE:
+        check_mixed_encoding(flags);
+        if (ch_code_str)
+            yyout << '\'';
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
     token = PUNCTUATOR;
     BEGIN(divop);
     return EOS;
@@ -2522,10 +2743,54 @@ void JSTokenizer::general_unicode()
     set_ident_norm(true);
 }
 
+void JSTokenizer::escaped_unicode() // decodes the currently matched escape and writes the result to yyout
+{
+    // skip the 2-character escape prefix of the matched text; the remainder is parsed as a hex number
+    std::string code(YYText() + 2);
+    yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); // NOTE(review): std::stoi throws on invalid/out-of-range input; presumably the lexer rule guarantees valid hex digits — confirm
+}
+
+void JSTokenizer::escaped_code_point() // decodes a matched escape that has a 3-character prefix and a 1-character suffix
+{
+    // skip the 3-character prefix (presumably "\u{" — confirm against the lexer rule); hex number follows
+    std::string code(YYText() + 3);
+    code.resize(code.size() - 1); // drop the final character of the match (presumably the closing '}')
+    yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); // NOTE(review): std::stoi throws std::out_of_range if the digits exceed int — confirm the rule bounds the digit count
+}
+
+void JSTokenizer::escaped_url_sequence() // decodes a matched escape that has a 1-character prefix
+{
+    // skip the single leading escape character (presumably '%' — confirm against the lexer rule); hex number follows
+    std::string code(YYText() + 1);
+    yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); // parsed value is converted by unicode_to_utf8() and emitted
+}
+
+void JSTokenizer::dec_code_point() // treats the whole matched text as a decimal number and emits its unicode_to_utf8() form
+{
+    std::string code(YYText()); // no prefix to strip: the entire match is the number
+    yyout << unicode_to_utf8(std::stoi(code, nullptr, 10)); // NOTE(review): std::stoi throws std::out_of_range for values beyond int — confirm the rule bounds the digit count
+}
+
+void JSTokenizer::hex_code_point() // treats the whole matched text as a hexadecimal number and emits its unicode_to_utf8() form
+{
+    std::string code(YYText()); // nothing is stripped here; with base 16 std::stoi itself accepts an optional 0x/0X prefix
+    yyout << unicode_to_utf8(std::stoi(code, nullptr, 16)); // NOTE(review): std::stoi throws std::out_of_range for values beyond int — confirm the rule bounds the digit count
+}
+
+void JSTokenizer::char_code_no_match() // leaves char-code mode when the input is not something the char_code state decodes
+{
+    BEGIN(regst); // switch the scanner to the regst start condition
+    yyout << '\''; // closes the quote that open_parenthesis() emitted when it entered char_code
+    set_char_code_str(false); // cleared so close_parenthesis() does not emit a second closing quote
+    yyless(0); // push the whole match back so it is rescanned under the new start condition
+    memset((void*)(states + sp), 0, sizeof(states[0])); // zero the states entry at index sp — NOTE(review): presumably resets the saved tokenizer state for the rescan; confirm
+}
+
 JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in)
 {
     yy_flush_buffer(YY_CURRENT_BUFFER);
     unescape_nest_seen = false;
+    mixed_encoding_seen = false;
 
     auto r = yylex();
 
index 18e4c7f19452814fcb6d65456701518b62392385..c382eadc6621f0a0eb8adcefccd73da26697f2db 100644 (file)
@@ -30,6 +30,16 @@ add_catch_test( js_dealias_test
         js_test_utils.cc
 )
 
+add_catch_test( js_unescape_test # Catch test target; js_unescape_test.cc itself is not listed, presumably added by add_catch_test from the name — confirm
+    SOURCES
+        ${FLEX_js_tokenizer_OUTPUTS} # flex-generated tokenizer sources
+        ../js_identifier_ctx.cc
+        ../js_normalizer.cc
+        ../streambuf.cc
+        ../util_cstring.cc
+        js_test_utils.cc # shared test helpers
+)
+
 add_catch_test( js_identifier_ctx_test
     SOURCES
         ../js_identifier_ctx.cc
index f3887aaed9d7f1e9d5d4314e0d7de8017e9d0116..0c30c01e71f478ab2c66b881edacb2a90410a569 100644 (file)
@@ -479,7 +479,7 @@ static const char clamav_buf12[] =
     "var x='test\\u0000test';";
 
 static const char clamav_expected12[] =
-    "var x='test\\u0000test';";
+    "var x='test\u0000test';";
 
 static const char clamav_buf13[] =
     "var x\\s12345";
@@ -491,7 +491,7 @@ static const char clamav_buf14[] =
     "document.write(unescape('test%20test";
 
 static const char clamav_expected14[] =
-    "document.write(unescape('test%20test";
+    "document.write('test test";
 
 TEST_CASE("clamav tests", "[JSNormalizer]")
 {
@@ -2314,8 +2314,8 @@ TEST_CASE("split in string literal", "[JSNormalizer]")
         const char dat1[] = "var str =\"any\\";
         const char dat2[] = "u1234tx\";";
         const char exp1[] = "var str=\"any\\";
-        const char exp2[] = "u1234tx\";";
-        const char exp[] = "var str=\"any\\u1234tx\";";
+        const char exp2[] = "\u1234tx\";";
+        const char exp[] = "var str=\"any\u1234tx\";";
 
         NORMALIZE_2(dat1, dat2, exp1, exp2);
         NORM_COMBINED_2(dat1, dat2, exp);
@@ -2325,8 +2325,8 @@ TEST_CASE("split in string literal", "[JSNormalizer]")
         const char dat1[] = "var str =\"any\\u";
         const char dat2[] = "1234tx\";";
         const char exp1[] = "var str=\"any\\u";
-        const char exp2[] = "1234tx\";";
-        const char exp[] = "var str=\"any\\u1234tx\";";
+        const char exp2[] = "\u1234tx\";";
+        const char exp[] = "var str=\"any\u1234tx\";";
 
         NORMALIZE_2(dat1, dat2, exp1, exp2);
         NORM_COMBINED_2(dat1, dat2, exp);
@@ -4245,32 +4245,33 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("in arguments")
         {
             tester.test_function_scopes({
-                {"unescape(", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"unescape(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("separated identifier and call")
         {
             tester.test_function_scopes({
-                {"unescape  /*comment*/  (", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"unescape  /*comment*/  (", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("complete call")
         {
             tester.test_function_scopes({
-                {"unescape('%62%61%72')", "unescape('%62%61%72')", {FuncType::NOT_FUNC}}
+                {"unescape('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}}
             });
         }
         SECTION("as named function definition")
         {
             tester.test_function_scopes({
-                {"unescape(){", "unescape(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+                {"function unescape(){", "function unescape(){",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
             });
         }
         SECTION("after assignment substitution")
         {
             tester.test_function_scopes({
-                {"var a = unescape; a(", "var var_0000=unescape;unescape(", {FuncType::NOT_FUNC,
-                                                                             FuncType::UNESCAPE}}
+                {"var a = unescape; a(", "var var_0000=unescape;",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("literal")
@@ -4282,7 +4283,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("as a template literal substitution")
         {
             tester.test_function_scopes({
-                {"`literal ${unescape(", "`literal ${unescape(",
+                {"`literal ${unescape(", "`literal ${",
                 {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
@@ -4292,33 +4293,34 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("in arguments")
         {
             tester.test_function_scopes({
-                {"decodeURI(", "decodeURI(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"decodeURI(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("separated identifier and call")
         {
             tester.test_function_scopes({
-                {"decodeURI  /*comment*/  (", "decodeURI(", {FuncType::NOT_FUNC,
-                                                             FuncType::UNESCAPE}}
+                {"decodeURI  /*comment*/  (", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("complete call")
         {
             tester.test_function_scopes({
-                {"decodeURI('%62%61%72')", "decodeURI('%62%61%72')", {FuncType::NOT_FUNC}}
+                {"decodeURI('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}}
             });
         }
         SECTION("as named function definition")
         {
             tester.test_function_scopes({
-                {"decodeURI(){", "decodeURI(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+                {"function decodeURI(){", "function decodeURI(){",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
             });
         }
         SECTION("after assignment substitution")
         {
             tester.test_function_scopes({
-                {"var a = decodeURI; a(", "var var_0000=decodeURI;decodeURI(", {FuncType::NOT_FUNC,
-                                                                                FuncType::UNESCAPE}}
+                {"var a = decodeURI; a(", "var var_0000=decodeURI;",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("literal")
@@ -4330,7 +4332,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("as a template literal substitution")
         {
             tester.test_function_scopes({
-                {"`literal ${decodeURI(", "`literal ${decodeURI(",
+                {"`literal ${decodeURI(", "`literal ${",
                 {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
@@ -4340,37 +4342,36 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("in arguments")
         {
             tester.test_function_scopes({
-                {"decodeURIComponent(", "decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                                FuncType::UNESCAPE}}
+                {"decodeURIComponent(", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("separated identifier and call")
         {
             tester.test_function_scopes({
-                {"decodeURIComponent  /*comment*/  (", "decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                                               FuncType::UNESCAPE}}
+                {"decodeURIComponent  /*comment*/  (", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("complete call")
         {
             tester.test_function_scopes({
-                {"decodeURIComponent('%62%61%72')", "decodeURIComponent('%62%61%72')",
+                {"decodeURIComponent('%62%61%72')", "'bar'",
                 {FuncType::NOT_FUNC}}
             });
         }
         SECTION("as named function definition")
         {
             tester.test_function_scopes({
-                {"decodeURIComponent(){", "decodeURIComponent(){", {FuncType::NOT_FUNC,
-                                                                    FuncType::NOT_FUNC}}
+                {"function decodeURIComponent(){", "function decodeURIComponent(){",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
             });
         }
         SECTION("after assignment substitution")
         {
             tester.test_function_scopes({
-                {"var a = decodeURIComponent; a(",
-                "var var_0000=decodeURIComponent;decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                                         FuncType::UNESCAPE}}
+                {"var a = decodeURIComponent; a(", "var var_0000=decodeURIComponent;",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
         SECTION("literal")
@@ -4382,8 +4383,8 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("as a template literal substitution")
         {
             tester.test_function_scopes({
-                {"`literal ${decodeURIComponent(", "`literal ${decodeURIComponent(",
-                 {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"`literal ${decodeURIComponent(", "`literal ${",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
             });
         }
     }
@@ -4392,35 +4393,35 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("in arguments")
         {
             tester.test_function_scopes({
-                {"String.fromCharCode(", "String.fromCharCode(",
+                {"String.fromCharCode(", "'",
                 {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
             });
         }
         SECTION("separated identifier and call")
         {
             tester.test_function_scopes({
-                {"String.fromCharCode  /*comment*/  (", "String.fromCharCode(",
+                {"String.fromCharCode  /*comment*/  (", "'",
                 {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
             });
         }
         SECTION("complete call")
         {
             tester.test_function_scopes({
-                {"String.fromCharCode( 65, 0x42 )", "String.fromCharCode(65,0x42)",
+                {"String.fromCharCode( 65, 0x42 )", "'AB'",
                 {FuncType::NOT_FUNC}}
             });
         }
         SECTION("as named function definition")
         {
             tester.test_function_scopes({
-                {"String.fromCharCode(){", "String.fromCharCode(){",
+                {"function String.fromCharCode(){", "function String.fromCharCode(){",
                 {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
             });
         }
         SECTION("after class name assignment substitution")
         {
             tester.test_function_scopes({
-                {"var a = String; a.fromCharCode(", "var var_0000=String;String.fromCharCode(",
+                {"var a = String; a.fromCharCode(", "var var_0000=String;'",
                 {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
             });
         }
@@ -4428,7 +4429,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         {
             tester.test_function_scopes({
                 {"var a = String.fromCharCode; a(",
-                "var var_0000=String.fromCharCode;String.fromCharCode(",
+                "var var_0000=String.fromCharCode;'",
                 {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
             });
         }
@@ -4449,7 +4450,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
         SECTION("as a template literal substitution")
         {
             tester.test_function_scopes({
-                {"`literal ${String.fromCharCode(", "`literal ${String.fromCharCode(",
+                {"`literal ${String.fromCharCode(", "`literal ${'",
                 {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
             });
         }
@@ -4476,7 +4477,7 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
         SECTION("Multiple unescape functions")
         {
             tester.test_function_scopes({
-                {"unescape( unescape( unescape(", "unescape(unescape(unescape(",
+                {"unescape( unescape( unescape(", "",
                 {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
             });
             CHECK(tester.is_unescape_nesting_seen());
@@ -4484,31 +4485,24 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
         SECTION("Multiple different unescape functions")
         {
             tester.test_function_scopes({
-                {"unescape( decodeURI( decodeURIComponent(",
-                "unescape(decodeURI(decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                           FuncType::UNESCAPE,
-                                                           FuncType::UNESCAPE,
-                                                           FuncType::UNESCAPE}}
+                {"unescape( decodeURI( decodeURIComponent(", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
             });
             CHECK(tester.is_unescape_nesting_seen());
         }
         SECTION("Multiple String.fromCharCode functions")
         {
             tester.test_function_scopes({
-                {"String.fromCharCode( String.fromCharCode( String.fromCharCode(",
-                "String.fromCharCode(String.fromCharCode(String.fromCharCode(",
-                {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE,
-                FuncType::CHAR_CODE}}
+                {"String.fromCharCode( String.fromCharCode( String.fromCharCode(", "'' '' '",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE, FuncType::CHAR_CODE}}
             });
             CHECK(!tester.is_unescape_nesting_seen());
         }
         SECTION("Mixed function calls")
         {
             tester.test_function_scopes({
-                {"general( unescape( String.fromCharCode(",
-                "var_0000(unescape(String.fromCharCode(",
-                {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE,
-                FuncType::CHAR_CODE}}
+                {"general( unescape( String.fromCharCode(", "var_0000('",
+                {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE, FuncType::CHAR_CODE}}
             });
             CHECK(!tester.is_unescape_nesting_seen());
         }
@@ -4525,16 +4519,14 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
         SECTION("Multiple unescape functions")
         {
             tester.test_function_scopes({
-                {"unescape( unescape( unescape( '%62%61%72' ) )",
-                "unescape(unescape(unescape('%62%61%72'))", {FuncType::NOT_FUNC,
-                                                             FuncType::UNESCAPE }}
+                {"unescape( unescape( unescape( '%62%61%72' ) )", "'bar'",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE }}
             });
         }
         SECTION("Multiple different unescape functions")
         {
             tester.test_function_scopes({
-                {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )",
-                "unescape(decodeURI(decodeURIComponent('%62%61%72'))",
+                {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )", "'bar'",
                 {FuncType::NOT_FUNC, FuncType::UNESCAPE }}
             });
         }
@@ -4542,7 +4534,7 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
         {
             tester.test_function_scopes({
                 {"String.fromCharCode( String.fromCharCode( String.fromCharCode( 65, 0x42 ) )",
-                "String.fromCharCode(String.fromCharCode(String.fromCharCode(65,0x42))",
+                "'' '' 'AB'",
                 {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
             });
         }
@@ -4550,8 +4542,8 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
         {
             tester.test_function_scopes({
                 {"general( unescape( String.fromCharCode( 65, 0x42 ) )",
-                "var_0000(unescape(String.fromCharCode(65,0x42))", {FuncType::NOT_FUNC,
-                                                                    FuncType::GENERAL}}
+                "var_0000('AB'",
+                {FuncType::NOT_FUNC, FuncType::GENERAL}}
             });
         }
     }
@@ -4569,18 +4561,18 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
         tester.test_function_scopes({
             {"un",          "var_0000",     {FuncType::NOT_FUNC}},
             {"escape",      "unescape",     {FuncType::NOT_FUNC}},
-            {"(",           "unescape(",    {FuncType::NOT_FUNC,
+            {"(",           "",             {FuncType::NOT_FUNC,
                                              FuncType::UNESCAPE}},
-            {")",           "unescape()",   {FuncType::NOT_FUNC}},
+            {")",           "",             {FuncType::NOT_FUNC}},
         });
     }
     SECTION("split between identifier and parenthesis")
     {
         tester.test_function_scopes({
             {"decodeURI",   "decodeURI",    {FuncType::NOT_FUNC}},
-            {"(",           "decodeURI(",   {FuncType::NOT_FUNC,
+            {"(",           "",             {FuncType::NOT_FUNC,
                                              FuncType::UNESCAPE}},
-            {")",           "decodeURI()",  {FuncType::NOT_FUNC}},
+            {")",           "",             {FuncType::NOT_FUNC}},
         });
     }
     SECTION("comment between identifier and parenthesis")
@@ -4588,9 +4580,9 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
         tester.test_function_scopes({
             {"unescape",                "unescape",     {FuncType::NOT_FUNC}},
             {"//String.fromCharCode\n", "unescape",     {FuncType::NOT_FUNC}},
-            {"(",                       "unescape(",    {FuncType::NOT_FUNC,
+            {"(",                       "",             {FuncType::NOT_FUNC,
                                                          FuncType::UNESCAPE}},
-            {")",                       "unescape()",   {FuncType::NOT_FUNC}},
+            {")",                       "",             {FuncType::NOT_FUNC}},
         });
     }
     SECTION("split in arguments")
@@ -4611,13 +4603,13 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
         tester.test_function_scopes({
             {"String",          "String",                               {FuncType::NOT_FUNC}},
             {".fromCharCode",   "String.fromCharCode",                  {FuncType::NOT_FUNC}},
-            {"(`",              "String.fromCharCode(`",                {FuncType::NOT_FUNC,
+            {"(`",              "'' `",                                 {FuncType::NOT_FUNC,
                                                                          FuncType::CHAR_CODE}},
-            {"un",              "String.fromCharCode(`un",              {FuncType::NOT_FUNC,
+            {"un",              "'' `un",                               {FuncType::NOT_FUNC,
                                                                          FuncType::CHAR_CODE}},
-            {"escape(",         "String.fromCharCode(`unescape(",       {FuncType::NOT_FUNC,
+            {"escape(",         "'' `unescape(",                        {FuncType::NOT_FUNC,
                                                                          FuncType::CHAR_CODE}},
-            {"`)",              "String.fromCharCode(`unescape(`)",     {FuncType::NOT_FUNC}},
+            {"`)",              "'' `unescape(`",                       {FuncType::NOT_FUNC}},
         });
     }
     SECTION("Nesting - Mixed function calls")
@@ -4626,27 +4618,26 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
             {"decode",                      "var_0000",                 {FuncType::NOT_FUNC}},
             {"URI",                         "decodeURI",                {FuncType::NOT_FUNC}},
             {"Component",                   "decodeURIComponent",       {FuncType::NOT_FUNC}},
-            {"(",                           "decodeURIComponent(",      {FuncType::NOT_FUNC,
+            {"(",                           "",                         {FuncType::NOT_FUNC,
                                                                          FuncType::UNESCAPE}},
-            {" a, ",                        "decodeURIComponent(var_0001,",
+            {" a, ",                        "var_0001,",
                                                                         {FuncType::NOT_FUNC,
                                                                          FuncType::UNESCAPE}},
-            {" String.fromCharCode( ar",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0002",
+            {" String.fromCharCode( ar",    "var_0001,'' var_0002",
                                                                         {FuncType::NOT_FUNC,
                                                                          FuncType::UNESCAPE,
                                                                          FuncType::CHAR_CODE}},
-            {"g ), b, foo",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0005",
+
+            {"g ), b, foo",                 "var_0001,'' var_0003,var_0004,var_0005",
                                                                         {FuncType::NOT_FUNC,
                                                                          FuncType::UNESCAPE}},
-            {"bar( ",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006(",
+
+            {"bar( ",                       "var_0001,'' var_0003,var_0004,var_0006(",
                                                                         {FuncType::NOT_FUNC,
                                                                          FuncType::UNESCAPE,
                                                                          FuncType::GENERAL}},
-            {"))",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006())",
+
+            {"))",                          "var_0001,'' var_0003,var_0004,var_0006()",
                                                                         {FuncType::NOT_FUNC}}
         });
     }
@@ -4879,4 +4870,31 @@ TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]")
         return normalizer_wo_ident.normalize(src_wo_semicolons, src_len);
     };
 }
+
+TEST_CASE("JS Normalizer, unescape", "[JSNormalizer]") // benchmarks: escape-sequence decoding and unescape() call tracking
+{
+    auto str_unescape = make_input("'", "\\u0061", "'", norm_depth); // presumably a quoted string filled with \u0061 escapes up to norm_depth — confirm in make_input
+    auto f_unescape = make_input_repeat("unescape('')", norm_depth); // presumably "unescape('')" repeated up to norm_depth — confirm in make_input_repeat
+    const char* src_str_unescape = str_unescape.c_str();
+    const char* src_f_unescape = f_unescape.c_str();
+    size_t src_len = norm_depth; // both benchmarks pass norm_depth as the input length
+
+    JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
+    JSNormalizer norm(ident_ctx, unlim_depth, max_template_nesting, norm_depth);
+
+    REQUIRE(norm_ret(norm, str_unescape) == JSTokenizer::SCRIPT_ENDED); // precondition check on the input before it is timed
+    BENCHMARK("unescape sequence")
+    {
+        norm.rewind_output(); // rewind before every timed normalize() call
+        return norm.normalize(src_str_unescape, src_len); // returned so the benchmarked call's result is used
+    };
+
+    REQUIRE(norm_ret(norm, f_unescape) == JSTokenizer::SCRIPT_ENDED); // same precondition check for the function-call input
+    BENCHMARK("unescape function tracking")
+    {
+        norm.rewind_output(); // rewind before every timed normalize() call
+        return norm.normalize(src_f_unescape, src_len);
+    };
+}
+
 #endif // BENCHMARK_TEST
index 5083dbcba948a8200d5a40dee61123dc2458fd1f..cd871d43d42da8b665c1647e9453e4204f0106b6 100644 (file)
@@ -67,7 +67,7 @@ bool JSTokenizerTester::is_unescape_nesting_seen() const
     return normalizer.is_unescape_nesting_seen();
 }
 
-void test_scope(const char* context, std::list<JSProgramScopeType> stack)
+void test_scope(const char* context, const std::list<JSProgramScopeType>& stack)
 {
     std::string buf(context);
     buf += "</script>";
@@ -96,6 +96,17 @@ void test_normalization_bad(const char* source, const char* expected, JSTokenize
     CHECK(result_buf == expected);
 }
 
+void test_normalization_mixed_encoding(const char* source, const char* expected) // normalizes source once and checks output plus the mixed-encoding flag
+{
+    JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
+    JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); // fresh normalizer per call, so no state carries over between checks
+    auto ret = normalizer.normalize(source, strlen(source)); // source must be NUL-terminated; its strlen is the input size
+    std::string result_buf(normalizer.get_script(), normalizer.script_size()); // copy by explicit size rather than relying on a terminator
+    CHECK(ret == JSTokenizer::JSRet::SCRIPT_CONTINUE); // the normalizer is expected to report SCRIPT_CONTINUE for this input
+    CHECK(normalizer.is_mixed_encoding_seen()); // the input must have raised the mixed-encoding flag
+    CHECK(result_buf == expected);
+}
+
 void test_normalization(const std::vector<PduCase>& pdus)
 {
     JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
@@ -111,7 +122,7 @@ void test_normalization(const std::vector<PduCase>& pdus)
     }
 }
 
-void test_normalization(std::list<ScopedPduCase> pdus)
+void test_normalization(const std::list<ScopedPduCase>& pdus)
 {
     JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
     JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
index 10f5b0a20ea56e6da2a8c20e16ac1f1f22b9ba56..269fabbb627de19abfefa809362432bed2d93d0c 100644 (file)
@@ -83,14 +83,15 @@ private:
     snort::JSNormalizer normalizer;
 };
 
-void test_scope(const char* context, std::list<JSProgramScopeType> stack);
+void test_scope(const char* context, const std::list<JSProgramScopeType>& stack);
 void test_normalization(const char* source, const char* expected);
 void test_normalization_bad(const char* source, const char* expected, JSTokenizer::JSRet eret);
+void test_normalization_mixed_encoding(const char* source, const char* expected);
 typedef std::pair<const char*, const char*> PduCase;
 // source, expected for a single PDU
 void test_normalization(const std::vector<PduCase>& pdus);
 typedef std::tuple<const char*,const char*, std::list<JSProgramScopeType>> ScopedPduCase;
 // source, expected, and current scope type stack for a single PDU
-void test_normalization(std::list<ScopedPduCase> pdus);
+void test_normalization(const std::list<ScopedPduCase>& pdus);
 
 #endif // JS_TEST_UTILS_H
diff --git a/src/utils/test/js_unescape_test.cc b/src/utils/test/js_unescape_test.cc
new file mode 100644 (file)
index 0000000..6736935
--- /dev/null
@@ -0,0 +1,1144 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_unescape_test.cc author Volodymyr Horban <vhorban@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "catch/catch.hpp"
+
+#include "utils/js_identifier_ctx.h"
+#include "utils/js_normalizer.h"
+
+#include "js_test_utils.h"
+
+#ifdef CATCH_TEST_BUILD
+
+TEST_CASE("Sequence parsing", "[JSNormalizer]")
+{
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "'\\x01'",
+            "'\u0001'"
+        );
+        test_normalization(
+            "'\\x23'",
+            "'\u0023'"
+        );
+        test_normalization(
+            "'\\x45'",
+            "'\u0045'"
+        );
+        test_normalization(
+            "'\\x67'",
+            "'\u0067'"
+        );
+        test_normalization(
+            "'\\x89'",
+            "'\u0089'"
+        );
+        test_normalization(
+            "'\\xaA'",
+            "'\u00aA'"
+        );
+        test_normalization(
+            "'\\xbB'",
+            "'\u00bB'"
+        );
+        test_normalization(
+            "'\\xcC'",
+            "'\u00cC'"
+        );
+        test_normalization(
+            "'\\xdD'",
+            "'\u00dD'"
+        );
+        test_normalization(
+            "'\\xeE'",
+            "'\u00eE'"
+        );
+        test_normalization(
+            "'\\xfF'",
+            "'\u00fF'"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "'\\u0123'",
+            "'\u0123'"
+        );
+        test_normalization(
+            "'\\u4567'",
+            "'\u4567'"
+        );
+        test_normalization(
+            "'\\u89aA'",
+            "'\u89aA'"
+        );
+        test_normalization(
+            "'\\ubBcC'",
+            "'\ubBcC'"
+        );
+        test_normalization(
+            "'\\ueEfF'",
+            "'\ueEfF'"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "'\\u{0123}'",
+            "'\u0123'"
+        );
+        test_normalization(
+            "'\\u{4567}'",
+            "'\u4567'"
+        );
+        test_normalization(
+            "'\\u{89aA}'",
+            "'\u89aA'"
+        );
+        test_normalization(
+            "'\\u{bBcC}'",
+            "'\ubBcC'"
+        );
+        test_normalization(
+            "'\\u{eEfF}'",
+            "'\ueEfF'"
+        );
+    }
+
+    SECTION("%XX")
+    {
+        test_normalization(
+            "unescape('%01')",
+            "'\u0001'"
+        );
+        test_normalization(
+            "unescape('%23')",
+            "'\u0023'"
+        );
+        test_normalization(
+            "unescape('%45')",
+            "'\u0045'"
+        );
+        test_normalization(
+            "unescape('%67')",
+            "'\u0067'"
+        );
+        test_normalization(
+            "unescape('%89')",
+            "'\u0089'"
+        );
+        test_normalization(
+            "unescape('%aA')",
+            "'\u00aA'"
+        );
+        test_normalization(
+            "unescape('%bB')",
+            "'\u00bB'"
+        );
+        test_normalization(
+            "unescape('%cC')",
+            "'\u00cC'"
+        );
+        test_normalization(
+            "unescape('%dD')",
+            "'\u00dD'"
+        );
+        test_normalization(
+            "unescape('%eE')",
+            "'\u00eE'"
+        );
+        test_normalization(
+            "unescape('%fF')",
+            "'\u00fF'"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "unescape('\\u01')",
+            "'\u0001'"
+        );
+        test_normalization(
+            "unescape('%23')",
+            "'\u0023'"
+        );
+        test_normalization(
+            "unescape('\\u45')",
+            "'\u0045'"
+        );
+        test_normalization(
+            "unescape('\\u67')",
+            "'\u0067'"
+        );
+        test_normalization(
+            "unescape('\\u89')",
+            "'\u0089'"
+        );
+        test_normalization(
+            "unescape('\\uaA')",
+            "'\u00aA'"
+        );
+        test_normalization(
+            "unescape('\\ubB')",
+            "'\u00bB'"
+        );
+        test_normalization(
+            "unescape('\\ucC')",
+            "'\u00cC'"
+        );
+        test_normalization(
+            "unescape('\\udD')",
+            "'\u00dD'"
+        );
+        test_normalization(
+            "unescape('\\ueE')",
+            "'\u00eE'"
+        );
+        test_normalization(
+            "unescape('\\ufF')",
+            "'\u00fF'"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "unescape('%u0123')",
+            "'\u0123'"
+        );
+        test_normalization(
+            "unescape('%u4567')",
+            "'\u4567'"
+        );
+        test_normalization(
+            "unescape('%u89aA')",
+            "'\u89aA'"
+        );
+        test_normalization(
+            "unescape('%ubBcC')",
+            "'\ubBcC'"
+        );
+        test_normalization(
+            "unescape('%ueEfF')",
+            "'\ueEfF'"
+        );
+    }
+
+    SECTION("decimal") // String.fromCharCode() with a single decimal code point
+    {
+        test_normalization(
+            "String.fromCharCode(1)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(12)",
+            "'\u000c'"
+        );
+        test_normalization(
+            "String.fromCharCode(345)",
+            "'\u0159'"
+        );
+        test_normalization(
+            "String.fromCharCode(6789)",
+            "'\u1a85'"
+        );
+        test_normalization(
+            "String.fromCharCode(1000)",
+            "'\u03e8'"
+        );
+        test_normalization(
+            "String.fromCharCode(0001)", // leading zeros: expected output is still code point 1
+            "'\x01'"
+        );
+        test_normalization(
+            "String.fromCharCode(65536)", // one past 0xFFFF: expected output is U+FFFF
+            "'\uffff'"
+        );
+    }
+
+    SECTION("hexadecimal")
+    {
+        test_normalization(
+            "String.fromCharCode(0x0001)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X0001)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x1234)",
+            "'\u1234'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X5678)",
+            "'\u5678'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x9aAb)",
+            "'\u9aAb'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x9aAb)",
+            "'\u9aAb'"
+        );
+        test_normalization(
+            "String.fromCharCode(0xBcCd)",
+            "'\uBcCd'"
+        );
+        test_normalization(
+            "String.fromCharCode(0XeEfF)",
+            "'\ueEfF'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x10000)",
+            "'\uffff'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X10000)",
+            "'\uffff'"
+        );
+    }
+}
+
+TEST_CASE("Universal sequences", "[JSNormalizer]")
+{
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "\\u0065\\u0076\\u0061\\u006C () ;",
+            "eval();"
+        );
+        test_normalization(
+            "'\\u0062\\u0061\\u0072'",
+            "'bar'"
+        );
+        test_normalization(
+            "\"\\u0062\\u0061\\u0072\"",
+            "\"bar\""
+        );
+        test_normalization(
+            "`\\u0062\\u0061\\u0072`",
+            "`bar`"
+        );
+        test_normalization(
+            "/\\u0062\\u0061\\u0072/",
+            "/bar/"
+        );
+    }
+
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "'\\x62\\x61\\x72'",
+            "'bar'"
+        );
+        test_normalization(
+            "\"\\x62\\x61\\x72\"",
+            "\"bar\""
+        );
+        test_normalization(
+            "`\\x62\\x61\\x72`",
+            "`bar`"
+        );
+        test_normalization(
+            "/\\x62\\x61\\x72/",
+            "/bar/"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "\\u{0065}\\u{0076}\\u{0061}\\u{006C} () ;",
+            "eval();"
+        );
+        test_normalization(
+            "'\\u{0062}\\u{0061}\\u{0072}'",
+            "'bar'"
+        );
+        test_normalization(
+            "\"\\u{0062}\\u{0061}\\u{0072}\"",
+            "\"bar\""
+        );
+        test_normalization(
+            "`\\u{0062}\\u{0061}\\u{0072}`",
+            "`bar`"
+        );
+    }
+}
+
+TEST_CASE("unescape()", "[JSNormalizer]")
+{
+    SECTION("%XX")
+    {
+        test_normalization(
+            "unescape('%62%61%72')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"%62%61%72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`%62%61%72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "unescape('%u0062%u0061%u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"%u0062%u0061%u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`%u0062%u0061%u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "unescape('\\u62\\u61\\u72')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\u62\\u61\\u72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\u62\\u61\\u72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "unescape('\\u0062\\u0061\\u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\u0062\\u0061\\u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\u0062\\u0061\\u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "unescape('\\x62\\x61\\x72')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\x62\\x61\\x72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\x62\\x61\\x72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "unescape('\\u{0062}\\u{0061}\\u{0072}')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\u{0062}\\u{0061}\\u{0072}\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\u{0062}\\u{0061}\\u{0072}`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("mixed sequence")
+    {
+        test_normalization_mixed_encoding(
+            "unescape('\\u62%61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\x62%u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\x62\\u61\\u72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('%u0062\\u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\u0062\\x61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\u0062\\u{0061}\\x72')",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("decodeURI()", "[JSNormalizer]")
+{
+    SECTION("%XX")
+    {
+        test_normalization(
+            "decodeURI('%62%61%72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"%62%61%72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`%62%61%72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "decodeURI('%u0062%u0061%u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"%u0062%u0061%u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`%u0062%u0061%u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "decodeURI('\\u62\\u61\\u72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\u62\\u61\\u72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\u62\\u61\\u72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "decodeURI('\\u0062\\u0061\\u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\u0062\\u0061\\u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\u0062\\u0061\\u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "decodeURI('\\x62\\x61\\x72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\x62\\x61\\x72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\x62\\x61\\x72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "decodeURI('\\u{0062}\\u{0061}\\u{0072}')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\u{0062}\\u{0061}\\u{0072}\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\u{0062}\\u{0061}\\u{0072}`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("mixed sequence")
+    {
+        test_normalization_mixed_encoding(
+            "decodeURI('\\u62%61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\x62%u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\x62\\u61\\u72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('%u0062\\u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\u0062\\x61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\u0062\\u{0061}\\x72')",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("decodeURIComponent()", "[JSNormalizer]")
+{
+    SECTION("%XX")
+    {
+        test_normalization(
+            "decodeURIComponent('%62%61%72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"%62%61%72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`%62%61%72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "decodeURIComponent('%u0062%u0061%u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"%u0062%u0061%u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`%u0062%u0061%u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "decodeURIComponent('\\u62\\u61\\u72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\u62\\u61\\u72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\u62\\u61\\u72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "decodeURIComponent('\\u0062\\u0061\\u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\u0062\\u0061\\u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\u0062\\u0061\\u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "decodeURIComponent('\\x62\\x61\\x72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\x62\\x61\\x72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\x62\\x61\\x72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "decodeURIComponent('\\u{0062}\\u{0061}\\u{0072}')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\u{0062}\\u{0061}\\u{0072}\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\u{0062}\\u{0061}\\u{0072}`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("mixed sequence")
+    {
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\u62%61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\x62%u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\x62\\u61\\u72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('%u0062\\u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\u0062\\x61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\u0062\\u{0061}\\x72')",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("String.fromCharCode()", "[JSNormalizer]") // several numeric arguments are expected to become one quoted string
+{
+    SECTION("decimal") // all arguments decimal
+    {
+        test_normalization(
+            "String.fromCharCode(98, 97, 114)",
+            "'bar'"
+        );
+    }
+
+    SECTION("hexadecimal") // all arguments hex, with and without zero padding
+    {
+        test_normalization(
+            "String.fromCharCode(0x62, 0x61, 0x72)",
+            "'bar'"
+        );
+
+        test_normalization(
+            "String.fromCharCode(0x0062, 0x0061, 0x0072)",
+            "'bar'"
+        );
+    }
+
+    SECTION("mixed sequence") // decimal and hex in one call; helper presumably also checks the mixed-encoding flag
+    {
+        test_normalization_mixed_encoding(
+            "String.fromCharCode(98, 97, 0x72)",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "String.fromCharCode(0x62, 97, 114)",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("Split", "[JSNormalizer]")
+{
+    SECTION("unescape()")
+    {
+        test_normalization({
+            { "unescape(", "" },
+            { ")", "" }
+        });
+
+        test_normalization({
+            { "unescape('%62", "'b" },
+            { "%61%72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%62%61", "'ba" },
+            { "%72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%62%61%72", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u", "'%u" },
+            { "0062%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u00", "'%u00" },
+            { "62%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062", "'b" },
+            { "%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u", "'b%u" },
+            { "0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u00", "'b%u00" },
+            { "61%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061%u", "'ba%u" },
+            { "0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061%u00", "'ba%u00" },
+            { "72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061%u0072", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062", "'b" },
+            { "%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+    }
+
+    SECTION("decodeURI()")
+    {
+        test_normalization({
+            { "decodeURI(", "" },
+            { ")", "" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u", "'%u" },
+            { "0062%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u00", "'%u00" },
+            { "62%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u", "'b%u" },
+            { "0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u00", "'b%u00" },
+            { "61%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061%u", "'ba%u" },
+            { "0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061%u00", "'ba%u00" },
+            { "72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061%u0072", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062", "'b" },
+            { "%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+    }
+
+    SECTION("decodeURIComponent()")
+    {
+        test_normalization({
+            { "decodeURIComponent(", "" },
+            { ")", "" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u", "'%u" },
+            { "0062%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u00", "'%u00" },
+            { "62%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u", "'b%u" },
+            { "0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u00", "'b%u00" },
+            { "61%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061%u", "'ba%u" },
+            { "0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061%u00", "'ba%u00" },
+            { "72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061%u0072", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062", "'b" },
+            { "%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+    }
+
+    SECTION("String.fromCharCode()")
+    {
+        test_normalization({
+            { "String.fromCharCode(", "'" },
+            { ")", "''" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(9", "'\u0009" },
+            { "8, 97, 114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98,", "'b" },
+            { "97, 114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98, 97", "'ba" },
+            { ",114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98, 97, 114", "'bar" },
+            { ")", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062", "'b" },
+            { ",0x0061, 0x0072)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062, 0x0061", "'ba" },
+            { ", 0x0072)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062, 0x0061, 0x0072", "'bar" },
+            { ")", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062,", "'b" },
+            { "0x0061,", "'ba" },
+            { "0x72)",   "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98,", "'b" },
+            { "97,", "'ba" },
+            { "114)", "'bar'" }
+        });
+    }
+}
+
+TEST_CASE("Mixed input", "[JSNormalizer]") // unescape-style calls surrounded by or combined with other JavaScript
+{
+    SECTION("string") // escapes interleaved with plain text, multiple arguments and concatenation
+    {
+        test_normalization(
+            "unescape ( ' A   \\x62   B   \\x61   C   \\x72 ' ) ;",
+            "' A   b   B   a   C   r ';"
+        );
+        test_normalization(
+            "unescape ( ' \\x62ar b\\x61r ba\\x72 ' ) ;",
+            "' bar bar bar ';"
+        );
+        test_normalization(
+            "unescape ( '\\x62\\x61\\x72', '\\x62\\x61\\x72' ) ;",
+            "'bar','bar';"
+        );
+        test_normalization(
+            "unescape ( '\\x62\\x61\\x72' + '\\x62\\x61\\x72' ) ;",
+            "'bar'+'bar';"
+        );
+        test_normalization_mixed_encoding( // two escape forms inside the same unescape() call
+            "unescape ( '\\x62\\x61\\x72' + '\\u62\\u61\\u72' ) ;",
+            "'bar'+'bar';"
+        );
+    }
+
+    SECTION("literal") // numeric arguments are expected to pass through unchanged
+    {
+        test_normalization(
+            "unescape ( 2,  '\\x62\\x61\\x72', 2 ) ;",
+            "2,'bar',2;"
+        );
+    }
+
+    SECTION("identifier") // function calls appearing among the arguments
+    {
+        test_normalization( // strings inside f(...) and eval(...) are expected to stay escaped; only the direct argument is decoded
+            "unescape ( f(\"A\\u20B\\u20C\"), eval(\"\\u66\\u6f\\u6f\"), \"\\u66\\u6f\\u6f\" ) ;",
+            "var_0000(\"A\\u20B\\u20C\"),eval(\"\\u66\\u6f\\u6f\"),\"foo\";"
+        );
+        test_normalization_mixed_encoding( // decoding is expected to stop at eval(...); the remaining arguments stay as written
+            "String.fromCharCode (114, 0x72, eval('123'), 114, 0x72) ;",
+            "'rr' eval('123'),114,0x72;"
+        );
+    }
+
+    SECTION("comment") // whitespace and comments between arguments must not change the result
+    {
+        test_normalization(
+            "String.fromCharCode(0x62, \n 0x61, // comment \n 0x72) ;",
+            "'bar';"
+        );
+        test_normalization(
+            "String.fromCharCode(0x62, \t 0x61, /* comment */ 0x72) ;",
+            "'bar';"
+        );
+        test_normalization(
+            "String.fromCharCode(0x62, \r 0x61, <!-- HTML comment \r 0x72) ;",
+            "'bar';"
+        );
+    }
+
+    SECTION("nested") // unescape-style calls nested in each other or used as an argument of another call
+    {
+        test_normalization(
+            "unescape('\\x62\\x61\\x72'+unescape('\\x62\\x61\\x72')+decodeURI('\\u62\\u61\\u72')) ;",
+            "'bar'+'bar'+'bar';"
+        );
+        test_normalization(
+            "document.write(unescape('%62%61%72')) ;",
+            "document.write('bar');"
+        );
+    }
+}
+
+#endif // CATCH_TEST_BUILD
+