Pull request #3312: JSN: Unescape Text Processing

author Mike Stepanek (mstepane) <mstepane@cisco.com>

Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)

committer Mike Stepanek (mstepane) <mstepane@cisco.com>

Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)
author Mike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)
committer Mike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)
diff --git a/doc/user/http_inspect.txt b/doc/user/http_inspect.txt

index 98dec3b9441ab026f1f32349ce75c198c20c9346..2d583e0cdd68fc79078e562f691722f0347878c8 100755 (executable)
--- a/doc/user/http_inspect.txt
+++ b/doc/user/http_inspect.txt
@@ -78,8 +78,9 @@ Normalizer. The Enhanced Normalizer can normalize inline/external scripts.
  It supports scripts over multiple PDUs. It is a stateful JavaScript whitespace
  and identifiers normalizer. All JavaScript identifier names, except those from
  the ignore list, will be substituted with unified names in the following
-format: var_0000 -> var_ffff. Moreover, Normalizer validates the syntax
-concerning ECMA-262 Standard, including scope tracking and restrictions
+format: var_0000 -> var_ffff. The Normalizer tries to expand escaped text,
+so that it appears in its usual form in the output. Moreover, the Normalizer
+validates the syntax against the ECMA-262 Standard, including scope tracking and restrictions
  for script elements. For more information on how additionally configure
  Enhanced Normalizer check with the following configuration options:
  js_norm_bytes_depth, js_norm_identifier_depth, js_norm_max_tmpl_nest,
diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt

index 5e785eda6aacf48a6e1a2a190126ff2d76094e5e..83c5970be967c5721391a23831a0f7bb06ad71dc 100755 (executable)
--- a/src/service_inspectors/http_inspect/dev_notes.txt
+++ b/src/service_inspectors/http_inspect/dev_notes.txt
@@ -256,11 +256,33 @@ For example:
      a("hello") // will be substituted to 'console.log("hello")'
  
  In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape
-functions(unescape, decodeURI, decodeURIComponent). This allows detection of
+functions (unescape, decodeURI, decodeURIComponent, String.fromCharCode). This allows detection of
  unescape functions nested within other unescape functions, which is a potential
  indicator of a multilevel obfuscation. The definition of a function call depends on
  identifier substitution, so such identifiers must be included in the ignore list in
-order to use this feature.
+order to use this feature. Once an escape sequence has been identified, it is decoded into the
+corresponding string.
+
+For example:
+
+   unescape('\u0062\u0061\u0072')        -> 'bar'
+   decodeURI('%62%61%72')                -> 'bar'
+   decodeURIComponent('\x62\x61\x72')    -> 'bar'
+   String.fromCharCode(98, 0x0061, 0x72) -> 'bar'
+
+The following formats are supported:
+
+   \xXX
+   \uXXXX
+   \u{XXXX}
+   %XX
+   \uXX
+   %uXXXX
+   decimal code point
+   hexadecimal code point
+
+JS Normalizer is able to decode mixed encoding sequences. However, a built-in alert is raised
+in such a case.
  
  JS Normalizer's syntax parser follows ECMA-262 standard. For various features,
  tracking of variable scope and individual brackets is done in accordance to the standard.
diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc

index d73de30a633b62d8f58356c52a37040eb9b14f30..2fd94b7a0afefacd08c0b5a3fb5c1a56fc26c3fd 100644 (file)
--- a/src/service_inspectors/http_inspect/http_js_norm.cc
+++ b/src/service_inspectors/http_inspect/http_js_norm.cc
@@ -219,6 +219,11 @@ void HttpJsNorm::do_external(const Field& input, Field& output,
              *infractions += INF_JS_UNESCAPE_NEST;
              events->create_event(EVENT_JS_UNESCAPE_NEST);
          }
+        if (js_ctx.is_mixed_encoding_seen())
+        {
+            *infractions += INF_JS_MULTIPLE_ENC;
+            events->create_event(EVENT_JS_MULTIPLE_ENC);
+        }
  
          if (ssn->js_built_in_event)
              break;
@@ -358,6 +363,11 @@ void HttpJsNorm::do_inline(const Field& input, Field& output,
              *infractions += INF_JS_UNESCAPE_NEST;
              events->create_event(EVENT_JS_UNESCAPE_NEST);
          }
+        if (js_ctx.is_mixed_encoding_seen())
+        {
+            *infractions += INF_JS_MULTIPLE_ENC;
+            events->create_event(EVENT_JS_MULTIPLE_ENC);
+        }
  
          script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
      }
diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc

index 49274c8340f776d7a99c36cc61692be6ea986b1a..e6900c8d79804ffe6088ecba7f62a10bf3605a3d 100644 (file)
--- a/src/utils/js_identifier_ctx.cc
+++ b/src/utils/js_identifier_ctx.cc
@@ -200,7 +200,7 @@ const char* JSIdentifierCtx::alias_lookup(const char* alias) const
  
  // advanced program scope access for testing
  
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
  
  bool JSIdentifierCtx::scope_check(const std::list<JSProgramScopeType>& compare) const
  {
@@ -226,4 +226,4 @@ const std::list<JSProgramScopeType> JSIdentifierCtx::get_types() const
      return return_list;
  }
  
-#endif // CATCH_TEST_BUILD
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h

index 4a694d3c7a4c5096d8cac31de27c6e756e7324f3..8508cd7d46e8a34c06faead827b44008f17af212 100644 (file)
--- a/src/utils/js_normalizer.h
+++ b/src/utils/js_normalizer.h
@@ -61,14 +61,17 @@ public:
      bool is_unescape_nesting_seen() const
      { return tokenizer.is_unescape_nesting_seen(); }
  
-#ifdef CATCH_TEST_BUILD
+    bool is_mixed_encoding_seen() const    // forwards the tokenizer's mixed-encoding indicator
+    { return tokenizer.is_mixed_encoding_seen(); }
+
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
      const char* get_tmp_buf() const
      { return tmp_buf; }
      size_t get_tmp_buf_size() const
      { return tmp_buf_size; }
      const JSTokenizer& get_tokenizer() const
      { return tokenizer; }
-#endif
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
  
  #ifdef BENCHMARK_TEST
      void rewind_output()
diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h

index 2dcdc1fe74cc7d8a0d9463b04de623f81e490b78..0747af6d3258c3e6817486729814c09ca9708ba9 100644 (file)
--- a/src/utils/js_tokenizer.h
+++ b/src/utils/js_tokenizer.h
@@ -43,9 +43,10 @@ extern THREAD_LOCAL const snort::Trace* http_trace;
  enum JSProgramScopeType : unsigned int;
  
  class JSIdentifierCtxBase;
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
  class JSTokenizerTester;
-#endif
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
+
  class JSTokenizer : public yyFlexLexer
  {
  private:
@@ -98,7 +99,7 @@ private:
      {
          Scope(ScopeType t) :
              type(t), meta_type(ScopeMetaType::NOT_SET), func_call_type(FuncType::NOT_FUNC),
-            ident_norm(true), block_param(false), do_loop(false)
+            ident_norm(true), block_param(false), do_loop(false), encoding(0), char_code_str(false)
          {}
  
          ScopeType type;
@@ -107,6 +108,8 @@ private:
          bool ident_norm;
          bool block_param;
          bool do_loop;
+        uint32_t encoding;
+        bool char_code_str;
      };
  
      enum ASIGroup
@@ -164,6 +167,7 @@ public:
      JSRet process(size_t& bytes_in);
  
      bool is_unescape_nesting_seen() const;
+    bool is_mixed_encoding_seen() const;
  protected:
      [[noreturn]] void LexerError(const char* msg) override
      { snort::FatalError("%s", msg); }
@@ -181,6 +185,7 @@ private:
      JSRet push_identifier(const char* ident);
      bool unescape(const char* lexeme);
      void process_punctuator(JSToken tok = PUNCTUATOR);
+    void skip_punctuator();
      void process_closing_brace();
      JSRet process_subst_open();
  
@@ -209,11 +214,24 @@ private:
      FuncType func_call_type();
      FuncType detect_func_type();
      void check_function_nesting(FuncType);
+    void check_mixed_encoding(uint32_t);
      void set_block_param(bool);
      bool block_param();
      void set_do_loop(bool);
      bool do_loop();
  
+    void set_encoding(uint32_t f)    // accumulates: ORs f into the current scope's encoding mask
+    { scope_cur().encoding |= f; }   // bits are only added here, never cleared
+
+    uint32_t encoding()    // encoding mask collected so far for the current scope
+    { return scope_cur().encoding; }
+
+    void set_char_code_str(bool f)    // overwrites the current scope's char_code_str marker
+    { scope_cur().char_code_str = f; }
+
+    bool char_code_str()    // char_code_str marker of the current scope
+    { return scope_cur().char_code_str; }
+
      static JSProgramScopeType m2p(ScopeMetaType);
      static const char* m2str(ScopeMetaType);
      static bool is_operator(JSToken);
@@ -269,6 +287,12 @@ private:
      JSRet general_literal();
      JSRet general_identifier();
      void general_unicode();
+    void escaped_unicode();
+    void escaped_code_point();
+    void escaped_url_sequence();
+    void dec_code_point();
+    void hex_code_point();
+    void char_code_no_match();
  
      static const char* p_scope_codes[];
  
@@ -283,6 +307,7 @@ private:
      bool prefix_increment = false;
      bool dealias_stored = false;
      bool unescape_nest_seen = false;
+    bool mixed_encoding_seen = false;
  
      uint8_t max_template_nesting;
      std::stack<uint16_t, std::vector<uint16_t>> brace_depth;
@@ -346,9 +371,9 @@ private:
      const uint32_t max_bracket_depth;
      std::stack<Scope> scope_stack;
  
-#ifdef CATCH_TEST_BUILD
+#if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
      friend JSTokenizerTester;
-#endif // CATCH_TEST_BUILD
+#endif // CATCH_TEST_BUILD || BENCHMARK_TEST
  };
  
  #endif // JS_TOKENIZER_H
diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l

index b7f93df12f9ebb2a07cf2f30e0c60c85f3ce4c2b..074dd45e98f35dd7657b41575020c61393f1a4eb 100644 (file)
--- a/src/utils/js_tokenizer.l
+++ b/src/utils/js_tokenizer.l
@@ -82,6 +82,20 @@
  
  constexpr bool JSTokenizer::insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX];
  
+// encoding flags: one bit per escape notation; bits are OR-ed per scope to detect mixed encodings
+
+enum EncodingType
+{
+    IS_HEX          = 1 << 0,   // hex code unit: 0xXXXX
+    IS_DEC          = 1 << 1,   // dec code unit: XXXX
+    IS_XBACKSLASH   = 1 << 2,   // \xXX
+    IS_UBACKSLASH_1 = 1 << 3,   // \uXX
+    IS_UBACKSLASH_2 = 1 << 4,   // \uXXXX
+    IS_UPERCENT     = 1 << 5,   // %uXXXX
+    IS_PERCENT      = 1 << 6,   // %XX
+    IS_UCODEPOINT   = 1 << 7    // \u{XXXX} (one or more hex digits in braces)
+};
+
  %}
  
  /* The following grammar was created based on ECMAScript specification */
@@ -848,9 +862,19 @@ UNICODE_CONNECTOR_PUNCTUATION    {CONNECTOR_PUNCT_RNG_1}|{CONNECTOR_PUNCT_RNG_2}
  UNICODE_ZWNJ    \xE2\x80\x8C
  UNICODE_ZWJ     \xE2\x80\x8D
  
-/* Unicode escape sequence */
-/* according to https://ecma-international.org/ecma-262/5.1/#sec-7.8.4 (escape sequence) */
+/* according to https://262.ecma-international.org/12.0/#prod-UnicodeEscapeSequence */
  UNICODE_ESCAPE_SEQUENCE    \\u[0-9a-fA-F]{4}
+ESCAPED_CODE_POINT         \\u\{[0-9a-fA-F]+\}
+
+/* according to https://262.ecma-international.org/12.0/#prod-HexEscapeSequence */
+HEX_ESCAPE_SEQUENCE        \\x[0-9a-fA-F]{2}
+
+/* according to https://tc39.es/ecma262/multipage/additional-ecmascript-features-for-web-browsers.html#sec-unescape-string */
+BYTE_ESCAPE_SEQUENCE       \\u[0-9a-fA-F]{2}
+PERCENT_ESCAPE_SEQUENCE    %u[0-9a-fA-F]{4}
+
+/* according to https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */
+URL_ESCAPE_SEQUENCE        %[0-9a-fA-F]{2}
  
  /* whitespaces */
  /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.2 */
@@ -947,7 +971,7 @@ PUNCTUATOR_ARROW               "=>"
  
  /* identifiers */
  /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6 */
-IDENTIFIER_START    [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}
+IDENTIFIER_START    [_$]|({UNICODE_LETTER})|{UNICODE_ESCAPE_SEQUENCE}|{ESCAPED_CODE_POINT}
  IDENTIFIER_PART     (({IDENTIFIER_START})|({UNICODE_COMBINING_MARK})|({UNICODE_DIGIT})|({UNICODE_CONNECTOR_PUNCTUATION})|{UNICODE_ZWNJ}|{UNICODE_ZWJ})*
  IDENTIFIER          ({IDENTIFIER_START}{IDENTIFIER_PART})*
  
@@ -957,19 +981,20 @@ LITERAL_NULL                  null
  LITERAL_THIS                  this
  LITERAL_BOOLEAN               true|false
  LITERAL_DECIMAL               [.]?[0-9]+[\.]?[0-9]*[eE]?[0-9]*
+/* decimal code unit inside a char code function; at least one digit, so the rule never yields an empty lexeme */
+LITERAL_INTEGER               [0-9]+
  LITERAL_HEX_INTEGER           0x[0-9a-fA-F]*|0X[0-9a-fA-F]*
  LITERAL_DQ_STRING_START       \"
  LITERAL_DQ_STRING_END         \"
  LITERAL_DQ_STRING_SKIP        \\\"
-LITERAL_DQ_STRING_TEXT        [^\"\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
+LITERAL_DQ_STRING_TEXT        [^\"\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
  LITERAL_SQ_STRING_START       \'
  LITERAL_SQ_STRING_END         \'
  LITERAL_SQ_STRING_SKIP        \\\'
-LITERAL_SQ_STRING_TEXT        [^\'\\\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
+LITERAL_SQ_STRING_TEXT        [^\'\\%\xA\xD\{0x10}(\xE2\x80\xA8)(\xE2\x80\xA9)("<"+(?i:script))("<"+(?i:\/script>))]{1,32}
  LITERAL_TEMPLATE_START        \`
  LITERAL_TEMPLATE_END          \`
  LITERAL_TEMPLATE_SUBST_START  \$\{
-LITERAL_TEMPLATE_OTHER        [^\\\`(\$\{)("<"+(?i:\/script>))]{1,32}
+LITERAL_TEMPLATE_OTHER        [^\\%\`(\$\{)("<"+(?i:\/script>))]{1,32}
  LITERAL_REGEX_START           \/[^*\/]
  LITERAL_REGEX_END             \/[gimsuy]*
  LITERAL_REGEX_SKIP            \\\/
@@ -1014,63 +1039,112 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  /* in a regular expression */
  %x regex
  
+/* in a single-quoted string within unescape function */
+%x unesc_sqstr
+
+/* in a double-quoted string within unescape function */
+%x unesc_dqstr
+
+/* in a template literal within unescape function */
+%x unesc_tmpll
+
+/* to process code units within char code unescape function */
+%x char_code
+%x char_code_lcomm
+%x char_code_bcomm
+
  %%
  
-{WHITESPACES}                       { }
-{CHAR_ESCAPE_SEQUENCES}             { }
-{LINE_TERMINATORS}                  { BEGIN(regst); newline_found = true; }
+<INITIAL,divop,regst,char_code>{WHITESPACES}              { /* skip */ }
+<INITIAL,divop,regst,char_code>{CHAR_ESCAPE_SEQUENCES}    { /* skip */ }
+
+{LINE_TERMINATORS}                    { BEGIN(regst); newline_found = true; }
+<char_code>{LINE_TERMINATORS}         { newline_found = true; }
  
-<INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
+<INITIAL,regex,dqstr,regst,sqstr,divop,char_code>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
  {HTML_TAG_SCRIPT_CLOSE}             { EXEC(html_closing_script_tag()) }
  
-       {HTML_COMMENT_OPEN}          { BEGIN(lcomm); }
-       {LINE_COMMENT_START}         { BEGIN(lcomm); }
-<lcomm>{LINE_COMMENT_END1}          { BEGIN(regst); newline_found = true; }
-<lcomm>{LINE_COMMENT_END2}          { BEGIN(regst); newline_found = true; }
-<lcomm>{LINE_COMMENT_END3}          { BEGIN(regst); RETURN(OPENING_TAG) }
-<lcomm>{LINE_COMMENT_END4}          { BEGIN(regst); RETURN(CLOSING_TAG) }
-<lcomm>{LINE_COMMENT_SKIP}          { }
-<lcomm><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {BLOCK_COMMENT_START}        { BEGIN(bcomm); }
-<bcomm>{BLOCK_COMMENT_END1}         { BEGIN(regst); }
-<bcomm>{BLOCK_COMMENT_END2}         { BEGIN(regst); RETURN(OPENING_TAG) }
-<bcomm>{BLOCK_COMMENT_END3}         { BEGIN(regst); RETURN(CLOSING_TAG) }
-<bcomm>{BLOCK_COMMENT_LINE1}        |
-<bcomm>{BLOCK_COMMENT_LINE2}        { newline_found = true;}
-<bcomm>{BLOCK_COMMENT_SKIP}         { }
-<bcomm><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {LITERAL_DQ_STRING_START}    { EXEC(literal_dq_string_start()) }
-<dqstr>{LITERAL_DQ_STRING_END}      { dealias_append(); ECHO; BEGIN(divop); }
-<dqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
-<dqstr>\\{CR}{LF}                   { }
-<dqstr>\\{LF}                       { }
-<dqstr>\\{CR}                       { }
-<dqstr>{LINE_TERMINATORS}           { BEGIN(regst); RETURN(BAD_TOKEN) }
-<dqstr>{LITERAL_DQ_STRING_SKIP}     { dealias_append(); ECHO; }
-<dqstr>{LITERAL_DQ_STRING_TEXT}     { dealias_append(); ECHO; }
-<dqstr><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {LITERAL_SQ_STRING_START}    { EXEC(literal_sq_string_start()) }
-<sqstr>{LITERAL_SQ_STRING_END}      { dealias_append(); ECHO; BEGIN(divop); }
-<sqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
-<sqstr>\\{CR}{LF}                   { }
-<sqstr>\\{LF}                       { }
-<sqstr>\\{CR}                       { }
-<sqstr>{LINE_TERMINATORS}           { BEGIN(regst); RETURN(BAD_TOKEN) }
-<sqstr>{LITERAL_SQ_STRING_SKIP}     { dealias_append(); ECHO; }
-<sqstr>{LITERAL_SQ_STRING_TEXT}     { dealias_append(); ECHO; }
-<sqstr><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
-
-       {LITERAL_TEMPLATE_START}                  { EXEC(literal_template_start()) }
-<tmpll>(\\\\)*{LITERAL_TEMPLATE_END}             { dealias_append(); ECHO; BEGIN(divop); }
-<tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START}     { EXEC(process_subst_open()) dealias_reset(); }
-<tmpll>{HTML_TAG_SCRIPT_CLOSE}                   { BEGIN(regst); RETURN(CLOSING_TAG) }
-<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START}   | /* escaped template substitution */
-<tmpll>(\\\\)*\\{LITERAL_TEMPLATE_END}           | /* escaped backtick */
-<tmpll>{LITERAL_TEMPLATE_OTHER}                  { dealias_append(); ECHO; }
-<tmpll><<EOF>>                                   { RETURN(SCRIPT_CONTINUE) }
+    {HTML_COMMENT_OPEN}                       { BEGIN(lcomm); }
+    {LINE_COMMENT_START}                      { BEGIN(lcomm); }
+<char_code>{HTML_COMMENT_OPEN}                { BEGIN(char_code_lcomm); }
+<char_code>{LINE_COMMENT_START}               { BEGIN(char_code_lcomm); }
+<lcomm>{LINE_COMMENT_END1}                    { BEGIN(regst); newline_found = true; }
+<lcomm>{LINE_COMMENT_END2}                    { BEGIN(regst); newline_found = true; }
+<char_code_lcomm>{LINE_COMMENT_END1}          { BEGIN(char_code); newline_found = true; }
+<char_code_lcomm>{LINE_COMMENT_END2}          { BEGIN(char_code); newline_found = true; }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END3}    { BEGIN(regst); RETURN(OPENING_TAG) }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END4}    { BEGIN(regst); RETURN(CLOSING_TAG) }
+<lcomm,char_code_lcomm>{LINE_COMMENT_SKIP}    { /* skip */ }
+<lcomm,char_code_lcomm><<EOF>>                { RETURN(SCRIPT_CONTINUE) }
+
+    {BLOCK_COMMENT_START}                       { BEGIN(bcomm); }
+<char_code>{BLOCK_COMMENT_START}                { BEGIN(char_code_bcomm); }
+<bcomm>{BLOCK_COMMENT_END1}                     { BEGIN(regst); }
+<char_code_bcomm>{BLOCK_COMMENT_END1}           { BEGIN(char_code); }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END2}     { BEGIN(regst); RETURN(OPENING_TAG) }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END3}     { BEGIN(regst); RETURN(CLOSING_TAG) }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE1}    |
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE2}    { newline_found = true; }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_SKIP}     { /* skip */ }
+<bcomm,char_code_bcomm><<EOF>>                  { RETURN(SCRIPT_CONTINUE) }
+
+    {LITERAL_DQ_STRING_START}                 { EXEC(literal_dq_string_start()) }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_END}    { dealias_append(); ECHO; BEGIN(divop); }
+<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_CLOSE}    { BEGIN(regst); RETURN(CLOSING_TAG) }
+<dqstr,unesc_dqstr>\\{CR}{LF}                 { /* skip */ }
+<dqstr,unesc_dqstr>\\{LF}                     { /* skip */ }
+<dqstr,unesc_dqstr>\\{CR}                     { /* skip */ }
+<dqstr,unesc_dqstr>{LINE_TERMINATORS}         { BEGIN(regst); RETURN(BAD_TOKEN) }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_SKIP}   { dealias_append(); ECHO; }
+<dqstr,unesc_dqstr>{LITERAL_DQ_STRING_TEXT}   { dealias_append(); ECHO; }
+<dqstr,unesc_dqstr><<EOF>>                    { RETURN(SCRIPT_CONTINUE) }
+<dqstr>{UNICODE_ESCAPE_SEQUENCE}              |
+<dqstr>{HEX_ESCAPE_SEQUENCE}                  { escaped_unicode(); }
+<dqstr>{ESCAPED_CODE_POINT}                   { escaped_code_point(); }
+<unesc_dqstr>{UNICODE_ESCAPE_SEQUENCE}        { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_dqstr>{HEX_ESCAPE_SEQUENCE}            { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_dqstr>{ESCAPED_CODE_POINT}             { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_dqstr>{BYTE_ESCAPE_SEQUENCE}           { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_dqstr>{PERCENT_ESCAPE_SEQUENCE}        { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_dqstr>{URL_ESCAPE_SEQUENCE}            { set_encoding(IS_PERCENT); escaped_url_sequence(); }
+
+    {LITERAL_SQ_STRING_START}                 { EXEC(literal_sq_string_start()) }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_END}    { dealias_append(); ECHO; BEGIN(divop); }
+<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_CLOSE}    { BEGIN(regst); RETURN(CLOSING_TAG) }
+<sqstr,unesc_sqstr>\\{CR}{LF}                 { /* skip */ }
+<sqstr,unesc_sqstr>\\{LF}                     { /* skip */ }
+<sqstr,unesc_sqstr>\\{CR}                     { /* skip */ }
+<sqstr,unesc_sqstr>{LINE_TERMINATORS}         { BEGIN(regst); RETURN(BAD_TOKEN) }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_SKIP}   { dealias_append(); ECHO; }
+<sqstr,unesc_sqstr>{LITERAL_SQ_STRING_TEXT}   { dealias_append(); ECHO; }
+<sqstr,unesc_sqstr><<EOF>>                    { RETURN(SCRIPT_CONTINUE) }
+<sqstr>{UNICODE_ESCAPE_SEQUENCE}              |
+<sqstr>{HEX_ESCAPE_SEQUENCE}                  { escaped_unicode(); }
+<sqstr>{ESCAPED_CODE_POINT}                   { escaped_code_point(); }
+<unesc_sqstr>{UNICODE_ESCAPE_SEQUENCE}        { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_sqstr>{HEX_ESCAPE_SEQUENCE}            { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_sqstr>{ESCAPED_CODE_POINT}             { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_sqstr>{BYTE_ESCAPE_SEQUENCE}           { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_sqstr>{PERCENT_ESCAPE_SEQUENCE}        { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_sqstr>{URL_ESCAPE_SEQUENCE}            { set_encoding(IS_PERCENT); escaped_url_sequence(); }
+
+    {LITERAL_TEMPLATE_START}                                 { EXEC(literal_template_start()) }
+<tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_END}             { dealias_append(); ECHO; BEGIN(divop); }
+<tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START}     { EXEC(process_subst_open()) dealias_reset(); }
+<tmpll,unesc_tmpll>{HTML_TAG_SCRIPT_CLOSE}                   { BEGIN(regst); RETURN(CLOSING_TAG) }
+<tmpll,unesc_tmpll>(\\\\)*\\{LITERAL_TEMPLATE_SUBST_START}   | /* escaped template substitution */
+<tmpll,unesc_tmpll>(\\\\)*\\{LITERAL_TEMPLATE_END}           | /* escaped backtick */
+<tmpll,unesc_tmpll>{LITERAL_TEMPLATE_OTHER}                  { dealias_append(); ECHO; }
+<tmpll,unesc_tmpll><<EOF>>                                   { RETURN(SCRIPT_CONTINUE) }
+<tmpll>{UNICODE_ESCAPE_SEQUENCE}                             |
+<tmpll>{HEX_ESCAPE_SEQUENCE}                                 { escaped_unicode(); }
+<tmpll>{ESCAPED_CODE_POINT}                                  { escaped_code_point(); }
+<unesc_tmpll>{UNICODE_ESCAPE_SEQUENCE}                       { set_encoding(IS_UBACKSLASH_2); escaped_unicode(); }
+<unesc_tmpll>{HEX_ESCAPE_SEQUENCE}                           { set_encoding(IS_XBACKSLASH); escaped_unicode(); }
+<unesc_tmpll>{ESCAPED_CODE_POINT}                            { set_encoding(IS_UCODEPOINT); escaped_code_point(); }
+<unesc_tmpll>{BYTE_ESCAPE_SEQUENCE}                          { set_encoding(IS_UBACKSLASH_1); escaped_unicode(); }
+<unesc_tmpll>{PERCENT_ESCAPE_SEQUENCE}                       { set_encoding(IS_UPERCENT); escaped_unicode(); }
+<unesc_tmpll>{URL_ESCAPE_SEQUENCE}                           { set_encoding(IS_PERCENT); escaped_url_sequence(); }
  
  <regst>{LITERAL_REGEX_START}        { EXEC(literal_regex_start()) }
  <regex>{LITERAL_REGEX_END}          { ECHO; BEGIN(divop); }
@@ -1080,17 +1154,19 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  <regex>\\{CR}                       |
  <regex>{LINE_TERMINATORS}           { BEGIN(regst); RETURN(BAD_TOKEN) }
  <regex>[^<{LF}{CR}{LS}{PS}\\\/]+    { ECHO; }
+<regex>{UNICODE_ESCAPE_SEQUENCE}    |
+<regex>{HEX_ESCAPE_SEQUENCE}        { escaped_unicode(); }
  <regex><<EOF>>                      { RETURN(SCRIPT_CONTINUE) }
  
  <divop>{DIV_OPERATOR}               |
  <divop>{DIV_ASSIGNMENT_OPERATOR}    { div_assignment_operator(); }
  
-{OPEN_BRACE}                        { EXEC(open_brace()) }
-{CLOSE_BRACE}                       { EXEC(close_brace()) }
-{OPEN_PARENTHESIS}                  { EXEC(open_parenthesis()) }
-{CLOSE_PARENTHESIS}                 { EXEC(close_parenthesis()) }
-{OPEN_BRACKET}                      { EXEC(open_bracket()) }
-{CLOSE_BRACKET}                     { EXEC(close_bracket()) }
+{OPEN_BRACE}                                          { EXEC(open_brace()) }
+{CLOSE_BRACE}                                         { EXEC(close_brace()) }
+{OPEN_PARENTHESIS}                                    { EXEC(open_parenthesis()) }
+<INITIAL,divop,regst,char_code>{CLOSE_PARENTHESIS}    { EXEC(close_parenthesis()) }
+{OPEN_BRACKET}                                        { EXEC(open_bracket()) }
+{CLOSE_BRACKET}                                       { EXEC(close_bracket()) }
  
  {PUNCTUATOR_PREFIX}                 { EXEC(punctuator_prefix()) }
  {DOT_ACCESSOR}                      { dot_accessor(); }
@@ -1101,7 +1177,9 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  {OPERATOR_COMPLEX_ASSIGNMENT}       { operator_complex_assignment(); }
  {OPERATOR_LOGICAL}                  { operator_logical(); }
  {OPERATOR_SHIFT}                    { operator_shift(); }
+
  {PUNCTUATOR_COMMA}                  { punctuator_comma(); }
+<char_code>{PUNCTUATOR_COMMA}       { /* skip */ }
  
  {USE_STRICT_DIRECTIVE}              { EXEC(use_strict_directive()) }
  {USE_STRICT_DIRECTIVE_SC}           { EXEC(use_strict_directive_sc()) }
@@ -1130,8 +1208,13 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  {LITERAL}                           { EXEC(general_literal()) }
  {IDENTIFIER}                        { EXEC(general_identifier()) }
  
+<char_code>{LITERAL_INTEGER}       { set_encoding(IS_DEC); dec_code_point(); }
+<char_code>{LITERAL_HEX_INTEGER}   { set_encoding(IS_HEX); hex_code_point(); }
+
  .|{ALL_UNICODE}                     { general_unicode(); }
-<<EOF>>                             { EEOF(eval_eof()) }
+
+<char_code>.|{ALL_UNICODE}                { char_code_no_match(); }
+<INITIAL,divop,regst,char_code><<EOF>>    { EEOF(eval_eof()) }
  
  %%
  
@@ -1154,6 +1237,8 @@ static std::string unicode_to_utf8(const unsigned int code)
          res += 0x80 | ((code >> 6) & 0x3f);
          res += 0x80 | (code & 0x3f);
      }
+    else
+        res += "\uffff";
  
      return res;
  }
@@ -1167,6 +1252,7 @@ static std::string unescape_unicode(const char* lexeme)
  
      bool is_unescape = false;
      bool is_unicode = false;
+    bool is_code_point = false;
      short digits_left = 4;
      std::string unicode_str;
  
@@ -1188,6 +1274,13 @@ static std::string unescape_unicode(const char* lexeme)
              is_unescape = false;
          }
  
+        if (is_unicode and ch == '{')
+        {
+            is_unicode = false;
+            is_code_point = true;
+            continue;
+        }
+
          if (is_unicode)
          {
              unicode_str += ch;
@@ -1203,6 +1296,22 @@ static std::string unescape_unicode(const char* lexeme)
              continue;
          }
  
+        if (is_code_point)
+        {
+            if (ch == '}')
+            {
+                const unsigned int code_point = std::stoi(unicode_str, nullptr, 16);
+                res += unicode_to_utf8(code_point);
+
+                unicode_str = "";
+                is_code_point = false;
+            }
+            else
+                unicode_str += ch;
+
+            continue;
+        }
+
          res += ch;
      }
  
@@ -1471,6 +1580,12 @@ void JSTokenizer::process_punctuator(JSToken tok)
      BEGIN(regst);
  }
  
+void JSTokenizer::skip_punctuator()    // registers a punctuator without writing anything to the output
+{
+    token = PUNCTUATOR;    // remember the kind of the last token
+    BEGIN(regst);          // switch the lexer to the regst start condition
+}
+
  void JSTokenizer::process_closing_brace()
  {
      if (!brace_depth.empty())
@@ -1782,6 +1897,10 @@ JSTokenizer::FuncType JSTokenizer::detect_func_type()
      case IDENTIFIER:
      {
          FuncType ret = FuncType::GENERAL;
+
+        if (meta_type() == ScopeMetaType::FUNCTION)
+            return ret;
+
          if (ignored_id_pos >= 0)
          {
              std::streambuf* pbuf = yyout.rdbuf();
@@ -1801,6 +1920,8 @@ JSTokenizer::FuncType JSTokenizer::detect_func_type()
                      memcmp(tail, id.identifier.data(), size) == 0)
                  {
                      ret = id.type;
+                    pbuf->pubseekoff(-size, yyout.cur, yyout.out);
+
                      break;
                  }
              }
@@ -1826,11 +1947,21 @@ void JSTokenizer::check_function_nesting(JSTokenizer::FuncType type)
      }
  }
  
+// Raises the mixed-encoding indicator when 'flags' (a mask of EncodingType bits
+// collected for one function call) has more than one bit set.
+void JSTokenizer::check_mixed_encoding(uint32_t flags)
+{
+    // flags & (flags - 1) clears the lowest set bit; a non-zero remainder means
+    // at least two different encodings were recorded (zero flags yield zero too)
+    const bool mixed = (flags & (flags - 1)) != 0;
+
+    // Keep the indicator sticky: a later call that used a single encoding (or none)
+    // must not erase a detection that has not been reported yet.
+    if (mixed)
+        mixed_encoding_seen = true;
+}
+
  bool JSTokenizer::is_unescape_nesting_seen() const
  {
      return unescape_nest_seen;
  }
  
+bool JSTokenizer::is_mixed_encoding_seen() const    // reports the flag maintained by check_mixed_encoding()
+{
+    return mixed_encoding_seen;
+}
+
  void JSTokenizer::set_block_param(bool f)
  {
      scope_cur().block_param = f;
@@ -2031,6 +2162,22 @@ JSTokenizer::JSRet JSTokenizer::literal_dq_string_start()
      ECHO;
      BEGIN(dqstr);
      set_ident_norm(true);
+
+    switch (func_call_type())
+    {
+    case FuncType::UNESCAPE:
+        BEGIN(unesc_dqstr);
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+    case FuncType::CHAR_CODE:
+        BEGIN(dqstr);
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
      return EOS;
  }
  
@@ -2040,8 +2187,23 @@ JSTokenizer::JSRet JSTokenizer::literal_sq_string_start()
      EXEC(do_semicolon_insertion(ASI_GROUP_7))
      EXEC(do_spacing(LITERAL))
      ECHO;
-    BEGIN(sqstr);
      set_ident_norm(true);
+
+    switch (func_call_type())
+    {
+    case FuncType::UNESCAPE:
+        BEGIN(unesc_sqstr);
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+    case FuncType::CHAR_CODE:
+        BEGIN(sqstr);
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
      return EOS;
  }
  
@@ -2051,8 +2213,23 @@ JSTokenizer::JSRet JSTokenizer::literal_template_start()
      EXEC(do_semicolon_insertion(ASI_GROUP_7))
      EXEC(do_spacing(LITERAL))
      ECHO;
-    BEGIN(tmpll);
      set_ident_norm(true);
+
+    switch (func_call_type())
+    {
+    case FuncType::UNESCAPE:
+        BEGIN(unesc_tmpll);
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+    case FuncType::CHAR_CODE:
+        BEGIN(tmpll);
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
      return EOS;
  }
  
@@ -2121,7 +2298,27 @@ JSTokenizer::JSRet JSTokenizer::open_parenthesis()
      check_function_nesting(f_call);
      EXEC(scope_push(PARENTHESES))
      set_func_call_type(f_call);
-    process_punctuator();
+
+    switch (f_call)
+    {
+    case FuncType::CHAR_CODE:
+        token = LITERAL;
+        BEGIN(char_code);
+        set_char_code_str(true);
+        yyout << '\'';
+        break;
+    case FuncType::UNESCAPE:
+        skip_punctuator();
+        break;
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+        process_punctuator();
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
      return EOS;
  }
  
@@ -2129,11 +2326,16 @@ JSTokenizer::JSRet JSTokenizer::close_parenthesis()
  {
      dealias_clear_mutated(false);
      dealias_reset();
+
      FuncType f_call = func_call_type();
+    uint32_t flags = encoding();
+    bool ch_code_str = char_code_str();
      bool id_norm = ident_norm();
+
      if (meta_type() != ScopeMetaType::NOT_SET)
          EXEC(p_scope_pop(meta_type()))
      EXEC(scope_pop(PARENTHESES))
+
      if (f_call == FuncType::NOT_FUNC)
          set_ident_norm(id_norm);
      if (block_param())
@@ -2145,7 +2347,26 @@ JSTokenizer::JSRet JSTokenizer::close_parenthesis()
      {
          EXEC(do_semicolon_insertion(ASI_GROUP_5))
      }
-    ECHO;
+
+    switch (f_call)
+    {
+    case FuncType::NOT_FUNC:
+    case FuncType::GENERAL:
+        ECHO;
+        break;
+    case FuncType::UNESCAPE:
+        check_mixed_encoding(flags);
+        break;
+    case FuncType::CHAR_CODE:
+        check_mixed_encoding(flags);
+        if (ch_code_str)
+            yyout << '\'';
+        break;
+    default:
+        assert(false);
+        return BAD_TOKEN;
+    }
+
      token = PUNCTUATOR;
      BEGIN(divop);
      return EOS;
@@ -2522,10 +2743,54 @@ void JSTokenizer::general_unicode()
      set_ident_norm(true);
  }
  
+void JSTokenizer::escaped_unicode()
+{
+    // skip the 2-character escape prefix, parse the rest as hex (NOTE(review): std::stoi can throw)
+    const std::string hex_digits(YYText() + 2);
+    yyout << unicode_to_utf8(std::stoi(hex_digits, nullptr, 16));
+}
+
+void JSTokenizer::escaped_code_point()
+{
+    // drop the 3 leading escape characters and the last character, parse what is left as hex
+    std::string hex_digits(YYText() + 3);
+    hex_digits.resize(hex_digits.size() - 1);
+    yyout << unicode_to_utf8(std::stoi(hex_digits, nullptr, 16));
+}
+
+void JSTokenizer::escaped_url_sequence()
+{
+    // skip the single leading escape character (presumably '%'), parse the rest as hex
+    const std::string hex_digits(YYText() + 1);
+    yyout << unicode_to_utf8(std::stoi(hex_digits, nullptr, 16));
+}
+
+void JSTokenizer::dec_code_point()
+{
+    const std::string dec_digits(YYText());  // whole match is read as a base-10 number
+    yyout << unicode_to_utf8(std::stoi(dec_digits, nullptr, 10));
+}
+
+void JSTokenizer::hex_code_point()
+{
+    const std::string hex_digits(YYText());  // whole match is read as a base-16 number
+    yyout << unicode_to_utf8(std::stoi(hex_digits, nullptr, 16));
+}
+// Ends the quoted char-code output and rescans the text in regst (presumably the char_code fallback).
+void JSTokenizer::char_code_no_match()
+{
+    BEGIN(regst);                // continue scanning in the regst start condition
+    yyout << '\'';               // emit a quote; open_parenthesis() wrote the opening one for CHAR_CODE
+    set_char_code_str(false);    // close_parenthesis() emits its closing quote only while this is true
+    yyless(0);                   // return the whole matched text to the input so it is scanned again
+    memset((void*)(states + sp), 0, sizeof(states[0]));  // zero the states[] entry at index sp -- NOTE(review): confirm why
+}
+
  JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in)
  {
      yy_flush_buffer(YY_CURRENT_BUFFER);
      unescape_nest_seen = false;
+    mixed_encoding_seen = false;
  
      auto r = yylex();
  
diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt

index 18e4c7f19452814fcb6d65456701518b62392385..c382eadc6621f0a0eb8adcefccd73da26697f2db 100644 (file)
--- a/src/utils/test/CMakeLists.txt
+++ b/src/utils/test/CMakeLists.txt
@@ -30,6 +30,16 @@ add_catch_test( js_dealias_test
          js_test_utils.cc
  )
  
+add_catch_test( js_unescape_test  # Catch test target for the JS normalizer unescape tests
+    SOURCES
+        ${FLEX_js_tokenizer_OUTPUTS}
+        ../js_identifier_ctx.cc
+        ../js_normalizer.cc
+        ../streambuf.cc
+        ../util_cstring.cc
+        js_test_utils.cc
+)
+
  add_catch_test( js_identifier_ctx_test
      SOURCES
          ../js_identifier_ctx.cc
diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc

index f3887aaed9d7f1e9d5d4314e0d7de8017e9d0116..0c30c01e71f478ab2c66b881edacb2a90410a569 100644 (file)
--- a/src/utils/test/js_normalizer_test.cc
+++ b/src/utils/test/js_normalizer_test.cc
@@ -479,7 +479,7 @@ static const char clamav_buf12[] =
      "var x='test\\u0000test';";
  
  static const char clamav_expected12[] =
-    "var x='test\\u0000test';";
+    "var x='test\u0000test';";
  
  static const char clamav_buf13[] =
      "var x\\s12345";
@@ -491,7 +491,7 @@ static const char clamav_buf14[] =
      "document.write(unescape('test%20test";
  
  static const char clamav_expected14[] =
-    "document.write(unescape('test%20test";
+    "document.write('test test";
  
  TEST_CASE("clamav tests", "[JSNormalizer]")
  {
@@ -2314,8 +2314,8 @@ TEST_CASE("split in string literal", "[JSNormalizer]")
          const char dat1[] = "var str =\"any\\";
          const char dat2[] = "u1234tx\";";
          const char exp1[] = "var str=\"any\\";
-        const char exp2[] = "u1234tx\";";
-        const char exp[] = "var str=\"any\\u1234tx\";";
+        const char exp2[] = "\u1234tx\";";
+        const char exp[] = "var str=\"any\u1234tx\";";
  
          NORMALIZE_2(dat1, dat2, exp1, exp2);
          NORM_COMBINED_2(dat1, dat2, exp);
@@ -2325,8 +2325,8 @@ TEST_CASE("split in string literal", "[JSNormalizer]")
          const char dat1[] = "var str =\"any\\u";
          const char dat2[] = "1234tx\";";
          const char exp1[] = "var str=\"any\\u";
-        const char exp2[] = "1234tx\";";
-        const char exp[] = "var str=\"any\\u1234tx\";";
+        const char exp2[] = "\u1234tx\";";
+        const char exp[] = "var str=\"any\u1234tx\";";
  
          NORMALIZE_2(dat1, dat2, exp1, exp2);
          NORM_COMBINED_2(dat1, dat2, exp);
@@ -4245,32 +4245,33 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("in arguments")
          {
              tester.test_function_scopes({
-                {"unescape(", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"unescape(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("separated identifier and call")
          {
              tester.test_function_scopes({
-                {"unescape  /*comment*/  (", "unescape(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"unescape  /*comment*/  (", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("complete call")
          {
              tester.test_function_scopes({
-                {"unescape('%62%61%72')", "unescape('%62%61%72')", {FuncType::NOT_FUNC}}
+                {"unescape('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}}
              });
          }
          SECTION("as named function definition")
          {
              tester.test_function_scopes({
-                {"unescape(){", "unescape(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+                {"function unescape(){", "function unescape(){",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
              });
          }
          SECTION("after assignment substitution")
          {
              tester.test_function_scopes({
-                {"var a = unescape; a(", "var var_0000=unescape;unescape(", {FuncType::NOT_FUNC,
-                                                                             FuncType::UNESCAPE}}
+                {"var a = unescape; a(", "var var_0000=unescape;",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("literal")
@@ -4282,7 +4283,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("as a template literal substitution")
          {
              tester.test_function_scopes({
-                {"`literal ${unescape(", "`literal ${unescape(",
+                {"`literal ${unescape(", "`literal ${",
                  {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
@@ -4292,33 +4293,34 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("in arguments")
          {
              tester.test_function_scopes({
-                {"decodeURI(", "decodeURI(", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"decodeURI(", "", {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("separated identifier and call")
          {
              tester.test_function_scopes({
-                {"decodeURI  /*comment*/  (", "decodeURI(", {FuncType::NOT_FUNC,
-                                                             FuncType::UNESCAPE}}
+                {"decodeURI  /*comment*/  (", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("complete call")
          {
              tester.test_function_scopes({
-                {"decodeURI('%62%61%72')", "decodeURI('%62%61%72')", {FuncType::NOT_FUNC}}
+                {"decodeURI('%62%61%72')", "'bar'", {FuncType::NOT_FUNC}}
              });
          }
          SECTION("as named function definition")
          {
              tester.test_function_scopes({
-                {"decodeURI(){", "decodeURI(){", {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
+                {"function decodeURI(){", "function decodeURI(){",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
              });
          }
          SECTION("after assignment substitution")
          {
              tester.test_function_scopes({
-                {"var a = decodeURI; a(", "var var_0000=decodeURI;decodeURI(", {FuncType::NOT_FUNC,
-                                                                                FuncType::UNESCAPE}}
+                {"var a = decodeURI; a(", "var var_0000=decodeURI;",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("literal")
@@ -4330,7 +4332,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("as a template literal substitution")
          {
              tester.test_function_scopes({
-                {"`literal ${decodeURI(", "`literal ${decodeURI(",
+                {"`literal ${decodeURI(", "`literal ${",
                  {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
@@ -4340,37 +4342,36 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("in arguments")
          {
              tester.test_function_scopes({
-                {"decodeURIComponent(", "decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                                FuncType::UNESCAPE}}
+                {"decodeURIComponent(", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("separated identifier and call")
          {
              tester.test_function_scopes({
-                {"decodeURIComponent  /*comment*/  (", "decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                                               FuncType::UNESCAPE}}
+                {"decodeURIComponent  /*comment*/  (", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("complete call")
          {
              tester.test_function_scopes({
-                {"decodeURIComponent('%62%61%72')", "decodeURIComponent('%62%61%72')",
+                {"decodeURIComponent('%62%61%72')", "'bar'",
                  {FuncType::NOT_FUNC}}
              });
          }
          SECTION("as named function definition")
          {
              tester.test_function_scopes({
-                {"decodeURIComponent(){", "decodeURIComponent(){", {FuncType::NOT_FUNC,
-                                                                    FuncType::NOT_FUNC}}
+                {"function decodeURIComponent(){", "function decodeURIComponent(){",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
              });
          }
          SECTION("after assignment substitution")
          {
              tester.test_function_scopes({
-                {"var a = decodeURIComponent; a(",
-                "var var_0000=decodeURIComponent;decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                                         FuncType::UNESCAPE}}
+                {"var a = decodeURIComponent; a(", "var var_0000=decodeURIComponent;",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
          SECTION("literal")
@@ -4382,8 +4383,8 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("as a template literal substitution")
          {
              tester.test_function_scopes({
-                {"`literal ${decodeURIComponent(", "`literal ${decodeURIComponent(",
-                 {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
+                {"`literal ${decodeURIComponent(", "`literal ${",
+                {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::UNESCAPE}}
              });
          }
      }
@@ -4392,35 +4393,35 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("in arguments")
          {
              tester.test_function_scopes({
-                {"String.fromCharCode(", "String.fromCharCode(",
+                {"String.fromCharCode(", "'",
                  {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
              });
          }
          SECTION("separated identifier and call")
          {
              tester.test_function_scopes({
-                {"String.fromCharCode  /*comment*/  (", "String.fromCharCode(",
+                {"String.fromCharCode  /*comment*/  (", "'",
                  {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
              });
          }
          SECTION("complete call")
          {
              tester.test_function_scopes({
-                {"String.fromCharCode( 65, 0x42 )", "String.fromCharCode(65,0x42)",
+                {"String.fromCharCode( 65, 0x42 )", "'AB'",
                  {FuncType::NOT_FUNC}}
              });
          }
          SECTION("as named function definition")
          {
              tester.test_function_scopes({
-                {"String.fromCharCode(){", "String.fromCharCode(){",
+                {"function String.fromCharCode(){", "function String.fromCharCode(){",
                  {FuncType::NOT_FUNC, FuncType::NOT_FUNC}}
              });
          }
          SECTION("after class name assignment substitution")
          {
              tester.test_function_scopes({
-                {"var a = String; a.fromCharCode(", "var var_0000=String;String.fromCharCode(",
+                {"var a = String; a.fromCharCode(", "var var_0000=String;'",
                  {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
              });
          }
@@ -4428,7 +4429,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          {
              tester.test_function_scopes({
                  {"var a = String.fromCharCode; a(",
-                "var var_0000=String.fromCharCode;String.fromCharCode(",
+                "var var_0000=String.fromCharCode;'",
                  {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
              });
          }
@@ -4449,7 +4450,7 @@ TEST_CASE("Function call tracking - basic", "[JSNormalizer]")
          SECTION("as a template literal substitution")
          {
              tester.test_function_scopes({
-                {"`literal ${String.fromCharCode(", "`literal ${String.fromCharCode(",
+                {"`literal ${String.fromCharCode(", "`literal ${'",
                  {FuncType::NOT_FUNC, FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
              });
          }
@@ -4476,7 +4477,7 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
          SECTION("Multiple unescape functions")
          {
              tester.test_function_scopes({
-                {"unescape( unescape( unescape(", "unescape(unescape(unescape(",
+                {"unescape( unescape( unescape(", "",
                  {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
              });
              CHECK(tester.is_unescape_nesting_seen());
@@ -4484,31 +4485,24 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
          SECTION("Multiple different unescape functions")
          {
              tester.test_function_scopes({
-                {"unescape( decodeURI( decodeURIComponent(",
-                "unescape(decodeURI(decodeURIComponent(", {FuncType::NOT_FUNC,
-                                                           FuncType::UNESCAPE,
-                                                           FuncType::UNESCAPE,
-                                                           FuncType::UNESCAPE}}
+                {"unescape( decodeURI( decodeURIComponent(", "",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE, FuncType::UNESCAPE, FuncType::UNESCAPE}}
              });
              CHECK(tester.is_unescape_nesting_seen());
          }
          SECTION("Multiple String.fromCharCode functions")
          {
              tester.test_function_scopes({
-                {"String.fromCharCode( String.fromCharCode( String.fromCharCode(",
-                "String.fromCharCode(String.fromCharCode(String.fromCharCode(",
-                {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE,
-                FuncType::CHAR_CODE}}
+                {"String.fromCharCode( String.fromCharCode( String.fromCharCode(", "'' '' '",
+                {FuncType::NOT_FUNC, FuncType::CHAR_CODE, FuncType::CHAR_CODE, FuncType::CHAR_CODE}}
              });
              CHECK(!tester.is_unescape_nesting_seen());
          }
          SECTION("Mixed function calls")
          {
              tester.test_function_scopes({
-                {"general( unescape( String.fromCharCode(",
-                "var_0000(unescape(String.fromCharCode(",
-                {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE,
-                FuncType::CHAR_CODE}}
+                {"general( unescape( String.fromCharCode(", "var_0000('",
+                {FuncType::NOT_FUNC, FuncType::GENERAL, FuncType::UNESCAPE, FuncType::CHAR_CODE}}
              });
              CHECK(!tester.is_unescape_nesting_seen());
          }
@@ -4525,16 +4519,14 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
          SECTION("Multiple unescape functions")
          {
              tester.test_function_scopes({
-                {"unescape( unescape( unescape( '%62%61%72' ) )",
-                "unescape(unescape(unescape('%62%61%72'))", {FuncType::NOT_FUNC,
-                                                             FuncType::UNESCAPE }}
+                {"unescape( unescape( unescape( '%62%61%72' ) )", "'bar'",
+                {FuncType::NOT_FUNC, FuncType::UNESCAPE }}
              });
          }
          SECTION("Multiple different unescape functions")
          {
              tester.test_function_scopes({
-                {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )",
-                "unescape(decodeURI(decodeURIComponent('%62%61%72'))",
+                {"unescape( decodeURI( decodeURIComponent( '%62%61%72' ) )", "'bar'",
                  {FuncType::NOT_FUNC, FuncType::UNESCAPE }}
              });
          }
@@ -4542,7 +4534,7 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
          {
              tester.test_function_scopes({
                  {"String.fromCharCode( String.fromCharCode( String.fromCharCode( 65, 0x42 ) )",
-                "String.fromCharCode(String.fromCharCode(String.fromCharCode(65,0x42))",
+                "'' '' 'AB'",
                  {FuncType::NOT_FUNC, FuncType::CHAR_CODE}}
              });
          }
@@ -4550,8 +4542,8 @@ TEST_CASE("Function call tracking - nesting", "[JSNormalizer]")
          {
              tester.test_function_scopes({
                  {"general( unescape( String.fromCharCode( 65, 0x42 ) )",
-                "var_0000(unescape(String.fromCharCode(65,0x42))", {FuncType::NOT_FUNC,
-                                                                    FuncType::GENERAL}}
+                "var_0000('AB'",
+                {FuncType::NOT_FUNC, FuncType::GENERAL}}
              });
          }
      }
@@ -4569,18 +4561,18 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
          tester.test_function_scopes({
              {"un",          "var_0000",     {FuncType::NOT_FUNC}},
              {"escape",      "unescape",     {FuncType::NOT_FUNC}},
-            {"(",           "unescape(",    {FuncType::NOT_FUNC,
+            {"(",           "",             {FuncType::NOT_FUNC,
                                               FuncType::UNESCAPE}},
-            {")",           "unescape()",   {FuncType::NOT_FUNC}},
+            {")",           "",             {FuncType::NOT_FUNC}},
          });
      }
      SECTION("split between identifier and parenthesis")
      {
          tester.test_function_scopes({
              {"decodeURI",   "decodeURI",    {FuncType::NOT_FUNC}},
-            {"(",           "decodeURI(",   {FuncType::NOT_FUNC,
+            {"(",           "",             {FuncType::NOT_FUNC,
                                               FuncType::UNESCAPE}},
-            {")",           "decodeURI()",  {FuncType::NOT_FUNC}},
+            {")",           "",             {FuncType::NOT_FUNC}},
          });
      }
      SECTION("comment between identifier and parenthesis")
@@ -4588,9 +4580,9 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
          tester.test_function_scopes({
              {"unescape",                "unescape",     {FuncType::NOT_FUNC}},
              {"//String.fromCharCode\n", "unescape",     {FuncType::NOT_FUNC}},
-            {"(",                       "unescape(",    {FuncType::NOT_FUNC,
+            {"(",                       "",             {FuncType::NOT_FUNC,
                                                           FuncType::UNESCAPE}},
-            {")",                       "unescape()",   {FuncType::NOT_FUNC}},
+            {")",                       "",             {FuncType::NOT_FUNC}},
          });
      }
      SECTION("split in arguments")
@@ -4611,13 +4603,13 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
          tester.test_function_scopes({
              {"String",          "String",                               {FuncType::NOT_FUNC}},
              {".fromCharCode",   "String.fromCharCode",                  {FuncType::NOT_FUNC}},
-            {"(`",              "String.fromCharCode(`",                {FuncType::NOT_FUNC,
+            {"(`",              "'' `",                                 {FuncType::NOT_FUNC,
                                                                           FuncType::CHAR_CODE}},
-            {"un",              "String.fromCharCode(`un",              {FuncType::NOT_FUNC,
+            {"un",              "'' `un",                               {FuncType::NOT_FUNC,
                                                                           FuncType::CHAR_CODE}},
-            {"escape(",         "String.fromCharCode(`unescape(",       {FuncType::NOT_FUNC,
+            {"escape(",         "'' `unescape(",                        {FuncType::NOT_FUNC,
                                                                           FuncType::CHAR_CODE}},
-            {"`)",              "String.fromCharCode(`unescape(`)",     {FuncType::NOT_FUNC}},
+            {"`)",              "'' `unescape(`",                       {FuncType::NOT_FUNC}},
          });
      }
      SECTION("Nesting - Mixed function calls")
@@ -4626,27 +4618,26 @@ TEST_CASE("Function call tracking - over multiple PDU", "[JSNormalizer]")
              {"decode",                      "var_0000",                 {FuncType::NOT_FUNC}},
              {"URI",                         "decodeURI",                {FuncType::NOT_FUNC}},
              {"Component",                   "decodeURIComponent",       {FuncType::NOT_FUNC}},
-            {"(",                           "decodeURIComponent(",      {FuncType::NOT_FUNC,
+            {"(",                           "",                         {FuncType::NOT_FUNC,
                                                                           FuncType::UNESCAPE}},
-            {" a, ",                        "decodeURIComponent(var_0001,",
+            {" a, ",                        "var_0001,",
                                                                          {FuncType::NOT_FUNC,
                                                                           FuncType::UNESCAPE}},
-            {" String.fromCharCode( ar",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0002",
+            {" String.fromCharCode( ar",    "var_0001,'' var_0002",
                                                                          {FuncType::NOT_FUNC,
                                                                           FuncType::UNESCAPE,
                                                                           FuncType::CHAR_CODE}},
-            {"g ), b, foo",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0005",
+
+            {"g ), b, foo",                 "var_0001,'' var_0003,var_0004,var_0005",
                                                                          {FuncType::NOT_FUNC,
                                                                           FuncType::UNESCAPE}},
-            {"bar( ",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006(",
+
+            {"bar( ",                       "var_0001,'' var_0003,var_0004,var_0006(",
                                                                          {FuncType::NOT_FUNC,
                                                                           FuncType::UNESCAPE,
                                                                           FuncType::GENERAL}},
-            {"))",
-            "decodeURIComponent(var_0001,String.fromCharCode(var_0003),var_0004,var_0006())",
+
+            {"))",                          "var_0001,'' var_0003,var_0004,var_0006()",
                                                                          {FuncType::NOT_FUNC}}
          });
      }
@@ -4879,4 +4870,31 @@ TEST_CASE("JS Normalizer, automatic semicolon", "[JSNormalizer]")
          return normalizer_wo_ident.normalize(src_wo_semicolons, src_len);
      };
  }
+// Benchmarks (BENCHMARK_TEST builds): normalizing escaped string text and unescape() calls.
+TEST_CASE("JS Normalizer, unescape", "[JSNormalizer]")
+{
+    auto str_unescape = make_input("'", "\\u0061", "'", norm_depth);  // presumably a quoted run of \u0061 -- confirm
+    auto f_unescape = make_input_repeat("unescape('')", norm_depth);  // presumably repeated unescape('') -- confirm
+    const char* src_str_unescape = str_unescape.c_str();
+    const char* src_f_unescape = f_unescape.c_str();
+    size_t src_len = norm_depth;
+    // both benchmarks below reuse this identifier context and normalizer
+    JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
+    JSNormalizer norm(ident_ctx, unlim_depth, max_template_nesting, norm_depth);
+    // pre-check outside the timed body: norm_ret() must report SCRIPT_ENDED for the escaped string
+    REQUIRE(norm_ret(norm, str_unescape) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("unescape sequence")
+    {
+        norm.rewind_output();
+        return norm.normalize(src_str_unescape, src_len);
+    };
+    // same pre-check for the unescape() call input
+    REQUIRE(norm_ret(norm, f_unescape) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("unescape function tracking")
+    {
+        norm.rewind_output();
+        return norm.normalize(src_f_unescape, src_len);
+    };
+}
+
  #endif // BENCHMARK_TEST
diff --git a/src/utils/test/js_test_utils.cc b/src/utils/test/js_test_utils.cc

index 5083dbcba948a8200d5a40dee61123dc2458fd1f..cd871d43d42da8b665c1647e9453e4204f0106b6 100644 (file)
--- a/src/utils/test/js_test_utils.cc
+++ b/src/utils/test/js_test_utils.cc
@@ -67,7 +67,7 @@ bool JSTokenizerTester::is_unescape_nesting_seen() const
      return normalizer.is_unescape_nesting_seen();
  }
  
-void test_scope(const char* context, std::list<JSProgramScopeType> stack)
+void test_scope(const char* context, const std::list<JSProgramScopeType>& stack)
  {
      std::string buf(context);
      buf += "</script>";
@@ -96,6 +96,17 @@ void test_normalization_bad(const char* source, const char* expected, JSTokenize
      CHECK(result_buf == expected);
  }
  
+void test_normalization_mixed_encoding(const char* source, const char* expected)
+{   // Normalizes `source` in one call, then checks the status, the mixed-encoding flag and the output.
+    JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
+    JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
+    auto ret = normalizer.normalize(source, strlen(source));
+    std::string result_buf(normalizer.get_script(), normalizer.script_size());  // copy of the normalized script
+    CHECK(ret == JSTokenizer::JSRet::SCRIPT_CONTINUE);  // status these inputs are expected to produce
+    CHECK(normalizer.is_mixed_encoding_seen());         // the normalizer must have flagged mixed encoding
+    CHECK(result_buf == expected);                      // output must equal `expected` exactly
+}
+
  void test_normalization(const std::vector<PduCase>& pdus)
  {
      JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
@@ -111,7 +122,7 @@ void test_normalization(const std::vector<PduCase>& pdus)
      }
  }
  
-void test_normalization(std::list<ScopedPduCase> pdus)
+void test_normalization(const std::list<ScopedPduCase>& pdus)
  {
      JSIdentifierCtx ident_ctx(norm_depth, max_scope_depth, s_ignored_ids);
      JSNormalizer normalizer(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth);
diff --git a/src/utils/test/js_test_utils.h b/src/utils/test/js_test_utils.h

index 10f5b0a20ea56e6da2a8c20e16ac1f1f22b9ba56..269fabbb627de19abfefa809362432bed2d93d0c 100644 (file)
--- a/src/utils/test/js_test_utils.h
+++ b/src/utils/test/js_test_utils.h
@@ -83,14 +83,15 @@ private:
      snort::JSNormalizer normalizer;
  };
  
-void test_scope(const char* context, std::list<JSProgramScopeType> stack);
+void test_scope(const char* context, const std::list<JSProgramScopeType>& stack);
  void test_normalization(const char* source, const char* expected);
  void test_normalization_bad(const char* source, const char* expected, JSTokenizer::JSRet eret);
+void test_normalization_mixed_encoding(const char* source, const char* expected);
  typedef std::pair<const char*, const char*> PduCase;
  // source, expected for a single PDU
  void test_normalization(const std::vector<PduCase>& pdus);
  typedef std::tuple<const char*,const char*, std::list<JSProgramScopeType>> ScopedPduCase;
  // source, expected, and current scope type stack for a single PDU
-void test_normalization(std::list<ScopedPduCase> pdus);
+void test_normalization(const std::list<ScopedPduCase>& pdus);
  
  #endif // JS_TEST_UTILS_H
diff --git a/src/utils/test/js_unescape_test.cc b/src/utils/test/js_unescape_test.cc

new file mode 100644 (file)

index 0000000..6736935
--- /dev/null
+++ b/src/utils/test/js_unescape_test.cc
@@ -0,0 +1,1144 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_unescape_test.cc author Volodymyr Horban <vhorban@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "catch/catch.hpp"
+
+#include "utils/js_identifier_ctx.h"
+#include "utils/js_normalizer.h"
+
+#include "js_test_utils.h"
+
+#ifdef CATCH_TEST_BUILD
+
+TEST_CASE("Sequence parsing", "[JSNormalizer]")
+{
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "'\\x01'",
+            "'\u0001'"
+        );
+        test_normalization(
+            "'\\x23'",
+            "'\u0023'"
+        );
+        test_normalization(
+            "'\\x45'",
+            "'\u0045'"
+        );
+        test_normalization(
+            "'\\x67'",
+            "'\u0067'"
+        );
+        test_normalization(
+            "'\\x89'",
+            "'\u0089'"
+        );
+        test_normalization(
+            "'\\xaA'",
+            "'\u00aA'"
+        );
+        test_normalization(
+            "'\\xbB'",
+            "'\u00bB'"
+        );
+        test_normalization(
+            "'\\xcC'",
+            "'\u00cC'"
+        );
+        test_normalization(
+            "'\\xdD'",
+            "'\u00dD'"
+        );
+        test_normalization(
+            "'\\xeE'",
+            "'\u00eE'"
+        );
+        test_normalization(
+            "'\\xfF'",
+            "'\u00fF'"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "'\\u0123'",
+            "'\u0123'"
+        );
+        test_normalization(
+            "'\\u4567'",
+            "'\u4567'"
+        );
+        test_normalization(
+            "'\\u89aA'",
+            "'\u89aA'"
+        );
+        test_normalization(
+            "'\\ubBcC'",
+            "'\ubBcC'"
+        );
+        test_normalization(
+            "'\\ueEfF'",
+            "'\ueEfF'"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "'\\u{0123}'",
+            "'\u0123'"
+        );
+        test_normalization(
+            "'\\u{4567}'",
+            "'\u4567'"
+        );
+        test_normalization(
+            "'\\u{89aA}'",
+            "'\u89aA'"
+        );
+        test_normalization(
+            "'\\u{bBcC}'",
+            "'\ubBcC'"
+        );
+        test_normalization(
+            "'\\u{eEfF}'",
+            "'\ueEfF'"
+        );
+    }
+
+    SECTION("%XX")
+    {
+        test_normalization(
+            "unescape('%01')",
+            "'\u0001'"
+        );
+        test_normalization(
+            "unescape('%23')",
+            "'\u0023'"
+        );
+        test_normalization(
+            "unescape('%45')",
+            "'\u0045'"
+        );
+        test_normalization(
+            "unescape('%67')",
+            "'\u0067'"
+        );
+        test_normalization(
+            "unescape('%89')",
+            "'\u0089'"
+        );
+        test_normalization(
+            "unescape('%aA')",
+            "'\u00aA'"
+        );
+        test_normalization(
+            "unescape('%bB')",
+            "'\u00bB'"
+        );
+        test_normalization(
+            "unescape('%cC')",
+            "'\u00cC'"
+        );
+        test_normalization(
+            "unescape('%dD')",
+            "'\u00dD'"
+        );
+        test_normalization(
+            "unescape('%eE')",
+            "'\u00eE'"
+        );
+        test_normalization(
+            "unescape('%fF')",
+            "'\u00fF'"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "unescape('\\u01')",
+            "'\u0001'"
+        );
+        test_normalization(
+            "unescape('%23')",
+            "'\u0023'"
+        );
+        test_normalization(
+            "unescape('\\u45')",
+            "'\u0045'"
+        );
+        test_normalization(
+            "unescape('\\u67')",
+            "'\u0067'"
+        );
+        test_normalization(
+            "unescape('\\u89')",
+            "'\u0089'"
+        );
+        test_normalization(
+            "unescape('\\uaA')",
+            "'\u00aA'"
+        );
+        test_normalization(
+            "unescape('\\ubB')",
+            "'\u00bB'"
+        );
+        test_normalization(
+            "unescape('\\ucC')",
+            "'\u00cC'"
+        );
+        test_normalization(
+            "unescape('\\udD')",
+            "'\u00dD'"
+        );
+        test_normalization(
+            "unescape('\\ueE')",
+            "'\u00eE'"
+        );
+        test_normalization(
+            "unescape('\\ufF')",
+            "'\u00fF'"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "unescape('%u0123')",
+            "'\u0123'"
+        );
+        test_normalization(
+            "unescape('%u4567')",
+            "'\u4567'"
+        );
+        test_normalization(
+            "unescape('%u89aA')",
+            "'\u89aA'"
+        );
+        test_normalization(
+            "unescape('%ubBcC')",
+            "'\ubBcC'"
+        );
+        test_normalization(
+            "unescape('%ueEfF')",
+            "'\ueEfF'"
+        );
+    }
+
+    SECTION("decimal")
+    {
+        test_normalization(
+            "String.fromCharCode(1)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(12)",
+            "'\u000c'"
+        );
+        test_normalization(
+            "String.fromCharCode(345)",
+            "'\u0159'"
+        );
+        test_normalization(
+            "String.fromCharCode(6789)",
+            "'\u1a85'"
+        );
+        test_normalization(
+            "String.fromCharCode(1000)",
+            "'\u03e8'"
+        );
+        test_normalization(
+            "String.fromCharCode(0001)",
+            "'\x01'"
+        );
+        test_normalization(
+            "String.fromCharCode(65536)",
+            "'\uffff'"
+        );
+    }
+
+    SECTION("hexadecimal")
+    {
+        test_normalization(
+            "String.fromCharCode(0x0001)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X0001)",
+            "'\u0001'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x1234)",
+            "'\u1234'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X5678)",
+            "'\u5678'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x9aAb)",
+            "'\u9aAb'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x9aAb)",
+            "'\u9aAb'"
+        );
+        test_normalization(
+            "String.fromCharCode(0xBcCd)",
+            "'\uBcCd'"
+        );
+        test_normalization(
+            "String.fromCharCode(0XeEfF)",
+            "'\ueEfF'"
+        );
+        test_normalization(
+            "String.fromCharCode(0x10000)",
+            "'\uffff'"
+        );
+        test_normalization(
+            "String.fromCharCode(0X10000)",
+            "'\uffff'"
+        );
+    }
+}
+
+TEST_CASE("Universal sequences", "[JSNormalizer]") // escapes that are expected to be decoded without any decoding function call around them
+{
+    SECTION("\\uXXXX") // four-digit form in an identifier, the three string quoting styles and a slash-delimited literal
+    {
+        test_normalization(
+            "\\u0065\\u0076\\u0061\\u006C () ;", // escaped identifier; expected to come out as a plain eval call
+            "eval();"
+        );
+        test_normalization(
+            "'\\u0062\\u0061\\u0072'",
+            "'bar'"
+        );
+        test_normalization(
+            "\"\\u0062\\u0061\\u0072\"",
+            "\"bar\""
+        );
+        test_normalization(
+            "`\\u0062\\u0061\\u0072`",
+            "`bar`"
+        );
+        test_normalization(
+            "/\\u0062\\u0061\\u0072/",
+            "/bar/"
+        );
+    }
+
+    SECTION("\\xXX") // two-digit hex form; no identifier check in this section
+    {
+        test_normalization(
+            "'\\x62\\x61\\x72'",
+            "'bar'"
+        );
+        test_normalization(
+            "\"\\x62\\x61\\x72\"",
+            "\"bar\""
+        );
+        test_normalization(
+            "`\\x62\\x61\\x72`",
+            "`bar`"
+        );
+        test_normalization(
+            "/\\x62\\x61\\x72/",
+            "/bar/"
+        );
+    }
+
+    SECTION("\\u{XXXX}") // braced code point form in an identifier and the three string quoting styles
+    {
+        test_normalization(
+            "\\u{0065}\\u{0076}\\u{0061}\\u{006C} () ;",
+            "eval();"
+        );
+        test_normalization(
+            "'\\u{0062}\\u{0061}\\u{0072}'",
+            "'bar'"
+        );
+        test_normalization(
+            "\"\\u{0062}\\u{0061}\\u{0072}\"",
+            "\"bar\""
+        );
+        test_normalization(
+            "`\\u{0062}\\u{0061}\\u{0072}`",
+            "`bar`"
+        );
+    }
+}
+
+TEST_CASE("unescape()", "[JSNormalizer]") // every input spells bar in some escape notation; the expected output is the bare literal without the unescape() call
+{
+    SECTION("%XX") // each section repeats the check for single-quoted, double-quoted and backtick strings
+    {
+        test_normalization(
+            "unescape('%62%61%72')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"%62%61%72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`%62%61%72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "unescape('%u0062%u0061%u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"%u0062%u0061%u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`%u0062%u0061%u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "unescape('\\u62\\u61\\u72')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\u62\\u61\\u72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\u62\\u61\\u72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "unescape('\\u0062\\u0061\\u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\u0062\\u0061\\u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\u0062\\u0061\\u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "unescape('\\x62\\x61\\x72')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\x62\\x61\\x72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\x62\\x61\\x72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "unescape('\\u{0062}\\u{0061}\\u{0072}')",
+            "'bar'"
+        );
+        test_normalization(
+            "unescape(\"\\u{0062}\\u{0061}\\u{0072}\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "unescape(`\\u{0062}\\u{0061}\\u{0072}`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("mixed sequence") // one string combines several notations; checked through the mixed-encoding helper instead of the plain one
+    {
+        test_normalization_mixed_encoding(
+            "unescape('\\u62%61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\x62%u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\x62\\u61\\u72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('%u0062\\u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\u0062\\x61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "unescape('\\u0062\\u{0061}\\x72')",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("decodeURI()", "[JSNormalizer]") // same inputs and expectations as the unescape() case, wrapped in decodeURI() instead
+{
+    SECTION("%XX") // each section repeats the check for single-quoted, double-quoted and backtick strings
+    {
+        test_normalization(
+            "decodeURI('%62%61%72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"%62%61%72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`%62%61%72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "decodeURI('%u0062%u0061%u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"%u0062%u0061%u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`%u0062%u0061%u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "decodeURI('\\u62\\u61\\u72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\u62\\u61\\u72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\u62\\u61\\u72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "decodeURI('\\u0062\\u0061\\u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\u0062\\u0061\\u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\u0062\\u0061\\u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "decodeURI('\\x62\\x61\\x72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\x62\\x61\\x72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\x62\\x61\\x72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "decodeURI('\\u{0062}\\u{0061}\\u{0072}')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURI(\"\\u{0062}\\u{0061}\\u{0072}\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURI(`\\u{0062}\\u{0061}\\u{0072}`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("mixed sequence") // one string combines several notations; checked through the mixed-encoding helper instead of the plain one
+    {
+        test_normalization_mixed_encoding(
+            "decodeURI('\\u62%61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\x62%u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\x62\\u61\\u72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('%u0062\\u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\u0062\\x61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURI('\\u0062\\u{0061}\\x72')",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("decodeURIComponent()", "[JSNormalizer]") // same inputs and expectations as the unescape() case, wrapped in decodeURIComponent() instead
+{
+    SECTION("%XX") // each section repeats the check for single-quoted, double-quoted and backtick strings
+    {
+        test_normalization(
+            "decodeURIComponent('%62%61%72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"%62%61%72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`%62%61%72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("%uXXXX")
+    {
+        test_normalization(
+            "decodeURIComponent('%u0062%u0061%u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"%u0062%u0061%u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`%u0062%u0061%u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXX")
+    {
+        test_normalization(
+            "decodeURIComponent('\\u62\\u61\\u72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\u62\\u61\\u72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\u62\\u61\\u72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\uXXXX")
+    {
+        test_normalization(
+            "decodeURIComponent('\\u0062\\u0061\\u0072')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\u0062\\u0061\\u0072\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\u0062\\u0061\\u0072`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\xXX")
+    {
+        test_normalization(
+            "decodeURIComponent('\\x62\\x61\\x72')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\x62\\x61\\x72\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\x62\\x61\\x72`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("\\u{XXXX}")
+    {
+        test_normalization(
+            "decodeURIComponent('\\u{0062}\\u{0061}\\u{0072}')",
+            "'bar'"
+        );
+        test_normalization(
+            "decodeURIComponent(\"\\u{0062}\\u{0061}\\u{0072}\")",
+            "\"bar\""
+        );
+        test_normalization(
+            "decodeURIComponent(`\\u{0062}\\u{0061}\\u{0072}`)",
+            "`bar`"
+        );
+    }
+
+    SECTION("mixed sequence") // one string combines several notations; checked through the mixed-encoding helper instead of the plain one
+    {
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\u62%61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\x62%u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\x62\\u61\\u72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('%u0062\\u0061%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\u0062\\x61%72')",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "decodeURIComponent('\\u0062\\u{0061}\\x72')",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("String.fromCharCode()", "[JSNormalizer]") // a list of char codes is expected to collapse into one single-quoted string
+{
+    SECTION("decimal") // 98, 97, 114 are the codes of b, a, r
+    {
+        test_normalization(
+            "String.fromCharCode(98, 97, 114)",
+            "'bar'"
+        );
+    }
+
+    SECTION("hexadecimal") // same codes in hex, with and without zero padding
+    {
+        test_normalization(
+            "String.fromCharCode(0x62, 0x61, 0x72)",
+            "'bar'"
+        );
+
+        test_normalization(
+            "String.fromCharCode(0x0062, 0x0061, 0x0072)",
+            "'bar'"
+        );
+    }
+
+    SECTION("mixed sequence") // decimal and hex codes in one call; checked through the mixed-encoding helper
+    {
+        test_normalization_mixed_encoding(
+            "String.fromCharCode(98, 97, 0x72)",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "String.fromCharCode(0x62, 97, 114)",
+            "'bar'"
+        );
+    }
+}
+
+TEST_CASE("Split", "[JSNormalizer]") // input cut across several PDUs; each entry is a { source, expected } pair for one PDU
+{
+    SECTION("unescape()") // the expected strings show the whole output so far, not only the current PDU's share
+    {
+        test_normalization({
+            { "unescape(", "" }, // the call alone is expected to produce no output
+            { ")", "" }
+        });
+
+        test_normalization({
+            { "unescape('%62", "'b" },
+            { "%61%72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%62%61", "'ba" },
+            { "%72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%62%61%72", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u", "'%u" }, // an unfinished sequence at the PDU edge is expected verbatim until the rest arrives
+            { "0062%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u00", "'%u00" },
+            { "62%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062", "'b" },
+            { "%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u", "'b%u" },
+            { "0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u00", "'b%u00" },
+            { "61%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061%u", "'ba%u" },
+            { "0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061%u00", "'ba%u00" },
+            { "72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "unescape('%u0062%u0061%u0072", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({ // three PDUs, one complete sequence in each
+            { "unescape('%u0062", "'b" },
+            { "%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+    }
+
+    SECTION("decodeURI()") // same percent-u split points as above, wrapped in decodeURI()
+    {
+        test_normalization({
+            { "decodeURI(", "" },
+            { ")", "" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u", "'%u" },
+            { "0062%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u00", "'%u00" },
+            { "62%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u", "'b%u" },
+            { "0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u00", "'b%u00" },
+            { "61%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061%u", "'ba%u" },
+            { "0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061%u00", "'ba%u00" },
+            { "72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062%u0061%u0072", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURI('%u0062", "'b" },
+            { "%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+    }
+
+    SECTION("decodeURIComponent()") // same percent-u split points again, wrapped in decodeURIComponent()
+    {
+        test_normalization({
+            { "decodeURIComponent(", "" },
+            { ")", "" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u", "'%u" },
+            { "0062%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u00", "'%u00" },
+            { "62%u0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u", "'b%u" },
+            { "0061%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u00", "'b%u00" },
+            { "61%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061%u", "'ba%u" },
+            { "0072')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061%u00", "'ba%u00" },
+            { "72')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062%u0061%u0072", "'bar" },
+            { "')", "'bar'" }
+        });
+
+        test_normalization({
+            { "decodeURIComponent('%u0062", "'b" },
+            { "%u0061", "'ba" },
+            { "%u0072')", "'bar'" }
+        });
+    }
+
+    SECTION("String.fromCharCode()") // char code lists cut inside a number, after a comma, and before the closing parenthesis
+    {
+        test_normalization({
+            { "String.fromCharCode(", "'" }, // unlike the calls above, the opening quote is expected as soon as the call is seen
+            { ")", "''" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(9", "'\u0009" }, // the lone digit 9 is expected as code 9 first, then as 98 once the 8 arrives
+            { "8, 97, 114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98,", "'b" },
+            { "97, 114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98, 97", "'ba" },
+            { ",114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98, 97, 114", "'bar" },
+            { ")", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062", "'b" },
+            { ",0x0061, 0x0072)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062, 0x0061", "'ba" },
+            { ", 0x0072)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062, 0x0061, 0x0072", "'bar" },
+            { ")", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(0x0062,", "'b" },
+            { "0x0061,", "'ba" },
+            { "0x72)",   "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCharCode(98,", "'b" },
+            { "97,", "'ba" },
+            { "114)", "'bar'" }
+        });
+    }
+}
+
+TEST_CASE("Mixed input", "[JSNormalizer]") // decoding calls whose arguments hold more than one escaped string
+{
+    SECTION("string") // escapes surrounded by plain text, and several string arguments joined by a comma or a plus
+    {
+        test_normalization(
+            "unescape ( ' A   \\x62   B   \\x61   C   \\x72 ' ) ;",
+            "' A   b   B   a   C   r ';"
+        );
+        test_normalization(
+            "unescape ( ' \\x62ar b\\x61r ba\\x72 ' ) ;",
+            "' bar bar bar ';"
+        );
+        test_normalization(
+            "unescape ( '\\x62\\x61\\x72', '\\x62\\x61\\x72' ) ;",
+            "'bar','bar';"
+        );
+        test_normalization(
+            "unescape ( '\\x62\\x61\\x72' + '\\x62\\x61\\x72' ) ;",
+            "'bar'+'bar';"
+        );
+        test_normalization_mixed_encoding( // the two operands use different notations, hence the mixed-encoding helper
+            "unescape ( '\\x62\\x61\\x72' + '\\u62\\u61\\u72' ) ;",
+            "'bar'+'bar';"
+        );
+    }
+
+    SECTION("literal") // numeric arguments around the string are expected to pass through unchanged
+    {
+        test_normalization(
+            "unescape ( 2,  '\\x62\\x61\\x72', 2 ) ;",
+            "2,'bar',2;"
+        );
+    }
+
+    SECTION("identifier") // function calls appearing among the arguments
+    {
+        test_normalization( // strings inside the nested f() and eval() calls are expected to stay escaped; only the direct argument is decoded
+            "unescape ( f(\"A\\u20B\\u20C\"), eval(\"\\u66\\u6f\\u6f\"), \"\\u66\\u6f\\u6f\" ) ;",
+            "var_0000(\"A\\u20B\\u20C\"),eval(\"\\u66\\u6f\\u6f\"),\"foo\";"
+        );
+        test_normalization_mixed_encoding( // only the codes before the eval() call are expected to be decoded; the rest stays as written
+            "String.fromCharCode (114, 0x72, eval('123'), 114, 0x72) ;",
+            "'rr' eval('123'),114,0x72;"
+        );
+    }
+
+    SECTION("comment") // line, block and HTML-style comments between the codes are expected to leave the result unchanged
+    {
+        test_normalization(
+            "String.fromCharCode(0x62, \n 0x61, // comment \n 0x72) ;",
+            "'bar';"
+        );
+        test_normalization(
+            "String.fromCharCode(0x62, \t 0x61, /* comment */ 0x72) ;",
+            "'bar';"
+        );
+        test_normalization(
+            "String.fromCharCode(0x62, \r 0x61, <!-- HTML comment \r 0x72) ;",
+            "'bar';"
+        );
+    }
+
+    SECTION("nested") // a decoding call inside another decoding call, and inside an ordinary call
+    {
+        test_normalization(
+            "unescape('\\x62\\x61\\x72'+unescape('\\x62\\x61\\x72')+decodeURI('\\u62\\u61\\u72')) ;",
+            "'bar'+'bar'+'bar';"
+        );
+        test_normalization( // the surrounding document.write() call is expected to be kept as is
+            "document.write(unescape('%62%61%72')) ;",
+            "document.write('bar');"
+        );
+    }
+}
+
+#endif // CATCH_TEST_BUILD
+
author	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)
committer	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Wed, 23 Mar 2022 19:31:39 +0000 (19:31 +0000)
doc/user/http_inspect.txt		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/dev_notes.txt		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_js_norm.cc		patch \| blob \| blame \| history
src/utils/js_identifier_ctx.cc		patch \| blob \| blame \| history
src/utils/js_normalizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.l		patch \| blob \| blame \| history
src/utils/test/CMakeLists.txt		patch \| blob \| blame \| history
src/utils/test/js_normalizer_test.cc		patch \| blob \| blame \| history
src/utils/test/js_test_utils.cc		patch \| blob \| blame \| history
src/utils/test/js_test_utils.h		patch \| blob \| blame \| history
src/utils/test/js_unescape_test.cc	[new file with mode: 0644]	patch \| blob