git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #3326: JSN: decode String.fromCodePoint() JavaScript function
author Mike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 30 Mar 2022 16:03:01 +0000 (16:03 +0000)
committer Mike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 30 Mar 2022 16:03:01 +0000 (16:03 +0000)
Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_from_code_point to master

Squashed commit of the following:

commit a4e3c6cad84181fb907ccafec6e4941e4611a927
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Mon Mar 28 13:34:04 2022 +0300

    http_inspect: decode String.fromCodePoint() JavaScript function

        * utils: add support for supplementary characters in JS Normalizer
        * utils: add tracking and decoding of String.fromCodePoint() JavaScript
        function in JS Normalizer
        * utils: add unit test coverage
        * http_inspect: update dev notes
        * doc: update user manual

doc/user/http_inspect.txt
src/service_inspectors/http_inspect/dev_notes.txt
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/test/js_unescape_test.cc

index 02bd93adfb5fa328ed779562bca90a825c1671d2..cf7dcaf2eb5f3c7cbc1e1ccc55aa07ad7b0cc7bc 100755 (executable)
@@ -80,7 +80,8 @@ and identifiers normalizer. Normalizer concatenates string literals whenever
 it's possible to do. This also works with any other normalizations that result
 in string literals. All JavaScript identifier names, except those from
 the ignore list, will be substituted with unified names in the following
-format: var_0000 -> var_ffff. The Normalizer tries to expand an escaped text,
+format: var_0000 -> var_ffff. However, the names of unescape-like functions are removed
+from the normalized data. The Normalizer tries to expand an escaped text,
 so it will appear in a usual form in the output. Moreover, Normalizer validates
 the syntax concerning ECMA-262 Standard, including scope tracking and restrictions
 for script elements. For more information on how additionally configure
index eb69bbe30d5ad75964235cc1e2d428e5d780b119..997d4afb502cd4660733aa133de97951c8804310 100755 (executable)
@@ -258,19 +258,20 @@ For example:
     a("hello") // will be substituted to 'console.log("hello")'
 
 In addition to the scope tracking, JS Normalizer specifically tracks unicode unescape
-functions(unescape, decodeURI, decodeURIComponent, String.fromCharCode). This allows detection of
-unescape functions nested within other unescape functions, which is a potential
-indicator of a multilevel obfuscation. The definition of a function call depends on
+functions(unescape, decodeURI, decodeURIComponent, String.fromCharCode, String.fromCodePoint).
+This allows detection of unescape functions nested within other unescape functions, which is
+a potential indicator of a multilevel obfuscation. The definition of a function call depends on
 identifier substitution, so such identifiers must be included in the ignore list in
 order to use this feature. After determining the unescape sequence, it is decoded into the
-corresponding string.
+corresponding string, and the name of the unescape function is not present in the output.
 
 For example:
 
-   unescape('\u0062\u0061\u0072')        -> 'bar'
-   decodeURI('%62%61%72')                -> 'bar'
-   decodeURIComponent('\x62\x61\x72')    -> 'bar'
-   String.fromCharCode(98, 0x0061, 0x72) -> 'bar'
+   unescape('\u0062\u0061\u0072')              -> 'bar'
+   decodeURI('%62%61%72')                      -> 'bar'
+   decodeURIComponent('\x62\x61\x72')          -> 'bar'
+   String.fromCharCode(98, 0x0061, 0x72)       -> 'bar'
+   String.fromCodePoint(65600, 65601, 0x10042) -> '𐁀𐁁𐁂'
 
 Supported formats follow
 
index e7b84d7ed1c097c27d0783424a9cb4a0d0938ff1..21a1fd7258d6fdfb76505165b084c340ab5d75c7 100644 (file)
@@ -364,12 +364,13 @@ private:
         FuncType type;
     };
 
-    const std::array<FunctionIdentifier, 4> function_identifiers
+    const std::array<FunctionIdentifier, 5> function_identifiers
     {{
-        {"unescape",            FuncType::UNESCAPE  },
-        {"decodeURI",           FuncType::UNESCAPE  },
-        {"decodeURIComponent",  FuncType::UNESCAPE  },
-        {"String.fromCharCode", FuncType::CHAR_CODE }
+        {"unescape",             FuncType::UNESCAPE },
+        {"decodeURI",            FuncType::UNESCAPE },
+        {"decodeURIComponent",   FuncType::UNESCAPE },
+        {"String.fromCharCode",  FuncType::CHAR_CODE},
+        {"String.fromCodePoint", FuncType::CHAR_CODE}
     }};
 
     const uint32_t max_bracket_depth;
index da6c8bf1582c2857e6c87fea63600b80dd1f5f26..263a917860911a37d5467e4062775c2046e5e4b7 100644 (file)
@@ -1237,8 +1237,15 @@ static std::string unicode_to_utf8(const unsigned int code)
         res += 0x80 | ((code >> 6) & 0x3f);
         res += 0x80 | (code & 0x3f);
     }
+    else if (code <= 0x1fffff)
+    {
+        res += 0xf0 | (code >> 18);
+        res += 0x80 | ((code >> 12) & 0x3f);
+        res += 0x80 | ((code >> 6) & 0x3f);
+        res += 0x80 | (code & 0x3f);
+    }
     else
-        res += "\uffff";
+        res += "\xf7\xbf\xbf\xbf";    // UTF-8 sequence for hex 0x1fffff
 
     return res;
 }
index 3c8d29ccf957a681fb23d04c0192860f41e738e1..ee58b113774130c9bdc10211e3ce8e6570cdcb44 100644 (file)
@@ -276,7 +276,11 @@ TEST_CASE("Sequence parsing", "[JSNormalizer]")
         );
         test_normalization(
             "String.fromCharCode(65536)",
-            "'\uffff'"
+            "'\xf0\x90\x80\x80'"
+        );
+        test_normalization(
+            "String.fromCodePoint(2097152)",
+            "'\xf7\xbf\xbf\xbf'"
         );
     }
 
@@ -315,12 +319,20 @@ TEST_CASE("Sequence parsing", "[JSNormalizer]")
             "'\ueEfF'"
         );
         test_normalization(
-            "String.fromCharCode(0x10000)",
-            "'\uffff'"
+            "String.fromCodePoint(0x10000)",
+            "'\xf0\x90\x80\x80'"
         );
         test_normalization(
             "String.fromCharCode(0X10000)",
-            "'\uffff'"
+            "'\xf0\x90\x80\x80'"
+        );
+        test_normalization(
+            "String.fromCodePoint(0x200000)",
+            "'\xf7\xbf\xbf\xbf'"
+        );
+        test_normalization(
+            "String.fromCodePoint(0X200000)",
+            "'\xf7\xbf\xbf\xbf'"
         );
     }
 }
@@ -825,6 +837,63 @@ TEST_CASE("String.fromCharCode()", "[JSNormalizer]")
     }
 }
 
+TEST_CASE("String.fromCodePoint()", "[JSNormalizer]")
+{
+    SECTION("decimal")
+    {
+        test_normalization(
+            "String.fromCodePoint(98, 97, 114)",
+            "'bar'"
+        );
+
+        test_normalization(
+            "String.fromCodePoint(65600, 65601, 65602)",
+            "'\xf0\x90\x81\x80\xf0\x90\x81\x81\xf0\x90\x81\x82'"
+        );
+    }
+
+    SECTION("hexadecimal")
+    {
+        test_normalization(
+            "String.fromCodePoint(0x62, 0x61, 0x72)",
+            "'bar'"
+        );
+
+        test_normalization(
+            "String.fromCodePoint(0x00000062, 0x00000061, 0x00000072)",
+            "'bar'"
+        );
+
+        test_normalization(
+            "String.fromCodePoint(0x10040, 0x10041, 0x10042)",
+            "'\xf0\x90\x81\x80\xf0\x90\x81\x81\xf0\x90\x81\x82'"
+        );
+    }
+
+    SECTION("mixed sequence")
+    {
+        test_normalization_mixed_encoding(
+            "String.fromCodePoint(98, 97, 0x72)",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "String.fromCodePoint(0x00000062, 97, 114)",
+            "'bar'"
+        );
+
+        test_normalization_mixed_encoding(
+            "String.fromCodePoint(65600, 0x10041, 65602)",
+            "'\xf0\x90\x81\x80\xf0\x90\x81\x81\xf0\x90\x81\x82'"
+        );
+
+        test_normalization_mixed_encoding(
+            "String.fromCodePoint(0x10040, 65601, 0x10042)",
+            "'\xf0\x90\x81\x80\xf0\x90\x81\x81\xf0\x90\x81\x82'"
+        );
+    }
+}
+
 TEST_CASE("Split", "[JSNormalizer]")
 {
     SECTION("unescape()")
@@ -1063,6 +1132,61 @@ TEST_CASE("Split", "[JSNormalizer]")
             { "114)", "'bar'" }
         });
     }
+
+    SECTION("String.fromCodePoint()")
+    {
+        test_normalization({
+            { "String.fromCodePoint(", "'" },
+            { ")", "''" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(9", "'\u0009" },
+            { "8, 97, 114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(98,", "'b" },
+            { "97, 114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(98, 97", "'ba" },
+            { ",114)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(98, 97, 114", "'bar" },
+            { ")", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(0x0062", "'b" },
+            { ",0x0061, 0x0072)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(0x00000062, 0x00000061", "'ba" },
+            { ", 0x0072)", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(0x00000062, 0x00000061, 0x00000072", "'bar" },
+            { ")", "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(0x00000062,", "'b" },
+            { "0x00000061,", "'ba" },
+            { "0x72)",   "'bar'" }
+        });
+
+        test_normalization({
+            { "String.fromCodePoint(98,", "'b" },
+            { "97,", "'ba" },
+            { "114)", "'bar'" }
+        });
+    }
 }
 
 TEST_CASE("Mixed input", "[JSNormalizer]")
@@ -1109,6 +1233,10 @@ TEST_CASE("Mixed input", "[JSNormalizer]")
             "String.fromCharCode (114, 0x72, eval('123'), 114, 0x72) ;",
             "'rr' eval('123'),114,0x72;"
         );
+        test_normalization_mixed_encoding(
+            "String.fromCodePoint (114, 0x00000072, eval('123'), 114, 0x00000072) ;",
+            "'rr' eval('123'),114,0x00000072;"
+        );
     }
 
     SECTION("comment")
@@ -1125,6 +1253,18 @@ TEST_CASE("Mixed input", "[JSNormalizer]")
             "String.fromCharCode(0x62, \r 0x61, <!-- HTML comment \r 0x72) ;",
             "'bar';"
         );
+        test_normalization(
+            "String.fromCodePoint(0x00000062, \n 0x00000061, // comment \n 0x00000072) ;",
+            "'bar';"
+        );
+        test_normalization(
+            "String.fromCodePoint(0x00000062, \t 0x00000061, /* comment */ 0x00000072) ;",
+            "'bar';"
+        );
+        test_normalization(
+            "String.fromCodePoint(0x00000062, \r 0x00000061, <!-- HTML comment \r 0x00000072) ;",
+            "'bar';"
+        );
     }
 
     SECTION("nested")
@@ -1137,6 +1277,14 @@ TEST_CASE("Mixed input", "[JSNormalizer]")
             "document.write(unescape('%62%61%72')) ;",
             "document.write('bar');"
         );
+        test_normalization(
+            "String.fromCodePoint(0x0062, 0x0061, String.fromCharCode(0x0062, 0x0061, 0x0072));",
+            "'ba' 'bar';"
+        );
+        test_normalization(
+            "String.fromCharCode(0x0062, 0x0061, String.fromCodePoint(0x0062, 0x0061, 0x0072));",
+            "'ba' 'bar';"
+        );
     }
 }