git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #3537: JS Normalizer: Escaped JavaScript Identifiers
author Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Thu, 4 Aug 2022 12:51:42 +0000 (12:51 +0000)
committer Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Thu, 4 Aug 2022 12:51:42 +0000 (12:51 +0000)
Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_unescape_ident to master

Squashed commit of the following:

commit 2b192d53735b7f6b346c17581adc28c1ee395b56
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Mon Aug 1 11:16:11 2022 +0300

    utils: fix compilation warning [-Wcomma]

commit ad2285d11ea0b1408937a7688179e7d65946031f
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Mon Aug 1 11:15:00 2022 +0300

    utils: validate escaped JavaScript identifiers

src/utils/js_identifier_ctx.cc
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/test/js_unescape_test.cc

index b172d649465df4dd815172d2d43e071346e27991..a56bb05a0a202dc121ec9abc5f90890d0f5c44cf 100644 (file)
@@ -75,7 +75,7 @@ static void init_norm_names()
     assert(sizeof(norm_names) == c - norm_names);
 }
 
-static int _init_norm_names __attribute__((unused)) = (init_norm_names(), 0);
+static int _init_norm_names __attribute__((unused)) = (static_cast<void>(init_norm_names()), 0); // runs init_norm_names() during static initialization; the void cast on the left operand of the comma addresses the -Wcomma warning
 
 JSIdentifierCtx::JSIdentifierCtx(int32_t depth, uint32_t max_scope_depth,
     const std::unordered_set<std::string>& ignored_ids_list,
index 18c8ce39204329da91266d814e644a940e4f3081..3bcb33cc46f9b0b0116bbf9f128ff31c34e99e2a 100644 (file)
@@ -331,6 +331,8 @@ private:
     JSIdentifierCtxBase& ident_ctx;
     size_t bytes_read;
     size_t tmp_bytes_read;
+    uint32_t tokens_read;
+    uint32_t tmp_tokens_read;
     bool ext_script;
     VStack<char> regex_stack;
 
index ca5821a12a1fae1d355981d442614dc8d6f18665..61db2e7416072f66f342d97c7103808740e42389 100644 (file)
@@ -1371,6 +1371,7 @@ JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out,
       max_template_nesting(max_template_nesting),
       ident_ctx(mapper),
       bytes_read(0),
+      tokens_read(0),
       tmp_buf(buf),
       tmp_buf_size(buf_size),
       tmp_cap_size(cap_size),
@@ -1397,6 +1398,7 @@ void JSTokenizer::switch_to_temporal(const std::string& data)
     yy_switch_to_buffer((YY_BUFFER_STATE)tmp_buffer);
 
     tmp_bytes_read = bytes_read;
+    tmp_tokens_read = tokens_read;
 }
 
 void JSTokenizer::switch_to_initial()
@@ -1406,6 +1408,7 @@ void JSTokenizer::switch_to_initial()
     tmp_buffer = nullptr;
 
     bytes_read = tmp_bytes_read;
+    tmp_tokens_read = tokens_read - tmp_tokens_read;
 }
 
 // The return value of this method is used to terminate the scanner
@@ -1414,16 +1417,20 @@ void JSTokenizer::switch_to_initial()
 // The return value should be used to make a decision about yyterminate() call
 JSTokenizer::JSRet JSTokenizer::eval_eof()
 {
-    // If the temporal scan buffer reaches EOF, cleanup and
-    // continue with the initial one
-    if (tmp_buffer)
-    {
-        switch_to_initial();
-        return EOS;
-    }
+    if (!tmp_buffer) // no temporal buffer is set, so this EOF belongs to the initial input
+        return SCRIPT_CONTINUE;
+
+    switch_to_initial(); // restores bytes_read and leaves in tmp_tokens_read the number of tokens counted since switch_to_temporal()
 
-    // Normal termination
-    return SCRIPT_CONTINUE;
+    if (tmp_tokens_read != 1 or token != IDENTIFIER) // reject unless exactly one token was counted and token (presumably the last scanned token - confirm) is IDENTIFIER
+        return BAD_TOKEN;
+
+    // remove temporal buffer normalization state
+    memset((void*)(states + sp), 0, sizeof(states[0])); // zero the single state entry at index sp
+    --sp; // step back to the previous entry
+    sp %= JSTOKENIZER_MAX_STATES; // NOTE(review): presumably wraps sp within the states array; confirm the type of sp makes a decrement from 0 wrap as intended
+
+    return EOS;
 }
 
 JSTokenizer::JSRet JSTokenizer::do_spacing(JSToken cur_token)
@@ -1745,6 +1752,7 @@ void JSTokenizer::states_over()
 bool JSTokenizer::states_process()
 {
     bytes_read += yyleng;
+    ++tokens_read;
 
     // Fulfillment goes after this check only in case of split over several input scripts.
     // Otherwise, new state is pushed.
@@ -2969,6 +2977,7 @@ JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in, bool external_script)
 
     bytes_in = std::max(bytes_read, bytes_in) - bytes_in;
     bytes_read = 0;
+    tokens_read = 0;
 
     return static_cast<JSTokenizer::JSRet>(r);
 }
index 3b8cdb58a336a984c2387127ce755529b55cf26e..ded5dbec359ab05d2b648f4a919f2f45cc116a8a 100644 (file)
@@ -798,6 +798,150 @@ TEST_CASE("String.fromCodePoint()", "[JSNormalizer]")
     }
 }
 
+TEST_CASE("Identifiers", "[JSNormalizer]") // Unicode-escaped identifiers; helper arguments look like (input, expected output[, expected result code]) - TODO confirm against the helpers
+{
+    SECTION("all patterns") // a lone escape in each spelling: 4-digit form and braced code point form
+    {
+        test_normalization(
+            "\\u0061", // U+0061 is 'a'
+            "var_0000"
+        );
+        test_normalization_bad(
+            "\\u0020", // U+0020 is a space
+            "",
+            JSTokenizer::BAD_TOKEN
+        );
+
+        test_normalization(
+            "\\u{0061}", // braced form, same code point with leading zeros
+            "var_0000"
+        );
+        test_normalization(
+            "\\u{061}",
+            "var_0000"
+        );
+        test_normalization(
+            "\\u{61}",
+            "var_0000"
+        );
+        test_normalization_bad(
+            "\\u{1}", // U+0001 is a control character
+            "\u0001",
+            JSTokenizer::BAD_TOKEN
+        );
+    }
+
+    SECTION("valid sequence") // 4-digit escape at the start, middle and end of the identifier abc
+    {
+        test_normalization(
+            " \\u0061bc ;",
+            "var_0000;"
+        );
+        test_normalization(
+            " a\\u0062c ;",
+            "var_0000;"
+        );
+        test_normalization(
+            " ab\\u0063 ;",
+            "var_0000;"
+        );
+    }
+
+    SECTION("invalid sequence") // escaped space (U+0020) at the start, middle and end; each case expects BAD_TOKEN
+    {
+        test_normalization_bad(
+            " \\u0020bc ;",
+            "var_0000",
+            JSTokenizer::BAD_TOKEN
+        );
+        test_normalization_bad(
+            " a\\u0020c ;",
+            "var_0000 var_0001",
+            JSTokenizer::BAD_TOKEN
+        );
+        test_normalization_bad(
+            " ab\\u0020 ;",
+            "var_0000",
+            JSTokenizer::BAD_TOKEN
+        );
+    }
+
+    SECTION("valid code point") // same three positions as "valid sequence", using the braced form
+    {
+        test_normalization(
+            " \\u{61}bc ;",
+            "var_0000;"
+        );
+        test_normalization(
+            " a\\u{62}c ;",
+            "var_0000;"
+        );
+        test_normalization(
+            " ab\\u{63} ;",
+            "var_0000;"
+        );
+    }
+
+    SECTION("invalid code point") // same three positions as "invalid sequence", using the braced form
+    {
+        test_normalization_bad(
+            " \\u{20}bc ;",
+            "var_0000",
+            JSTokenizer::BAD_TOKEN
+        );
+        test_normalization_bad(
+            " a\\u{20}c ;",
+            "var_0000 var_0001",
+            JSTokenizer::BAD_TOKEN
+        );
+        test_normalization_bad(
+            " ab\\u{20} ;",
+            "var_0000",
+            JSTokenizer::BAD_TOKEN
+        );
+    }
+
+    SECTION("valid dot accessor") // fully escaped names on either side of a dot
+    {
+        test_normalization(
+            "\\u0066\\u006f\\u006f.\\u0062\\u0061\\u0072 ;", // spells foo.bar
+            "var_0000.var_0001;"
+        );
+        test_normalization(
+            "console.\\u006c\\u006f\\u0067 ;", // spells console.log; expected output keeps both names (presumably configured as ignored - confirm)
+            "console.log;"
+        );
+        test_normalization(
+            "\\u0066\\u006f\\u006f.\\u006a\\u006f\\u0069\\u006e ;", // spells foo.join; expected output keeps join unchanged
+            "var_0000.join;"
+        );
+    }
+
+    SECTION("invalid dot accessor") // an escaped space (U+0020) inside one side of the accessor; each case expects BAD_TOKEN
+    {
+        test_normalization_bad(
+            "\\u0066\\u006f\\u006f.\\u0020\\u0061\\u0072 ;",
+            "var_0000.var_0001",
+            JSTokenizer::BAD_TOKEN
+        );
+        test_normalization_bad(
+            "\\u0066\\u0020\\u006f.\\u0062\\u0061\\u0072 ;",
+            "var_0000 var_0001",
+            JSTokenizer::BAD_TOKEN
+        );
+        test_normalization_bad(
+            "console.\\u006c\\u0020\\u0067 ;",
+            "console.l var_0000",
+            JSTokenizer::BAD_TOKEN
+        );
+        test_normalization_bad(
+            "\\u0066\\u0020\\u006f.\\u006a\\u006f\\u0069\\u006e ;",
+            "var_0000 var_0001",
+            JSTokenizer::BAD_TOKEN
+        );
+    }
+}
+
 TEST_CASE("Split", "[JSNormalizer]")
 {
     SECTION("unescape()")
@@ -1091,6 +1235,35 @@ TEST_CASE("Split", "[JSNormalizer]")
             { "114)", "'bar'" }
         });
     }
+
+    SECTION("identifier") // escaped identifiers spread over several chunks; pairs look like {chunk, expected output} - TODO confirm against the helper
+    {
+        test_normalization({
+            { "\\u0062", "var_0000" }, // spells b
+            { "\\u0061\\u0072", "var_0001" } // spells ar; the expected alias changes from var_0000 to var_0001
+        });
+        test_normalization({
+            { "\\u{62}", "var_0000" }, // same split, braced form in both chunks
+            { "\\u{61}\\u{72}", "var_0001" }
+        });
+        test_normalization({
+            { "\\u0062", "var_0000" }, // 4-digit form first, braced form second
+            { "\\u{61}\\u{72}", "var_0001" }
+        });
+        test_normalization({
+            { "\\u{62}", "var_0000" }, // braced form first, 4-digit form second
+            { "\\u0061\\u0072", "var_0001" }
+        });
+        test_normalization({
+            { "\\u{63}\\u{6f}\\u{6e}", "var_0000" }, // spells con
+            { "\\u{73}\\u{6f}\\u{6c}\\u{65}", "console" } // spells sole; expected output is the unchanged name console
+        });
+        test_normalization({
+            { "\\u0062", "var_0000" }, // three chunks, one escape each: b, a, r
+            { "\\u0061", "var_0001" },
+            { "\\u0072", "var_0002" }
+        });
+    }
 }
 
 TEST_CASE("Mixed input", "[JSNormalizer]")
@@ -1129,6 +1302,14 @@ TEST_CASE("Mixed input", "[JSNormalizer]")
 
     SECTION("identifier")
     {
+        test_normalization(
+            "\\u0062\\u{61}\\u0072",
+            "var_0000"
+        );
+        test_normalization(
+            "\\u{62}\\u0061\\u{72}",
+            "var_0000"
+        );
         test_normalization(
             "unescape ( f(\"A\\u20B\\u20C\"), eval(\"\\u66\\u6f\\u6f\"), \"\\u66\\u6f\\u6f\" ) ;",
             "var_0000(\"A\\u20B\\u20C\"),eval(\"\\u66\\u6f\\u6f\"),\"foo\";"