git.ipfire.org Git - thirdparty/snort3.git/commitdiff
utils: fix JS split to reflect tokens correction and re-normalization
author Oleksandr Serhiienko <oserhiie@cisco.com>
Thu, 4 Aug 2022 09:51:17 +0000 (12:51 +0300)
committer Oleksandr Serhiienko <oserhiie@cisco.com>
Tue, 9 Aug 2022 13:44:30 +0000 (16:44 +0300)
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/test/js_normalizer_test.cc

index 3bcb33cc46f9b0b0116bbf9f128ff31c34e99e2a..697b76d4944c7188f35ce73ff8161bc48c8264e6 100644 (file)
@@ -341,7 +341,8 @@ private:
         JSToken token = UNDEFINED;          // the token before
         int orig_len = 0;                   // current token original length
         int norm_len = 0;                   // normalized length of previous tokens
-        int sc = 0;                        // current Starting Condition (0 means NOT_SET)
+        int sc = 0;                         // current Starting Condition (0 means NOT_SET)
+        int correction = 0;                 // correction length
     } states[JSTOKENIZER_MAX_STATES];
     int sp = 0;                             // points to the top of states
     int eof_sp = 0;                         // points to the last state before the EOF
index 61db2e7416072f66f342d97c7103808740e42389..2a68209689b9bb8eaf653f28b7cdac575e83722b 100644 (file)
@@ -1702,6 +1702,9 @@ void JSTokenizer::states_correct(int take_off)
 
     bytes_read -= delta;
     state.orig_len -= delta;
+    state.correction = take_off;
+
+    yyless(take_off);
 }
 
 void JSTokenizer::states_over()
@@ -1766,6 +1769,7 @@ bool JSTokenizer::states_process()
         state.orig_len = yyleng;
         state.norm_len = yyout.rdbuf()->pubseekoff(0, std::ios_base::cur, std::ios_base::out);
         state.sc = yy_start;
+        state.correction = 0;
 
         return true;
     }
@@ -1781,6 +1785,16 @@ bool JSTokenizer::states_process()
     // Update parsing state every match
     else if (bytes_skip > 0)
     {
+        // if the state was corrected, reflect this during the parsing
+        if (auto correction = states[sp].correction)
+        {
+            auto delta = yyleng - correction;
+            bytes_skip += delta;
+            bytes_read -= delta;
+
+            yyless(correction);
+        }
+
         do { ++sp; sp %= JSTOKENIZER_MAX_STATES; }
         while (states[sp].sc == 0);
 
@@ -1804,6 +1818,7 @@ bool JSTokenizer::states_process()
         state.orig_len = yyleng;
         state.norm_len = yyout.rdbuf()->pubseekoff(0, std::ios_base::cur, std::ios_base::out);
         state.sc = yy_start;
+        state.correction = 0;
 
         return true;
     }
@@ -2316,7 +2331,6 @@ JSTokenizer::JSRet JSTokenizer::literal_regex_start()
     EXEC(do_spacing(LITERAL))
     yyout << '/';
     states_correct(1);
-    yyless(1);
     BEGIN(regex);
     set_ident_norm(true);
     regex_stack = VStack<char>();
@@ -2957,7 +2971,6 @@ void JSTokenizer::explicit_otag()
 
     // discard match of the script tag and scan again without leading '<'
     states_correct(1);
-    yyless(1);
 
     // process leading '<' as a comparison operator
     operator_comparison();
index 7f6cd4218600ceca85f476046afefc2acc429e18..c32c87e2a53f98b84715835a80987af9ef03c251 100644 (file)
@@ -2457,6 +2457,39 @@ TEST_CASE("split between tokens", "[JSNormalizer]")
         const char exp2[] = "<script)";
         const char exp[] = "(a<script)";
 
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("complete regex (1 parsing group) - identifier")
+    {
+        const char dat1[] = "/ss/,";
+        const char dat2[] = " a ;";
+        const char exp1[] = "/ss/,";
+        const char exp2[] = "a;";
+        const char exp[] = "/ss/,a;";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("complete regex (2 parsing groups) - identifier")
+    {
+        const char dat1[] = "/\\s/,";
+        const char dat2[] = " a ;";
+        const char exp1[] = "/\\s/,";
+        const char exp2[] = "a;";
+        const char exp[] = "/\\s/,a;";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("complete regex (not the first) - identifier")
+    {
+        const char dat1[] = ",/\\s/,";
+        const char dat2[] = " a ;";
+        const char exp1[] = ",/\\s/,";
+        const char exp2[] = "a;";
+        const char exp[] = ",/\\s/,a;";
+
         NORMALIZE_2(dat1, dat2, exp1, exp2);
         NORM_COMBINED_2(dat1, dat2, exp);
     }
@@ -2745,7 +2778,7 @@ TEST_CASE("split in closing tag", "[JSNormalizer]")
         const char dat2[] = "ipt";
         const char dat3[] = ">";
         const char exp1[] = "::::</scr";
-        const char exp2[] = "cript";
+        const char exp2[] = "script";
         const char exp3[] = "";
         const char exp[] = "::::";