git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #3527: JavaScript Normalizer: normalize JavaScript after opening tag
author Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Mon, 25 Jul 2022 08:29:27 +0000 (08:29 +0000)
committer Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Mon, 25 Jul 2022 08:29:27 +0000 (08:29 +0000)
Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_fix_otag to master

Squashed commit of the following:

commit 28534c108a56e40b76310a6076820739b82e7e4a
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Tue Jul 19 20:53:56 2022 +0300

    utils: continue JS normalization after opening tag seen

        * utils: normalize JavaScript after any opening tag seen
        * utils: re-normalize explicit opening tag by common rules
        * utils: throw opening tag built-in alert for inline scripts only
        * utils: remove opening tag return code
        * http_inspect: do not stop normalization in case of opening script tag
        * http_inspect: update trace messages

src/service_inspectors/http_inspect/http_js_norm.cc
src/utils/js_normalizer.h
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/test/js_normalizer_test.cc

index 196220ddbddbc63c6f54b5ee4bfb9fa497bc4596..5ce25cead2805597173fce283e2f69143274b4d6 100644 (file)
@@ -39,7 +39,6 @@ static const char* jsret_codes[] =
     "end of stream",
     "script ended",
     "script continues",
-    "opening tag",
     "closing tag",
     "bad token",
     "identifier overflow",
@@ -173,11 +172,6 @@ void HttpJsNorm::do_external(const Field& input, Field& output,
             events->create_event(EVENT_JS_CLOSING_TAG);
             ssn->js_built_in_event = true;
             break;
-        case JSTokenizer::OPENING_TAG:
-            *infractions += INF_JS_OPENING_TAG;
-            events->create_event(EVENT_JS_OPENING_TAG);
-            ssn->js_built_in_event = true;
-            break;
         case JSTokenizer::BAD_TOKEN:
         case JSTokenizer::WRONG_CLOSING_SYMBOL:
         case JSTokenizer::ENDED_IN_INNER_SCOPE:
@@ -313,10 +307,6 @@ void HttpJsNorm::do_inline(const Field& input, Field& output,
             break;
         case JSTokenizer::SCRIPT_CONTINUE:
             break;
-        case JSTokenizer::OPENING_TAG:
-            *infractions += INF_JS_OPENING_TAG;
-            events->create_event(EVENT_JS_OPENING_TAG);
-            break;
         case JSTokenizer::CLOSING_TAG:
             *infractions += INF_JS_CLOSING_TAG;
             events->create_event(EVENT_JS_CLOSING_TAG);
@@ -361,6 +351,11 @@ void HttpJsNorm::do_inline(const Field& input, Field& output,
             *infractions += INF_MIXED_ENCODINGS;
             events->create_event(EVENT_MIXED_ENCODINGS);
         }
+        if (js_ctx.is_opening_tag_seen())
+        {
+            *infractions += INF_JS_OPENING_TAG;
+            events->create_event(EVENT_JS_OPENING_TAG);
+        }
 
         script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
     }
index dcfb1e94f8eadb13a8a61013e2c89b953166f54b..db1b137609c4776e70f0a5aee0cbdde5323977fe 100644 (file)
@@ -65,6 +65,9 @@ public:
     bool is_mixed_encoding_seen() const
     { return tokenizer.is_mixed_encoding_seen(); }
 
+    bool is_opening_tag_seen() const
+    { return tokenizer.is_opening_tag_seen(); }
+
 #if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
     const char* get_tmp_buf() const
     { return tmp_buf; }
index fd985c6c8d9b35be6a93089a13fcb0710ac40ccc..18c8ce39204329da91266d814e644a940e4f3081 100644 (file)
@@ -151,7 +151,6 @@ public:
         EOS = 0,
         SCRIPT_ENDED,
         SCRIPT_CONTINUE,
-        OPENING_TAG,
         CLOSING_TAG,
         BAD_TOKEN,
         IDENTIFIER_OVERFLOW,
@@ -176,6 +175,7 @@ public:
 
     bool is_unescape_nesting_seen() const;
     bool is_mixed_encoding_seen() const;
+    bool is_opening_tag_seen() const;
 protected:
     [[noreturn]] void LexerError(const char* msg) override
     { snort::FatalError("%s", msg); }
@@ -306,6 +306,7 @@ private:
     void escaped_url_sequence_latin_1();
     void lit_int_code_point(int base);
     void char_code_no_match();
+    void explicit_otag();
 
     static const char* p_scope_codes[];
 
@@ -321,6 +322,7 @@ private:
     bool dealias_stored = false;
     bool unescape_nest_seen = false;
     bool mixed_encoding_seen = false;
+    bool opening_tag_seen = false;
 
     uint8_t max_template_nesting;
     VStack<uint16_t> brace_depth;
index d5e06b3738c134bc09ff30c245cd2f890be24428..ca5821a12a1fae1d355981d442614dc8d6f18665 100644 (file)
@@ -1075,7 +1075,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 {LINE_TERMINATORS}                            { BEGIN(regst); newline_found = true; }
 <char_code>{LINE_TERMINATORS}                 { newline_found = true; }
 
-<INITIAL,regex,regst,divop,char_code>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) }
+<INITIAL,regst,divop,char_code>{HTML_TAG_SCRIPT_OPEN} { explicit_otag(); }
 {HTML_TAG_SCRIPT_CLOSE}                       { EXEC(html_closing_script_tag()) }
 
     {HTML_COMMENT_OPEN}                       { BEGIN(lcomm); }
@@ -1088,7 +1088,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <char_code_lcomm>{LINE_COMMENT_END1}          { BEGIN(char_code); newline_found = true; }
 <char_code_lcomm>{LINE_COMMENT_END2}          { BEGIN(char_code); newline_found = true; }
 <char_code_lcomm>{LINE_COMMENT_END4}          { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } }
-<lcomm,char_code_lcomm>{LINE_COMMENT_END3}    { if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } }
+<lcomm,char_code_lcomm>{LINE_COMMENT_END3}    { if (!ext_script) { opening_tag_seen = true; } }
 <lcomm,char_code_lcomm>{LINE_COMMENT_SKIP}    { /* skip */ }
 <lcomm,char_code_lcomm><<EOF>>                { RETURN(SCRIPT_CONTINUE) }
 
@@ -1096,7 +1096,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <char_code>{BLOCK_COMMENT_START}              { BEGIN(char_code_bcomm); }
 <bcomm>{BLOCK_COMMENT_END1}                   { BEGIN(regst); }
 <char_code_bcomm>{BLOCK_COMMENT_END1}         { BEGIN(char_code); }
-<bcomm,char_code_bcomm>{BLOCK_COMMENT_END2}   { if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } }
+<bcomm,char_code_bcomm>{BLOCK_COMMENT_END2}   { if (!ext_script) { opening_tag_seen = true; } }
 <bcomm,char_code_bcomm>{BLOCK_COMMENT_END3}   { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } }
 <bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE1}  |
 <bcomm,char_code_bcomm>{BLOCK_COMMENT_LINE2}  { newline_found = true; }
@@ -1105,7 +1105,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 
     {LITERAL_DQ_STRING_START}                 { EXEC(literal_dq_string_start()) }
 <dqstr,unesc_dqstr>{LITERAL_DQ_STRING_END}    { dealias_append(); ECHO; BEGIN(divop); }
-<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_OPEN}     { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } }
+<dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_OPEN}     { if (!ext_script) { opening_tag_seen = true; } ECHO; }
 <dqstr,unesc_dqstr>{HTML_TAG_SCRIPT_CLOSE}    { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } }
 <dqstr,unesc_dqstr>\\{CR}{LF}                 { /* skip */ }
 <dqstr,unesc_dqstr>\\{LF}                     { /* skip */ }
@@ -1127,7 +1127,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 
     {LITERAL_SQ_STRING_START}                 { EXEC(literal_sq_string_start()) }
 <sqstr,unesc_sqstr>{LITERAL_SQ_STRING_END}    { dealias_append(); ECHO; BEGIN(divop); }
-<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_OPEN}     { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } }
+<sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_OPEN}     { if (!ext_script) { opening_tag_seen = true; } ECHO; }
 <sqstr,unesc_sqstr>{HTML_TAG_SCRIPT_CLOSE}    { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } }
 <sqstr,unesc_sqstr>\\{CR}{LF}                 { /* skip */ }
 <sqstr,unesc_sqstr>\\{LF}                     { /* skip */ }
@@ -1150,6 +1150,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
     {LITERAL_TEMPLATE_START}                                 { EXEC(literal_template_start()) }
 <tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_END}             { dealias_append(); ECHO; BEGIN(divop); }
 <tmpll,unesc_tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START}     { EXEC(process_subst_open()) dealias_reset(); }
+<tmpll,unesc_tmpll>{HTML_TAG_SCRIPT_OPEN}                    { if (!ext_script) { opening_tag_seen = true; } ECHO; }
 <tmpll,unesc_tmpll>{HTML_TAG_SCRIPT_CLOSE}                   { BEGIN(regst); RETURN(CLOSING_TAG) }
 <tmpll,unesc_tmpll><<EOF>>                                   { RETURN(SCRIPT_CONTINUE) }
 <tmpll>{UNICODE_ESCAPE_SEQUENCE}                             |
@@ -1167,6 +1168,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 
 <regst>{LITERAL_REGEX_START}        { EXEC(literal_regex_start()) }
 <regex>{LITERAL_REGEX_END}          { EXEC(literal_regex_end()) }
+<regex>{HTML_TAG_SCRIPT_OPEN}       { if (!ext_script) { opening_tag_seen = true; } ECHO; }
 <regex>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); RETURN(CLOSING_TAG) }
 <regex>\\{CR}                       { BEGIN(regst); RETURN(BAD_TOKEN) }
 <regex>\\{LF}                       { BEGIN(regst); RETURN(BAD_TOKEN) }
@@ -2006,6 +2008,11 @@ bool JSTokenizer::is_mixed_encoding_seen() const
     return mixed_encoding_seen;
 }
 
+bool JSTokenizer::is_opening_tag_seen() const
+{
+    return opening_tag_seen;
+}
+
 void JSTokenizer::set_block_param(bool f)
 {
     scope_cur().block_param = f;
@@ -2935,6 +2942,19 @@ void JSTokenizer::char_code_no_match()
     memset((void*)(states + sp), 0, sizeof(states[0]));
 }
 
+void JSTokenizer::explicit_otag()
+{
+    if (!ext_script)
+        opening_tag_seen = true;
+
+    // discard match of the script tag and scan again without leading '<'
+    states_correct(1);
+    yyless(1);
+
+    // process leading '<' as a comparison operator
+    operator_comparison();
+}
+
 JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in, bool external_script)
 {
     yy_flush_buffer(YY_CURRENT_BUFFER);
index 926f6f1b4fdfb48125e40e29053c99bcf7a2b7fb..7f6cd4218600ceca85f476046afefc2acc429e18 100644 (file)
@@ -67,6 +67,12 @@ using namespace snort;
     CHECK(!memcmp(dst, expected, act_len));                \
     delete[] dst;
 
+#define CHECK_OTAG(is_set)                 \
+    if (is_set)                            \
+        CHECK(norm.is_opening_tag_seen()); \
+    else                                   \
+        CHECK(!norm.is_opening_tag_seen());
+
 
 #define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \
     {                                                                 \
@@ -1683,10 +1689,7 @@ static const char unexpected_tag_buf0[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected0[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected0_ext[] =
-    "var a=1;";
+    "var a=1;<script>var b=2;";
 
 static const char unexpected_tag_buf1[] =
     "var a = 1;\n"
@@ -1694,10 +1697,7 @@ static const char unexpected_tag_buf1[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected1[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected1_ext[] =
-    "var a=1;";
+    "var a=1;<script type=application/javascript>var b=2;";
 
 static const char unexpected_tag_buf2[] =
     "var a = 1;\n"
@@ -1705,9 +1705,6 @@ static const char unexpected_tag_buf2[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected2[] =
-    "var a=1;var str='";
-
-static const char unexpected_tag_expected2_ext[] =
     "var a=1;var str='<script> something';var b=2;";
 
 static const char unexpected_tag_buf3[] =
@@ -1716,9 +1713,6 @@ static const char unexpected_tag_buf3[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected3[] =
-    "var a=1;var str='something ";
-
-static const char unexpected_tag_expected3_ext[] =
     "var a=1;var str='something <script> something';var b=2;";
 
 static const char unexpected_tag_buf4[] =
@@ -1727,9 +1721,6 @@ static const char unexpected_tag_buf4[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected4[] =
-    "var a=1;var str='something ";
-
-static const char unexpected_tag_expected4_ext[] =
     "var a=1;var str='something <script>';var b=2;";
 
 static const char unexpected_tag_buf5[] =
@@ -1771,9 +1762,6 @@ static const char unexpected_tag_buf8[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected8[] =
-    "var a=1;var str='something \\";
-
-static const char unexpected_tag_expected8_ext[] =
     "var a=1;var str='something \\<script> something';var b=2;";
 
 static const char unexpected_tag_buf9[] =
@@ -1793,9 +1781,6 @@ static const char unexpected_tag_buf10[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected10[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected10_ext[] =
     "var a=1;var b=2;";
 
 static const char unexpected_tag_buf11[] =
@@ -1804,9 +1789,6 @@ static const char unexpected_tag_buf11[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected11[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected11_ext[] =
     "var a=1;var b=2;";
 
 static const char unexpected_tag_buf12[] =
@@ -1815,9 +1797,6 @@ static const char unexpected_tag_buf12[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected12[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected12_ext[] =
     "var a=1;var b=2;";
 
 static const char unexpected_tag_buf13[] =
@@ -1826,9 +1805,6 @@ static const char unexpected_tag_buf13[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected13[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected13_ext[] =
     "var a=1;var b=2;";
 
 static const char unexpected_tag_buf14[] =
@@ -1837,9 +1813,6 @@ static const char unexpected_tag_buf14[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected14[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected14_ext[] =
     "var a=1;var b=2;";
 
 static const char unexpected_tag_buf15[] =
@@ -1848,9 +1821,6 @@ static const char unexpected_tag_buf15[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected15[] =
-    "var a=1;";
-
-static const char unexpected_tag_expected15_ext[] =
     "var a=1;var b=2;";
 
 static const char unexpected_tag_buf16[] =
@@ -1931,18 +1901,12 @@ static const char unexpected_tag_buf22[] =
 static const char unexpected_tag_expected22[] =
     "var a=1;var str='script somescript /script something';var b=2;";
 
-static const char unexpected_tag_expected22_ext[] =
-    "var a=1;var str='script somescript /script something';var b=2;";
-
 static const char unexpected_tag_buf23[] =
     "var a = 1;\n"
     "var str = 'script somescript /script something <script>';\n"
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected23[] =
-    "var a=1;var str='script somescript /script something ";
-
-static const char unexpected_tag_expected23_ext[] =
     "var a=1;var str='script somescript /script something <script>';var b=2;";
 
 static const char unexpected_tag_buf24[] =
@@ -1951,37 +1915,51 @@ static const char unexpected_tag_buf24[] =
     "var b = 2;\r\n";
 
 static const char unexpected_tag_expected24[] =
-    "var a=1;var str='something ";
-
-static const char unexpected_tag_expected24_ext[] =
     "var a=1;var str='something <sCrIpT>';var b=2;";
 
+static const char unexpected_tag_buf25[] =
+    "var template = ` <script> ` ;";
+
+static const char unexpected_tag_expected25[] =
+    "var template=` <script> `;";
+
+static const char unexpected_tag_buf26[] =
+    "var regex = / <script> / ;";
+
+static const char unexpected_tag_expected26[] =
+    "var regex=/ <script> /;";
+
 TEST_CASE("nested script tags", "[JSNormalizer]")
 {
     SECTION("explicit open tag - simple")
     {
         NORMALIZE(unexpected_tag_buf0);
-        VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, JSTokenizer::OPENING_TAG, 19);
+        VALIDATE(unexpected_tag_buf0, unexpected_tag_expected0);
+        CHECK_OTAG(true);
     }
     SECTION("explicit open tag - complex")
     {
         NORMALIZE(unexpected_tag_buf1);
-        VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, JSTokenizer::OPENING_TAG, 19);
+        VALIDATE(unexpected_tag_buf1, unexpected_tag_expected1);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within literal - start")
     {
         NORMALIZE(unexpected_tag_buf2);
-        VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, JSTokenizer::OPENING_TAG, 30);
+        VALIDATE(unexpected_tag_buf2, unexpected_tag_expected2);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within literal - mid")
     {
         NORMALIZE(unexpected_tag_buf3);
-        VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, JSTokenizer::OPENING_TAG, 40);
+        VALIDATE(unexpected_tag_buf3, unexpected_tag_expected3);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within literal - end")
     {
         NORMALIZE(unexpected_tag_buf4);
-        VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, JSTokenizer::OPENING_TAG, 40);
+        VALIDATE(unexpected_tag_buf4, unexpected_tag_expected4);
+        CHECK_OTAG(true);
     }
     SECTION("close tag within literal - start")
     {
@@ -2001,7 +1979,8 @@ TEST_CASE("nested script tags", "[JSNormalizer]")
     SECTION("open tag within literal - escaped")
     {
         NORMALIZE(unexpected_tag_buf8);
-        VALIDATE_FAIL(unexpected_tag_buf8, unexpected_tag_expected8, JSTokenizer::OPENING_TAG, 41);
+        VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8);
+        CHECK_OTAG(true);
     }
     SECTION("close tag within literal - escaped")
     {
@@ -2011,32 +1990,38 @@ TEST_CASE("nested script tags", "[JSNormalizer]")
     SECTION("open tag within single-line comment - start")
     {
         NORMALIZE(unexpected_tag_buf10);
-        VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, JSTokenizer::OPENING_TAG, 20);
+        VALIDATE(unexpected_tag_buf10, unexpected_tag_expected10);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within single-line comment - mid")
     {
         NORMALIZE(unexpected_tag_buf11);
-        VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, JSTokenizer::OPENING_TAG, 30);
+        VALIDATE(unexpected_tag_buf11, unexpected_tag_expected11);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within single-line comment - end")
     {
         NORMALIZE(unexpected_tag_buf12);
-        VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, JSTokenizer::OPENING_TAG, 30);
+        VALIDATE(unexpected_tag_buf12, unexpected_tag_expected12);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within multi-line comment - start")
     {
         NORMALIZE(unexpected_tag_buf13);
-        VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, JSTokenizer::OPENING_TAG, 20);
+        VALIDATE(unexpected_tag_buf13, unexpected_tag_expected13);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within multi-line comment - mid")
     {
         NORMALIZE(unexpected_tag_buf14);
-        VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, JSTokenizer::OPENING_TAG, 30);
+        VALIDATE(unexpected_tag_buf14, unexpected_tag_expected14);
+        CHECK_OTAG(true);
     }
     SECTION("open tag within multi-line comment - end")
     {
         NORMALIZE(unexpected_tag_buf15);
-        VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, JSTokenizer::OPENING_TAG, 30);
+        VALIDATE(unexpected_tag_buf15, unexpected_tag_expected15);
+        CHECK_OTAG(true);
     }
     SECTION("close tag within single-line comment - start")
     {
@@ -2076,12 +2061,26 @@ TEST_CASE("nested script tags", "[JSNormalizer]")
     SECTION("multiple patterns - matched")
     {
         NORMALIZE(unexpected_tag_buf23);
-        VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, JSTokenizer::OPENING_TAG, 66);
+        VALIDATE(unexpected_tag_buf23, unexpected_tag_expected23);
+        CHECK_OTAG(true);
     }
     SECTION("mixed lower and upper case")
     {
         NORMALIZE(unexpected_tag_buf24);
-        VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 40);
+        VALIDATE(unexpected_tag_buf24, unexpected_tag_expected24);
+        CHECK_OTAG(true);
+    }
+    SECTION("opening tag within template literal")
+    {
+        NORMALIZE(unexpected_tag_buf25);
+        VALIDATE(unexpected_tag_buf25, unexpected_tag_expected25);
+        CHECK_OTAG(true);
+    }
+    SECTION("opening tag within regex literal")
+    {
+        NORMALIZE(unexpected_tag_buf26);
+        VALIDATE(unexpected_tag_buf26, unexpected_tag_expected26);
+        CHECK_OTAG(true);
     }
 }
 
@@ -2094,6 +2093,7 @@ TEST_CASE("opening tag sequence", "[JSNormalizer]")
 
         NORMALIZE(src);
         VALIDATE(src, exp);
+        CHECK_OTAG(false);
     }
     SECTION("valid 1")
     {
@@ -2102,6 +2102,7 @@ TEST_CASE("opening tag sequence", "[JSNormalizer]")
 
         NORMALIZE(src);
         VALIDATE(src, exp);
+        CHECK_OTAG(false);
     }
     SECTION("valid 2")
     {
@@ -2110,54 +2111,61 @@ TEST_CASE("opening tag sequence", "[JSNormalizer]")
 
         NORMALIZE(src);
         VALIDATE(src, exp);
+        CHECK_OTAG(false);
     }
     SECTION("tabulation")
     {
         const char src[] = "<script\x9";
-        const char exp[] = "";
+        const char exp[] = "<script";
 
         NORMALIZE(src);
-        VALIDATE_FAIL(src, exp, JSTokenizer::OPENING_TAG, 8);
+        VALIDATE(src, exp);
+        CHECK_OTAG(true);
     }
     SECTION("line feed")
     {
         const char src[] = "<script\xA";
-        const char exp[] = "";
+        const char exp[] = "<script";
 
         NORMALIZE(src);
-        VALIDATE_FAIL(src, exp, JSTokenizer::OPENING_TAG, 8);
+        VALIDATE(src, exp);
+        CHECK_OTAG(true);
     }
     SECTION("form feed")
     {
         const char src[] = "<script\xC";
-        const char exp[] = "";
+        const char exp[] = "<script";
 
         NORMALIZE(src);
-        VALIDATE_FAIL(src, exp, JSTokenizer::OPENING_TAG, 8);
+        VALIDATE(src, exp);
+        CHECK_OTAG(true);
     }
     SECTION("space")
     {
         const char src[] = "<script\x20";
-        const char exp[] = "";
+        const char exp[] = "<script";
 
         NORMALIZE(src);
-        VALIDATE_FAIL(src, exp, JSTokenizer::OPENING_TAG, 8);
+        VALIDATE(src, exp);
+        CHECK_OTAG(true);
     }
     SECTION("solidus")
     {
         const char src[] = "<script\x2F";
-        const char exp[] = "";
+        const char exp[] = "<script\x2F";
 
         NORMALIZE(src);
-        VALIDATE_FAIL(src, exp, JSTokenizer::OPENING_TAG, 8);
+        VALIDATE(src, exp);
+        CHECK_OTAG(true);
     }
     SECTION("greater than")
     {
         const char src[] = "<script\x3E";
-        const char exp[] = "";
+        const char exp[] = "<script\x3E";
 
         NORMALIZE(src);
-        VALIDATE_FAIL(src, exp, JSTokenizer::OPENING_TAG, 8);
+        VALIDATE(src, exp);
+        CHECK_OTAG(true);
     }
 }
 
@@ -2166,27 +2174,32 @@ TEST_CASE("nested script tags in an external script", "[JSNormalizer]")
     SECTION("explicit open tag - simple")
     {
         NORMALIZE_EXT(unexpected_tag_buf0);
-        VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0_ext, JSTokenizer::OPENING_TAG, 19);
+        VALIDATE(unexpected_tag_buf0, unexpected_tag_expected0);
+        CHECK_OTAG(false);
     }
     SECTION("explicit open tag - complex")
     {
         NORMALIZE_EXT(unexpected_tag_buf1);
-        VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1_ext, JSTokenizer::OPENING_TAG, 19);
+        VALIDATE(unexpected_tag_buf1, unexpected_tag_expected1);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within literal - start")
     {
         NORMALIZE_EXT(unexpected_tag_buf2);
-        VALIDATE(unexpected_tag_buf2, unexpected_tag_expected2_ext);
+        VALIDATE(unexpected_tag_buf2, unexpected_tag_expected2);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within literal - mid")
     {
         NORMALIZE_EXT(unexpected_tag_buf3);
-        VALIDATE(unexpected_tag_buf3, unexpected_tag_expected3_ext);
+        VALIDATE(unexpected_tag_buf3, unexpected_tag_expected3);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within literal - end")
     {
         NORMALIZE_EXT(unexpected_tag_buf4);
-        VALIDATE(unexpected_tag_buf4, unexpected_tag_expected4_ext);
+        VALIDATE(unexpected_tag_buf4, unexpected_tag_expected4);
+        CHECK_OTAG(false);
     }
     SECTION("close tag within literal - start")
     {
@@ -2206,7 +2219,8 @@ TEST_CASE("nested script tags in an external script", "[JSNormalizer]")
     SECTION("open tag within literal - escaped")
     {
         NORMALIZE_EXT(unexpected_tag_buf8);
-        VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8_ext);
+        VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8);
+        CHECK_OTAG(false);
     }
     SECTION("close tag within literal - escaped")
     {
@@ -2216,32 +2230,38 @@ TEST_CASE("nested script tags in an external script", "[JSNormalizer]")
     SECTION("open tag within single-line comment - start")
     {
         NORMALIZE_EXT(unexpected_tag_buf10);
-        VALIDATE(unexpected_tag_buf10, unexpected_tag_expected10_ext);
+        VALIDATE(unexpected_tag_buf10, unexpected_tag_expected10);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within single-line comment - mid")
     {
         NORMALIZE_EXT(unexpected_tag_buf11);
-        VALIDATE(unexpected_tag_buf11, unexpected_tag_expected11_ext);
+        VALIDATE(unexpected_tag_buf11, unexpected_tag_expected11);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within single-line comment - end")
     {
         NORMALIZE_EXT(unexpected_tag_buf12);
-        VALIDATE(unexpected_tag_buf12, unexpected_tag_expected12_ext);
+        VALIDATE(unexpected_tag_buf12, unexpected_tag_expected12);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within multi-line comment - start")
     {
         NORMALIZE_EXT(unexpected_tag_buf13);
-        VALIDATE(unexpected_tag_buf13, unexpected_tag_expected13_ext);
+        VALIDATE(unexpected_tag_buf13, unexpected_tag_expected13);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within multi-line comment - mid")
     {
         NORMALIZE_EXT(unexpected_tag_buf14);
-        VALIDATE(unexpected_tag_buf14, unexpected_tag_expected14_ext);
+        VALIDATE(unexpected_tag_buf14, unexpected_tag_expected14);
+        CHECK_OTAG(false);
     }
     SECTION("open tag within multi-line comment - end")
     {
         NORMALIZE_EXT(unexpected_tag_buf15);
-        VALIDATE(unexpected_tag_buf15, unexpected_tag_expected15_ext);
+        VALIDATE(unexpected_tag_buf15, unexpected_tag_expected15);
+        CHECK_OTAG(false);
     }
     SECTION("close tag within single-line comment - start")
     {
@@ -2276,17 +2296,31 @@ TEST_CASE("nested script tags in an external script", "[JSNormalizer]")
     SECTION("multiple patterns - not matched")
     {
         NORMALIZE_EXT(unexpected_tag_buf22);
-        VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22_ext);
+        VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22);
     }
     SECTION("multiple patterns - matched")
     {
         NORMALIZE_EXT(unexpected_tag_buf23);
-        VALIDATE(unexpected_tag_buf23, unexpected_tag_expected23_ext);
+        VALIDATE(unexpected_tag_buf23, unexpected_tag_expected23);
+        CHECK_OTAG(false);
     }
     SECTION("mixed lower and upper case")
     {
         NORMALIZE_EXT(unexpected_tag_buf24);
-        VALIDATE(unexpected_tag_buf24, unexpected_tag_expected24_ext);
+        VALIDATE(unexpected_tag_buf24, unexpected_tag_expected24);
+        CHECK_OTAG(false);
+    }
+    SECTION("opening tag within template literal")
+    {
+        NORMALIZE_EXT(unexpected_tag_buf25);
+        VALIDATE(unexpected_tag_buf25, unexpected_tag_expected25);
+        CHECK_OTAG(false);
+    }
+    SECTION("opening tag within regex literal")
+    {
+        NORMALIZE_EXT(unexpected_tag_buf26);
+        VALIDATE(unexpected_tag_buf26, unexpected_tag_expected26);
+        CHECK_OTAG(false);
     }
 }
 
@@ -2338,6 +2372,94 @@ TEST_CASE("split between tokens", "[JSNormalizer]")
         NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
         NORM_COMBINED_3(dat1, dat2, dat3, exp);
     }
+    SECTION("complete open tag - identifier")
+    {
+        const char dat1[] = " ( a <script> ";
+        const char dat2[] = " b ) ";
+        const char exp1[] = "(a<script>";
+        const char exp2[] = "b)";
+        const char exp[] = "(a<script>b)";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("incomplete open tag - identifier")
+    {
+        const char dat1[] = " <script  ";
+        const char dat2[] = " a ; ";
+        const char exp1[] = "<script";
+        const char exp2[] = " a;";
+        const char exp[] = "<script a;";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("incomplete open tag - operator")
+    {
+        const char dat1[] = " ( a <script  ";
+        const char dat2[] = " ) ";
+        const char exp1[] = "(a<script";
+        const char exp2[] = ")";
+        const char exp[] = "(a<script)";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("identifier - complete open tag")
+    {
+        const char dat1[] = " ( a  ";
+        const char dat2[] = " <script> b ) ";
+        const char exp1[] = "(a";
+        const char exp2[] = "<script>b)";
+        const char exp[] = "(a<script>b)";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("identifier - incomplete open tag")
+    {
+        const char dat1[] = " ( a  ";
+        const char dat2[] = " <script  ) ";
+        const char exp1[] = "(a";
+        const char exp2[] = "<script)";
+        const char exp[] = "(a<script)";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("operator - incomplete open tag")
+    {
+        const char dat1[] = " ( a  < ";
+        const char dat2[] = " <script ) ";
+        const char exp1[] = "(a<";
+        const char exp2[] = "<script)";
+        const char exp[] = "(a<<script)";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("identifier - incomplete open tag as a comparison")
+    {
+        const char dat1[] = " ( a  < ";
+        const char dat2[] = " script ) ";
+        const char exp1[] = "(a<";
+        const char exp2[] = "script)";
+        const char exp[] = "(a<script)";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
+    SECTION("identifier - complete open tag as a comparison")
+    {
+        const char dat1[] = " ( a  <";
+        const char dat2[] = "script ) ";
+        const char exp1[] = "(a<";
+        const char exp2[] = "<script)";
+        const char exp[] = "(a<script)";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
+    }
 }
 
 TEST_CASE("split in comments", "[JSNormalizer]")
@@ -2489,33 +2611,33 @@ TEST_CASE("split in opening tag", "[JSNormalizer]")
         const char dat1[] = "<";
         const char dat2[] = "script>";
         const char exp1[] = "<";
-        const char exp2[] = "";
-        const char exp[] = "";
+        const char exp2[] = "<script>";
+        const char exp[] = "<script>";
 
-        NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
-        NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
     }
     SECTION("str='<s cript'")
     {
         const char dat1[] = "var str ='<s";
         const char dat2[] = "cript>';";
         const char exp1[] = "var str='<s";
-        const char exp2[] = "";
-        const char exp[]  = "var str='";
+        const char exp2[] = "<script>';";
+        const char exp[]  = "var str='<script>';";
 
-        NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
-        NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
     }
     SECTION("str='<scrip t'")
     {
         const char dat1[] = "var str ='<scrip";
         const char dat2[] = "t>';";
         const char exp1[] = "var str='<scrip";
-        const char exp2[] = "";
-        const char exp[] = "var str='";
+        const char exp2[] = "<script>';";
+        const char exp[] = "var str='<script>';";
 
-        NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::OPENING_TAG);
-        NORM_COMBINED_BAD_2(dat1, dat2, exp, JSTokenizer::OPENING_TAG);
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORM_COMBINED_2(dat1, dat2, exp);
     }
     SECTION("< scr ipt")
     {
@@ -2524,11 +2646,11 @@ TEST_CASE("split in opening tag", "[JSNormalizer]")
         const char dat3[] = "ipt>";
         const char exp1[] = "<";
         const char exp2[] = "scr";
-        const char exp3[] = "";
-        const char exp[] = "";
+        const char exp3[] = "<script>";
+        const char exp[] = "<script>";
 
-        NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
-        NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::OPENING_TAG);
+        NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
+        NORM_COMBINED_3(dat1, dat2, dat3, exp);
     }
     SECTION("str='<sc rip t'")
     {
@@ -2537,11 +2659,11 @@ TEST_CASE("split in opening tag", "[JSNormalizer]")
         const char dat3[] = "t>\";";
         const char exp1[] = "var str=\"<sc";
         const char exp2[] = "scrip";
-        const char exp3[] = "";
-        const char exp[] = "var str=\"";
+        const char exp3[] = "<script>\";";
+        const char exp[] = "var str=\"<script>\";";
 
-        NORM_BAD_3(dat1, dat2, dat3, exp1, exp2, exp3, JSTokenizer::OPENING_TAG);
-        NORM_COMBINED_BAD_3(dat1, dat2, dat3, exp, JSTokenizer::OPENING_TAG);
+        NORMALIZE_3(dat1, dat2, dat3, exp1, exp2, exp3);
+        NORM_COMBINED_3(dat1, dat2, dat3, exp);
     }
 }