From: Mike Stepanek (mstepane) <mstepane@cisco.com>
Date: Wed, 21 Apr 2021 17:39:43 +0000 (+0000)
Subject: Merge pull request #2848 in SNORT/snort3 from ~OSERHIIE/snort3:js_inline_scripts... 
X-Git-Tag: 3.1.4.0~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e4ddccd8b6dde2ef2b3c436fbe55158a4d58f78c;p=thirdparty%2Fsnort3.git

Merge pull request #2848 in SNORT/snort3 from ~OSERHIIE/snort3:js_inline_scripts to master

Squashed commit of the following:

commit 8d130d92807ecc480c3832e7e85697883bf1ae42
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Fri Apr 16 12:48:49 2021 +0300

    http_inspect: add js_inline_scripts peg count

commit 07beb04a28389e09bc0e77f672e86f58e5ef4194
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Fri Apr 16 10:31:01 2021 +0300

    http_inspect: identify external javascripts

commit b4e77cf2d524ecc076eb6007d9e1f4743b2852e4
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Fri Apr 16 01:34:30 2021 +0300

    http_inspect: add built-in alert for unexpected tags within inline javascript
---

diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h
index 0002e4bbf..d3cf7817b 100755
--- a/src/service_inspectors/http_inspect/http_enum.h
+++ b/src/service_inspectors/http_inspect/http_enum.h
@@ -62,7 +62,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES
     PEG_OTHER_METHOD, PEG_REQUEST_BODY, PEG_CHUNKED, PEG_URI_NORM, PEG_URI_PATH, PEG_URI_CODING,
     PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION,
     PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY,
-    PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_COUNT_MAX };
+    PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_COUNT_MAX };
 
 // Result of scanning by splitter
 enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE,
@@ -264,6 +264,7 @@ enum Infraction
     INF_MULTIPLE_HOST_HDRS,
     INF_HTTP2_SETTINGS,
     INF_UPGRADE_HEADER_HTTP2,
+    INF_JS_UNEXPECTED_TAG,
     INF__MAX_VALUE
 };
 
@@ -322,6 +323,7 @@ enum EventSid
     EVENT_PDF_UNSUP_COMP_TYPE = 115,
     EVENT_PDF_CASC_COMP = 116,
     EVENT_PDF_PARSE_FAILURE = 117,
+    EVENT_JS_UNEXPECTED_TAG = 118,
 
     EVENT_LOSS_OF_SYNC = 201,
     EVENT_CHUNK_ZEROS = 202,
diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc
index 2ce7fb0f4..8aad96222 100644
--- a/src/service_inspectors/http_inspect/http_js_norm.cc
+++ b/src/service_inspectors/http_inspect/http_js_norm.cc
@@ -23,6 +23,7 @@
 
 #include "http_js_norm.h"
 
+#include "utils/js_norm_state.h"
 #include "utils/js_normalizer.h"
 #include "utils/safec.h"
 #include "utils/util_jsnorm.h"
@@ -40,6 +41,7 @@ HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
 HttpJsNorm::~HttpJsNorm()
 {
     delete javascript_search_mpse;
+    delete js_src_attr_search_mpse;
     delete htmltype_search_mpse;
 }
 
@@ -49,11 +51,15 @@ void HttpJsNorm::configure()
         return;
 
     javascript_search_mpse = new SearchTool;
+    js_src_attr_search_mpse = new SearchTool;
     htmltype_search_mpse = new SearchTool;
 
     javascript_search_mpse->add(script_start, script_start_length, JS_JAVASCRIPT);
     javascript_search_mpse->prep();
 
+    js_src_attr_search_mpse->add(script_src_attr, script_src_attr_length, JS_ATTR_SRC);
+    js_src_attr_search_mpse->prep();
+
     struct HiSearchToken
     {
         const char* name;
@@ -78,8 +84,8 @@ void HttpJsNorm::configure()
     configure_once = true;
 }
 
-void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
-    int64_t js_normalization_depth) const
+void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
+    HttpEventGen* events, int64_t js_normalization_depth) const
 {
     bool js_present = false;
     int index = 0;
@@ -88,6 +94,10 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
 
     uint8_t* buffer = new uint8_t[input.length()];
 
+    JSNormState state;
+    state.norm_depth = js_normalization_depth;
+    state.alerts = 0;
+
     while (ptr < end)
     {
         int bytes_copied = 0;
@@ -103,12 +113,15 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
                 break;
 
             bool type_js = false;
+            bool external_js = false;
             if (angle_bracket > js_start)
             {
                 int mid;
                 const int script_found = htmltype_search_mpse->find(
                     js_start, (angle_bracket-js_start), search_html_found, false, &mid);
 
+                external_js = is_external_script(js_start, angle_bracket);
+
                 js_start = angle_bracket + 1;
                 if (script_found > 0)
                 {
@@ -138,11 +151,13 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
             }
 
             ptr = js_start;
-            if (!type_js)
+            if (!type_js or external_js)
                 continue;
 
             JSNormalizer::normalize(js_start, (uint16_t)(end-js_start), (char*)buffer+index,
-                (uint16_t)(input.length() - index), &ptr, &bytes_copied, js_normalization_depth);
+                (uint16_t)(input.length() - index), &ptr, &bytes_copied, state);
+
+            HttpModule::increment_peg_counts(PEG_JS_INLINE);
 
             index += bytes_copied;
         }
@@ -151,7 +166,14 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
     }
 
     if (js_present)
+    {
+        if (state.alerts & ALERT_UNEXPECTED_TAG)
+        {
+            *infractions += INF_JS_UNEXPECTED_TAG;
+            events->create_event(EVENT_JS_UNEXPECTED_TAG);
+        }
         output.set(index, buffer, true);
+    }
     else
         delete[] buffer;
 }
@@ -276,9 +298,36 @@ int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*)
     *((int*) index_ptr) = index - script_start_length;
     return 1;
 }
+int HttpJsNorm::search_js_src_attr_found(void*, void*, int index, void* index_ptr, void*)
+{
+    *static_cast<int*>(index_ptr) = index - script_src_attr_length;  // written to the caller's int
+    return 1;
+}
 int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
 {
     *((int*) id_ptr)  = (int)(uintptr_t)id;
     return 1;
 }
 
+// True if the tag text [it, script_tag_end) has "SRC", then optional whitespace, then '='.
+bool HttpJsNorm::is_external_script(const char* it, const char* script_tag_end) const
+{
+    int src_pos = 0;
+
+    while (js_src_attr_search_mpse->find(it, (script_tag_end - it),
+        search_js_src_attr_found, false, &src_pos))
+    {
+        it += (src_pos + script_src_attr_length - 1);  // the scan below pre-increments past this
+        while (++it < script_tag_end)
+        {
+            if (*it == '=')
+                return true;
+            // HTML allows space, tab, LF, FF and CR between an attribute name and '='
+            if (*it != ' ' and *it != '\t' and *it != '\n' and *it != '\f' and *it != '\r')
+                break;
+        }
+    }
+
+    return false;
+}
+
diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h
index 5f083349a..f48ec40d4 100644
--- a/src/service_inspectors/http_inspect/http_js_norm.h
+++ b/src/service_inspectors/http_inspect/http_js_norm.h
@@ -39,26 +39,33 @@ public:
     ~HttpJsNorm();
     void legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions,
         HttpEventGen* events, int max_javascript_whitespaces) const;
-    void enhanced_normalize(const Field& input, Field& output,
-        int64_t js_normalization_depth) const;
+    void enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
+        HttpEventGen* events, int64_t js_normalization_depth) const;
 
     void configure();
 private:
     bool configure_once = false;
 
     enum JsSearchId { JS_JAVASCRIPT };
+    enum JsSrcAttrSearchId { JS_ATTR_SRC };
     enum HtmlSearchId { HTML_JS, HTML_EMA, HTML_VB };
 
     static constexpr const char* script_start = "<SCRIPT";
     static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
+    static constexpr const char* script_src_attr = "SRC";
+    static constexpr int script_src_attr_length = sizeof("SRC") - 1;
 
     const HttpParaList::UriParam& uri_param;
 
     snort::SearchTool* javascript_search_mpse;
+    snort::SearchTool* js_src_attr_search_mpse;
     snort::SearchTool* htmltype_search_mpse;
 
     static int search_js_found(void*, void*, int index, void*, void*);
+    static int search_js_src_attr_found(void*, void*, int index, void*, void*);
     static int search_html_found(void* id, void*, int, void*, void*);
+
+    bool is_external_script(const char* it, const char* script_tag_end) const;
 };
 
 #endif
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc
index 623384b2a..3b3a4000f 100644
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -290,6 +290,7 @@ void HttpMsgBody::do_js_normalization(const Field& input, Field& output)
         output.set(input);
 
         params->js_norm_param.js_norm->enhanced_normalize(input, enhanced_js_norm_body,
+            transaction->get_infractions(source_id), session_data->events[source_id],
             params->js_norm_param.js_normalization_depth);
 
         const int32_t norm_length =
diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc
index 022b13815..ad7f3c869 100755
--- a/src/service_inspectors/http_inspect/http_tables.cc
+++ b/src/service_inspectors/http_inspect/http_tables.cc
@@ -357,6 +357,7 @@ const RuleMap HttpModule::http_events[] =
     { EVENT_PDF_UNSUP_COMP_TYPE,        "PDF file unsupported compression type" },
     { EVENT_PDF_CASC_COMP,              "PDF file cascaded compression" },
     { EVENT_PDF_PARSE_FAILURE,          "PDF file parse failure" },
+    { EVENT_JS_UNEXPECTED_TAG,          "unexpected script tag within inline javascript" },
     { EVENT_LOSS_OF_SYNC,               "not HTTP traffic" },
     { EVENT_CHUNK_ZEROS,                "chunk length has excessive leading zeros" },
     { EVENT_WS_BETWEEN_MSGS,            "white space before or between messages" },
@@ -463,6 +464,7 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] =
     { CountType::SUM, "pipelined_flows", "total HTTP connections containing pipelined requests" },
     { CountType::SUM, "pipelined_requests", "total requests placed in a pipeline" },
     { CountType::SUM, "total_bytes", "total HTTP data bytes inspected" },
+    { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" },
     { CountType::END, nullptr, nullptr }
 };
 
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
index 38fc2ddce..d42b18936 100644
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -32,6 +32,7 @@ add_library ( utils OBJECT
     dnet_header.h
     dyn_array.cc
     dyn_array.h
+    js_norm_state.h
     js_normalizer.cc
     js_normalizer.h
     js_tokenizer.h
diff --git a/src/utils/js_norm_state.h b/src/utils/js_norm_state.h
new file mode 100644
index 000000000..764edb3d3
--- /dev/null
+++ b/src/utils/js_norm_state.h
@@ -0,0 +1,37 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_norm_state.h author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifndef JS_NORM_STATE_H
+#define JS_NORM_STATE_H
+
+#include "main/snort_types.h"
+
+namespace snort
+{
+#define ALERT_UNEXPECTED_TAG 0x1  // JSNormState::alerts bit; a macro, so not scoped by the namespace
+
+struct JSNormState  // state passed by reference to JSNormalizer::normalize() and JSTokenizer
+{
+    int64_t norm_depth;  // upper bound on the source bytes given to the tokenizer's input buffer
+    uint16_t alerts;     // bitmask of ALERT_* flags; no default initializer, callers zero it first
+};
+}
+
+#endif // JS_NORM_STATE_H
+
diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc
index 1c41eaddc..a5868fe05 100644
--- a/src/utils/js_normalizer.cc
+++ b/src/utils/js_normalizer.cc
@@ -30,13 +30,13 @@
 using namespace snort;
 
 int JSNormalizer::normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
-        const char** ptr, int* bytes_copied, int64_t norm_depth)
+        const char** ptr, int* bytes_copied, JSNormState& state)
 {
     std::stringstream in, out;
+    in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf),
+        (state.norm_depth >= srclen) ? srclen : state.norm_depth);
 
-    in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf), (norm_depth >= srclen) ? srclen : norm_depth);
-    JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied);
-
+    JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied, state);
     return tokenizer.yylex();
 }
 
diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h
index 9152e2142..2e562bb1b 100644
--- a/src/utils/js_normalizer.h
+++ b/src/utils/js_normalizer.h
@@ -22,13 +22,15 @@
 
 #include "main/snort_types.h"
 
+#include "js_norm_state.h"
+
 namespace snort
 {
 class JSNormalizer
 {
 public:
     static int normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
-        const char** ptr, int* bytes_copied, int64_t norm_depth);
+        const char** ptr, int* bytes_copied, JSNormState& state);
 };
 }
 
diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h
index 892fdc425..2e284ef44 100644
--- a/src/utils/js_tokenizer.h
+++ b/src/utils/js_tokenizer.h
@@ -24,6 +24,8 @@
 
 #include "log/messages.h"
 
+#include "js_norm_state.h"
+
 class JSTokenizer : public yyFlexLexer
 {
 private:
@@ -35,14 +37,13 @@ private:
         PUNCTUATOR,
         OPERATOR,
         LITERAL,
-        DIRECTIVE,
-        TAG_SCRIPT_OPEN
+        DIRECTIVE
     };
 
 public:
     // we need an out stream because yyFlexLexer API strongly requires that
     JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
-        const uint16_t dstlen, const char** ptr, int* bytes_copied);
+        const uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state);
     ~JSTokenizer() override;
 
     // so, Flex will treat this class as yyclass
@@ -66,11 +67,11 @@ private:
     bool eval_string_literal(const char* match_prefix, const char quotes);
     bool eval_regex_literal(const char* match_prefix);
     bool eval_eof();
-    void skip_single_line_comment();
-    void skip_multi_line_comment();
+    bool eval_single_line_comment();
+    bool eval_multi_line_comment();
 
     bool parse_literal(const std::string& match_prefix, const char sentinel_ch,
-        std::string& result, bool is_regex = false);
+        std::string& result, bool& is_alert, bool is_regex = false);
 
     // main lexeme handler
     // all scanned tokens must pass here
@@ -80,7 +81,6 @@ private:
     bool normalize_punctuator(const JSToken prev_tok, const char* lexeme);
     bool normalize_operator(const JSToken prev_tok, const char* lexeme);
     bool normalize_directive(const JSToken prev_tok, const char* lexeme);
-    bool normalize_tag_script_open(const JSToken prev_tok, const char* lexeme);
     bool normalize_undefined(const JSToken prev_tok, const char* lexeme);
     bool normalize_lexeme(const JSToken prev_tok, const char* lexeme);
 
@@ -100,6 +100,8 @@ private:
 
     JSToken prev_tok = UNDEFINED;
 
+    snort::JSNormState& state;
+
 };
 
 #endif // JS_TOKENIZER_H
diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l
index af06087af..84e5ef6ea 100644
--- a/src/utils/js_tokenizer.l
+++ b/src/utils/js_tokenizer.l
@@ -31,6 +31,10 @@
     #endif
 
     #include "utils/js_tokenizer.h"
+
+    #include <cassert>
+
+    #include "utils/util_cstring.h"
 %}
 
 /* The following grammar was created based on ECMAScript specification */
@@ -869,9 +873,9 @@ LITERAL_INFINITY              Infinity|\xE2\x88\x9E
 LITERAL_NAN                   NaN
 LITERAL                       {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
 
-HTML_COMMENT_OPEN    <!--
-TAG_SCRIPT_OPEN      (?i:<script)
-TAG_SCRIPT_CLOSE     (?i:<\/script>)
+HTML_COMMENT_OPEN         <!--
+HTML_TAG_SCRIPT_OPEN      (?i:<script)
+HTML_TAG_SCRIPT_CLOSE     (?i:<\/script>)
 
 /* from 0x000 to 0x10FFFD to match undefined tokens */
 /* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
@@ -889,11 +893,11 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <*>{WHITESPACES}                                        { /* skip */ }
 <*>{CHAR_ESCAPE_SEQUENCES}                              { /* skip */ }
 <*>{LINE_TERMINATORS}                                   { BEGIN(regex); }
-<*>{TAG_SCRIPT_OPEN}                                    { if ( !eval(TAG_SCRIPT_OPEN, YYText()) ) { update_ptr(); return 1; } }
-<*>{TAG_SCRIPT_CLOSE}                                   { update_ptr(); *ptr -= YYLeng(); return 0; }
-<*>{HTML_COMMENT_OPEN}                                  { skip_single_line_comment(); }
-<*>{SINGLE_LINE_COMMENT}                                { skip_single_line_comment(); }
-<*>{MULTI_LINE_COMMENT}                                 { skip_multi_line_comment(); }
+<*>{HTML_TAG_SCRIPT_OPEN}                               { state.alerts |= ALERT_UNEXPECTED_TAG; update_ptr(); return 1; }
+<*>{HTML_TAG_SCRIPT_CLOSE}                              { update_ptr(); *ptr -= YYLeng(); return 0; }
+<*>{HTML_COMMENT_OPEN}                                  { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
+<*>{SINGLE_LINE_COMMENT}                                { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
+<*>{MULTI_LINE_COMMENT}                                 { if ( !eval_multi_line_comment() ) { update_ptr(); return 1; } }
 <*>{USE_STRICT_DIRECTIVE}                               { if ( !eval(DIRECTIVE, YYText()) ) { update_ptr(); return 1; } }
 <*>{KEYWORD}                                            { if ( !eval(KEYWORD, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
 <*>{CLOSING_BRACES}                                     { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
@@ -909,8 +913,6 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <<EOF>>                                                 { if ( eval_eof() ) { update_ptr(); return 0; } }
 %%
 
-#include <cassert>
-
 // static helper functions
 
 static std::string unicode_to_utf8(const unsigned int code)
@@ -985,6 +987,40 @@ static std::string unescape_unicode(const char* lexeme)
     return res;
 }
 
+// Tells whether str contains "<script" or "</script" (matched through SnortStrcasestr).
+// "<script" is ignored when the byte right before '<' is a backslash.
+static bool contains_script_tags(const std::string& str)
+{
+    static constexpr const char* script = "SCRIPT";
+    static constexpr const int script_len = sizeof("SCRIPT") - 1;
+
+    const char* const first = str.c_str();
+    const char* const last = first + str.size();
+    const char* cursor = first;
+
+    while ( (cursor = snort::SnortStrcasestr(cursor, (last - cursor), script)) != nullptr )
+    {
+        // number of bytes in front of the hit; decides how far back we may look
+        const int lead = cursor - first;
+        if ( lead == 1 )
+        {
+            if ( cursor[-1] == '<' )
+                return true;
+        }
+        else if ( lead >= 2 )
+        {
+            const char prev1 = cursor[-1];
+            const char prev2 = cursor[-2];
+            if ( (prev1 == '/' and prev2 == '<') or (prev1 == '<' and prev2 != '\\') )
+                return true;
+        }
+        // no tag in front of this hit: resume the search right after it
+        cursor += script_len;
+    }
+
+    return false;
+}
+
 // JSTokenizer members
 
 struct JSTokenizer::ScanBuffers
@@ -994,12 +1030,13 @@ struct JSTokenizer::ScanBuffers
 };
 
 JSTokenizer::JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
-    uint16_t dstlen, const char** ptr, int* bytes_copied)
+    uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state)
     : yyFlexLexer(in, out),
       dstbuf(dstbuf),
       dstlen(dstlen),
       ptr(ptr),
-      bytes_copied(bytes_copied)
+      bytes_copied(bytes_copied),
+      state(state)
 {
     assert(bytes_copied);
     init();
@@ -1049,7 +1086,11 @@ bool JSTokenizer::eval_identifier(const char* lexeme)
 bool JSTokenizer::eval_string_literal(const char* match_prefix, const char quotes)
 {
     std::string s;
-    bool is_ok = parse_literal(match_prefix, quotes, s);
+    bool is_alert = false;
+    bool is_ok = parse_literal(match_prefix, quotes, s, is_alert);
+
+    if ( is_alert )
+        return false;
 
     return eval(is_ok ? LITERAL : UNDEFINED, s.c_str());
 }
@@ -1059,7 +1100,11 @@ bool JSTokenizer::eval_regex_literal(const char* match_prefix)
     static const std::string regex_flags = "gimsuy";
 
     std::string s;
-    bool is_ok = parse_literal(match_prefix, '/', s, true);
+    bool is_alert = false;
+    bool is_ok = parse_literal(match_prefix, '/', s, is_alert, true);
+
+    if ( is_alert )
+        return false;
 
     // append regex flags
     char c;
@@ -1095,23 +1140,35 @@ bool JSTokenizer::eval_eof()
     return true;
 }
 
-void JSTokenizer::skip_single_line_comment()
+bool JSTokenizer::eval_single_line_comment()
 {
     char c;
+    std::string result;
 
     while ( (c = yyinput()) != 0 )
     {
+        result += c;
         if ( c == '\n' )
             break;
     }
+
+    if ( contains_script_tags(result) )
+    {
+        state.alerts |= ALERT_UNEXPECTED_TAG;
+        return false;
+    }
+    else
+        return true;
 }
 
-void JSTokenizer::skip_multi_line_comment()
+bool JSTokenizer::eval_multi_line_comment()
 {
     char c;
+    std::string result;
 
     while ( (c = yyinput()) != 0 )
     {
+        result += c;
         if ( c == '*' )
         {
             if ( (c = yyinput()) == '/' )
@@ -1120,6 +1177,14 @@ void JSTokenizer::skip_multi_line_comment()
                 unput(c);
         }
     }
+
+    if ( contains_script_tags(result) )
+    {
+        state.alerts |= ALERT_UNEXPECTED_TAG;
+        return false;
+    }
+    else
+        return true;
 }
 
 // Unicode line terminators
@@ -1133,7 +1198,7 @@ void JSTokenizer::skip_multi_line_comment()
 // Call this method when lexer meets those literals
 // match_prefix is a lexeme part already matched by the lexer (with sentinel char)
 bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sentinel_ch,
-    std::string& result, bool is_regex)
+    std::string& result, bool& is_alert, bool is_regex)
 {
     bool is_ok = true;
     char c;
@@ -1206,6 +1271,12 @@ bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sent
     if ( result.find(LS) != std::string::npos or result.find(PS) != std::string::npos )
         is_ok = false;
 
+    if ( contains_script_tags(result) )
+    {
+        is_alert = true;
+        state.alerts |= ALERT_UNEXPECTED_TAG;
+    }
+
     return is_ok;
 }
 
@@ -1239,10 +1310,6 @@ bool JSTokenizer::eval(const JSToken tok, const char* lexeme)
         ret = normalize_directive(prev_tok, lexeme);
     break;
 
-    case TAG_SCRIPT_OPEN:
-        ret = normalize_tag_script_open(prev_tok, lexeme);
-    break;
-
     case UNDEFINED:
         ret = normalize_undefined(prev_tok, lexeme);
     break;
@@ -1276,7 +1343,6 @@ bool JSTokenizer::normalize_operator(const JSToken prev_tok, const char* lexeme)
     case PUNCTUATOR:
     case LITERAL:
     case DIRECTIVE:
-    case TAG_SCRIPT_OPEN:
     case UNDEFINED:
         return write_output(lexeme);
     break;
@@ -1299,12 +1365,6 @@ bool JSTokenizer::normalize_directive(const JSToken prev_tok, const char* lexeme
     return normalize_lexeme(prev_tok, str.c_str());
 }
 
-bool JSTokenizer::normalize_tag_script_open(const JSToken, const char* lexeme)
-{
-    // FIXIT-L add builtin alert here
-    return write_output(lexeme);
-}
-
 bool JSTokenizer::normalize_undefined(const JSToken, const char* lexeme)
 { return write_output(lexeme); }
 
@@ -1322,7 +1382,6 @@ bool JSTokenizer::normalize_lexeme(const JSToken prev_tok, const char* lexeme)
     case IDENTIFIER:
     case KEYWORD:
     case LITERAL:
-    case TAG_SCRIPT_OPEN:
         return write_output(" " + std::string(lexeme));
     break;
     }
diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt
index ca5bf3637..816907aa4 100644
--- a/src/utils/test/CMakeLists.txt
+++ b/src/utils/test/CMakeLists.txt
@@ -14,5 +14,6 @@ add_catch_test( js_normalizer_test
     SOURCES
         ${FLEX_js_tokenizer_OUTPUTS}
         ../js_normalizer.cc
+        ../util_cstring.cc
 )
 
diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc
index 117660f5b..1100bbf48 100644
--- a/src/utils/test/js_normalizer_test.cc
+++ b/src/utils/test/js_normalizer_test.cc
@@ -42,9 +42,11 @@ using namespace snort;
     char dstbuf[sizeof(expected)];                                         \
     int bytes_copied;                                                      \
     const char* ptr = srcbuf;                                              \
-    int norm_depth = NORM_DEPTH;                                           \
+    JSNormState state;                                                     \
+    state.norm_depth = NORM_DEPTH;                                         \
+    state.alerts = 0;                                                      \
     int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf),              \
-        dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, norm_depth);
+        dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, state);
 
 #define VALIDATE(srcbuf, expected)                    \
     CHECK(ret == 0);                                  \
@@ -52,6 +54,15 @@ using namespace snort;
     CHECK(bytes_copied == sizeof(expected) - 1);      \
     CHECK(!memcmp(dstbuf, expected, bytes_copied));
 
+#define VALIDATE_FAIL(srcbuf, expected, ret_code, ptr_offset)      \
+    CHECK(ret == ret_code);                                        \
+    CHECK((ptr - srcbuf) == ptr_offset);                           \
+    CHECK(bytes_copied == sizeof(expected) - 1);                   \
+    CHECK(!memcmp(dstbuf, expected, bytes_copied));
+
+#define VALIDATE_ALERT(alert)       \
+    CHECK(state.alerts & alert);
+
 // ClamAV test cases
 static const char clamav_buf0[] =
     "function foo(a, b) {\n"
@@ -308,15 +319,6 @@ static const char all_patterns_expected5[] =
     "$2abc _2abc abc $__$ èæ XÃA12 \u0041abc \u00FBdef \u1234ghi ab ww "
     "ab ww ab ww ab â ww 2 abc";
 
-static const char all_patterns_buf6[] =
-    "var a = 1;\n"
-    "<script>\n"
-    "<script var>\n"
-    "var b = 2 ;\n";
-
-static const char all_patterns_expected6[] =
-    "var a=1;<script><script var>var b=2;";
-
 TEST_CASE("all patterns", "[JSNormalizer]")
 {
     SECTION("whitespaces and special characters")
@@ -350,18 +352,20 @@ TEST_CASE("all patterns", "[JSNormalizer]")
         const char* ptr2 = srcbuf2;
         const char* ptr3 = srcbuf3;
         const char* ptr4 = srcbuf4;
-        int norm_depth = NORM_DEPTH;
+        JSNormState state;
+        state.norm_depth = NORM_DEPTH;
+        state.alerts = 0;
 
         int ret0 = JSNormalizer::normalize(srcbuf0, sizeof(srcbuf0), dstbuf0, sizeof(dstbuf0),
-            &ptr0, &bytes_copied0, norm_depth);
+            &ptr0, &bytes_copied0, state);
         int ret1 = JSNormalizer::normalize(srcbuf1, sizeof(srcbuf1), dstbuf1, sizeof(dstbuf1),
-            &ptr1, &bytes_copied1, norm_depth);
+            &ptr1, &bytes_copied1, state);
         int ret2 = JSNormalizer::normalize(srcbuf2, sizeof(srcbuf2), dstbuf2, sizeof(dstbuf2),
-            &ptr2, &bytes_copied2, norm_depth);
+            &ptr2, &bytes_copied2, state);
         int ret3 = JSNormalizer::normalize(srcbuf3, sizeof(srcbuf3), dstbuf3, sizeof(dstbuf3),
-            &ptr3, &bytes_copied3, norm_depth);
+            &ptr3, &bytes_copied3, state);
         int ret4 = JSNormalizer::normalize(srcbuf4, sizeof(srcbuf4), dstbuf4, sizeof(dstbuf4),
-            &ptr4, &bytes_copied4, norm_depth);
+            &ptr4, &bytes_copied4, state);
 
         CHECK(ret0 == 0);
         CHECK((ptr0 - srcbuf0) == sizeof(srcbuf0));
@@ -408,11 +412,6 @@ TEST_CASE("all patterns", "[JSNormalizer]")
         NORMALIZE(all_patterns_buf5, all_patterns_expected5);
         VALIDATE(all_patterns_buf5, all_patterns_expected5);
     }
-    SECTION("tag script open")
-    {
-        NORMALIZE(all_patterns_buf6, all_patterns_expected6);
-        VALIDATE(all_patterns_buf6, all_patterns_expected6);
-    }
 }
 
 // Tests for different syntax cases
@@ -828,9 +827,11 @@ TEST_CASE("norm_depth is specified", "[JSNormalizer]")
     char dstbuf[7];
     int bytes_copied;
     const char* ptr = srcbuf;
-    int norm_depth = 7;
+    JSNormState state;
+    state.norm_depth = 7;
+    state.alerts = 0;
     int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-        &bytes_copied, norm_depth);
+        &bytes_copied, state);
 
     CHECK(ret == 0);
     CHECK(bytes_copied == sizeof(expected) - 1);
@@ -849,9 +850,11 @@ TEST_CASE("tag script end is specified", "[JSNormalizer]")
     char dstbuf[sizeof(expected)];
     int bytes_copied;
     const char* ptr = srcbuf;
-    int norm_depth = NORM_DEPTH;
+    JSNormState state;
+    state.norm_depth = NORM_DEPTH;
+    state.alerts = 0;
     int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-        &bytes_copied, norm_depth);
+        &bytes_copied, state);
 
     CHECK(ret == 0);
     CHECK(bytes_copied == sizeof(expected) - 1);
@@ -870,9 +873,11 @@ TEST_CASE("parsing errors", "[JSNormalizer]")
         char dstbuf[7];
         int bytes_copied;
         const char* ptr = srcbuf;
-        int norm_depth = NORM_DEPTH;
+        JSNormState state;
+        state.norm_depth = NORM_DEPTH;
+        state.alerts = 0;
         int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-            &bytes_copied, norm_depth);
+            &bytes_copied, state);
 
         CHECK(ret == 1);
         CHECK(bytes_copied == sizeof(expected) - 1);
@@ -880,3 +885,359 @@ TEST_CASE("parsing errors", "[JSNormalizer]")
     }
 }
 
+static const char unexpected_tag_buf0[] = // bare <script> opening tag between two statements
+    "var a = 1;\n"
+    "<script>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected0[] = // only the statement preceding the tag is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf1[] = // opening <script> tag that carries a type attribute
+    "var a = 1;\n"
+    "<script type=application/javascript>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected1[] = // only the statement preceding the tag is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf2[] = // opening tag at the very start of a string literal
+    "var a = 1;\n"
+    "var str = '<script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected2[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf3[] = // opening tag in the middle of a string literal
+    "var a = 1;\n"
+    "var str = 'something <script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected3[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf4[] = // opening tag at the very end of a string literal
+    "var a = 1;\n"
+    "var str = 'something <script>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected4[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf5[] = // closing </script> tag at the start of a string literal
+    "var a = 1;\n"
+    "var str = '</script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected5[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf6[] = // closing </script> tag in the middle of a string literal
+    "var a = 1;\n"
+    "var str = 'something </script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected6[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf7[] = // closing </script> tag at the end of a string literal
+    "var a = 1;\n"
+    "var str = 'something </script>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected7[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf8[] = // opening tag whose angle brackets are backslash-escaped
+    "var a = 1;\n"
+    "var str = 'something \\<script\\> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected8[] = // whole script expected, literal kept verbatim
+    "var a=1;var str='something \\<script\\> something';var b=2;";
+
+static const char unexpected_tag_buf9[] = // closing tag with backslash-escaped brackets and slash
+    "var a = 1;\n"
+    "var str = 'something \\<\\/script\\> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected9[] = // whole script expected, literal kept verbatim
+    "var a=1;var str='something \\<\\/script\\> something';var b=2;";
+
+static const char unexpected_tag_buf10[] = // opening tag right after the "//" of a line comment
+    "var a = 1;\n"
+    "//<script> something\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected10[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf11[] = // opening tag in the middle of a "//" line comment
+    "var a = 1;\n"
+    "//something <script> something\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected11[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf12[] = // opening tag at the end of a "//" line comment
+    "var a = 1;\n"
+    "//something <script>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected12[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf13[] = // opening tag at the start of a block comment
+    "var a = 1;\n"
+    "/*<script> something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected13[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf14[] = // opening tag in the middle of a block comment
+    "var a = 1;\n"
+    "/*something <script> something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected14[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf15[] = // opening tag at the end of a block comment
+    "var a = 1;\n"
+    "/*something <script>*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected15[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf16[] = // closing tag right after the "//" of a line comment
+    "var a = 1;\n"
+    "//</script> something\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected16[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf17[] = // closing tag mid-line after an HTML-style "<!--" opener
+    "var a = 1;\n"
+    "<!--something </script> something//-->\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected17[] = // only the statement before that line is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf18[] = // closing tag at the end of a "//" line comment
+    "var a = 1;\n"
+    "//something </script>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected18[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf19[] = // closing tag on the first line of a two-line block comment
+    "var a = 1;\n"
+    "/*</script>\n"
+    "something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected19[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf20[] = // closing tag on the middle line of a three-line block comment
+    "var a = 1;\n"
+    "/*something\n"
+    "</script>\n"
+    "something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected20[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf21[] = // closing tag on the last line of a two-line block comment
+    "var a = 1;\n"
+    "/*something\n"
+    "</script>*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected21[] = // only the statement before the comment is expected
+    "var a=1;";
+
+static const char unexpected_tag_buf22[] = // the words "script" and "/script" without angle brackets
+    "var a = 1;\n"
+    "var str = 'script somescript /script something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected22[] = // whole script expected, literal kept verbatim
+    "var a=1;var str='script somescript /script something';var b=2;";
+
+static const char unexpected_tag_buf23[] = // bracket-less look-alikes followed by a real <script> tag
+    "var a = 1;\n"
+    "var str = 'script somescript /script something <script>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected23[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf24[] = // opening tag spelled in mixed case inside a string literal
+    "var a = 1;\n"
+    "var str = 'something <sCrIpT>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected24[] = // output is expected to end before the literal
+    "var a=1;var str=";
+
+TEST_CASE("unexpected script tag alert", "[JSNormalizer]") // runs every unexpected_tag_bufN fixture
+{
+    const int ret_code = 1; // handed to every VALIDATE_FAIL below; the macro is defined outside this hunk
+    SECTION("explicit open tag - simple")
+    {
+        NORMALIZE(unexpected_tag_buf0, unexpected_tag_expected0);
+        VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, ret_code, 18); // NOTE(review): 18 == length of "var a = 1;\n<script"; presumably a source offset - confirm
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("explicit open tag - complex")
+    {
+        NORMALIZE(unexpected_tag_buf1, unexpected_tag_expected1);
+        VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, ret_code, 18);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - start")
+    {
+        NORMALIZE(unexpected_tag_buf2, unexpected_tag_expected2);
+        VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, ret_code, 41); // NOTE(review): 41 == length of input up to and including the closing quote - confirm meaning
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - mid")
+    {
+        NORMALIZE(unexpected_tag_buf3, unexpected_tag_expected3);
+        VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, ret_code, 51);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - end")
+    {
+        NORMALIZE(unexpected_tag_buf4, unexpected_tag_expected4);
+        VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, ret_code, 41);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within literal - start")
+    {
+        NORMALIZE(unexpected_tag_buf5, unexpected_tag_expected5);
+        VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, ret_code, 42);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within literal - mid")
+    {
+        NORMALIZE(unexpected_tag_buf6, unexpected_tag_expected6);
+        VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, ret_code, 52);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within literal - end")
+    {
+        NORMALIZE(unexpected_tag_buf7, unexpected_tag_expected7);
+        VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, ret_code, 42);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - escaped")
+    {
+        NORMALIZE(unexpected_tag_buf8, unexpected_tag_expected8);
+        VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8); // plain VALIDATE: no failure code or alert is checked here
+    }
+    SECTION("close tag within literal - escaped")
+    {
+        NORMALIZE(unexpected_tag_buf9, unexpected_tag_expected9);
+        VALIDATE(unexpected_tag_buf9, unexpected_tag_expected9); // plain VALIDATE: no failure code or alert is checked here
+    }
+    SECTION("open tag within single-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf10, unexpected_tag_expected10);
+        VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, ret_code, 32);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within single-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf11, unexpected_tag_expected11);
+        VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, ret_code, 42);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within single-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf12, unexpected_tag_expected12);
+        VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, ret_code, 32);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within multi-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf13, unexpected_tag_expected13);
+        VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, ret_code, 33);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within multi-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf14, unexpected_tag_expected14);
+        VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, ret_code, 43);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within multi-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf15, unexpected_tag_expected15);
+        VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, ret_code, 33);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within single-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf16, unexpected_tag_expected16);
+        VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, ret_code, 33);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within single-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf17, unexpected_tag_expected17); // this fixture opens its comment with "<!--", not "//"
+        VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, ret_code, 50);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within single-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf18, unexpected_tag_expected18);
+        VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, ret_code, 33);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within multi-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf19, unexpected_tag_expected19);
+        VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, ret_code, 34);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within multi-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf20, unexpected_tag_expected20);
+        VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, ret_code, 44);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within multi-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf21, unexpected_tag_expected21);
+        VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, ret_code, 34);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("multiple patterns - not matched")
+    {
+        NORMALIZE(unexpected_tag_buf22, unexpected_tag_expected22);
+        VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22); // plain VALIDATE: no failure code or alert is checked here
+    }
+    SECTION("multiple patterns - matched")
+    {
+        NORMALIZE(unexpected_tag_buf23, unexpected_tag_expected23);
+        VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, ret_code, 67);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("mixed lower and upper case")
+    {
+        NORMALIZE(unexpected_tag_buf24, unexpected_tag_expected24);
+        VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, ret_code, 41);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+}
+