Merge pull request #2848 in SNORT/snort3 from ~OSERHIIE/snort3:js_inline_scripts...

author Mike Stepanek (mstepane) <mstepane@cisco.com>

Wed, 21 Apr 2021 17:39:43 +0000 (17:39 +0000)

committer Mike Stepanek (mstepane) <mstepane@cisco.com>

Wed, 21 Apr 2021 17:39:43 +0000 (17:39 +0000)
author Mike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 21 Apr 2021 17:39:43 +0000 (17:39 +0000)
committer Mike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 21 Apr 2021 17:39:43 +0000 (17:39 +0000)
diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h

index 0002e4bbf6c6da85c29aebea5cadedc56387d408..d3cf7817b52d37c1ba86fbf2a1e6177c3f06f4dd 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_enum.h
+++ b/src/service_inspectors/http_inspect/http_enum.h
@@ -62,7 +62,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES
      PEG_OTHER_METHOD, PEG_REQUEST_BODY, PEG_CHUNKED, PEG_URI_NORM, PEG_URI_PATH, PEG_URI_CODING,
      PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION,
      PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY,
-    PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_COUNT_MAX };
+    PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_COUNT_MAX };
  
  // Result of scanning by splitter
  enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE,
@@ -264,6 +264,7 @@ enum Infraction
      INF_MULTIPLE_HOST_HDRS,
      INF_HTTP2_SETTINGS,
      INF_UPGRADE_HEADER_HTTP2,
+    INF_JS_UNEXPECTED_TAG,
      INF__MAX_VALUE
  };
  
@@ -322,6 +323,7 @@ enum EventSid
      EVENT_PDF_UNSUP_COMP_TYPE = 115,
      EVENT_PDF_CASC_COMP = 116,
      EVENT_PDF_PARSE_FAILURE = 117,
+    EVENT_JS_UNEXPECTED_TAG = 118,
  
      EVENT_LOSS_OF_SYNC = 201,
      EVENT_CHUNK_ZEROS = 202,
diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc

index 2ce7fb0f4d21fb2bb8e7488441281c58fe310be0..8aad96222337c28cdbeaf48dc3a1b1abd0455b64 100644 (file)
--- a/src/service_inspectors/http_inspect/http_js_norm.cc
+++ b/src/service_inspectors/http_inspect/http_js_norm.cc
@@ -23,6 +23,7 @@
  
  #include "http_js_norm.h"
  
+#include "utils/js_norm_state.h"
  #include "utils/js_normalizer.h"
  #include "utils/safec.h"
  #include "utils/util_jsnorm.h"
@@ -40,6 +41,7 @@ HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
  HttpJsNorm::~HttpJsNorm()
  {
      delete javascript_search_mpse;
+    delete js_src_attr_search_mpse;
      delete htmltype_search_mpse;
  }
  
@@ -49,11 +51,15 @@ void HttpJsNorm::configure()
          return;
  
      javascript_search_mpse = new SearchTool;
+    js_src_attr_search_mpse = new SearchTool;
      htmltype_search_mpse = new SearchTool;
  
      javascript_search_mpse->add(script_start, script_start_length, JS_JAVASCRIPT);
      javascript_search_mpse->prep();
  
+    js_src_attr_search_mpse->add(script_src_attr, script_src_attr_length, JS_ATTR_SRC);
+    js_src_attr_search_mpse->prep();
+
      struct HiSearchToken
      {
          const char* name;
@@ -78,8 +84,8 @@ void HttpJsNorm::configure()
      configure_once = true;
  }
  
-void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
-    int64_t js_normalization_depth) const
+void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
+    HttpEventGen* events, int64_t js_normalization_depth) const
  {
      bool js_present = false;
      int index = 0;
@@ -88,6 +94,10 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
  
      uint8_t* buffer = new uint8_t[input.length()];
  
+    JSNormState state;
+    state.norm_depth = js_normalization_depth;
+    state.alerts = 0;
+
      while (ptr < end)
      {
          int bytes_copied = 0;
@@ -103,12 +113,15 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
                  break;
  
              bool type_js = false;
+            bool external_js = false;
              if (angle_bracket > js_start)
              {
                  int mid;
                  const int script_found = htmltype_search_mpse->find(
                      js_start, (angle_bracket-js_start), search_html_found, false, &mid);
  
+                external_js = is_external_script(js_start, angle_bracket);
+
                  js_start = angle_bracket + 1;
                  if (script_found > 0)
                  {
@@ -138,11 +151,13 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
              }
  
              ptr = js_start;
-            if (!type_js)
+            if (!type_js or external_js)
                  continue;
  
              JSNormalizer::normalize(js_start, (uint16_t)(end-js_start), (char*)buffer+index,
-                (uint16_t)(input.length() - index), &ptr, &bytes_copied, js_normalization_depth);
+                (uint16_t)(input.length() - index), &ptr, &bytes_copied, state);
+
+            HttpModule::increment_peg_counts(PEG_JS_INLINE);
  
              index += bytes_copied;
          }
@@ -151,7 +166,14 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
      }
  
      if (js_present)
+    {
+        if (state.alerts & ALERT_UNEXPECTED_TAG)
+        {
+            *infractions += INF_JS_UNEXPECTED_TAG;
+            events->create_event(EVENT_JS_UNEXPECTED_TAG);
+        }
          output.set(index, buffer, true);
+    }
      else
          delete[] buffer;
  }
@@ -276,9 +298,36 @@ int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*)
      *((int*) index_ptr) = index - script_start_length;
      return 1;
  }
+// Search callback used for the SRC attribute pattern.
+// Stores (index - pattern length) through index_ptr, which presumably turns
+// the end-of-match index reported by the search tool into the offset at
+// which the pattern starts. Always returns 1.
+int HttpJsNorm::search_js_src_attr_found(void*, void*, int index, void* index_ptr, void*)
+{
+    int* const match_start = static_cast<int*>(index_ptr);
+    *match_start = index - script_src_attr_length;
+    return 1;
+}
  int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
  {
      *((int*) id_ptr)  = (int)(uintptr_t)id;
      return 1;
  }
  
+// Tells whether the attribute area of an opening script tag carries a "src"
+// attribute, i.e. whether the script is external rather than inline.
+//
+//   it              - first byte to examine inside the opening tag
+//   script_tag_end  - one past the last byte to examine (the closing '>')
+//
+// Returns true when the SRC pattern is found as an attribute name of its own
+// and is followed, after optional HTML whitespace, by '='.
+//
+// The caller skips normalization for external scripts, so a false positive
+// here hides inline code from inspection. For that reason a match that is
+// merely the tail of a longer name (e.g. "data-src=" or "xsrc=") is rejected.
+// NOTE(review): text inside a quoted attribute value (title=" src=x") is still
+// matched; handling that needs real attribute parsing.
+bool HttpJsNorm::is_external_script(const char* it, const char* script_tag_end) const
+{
+    // ASCII whitespace that HTML allows between an attribute name and '='
+    const auto is_html_space = [](char c)
+    { return c == ' ' or c == '\t' or c == '\n' or c == '\f' or c == '\r'; };
+
+    // bytes that may legitimately precede the first character of an attribute name
+    const auto is_name_boundary = [&is_html_space](char c)
+    { return is_html_space(c) or c == '/' or c == '"' or c == '\''; };
+
+    const char* const tag_begin = it;
+    int src_pos;
+
+    while (js_src_attr_search_mpse->find(it, (script_tag_end - it),
+        search_js_src_attr_found, false, &src_pos))
+    {
+        const char* const attr = it + src_pos;
+
+        // nothing is known about the byte before the examined range,
+        // so a match at its very start is accepted as before
+        const bool own_name = (attr == tag_begin) or is_name_boundary(*(attr - 1));
+
+        // resume right after the matched pattern
+        it = attr + script_src_attr_length;
+
+        if (!own_name)
+            continue;
+
+        while (it < script_tag_end and is_html_space(*it))
+            ++it;
+
+        if (it < script_tag_end and *it == '=')
+            return true;
+    }
+
+    return false;
+}
+
diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h

index 5f083349a9fd47193600718461e0dc04f55ba71a..f48ec40d4678a97a6dd20a728fa7f48f13dd71ab 100644 (file)
--- a/src/service_inspectors/http_inspect/http_js_norm.h
+++ b/src/service_inspectors/http_inspect/http_js_norm.h
@@ -39,26 +39,33 @@ public:
      ~HttpJsNorm();
      void legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions,
          HttpEventGen* events, int max_javascript_whitespaces) const;
-    void enhanced_normalize(const Field& input, Field& output,
-        int64_t js_normalization_depth) const;
+    void enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
+        HttpEventGen* events, int64_t js_normalization_depth) const;
  
      void configure();
  private:
      bool configure_once = false;
  
      enum JsSearchId { JS_JAVASCRIPT };
+    enum JsSrcAttrSearchId { JS_ATTR_SRC };
      enum HtmlSearchId { HTML_JS, HTML_EMA, HTML_VB };
  
      static constexpr const char* script_start = "<SCRIPT";
      static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
+    static constexpr const char* script_src_attr = "SRC";
+    static constexpr int script_src_attr_length = sizeof("SRC") - 1;
  
      const HttpParaList::UriParam& uri_param;
  
      snort::SearchTool* javascript_search_mpse;
+    snort::SearchTool* js_src_attr_search_mpse;
      snort::SearchTool* htmltype_search_mpse;
  
      static int search_js_found(void*, void*, int index, void*, void*);
+    static int search_js_src_attr_found(void*, void*, int index, void*, void*);
      static int search_html_found(void* id, void*, int, void*, void*);
+
+    bool is_external_script(const char* it, const char* script_tag_end) const;
  };
  
  #endif
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc

index 623384b2a07f42fd8ac84c196931ff7cfd384d1d..3b3a4000f893cc8d681b5077c736b47d409dcf2f 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -290,6 +290,7 @@ void HttpMsgBody::do_js_normalization(const Field& input, Field& output)
          output.set(input);
  
          params->js_norm_param.js_norm->enhanced_normalize(input, enhanced_js_norm_body,
+            transaction->get_infractions(source_id), session_data->events[source_id],
              params->js_norm_param.js_normalization_depth);
  
          const int32_t norm_length =
diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc

index 022b13815751f02eef62e7637d910753df9cae16..ad7f3c8690b1457f4aea392e59e9545b8fec6f5a 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_tables.cc
+++ b/src/service_inspectors/http_inspect/http_tables.cc
@@ -357,6 +357,7 @@ const RuleMap HttpModule::http_events[] =
      { EVENT_PDF_UNSUP_COMP_TYPE,        "PDF file unsupported compression type" },
      { EVENT_PDF_CASC_COMP,              "PDF file cascaded compression" },
      { EVENT_PDF_PARSE_FAILURE,          "PDF file parse failure" },
+    { EVENT_JS_UNEXPECTED_TAG,          "unexpected script tag within inline javascript" },
      { EVENT_LOSS_OF_SYNC,               "not HTTP traffic" },
      { EVENT_CHUNK_ZEROS,                "chunk length has excessive leading zeros" },
      { EVENT_WS_BETWEEN_MSGS,            "white space before or between messages" },
@@ -463,6 +464,7 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] =
      { CountType::SUM, "pipelined_flows", "total HTTP connections containing pipelined requests" },
      { CountType::SUM, "pipelined_requests", "total requests placed in a pipeline" },
      { CountType::SUM, "total_bytes", "total HTTP data bytes inspected" },
+    { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" },
      { CountType::END, nullptr, nullptr }
  };
  
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt

index 38fc2ddce3816fbacce50b0d89927ad24fe4cd64..d42b1893615341cfff278ce030343286d68af43a 100644 (file)
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -32,6 +32,7 @@ add_library ( utils OBJECT
      dnet_header.h
      dyn_array.cc
      dyn_array.h
+    js_norm_state.h
      js_normalizer.cc
      js_normalizer.h
      js_tokenizer.h
diff --git a/src/utils/js_norm_state.h b/src/utils/js_norm_state.h

new file mode 100644 (file)

index 0000000..764edb3
--- /dev/null
+++ b/src/utils/js_norm_state.h
@@ -0,0 +1,37 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_norm_state.h author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifndef JS_NORM_STATE_H
+#define JS_NORM_STATE_H
+
+#include "main/snort_types.h"
+
+namespace snort
+{
+#define ALERT_UNEXPECTED_TAG 0x1
+
+// State passed by reference into JSNormalizer::normalize() and on to the
+// tokenizer. One instance may be reused for several normalize() calls, in
+// which case the alert flags accumulate.
+struct JSNormState
+{
+    // Cap on the number of source bytes handed to the tokenizer
+    // (the smaller of this value and the source length is used).
+    // Defaults to 0 so that a forgotten assignment is deterministic.
+    int64_t norm_depth = 0;
+
+    // Bit mask of ALERT_* flags; the tokenizer only ever ORs bits in,
+    // so it has to start out cleared.
+    uint16_t alerts = 0;
+};
+}
+
+#endif // JS_NORM_STATE_H
+
diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc

index 1c41eaddc1d4684d9f0369c2dc2aed9fb3b57ffa..a5868fe05e95b6a27ace43a7dec684d657b29a06 100644 (file)
--- a/src/utils/js_normalizer.cc
+++ b/src/utils/js_normalizer.cc
@@ -30,13 +30,13 @@
  using namespace snort;
  
  int JSNormalizer::normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
-        const char** ptr, int* bytes_copied, int64_t norm_depth)
+        const char** ptr, int* bytes_copied, JSNormState& state)
  {
      std::stringstream in, out;
+    in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf),
+        (state.norm_depth >= srclen) ? srclen : state.norm_depth);
  
-    in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf), (norm_depth >= srclen) ? srclen : norm_depth);
-    JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied);
-
+    JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied, state);
      return tokenizer.yylex();
  }
  
diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h

index 9152e214236d6a1434dc1432ee21525633a48a24..2e562bb1b0438a705840000a6b088e3d204636f1 100644 (file)
--- a/src/utils/js_normalizer.h
+++ b/src/utils/js_normalizer.h
@@ -22,13 +22,15 @@
  
  #include "main/snort_types.h"
  
+#include "js_norm_state.h"
+
  namespace snort
  {
  class JSNormalizer
  {
  public:
      static int normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
-        const char** ptr, int* bytes_copied, int64_t norm_depth);
+        const char** ptr, int* bytes_copied, JSNormState& state);
  };
  }
  
diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h

index 892fdc425dc13133225e8b7da08ed02536d0e06b..2e284ef44eab437ee851f6f1331d2c19ef0633d4 100644 (file)
--- a/src/utils/js_tokenizer.h
+++ b/src/utils/js_tokenizer.h
@@ -24,6 +24,8 @@
  
  #include "log/messages.h"
  
+#include "js_norm_state.h"
+
  class JSTokenizer : public yyFlexLexer
  {
  private:
@@ -35,14 +37,13 @@ private:
          PUNCTUATOR,
          OPERATOR,
          LITERAL,
-        DIRECTIVE,
-        TAG_SCRIPT_OPEN
+        DIRECTIVE
      };
  
  public:
      // we need an out stream because yyFlexLexer API strongly requires that
      JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
-        const uint16_t dstlen, const char** ptr, int* bytes_copied);
+        const uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state);
      ~JSTokenizer() override;
  
      // so, Flex will treat this class as yyclass
@@ -66,11 +67,11 @@ private:
      bool eval_string_literal(const char* match_prefix, const char quotes);
      bool eval_regex_literal(const char* match_prefix);
      bool eval_eof();
-    void skip_single_line_comment();
-    void skip_multi_line_comment();
+    bool eval_single_line_comment();
+    bool eval_multi_line_comment();
  
      bool parse_literal(const std::string& match_prefix, const char sentinel_ch,
-        std::string& result, bool is_regex = false);
+        std::string& result, bool& is_alert, bool is_regex = false);
  
      // main lexeme handler
      // all scanned tokens must pass here
@@ -80,7 +81,6 @@ private:
      bool normalize_punctuator(const JSToken prev_tok, const char* lexeme);
      bool normalize_operator(const JSToken prev_tok, const char* lexeme);
      bool normalize_directive(const JSToken prev_tok, const char* lexeme);
-    bool normalize_tag_script_open(const JSToken prev_tok, const char* lexeme);
      bool normalize_undefined(const JSToken prev_tok, const char* lexeme);
      bool normalize_lexeme(const JSToken prev_tok, const char* lexeme);
  
@@ -100,6 +100,8 @@ private:
  
      JSToken prev_tok = UNDEFINED;
  
+    snort::JSNormState& state;
+
  };
  
  #endif // JS_TOKENIZER_H
diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l

index af06087afb83b753b8a59a2da2b6ab0f14cbe5db..84e5ef6ea3c895d0af23fbd157566eced8cf9681 100644 (file)
--- a/src/utils/js_tokenizer.l
+++ b/src/utils/js_tokenizer.l
@@ -31,6 +31,10 @@
      #endif
  
      #include "utils/js_tokenizer.h"
+
+    #include <cassert>
+
+    #include "utils/util_cstring.h"
  %}
  
  /* The following grammar was created based on ECMAScript specification */
@@ -869,9 +873,9 @@ LITERAL_INFINITY              Infinity|\xE2\x88\x9E
  LITERAL_NAN                   NaN
  LITERAL                       {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
  
-HTML_COMMENT_OPEN    <!--
-TAG_SCRIPT_OPEN      (?i:<script)
-TAG_SCRIPT_CLOSE     (?i:<\/script>)
+HTML_COMMENT_OPEN         <!--
+HTML_TAG_SCRIPT_OPEN      (?i:<script)
+HTML_TAG_SCRIPT_CLOSE     (?i:<\/script>)
  
  /* from 0x000 to 0x10FFFD to match undefined tokens */
  /* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
@@ -889,11 +893,11 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  <*>{WHITESPACES}                                        { /* skip */ }
  <*>{CHAR_ESCAPE_SEQUENCES}                              { /* skip */ }
  <*>{LINE_TERMINATORS}                                   { BEGIN(regex); }
-<*>{TAG_SCRIPT_OPEN}                                    { if ( !eval(TAG_SCRIPT_OPEN, YYText()) ) { update_ptr(); return 1; } }
-<*>{TAG_SCRIPT_CLOSE}                                   { update_ptr(); *ptr -= YYLeng(); return 0; }
-<*>{HTML_COMMENT_OPEN}                                  { skip_single_line_comment(); }
-<*>{SINGLE_LINE_COMMENT}                                { skip_single_line_comment(); }
-<*>{MULTI_LINE_COMMENT}                                 { skip_multi_line_comment(); }
+<*>{HTML_TAG_SCRIPT_OPEN}                               { state.alerts |= ALERT_UNEXPECTED_TAG; update_ptr(); return 1; }
+<*>{HTML_TAG_SCRIPT_CLOSE}                              { update_ptr(); *ptr -= YYLeng(); return 0; }
+<*>{HTML_COMMENT_OPEN}                                  { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
+<*>{SINGLE_LINE_COMMENT}                                { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
+<*>{MULTI_LINE_COMMENT}                                 { if ( !eval_multi_line_comment() ) { update_ptr(); return 1; } }
  <*>{USE_STRICT_DIRECTIVE}                               { if ( !eval(DIRECTIVE, YYText()) ) { update_ptr(); return 1; } }
  <*>{KEYWORD}                                            { if ( !eval(KEYWORD, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
  <*>{CLOSING_BRACES}                                     { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
@@ -909,8 +913,6 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  <<EOF>>                                                 { if ( eval_eof() ) { update_ptr(); return 0; } }
  %%
  
-#include <cassert>
-
  // static helper functions
  
  static std::string unicode_to_utf8(const unsigned int code)
@@ -985,6 +987,40 @@ static std::string unescape_unicode(const char* lexeme)
      return res;
  }
  
+// Reports whether str holds something that looks like an HTML script tag.
+// Every occurrence of "SCRIPT" found by SnortStrcasestr is examined:
+//   - preceded by "</"                         -> closing tag, reported
+//   - preceded by '<' that is not escaped by a
+//     backslash right before it                -> opening tag, reported
+// An occurrence at offset 0 has no preceding byte and is never reported.
+static bool contains_script_tags(const std::string& str)
+{
+    static constexpr const char* script = "SCRIPT";
+    static constexpr const int script_len = sizeof("SCRIPT") - 1;
+
+    const char* const start = str.c_str();
+    const char* const end = start + str.size();
+
+    for ( const char* hit = snort::SnortStrcasestr(start, (end - start), script);
+        hit != nullptr;
+        hit = snort::SnortStrcasestr(hit + script_len, (end - (hit + script_len)), script) )
+    {
+        const int offset = hit - start;
+
+        // nothing in front of the keyword, cannot be a tag
+        if ( offset < 1 )
+            continue;
+
+        const char prev = *(hit - 1);
+
+        if ( offset == 1 )
+        {
+            // only one byte in front: it can only be the opening bracket
+            if ( prev == '<' )
+                return true;
+            continue;
+        }
+
+        const char before_prev = *(hit - 2);
+
+        const bool closing_tag = (prev == '/' and before_prev == '<');
+        const bool opening_tag = (prev == '<' and before_prev != '\\');
+
+        if ( closing_tag or opening_tag )
+            return true;
+    }
+
+    return false;
+}
+
  // JSTokenizer members
  
  struct JSTokenizer::ScanBuffers
@@ -994,12 +1030,13 @@ struct JSTokenizer::ScanBuffers
  };
  
  JSTokenizer::JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
-    uint16_t dstlen, const char** ptr, int* bytes_copied)
+    uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state)
      : yyFlexLexer(in, out),
        dstbuf(dstbuf),
        dstlen(dstlen),
        ptr(ptr),
-      bytes_copied(bytes_copied)
+      bytes_copied(bytes_copied),
+      state(state)
  {
      assert(bytes_copied);
      init();
@@ -1049,7 +1086,11 @@ bool JSTokenizer::eval_identifier(const char* lexeme)
  bool JSTokenizer::eval_string_literal(const char* match_prefix, const char quotes)
  {
      std::string s;
-    bool is_ok = parse_literal(match_prefix, quotes, s);
+    bool is_alert = false;
+    bool is_ok = parse_literal(match_prefix, quotes, s, is_alert);
+
+    if ( is_alert )
+        return false;
  
      return eval(is_ok ? LITERAL : UNDEFINED, s.c_str());
  }
@@ -1059,7 +1100,11 @@ bool JSTokenizer::eval_regex_literal(const char* match_prefix)
      static const std::string regex_flags = "gimsuy";
  
      std::string s;
-    bool is_ok = parse_literal(match_prefix, '/', s, true);
+    bool is_alert = false;
+    bool is_ok = parse_literal(match_prefix, '/', s, is_alert, true);
+
+    if ( is_alert )
+        return false;
  
      // append regex flags
      char c;
@@ -1095,23 +1140,35 @@ bool JSTokenizer::eval_eof()
      return true;
  }
  
-void JSTokenizer::skip_single_line_comment()
+bool JSTokenizer::eval_single_line_comment()
  {
      char c;
+    std::string result;
  
      while ( (c = yyinput()) != 0 )
      {
+        result += c;
          if ( c == '\n' )
              break;
      }
+
+    if ( contains_script_tags(result) )
+    {
+        state.alerts |= ALERT_UNEXPECTED_TAG;
+        return false;
+    }
+    else
+        return true;
  }
  
-void JSTokenizer::skip_multi_line_comment()
+bool JSTokenizer::eval_multi_line_comment()
  {
      char c;
+    std::string result;
  
      while ( (c = yyinput()) != 0 )
      {
+        result += c;
          if ( c == '*' )
          {
              if ( (c = yyinput()) == '/' )
@@ -1120,6 +1177,14 @@ void JSTokenizer::skip_multi_line_comment()
                  unput(c);
          }
      }
+
+    if ( contains_script_tags(result) )
+    {
+        state.alerts |= ALERT_UNEXPECTED_TAG;
+        return false;
+    }
+    else
+        return true;
  }
  
  // Unicode line terminators
@@ -1133,7 +1198,7 @@ void JSTokenizer::skip_multi_line_comment()
  // Call this method when lexer meets those literals
  // match_prefix is a lexeme part already matched by the lexer (with sentinel char)
  bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sentinel_ch,
-    std::string& result, bool is_regex)
+    std::string& result, bool& is_alert, bool is_regex)
  {
      bool is_ok = true;
      char c;
@@ -1206,6 +1271,12 @@ bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sent
      if ( result.find(LS) != std::string::npos or result.find(PS) != std::string::npos )
          is_ok = false;
  
+    if ( contains_script_tags(result) )
+    {
+        is_alert = true;
+        state.alerts |= ALERT_UNEXPECTED_TAG;
+    }
+
      return is_ok;
  }
  
@@ -1239,10 +1310,6 @@ bool JSTokenizer::eval(const JSToken tok, const char* lexeme)
          ret = normalize_directive(prev_tok, lexeme);
      break;
  
-    case TAG_SCRIPT_OPEN:
-        ret = normalize_tag_script_open(prev_tok, lexeme);
-    break;
-
      case UNDEFINED:
          ret = normalize_undefined(prev_tok, lexeme);
      break;
@@ -1276,7 +1343,6 @@ bool JSTokenizer::normalize_operator(const JSToken prev_tok, const char* lexeme)
      case PUNCTUATOR:
      case LITERAL:
      case DIRECTIVE:
-    case TAG_SCRIPT_OPEN:
      case UNDEFINED:
          return write_output(lexeme);
      break;
@@ -1299,12 +1365,6 @@ bool JSTokenizer::normalize_directive(const JSToken prev_tok, const char* lexeme
      return normalize_lexeme(prev_tok, str.c_str());
  }
  
-bool JSTokenizer::normalize_tag_script_open(const JSToken, const char* lexeme)
-{
-    // FIXIT-L add builtin alert here
-    return write_output(lexeme);
-}
-
  bool JSTokenizer::normalize_undefined(const JSToken, const char* lexeme)
  { return write_output(lexeme); }
  
@@ -1322,7 +1382,6 @@ bool JSTokenizer::normalize_lexeme(const JSToken prev_tok, const char* lexeme)
      case IDENTIFIER:
      case KEYWORD:
      case LITERAL:
-    case TAG_SCRIPT_OPEN:
          return write_output(" " + std::string(lexeme));
      break;
      }
diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt

index ca5bf363794d0f9aa4ade4696d685c101edb08e4..816907aa4682565c9d6cd56217f8bcb5e08b7cb8 100644 (file)
--- a/src/utils/test/CMakeLists.txt
+++ b/src/utils/test/CMakeLists.txt
@@ -14,5 +14,6 @@ add_catch_test( js_normalizer_test
      SOURCES
          ${FLEX_js_tokenizer_OUTPUTS}
          ../js_normalizer.cc
+        ../util_cstring.cc
  )
  
diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc

index 117660f5bc22ed2ebba64b0fac72f1eede0b1b45..1100bbf48931d5548dd921c6241d93e544e6b1d0 100644 (file)
--- a/src/utils/test/js_normalizer_test.cc
+++ b/src/utils/test/js_normalizer_test.cc
@@ -42,9 +42,11 @@ using namespace snort;
      char dstbuf[sizeof(expected)];                                         \
      int bytes_copied;                                                      \
      const char* ptr = srcbuf;                                              \
-    int norm_depth = NORM_DEPTH;                                           \
+    JSNormState state;                                                     \
+    state.norm_depth = NORM_DEPTH;                                         \
+    state.alerts = 0;                                                      \
      int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf),              \
-        dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, norm_depth);
+        dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, state);
  
  #define VALIDATE(srcbuf, expected)                    \
      CHECK(ret == 0);                                  \
@@ -52,6 +54,15 @@ using namespace snort;
      CHECK(bytes_copied == sizeof(expected) - 1);      \
      CHECK(!memcmp(dstbuf, expected, bytes_copied));
  
+// Validates a NORMALIZE() run that is expected to stop before the end of the
+// input: checks the return code, how far the source pointer advanced and the
+// bytes written so far. Relies on the locals declared by NORMALIZE().
+// Arguments are parenthesized so that compound expressions expand safely.
+#define VALIDATE_FAIL(srcbuf, expected, ret_code, ptr_offset)      \
+    CHECK(ret == (ret_code));                                      \
+    CHECK((ptr - (srcbuf)) == (ptr_offset));                       \
+    CHECK(bytes_copied == sizeof(expected) - 1);                   \
+    CHECK(!memcmp(dstbuf, (expected), bytes_copied));
+
+// Checks that the state used by NORMALIZE() has the given alert bit(s) set.
+// The argument is parenthesized: without it a compound mask such as A | B
+// would expand to (state.alerts & A) | B, which is always true.
+#define VALIDATE_ALERT(alert)       \
+    CHECK(state.alerts & (alert));
+
  // ClamAV test cases
  static const char clamav_buf0[] =
      "function foo(a, b) {\n"
@@ -308,15 +319,6 @@ static const char all_patterns_expected5[] =
      "$2abc _2abc abc $__$ 肖晗 XÆA12 \u0041abc \u00FBdef \u1234ghi ab ww "
      "ab ww ab ww ab ∞ ww 2 abc";
  
-static const char all_patterns_buf6[] =
-    "var a = 1;\n"
-    "<script>\n"
-    "<script var>\n"
-    "var b = 2 ;\n";
-
-static const char all_patterns_expected6[] =
-    "var a=1;<script><script var>var b=2;";
-
  TEST_CASE("all patterns", "[JSNormalizer]")
  {
      SECTION("whitespaces and special characters")
@@ -350,18 +352,20 @@ TEST_CASE("all patterns", "[JSNormalizer]")
          const char* ptr2 = srcbuf2;
          const char* ptr3 = srcbuf3;
          const char* ptr4 = srcbuf4;
-        int norm_depth = NORM_DEPTH;
+        JSNormState state;
+        state.norm_depth = NORM_DEPTH;
+        state.alerts = 0;
  
          int ret0 = JSNormalizer::normalize(srcbuf0, sizeof(srcbuf0), dstbuf0, sizeof(dstbuf0),
-            &ptr0, &bytes_copied0, norm_depth);
+            &ptr0, &bytes_copied0, state);
          int ret1 = JSNormalizer::normalize(srcbuf1, sizeof(srcbuf1), dstbuf1, sizeof(dstbuf1),
-            &ptr1, &bytes_copied1, norm_depth);
+            &ptr1, &bytes_copied1, state);
          int ret2 = JSNormalizer::normalize(srcbuf2, sizeof(srcbuf2), dstbuf2, sizeof(dstbuf2),
-            &ptr2, &bytes_copied2, norm_depth);
+            &ptr2, &bytes_copied2, state);
          int ret3 = JSNormalizer::normalize(srcbuf3, sizeof(srcbuf3), dstbuf3, sizeof(dstbuf3),
-            &ptr3, &bytes_copied3, norm_depth);
+            &ptr3, &bytes_copied3, state);
          int ret4 = JSNormalizer::normalize(srcbuf4, sizeof(srcbuf4), dstbuf4, sizeof(dstbuf4),
-            &ptr4, &bytes_copied4, norm_depth);
+            &ptr4, &bytes_copied4, state);
  
          CHECK(ret0 == 0);
          CHECK((ptr0 - srcbuf0) == sizeof(srcbuf0));
@@ -408,11 +412,6 @@ TEST_CASE("all patterns", "[JSNormalizer]")
          NORMALIZE(all_patterns_buf5, all_patterns_expected5);
          VALIDATE(all_patterns_buf5, all_patterns_expected5);
      }
-    SECTION("tag script open")
-    {
-        NORMALIZE(all_patterns_buf6, all_patterns_expected6);
-        VALIDATE(all_patterns_buf6, all_patterns_expected6);
-    }
  }
  
  // Tests for different syntax cases
@@ -828,9 +827,11 @@ TEST_CASE("norm_depth is specified", "[JSNormalizer]")
      char dstbuf[7];
      int bytes_copied;
      const char* ptr = srcbuf;
-    int norm_depth = 7;
+    JSNormState state;
+    state.norm_depth = 7;
+    state.alerts = 0;
      int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-        &bytes_copied, norm_depth);
+        &bytes_copied, state);
  
      CHECK(ret == 0);
      CHECK(bytes_copied == sizeof(expected) - 1);
@@ -849,9 +850,11 @@ TEST_CASE("tag script end is specified", "[JSNormalizer]")
      char dstbuf[sizeof(expected)];
      int bytes_copied;
      const char* ptr = srcbuf;
-    int norm_depth = NORM_DEPTH;
+    JSNormState state;
+    state.norm_depth = NORM_DEPTH;
+    state.alerts = 0;
      int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-        &bytes_copied, norm_depth);
+        &bytes_copied, state);
  
      CHECK(ret == 0);
      CHECK(bytes_copied == sizeof(expected) - 1);
@@ -870,9 +873,11 @@ TEST_CASE("parsing errors", "[JSNormalizer]")
          char dstbuf[7];
          int bytes_copied;
          const char* ptr = srcbuf;
-        int norm_depth = NORM_DEPTH;
+        JSNormState state;
+        state.norm_depth = NORM_DEPTH;
+        state.alerts = 0;
          int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-            &bytes_copied, norm_depth);
+            &bytes_copied, state);
  
          CHECK(ret == 1);
          CHECK(bytes_copied == sizeof(expected) - 1);
@@ -880,3 +885,359 @@ TEST_CASE("parsing errors", "[JSNormalizer]")
      }
  }
  
+static const char unexpected_tag_buf0[] =  // input: bare <script> open tag on its own line between two statements
+    "var a = 1;\n"
+    "<script>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected0[] =  // paired with unexpected_tag_buf0: only the statement before the tag
+    "var a=1;";
+
+static const char unexpected_tag_buf1[] =  // input: <script> open tag carrying a type attribute, between two statements
+    "var a = 1;\n"
+    "<script type=application/javascript>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected1[] =  // paired with unexpected_tag_buf1: only the statement before the tag
+    "var a=1;";
+
+static const char unexpected_tag_buf2[] =  // input: <script> at the start of a single-quoted string literal
+    "var a = 1;\n"
+    "var str = '<script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected2[] =  // paired with unexpected_tag_buf2: text ends right before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf3[] =  // input: <script> in the middle of a string literal
+    "var a = 1;\n"
+    "var str = 'something <script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected3[] =  // paired with unexpected_tag_buf3: text ends right before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf4[] =  // input: <script> at the end of a string literal
+    "var a = 1;\n"
+    "var str = 'something <script>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected4[] =  // paired with unexpected_tag_buf4: text ends right before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf5[] =  // input: </script> at the start of a string literal
+    "var a = 1;\n"
+    "var str = '</script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected5[] =  // paired with unexpected_tag_buf5: text ends right before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf6[] =  // input: </script> in the middle of a string literal
+    "var a = 1;\n"
+    "var str = 'something </script> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected6[] =  // paired with unexpected_tag_buf6: text ends right before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf7[] =  // input: </script> at the end of a string literal
+    "var a = 1;\n"
+    "var str = 'something </script>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected7[] =  // paired with unexpected_tag_buf7: text ends right before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf8[] =  // input: open tag inside a literal with both angle brackets backslash-escaped
+    "var a = 1;\n"
+    "var str = 'something \\<script\\> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected8[] =  // paired with unexpected_tag_buf8: all three statements, literal kept verbatim
+    "var a=1;var str='something \\<script\\> something';var b=2;";
+
+static const char unexpected_tag_buf9[] =  // input: close tag inside a literal with brackets and slash backslash-escaped
+    "var a = 1;\n"
+    "var str = 'something \\<\\/script\\> something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected9[] =  // paired with unexpected_tag_buf9: all three statements, literal kept verbatim
+    "var a=1;var str='something \\<\\/script\\> something';var b=2;";
+
+static const char unexpected_tag_buf10[] =  // input: <script> at the start of a single-line comment
+    "var a = 1;\n"
+    "//<script> something\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected10[] =  // paired with unexpected_tag_buf10: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf11[] =  // input: <script> in the middle of a single-line comment
+    "var a = 1;\n"
+    "//something <script> something\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected11[] =  // paired with unexpected_tag_buf11: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf12[] =  // input: <script> at the end of a single-line comment
+    "var a = 1;\n"
+    "//something <script>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected12[] =  // paired with unexpected_tag_buf12: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf13[] =  // input: <script> at the start of a block comment
+    "var a = 1;\n"
+    "/*<script> something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected13[] =  // paired with unexpected_tag_buf13: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf14[] =  // input: <script> in the middle of a block comment
+    "var a = 1;\n"
+    "/*something <script> something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected14[] =  // paired with unexpected_tag_buf14: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf15[] =  // input: <script> at the end of a block comment
+    "var a = 1;\n"
+    "/*something <script>*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected15[] =  // paired with unexpected_tag_buf15: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf16[] =  // input: </script> at the start of a single-line comment
+    "var a = 1;\n"
+    "//</script> something\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected16[] =  // paired with unexpected_tag_buf16: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf17[] =  // input: </script> in the middle of an HTML-style comment line (used by the single-line comment - mid section)
+    "var a = 1;\n"
+    "<!--something </script> something//-->\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected17[] =  // paired with unexpected_tag_buf17: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf18[] =  // input: </script> at the end of a single-line comment
+    "var a = 1;\n"
+    "//something </script>\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected18[] =  // paired with unexpected_tag_buf18: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf19[] =  // input: </script> on the first line of a block comment spanning two lines
+    "var a = 1;\n"
+    "/*</script>\n"
+    "something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected19[] =  // paired with unexpected_tag_buf19: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf20[] =  // input: </script> on the middle line of a block comment spanning three lines
+    "var a = 1;\n"
+    "/*something\n"
+    "</script>\n"
+    "something*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected20[] =  // paired with unexpected_tag_buf20: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf21[] =  // input: </script> on the last line of a block comment spanning two lines
+    "var a = 1;\n"
+    "/*something\n"
+    "</script>*/\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected21[] =  // paired with unexpected_tag_buf21: only the statement before the comment
+    "var a=1;";
+
+static const char unexpected_tag_buf22[] =  // input: literal with the word script several times but never inside angle brackets
+    "var a = 1;\n"
+    "var str = 'script somescript /script something';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected22[] =  // paired with unexpected_tag_buf22: all three statements, literal kept verbatim
+    "var a=1;var str='script somescript /script something';var b=2;";
+
+static const char unexpected_tag_buf23[] =  // input: same bare words as buf22 followed by a real <script> tag in the literal
+    "var a = 1;\n"
+    "var str = 'script somescript /script something <script>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected23[] =  // paired with unexpected_tag_buf23: text ends right before the literal
+    "var a=1;var str=";
+
+static const char unexpected_tag_buf24[] =  // input: open tag spelled in mixed upper and lower case inside a literal
+    "var a = 1;\n"
+    "var str = 'something <sCrIpT>';\n"
+    "var b = 2;\r\n";
+
+static const char unexpected_tag_expected24[] =  // paired with unexpected_tag_buf24: text ends right before the literal
+    "var a=1;var str=";
+
+TEST_CASE("unexpected script tag alert", "[JSNormalizer]")  // one SECTION per unexpected_tag_bufN / unexpected_tag_expectedN fixture pair
+{
+    const int ret_code = 1;  // value passed as the third argument of every VALIDATE_FAIL in this test case
+    SECTION("explicit open tag - simple")
+    {
+        NORMALIZE(unexpected_tag_buf0, unexpected_tag_expected0);
+        VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, ret_code, 18);  // 18 = 11 bytes of the first line + 7 bytes of <script; presumably the consumed source offset - VALIDATE_FAIL is defined outside this view, confirm
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("explicit open tag - complex")
+    {
+        NORMALIZE(unexpected_tag_buf1, unexpected_tag_expected1);
+        VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, ret_code, 18);  // same number as the simple case even though the tag has an attribute
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - start")
+    {
+        NORMALIZE(unexpected_tag_buf2, unexpected_tag_expected2);
+        VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, ret_code, 41);  // 41 = byte count of the fixture up to and including the closing quote of the literal
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - mid")
+    {
+        NORMALIZE(unexpected_tag_buf3, unexpected_tag_expected3);
+        VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, ret_code, 51);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - end")
+    {
+        NORMALIZE(unexpected_tag_buf4, unexpected_tag_expected4);
+        VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, ret_code, 41);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within literal - start")
+    {
+        NORMALIZE(unexpected_tag_buf5, unexpected_tag_expected5);
+        VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, ret_code, 42);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within literal - mid")
+    {
+        NORMALIZE(unexpected_tag_buf6, unexpected_tag_expected6);
+        VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, ret_code, 52);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within literal - end")
+    {
+        NORMALIZE(unexpected_tag_buf7, unexpected_tag_expected7);
+        VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, ret_code, 42);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within literal - escaped")
+    {
+        NORMALIZE(unexpected_tag_buf8, unexpected_tag_expected8);
+        VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8);  // control case: plain VALIDATE and no VALIDATE_ALERT, unlike the failing sections
+    }
+    SECTION("close tag within literal - escaped")
+    {
+        NORMALIZE(unexpected_tag_buf9, unexpected_tag_expected9);
+        VALIDATE(unexpected_tag_buf9, unexpected_tag_expected9);  // control case: plain VALIDATE and no VALIDATE_ALERT
+    }
+    SECTION("open tag within single-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf10, unexpected_tag_expected10);
+        VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, ret_code, 32);  // 32 = byte count of the fixture through the newline that ends the comment
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within single-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf11, unexpected_tag_expected11);
+        VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, ret_code, 42);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within single-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf12, unexpected_tag_expected12);
+        VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, ret_code, 32);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within multi-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf13, unexpected_tag_expected13);
+        VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, ret_code, 33);  // 33 = byte count of the fixture through the closing delimiter of the block comment
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within multi-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf14, unexpected_tag_expected14);
+        VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, ret_code, 43);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("open tag within multi-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf15, unexpected_tag_expected15);
+        VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, ret_code, 33);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within single-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf16, unexpected_tag_expected16);
+        VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, ret_code, 33);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within single-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf17, unexpected_tag_expected17);  // this fixture uses an HTML-style comment line rather than a double-slash comment
+        VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, ret_code, 50);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within single-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf18, unexpected_tag_expected18);
+        VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, ret_code, 33);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within multi-line comment - start")
+    {
+        NORMALIZE(unexpected_tag_buf19, unexpected_tag_expected19);
+        VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, ret_code, 34);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within multi-line comment - mid")
+    {
+        NORMALIZE(unexpected_tag_buf20, unexpected_tag_expected20);
+        VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, ret_code, 44);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("close tag within multi-line comment - end")
+    {
+        NORMALIZE(unexpected_tag_buf21, unexpected_tag_expected21);
+        VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, ret_code, 34);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("multiple patterns - not matched")
+    {
+        NORMALIZE(unexpected_tag_buf22, unexpected_tag_expected22);
+        VALIDATE(unexpected_tag_buf22, unexpected_tag_expected22);  // control case: the word script without angle brackets, plain VALIDATE and no VALIDATE_ALERT
+    }
+    SECTION("multiple patterns - matched")
+    {
+        NORMALIZE(unexpected_tag_buf23, unexpected_tag_expected23);
+        VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, ret_code, 67);
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+    SECTION("mixed lower and upper case")
+    {
+        NORMALIZE(unexpected_tag_buf24, unexpected_tag_expected24);
+        VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, ret_code, 41);  // same number as the lower-case fixture unexpected_tag_buf4
+        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+    }
+}
+
author	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Wed, 21 Apr 2021 17:39:43 +0000 (17:39 +0000)
committer	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Wed, 21 Apr 2021 17:39:43 +0000 (17:39 +0000)
src/service_inspectors/http_inspect/http_enum.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_js_norm.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_js_norm.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_body.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_tables.cc		patch \| blob \| blame \| history
src/utils/CMakeLists.txt		patch \| blob \| blame \| history
src/utils/js_norm_state.h	[new file with mode: 0644]	patch \| blob
src/utils/js_normalizer.cc		patch \| blob \| blame \| history
src/utils/js_normalizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.l		patch \| blob \| blame \| history
src/utils/test/CMakeLists.txt		patch \| blob \| blame \| history
src/utils/test/js_normalizer_test.cc		patch \| blob \| blame \| history