Merge pull request #2904 in SNORT/snort3 from ~OSHUMEIK/snort3:js_mpdu to master

author Mike Stepanek (mstepane) <mstepane@cisco.com>

Fri, 28 May 2021 15:25:37 +0000 (15:25 +0000)

committer Mike Stepanek (mstepane) <mstepane@cisco.com>

Fri, 28 May 2021 15:25:37 +0000 (15:25 +0000)
author Mike Stepanek (mstepane) <mstepane@cisco.com>
Fri, 28 May 2021 15:25:37 +0000 (15:25 +0000)
committer Mike Stepanek (mstepane) <mstepane@cisco.com>
Fri, 28 May 2021 15:25:37 +0000 (15:25 +0000)
diff --git a/cmake/macros.cmake b/cmake/macros.cmake

index b21a07907a9121d0387f635e3cc6bdcf353158f6..df6030e710b2b1e788dbfaedff6b4483109eb153 100644 (file)
--- a/cmake/macros.cmake
+++ b/cmake/macros.cmake
@@ -34,6 +34,7 @@ function (add_cpputest testname)
          set(multiValueArgs SOURCES LIBS)
          cmake_parse_arguments(CppUTest "" "" "${multiValueArgs}" ${ARGN})
          add_executable(${testname} EXCLUDE_FROM_ALL ${testname}.cc ${CppUTest_SOURCES})
+        target_compile_options(${testname} PRIVATE "-DUNIT_TEST_BUILD")
          target_include_directories(${testname} PRIVATE ${CPPUTEST_INCLUDE_DIR})
          target_link_libraries(${testname} ${CPPUTEST_LIBRARIES} ${CppUTest_LIBS})
          add_test(${testname} ${testname})
diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h

index d3cf7817b52d37c1ba86fbf2a1e6177c3f06f4dd..e3af9347ea3cde4406448f5ea55312f47de37826 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_enum.h
+++ b/src/service_inspectors/http_inspect/http_enum.h
@@ -264,7 +264,9 @@ enum Infraction
      INF_MULTIPLE_HOST_HDRS,
      INF_HTTP2_SETTINGS,
      INF_UPGRADE_HEADER_HTTP2,
-    INF_JS_UNEXPECTED_TAG,
+    INF_JS_BAD_TOKEN,
+    INF_JS_OPENING_TAG,
+    INF_JS_CLOSING_TAG,
      INF__MAX_VALUE
  };
  
@@ -323,7 +325,9 @@ enum EventSid
      EVENT_PDF_UNSUP_COMP_TYPE = 115,
      EVENT_PDF_CASC_COMP = 116,
      EVENT_PDF_PARSE_FAILURE = 117,
-    EVENT_JS_UNEXPECTED_TAG = 118,
+    EVENT_JS_BAD_TOKEN = 118,
+    EVENT_JS_OPENING_TAG = 119,
+    EVENT_JS_CLOSING_TAG = 120,
  
      EVENT_LOSS_OF_SYNC = 201,
      EVENT_CHUNK_ZEROS = 202,
diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc

index 61eb833502fb0fef65b991018503a25b79c8b5b8..a986dac88c8cfbf528bf827b3feaebc109668e1c 100644 (file)
--- a/src/service_inspectors/http_inspect/http_flow_data.cc
+++ b/src/service_inspectors/http_inspect/http_flow_data.cc
@@ -24,6 +24,7 @@
  #include "http_flow_data.h"
  
  #include "decompress/file_decomp.h"
+#include "utils/js_normalizer.h"
  
  #include "http_cutter.h"
  #include "http_common.h"
@@ -79,6 +80,14 @@ HttpFlowData::~HttpFlowData()
      if (HttpModule::get_peg_counts(PEG_CONCURRENT_SESSIONS) > 0)
          HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS);
  
+#ifndef UNIT_TEST_BUILD
+    if (js_normalizer)
+    {
+        update_deallocations(JSNormalizer::size());
+        delete js_normalizer;
+    }
+#endif
+
      for (int k=0; k <= 1; k++)
      {
          delete infractions[k];
@@ -88,6 +97,8 @@ HttpFlowData::~HttpFlowData()
          update_deallocations(partial_buffer_length[k]);
          delete[] partial_detect_buffer[k];
          update_deallocations(partial_detect_length[k]);
+        delete[] js_detect_buffer[k];
+        update_deallocations(js_detect_length[k]);
          HttpTransaction::delete_transaction(transaction[k], nullptr);
          delete cutter[k];
          if (compress_stream[k] != nullptr)
@@ -204,6 +215,32 @@ void HttpFlowData::garbage_collect()
      }
  }
  
+#ifndef UNIT_TEST_BUILD
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx()
+{
+    if (js_normalizer)
+        return *js_normalizer;
+
+    js_normalizer = new JSNormalizer();
+    update_allocations(JSNormalizer::size());
+
+    return *js_normalizer;
+}
+
+void HttpFlowData::release_js_ctx()
+{
+    if (!js_normalizer)
+        return;
+
+    update_deallocations(JSNormalizer::size());
+    delete js_normalizer;
+    js_normalizer = nullptr;
+}
+#else
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx() { return *js_normalizer; }
+void HttpFlowData::release_js_ctx() {}
+#endif
+
  bool HttpFlowData::add_to_pipeline(HttpTransaction* latest)
  {
      if (pipeline == nullptr)
diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h

index 617775c8208ce62fdca216f6d8ca03326eec894f..ffcb7aece8e1b2a9befd52ae4923e80083d6c0db 100644 (file)
--- a/src/service_inspectors/http_inspect/http_flow_data.h
+++ b/src/service_inspectors/http_inspect/http_flow_data.h
@@ -39,6 +39,11 @@ class HttpMsgSection;
  class HttpCutter;
  class HttpQueryParser;
  
+namespace snort
+{
+class JSNormalizer;
+}
+
  class HttpFlowData : public snort::FlowData
  {
  public:
@@ -49,6 +54,7 @@ public:
      size_t size_of() override;
  
      friend class HttpInspect;
+    friend class HttpJsNorm;
      friend class HttpMsgSection;
      friend class HttpMsgStart;
      friend class HttpMsgRequest;
@@ -169,6 +175,8 @@ private:
      uint8_t* partial_detect_buffer[2] = { nullptr, nullptr };
      uint32_t partial_detect_length[2] = { 0, 0 };
      uint32_t partial_js_detect_length[2] = { 0, 0 };
+    uint8_t* js_detect_buffer[2] = { nullptr, nullptr };
+    uint32_t js_detect_length[2] = { 0, 0 };
      int32_t status_code_num = HttpCommon::STAT_NOT_PRESENT;
      HttpEnums::VersionId version_id[2] = { HttpEnums::VERS__NOT_PRESENT,
                                              HttpEnums::VERS__NOT_PRESENT };
@@ -177,6 +185,12 @@ private:
      bool cutover_on_clear = false;
      bool ssl_search_abandoned = false;
  
+    // *** HttpJsNorm
+    snort::JSNormalizer* js_normalizer = nullptr;
+
+    snort::JSNormalizer& acquire_js_ctx();
+    void release_js_ctx();
+
      // *** Transaction management including pipelining
      static const int MAX_PIPELINE = 100;  // requests seen - responses seen <= MAX_PIPELINE
      HttpTransaction* transaction[2] = { nullptr, nullptr };
diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc

index 8aad96222337c28cdbeaf48dc3a1b1abd0455b64..90e7666f6c6d08cc0b13eb9f853e23d0e8f7c170 100644 (file)
--- a/src/service_inspectors/http_inspect/http_js_norm.cc
+++ b/src/service_inspectors/http_inspect/http_js_norm.cc
@@ -23,159 +23,164 @@
  
  #include "http_js_norm.h"
  
-#include "utils/js_norm_state.h"
  #include "utils/js_normalizer.h"
  #include "utils/safec.h"
  #include "utils/util_jsnorm.h"
  
+#include "http_common.h"
  #include "http_enum.h"
  
  using namespace HttpEnums;
  using namespace snort;
  
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
-    uri_param(uri_param_), javascript_search_mpse(nullptr),
-    htmltype_search_mpse(nullptr)
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+    uri_param(uri_param_),
+    normalization_depth(normalization_depth_),
+    mpse_otag(nullptr),
+    mpse_attr(nullptr),
+    mpse_type(nullptr)
  {}
  
  HttpJsNorm::~HttpJsNorm()
  {
-    delete javascript_search_mpse;
-    delete js_src_attr_search_mpse;
-    delete htmltype_search_mpse;
+    delete mpse_otag;
+    delete mpse_attr;
+    delete mpse_type;
  }
  
  void HttpJsNorm::configure()
  {
-    if ( configure_once )
+    if (configure_once)
          return;
  
-    javascript_search_mpse = new SearchTool;
-    js_src_attr_search_mpse = new SearchTool;
-    htmltype_search_mpse = new SearchTool;
-
-    javascript_search_mpse->add(script_start, script_start_length, JS_JAVASCRIPT);
-    javascript_search_mpse->prep();
-
-    js_src_attr_search_mpse->add(script_src_attr, script_src_attr_length, JS_ATTR_SRC);
-    js_src_attr_search_mpse->prep();
-
-    struct HiSearchToken
-    {
-        const char* name;
-        int name_len;
-        int search_id;
-    };
-
-    const HiSearchToken html_patterns[] =
-    {
-        { "JAVASCRIPT",      10, HTML_JS },
-        { "ECMASCRIPT",      10, HTML_EMA },
-        { "VBSCRIPT",         8, HTML_VB },
-        { nullptr,            0, 0 }
-    };
-
-    for (const HiSearchToken* tmp = &html_patterns[0]; tmp->name != nullptr; tmp++)
-    {
-        htmltype_search_mpse->add(tmp->name, tmp->name_len, tmp->search_id);
-    }
-    htmltype_search_mpse->prep();
+    mpse_otag = new SearchTool;
+    mpse_attr = new SearchTool;
+    mpse_type = new SearchTool;
+
+    static constexpr const char* otag_start = "<SCRIPT";
+    static constexpr const char* attr_gt = ">";
+    static constexpr const char* attr_src = "SRC";
+    static constexpr const char* attr_js1 = "JAVASCRIPT";
+    static constexpr const char* attr_js2 = "ECMASCRIPT";
+    static constexpr const char* attr_vb = "VBSCRIPT";
+
+    mpse_otag->add(otag_start, strlen(otag_start), 0);
+    mpse_attr->add(attr_gt, strlen(attr_gt), AID_GT);
+    mpse_attr->add(attr_src, strlen(attr_src), AID_SRC);
+    mpse_attr->add(attr_js1, strlen(attr_js1), AID_JS);
+    mpse_attr->add(attr_js2, strlen(attr_js2), AID_ECMA);
+    mpse_attr->add(attr_vb, strlen(attr_vb), AID_VB);
+    mpse_type->add(attr_js1, strlen(attr_js1), AID_JS);
+    mpse_type->add(attr_js2, strlen(attr_js2), AID_ECMA);
+    mpse_type->add(attr_vb, strlen(attr_vb), AID_VB);
+
+    mpse_otag->prep();
+    mpse_attr->prep();
+    mpse_type->prep();
  
      configure_once = true;
  }
  
-void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
-    HttpEventGen* events, int64_t js_normalization_depth) const
+void HttpJsNorm::enhanced_normalize(const Field& input, Field& output,
+    HttpInfractions* infractions, HttpFlowData* ssn) const
  {
-    bool js_present = false;
-    int index = 0;
      const char* ptr = (const char*)input.start();
      const char* const end = ptr + input.length();
  
-    uint8_t* buffer = new uint8_t[input.length()];
+    HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
  
-    JSNormState state;
-    state.norm_depth = js_normalization_depth;
-    state.alerts = 0;
+    char* buffer = nullptr;
+    char* dst = nullptr;
+    const char* dst_end = nullptr;
+
+    bool script_continue = alive_ctx(ssn);
  
      while (ptr < end)
      {
-        int bytes_copied = 0;
-        int mindex;
-
-        // Search for beginning of a javascript
-        if (javascript_search_mpse->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
+        if (!script_continue)
          {
-            const char* js_start = ptr + mindex;
-            const char* const angle_bracket =
-                (const char*)SnortStrnStr(js_start, end - js_start, ">");
-            if (angle_bracket == nullptr || (end - angle_bracket) == 0)
+            if (!mpse_otag->find(ptr, end - ptr, match_otag, false, &ptr))
+                break;
+            if (ptr >= end)
                  break;
  
-            bool type_js = false;
-            bool external_js = false;
-            if (angle_bracket > js_start)
-            {
-                int mid;
-                const int script_found = htmltype_search_mpse->find(
-                    js_start, (angle_bracket-js_start), search_html_found, false, &mid);
-
-                external_js = is_external_script(js_start, angle_bracket);
+            MatchContext sctx = {ptr, true, false};
  
-                js_start = angle_bracket + 1;
-                if (script_found > 0)
-                {
-                    switch (mid)
-                    {
-                    case HTML_JS:
-                        js_present = true;
-                        type_js = true;
-                        break;
-                    default:
-                        type_js = false;
-                        break;
-                    }
-                }
-                else
-                {
-                    // if no type or language is found we assume it is a javascript
-                    js_present = true;
-                    type_js = true;
-                }
-            }
-            // Save before the <script> begins
-            if (js_start > ptr)
+            if (ptr[0] == '>')
+                ptr++;
+            else
              {
-                if ((js_start - ptr) > (input.length() - index))
-                    break;
+                if (!mpse_attr->find(ptr, end - ptr, match_attr, false, &sctx))
+                    break; // the opening tag never ends
+                ptr = sctx.next;
              }
  
-            ptr = js_start;
-            if (!type_js or external_js)
+            if (!sctx.is_javascript || sctx.is_external)
                  continue;
  
-            JSNormalizer::normalize(js_start, (uint16_t)(end-js_start), (char*)buffer+index,
-                (uint16_t)(input.length() - index), &ptr, &bytes_copied, state);
-
+            // script found
              HttpModule::increment_peg_counts(PEG_JS_INLINE);
+        }
  
-            index += bytes_copied;
+        if (!buffer)
+        {
+            uint8_t* nbuf = ssn->js_detect_buffer[HttpCommon::SRC_SERVER];
+            uint32_t nlen = ssn->js_detect_length[HttpCommon::SRC_SERVER];
+
+            auto len = nlen + (end - ptr); // not more than the remaining raw data
+            buffer = new char[len];
+            if (nbuf)
+                memcpy(buffer, nbuf, nlen);
+            dst = buffer + nlen;
+            dst_end = buffer + len;
          }
-        else
-            break;
-    }
  
-    if (js_present)
-    {
-        if (state.alerts & ALERT_UNEXPECTED_TAG)
+        auto& ctx = ssn->acquire_js_ctx();
+        ctx.set_depth(normalization_depth);
+
+        auto ret = ctx.normalize(ptr, end - ptr, dst, dst_end - dst);
+        ptr = ctx.get_src_next();
+        dst = ctx.get_dst_next();
+
+        switch (ret)
          {
-            *infractions += INF_JS_UNEXPECTED_TAG;
-            events->create_event(EVENT_JS_UNEXPECTED_TAG);
+        case JSTokenizer::EOS:
+            ctx.reset_depth();
+            script_continue = false;
+            break;
+        case JSTokenizer::SCRIPT_ENDED:
+            script_continue = false;
+            break;
+        case JSTokenizer::SCRIPT_CONTINUE:
+            script_continue = true;
+            break;
+        case JSTokenizer::OPENING_TAG:
+            *infractions += INF_JS_OPENING_TAG;
+            events->create_event(EVENT_JS_OPENING_TAG);
+            script_continue = false;
+            break;
+        case JSTokenizer::CLOSING_TAG:
+            *infractions += INF_JS_CLOSING_TAG;
+            events->create_event(EVENT_JS_CLOSING_TAG);
+            script_continue = false;
+            break;
+        case JSTokenizer::BAD_TOKEN:
+            *infractions += INF_JS_BAD_TOKEN;
+            events->create_event(EVENT_JS_BAD_TOKEN);
+            script_continue = false;
+            break;
+        default:
+            assert(false);
+            script_continue = false;
+            break;
          }
-        output.set(index, buffer, true);
      }
-    else
-        delete[] buffer;
+
+    if (!script_continue)
+        ssn->release_js_ctx();
+
+    if (buffer)
+        output.set(dst - buffer, (const uint8_t*)buffer, true);
  }
  
  void HttpJsNorm::legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions,
@@ -199,7 +204,7 @@ void HttpJsNorm::legacy_normalize(const Field& input, Field& output, HttpInfract
          int mindex;
  
          // Search for beginning of a javascript
-        if (javascript_search_mpse->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
+        if (mpse_otag->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
          {
              const char* js_start = ptr + mindex;
              const char* const angle_bracket =
@@ -211,7 +216,7 @@ void HttpJsNorm::legacy_normalize(const Field& input, Field& output, HttpInfract
              if (angle_bracket > js_start)
              {
                  int mid;
-                const int script_found = htmltype_search_mpse->find(
+                const int script_found = mpse_type->find(
                      js_start, (angle_bracket-js_start), search_html_found, false, &mid);
  
                  js_start = angle_bracket + 1;
@@ -219,7 +224,7 @@ void HttpJsNorm::legacy_normalize(const Field& input, Field& output, HttpInfract
                  {
                      switch (mid)
                      {
-                    case HTML_JS:
+                    case AID_JS:
                          js_present = true;
                          type_js = true;
                          break;
@@ -292,42 +297,59 @@ void HttpJsNorm::legacy_normalize(const Field& input, Field& output, HttpInfract
      }
  }
  
-/* Returning non-zero stops search, which is okay since we only look for one at a time */
  int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*)
  {
+    static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
      *((int*) index_ptr) = index - script_start_length;
      return 1;
  }
-int HttpJsNorm::search_js_src_attr_found(void*, void*, int index, void* index_ptr, void*)
+
+int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
  {
-    *((int*) index_ptr) = index - script_src_attr_length;
+    *((int*) id_ptr)  = (int)(uintptr_t)id;
      return 1;
  }
-int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
+
+int HttpJsNorm::match_otag(void*, void*, int index, void* ptr, void*)
  {
-    *((int*) id_ptr)  = (int)(uintptr_t)id;
+    *(char**)ptr += index;
      return 1;
  }
  
-bool HttpJsNorm::is_external_script(const char* it, const char* script_tag_end) const
+int HttpJsNorm::match_attr(void* pid, void*, int index, void* sctx, void*)
  {
-    int src_pos;
+    MatchContext* ctx = (MatchContext*)sctx;
+    AttrId id = (AttrId)(uintptr_t)pid;
+    const char* c;
  
-    while (js_src_attr_search_mpse->find(it, (script_tag_end - it),
-        search_js_src_attr_found, false, &src_pos))
+    switch (id)
      {
-        it += (src_pos + script_src_attr_length - 1);
-        while (++it < script_tag_end)
-        {
-            if (*it == ' ')
-                continue;
-            else if (*it == '=')
-                return true;
-            else
-                break;
-        }
+    case AID_GT:
+        ctx->next += index;
+        return 1;
+
+    case AID_SRC:
+        c = ctx->next + index;
+        while (*c == ' ') c++;
+        ctx->is_external = ctx->is_external || *c == '=';
+        return 0;
+
+    case AID_JS:
+        ctx->is_javascript = true;
+        return 0;
+
+    case AID_ECMA:
+        ctx->is_javascript = true;
+        return 0;
+
+    case AID_VB:
+        ctx->is_javascript = false;
+        return 0;
+
+    default:
+        ctx->next += index;
+        ctx->is_external = false;
+        ctx->is_javascript = false;
+        return 1;
      }
-
-    return false;
  }
-
diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h

index f48ec40d4678a97a6dd20a728fa7f48f13dd71ab..385754e169fdd5844853c93d1b4611a0db524567 100644 (file)
--- a/src/service_inspectors/http_inspect/http_js_norm.h
+++ b/src/service_inspectors/http_inspect/http_js_norm.h
@@ -25,6 +25,7 @@
  #include "search_engines/search_tool.h"
  
  #include "http_field.h"
+#include "http_flow_data.h"
  #include "http_event.h"
  #include "http_module.h"
  
@@ -35,37 +36,40 @@
  class HttpJsNorm
  {
  public:
-    HttpJsNorm(const HttpParaList::UriParam& uri_param_);
+    HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth);
      ~HttpJsNorm();
-    void legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions,
-        HttpEventGen* events, int max_javascript_whitespaces) const;
-    void enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions,
-        HttpEventGen* events, int64_t js_normalization_depth) const;
+
+    void legacy_normalize(const Field& input, Field& output, HttpInfractions*, HttpEventGen*,
+        int max_javascript_whitespaces) const;
+    void enhanced_normalize(const Field& input, Field& output, HttpInfractions*, HttpFlowData*) const;
  
      void configure();
-private:
-    bool configure_once = false;
  
-    enum JsSearchId { JS_JAVASCRIPT };
-    enum JsSrcAttrSearchId { JS_ATTR_SRC };
-    enum HtmlSearchId { HTML_JS, HTML_EMA, HTML_VB };
+private:
+    enum AttrId { AID_GT, AID_SRC, AID_JS, AID_ECMA, AID_VB };
  
-    static constexpr const char* script_start = "<SCRIPT";
-    static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
-    static constexpr const char* script_src_attr = "SRC";
-    static constexpr int script_src_attr_length = sizeof("SRC") - 1;
+    struct MatchContext
+    {
+        const char* next;
+        bool is_javascript;
+        bool is_external;
+    };
  
      const HttpParaList::UriParam& uri_param;
+    int64_t normalization_depth;
+    bool configure_once = false;
  
-    snort::SearchTool* javascript_search_mpse;
-    snort::SearchTool* js_src_attr_search_mpse;
-    snort::SearchTool* htmltype_search_mpse;
+    snort::SearchTool* mpse_otag;
+    snort::SearchTool* mpse_attr;
+    snort::SearchTool* mpse_type; // legacy only
  
-    static int search_js_found(void*, void*, int index, void*, void*);
-    static int search_js_src_attr_found(void*, void*, int index, void*, void*);
-    static int search_html_found(void* id, void*, int, void*, void*);
+    static int search_js_found(void*, void*, int index, void*, void*);  // legacy only
+    static int search_html_found(void* id, void*, int, void*, void*); // legacy only
+    static int match_otag(void*, void*, int, void*, void*);
+    static int match_attr(void*, void*, int, void*, void*);
  
-    bool is_external_script(const char* it, const char* script_tag_end) const;
+    bool alive_ctx(const HttpFlowData* ssn) const
+    { return ssn->js_normalizer; }
  };
  
  #endif
diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc

index d72377cd30289bbea59e88deda20e23e3e5ebf57..cefc1c920668e1ffa65658ce78f78281568329ea 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_module.cc
+++ b/src/service_inspectors/http_inspect/http_module.cc
@@ -197,20 +197,16 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*)
      else if (val.is("normalize_javascript"))
      {
          params->js_norm_param.normalize_javascript = val.get_bool();
-
-        if ( !params->js_norm_param.is_javascript_normalization )
-            params->js_norm_param.is_javascript_normalization =
-                params->js_norm_param.normalize_javascript;
+        params->js_norm_param.is_javascript_normalization =
+            params->js_norm_param.is_javascript_normalization
+            or params->js_norm_param.normalize_javascript;
      }
      else if (val.is("js_normalization_depth"))
      {
          int64_t v = val.get_int64();
-        params->js_norm_param.js_normalization_depth = (v == -1) ?
-          Parameter::get_int("max53") : v;
-
-        if ( !params->js_norm_param.is_javascript_normalization )
-            params->js_norm_param.is_javascript_normalization =
-                (params->js_norm_param.js_normalization_depth > 0);
+        params->js_norm_param.js_normalization_depth = v;
+        params->js_norm_param.is_javascript_normalization =
+            params->js_norm_param.is_javascript_normalization or (v != 0);
      }
      else if (val.is("max_javascript_whitespaces"))
      {
@@ -394,7 +390,7 @@ bool HttpModule::end(const char*, int, SnortConfig*)
          ParseError("Cannot use normalize_javascript and js_normalization_depth together.");
  
      if ( params->js_norm_param.is_javascript_normalization )
-        params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param);
+        params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, params->js_norm_param.js_normalization_depth);
  
      params->script_detection_handle = script_detection_handle;
  
diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc

index 3b3a4000f893cc8d681b5077c736b47d409dcf2f..26a18d48dacf56bbd4864cfbe5aaa334c93c1648 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_body.cc
+++ b/src/service_inspectors/http_inspect/http_msg_body.cc
@@ -119,7 +119,9 @@ void HttpMsgBody::analyze()
              memcpy(cumulative_buffer + partial_detect_length, decompressed_file_body.start(),
                  decompressed_file_body.length());
              cumulative_data.set(total_length, cumulative_buffer, true);
-            do_js_normalization(cumulative_data, js_norm_body);
+
+            do_js_normalization(cumulative_data, js_norm_body, true);
+
              if ((int32_t)partial_js_detect_length == js_norm_body.length())
              {
                  clean_partial(partial_inspected_octets, partial_detect_length,
@@ -128,7 +130,7 @@ void HttpMsgBody::analyze()
              }
          }
          else
-            do_js_normalization(decompressed_file_body, js_norm_body);
+            do_js_normalization(decompressed_file_body, js_norm_body, false);
  
          const int32_t detect_length =
              (js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ?
@@ -277,28 +279,57 @@ void HttpMsgBody::fd_event_callback(void* context, int event)
      }
  }
  
-void HttpMsgBody::do_js_normalization(const Field& input, Field& output)
+void HttpMsgBody::do_js_normalization(const Field& input, Field& output, bool partial_detect)
  {
-    if ( !params->js_norm_param.is_javascript_normalization or source_id == SRC_CLIENT )
+    if (!params->js_norm_param.is_javascript_normalization or source_id == SRC_CLIENT)
          output.set(input);
-    else if ( params->js_norm_param.normalize_javascript )
+    else if (params->js_norm_param.normalize_javascript)
          params->js_norm_param.js_norm->legacy_normalize(input, output,
              transaction->get_infractions(source_id), session_data->events[source_id],
              params->js_norm_param.max_javascript_whitespaces);
-    else if ( params->js_norm_param.js_normalization_depth )
+    else if (params->js_norm_param.js_normalization_depth)
      {
          output.set(input);
  
+        bool js_continuation = session_data->js_normalizer;
+        uint8_t*& buf = session_data->js_detect_buffer[source_id];
+        uint32_t& len = session_data->js_detect_length[source_id];
+
+        if (partial_detect)
+            session_data->release_js_ctx();
+        else
+        {
+            session_data->update_deallocations(len);
+            delete[] buf;
+            buf = nullptr;
+            len = 0;
+        }
+
          params->js_norm_param.js_norm->enhanced_normalize(input, enhanced_js_norm_body,
-            transaction->get_infractions(source_id), session_data->events[source_id],
-            params->js_norm_param.js_normalization_depth);
+            transaction->get_infractions(source_id), session_data);
  
          const int32_t norm_length =
              (enhanced_js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ?
              enhanced_js_norm_body.length() : session_data->detect_depth_remaining[source_id];
  
          if ( norm_length > 0 )
+        {
              set_script_data(enhanced_js_norm_body.start(), (unsigned int)norm_length);
+
+            if (partial_detect)
+                return;
+
+            if (js_continuation)
+            {
+                auto nscript_len = enhanced_js_norm_body.length();
+                uint8_t* nscript = new uint8_t[nscript_len];
+
+                memcpy(nscript, enhanced_js_norm_body.start(), nscript_len);
+                buf = nscript;
+                len = nscript_len;
+                session_data->update_allocations(len);
+            }
+        }
      }
  }
  
diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h

index d4e3f671bdb2941f4a1fdc68881f76b8146a5394..689a9381db33c6cfac5b2155b373e0fd22c8fb62 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_body.h
+++ b/src/service_inspectors/http_inspect/http_msg_body.h
@@ -58,7 +58,7 @@ private:
      void do_file_processing(const Field& file_data);
      void do_utf_decoding(const Field& input, Field& output);
      void do_file_decompression(const Field& input, Field& output);
-    void do_js_normalization(const Field& input, Field& output);
+    void do_js_normalization(const Field& input, Field& output, bool partial_detect);
      void clean_partial(uint32_t& partial_inspected_octets, uint32_t& partial_detect_length,
          uint8_t*& partial_detect_buffer,  uint32_t& partial_js_detect_length,
          int32_t detect_length);
diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc

index ad7f3c8690b1457f4aea392e59e9545b8fec6f5a..26909e2fa3864a3a4bb6112f3cfcea3fae270745 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_tables.cc
+++ b/src/service_inspectors/http_inspect/http_tables.cc
@@ -357,7 +357,9 @@ const RuleMap HttpModule::http_events[] =
      { EVENT_PDF_UNSUP_COMP_TYPE,        "PDF file unsupported compression type" },
      { EVENT_PDF_CASC_COMP,              "PDF file cascaded compression" },
      { EVENT_PDF_PARSE_FAILURE,          "PDF file parse failure" },
-    { EVENT_JS_UNEXPECTED_TAG,          "unexpected script tag within inline javascript" },
+    { EVENT_JS_BAD_TOKEN,               "bad token in JavaScript" },
+    { EVENT_JS_OPENING_TAG,             "unexpected script opening tag in JavaScript" },
+    { EVENT_JS_CLOSING_TAG,             "unexpected script closing tag in JavaScript" },
      { EVENT_LOSS_OF_SYNC,               "not HTTP traffic" },
      { EVENT_CHUNK_ZEROS,                "chunk length has excessive leading zeros" },
      { EVENT_WS_BETWEEN_MSGS,            "white space before or between messages" },
diff --git a/src/service_inspectors/http_inspect/test/http_module_test.cc b/src/service_inspectors/http_inspect/test/http_module_test.cc

index 709710239f9a2b1092d763c80d174dbd0ae96910..3aab7541ed57549eeac76e3763ae403d9df6e6a1 100755 (executable)
--- a/src/service_inspectors/http_inspect/test/http_module_test.cc
+++ b/src/service_inspectors/http_inspect/test/http_module_test.cc
@@ -64,8 +64,9 @@ int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return
  long HttpTestManager::print_amount {};
  bool HttpTestManager::print_hex {};
  
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
-    uri_param(uri_param_), javascript_search_mpse(nullptr), htmltype_search_mpse(nullptr) {}
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+    uri_param(uri_param_), normalization_depth(normalization_depth_),
+    mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
  HttpJsNorm::~HttpJsNorm() = default;
  void HttpJsNorm::configure(){}
  int64_t Parameter::get_int(char const*) { return 0; }
diff --git a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc

index fb33bd3e19c09b98f73eff03df0a6ae52a14f706..f98e616a83665ae603b63b3df8af950ba1bed17c 100755 (executable)
--- a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc
+++ b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc
@@ -53,8 +53,9 @@ LiteralSearch* LiteralSearch::instantiate(LiteralSearch::Handle*, const uint8_t*
  void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { }
  void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { }
  
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) :
-    uri_param(uri_param_), javascript_search_mpse(nullptr), htmltype_search_mpse(nullptr) {}
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+    uri_param(uri_param_), normalization_depth(normalization_depth_),
+    mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {} // stub: keeps the arguments, all mpse_* pointers stay null
  HttpJsNorm::~HttpJsNorm() = default;
  void HttpJsNorm::configure() {}
  int64_t Parameter::get_int(char const*) { return 0; }
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt

index d42b1893615341cfff278ce030343286d68af43a..38fc2ddce3816fbacce50b0d89927ad24fe4cd64 100644 (file)
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -32,7 +32,6 @@ add_library ( utils OBJECT
      dnet_header.h
      dyn_array.cc
      dyn_array.h
-    js_norm_state.h
      js_normalizer.cc
      js_normalizer.h
      js_tokenizer.h
diff --git a/src/utils/js_norm_state.h b/src/utils/js_norm_state.h

deleted file mode 100644 (file)

index 764edb3..0000000
--- a/src/utils/js_norm_state.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//--------------------------------------------------------------------------
-// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License Version 2 as published
-// by the Free Software Foundation.  You may not use, modify or distribute
-// this program under any other version of the GNU General Public License.
-//
-// This program is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-//--------------------------------------------------------------------------
-// js_norm_state.h author Oleksandr Serhiienko <oserhiie@cisco.com>
-
-#ifndef JS_NORM_STATE_H
-#define JS_NORM_STATE_H
-
-#include "main/snort_types.h"
-
-namespace snort
-{
-#define ALERT_UNEXPECTED_TAG 0x1
-
-struct JSNormState
-{
-    int64_t norm_depth;
-    uint16_t alerts;
-};
-}
-
-#endif // JS_NORM_STATE_H
-
diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc

index a5868fe05e95b6a27ace43a7dec684d657b29a06..7e4b1d9a24e13e25175f4b5b616e99a64b2b7df6 100644 (file)
--- a/src/utils/js_normalizer.cc
+++ b/src/utils/js_normalizer.cc
@@ -23,20 +23,57 @@
  
  #include "js_normalizer.h"
  
-#include <FlexLexer.h>
+using namespace snort;
+
+JSNormalizer::JSNormalizer()
+    : depth(-1),          // -1 converts to (size_t)-1, the value set_depth() treats as unlimited
+      rem_bytes(-1),
+      unlim(true),
+      src_next(nullptr),
+      dst_next(nullptr),
+      tokenizer(in, out)  // the tokenizer is bound to this object's own in/out streams
+{
+}
  
-#include "js_tokenizer.h"
+void JSNormalizer::set_depth(size_t new_depth)
+{
+    if (depth == new_depth)
+        return; // unchanged depth: the remaining-bytes budget is left as it is
  
-using namespace snort;
+    depth = new_depth;
+    rem_bytes = depth; // a new depth restarts the budget from the full value
+    unlim = depth == (size_t)-1; // the all-ones value means no depth limit
+}
  
-int JSNormalizer::normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
-        const char** ptr, int* bytes_copied, JSNormState& state)
+JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, char* dst, size_t dst_len)
  {
-    std::stringstream in, out;
-    in.rdbuf()->pubsetbuf(const_cast<char*>(srcbuf),
-        (state.norm_depth >= srclen) ? srclen : state.norm_depth);
+    if (rem_bytes == 0 && !unlim) // depth budget already spent: skip the input
+    {
+        src_next = src + src_len; // the whole input is reported as consumed
+        dst_next = dst; // nothing is written
+        return JSTokenizer::EOS;
+    }
+
+    size_t len = unlim ? src_len :
+        src_len < rem_bytes ? src_len : rem_bytes; // min(src_len, rem_bytes) when a limit applies
+    in.rdbuf()->pubsetbuf(const_cast<char*>(src), len);
+    out.rdbuf()->pubsetbuf(dst, dst_len);
+
+    JSTokenizer::JSRet ret = (JSTokenizer::JSRet)tokenizer.yylex();
+    in.clear(); // reset the stream state flags before the positions are queried
+    out.clear();
+    size_t r_bytes = in.tellg(); // read position in the input stream
+    size_t w_bytes = out.tellp(); // write position in the output stream
  
-    JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied, state);
-    return tokenizer.yylex();
+    if (!unlim)
+        rem_bytes -= r_bytes;
+    src_next = src + r_bytes;
+    dst_next = dst + w_bytes;
+
+    return rem_bytes ? ret : JSTokenizer::EOS; // a budget that just reached zero is reported as EOS
  }
  
+size_t JSNormalizer::size()
+{
+    return sizeof(JSNormalizer) + 16384; // the object itself plus the default YY_BUF_SIZE (16384) of flex
+}
diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h

index 2e562bb1b0438a705840000a6b088e3d204636f1..75bd407685396e21b31aa9e45ec2d04868297759 100644 (file)
--- a/src/utils/js_normalizer.h
+++ b/src/utils/js_normalizer.h
@@ -22,16 +22,45 @@
  
  #include "main/snort_types.h"
  
-#include "js_norm_state.h"
+#include <FlexLexer.h>
+
+#include "js_tokenizer.h"
  
  namespace snort
  {
+
  class JSNormalizer
  {
  public:
-    static int normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen,
-        const char** ptr, int* bytes_copied, JSNormState& state);
+    JSNormalizer();
+
+    const char* get_src_next() const // src position after the bytes read by the last normalize() call
+    { return src_next; }
+
+    char* get_dst_next() const // this can go beyond dst length, but no writing happens outside of dst
+    { return dst_next; }
+
+    void reset_depth() // restore the full depth budget without changing the depth itself
+    { rem_bytes = depth; }
+
+    void set_depth(size_t depth);
+
+    JSTokenizer::JSRet normalize(const char* src, size_t src_len, char* dst, size_t dst_len);
+
+    static size_t size();
+
+private:
+    size_t depth; // configured limit; (size_t)-1 stands for unlimited
+    size_t rem_bytes; // part of the limit still available to normalize()
+    bool unlim;
+    const char* src_next;
+    char* dst_next;
+
+    std::stringstream in; // its buffer is set to the caller's src on every normalize() call
+    std::stringstream out; // its buffer is set to the caller's dst on every normalize() call
+    JSTokenizer tokenizer;
  };
+
  }
  
  #endif //JS_NORMALIZER_H
diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h

index 2e284ef44eab437ee851f6f1331d2c19ef0633d4..0e0fd2a27f8edabd0b2246b717f29f57c76ded1f 100644 (file)
--- a/src/utils/js_tokenizer.h
+++ b/src/utils/js_tokenizer.h
@@ -24,8 +24,6 @@
  
  #include "log/messages.h"
  
-#include "js_norm_state.h"
-
  class JSTokenizer : public yyFlexLexer
  {
  private:
@@ -41,15 +39,20 @@ private:
      };
  
  public:
-    // we need an out stream because yyFlexLexer API strongly requires that
-    JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
-        const uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state);
+    enum JSRet
+    {
+        EOS = 0,          // zero: the scanner's EXEC() macro does not return on it
+        SCRIPT_ENDED,     // closing script tag matched outside of comments and literals
+        SCRIPT_CONTINUE,  // end of input reached, no closing script tag seen yet
+        OPENING_TAG,      // opening script tag matched inside the script
+        CLOSING_TAG,      // closing script tag matched inside a comment, string or regex literal
+        BAD_TOKEN         // e.g. a line terminator inside a string or regex literal
+    };
+
+    JSTokenizer(std::istream& in, std::ostream& out);
      ~JSTokenizer() override;
  
-    // so, Flex will treat this class as yyclass
-    // must come with yyclass Flex option
-    // don't need to define this method, it'll be substituted by Flex
-    // returns 0 if OK, 1 otherwise
+    // returns JSRet
      int yylex() override;
  
  protected:
@@ -57,51 +60,19 @@ protected:
      { snort::FatalError("%s", msg); }
  
  private:
-    void init();
-
-    // scan buffers control
-    void switch_to_temporal(const std::string& data);
      void switch_to_initial();
-
-    bool eval_identifier(const char* lexeme);
-    bool eval_string_literal(const char* match_prefix, const char quotes);
-    bool eval_regex_literal(const char* match_prefix);
-    bool eval_eof();
-    bool eval_single_line_comment();
-    bool eval_multi_line_comment();
-
-    bool parse_literal(const std::string& match_prefix, const char sentinel_ch,
-        std::string& result, bool& is_alert, bool is_regex = false);
-
-    // main lexeme handler
-    // all scanned tokens must pass here
-    bool eval(const JSToken tok, const char* lexeme);
-
-    bool normalize_identifier(const JSToken prev_tok, const char* lexeme);
-    bool normalize_punctuator(const JSToken prev_tok, const char* lexeme);
-    bool normalize_operator(const JSToken prev_tok, const char* lexeme);
-    bool normalize_directive(const JSToken prev_tok, const char* lexeme);
-    bool normalize_undefined(const JSToken prev_tok, const char* lexeme);
-    bool normalize_lexeme(const JSToken prev_tok, const char* lexeme);
-
-    bool write_output(const std::string& str);
-
-    void update_ptr();
+    void switch_to_temporal(const std::string& data);
+    JSRet eval_eof();
+    JSRet do_spacing(JSToken cur_token);
+    JSRet do_operator_spacing(JSToken cur_token);
+    bool unescape(const char* lexeme);
  
  private:
-    char* dstbuf;
-    const uint16_t dstlen;
-    const char** ptr;
-    int* bytes_copied;
-
-    struct ScanBuffers;
-    ScanBuffers* buffers = nullptr;
-    std::stringstream temporal;
-
-    JSToken prev_tok = UNDEFINED;
-
-    snort::JSNormState& state;
+    void* cur_buffer;
+    void* tmp_buffer = nullptr;
+    std::stringstream tmp;
  
+    JSToken token = UNDEFINED;
  };
  
  #endif // JS_TOKENIZER_H
diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l

index 84e5ef6ea3c895d0af23fbd157566eced8cf9681..3f9a0c748552ff4b6023bf1abf0d4e013cdea2b7 100644 (file)
--- a/src/utils/js_tokenizer.l
+++ b/src/utils/js_tokenizer.l
@@ -35,6 +35,8 @@
      #include <cassert>
  
      #include "utils/util_cstring.h"
+
+    #define EXEC(f) { auto r = (f); if (r) { BEGIN(regst); return r; } } // on a non-zero result of f: switch to regst and return it from yylex()
  %}
  
  /* The following grammar was created based on ECMAScript specification */
@@ -67,12 +69,22 @@ LINE_TERMINATORS    {LF}|{CR}|{LS}|{PS}
  
  /* comments */
  /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.4 */
-SINGLE_LINE_COMMENT    "//"
-MULTI_LINE_COMMENT     "/\*"
+LINE_COMMENT_START   "//"
+LINE_COMMENT_END1    [^<\xA\xD]*\xA
+LINE_COMMENT_END2    [^<\xA\xD]*\xD
+LINE_COMMENT_END3    [^<\xA\xD]*"<"+(?i:script)
+LINE_COMMENT_END4    [^<\xA\xD]*"<"+(?i:\/script>)
+LINE_COMMENT_SKIP    [^<\xA\xD]*"<"?
+BLOCK_COMMENT_START  "/*"
+BLOCK_COMMENT_END1   [^<*]*"*"+"/"
+BLOCK_COMMENT_END2   [^<*]*"<"+(?i:script)
+BLOCK_COMMENT_END3   [^<*]*"<"+(?i:\/script>)
+BLOCK_COMMENT_SKIP   [^<*]*[<*]?
  
  /* directives */
  /* according to https://ecma-international.org/ecma-262/5.1/#sec-14.1 */
-USE_STRICT_DIRECTIVE    "\"use strict\"";*|"\'use strict\'";*
+USE_STRICT_DIRECTIVE    "\"use strict\""|"\'use strict\'"
+USE_STRICT_DIRECTIVE_SC "\"use strict\"";*|"\'use strict\'";*
  
  /* keywords */
  /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.6.1.1 */
@@ -863,9 +875,15 @@ LITERAL_NULL                  null
  LITERAL_BOOLEAN               true|false
  LITERAL_DECIMAL               [.]?[0-9]+[\.]?[0-9]*[eE]?[0-9]*
  LITERAL_HEX_INTEGER           0x[0-9a-fA-F]*|0X[0-9a-fA-F]*
-LITERAL_DOUBLE_STRING_BEGIN   \"
-LITERAL_SINGLE_STRING_BEGIN   \'
-LITERAL_REGULAR_EXPRESSION    \/[^*\/]
+LITERAL_DQ_STRING_START       \"
+LITERAL_DQ_STRING_END         \"
+LITERAL_DQ_STRING_SKIP        \\\"
+LITERAL_SQ_STRING_START       \'
+LITERAL_SQ_STRING_END         \'
+LITERAL_SQ_STRING_SKIP        \\\'
+LITERAL_REGEX_START           \/[^*\/]
+LITERAL_REGEX_END             \/[gimsuy]*
+LITERAL_REGEX_SKIP            \\\/
  /* extra literals */
  /* according to https://ecma-international.org/ecma-262/5.1/#sec-4.3 */
  LITERAL_UNDEFINED             undefined
@@ -873,9 +891,9 @@ LITERAL_INFINITY              Infinity|\xE2\x88\x9E
  LITERAL_NAN                   NaN
  LITERAL                       {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN}
  
-HTML_COMMENT_OPEN         <!--
-HTML_TAG_SCRIPT_OPEN      (?i:<script)
-HTML_TAG_SCRIPT_CLOSE     (?i:<\/script>)
+HTML_COMMENT_OPEN         "<"+"!--"
+HTML_TAG_SCRIPT_OPEN      "<"+(?i:script)
+HTML_TAG_SCRIPT_CLOSE     "<"+(?i:\/script>)
  
  /* from 0x000 to 0x10FFFD to match undefined tokens */
  /* UTF-8 ranges generated with https://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html */
@@ -883,34 +901,99 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  
  /* match regex literal only if the previous token was of type PUNCTUATOR_3 or KEYWORD */
  /* this resolves an ambiguity with a division operator: var x = 2/2/1; */
-%x regex
+%s regst
  
  /* do not match division operators as punctuators if the previous token was of type PUNCTUATOR */
  /* this resolves an ambiguity with regular expression in some cases such as (/=abc=/g) */
-%x div_op
+%s divop
+
+/* in a single line comment */
+%x lcomm
+
+/* in a multi line comment */
+%x bcomm
+
+/* in a single-quoted string */
+%x sqstr
+
+/* in a double-quoted string */
+%x dqstr
+
+/* in a regular expression */
+%x regex
  
  %%
-<*>{WHITESPACES}                                        { /* skip */ }
-<*>{CHAR_ESCAPE_SEQUENCES}                              { /* skip */ }
-<*>{LINE_TERMINATORS}                                   { BEGIN(regex); }
-<*>{HTML_TAG_SCRIPT_OPEN}                               { state.alerts |= ALERT_UNEXPECTED_TAG; update_ptr(); return 1; }
-<*>{HTML_TAG_SCRIPT_CLOSE}                              { update_ptr(); *ptr -= YYLeng(); return 0; }
-<*>{HTML_COMMENT_OPEN}                                  { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
-<*>{SINGLE_LINE_COMMENT}                                { if ( !eval_single_line_comment() ) { update_ptr(); return 1; } }
-<*>{MULTI_LINE_COMMENT}                                 { if ( !eval_multi_line_comment() ) { update_ptr(); return 1; } }
-<*>{USE_STRICT_DIRECTIVE}                               { if ( !eval(DIRECTIVE, YYText()) ) { update_ptr(); return 1; } }
-<*>{KEYWORD}                                            { if ( !eval(KEYWORD, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
-<*>{CLOSING_BRACES}                                     { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<div_op>{DIV_OPERATOR}|{DIV_ASSIGNMENT_OPERATOR}        { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } }
-<*>{PUNCTUATOR}                                         { if ( !eval(PUNCTUATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(regex); }
-<*>{OPERATOR}                                           { if ( !eval(OPERATOR, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{LITERAL}                                            { if ( !eval(LITERAL, YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{LITERAL_DOUBLE_STRING_BEGIN}                        { if ( !eval_string_literal(YYText(), '"') ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{LITERAL_SINGLE_STRING_BEGIN}                        { if ( !eval_string_literal(YYText(), '\'') ) { update_ptr(); return 1; } BEGIN(div_op); }
-<regex>{LITERAL_REGULAR_EXPRESSION}                     { if ( !eval_regex_literal(YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>{IDENTIFIER}                                         { if ( !eval_identifier(YYText()) ) { update_ptr(); return 1; } BEGIN(div_op); }
-<*>.|{ALL_UNICODE}                                      { if ( !eval(UNDEFINED, YYText()) ) { update_ptr(); return 1; } }
-<<EOF>>                                                 { if ( eval_eof() ) { update_ptr(); return 0; } }
+{WHITESPACES}                       { }
+{CHAR_ESCAPE_SEQUENCES}             { }
+{LINE_TERMINATORS}                  { BEGIN(regst); }
+
+<INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); return OPENING_TAG; }
+{HTML_TAG_SCRIPT_CLOSE}             { BEGIN(regst); return SCRIPT_ENDED; }
+
+       {HTML_COMMENT_OPEN}          { BEGIN(lcomm); }
+       {LINE_COMMENT_START}         { BEGIN(lcomm); }
+<lcomm>{LINE_COMMENT_END1}          { BEGIN(regst); }
+<lcomm>{LINE_COMMENT_END2}          { BEGIN(regst); }
+<lcomm>{LINE_COMMENT_END3}          { BEGIN(regst); return OPENING_TAG; }
+<lcomm>{LINE_COMMENT_END4}          { BEGIN(regst); return CLOSING_TAG; }
+<lcomm>{LINE_COMMENT_SKIP}          { }
+<lcomm><<EOF>>                      { return SCRIPT_CONTINUE; }
+
+       {BLOCK_COMMENT_START}        { BEGIN(bcomm); }
+<bcomm>{BLOCK_COMMENT_END1}         { BEGIN(regst); }
+<bcomm>{BLOCK_COMMENT_END2}         { BEGIN(regst); return OPENING_TAG; }
+<bcomm>{BLOCK_COMMENT_END3}         { BEGIN(regst); return CLOSING_TAG; }
+<bcomm>{BLOCK_COMMENT_SKIP}         { }
+<bcomm><<EOF>>                      { return SCRIPT_CONTINUE; }
+
+       {LITERAL_DQ_STRING_START}    { EXEC(do_spacing(LITERAL)); ECHO; BEGIN(dqstr); }
+<dqstr>{LITERAL_DQ_STRING_END}      { ECHO; BEGIN(divop); }
+<dqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); return CLOSING_TAG; }
+<dqstr>\\{CR}{LF}                   { }
+<dqstr>\\{LF}                       { }
+<dqstr>\\{CR}                       { }
+<dqstr>{LINE_TERMINATORS}           { BEGIN(regst); return BAD_TOKEN; }
+<dqstr>{LITERAL_DQ_STRING_SKIP}     { ECHO; }
+<dqstr>.                            { ECHO; }
+<dqstr><<EOF>>                      { return SCRIPT_CONTINUE; }
+
+       {LITERAL_SQ_STRING_START}    { EXEC(do_spacing(LITERAL)); ECHO; BEGIN(sqstr); }
+<sqstr>{LITERAL_SQ_STRING_END}      { ECHO; BEGIN(divop); }
+<sqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); return CLOSING_TAG; }
+<sqstr>\\{CR}{LF}                   { }
+<sqstr>\\{LF}                       { }
+<sqstr>\\{CR}                       { }
+<sqstr>{LINE_TERMINATORS}           { BEGIN(regst); return BAD_TOKEN; }
+<sqstr>{LITERAL_SQ_STRING_SKIP}     { ECHO; }
+<sqstr>.                            { ECHO; }
+<sqstr><<EOF>>                      { return SCRIPT_CONTINUE; }
+
+<regst>{LITERAL_REGEX_START}        { EXEC(do_spacing(LITERAL)); yyout << '/'; yyless(1); /* keep only '/', rescan the rest in regex state */ BEGIN(regex); }
+<regex>{LITERAL_REGEX_END}          { ECHO; BEGIN(divop); }
+<regex>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); return CLOSING_TAG; }
+<regex>{LITERAL_REGEX_SKIP}         { ECHO; }
+<regex>\\{LF}                       |
+<regex>\\{CR}                       |
+<regex>{LINE_TERMINATORS}           { BEGIN(regst); return BAD_TOKEN; }
+<regex>[^<\xA\xD\xE2\\\/]+          { ECHO; /* {name} is not expanded inside [...], so CR/LF are spelled in hex; 0xE2 (UTF-8 lead byte of U+2028/U+2029, presumably LS/PS) is excluded so that the line terminator rule above can match */ }
+<regex><<EOF>>                      { return SCRIPT_CONTINUE; }
+
+<divop>{DIV_OPERATOR}               |
+<divop>{DIV_ASSIGNMENT_OPERATOR}    { ECHO; token = PUNCTUATOR; BEGIN(INITIAL); }
+
+{CLOSING_BRACES}                    { ECHO; token = PUNCTUATOR; BEGIN(divop); }
+{PUNCTUATOR}                        { ECHO; token = PUNCTUATOR; BEGIN(regst); }
+
+{USE_STRICT_DIRECTIVE}              { EXEC(do_spacing(DIRECTIVE)); ECHO; BEGIN(INITIAL); yyout << ';'; }
+{USE_STRICT_DIRECTIVE_SC}           { EXEC(do_spacing(DIRECTIVE)); ECHO; BEGIN(INITIAL); }
+{KEYWORD}                           { EXEC(do_spacing(KEYWORD)); ECHO; BEGIN(regst); }
+{OPERATOR}                          { EXEC(do_operator_spacing(OPERATOR)); ECHO; BEGIN(divop); }
+{LITERAL}                           { EXEC(do_spacing(LITERAL)); ECHO; BEGIN(divop); }
+{IDENTIFIER}                        { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)); ECHO; }; BEGIN(divop); }
+
+.|{ALL_UNICODE}                     { ECHO; token = UNDEFINED; BEGIN(INITIAL); }
+<<EOF>>                             { EXEC(eval_eof()); }
+
  %%
  
  // static helper functions
@@ -987,356 +1070,79 @@ static std::string unescape_unicode(const char* lexeme)
      return res;
  }
  
-static bool contains_script_tags(const std::string& str)
-{
-    static constexpr const char* script = "SCRIPT";
-    static constexpr const int script_len = sizeof("SCRIPT") - 1;
-
-    const char* start = str.c_str();
-    const char* end = start + str.size();
-    const char* it = start;
-
-    while ( it )
-    {
-        it = snort::SnortStrcasestr(it, (end - it), script);
-        if ( it )
-        {
-            int d = it - start;
-            if ( d == 1 )
-            {
-                if ( *(it - 1) == '<' )
-                    return true;
-            }
-            else if ( d >= 2 )
-            {
-                if ( (*(it - 1) == '/' and *(it - 2) == '<') or
-                    (*(it - 1) == '<' and *(it - 2) != '\\') )
-                {
-                    return true;
-                }
-            }
-            it += script_len;
-        }
-    }
-    return false;
-}
-
  // JSTokenizer members
  
-struct JSTokenizer::ScanBuffers
-{
-    YY_BUFFER_STATE initial = nullptr;
-    YY_BUFFER_STATE temporal = nullptr;
-};
-
-JSTokenizer::JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf,
-    uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state)
-    : yyFlexLexer(in, out),
-      dstbuf(dstbuf),
-      dstlen(dstlen),
-      ptr(ptr),
-      bytes_copied(bytes_copied),
-      state(state)
+JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out)
+    : yyFlexLexer(in, out)
  {
-    assert(bytes_copied);
-    init();
+    BEGIN(regst); // start in regst, the start condition in which the beginning of a regex literal is matched
  }
  
  JSTokenizer::~JSTokenizer()
-{ delete buffers; }
-
-void JSTokenizer::init()
  {
-    buffers = new ScanBuffers;
-    *bytes_copied = 0;
-
-    // since regular expression may occur at the beginning of the input
-    BEGIN(regex);
+    yy_delete_buffer((YY_BUFFER_STATE)tmp_buffer); // frees a temporary scan buffer that is still set in tmp_buffer
  }
  
  void JSTokenizer::switch_to_temporal(const std::string& data)
  {
-    temporal.str(data);
-    buffers->initial = YY_CURRENT_BUFFER;
-    buffers->temporal = yy_create_buffer(temporal, data.size());
-    yy_switch_to_buffer(buffers->temporal);
+    tmp.str(data); // the temporary stream now holds the text to be scanned
+    cur_buffer = YY_CURRENT_BUFFER; // remember the active buffer for switch_to_initial()
+    tmp_buffer = yy_create_buffer(tmp, data.size());
+    yy_switch_to_buffer((YY_BUFFER_STATE)tmp_buffer); // scanning continues from the temporary buffer
  }
  
  void JSTokenizer::switch_to_initial()
  {
-    yy_delete_buffer(buffers->temporal);
-    yy_switch_to_buffer(buffers->initial);
-    buffers->temporal = nullptr;
-}
-
-bool JSTokenizer::eval_identifier(const char* lexeme)
-{
-    // If an identifier has escaped Unicode, unescape and match again
-    // in a temporal scan buffer
-    if ( strstr(lexeme, "\\u") )
-    {
-        const std::string unescaped_lex = unescape_unicode(lexeme);
-        switch_to_temporal(unescaped_lex);
-        return true;
-    }
-
-    return eval(IDENTIFIER, lexeme);
-}
-
-bool JSTokenizer::eval_string_literal(const char* match_prefix, const char quotes)
-{
-    std::string s;
-    bool is_alert = false;
-    bool is_ok = parse_literal(match_prefix, quotes, s, is_alert);
-
-    if ( is_alert )
-        return false;
-
-    return eval(is_ok ? LITERAL : UNDEFINED, s.c_str());
-}
-
-bool JSTokenizer::eval_regex_literal(const char* match_prefix)
-{
-    static const std::string regex_flags = "gimsuy";
-
-    std::string s;
-    bool is_alert = false;
-    bool is_ok = parse_literal(match_prefix, '/', s, is_alert, true);
-
-    if ( is_alert )
-        return false;
-
-    // append regex flags
-    char c;
-    while ( (c = yyinput()) != 0 )
-    {
-        if ( regex_flags.find(c) != std::string::npos )
-            s += c;
-        else
-        {
-            unput(c);
-            break;
-        }
-    }
-
-    return eval(is_ok ? LITERAL : UNDEFINED, s.c_str());
+    yy_switch_to_buffer((YY_BUFFER_STATE)cur_buffer);
+    yy_delete_buffer((YY_BUFFER_STATE)tmp_buffer);
+    tmp_buffer = nullptr;
  }
  
  // A return value of this method uses to terminate the scanner
  // true - terminate, false - continue scanning
  // Use this method only in <<EOF>> handler
  // The return value should be used to make a decision about yyterminate() call
-bool JSTokenizer::eval_eof()
+JSTokenizer::JSRet JSTokenizer::eval_eof()
  {
      // If the temporal scan buffer reaches EOF, cleanup and
      // continue with the initial one
-    if ( buffers->temporal )
+    if ( tmp_buffer )
      {
          switch_to_initial();
-        return false;
+        return EOS;
      }
  
      // Normal termination
-    return true;
-}
-
-bool JSTokenizer::eval_single_line_comment()
-{
-    char c;
-    std::string result;
-
-    while ( (c = yyinput()) != 0 )
-    {
-        result += c;
-        if ( c == '\n' )
-            break;
-    }
-
-    if ( contains_script_tags(result) )
-    {
-        state.alerts |= ALERT_UNEXPECTED_TAG;
-        return false;
-    }
-    else
-        return true;
-}
-
-bool JSTokenizer::eval_multi_line_comment()
-{
-    char c;
-    std::string result;
-
-    while ( (c = yyinput()) != 0 )
-    {
-        result += c;
-        if ( c == '*' )
-        {
-            if ( (c = yyinput()) == '/' )
-                break;
-            else
-                unput(c);
-        }
-    }
-
-    if ( contains_script_tags(result) )
-    {
-        state.alerts |= ALERT_UNEXPECTED_TAG;
-        return false;
-    }
-    else
-        return true;
-}
-
-// Unicode line terminators
-#define LS "\u2028"
-#define PS "\u2029"
-
-// This method delineates and validates literals from the input stream such as:
-//   1. double quotes string literal
-//   2. single quotes string literal
-//   3. regex literal
-// Call this method when lexer meets those literals
-// match_prefix is a lexeme part already matched by the lexer (with sentinel char)
-bool JSTokenizer::parse_literal(const std::string& match_prefix, const char sentinel_ch,
-    std::string& result, bool& is_alert, bool is_regex)
-{
-    bool is_ok = true;
-    char c;
-    short n = 0;
-
-    for ( auto it = match_prefix.crbegin(); it != match_prefix.crend(); ++it )
-        unput(*it);
-
-    result += yyinput();
-    while ( (c = yyinput()) != 0 )
-    {
-        result += c;
-
-        if ( c == sentinel_ch and !( n % 2 ) )
-            break;
-        else if ( c == '\\' )
-        {
-            ++n;
-            continue;
-        }
-        else if ( c == '\r' )
-        {
-            if ( is_regex )
-            {
-                is_ok = false;
-                result = result.substr(0, result.size() - n);
-            }
-            else if ( n == 0 )
-                is_ok = false;
-            else if ( ( (c = yyinput()) != 0 ) and c == '\n' )
-            {
-                result = result.substr(0, result.size() - 2);
-                continue;
-            }
-            else
-            {
-                is_ok = false;
-                unput(c);
-            }
-
-            break;
-        }
-        else if ( c == '\n' )
-        {
-            if ( is_regex )
-            {
-                is_ok = false;
-                result = result.substr(0, result.size() - n);
-            }
-            else if ( n == 0 )
-                is_ok = false;
-            else
-            {
-                result = result.substr(0, result.size() - 2);
-                continue;
-            }
-
-            break;
-        }
-
-        n = 0;
-    }
-
-    if ( !is_ok )
-    {
-        result.back() = sentinel_ch;
-        return is_ok;
-    }
-
-    if ( result.find(LS) != std::string::npos or result.find(PS) != std::string::npos )
-        is_ok = false;
-
-    if ( contains_script_tags(result) )
-    {
-        is_alert = true;
-        state.alerts |= ALERT_UNEXPECTED_TAG;
-    }
-
-    return is_ok;
+    return SCRIPT_CONTINUE;
  }
  
-bool JSTokenizer::eval(const JSToken tok, const char* lexeme)
+JSTokenizer::JSRet JSTokenizer::do_spacing(JSToken cur_token)
  {
-    bool ret = false;
-
-    switch( tok )
+    switch (token)
      {
-    case IDENTIFIER:
-        ret = normalize_identifier(prev_tok, lexeme);
-    break;
-
-    case KEYWORD:
-        ret = normalize_lexeme(prev_tok, lexeme);
-    break;
-
      case PUNCTUATOR:
-        ret = normalize_punctuator(prev_tok, lexeme);
-    break;
-
      case OPERATOR:
-        ret = normalize_operator(prev_tok, lexeme);
-    break;
-
-    case LITERAL:
-        ret = normalize_lexeme(prev_tok, lexeme);
-    break;
-
      case DIRECTIVE:
-        ret = normalize_directive(prev_tok, lexeme);
-    break;
-
      case UNDEFINED:
-        ret = normalize_undefined(prev_tok, lexeme);
-    break;
-    }
+        token = cur_token;
+        return EOS;
  
-    prev_tok = tok;
-
-    // set a default pattern match start condition
-    if ( yy_start != INITIAL )
-        BEGIN(INITIAL);
-
-    return ret;
-}
+    case IDENTIFIER:
+    case KEYWORD:
+    case LITERAL:
+        yyout << ' ';
+        token = cur_token;
+        return EOS;
+    }
  
-bool JSTokenizer::normalize_identifier(const JSToken prev_tok, const char* lexeme)
-{
-    return normalize_lexeme(prev_tok, lexeme);
-}
+    assert(false);
  
-bool JSTokenizer::normalize_punctuator(const JSToken, const char* lexeme)
-{
-    return write_output(lexeme);
+    return BAD_TOKEN;
  }
  
-bool JSTokenizer::normalize_operator(const JSToken prev_tok, const char* lexeme)
+JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token)
  {
-    switch( prev_tok )
+    switch (token)
      {
      case IDENTIFIER:
      case KEYWORD:
@@ -1344,66 +1150,28 @@ bool JSTokenizer::normalize_operator(const JSToken prev_tok, const char* lexeme)
      case LITERAL:
      case DIRECTIVE:
      case UNDEFINED:
-        return write_output(lexeme);
-    break;
+        token = cur_token;
+        return EOS;
  
      case OPERATOR:
-        return write_output(" " + std::string(lexeme));
-    break;
+        yyout << ' ';
+        token = cur_token;
+        return EOS;
      }
  
-    return false;
-}
-
-bool JSTokenizer::normalize_directive(const JSToken prev_tok, const char* lexeme)
-{
-    std::string str = lexeme;
+    assert(false);
  
-    if ( str.rfind(";") == std::string::npos )
-        str += ";";
-
-    return normalize_lexeme(prev_tok, str.c_str());
+    return BAD_TOKEN;
  }
  
-bool JSTokenizer::normalize_undefined(const JSToken, const char* lexeme)
-{ return write_output(lexeme); }
-
-bool JSTokenizer::normalize_lexeme(const JSToken prev_tok, const char* lexeme)
+bool JSTokenizer::unescape(const char* lexeme)
  {
-    switch( prev_tok )
+    if ( strstr(lexeme, "\\u") )
      {
-    case PUNCTUATOR:
-    case OPERATOR:
-    case DIRECTIVE:
-    case UNDEFINED:
-        return write_output(lexeme);
-    break;
-
-    case IDENTIFIER:
-    case KEYWORD:
-    case LITERAL:
-        return write_output(" " + std::string(lexeme));
-    break;
-    }
-
-    return false;
-}
-
-bool JSTokenizer::write_output(const std::string& str)
-{
-    size_t len = str.size();
-    int new_size = *bytes_copied + len;
-
-    if ( new_size >= 0 and new_size <= dstlen )
-        memcpy((char*) dstbuf, (const char*)str.c_str(), len);
-    else
+        const std::string unescaped_lex = unescape_unicode(lexeme);
+        switch_to_temporal(unescaped_lex);
          return false;
+    }
  
-    dstbuf += len;
-    *bytes_copied = new_size;
      return true;
  }
-
-void JSTokenizer::update_ptr()
-{ *ptr += yyin.tellg(); }
-
diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc

index 1100bbf48931d5548dd921c6241d93e544e6b1d0..b66d77766be7d7963950c9780245f8a5a6887739 100644 (file)
--- a/src/utils/test/js_normalizer_test.cc
+++ b/src/utils/test/js_normalizer_test.cc
@@ -36,32 +36,36 @@ namespace snort
  
  using namespace snort;
  
-#define NORM_DEPTH 65535
-
-#define NORMALIZE(srcbuf, expected)                                        \
-    char dstbuf[sizeof(expected)];                                         \
-    int bytes_copied;                                                      \
-    const char* ptr = srcbuf;                                              \
-    JSNormState state;                                                     \
-    state.norm_depth = NORM_DEPTH;                                         \
-    state.alerts = 0;                                                      \
-    int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf),              \
-        dstbuf, sizeof(dstbuf), &ptr, &bytes_copied, state);
-
-#define VALIDATE(srcbuf, expected)                    \
-    CHECK(ret == 0);                                  \
-    CHECK((ptr - srcbuf) == sizeof(srcbuf));          \
-    CHECK(bytes_copied == sizeof(expected) - 1);      \
-    CHECK(!memcmp(dstbuf, expected, bytes_copied));
-
-#define VALIDATE_FAIL(srcbuf, expected, ret_code, ptr_offset)      \
-    CHECK(ret == ret_code);                                        \
-    CHECK((ptr - srcbuf) == ptr_offset);                           \
-    CHECK(bytes_copied == sizeof(expected) - 1);                   \
-    CHECK(!memcmp(dstbuf, expected, bytes_copied));
-
-#define VALIDATE_ALERT(alert)       \
-    CHECK(state.alerts & alert);
+#define DEPTH 65535
+
+#define NORMALIZE(src, expected)                                    \
+    char dst[sizeof(expected)];                                     \
+    JSNormalizer norm;                                              \
+    norm.set_depth(DEPTH);                                          \
+    auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst));  \
+    const char* ptr = norm.get_src_next();                          \
+    int act_len = norm.get_dst_next() - dst;                        \
+
+#define VALIDATE(src, expected)                 \
+    CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
+    CHECK((ptr - src) == sizeof(src));          \
+    CHECK(act_len == sizeof(expected) - 1);     \
+    CHECK(!memcmp(dst, expected, act_len));
+
+#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset)  \
+    CHECK(ret == ret_code);                                 \
+    CHECK((ptr - src) == ptr_offset);                       \
+    CHECK(act_len == sizeof(expected) - 1);                 \
+    CHECK(!memcmp(dst, expected, act_len));
+
+#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len)   \
+    {                                                                   \
+        JSNormalizer norm;                                              \
+        norm.set_depth(depth);                                          \
+        ret = norm.normalize(src, src_len, dst, dst_len);               \
+        ptr = norm.get_src_next();                                      \
+        len = norm.get_dst_next() - dst;                                \
+    }                                                                   \
  
  // ClamAV test cases
  static const char clamav_buf0[] =
@@ -256,6 +260,9 @@ TEST_CASE("clamav tests", "[JSNormalizer]")
      SECTION("test_case_14")
      {
          NORMALIZE(clamav_buf14, clamav_expected14);
+        // the trailing \0 is treated as a part of the string here, so the output is one
+        // byte longer; trim the output length so the common VALIDATE macro can be reused
+        act_len -= 1;
          VALIDATE(clamav_buf14, clamav_expected14);
      }
  }
@@ -333,64 +340,56 @@ TEST_CASE("all patterns", "[JSNormalizer]")
      }
      SECTION("directives")
      {
-        const char srcbuf0[] = "'use strict'\nvar a = 1;";
-        const char srcbuf1[] = "\"use strict\"\nvar a = 1;";
-        const char srcbuf2[] = "'use strict';var a = 1;";
-        const char srcbuf3[] = "\"use strict\";var a = 1;";
-        const char srcbuf4[] = "var a = 1 'use strict';";
+        const char src0[] = "'use strict'\nvar a = 1;";
+        const char src1[] = "\"use strict\"\nvar a = 1;";
+        const char src2[] = "'use strict';var a = 1;";
+        const char src3[] = "\"use strict\";var a = 1;";
+        const char src4[] = "var a = 1 'use strict';";
+
          const char expected0[] = "'use strict';var a=1;";
          const char expected1[] = "\"use strict\";var a=1;";
          const char expected2[] = "var a=1 'use strict';";
-        char dstbuf0[sizeof(expected0)];
-        char dstbuf1[sizeof(expected1)];
-        char dstbuf2[sizeof(expected0)];
-        char dstbuf3[sizeof(expected1)];
-        char dstbuf4[sizeof(expected2)];
-        int bytes_copied0, bytes_copied1, bytes_copied2, bytes_copied3, bytes_copied4;
-        const char* ptr0 = srcbuf0;
-        const char* ptr1 = srcbuf1;
-        const char* ptr2 = srcbuf2;
-        const char* ptr3 = srcbuf3;
-        const char* ptr4 = srcbuf4;
-        JSNormState state;
-        state.norm_depth = NORM_DEPTH;
-        state.alerts = 0;
-
-        int ret0 = JSNormalizer::normalize(srcbuf0, sizeof(srcbuf0), dstbuf0, sizeof(dstbuf0),
-            &ptr0, &bytes_copied0, state);
-        int ret1 = JSNormalizer::normalize(srcbuf1, sizeof(srcbuf1), dstbuf1, sizeof(dstbuf1),
-            &ptr1, &bytes_copied1, state);
-        int ret2 = JSNormalizer::normalize(srcbuf2, sizeof(srcbuf2), dstbuf2, sizeof(dstbuf2),
-            &ptr2, &bytes_copied2, state);
-        int ret3 = JSNormalizer::normalize(srcbuf3, sizeof(srcbuf3), dstbuf3, sizeof(dstbuf3),
-            &ptr3, &bytes_copied3, state);
-        int ret4 = JSNormalizer::normalize(srcbuf4, sizeof(srcbuf4), dstbuf4, sizeof(dstbuf4),
-            &ptr4, &bytes_copied4, state);
-
-        CHECK(ret0 == 0);
-        CHECK((ptr0 - srcbuf0) == sizeof(srcbuf0));
-        CHECK(bytes_copied0 == sizeof(expected0) - 1);
-        CHECK(!memcmp(dstbuf0, expected0, bytes_copied0));
-
-        CHECK(ret1 == 0);
-        CHECK((ptr1 - srcbuf1) == sizeof(srcbuf1));
-        CHECK(bytes_copied1 == sizeof(expected1) - 1);
-        CHECK(!memcmp(dstbuf1, expected1, bytes_copied1));
-
-        CHECK(ret2 == 0);
-        CHECK((ptr2 - srcbuf2) == sizeof(srcbuf2));
-        CHECK(bytes_copied2 == sizeof(expected0) - 1);
-        CHECK(!memcmp(dstbuf2, expected0, bytes_copied2));
-
-        CHECK(ret3 == 0);
-        CHECK((ptr3 - srcbuf3) == sizeof(srcbuf3));
-        CHECK(bytes_copied3 == sizeof(expected1) - 1);
-        CHECK(!memcmp(dstbuf3, expected1, bytes_copied3));
-
-        CHECK(ret4 == 0);
-        CHECK((ptr4 - srcbuf4) == sizeof(srcbuf4));
-        CHECK(bytes_copied4 == sizeof(expected2) - 1);
-        CHECK(!memcmp(dstbuf4, expected2, bytes_copied4));
+
+        char dst0[sizeof(expected0)];
+        char dst1[sizeof(expected1)];
+        char dst2[sizeof(expected0)];
+        char dst3[sizeof(expected1)];
+        char dst4[sizeof(expected2)];
+
+        int ret0, ret1, ret2, ret3, ret4;
+        const char *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
+        int act_len0, act_len1, act_len2, act_len3, act_len4;
+
+        NORMALIZE_L(src0, sizeof(src0), dst0, sizeof(dst0), DEPTH, ret0, ptr0, act_len0);
+        NORMALIZE_L(src1, sizeof(src1), dst1, sizeof(dst1), DEPTH, ret1, ptr1, act_len1);
+        NORMALIZE_L(src2, sizeof(src2), dst2, sizeof(dst2), DEPTH, ret2, ptr2, act_len2);
+        NORMALIZE_L(src3, sizeof(src3), dst3, sizeof(dst3), DEPTH, ret3, ptr3, act_len3);
+        NORMALIZE_L(src4, sizeof(src4), dst4, sizeof(dst4), DEPTH, ret4, ptr4, act_len4);
+
+        CHECK(ret0 == JSTokenizer::SCRIPT_CONTINUE);
+        CHECK((ptr0 - src0) == sizeof(src0));
+        CHECK(act_len0 == sizeof(expected0) - 1);
+        CHECK(!memcmp(dst0, expected0, act_len0));
+
+        CHECK(ret1 == JSTokenizer::SCRIPT_CONTINUE);
+        CHECK((ptr1 - src1) == sizeof(src1));
+        CHECK(act_len1 == sizeof(expected1) - 1);
+        CHECK(!memcmp(dst1, expected1, act_len1));
+
+        CHECK(ret2 == JSTokenizer::SCRIPT_CONTINUE);
+        CHECK((ptr2 - src2) == sizeof(src2));
+        CHECK(act_len2 == sizeof(expected0) - 1);
+        CHECK(!memcmp(dst2, expected0, act_len2));
+
+        CHECK(ret3 == JSTokenizer::SCRIPT_CONTINUE);
+        CHECK((ptr3 - src3) == sizeof(src3));
+        CHECK(act_len3 == sizeof(expected1) - 1);
+        CHECK(!memcmp(dst3, expected1, act_len3));
+
+        CHECK(ret4 == JSTokenizer::SCRIPT_CONTINUE);
+        CHECK((ptr4 - src4) == sizeof(src4));
+        CHECK(act_len4 == sizeof(expected2) - 1);
+        CHECK(!memcmp(dst4, expected2, act_len4));
      }
      SECTION("punctuators")
      {
@@ -673,43 +672,51 @@ static const char syntax_cases_expected14[] =
      "var a=b% -c;"
      "var a=b+ -c;";
  
+// In the following cases:
+//   the read cursor stops right after the offending sequence inside the literal
+//   the output is cut off at that point, so the malformed literal is never emitted in full
+
  static const char syntax_cases_buf15[] =
-    "var str1 = 'abc\u2028 def' ;\n"
-    "var str2 = 'abc\u2029 def' ;\n\r";
+    "var invalid_str = 'abc\u2028 def' ;\n";
  
  static const char syntax_cases_expected15[] =
-    "var str1='abc\u2028 def';"
-    "var str2='abc\u2029 def';";
+    "var invalid_str='abc";
  
  static const char syntax_cases_buf16[] =
      "var invalid_str = \"abc\n def\"";
  
  static const char syntax_cases_expected16[] =
-    "var invalid_str=\"abc\"def \"";
+    "var invalid_str=\"abc";
  
  static const char syntax_cases_buf17[] =
      "var invalid_str = 'abc\r def'";
  
  static const char syntax_cases_expected17[] =
-    "var invalid_str='abc'def '";
+    "var invalid_str='abc";
  
  static const char syntax_cases_buf18[] =
      "var invalid_str = 'abc\\\n\r def'";
  
  static const char syntax_cases_expected18[] =
-    "var invalid_str='abc'def '";
+    "var invalid_str='abc";
  
  static const char syntax_cases_buf19[] =
      "var invalid_re = /abc\\\n def/";
  
  static const char syntax_cases_expected19[] =
-    "var invalid_re=/abc/def/";
+    "var invalid_re=/abc";
  
  static const char syntax_cases_buf20[] =
      "var invalid_re = /abc\\\r\n def/";
  
  static const char syntax_cases_expected20[] =
-    "var invalid_re=/abc/def/";
+    "var invalid_re=/abc";
+
+static const char syntax_cases_buf21[] =
+    "var invalid_str = 'abc\u2029 def' ;\n\r";
+
+static const char syntax_cases_expected21[] =
+    "var invalid_str='abc";
  
  TEST_CASE("syntax cases", "[JSNormalizer]")
  {
@@ -788,100 +795,115 @@ TEST_CASE("syntax cases", "[JSNormalizer]")
          NORMALIZE(syntax_cases_buf14, syntax_cases_expected14);
          VALIDATE(syntax_cases_buf14, syntax_cases_expected14);
      }
-    SECTION("LS and PS chars within literal")
+}
+
+TEST_CASE("bad tokens", "[JSNormalizer]")
+{
+    SECTION("LS chars within literal")
      {
          NORMALIZE(syntax_cases_buf15, syntax_cases_expected15);
-        VALIDATE(syntax_cases_buf15, syntax_cases_expected15);
+        VALIDATE_FAIL(syntax_cases_buf15, syntax_cases_expected15, JSTokenizer::BAD_TOKEN, 25);
+    }
+    SECTION("PS chars within literal")
+    {
+        NORMALIZE(syntax_cases_buf21, syntax_cases_expected21);
+        VALIDATE_FAIL(syntax_cases_buf21, syntax_cases_expected21, JSTokenizer::BAD_TOKEN, 25);
      }
      SECTION("explicit LF within literal")
      {
          NORMALIZE(syntax_cases_buf16, syntax_cases_expected16);
-        VALIDATE(syntax_cases_buf16, syntax_cases_expected16);
+        VALIDATE_FAIL(syntax_cases_buf16, syntax_cases_expected16, JSTokenizer::BAD_TOKEN, 23);
      }
      SECTION("explicit CR within literal")
      {
          NORMALIZE(syntax_cases_buf17, syntax_cases_expected17);
-        VALIDATE(syntax_cases_buf17, syntax_cases_expected17);
+        VALIDATE_FAIL(syntax_cases_buf17, syntax_cases_expected17, JSTokenizer::BAD_TOKEN, 23);
      }
      SECTION("escaped LF-CR sequence within literal")
      {
          NORMALIZE(syntax_cases_buf18, syntax_cases_expected18);
-        VALIDATE(syntax_cases_buf18, syntax_cases_expected18);
+        VALIDATE_FAIL(syntax_cases_buf18, syntax_cases_expected18, JSTokenizer::BAD_TOKEN, 25);
      }
      SECTION("escaped LF within regex literal")
      {
          NORMALIZE(syntax_cases_buf19, syntax_cases_expected19);
-        VALIDATE(syntax_cases_buf19, syntax_cases_expected19);
+        VALIDATE_FAIL(syntax_cases_buf19, syntax_cases_expected19, JSTokenizer::BAD_TOKEN, 23);
      }
      SECTION("escaped CR-LF within regex literal")
      {
          NORMALIZE(syntax_cases_buf20, syntax_cases_expected20);
-        VALIDATE(syntax_cases_buf20, syntax_cases_expected20);
+        VALIDATE_FAIL(syntax_cases_buf20, syntax_cases_expected20, JSTokenizer::BAD_TOKEN, 23);
      }
  }
  
-TEST_CASE("norm_depth is specified", "[JSNormalizer]")
+TEST_CASE("endings", "[JSNormalizer]")
  {
-    const char srcbuf[] = "var abc = 123;\n\r";
-    const char expected[] = "var abc";
-    char dstbuf[7];
-    int bytes_copied;
-    const char* ptr = srcbuf;
-    JSNormState state;
-    state.norm_depth = 7;
-    state.alerts = 0;
-    int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-        &bytes_copied, state);
-
-    CHECK(ret == 0);
-    CHECK(bytes_copied == sizeof(expected) - 1);
-    CHECK(!memcmp(dstbuf, expected, bytes_copied));
-}
+    SECTION("script closing tag is present", "[JSNormalizer]")
+    {
+        const char src[] =
+            "var a = 1 ;\n" // 12 bytes
+            "var b = 2 ;\n" // 12 bytes
+            "</script>\n"   // ptr_offset is here = 33
+            "var c = 3 ;\n";
+        const int ptr_offset = 33;
+        const char expected[] = "var a=1;var b=2;";
+        char dst[sizeof(expected)];
+        int act_len;
+        const char* ptr;
+        int ret;
+
+        NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), DEPTH, ret, ptr, act_len);
+
+        CHECK(ret == JSTokenizer::SCRIPT_ENDED);
+        CHECK(act_len == sizeof(expected) - 1);
+        CHECK((ptr - src) == ptr_offset);
+        CHECK(!memcmp(dst, expected, act_len));
+    }
+    SECTION("depth reached", "[JSNormalizer]")
+    {
+        const char src[] = "var abc = 123;\n\r";
+        const char src2[] = "var foo = 321;\n\r";
+        const char expected[] = "var abc";
+        char dst[sizeof(src)];
+        int act_len;
+        const char* ptr;
+        int ret;
  
-TEST_CASE("tag script end is specified", "[JSNormalizer]")
-{
-    const char srcbuf[] =
-        "var a = 1 ;\n" // 12 bytes
-        "var b = 2 ;\n" // 12 bytes --> ptr_offset = 24
-        "</script>\n"
-        "var c = 3 ;\n";
-    const int ptr_offset = 24;
-    const char expected[] = "var a=1;var b=2;";
-    char dstbuf[sizeof(expected)];
-    int bytes_copied;
-    const char* ptr = srcbuf;
-    JSNormState state;
-    state.norm_depth = NORM_DEPTH;
-    state.alerts = 0;
-    int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-        &bytes_copied, state);
-
-    CHECK(ret == 0);
-    CHECK(bytes_copied == sizeof(expected) - 1);
-    CHECK((ptr - srcbuf) == ptr_offset);
-    CHECK(!memcmp(dstbuf, expected, bytes_copied));
-}
+        JSNormalizer norm;
  
-// Tests for JavaScript parsing errors and anomalies
+        norm.set_depth(7);
+        ret = norm.normalize(src, sizeof(src), dst, sizeof(dst));
+        ptr = norm.get_src_next();
+        act_len = norm.get_dst_next() - dst;
  
-TEST_CASE("parsing errors", "[JSNormalizer]")
-{
-    SECTION("dstlen is too small")
+        CHECK(ret == JSTokenizer::EOS);
+        CHECK(ptr == src + 7);
+        CHECK(act_len == sizeof(expected) - 1);
+        CHECK(!memcmp(dst, expected, act_len));
+
+        ret = norm.normalize(src2, sizeof(src2), dst, sizeof(dst));
+        ptr = norm.get_src_next();
+        act_len = norm.get_dst_next() - dst;
+
+        CHECK(ret == JSTokenizer::EOS);
+        CHECK(ptr == src2 + sizeof(src2));
+        CHECK(act_len == 0);
+    }
+    SECTION("dst size is less then src size")
      {
-        const char srcbuf[] = "var abc = 123;\n\r";
-        const char expected[] = "var abc";
-        char dstbuf[7];
-        int bytes_copied;
-        const char* ptr = srcbuf;
-        JSNormState state;
-        state.norm_depth = NORM_DEPTH;
-        state.alerts = 0;
-        int ret = JSNormalizer::normalize(srcbuf, sizeof(srcbuf), dstbuf, sizeof(dstbuf), &ptr,
-            &bytes_copied, state);
-
-        CHECK(ret == 1);
-        CHECK(bytes_copied == sizeof(expected) - 1);
-        CHECK(!memcmp(dstbuf, expected, bytes_copied));
+        const char src[] = "var abc = 123;\n\r";
+        const char expected[sizeof(src)] = "var abc";
+        char dst[7];
+        int act_len;
+        const char* ptr;
+        int ret;
+
+        NORMALIZE_L(src, sizeof(src), dst, sizeof(dst), DEPTH, ret, ptr, act_len);
+
+        CHECK(ret == JSTokenizer::SCRIPT_CONTINUE);
+        CHECK(ptr == src + sizeof(src));
+        CHECK(act_len == 12); // size of normalized src
+        CHECK(!memcmp(dst, expected, sizeof(dst)));
      }
  }
  
@@ -896,7 +918,7 @@ static const char unexpected_tag_expected0[] =
  static const char unexpected_tag_buf1[] =
      "var a = 1;\n"
      "<script type=application/javascript>\n"
-    "var b = 2;\r\n";;
+    "var b = 2;\r\n";
  
  static const char unexpected_tag_expected1[] =
      "var a=1;";
@@ -907,7 +929,7 @@ static const char unexpected_tag_buf2[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected2[] =
-    "var a=1;var str=";
+    "var a=1;var str='";
  
  static const char unexpected_tag_buf3[] =
      "var a = 1;\n"
@@ -915,7 +937,7 @@ static const char unexpected_tag_buf3[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected3[] =
-    "var a=1;var str=";
+    "var a=1;var str='something ";
  
  static const char unexpected_tag_buf4[] =
      "var a = 1;\n"
@@ -923,7 +945,7 @@ static const char unexpected_tag_buf4[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected4[] =
-    "var a=1;var str=";
+    "var a=1;var str='something ";
  
  static const char unexpected_tag_buf5[] =
      "var a = 1;\n"
@@ -931,7 +953,7 @@ static const char unexpected_tag_buf5[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected5[] =
-    "var a=1;var str=";
+    "var a=1;var str='";
  
  static const char unexpected_tag_buf6[] =
      "var a = 1;\n"
@@ -939,7 +961,7 @@ static const char unexpected_tag_buf6[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected6[] =
-    "var a=1;var str=";
+    "var a=1;var str='something ";
  
  static const char unexpected_tag_buf7[] =
      "var a = 1;\n"
@@ -947,7 +969,7 @@ static const char unexpected_tag_buf7[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected7[] =
-    "var a=1;var str=";
+    "var a=1;var str='something ";
  
  static const char unexpected_tag_buf8[] =
      "var a = 1;\n"
@@ -955,7 +977,7 @@ static const char unexpected_tag_buf8[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected8[] =
-    "var a=1;var str='something \\<script\\> something';var b=2;";
+    "var a=1;var str='something \\";
  
  static const char unexpected_tag_buf9[] =
      "var a = 1;\n"
@@ -1079,7 +1101,7 @@ static const char unexpected_tag_buf23[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected23[] =
-    "var a=1;var str=";
+    "var a=1;var str='script somescript /script something ";
  
  static const char unexpected_tag_buf24[] =
      "var a = 1;\n"
@@ -1087,63 +1109,54 @@ static const char unexpected_tag_buf24[] =
      "var b = 2;\r\n";
  
  static const char unexpected_tag_expected24[] =
-    "var a=1;var str=";
+    "var a=1;var str='something ";
  
-TEST_CASE("unexpected script tag alert", "[JSNormalizer]")
+TEST_CASE("nested script tags", "[JSNormalizer]")
  {
-    const int ret_code = 1;
      SECTION("explicit open tag - simple")
      {
          NORMALIZE(unexpected_tag_buf0, unexpected_tag_expected0);
-        VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, ret_code, 18);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf0, unexpected_tag_expected0, JSTokenizer::OPENING_TAG, 18);
      }
      SECTION("explicit open tag - complex")
      {
          NORMALIZE(unexpected_tag_buf1, unexpected_tag_expected1);
-        VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, ret_code, 18);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf1, unexpected_tag_expected1, JSTokenizer::OPENING_TAG, 18);
      }
      SECTION("open tag within literal - start")
      {
          NORMALIZE(unexpected_tag_buf2, unexpected_tag_expected2);
-        VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, ret_code, 41);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf2, unexpected_tag_expected2, JSTokenizer::OPENING_TAG, 29);
      }
      SECTION("open tag within literal - mid")
      {
          NORMALIZE(unexpected_tag_buf3, unexpected_tag_expected3);
-        VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, ret_code, 51);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf3, unexpected_tag_expected3, JSTokenizer::OPENING_TAG, 39);
      }
      SECTION("open tag within literal - end")
      {
          NORMALIZE(unexpected_tag_buf4, unexpected_tag_expected4);
-        VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, ret_code, 41);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf4, unexpected_tag_expected4, JSTokenizer::OPENING_TAG, 39);
      }
      SECTION("close tag within literal - start")
      {
          NORMALIZE(unexpected_tag_buf5, unexpected_tag_expected5);
-        VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, ret_code, 42);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf5, unexpected_tag_expected5, JSTokenizer::CLOSING_TAG, 31);
      }
      SECTION("close tag within literal - mid")
      {
          NORMALIZE(unexpected_tag_buf6, unexpected_tag_expected6);
-        VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, ret_code, 52);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf6, unexpected_tag_expected6, JSTokenizer::CLOSING_TAG, 41);
      }
      SECTION("close tag within literal - end")
      {
          NORMALIZE(unexpected_tag_buf7, unexpected_tag_expected7);
-        VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, ret_code, 42);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf7, unexpected_tag_expected7, JSTokenizer::CLOSING_TAG, 41);
      }
      SECTION("open tag within literal - escaped")
      {
          NORMALIZE(unexpected_tag_buf8, unexpected_tag_expected8);
-        VALIDATE(unexpected_tag_buf8, unexpected_tag_expected8);
+        VALIDATE_FAIL(unexpected_tag_buf8, unexpected_tag_expected8, JSTokenizer::OPENING_TAG, 40);
      }
      SECTION("close tag within literal - escaped")
      {
@@ -1153,74 +1166,62 @@ TEST_CASE("unexpected script tag alert", "[JSNormalizer]")
      SECTION("open tag within single-line comment - start")
      {
          NORMALIZE(unexpected_tag_buf10, unexpected_tag_expected10);
-        VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, ret_code, 32);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf10, unexpected_tag_expected10, JSTokenizer::OPENING_TAG, 20);
      }
      SECTION("open tag within single-line comment - mid")
      {
          NORMALIZE(unexpected_tag_buf11, unexpected_tag_expected11);
-        VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, ret_code, 42);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf11, unexpected_tag_expected11, JSTokenizer::OPENING_TAG, 30);
      }
      SECTION("open tag within single-line comment - end")
      {
          NORMALIZE(unexpected_tag_buf12, unexpected_tag_expected12);
-        VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, ret_code, 32);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf12, unexpected_tag_expected12, JSTokenizer::OPENING_TAG, 30);
      }
      SECTION("open tag within multi-line comment - start")
      {
          NORMALIZE(unexpected_tag_buf13, unexpected_tag_expected13);
-        VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, ret_code, 33);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf13, unexpected_tag_expected13, JSTokenizer::OPENING_TAG, 20);
      }
      SECTION("open tag within multi-line comment - mid")
      {
          NORMALIZE(unexpected_tag_buf14, unexpected_tag_expected14);
-        VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, ret_code, 43);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf14, unexpected_tag_expected14, JSTokenizer::OPENING_TAG, 30);
      }
      SECTION("open tag within multi-line comment - end")
      {
          NORMALIZE(unexpected_tag_buf15, unexpected_tag_expected15);
-        VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, ret_code, 33);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf15, unexpected_tag_expected15, JSTokenizer::OPENING_TAG, 30);
      }
      SECTION("close tag within single-line comment - start")
      {
          NORMALIZE(unexpected_tag_buf16, unexpected_tag_expected16);
-        VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, ret_code, 33);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf16, unexpected_tag_expected16, JSTokenizer::CLOSING_TAG, 22);
      }
      SECTION("close tag within single-line comment - mid")
      {
          NORMALIZE(unexpected_tag_buf17, unexpected_tag_expected17);
-        VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, ret_code, 50);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf17, unexpected_tag_expected17, JSTokenizer::CLOSING_TAG, 34);
      }
      SECTION("close tag within single-line comment - end")
      {
          NORMALIZE(unexpected_tag_buf18, unexpected_tag_expected18);
-        VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, ret_code, 33);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf18, unexpected_tag_expected18, JSTokenizer::CLOSING_TAG, 32);
      }
      SECTION("close tag within multi-line comment - start")
      {
          NORMALIZE(unexpected_tag_buf19, unexpected_tag_expected19);
-        VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, ret_code, 34);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf19, unexpected_tag_expected19, JSTokenizer::CLOSING_TAG, 22);
      }
      SECTION("close tag within multi-line comment - mid")
      {
          NORMALIZE(unexpected_tag_buf20, unexpected_tag_expected20);
-        VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, ret_code, 44);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf20, unexpected_tag_expected20, JSTokenizer::CLOSING_TAG, 32);
      }
      SECTION("close tag within multi-line comment - end")
      {
          NORMALIZE(unexpected_tag_buf21, unexpected_tag_expected21);
-        VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, ret_code, 34);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf21, unexpected_tag_expected21, JSTokenizer::CLOSING_TAG, 32);
      }
      SECTION("multiple patterns - not matched")
      {
@@ -1230,14 +1231,11 @@ TEST_CASE("unexpected script tag alert", "[JSNormalizer]")
      SECTION("multiple patterns - matched")
      {
          NORMALIZE(unexpected_tag_buf23, unexpected_tag_expected23);
-        VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, ret_code, 67);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf23, unexpected_tag_expected23, JSTokenizer::OPENING_TAG, 65);
      }
      SECTION("mixed lower and upper case")
      {
          NORMALIZE(unexpected_tag_buf24, unexpected_tag_expected24);
-        VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, ret_code, 41);
-        VALIDATE_ALERT(ALERT_UNEXPECTED_TAG);
+        VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 39);
      }
  }
-
author	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Fri, 28 May 2021 15:25:37 +0000 (15:25 +0000)
committer	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Fri, 28 May 2021 15:25:37 +0000 (15:25 +0000)
cmake/macros.cmake		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_enum.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_flow_data.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_flow_data.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_js_norm.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_js_norm.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_module.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_body.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_body.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_tables.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/test/http_module_test.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/test/http_uri_norm_test.cc		patch \| blob \| blame \| history
src/utils/CMakeLists.txt		patch \| blob \| blame \| history
src/utils/js_norm_state.h	[deleted file]	patch \| blob \| blame \| history
src/utils/js_normalizer.cc		patch \| blob \| blame \| history
src/utils/js_normalizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.l		patch \| blob \| blame \| history
src/utils/test/js_normalizer_test.cc		patch \| blob \| blame \| history