Merge pull request #2992 in SNORT/snort3 from ~OSERHIIE/snort3:js_identifier_norm...

author Mike Stepanek (mstepane) <mstepane@cisco.com>

Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)

committer Mike Stepanek (mstepane) <mstepane@cisco.com>

Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)
author Mike Stepanek (mstepane) <mstepane@cisco.com>
Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)
committer Mike Stepanek (mstepane) <mstepane@cisco.com>
Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)
diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt

index ace30ff37c39af706c68e6fe3b13a0372b45f30c..305ab6dd60958b8387566dce0e958a67e3fdcb71 100755 (executable)
--- a/src/service_inspectors/http_inspect/dev_notes.txt
+++ b/src/service_inspectors/http_inspect/dev_notes.txt
@@ -220,7 +220,7 @@ During message body analysis the Enhanced Normalizer does one of the following:
     subsequent bytes in a stream mode, until it finds a closing tag.
     It proceeds and scans the entire message body for inline scripts.
  
-Enhanced Normalizer is a stateful JavaScript whitespace normalizer.
+Enhanced Normalizer is a stateful JavaScript whitespace and identifier normalizer.
  So, the following whitespace codes will be normalized:
   * \u0009 Tab <TAB>
   * \u000B Vertical Tab <VT>
@@ -231,6 +231,13 @@ So, the following whitespace codes will be normalized:
   * Any other Unicode “space separator” <USP>
   * Also including new-line and carriage-return line-break characters
  
+All JavaScript identifier names will be replaced with unified names in the
+following format: a0 -> z9999. So, the number of unique identifiers available
+is 260000 names per HTTP transaction. If Normalizer overruns the configured
+limit, a built-in alert is generated. Additionally, there is a config option to
+specify the limit manually:
+ * http_inspect.js_norm_identifier_depth.
+
  Additionally, Normalizer validates the syntax with respect to ECMA-262 Standard,
  and checks for restrictions for contents of script elements (since, it is HTML-embedded JavaScript).
  
diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h

index 31b60846e10d888f70fbb72112cf3bdf6e292775..47177cb738a78e8b9ada08f421368a000c9f0ad4 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_enum.h
+++ b/src/service_inspectors/http_inspect/http_enum.h
@@ -64,7 +64,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES
      PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION,
      PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY,
      PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL,
-    PEG_JS_BYTES, PEG_COUNT_MAX };
+    PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_COUNT_MAX };
  
  // Result of scanning by splitter
  enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE,
@@ -271,6 +271,7 @@ enum Infraction
      INF_JS_CLOSING_TAG,
      INF_JS_CODE_IN_EXTERNAL,
      INF_JS_SHORTENED_TAG,
+    INF_JS_IDENTIFIER_OVERFLOW,
      INF__MAX_VALUE
  };
  
@@ -399,6 +400,7 @@ enum EventSid
      EVENT_JS_CLOSING_TAG = 267,
      EVENT_JS_CODE_IN_EXTERNAL = 268,
      EVENT_JS_SHORTENED_TAG = 269,
+    EVENT_JS_IDENTIFIER_OVERFLOW = 270,
      EVENT__MAX_VALUE
  };
  
diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc

index 3e090b35f29b73971a6a49bcf28a1ca4a475fff8..777b7ab00f3ac56a504602559e6bd483a00b41f3 100644 (file)
--- a/src/service_inspectors/http_inspect/http_flow_data.cc
+++ b/src/service_inspectors/http_inspect/http_flow_data.cc
@@ -25,6 +25,7 @@
  
  #include "decompress/file_decomp.h"
  #include "service_inspectors/http2_inspect/http2_flow_data.h"
+#include "utils/js_identifier_ctx.h"
  #include "utils/js_normalizer.h"
  
  #include "http_cutter.h"
@@ -91,6 +92,11 @@ HttpFlowData::~HttpFlowData()
          HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS);
  
  #ifndef UNIT_TEST_BUILD
+    if (js_ident_ctx)
+    {
+        update_deallocations(js_ident_ctx->size());
+        delete js_ident_ctx;
+    }
      if (js_normalizer)
      {
          update_deallocations(JSNormalizer::size());
@@ -231,12 +237,24 @@ void HttpFlowData::garbage_collect()
  }
  
  #ifndef UNIT_TEST_BUILD
-snort::JSNormalizer& HttpFlowData::acquire_js_ctx()
+void HttpFlowData::reset_js_ident_ctx()
+{
+    if (js_ident_ctx)
+        js_ident_ctx->reset();
+}
+
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t ident_depth, size_t norm_depth)
  {
      if (js_normalizer)
          return *js_normalizer;
  
-    js_normalizer = new JSNormalizer();
+    if (!js_ident_ctx)
+    {
+        js_ident_ctx = new JSIdentifierCtx(ident_depth);
+        update_allocations(js_ident_ctx->size());
+    }
+
+    js_normalizer = new JSNormalizer(*js_ident_ctx, norm_depth);
      update_allocations(JSNormalizer::size());
  
      return *js_normalizer;
@@ -252,7 +270,9 @@ void HttpFlowData::release_js_ctx()
      js_normalizer = nullptr;
  }
  #else
-snort::JSNormalizer& HttpFlowData::acquire_js_ctx() { return *js_normalizer; }
+void HttpFlowData::reset_js_ident_ctx() {}
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t, size_t)
+{ return *js_normalizer; }
  void HttpFlowData::release_js_ctx() {}
  #endif
  
diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h

index 38fd4ee71748b78989df7fb967919d6f9308dff3..2a1dfc148db18bda9a3210fe363e1def7a0a58cf 100644 (file)
--- a/src/service_inspectors/http_inspect/http_flow_data.h
+++ b/src/service_inspectors/http_inspect/http_flow_data.h
@@ -38,6 +38,7 @@ class HttpJsNorm;
  class HttpMsgSection;
  class HttpCutter;
  class HttpQueryParser;
+class JSIdentifierCtxBase;
  
  namespace snort
  {
@@ -193,10 +194,12 @@ private:
      bool ssl_search_abandoned = false;
  
      // *** HttpJsNorm
+    JSIdentifierCtxBase* js_ident_ctx = nullptr;
      snort::JSNormalizer* js_normalizer = nullptr;
      bool js_built_in_event = false;
  
-    snort::JSNormalizer& acquire_js_ctx();
+    void reset_js_ident_ctx();
+    snort::JSNormalizer& acquire_js_ctx(int32_t ident_depth, size_t norm_depth);
      void release_js_ctx();
  
      // *** Transaction management including pipelining
diff --git a/src/service_inspectors/http_inspect/http_inspect.cc b/src/service_inspectors/http_inspect/http_inspect.cc

index 29b646158a0d24ba652b66c2774ebcd1a282a13d..cdcaefac8a3b38e970fabcd9f736ec83c053325a 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_inspect.cc
+++ b/src/service_inspectors/http_inspect/http_inspect.cc
@@ -160,6 +160,7 @@ void HttpInspect::show(const SnortConfig*) const
          params->js_norm_param.max_javascript_whitespaces);
      ConfigLogger::log_value("js_normalization_depth",
          params->js_norm_param.js_normalization_depth);
+    ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth);
      ConfigLogger::log_value("bad_characters", bad_chars.c_str());
      ConfigLogger::log_value("ignore_unreserved", unreserved_chars.c_str());
      ConfigLogger::log_flag("percent_u", params->uri_param.percent_u);
diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc

index a851f85f798121cb7be1c450e486225d6f098104..f1536d3819a7dd47c572b58d462490530b075926 100644 (file)
--- a/src/service_inspectors/http_inspect/http_js_norm.cc
+++ b/src/service_inspectors/http_inspect/http_js_norm.cc
@@ -47,9 +47,11 @@ static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const char* con
      return ret;
  }
  
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
+    int32_t identifier_depth_) :
      uri_param(uri_param_),
      normalization_depth(normalization_depth_),
+    identifier_depth(identifier_depth_),
      mpse_otag(nullptr),
      mpse_attr(nullptr),
      mpse_type(nullptr)
@@ -125,8 +127,7 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output,
              dst_end = buffer + len;
          }
  
-        auto& ctx = ssn->acquire_js_ctx();
-        ctx.set_depth(normalization_depth);
+        auto& ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth);
          auto ret = js_normalize(ctx, end, dst_end, ptr, dst);
  
          switch (ret)
@@ -150,6 +151,12 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output,
              events->create_event(EVENT_JS_BAD_TOKEN);
              ssn->js_built_in_event = true;
              break;
+        case JSTokenizer::IDENTIFIER_OVERFLOW:
+            HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
+            *infractions += INF_JS_IDENTIFIER_OVERFLOW;
+            events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
+            ssn->js_built_in_event = true;
+            break;
          default:
              assert(false);
              break;
@@ -228,8 +235,7 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output,
              dst_end = buffer + len;
          }
  
-        auto& ctx = ssn->acquire_js_ctx();
-        ctx.set_depth(normalization_depth);
+        auto& ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth);
          auto dst_before = dst;
          auto ret = js_normalize(ctx, end, dst_end, ptr, dst);
  
@@ -260,6 +266,12 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output,
              events->create_event(EVENT_JS_BAD_TOKEN);
              script_continue = false;
              break;
+        case JSTokenizer::IDENTIFIER_OVERFLOW:
+            HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
+            *infractions += INF_JS_IDENTIFIER_OVERFLOW;
+            events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
+            script_continue = false;
+            break;
          default:
              assert(false);
              script_continue = false;
diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h

index 38f5399849d35bf544b0099f9aa8a0956ec3f6c8..c21c2462a0ebbff62e1776a5c0416152e2bbf843 100644 (file)
--- a/src/service_inspectors/http_inspect/http_js_norm.h
+++ b/src/service_inspectors/http_inspect/http_js_norm.h
@@ -36,7 +36,8 @@
  class HttpJsNorm
  {
  public:
-    HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth);
+    HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth,
+        int32_t identifier_depth);
      ~HttpJsNorm();
  
      void legacy_normalize(const Field& input, Field& output, HttpInfractions*, HttpEventGen*,
@@ -59,6 +60,7 @@ private:
  
      const HttpParaList::UriParam& uri_param;
      int64_t normalization_depth;
+    int32_t identifier_depth;
      bool configure_once = false;
  
      snort::SearchTool* mpse_otag;
diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc

index ece3f5a6e3a15475f2ed43928fd63fcc9869eb9d..c0d8e1184c64cacc80b9c34a4027005c76e0fa2a 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_module.cc
+++ b/src/service_inspectors/http_inspect/http_module.cc
@@ -74,8 +74,13 @@ const Parameter HttpModule::http_params[] =
        "use legacy normalizer to normalize JavaScript in response bodies" },
  
      { "js_normalization_depth", Parameter::PT_INT, "-1:max53", "0",
-      "number of input JavaScript bytes to normalize with enhanced normalizer "
-      "(-1 max allowed value) (experimental)" },
+      "enable enhanced normalizer (0 is disabled); "
+      "number of input JavaScript bytes to normalize (-1 unlimited) "
+      "(experimental)" },
+
+    // range of accepted identifier names is (a0:z9999), so the max is 26 * 10000 = 260000
+    { "js_norm_identifier_depth", Parameter::PT_INT, "0:260000", "260000",
+      "max number of unique JavaScript identifiers to normalize" },
  
      { "max_javascript_whitespaces", Parameter::PT_INT, "1:65535", "200",
        "maximum consecutive whitespaces allowed within the JavaScript obfuscated data" },
@@ -206,6 +211,10 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*)
              params->js_norm_param.is_javascript_normalization
              or params->js_norm_param.normalize_javascript;
      }
+    else if (val.is("js_norm_identifier_depth"))
+    {
+        params->js_norm_param.js_identifier_depth = val.get_int32();
+    }
      else if (val.is("js_normalization_depth"))
      {
          int64_t v = val.get_int64();
@@ -400,7 +409,8 @@ bool HttpModule::end(const char*, int, SnortConfig*)
          ParseError("Cannot use normalize_javascript and js_normalization_depth together.");
  
      if ( params->js_norm_param.is_javascript_normalization )
-        params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, params->js_norm_param.js_normalization_depth);
+        params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param,
+        params->js_norm_param.js_normalization_depth, params->js_norm_param.js_identifier_depth);
  
      params->script_detection_handle = script_detection_handle;
  
diff --git a/src/service_inspectors/http_inspect/http_module.h b/src/service_inspectors/http_inspect/http_module.h

index e716ed93fc3730fc1bae5ae1900c7f552c1eabef..e1297abb958ed9f1b7a8e958cca772a5ea7efcad 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_module.h
+++ b/src/service_inspectors/http_inspect/http_module.h
@@ -56,6 +56,7 @@ public:
          bool normalize_javascript = false;
          bool is_javascript_normalization = false;
          int64_t js_normalization_depth = 0;
+        int32_t js_identifier_depth = 0;
          int max_javascript_whitespaces = 200;
          class HttpJsNorm* js_norm = nullptr;
      };
diff --git a/src/service_inspectors/http_inspect/http_msg_request.cc b/src/service_inspectors/http_inspect/http_msg_request.cc

index 3535d93a8260ee129d7ef50605db38d6895753f3..3d4587a09f5eedd482025955465d7c72d621e3d7 100644 (file)
--- a/src/service_inspectors/http_inspect/http_msg_request.cc
+++ b/src/service_inspectors/http_inspect/http_msg_request.cc
@@ -41,6 +41,7 @@ HttpMsgRequest::HttpMsgRequest(const uint8_t* buffer, const uint16_t buf_size,
      transaction->set_request(this);
      get_related_sections();
      session_data->release_js_ctx();
+    session_data->reset_js_ident_ctx();
  }
  
  HttpMsgRequest::~HttpMsgRequest()
diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc

index ab1ba12aad912fc6045d465b4df606fe96bbae9f..1177839b735332907f672ce47d38765b6cfb5c33 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_tables.cc
+++ b/src/service_inspectors/http_inspect/http_tables.cc
@@ -432,6 +432,7 @@ const RuleMap HttpModule::http_events[] =
      { EVENT_JS_CLOSING_TAG,             "unexpected script closing tag in JavaScript" },
      { EVENT_JS_CODE_IN_EXTERNAL,        "JavaScript code under the external script tags" },
      { EVENT_JS_SHORTENED_TAG,           "script opening tag in a short form" },
+    { EVENT_JS_IDENTIFIER_OVERFLOW,     "max number of unique JavaScript identifiers reached" },
      { 0, nullptr }
  };
  
@@ -471,6 +472,9 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] =
      { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" },
      { CountType::SUM, "js_external_scripts", "total number of external JavaScripts processed" },
      { CountType::SUM, "js_bytes", "total number of JavaScript bytes processed" },
+    { CountType::SUM, "js_identifiers", "total number of unique JavaScript identifiers processed" },
+    { CountType::SUM, "js_identifier_overflows", "total number of unique JavaScript identifier "
+        "limit overflows" },
      { CountType::END, nullptr, nullptr }
  };
  
diff --git a/src/service_inspectors/http_inspect/test/http_module_test.cc b/src/service_inspectors/http_inspect/test/http_module_test.cc

index 23d35c7e0551a420bb55039c57fd28cb017a47eb..134377823218a9233b1cd050b566834318b2e233 100755 (executable)
--- a/src/service_inspectors/http_inspect/test/http_module_test.cc
+++ b/src/service_inspectors/http_inspect/test/http_module_test.cc
@@ -64,9 +64,11 @@ int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return
  long HttpTestManager::print_amount {};
  bool HttpTestManager::print_hex {};
  
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
+    int32_t identifier_depth_) :
      uri_param(uri_param_), normalization_depth(normalization_depth_),
-    mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
+    identifier_depth(identifier_depth_), mpse_otag(nullptr), mpse_attr(nullptr),
+    mpse_type(nullptr) {}
  HttpJsNorm::~HttpJsNorm() = default;
  void HttpJsNorm::configure(){}
  int64_t Parameter::get_int(char const*) { return 0; }
diff --git a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc

index 3982153e4a3fbf33006327cf763fff936cd5cfc8..376e3d1e70d86d2b1dd8c75308be317b47d420af 100755 (executable)
--- a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc
+++ b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc
@@ -53,9 +53,11 @@ LiteralSearch* LiteralSearch::instantiate(LiteralSearch::Handle*, const uint8_t*
  void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { }
  void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { }
  
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
+    int32_t identifier_depth_) :
      uri_param(uri_param_), normalization_depth(normalization_depth_),
-    mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
+    identifier_depth(identifier_depth_), mpse_otag(nullptr), mpse_attr(nullptr),
+    mpse_type(nullptr) {}
  HttpJsNorm::~HttpJsNorm() = default;
  void HttpJsNorm::configure() {}
  int64_t Parameter::get_int(char const*) { return 0; }
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt

index 38fc2ddce3816fbacce50b0d89927ad24fe4cd64..632a5f5b48f4343d063f8380b92975cc12d0d5a8 100644 (file)
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -32,6 +32,8 @@ add_library ( utils OBJECT
      dnet_header.h
      dyn_array.cc
      dyn_array.h
+    js_identifier_ctx.cc
+    js_identifier_ctx.h
      js_normalizer.cc
      js_normalizer.h
      js_tokenizer.h
diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc

new file mode 100644 (file)

index 0000000..308c7d7
--- /dev/null
+++ b/src/utils/js_identifier_ctx.cc
@@ -0,0 +1,85 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_identifier_ctx.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "js_identifier_ctx.h"
+
+#ifndef CATCH_TEST_BUILD
+#include "service_inspectors/http_inspect/http_enum.h"
+#include "service_inspectors/http_inspect/http_module.h"
+#else
+namespace HttpEnums
+{
+enum PEG_COUNT
+{
+    PEG_JS_IDENTIFIER
+};
+}
+
+class HttpModule
+{
+public:
+    static void increment_peg_counts(HttpEnums::PEG_COUNT) {}
+};
+#endif // CATCH_TEST_BUILD
+
+#define FIRST_NAME_SIZE   26
+#define LAST_NAME_SIZE  9999
+
+static const char s_ident_first_names[FIRST_NAME_SIZE] =
+{
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
+};
+
+const char* JSIdentifierCtx::substitute(const char* identifier)
+{
+    // Known identifier: hand back the unified name it was given earlier.
+    const auto it = ident_names.find(identifier);
+    if (it != ident_names.end())
+        return it->second.c_str();
+
+    // Check both limits before touching any counter, so that calls made after
+    // an overflow leave the state unchanged instead of growing the counters.
+    const bool wrap = ident_last_name >= LAST_NAME_SIZE;
+    if (unique_ident_cnt >= depth || (wrap && ident_first_name >= FIRST_NAME_SIZE - 1))
+        return nullptr;
+
+    ident_first_name += wrap ? 1 : 0;
+    ident_last_name = wrap ? 0 : ident_last_name + 1;
+    ++unique_ident_cnt;
+
+    // One insertion; its iterator replaces two further operator[] lookups.
+    const auto res = ident_names.emplace(identifier,
+        s_ident_first_names[ident_first_name] + std::to_string(ident_last_name));
+    HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER);
+    return res.first->second.c_str();
+}
+
+void JSIdentifierCtx::reset()
+{
+    ident_names.clear();      // forget every identifier-to-name mapping
+    unique_ident_cnt = 0;     // nothing counted against depth yet
+    ident_last_name = -1;     // -1 so that the first name issued ends in 0
+    ident_first_name = 0;     // back to the first entry of the letter table
+}
+
diff --git a/src/utils/js_identifier_ctx.h b/src/utils/js_identifier_ctx.h

new file mode 100644 (file)

index 0000000..6a5add2
--- /dev/null
+++ b/src/utils/js_identifier_ctx.h
@@ -0,0 +1,58 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_identifier_ctx.h author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifndef JS_IDENTIFIER_CTX
+#define JS_IDENTIFIER_CTX
+
+#include <string>
+#include <unordered_map>
+
+class JSIdentifierCtxBase
+{
+public:
+    virtual ~JSIdentifierCtxBase() = default;
+
+    virtual const char* substitute(const char* identifier) = 0;
+    virtual void reset() = 0;
+    virtual size_t size() const = 0;
+};
+
+class JSIdentifierCtx : public JSIdentifierCtxBase // maps identifiers to unified names
+{
+public:
+    explicit JSIdentifierCtx(int32_t depth) : depth(depth) {} // depth: unique identifier limit
+
+    const char* substitute(const char* identifier) override; // nullptr once a limit is hit
+    void reset() override; // clears the mappings and restarts the counters
+
+    // fixed memory estimate: assumes about 500 mappings of two strings each
+    size_t size() const override
+    { return (sizeof(JSIdentifierCtx) + (sizeof(std::string) * 2 * 500)); }
+
+private:
+    int ident_first_name = 0;      // index into the leading-letter table
+    int ident_last_name = -1;      // numeric suffix counter (-1 before the first name)
+    int32_t unique_ident_cnt = 0;  // compared against depth in substitute()
+    int32_t depth;
+
+    std::unordered_map<std::string, std::string> ident_names; // original -> unified name
+};
+
+#endif // JS_IDENTIFIER_CTX
+
diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc

index 7e4b1d9a24e13e25175f4b5b616e99a64b2b7df6..86d2d9ae51ecbf4e433d5aa6ad12e7d7d7214a4c 100644 (file)
--- a/src/utils/js_normalizer.cc
+++ b/src/utils/js_normalizer.cc
@@ -25,23 +25,14 @@
  
  using namespace snort;
  
-JSNormalizer::JSNormalizer()
-    : depth(-1),
-      rem_bytes(-1),
+JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth)
+    : depth(norm_depth),
+      rem_bytes(norm_depth),
        unlim(true),
        src_next(nullptr),
        dst_next(nullptr),
-      tokenizer(in, out)
+      tokenizer(in, out, js_ident_ctx)
  {
-}
-
-void JSNormalizer::set_depth(size_t new_depth)
-{
-    if (depth == new_depth)
-        return;
-
-    depth = new_depth;
-    rem_bytes = depth;
      unlim = depth == (size_t)-1;
  }
  
@@ -68,7 +59,9 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, char
      if (!unlim)
          rem_bytes -= r_bytes;
      src_next = src + r_bytes;
-    dst_next = dst + w_bytes;
+
+    // avoid heap overflow if number of written bytes bigger than accepted dst_len
+    dst_next = (w_bytes <= dst_len) ? dst + w_bytes : dst + dst_len;
  
      return rem_bytes ? ret : JSTokenizer::EOS;
  }
diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h

index 75bd407685396e21b31aa9e45ec2d04868297759..13673e4a9470602dea1a7dc9efc6b66a957a18d3 100644 (file)
--- a/src/utils/js_normalizer.h
+++ b/src/utils/js_normalizer.h
@@ -32,7 +32,7 @@ namespace snort
  class JSNormalizer
  {
  public:
-    JSNormalizer();
+    JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t depth);
  
      const char* get_src_next() const
      { return src_next; }
@@ -43,8 +43,6 @@ public:
      void reset_depth()
      { rem_bytes = depth; }
  
-    void set_depth(size_t depth);
-
      JSTokenizer::JSRet normalize(const char* src, size_t src_len, char* dst, size_t dst_len);
  
      static size_t size();
diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h

index 0e0fd2a27f8edabd0b2246b717f29f57c76ded1f..e2612ac109da7b7be8b51e8aecb580ee41de801b 100644 (file)
--- a/src/utils/js_tokenizer.h
+++ b/src/utils/js_tokenizer.h
@@ -24,6 +24,8 @@
  
  #include "log/messages.h"
  
+class JSIdentifierCtxBase;
+
  class JSTokenizer : public yyFlexLexer
  {
  private:
@@ -46,10 +48,11 @@ public:
          SCRIPT_CONTINUE,
          OPENING_TAG,
          CLOSING_TAG,
-        BAD_TOKEN
+        BAD_TOKEN,
+        IDENTIFIER_OVERFLOW
      };
  
-    JSTokenizer(std::istream& in, std::ostream& out);
+    JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx);
      ~JSTokenizer() override;
  
      // returns JSRet
@@ -65,6 +68,7 @@ private:
      JSRet eval_eof();
      JSRet do_spacing(JSToken cur_token);
      JSRet do_operator_spacing(JSToken cur_token);
+    JSRet do_identifier_substitution(const char* lexeme);
      bool unescape(const char* lexeme);
  
  private:
@@ -73,6 +77,7 @@ private:
      std::stringstream tmp;
  
      JSToken token = UNDEFINED;
+    JSIdentifierCtxBase& ident_ctx;
  };
  
  #endif // JS_TOKENIZER_H
diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l

index 8649ff61189ede5b18c5796ca6fb65ca60067a84..8182d4379bcbcacf1a511ffd4adc5c6db8d3aff7 100644 (file)
--- a/src/utils/js_tokenizer.l
+++ b/src/utils/js_tokenizer.l
@@ -30,6 +30,7 @@
      #include "config.h"
      #endif
  
+    #include "utils/js_identifier_ctx.h"
      #include "utils/js_tokenizer.h"
  
      #include <cassert>
@@ -989,7 +990,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
  {KEYWORD}                           { EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
  {OPERATOR}                          { EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
  {LITERAL}                           { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); }
-{IDENTIFIER}                        { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) ECHO; } BEGIN(divop); }
+{IDENTIFIER}                        { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); }
  
  .|{ALL_UNICODE}                     { ECHO; token = UNDEFINED; BEGIN(INITIAL); }
  <<EOF>>                             { EXEC(eval_eof()) }
@@ -1072,8 +1073,9 @@ static std::string unescape_unicode(const char* lexeme)
  
  // JSTokenizer members
  
-JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out)
-    : yyFlexLexer(in, out)
+JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx)
+    : yyFlexLexer(in, out),
+      ident_ctx(ident_ctx)
  {
      BEGIN(regst);
  }
@@ -1164,6 +1166,19 @@ JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token)
      return BAD_TOKEN;
  }
  
+JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme)
+{
+    // Ask the identifier context for the name to emit in place of this lexeme.
+    const char* const unified = ident_ctx.substitute(lexeme);
+
+    // A null result is reported to the caller as an identifier overflow.
+    if (!unified)
+        return IDENTIFIER_OVERFLOW;
+
+    yyout << unified;
+    return EOS;
+}
+
  bool JSTokenizer::unescape(const char* lexeme)
  {
      if ( strstr(lexeme, "\\u") )
diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt

index 816907aa4682565c9d6cd56217f8bcb5e08b7cb8..2a092f3231b2e8fce5d0f02952fcb8e5278041d6 100644 (file)
--- a/src/utils/test/CMakeLists.txt
+++ b/src/utils/test/CMakeLists.txt
@@ -13,7 +13,13 @@ FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/../js_tokenizer.l
  add_catch_test( js_normalizer_test
      SOURCES
          ${FLEX_js_tokenizer_OUTPUTS}
+        ../js_identifier_ctx.cc
          ../js_normalizer.cc
          ../util_cstring.cc
  )
  
+add_catch_test( js_identifier_ctx_test
+    SOURCES
+        ../js_identifier_ctx.cc
+)
+
diff --git a/src/utils/test/js_identifier_ctx_test.cc b/src/utils/test/js_identifier_ctx_test.cc

new file mode 100644 (file)

index 0000000..89f0252
--- /dev/null
+++ b/src/utils/test/js_identifier_ctx_test.cc
@@ -0,0 +1,93 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_identifier_ctx_test.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "catch/catch.hpp"
+
+#include <cstring>
+#include <vector>
+
+#include "utils/js_identifier_ctx.h"
+
+#define DEPTH 260000
+
+#define FIRST_NAME_SIZE   26
+#define LAST_NAME_SIZE  9999
+
+static const char s_ident_first_names[FIRST_NAME_SIZE] =
+{
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
+};
+
+TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
+{
+    SECTION("same name")
+    {
+        JSIdentifierCtx ident_ctx(DEPTH);
+
+        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+    }
+    SECTION("different names")
+    {
+        JSIdentifierCtx ident_ctx(DEPTH);
+
+        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+        CHECK(!strcmp(ident_ctx.substitute("b"), "a1"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+    }
+    SECTION("depth reached")
+    {
+        JSIdentifierCtx ident_ctx(2);
+
+        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+        CHECK(!strcmp(ident_ctx.substitute("b"), "a1"));
+        CHECK(ident_ctx.substitute("c") == nullptr);
+        CHECK(ident_ctx.substitute("d") == nullptr);
+        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+    }
+    SECTION("max names")
+    {
+        JSIdentifierCtx ident_ctx(DEPTH + 2);
+
+        std::vector<std::string> n, e;
+        n.reserve(DEPTH + 2);
+        e.reserve(DEPTH);
+
+        for (int it = 0; it < DEPTH + 2; ++it)
+            n.push_back("n" + std::to_string(it));
+
+        for (int it_first = 0; it_first < FIRST_NAME_SIZE; ++it_first)
+        {
+            for (int it_last = 0; it_last <= LAST_NAME_SIZE; ++it_last)
+                e.push_back(s_ident_first_names[it_first] + std::to_string(it_last));
+        }
+
+        for (int it = 0; it < DEPTH; ++it)
+            CHECK(!strcmp(ident_ctx.substitute(n[it].c_str()), e[it].c_str()));
+
+        CHECK(ident_ctx.substitute(n[DEPTH].c_str()) == nullptr);
+        CHECK(ident_ctx.substitute(n[DEPTH + 1].c_str()) == nullptr);
+    }
+}
+
diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc

index b66d77766be7d7963950c9780245f8a5a6887739..7c27c51a0f506f94a4d844bd72be57a093467b0c 100644 (file)
--- a/src/utils/test/js_normalizer_test.cc
+++ b/src/utils/test/js_normalizer_test.cc
@@ -25,6 +25,7 @@
  
  #include <cstring>
  
+#include "utils/js_identifier_ctx.h"
  #include "utils/js_normalizer.h"
  
  namespace snort
@@ -34,17 +35,28 @@ namespace snort
  { exit(EXIT_FAILURE); }
  }
  
+// Test double for JSIdentifierCtxBase: substitute() hands the identifier back unchanged.
+class JSIdentifierCtxTest : public JSIdentifierCtxBase
+{
+public:
+    JSIdentifierCtxTest() = default;
+    const char* substitute(const char* identifier) override
+    { return identifier; }
+    void reset() override {}
+    size_t size() const override { return 0; } // must return a value: an empty non-void body is UB
+};
+
  using namespace snort;
  
  #define DEPTH 65535
  
-#define NORMALIZE(src, expected)                                    \
-    char dst[sizeof(expected)];                                     \
-    JSNormalizer norm;                                              \
-    norm.set_depth(DEPTH);                                          \
-    auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst));  \
-    const char* ptr = norm.get_src_next();                          \
-    int act_len = norm.get_dst_next() - dst;                        \
+#define NORMALIZE(src, expected)                                   \
+    char dst[sizeof(expected)];                                    \
+    JSIdentifierCtxTest ident_ctx;                                 \
+    JSNormalizer norm(ident_ctx, DEPTH);                           \
+    auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst)); \
+    const char* ptr = norm.get_src_next();                         \
+    int act_len = norm.get_dst_next() - dst;
  
  #define VALIDATE(src, expected)                 \
      CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
@@ -52,20 +64,20 @@ using namespace snort;
      CHECK(act_len == sizeof(expected) - 1);     \
      CHECK(!memcmp(dst, expected, act_len));
  
-#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset)  \
-    CHECK(ret == ret_code);                                 \
-    CHECK((ptr - src) == ptr_offset);                       \
-    CHECK(act_len == sizeof(expected) - 1);                 \
+#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset) \
+    CHECK(ret == ret_code);                                \
+    CHECK((ptr - src) == ptr_offset);                      \
+    CHECK(act_len == sizeof(expected) - 1);                \
      CHECK(!memcmp(dst, expected, act_len));
  
-#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len)   \
-    {                                                                   \
-        JSNormalizer norm;                                              \
-        norm.set_depth(depth);                                          \
-        ret = norm.normalize(src, src_len, dst, dst_len);               \
-        ptr = norm.get_src_next();                                      \
-        len = norm.get_dst_next() - dst;                                \
-    }                                                                   \
+#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \
+    {                                                                 \
+        JSIdentifierCtxTest ident_ctx;                                \
+        JSNormalizer norm(ident_ctx, depth);                          \
+        ret = norm.normalize(src, src_len, dst, dst_len);             \
+        ptr = norm.get_src_next();                                    \
+        len = norm.get_dst_next() - dst;                              \
+    }
  
  // ClamAV test cases
  static const char clamav_buf0[] =
@@ -869,9 +881,8 @@ TEST_CASE("endings", "[JSNormalizer]")
          const char* ptr;
          int ret;
  
-        JSNormalizer norm;
-
-        norm.set_depth(7);
+        JSIdentifierCtxTest ident_ctx;
+        JSNormalizer norm(ident_ctx, 7);
          ret = norm.normalize(src, sizeof(src), dst, sizeof(dst));
          ptr = norm.get_src_next();
          act_len = norm.get_dst_next() - dst;
@@ -902,7 +913,7 @@ TEST_CASE("endings", "[JSNormalizer]")
  
          CHECK(ret == JSTokenizer::SCRIPT_CONTINUE);
          CHECK(ptr == src + sizeof(src));
-        CHECK(act_len == 12); // size of normalized src
+        CHECK(act_len == 7); // size of normalized src
          CHECK(!memcmp(dst, expected, sizeof(dst)));
      }
  }
@@ -1239,3 +1250,4 @@ TEST_CASE("nested script tags", "[JSNormalizer]")
          VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 39);
      }
  }
+
author	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)
committer	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)
src/service_inspectors/http_inspect/dev_notes.txt		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_enum.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_flow_data.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_flow_data.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_inspect.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_js_norm.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_js_norm.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_module.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_module.h		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_msg_request.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_tables.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/test/http_module_test.cc		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/test/http_uri_norm_test.cc		patch \| blob \| blame \| history
src/utils/CMakeLists.txt		patch \| blob \| blame \| history
src/utils/js_identifier_ctx.cc	[new file with mode: 0644]	patch \| blob
src/utils/js_identifier_ctx.h	[new file with mode: 0644]	patch \| blob
src/utils/js_normalizer.cc		patch \| blob \| blame \| history
src/utils/js_normalizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.h		patch \| blob \| blame \| history
src/utils/js_tokenizer.l		patch \| blob \| blame \| history
src/utils/test/CMakeLists.txt		patch \| blob \| blame \| history
src/utils/test/js_identifier_ctx_test.cc	[new file with mode: 0644]	patch \| blob
src/utils/test/js_normalizer_test.cc		patch \| blob \| blame \| history