Merge pull request #3081 in SNORT/snort3 from ~VHORBATO/snort3:rename_norm_ident...

author Mike Stepanek (mstepane) <mstepane@cisco.com>

Fri, 1 Oct 2021 16:57:06 +0000 (16:57 +0000)

committer Mike Stepanek (mstepane) <mstepane@cisco.com>

Fri, 1 Oct 2021 16:57:06 +0000 (16:57 +0000)
author Mike Stepanek (mstepane) <mstepane@cisco.com>
Fri, 1 Oct 2021 16:57:06 +0000 (16:57 +0000)
committer Mike Stepanek (mstepane) <mstepane@cisco.com>
Fri, 1 Oct 2021 16:57:06 +0000 (16:57 +0000)
diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt

index 70783537343cd2d03428615a7abd6f05a150ea47..c33a6e024e6cc3391a092c79bd801c0958756023 100755 (executable)
--- a/src/service_inspectors/http_inspect/dev_notes.txt
+++ b/src/service_inspectors/http_inspect/dev_notes.txt
@@ -232,9 +232,9 @@ So, the following whitespace codes will be normalized:
   * Also including new-line and carriage-return line-break characters
  
  All JavaScript identifier names will be substituted to unified names with the
-following format: a0 -> z9999. So, the number of unique identifiers available
-is 260000 names per HTTP transaction. If Normalizer overruns the configured
-limit, built-in alert generated. Additionaly, there is a config option to
+following format: var_0000 -> var_ffff. So, the number of unique identifiers available
+is 65536 names per HTTP transaction. If the Normalizer exceeds the configured
+limit, a built-in alert is generated. Additionally, there is a config option to
  specify the limit manually:
   * http_inspect.js_norm_identifier_depth.
  
diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc

index b090fbf64a8978696cf627b289410616a5f8b634..e924619506db3939840303b9ce0991b2f82d9cdd 100755 (executable)
--- a/src/service_inspectors/http_inspect/http_module.cc
+++ b/src/service_inspectors/http_inspect/http_module.cc
@@ -85,8 +85,8 @@ const Parameter HttpModule::http_params[] =
        "number of input JavaScript bytes to normalize (-1 unlimited) "
        "(experimental)" },
  
-    // range of accepted identifier names is (a0:z9999), so the max is 26 * 10000 = 260000
-    { "js_norm_identifier_depth", Parameter::PT_INT, "0:260000", "260000",
+    // range of accepted identifier names is (var_0000:var_ffff), so the max is 2^16 = 65536
+    { "js_norm_identifier_depth", Parameter::PT_INT, "0:65536", "65536",
        "max number of unique JavaScript identifiers to normalize" },
  
      { "js_norm_max_tmpl_nest", Parameter::PT_INT, "0:255", "32",
diff --git a/src/utils/dev_notes.txt b/src/utils/dev_notes.txt

index 7ecc6c546454effe707da70673db7bb09f914208..5d5d1f1b780b20da4183c8975d1eec7c6011a56d 100644 (file)
--- a/src/utils/dev_notes.txt
+++ b/src/utils/dev_notes.txt
@@ -44,5 +44,5 @@ which could be useful for final consumer.
  
  From performance perspective, ostreambuf_infl can reserve an amount of memory
  before actual operations. Also, memory extending is done by predefined
-portions of 2^8^, 2^9^, 2^10^, 2^13^, 2^16^, 2^16^, 2^16^...
+portions of 2^11^, 2^12^, 2^13^, 2^14^, 2^15^, 2^15^, 2^15^...
  This tries to minimize the number of memory reallocation.
diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc

index 3b682b49025eb5d95850dfbb8acc42721077b5d7..35b2b44dd29eabbd880dbe6a169d6923a780499a 100644 (file)
--- a/src/utils/js_identifier_ctx.cc
+++ b/src/utils/js_identifier_ctx.cc
@@ -42,44 +42,43 @@ public:
  };
  #endif // CATCH_TEST_BUILD
  
-#define FIRST_NAME_SIZE   26
-#define LAST_NAME_SIZE  9999
+#define MAX_LAST_NAME     65535
+#define HEX_DIGIT_MASK   15
  
-static const char s_ident_first_names[FIRST_NAME_SIZE] =
+static const char hex_digits[] = 
  {
-    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
-    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
+    '0', '1','2','3', '4', '5', '6', '7', '8','9', 'a', 'b', 'c', 'd', 'e', 'f'
  };
  
+static inline std::string format_name(int32_t num)
+{
+    std::string name("var_");
+    name.reserve(8);
+    name.push_back(hex_digits[(num >> 12) & HEX_DIGIT_MASK]); 
+    name.push_back(hex_digits[(num >> 8) & HEX_DIGIT_MASK]); 
+    name.push_back(hex_digits[(num >> 4) & HEX_DIGIT_MASK]);
+    name.push_back(hex_digits[num & HEX_DIGIT_MASK]); 
+
+    return name;
+}
+
  const char* JSIdentifierCtx::substitute(const char* identifier)
  {
      const auto it = ident_names.find(identifier);
      if (it != ident_names.end())
          return it->second.c_str();
  
-    if (++ident_last_name > LAST_NAME_SIZE)
-    {
-        if (++ident_first_name > FIRST_NAME_SIZE - 1)
-            return nullptr;
-
-        ident_last_name = 0;
-    }
-
-    if (++unique_ident_cnt > depth)
+    if (ident_last_name >= depth || ident_last_name > MAX_LAST_NAME)
          return nullptr;
  
-    ident_names[identifier] = s_ident_first_names[ident_first_name]
-        + std::to_string(ident_last_name);
-
+    ident_names[identifier] = format_name(ident_last_name++);
      HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER);
      return ident_names[identifier].c_str();
  }
  
  void JSIdentifierCtx::reset()
  {
-    ident_first_name = 0;
-    ident_last_name = -1;
-    unique_ident_cnt = 0;
+    ident_last_name = 0;
      ident_names.clear();
  }
  
diff --git a/src/utils/js_identifier_ctx.h b/src/utils/js_identifier_ctx.h

index 6a5add29403ca9cf78a4ee52ecba8c722728f381..b69ec867971cd0810f0b4ce2cfd287fa16bd8c27 100644 (file)
--- a/src/utils/js_identifier_ctx.h
+++ b/src/utils/js_identifier_ctx.h
@@ -46,9 +46,7 @@ public:
      { return (sizeof(JSIdentifierCtx) + (sizeof(std::string) * 2 * 500)); }
  
  private:
-    int ident_first_name = 0;
-    int ident_last_name = -1;
-    int32_t unique_ident_cnt = 0;
+    int32_t ident_last_name = 0;
      int32_t depth;
  
      std::unordered_map<std::string, std::string> ident_names;
diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc

index eff85e64e644a0749e2e1d1a86bc3c69ff191fd6..cca5ed758589504c36c2401a4fb021d678f8fab3 100644 (file)
--- a/src/utils/js_normalizer.cc
+++ b/src/utils/js_normalizer.cc
@@ -23,6 +23,8 @@
  
  #include "js_normalizer.h"
  
+#define BUFF_EXP_FACTOR 1.3
+
  using namespace snort;
  using namespace std;
  
@@ -67,7 +69,7 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len)
      in_buf.pubsetbuf(nullptr, 0)
          ->pubsetbuf(tmp_buf, tmp_buf_size)
          ->pubsetbuf(const_cast<char*>(src), len);
-    out_buf.reserve(src_len);
+    out_buf.reserve(src_len * BUFF_EXP_FACTOR);
  
      JSTokenizer::JSRet ret = static_cast<JSTokenizer::JSRet>(tokenizer.yylex());
      in.clear();
diff --git a/src/utils/streambuf.cc b/src/utils/streambuf.cc

index dee1b939a3f0500d566d3d1b5906f93b5efa23c4..1b5e1d1b01a452993f62033fc86925cee810c97d 100644 (file)
--- a/src/utils/streambuf.cc
+++ b/src/utils/streambuf.cc
@@ -222,11 +222,11 @@ int istreambuf_glue::underflow()
  
  const ostreambuf_infl::State ostreambuf_infl::states[] =
  {
-    {states + 1, 1 << 8},
-    {states + 2, 1 << 9},
-    {states + 3, 1 << 10},
-    {states + 4, 1 << 13},
-    {states + 4, 1 << 16}
+    {states + 1, 1 << 11},
+    {states + 2, 1 << 12},
+    {states + 3, 1 << 13},
+    {states + 4, 1 << 14},
+    {states + 4, 1 << 15}
  };
  
  ostreambuf_infl::ostreambuf_infl() :
@@ -335,7 +335,7 @@ streamsize ostreambuf_infl::xsputn(const char* s, streamsize n)
  
      auto c_avail = epptr() - pptr();
      if (n > c_avail)
-        enlarge(n - c_avail);
+        gen.n > (n - c_avail) ? enlarge() : enlarge(n - c_avail);
  
      auto n_avail = epptr() - pptr();
      n = min(n, n_avail);
diff --git a/src/utils/test/js_identifier_ctx_test.cc b/src/utils/test/js_identifier_ctx_test.cc

index 89f02525038982681b505e8524dcd1a558dc1091..618bb22b98f4b23661326bef0949b0d33520e350 100644 (file)
--- a/src/utils/test/js_identifier_ctx_test.cc
+++ b/src/utils/test/js_identifier_ctx_test.cc
@@ -25,19 +25,11 @@
  
  #include <cstring>
  #include <vector>
+#include <iomanip>
  
  #include "utils/js_identifier_ctx.h"
  
-#define DEPTH 260000
-
-#define FIRST_NAME_SIZE   26
-#define LAST_NAME_SIZE  9999
-
-static const char s_ident_first_names[FIRST_NAME_SIZE] =
-{
-    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
-    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
-};
+#define DEPTH 65536
  
  TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
  {
@@ -45,26 +37,26 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
      {
          JSIdentifierCtx ident_ctx(DEPTH);
  
-        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
-        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
      }
      SECTION("different names")
      {
          JSIdentifierCtx ident_ctx(DEPTH);
  
-        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
-        CHECK(!strcmp(ident_ctx.substitute("b"), "a1"));
-        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
+        CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
      }
      SECTION("depth reached")
      {
          JSIdentifierCtx ident_ctx(2);
  
-        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
-        CHECK(!strcmp(ident_ctx.substitute("b"), "a1"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
+        CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001"));
          CHECK(ident_ctx.substitute("c") == nullptr);
          CHECK(ident_ctx.substitute("d") == nullptr);
-        CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+        CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
      }
      SECTION("max names")
      {
@@ -77,10 +69,12 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
          for (int it = 0; it < DEPTH + 2; ++it)
              n.push_back("n" + std::to_string(it));
  
-        for (int it_first = 0; it_first < FIRST_NAME_SIZE; ++it_first)
+        for (int it_name = 0; it_name < DEPTH; ++it_name)
          {
-            for (int it_last = 0; it_last <= LAST_NAME_SIZE; ++it_last)
-                e.push_back(s_ident_first_names[it_first] + std::to_string(it_last));
+            std::stringstream stream;
+            stream << std::setfill ('0') << std::setw(4) 
+                << std::hex << it_name;
+            e.push_back("var_" + stream.str());
          }
  
          for (int it = 0; it < DEPTH; ++it)
diff --git a/src/utils/test/streambuf_test.cc b/src/utils/test/streambuf_test.cc

index 65778366e7311fe551aefe8b9205582c33b06292..f3246b4fce89990cb41a5ec4145361000dbf2c73 100644 (file)
--- a/src/utils/test/streambuf_test.cc
+++ b/src/utils/test/streambuf_test.cc
@@ -1442,9 +1442,9 @@ TEST_CASE("output buffer - basic", "[Stream buffers]")
          CHECK(c == 'A');
          CHECK(off_b == 0);
          CHECK(off_c == 1);
-        CHECK(off_e == 256);
+        CHECK(off_e == 2048);
  
-        EXP_RES(b, exp, 1, 256);
+        EXP_RES(b, exp, 1, 2048);
      }
  
      SECTION("put two chars")
@@ -1463,9 +1463,9 @@ TEST_CASE("output buffer - basic", "[Stream buffers]")
          CHECK(off_b == 0);
          CHECK(off_1 == 1);
          CHECK(off_2 == 2);
-        CHECK(off_e == 256);
+        CHECK(off_e == 2048);
  
-        EXP_RES(b, exp, 2, 256);
+        EXP_RES(b, exp, 2, 2048);
      }
  
      SECTION("extend buffer")
@@ -1484,9 +1484,9 @@ TEST_CASE("output buffer - basic", "[Stream buffers]")
          CHECK(c2 == 'Z');
          CHECK(off_b == 0);
          CHECK(off_1 == 1);
-        CHECK(off_2 == 257);
-        CHECK(off_e == 256);
-        CHECK(off_z == 768);
+        CHECK(off_2 == 2049);
+        CHECK(off_e == 2048);
+        CHECK(off_z == 6144);
      }
  
      SECTION("put sequence of chars")
@@ -1524,7 +1524,7 @@ TEST_CASE("output buffer - basic", "[Stream buffers]")
          CHECK(c2 == 'Z');
          CHECK(off_b == 0);
          CHECK(off_c == len + 2);
-        CHECK(off_e == 4096 + 512);
+        CHECK(off_e == 4096 + 2048);
      }
  }
author	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Fri, 1 Oct 2021 16:57:06 +0000 (16:57 +0000)
committer	Mike Stepanek (mstepane) <mstepane@cisco.com>
	Fri, 1 Oct 2021 16:57:06 +0000 (16:57 +0000)
src/service_inspectors/http_inspect/dev_notes.txt		patch \| blob \| blame \| history
src/service_inspectors/http_inspect/http_module.cc		patch \| blob \| blame \| history
src/utils/dev_notes.txt		patch \| blob \| blame \| history
src/utils/js_identifier_ctx.cc		patch \| blob \| blame \| history
src/utils/js_identifier_ctx.h		patch \| blob \| blame \| history
src/utils/js_normalizer.cc		patch \| blob \| blame \| history
src/utils/streambuf.cc		patch \| blob \| blame \| history
src/utils/test/js_identifier_ctx_test.cc		patch \| blob \| blame \| history
src/utils/test/streambuf_test.cc		patch \| blob \| blame \| history