]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Merge pull request #2992 in SNORT/snort3 from ~OSERHIIE/snort3:js_identifier_norm...
authorMike Stepanek (mstepane) <mstepane@cisco.com>
Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)
committerMike Stepanek (mstepane) <mstepane@cisco.com>
Mon, 9 Aug 2021 10:30:22 +0000 (10:30 +0000)
Squashed commit of the following:

commit 6cbd5f096fd4531ec454edbbadc707919258e847
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Mon Jul 12 13:39:20 2021 +0300

    http_inspect: add JavaScript identifiers normalization

        * utils: add identifiers normalization to js_tokenizer and js_identifier_ctx as a context of identifiers normalization
        * utils: adjust js_normalizer_test unit tests
        * utils: add js_identifier_ctx_test unit tests
        * http_inspect: add js_norm_identifier_depth config option
        * http_inspect: add JS_IDENTIFIER_OVERFLOW built-in alert
        * http_inspect: add js_identifiers and js_identifier_overflows peg counts
        * http_inspect: update dev_notes.txt

23 files changed:
src/service_inspectors/http_inspect/dev_notes.txt
src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_flow_data.cc
src/service_inspectors/http_inspect/http_flow_data.h
src/service_inspectors/http_inspect/http_inspect.cc
src/service_inspectors/http_inspect/http_js_norm.cc
src/service_inspectors/http_inspect/http_js_norm.h
src/service_inspectors/http_inspect/http_module.cc
src/service_inspectors/http_inspect/http_module.h
src/service_inspectors/http_inspect/http_msg_request.cc
src/service_inspectors/http_inspect/http_tables.cc
src/service_inspectors/http_inspect/test/http_module_test.cc
src/service_inspectors/http_inspect/test/http_uri_norm_test.cc
src/utils/CMakeLists.txt
src/utils/js_identifier_ctx.cc [new file with mode: 0644]
src/utils/js_identifier_ctx.h [new file with mode: 0644]
src/utils/js_normalizer.cc
src/utils/js_normalizer.h
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/test/CMakeLists.txt
src/utils/test/js_identifier_ctx_test.cc [new file with mode: 0644]
src/utils/test/js_normalizer_test.cc

index ace30ff37c39af706c68e6fe3b13a0372b45f30c..305ab6dd60958b8387566dce0e958a67e3fdcb71 100755 (executable)
@@ -220,7 +220,7 @@ During message body analysis the Enhanced Normalizer does one of the following:
    subsequent bytes in a stream mode, until it finds a closing tag.
    It proceeds and scans the entire message body for inline scripts.
 
-Enhanced Normalizer is a stateful JavaScript whitespace normalizer.
+Enhanced Normalizer is a stateful JavaScript whitespace and identifiers normalizer.
 So, the following whitespace codes will be normalized:
  * \u0009 Tab <TAB>
  * \u000B Vertical Tab <VT>
@@ -231,6 +231,13 @@ So, the following whitespace codes will be normalized:
  * Any other Unicode “space separator” <USP>
  * Also including new-line and carriage-return line-break characters
 
+All JavaScript identifier names will be substituted with unified names of the
+following format: a0 -> z9999. So, the number of unique identifiers available
+is 260000 names per HTTP transaction. If Normalizer overruns the configured
+limit, a built-in alert is generated. Additionally, there is a config option to
+specify the limit manually:
+ * http_inspect.js_norm_identifier_depth.
+
 Additionally, Normalizer validates the syntax with respect to ECMA-262 Standard,
 and checks for restrictions for contents of script elements (since, it is HTML-embedded JavaScript).
 
index 31b60846e10d888f70fbb72112cf3bdf6e292775..47177cb738a78e8b9ada08f421368a000c9f0ad4 100755 (executable)
@@ -64,7 +64,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES
     PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION,
     PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY,
     PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL,
-    PEG_JS_BYTES, PEG_COUNT_MAX };
+    PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_COUNT_MAX };
 
 // Result of scanning by splitter
 enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE,
@@ -271,6 +271,7 @@ enum Infraction
     INF_JS_CLOSING_TAG,
     INF_JS_CODE_IN_EXTERNAL,
     INF_JS_SHORTENED_TAG,
+    INF_JS_IDENTIFIER_OVERFLOW,
     INF__MAX_VALUE
 };
 
@@ -399,6 +400,7 @@ enum EventSid
     EVENT_JS_CLOSING_TAG = 267,
     EVENT_JS_CODE_IN_EXTERNAL = 268,
     EVENT_JS_SHORTENED_TAG = 269,
+    EVENT_JS_IDENTIFIER_OVERFLOW = 270,
     EVENT__MAX_VALUE
 };
 
index 3e090b35f29b73971a6a49bcf28a1ca4a475fff8..777b7ab00f3ac56a504602559e6bd483a00b41f3 100644 (file)
@@ -25,6 +25,7 @@
 
 #include "decompress/file_decomp.h"
 #include "service_inspectors/http2_inspect/http2_flow_data.h"
+#include "utils/js_identifier_ctx.h"
 #include "utils/js_normalizer.h"
 
 #include "http_cutter.h"
@@ -91,6 +92,11 @@ HttpFlowData::~HttpFlowData()
         HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS);
 
 #ifndef UNIT_TEST_BUILD
+    if (js_ident_ctx)
+    {
+        update_deallocations(js_ident_ctx->size());
+        delete js_ident_ctx;
+    }
     if (js_normalizer)
     {
         update_deallocations(JSNormalizer::size());
@@ -231,12 +237,24 @@ void HttpFlowData::garbage_collect()
 }
 
 #ifndef UNIT_TEST_BUILD
-snort::JSNormalizer& HttpFlowData::acquire_js_ctx()
+void HttpFlowData::reset_js_ident_ctx()
+{
+    if (js_ident_ctx)
+        js_ident_ctx->reset();
+}
+
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t ident_depth, size_t norm_depth)
 {
     if (js_normalizer)
         return *js_normalizer;
 
-    js_normalizer = new JSNormalizer();
+    if (!js_ident_ctx)
+    {
+        js_ident_ctx = new JSIdentifierCtx(ident_depth);
+        update_allocations(js_ident_ctx->size());
+    }
+
+    js_normalizer = new JSNormalizer(*js_ident_ctx, norm_depth);
     update_allocations(JSNormalizer::size());
 
     return *js_normalizer;
@@ -252,7 +270,9 @@ void HttpFlowData::release_js_ctx()
     js_normalizer = nullptr;
 }
 #else
-snort::JSNormalizer& HttpFlowData::acquire_js_ctx() { return *js_normalizer; }
+void HttpFlowData::reset_js_ident_ctx() {}
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t, size_t)
+{ return *js_normalizer; }
 void HttpFlowData::release_js_ctx() {}
 #endif
 
index 38fd4ee71748b78989df7fb967919d6f9308dff3..2a1dfc148db18bda9a3210fe363e1def7a0a58cf 100644 (file)
@@ -38,6 +38,7 @@ class HttpJsNorm;
 class HttpMsgSection;
 class HttpCutter;
 class HttpQueryParser;
+class JSIdentifierCtxBase;
 
 namespace snort
 {
@@ -193,10 +194,12 @@ private:
     bool ssl_search_abandoned = false;
 
     // *** HttpJsNorm
+    JSIdentifierCtxBase* js_ident_ctx = nullptr;
     snort::JSNormalizer* js_normalizer = nullptr;
     bool js_built_in_event = false;
 
-    snort::JSNormalizer& acquire_js_ctx();
+    void reset_js_ident_ctx();
+    snort::JSNormalizer& acquire_js_ctx(int32_t ident_depth, size_t norm_depth);
     void release_js_ctx();
 
     // *** Transaction management including pipelining
index 29b646158a0d24ba652b66c2774ebcd1a282a13d..cdcaefac8a3b38e970fabcd9f736ec83c053325a 100755 (executable)
@@ -160,6 +160,7 @@ void HttpInspect::show(const SnortConfig*) const
         params->js_norm_param.max_javascript_whitespaces);
     ConfigLogger::log_value("js_normalization_depth",
         params->js_norm_param.js_normalization_depth);
+    ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth);
     ConfigLogger::log_value("bad_characters", bad_chars.c_str());
     ConfigLogger::log_value("ignore_unreserved", unreserved_chars.c_str());
     ConfigLogger::log_flag("percent_u", params->uri_param.percent_u);
index a851f85f798121cb7be1c450e486225d6f098104..f1536d3819a7dd47c572b58d462490530b075926 100644 (file)
@@ -47,9 +47,11 @@ static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const char* con
     return ret;
 }
 
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
+    int32_t identifier_depth_) :
     uri_param(uri_param_),
     normalization_depth(normalization_depth_),
+    identifier_depth(identifier_depth_),
     mpse_otag(nullptr),
     mpse_attr(nullptr),
     mpse_type(nullptr)
@@ -125,8 +127,7 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output,
             dst_end = buffer + len;
         }
 
-        auto& ctx = ssn->acquire_js_ctx();
-        ctx.set_depth(normalization_depth);
+        auto& ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth);
         auto ret = js_normalize(ctx, end, dst_end, ptr, dst);
 
         switch (ret)
@@ -150,6 +151,12 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output,
             events->create_event(EVENT_JS_BAD_TOKEN);
             ssn->js_built_in_event = true;
             break;
+        case JSTokenizer::IDENTIFIER_OVERFLOW:
+            HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
+            *infractions += INF_JS_IDENTIFIER_OVERFLOW;
+            events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
+            ssn->js_built_in_event = true;
+            break;
         default:
             assert(false);
             break;
@@ -228,8 +235,7 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output,
             dst_end = buffer + len;
         }
 
-        auto& ctx = ssn->acquire_js_ctx();
-        ctx.set_depth(normalization_depth);
+        auto& ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth);
         auto dst_before = dst;
         auto ret = js_normalize(ctx, end, dst_end, ptr, dst);
 
@@ -260,6 +266,12 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output,
             events->create_event(EVENT_JS_BAD_TOKEN);
             script_continue = false;
             break;
+        case JSTokenizer::IDENTIFIER_OVERFLOW:
+            HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
+            *infractions += INF_JS_IDENTIFIER_OVERFLOW;
+            events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
+            script_continue = false;
+            break;
         default:
             assert(false);
             script_continue = false;
index 38f5399849d35bf544b0099f9aa8a0956ec3f6c8..c21c2462a0ebbff62e1776a5c0416152e2bbf843 100644 (file)
@@ -36,7 +36,8 @@
 class HttpJsNorm
 {
 public:
-    HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth);
+    HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth,
+        int32_t identifier_depth);
     ~HttpJsNorm();
 
     void legacy_normalize(const Field& input, Field& output, HttpInfractions*, HttpEventGen*,
@@ -59,6 +60,7 @@ private:
 
     const HttpParaList::UriParam& uri_param;
     int64_t normalization_depth;
+    int32_t identifier_depth;
     bool configure_once = false;
 
     snort::SearchTool* mpse_otag;
index ece3f5a6e3a15475f2ed43928fd63fcc9869eb9d..c0d8e1184c64cacc80b9c34a4027005c76e0fa2a 100755 (executable)
@@ -74,8 +74,13 @@ const Parameter HttpModule::http_params[] =
       "use legacy normalizer to normalize JavaScript in response bodies" },
 
     { "js_normalization_depth", Parameter::PT_INT, "-1:max53", "0",
-      "number of input JavaScript bytes to normalize with enhanced normalizer "
-      "(-1 max allowed value) (experimental)" },
+      "enable enhanced normalizer (0 is disabled); "
+      "number of input JavaScript bytes to normalize (-1 unlimited) "
+      "(experimental)" },
+
+    // range of accepted identifier names is (a0:z9999), so the max is 26 * 10000 = 260000
+    { "js_norm_identifier_depth", Parameter::PT_INT, "0:260000", "260000",
+      "max number of unique JavaScript identifiers to normalize" },
 
     { "max_javascript_whitespaces", Parameter::PT_INT, "1:65535", "200",
       "maximum consecutive whitespaces allowed within the JavaScript obfuscated data" },
@@ -206,6 +211,10 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*)
             params->js_norm_param.is_javascript_normalization
             or params->js_norm_param.normalize_javascript;
     }
+    else if (val.is("js_norm_identifier_depth"))
+    {
+        params->js_norm_param.js_identifier_depth = val.get_int32();
+    }
     else if (val.is("js_normalization_depth"))
     {
         int64_t v = val.get_int64();
@@ -400,7 +409,8 @@ bool HttpModule::end(const char*, int, SnortConfig*)
         ParseError("Cannot use normalize_javascript and js_normalization_depth together.");
 
     if ( params->js_norm_param.is_javascript_normalization )
-        params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, params->js_norm_param.js_normalization_depth);
+        params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param,
+        params->js_norm_param.js_normalization_depth, params->js_norm_param.js_identifier_depth);
 
     params->script_detection_handle = script_detection_handle;
 
index e716ed93fc3730fc1bae5ae1900c7f552c1eabef..e1297abb958ed9f1b7a8e958cca772a5ea7efcad 100755 (executable)
@@ -56,6 +56,7 @@ public:
         bool normalize_javascript = false;
         bool is_javascript_normalization = false;
         int64_t js_normalization_depth = 0;
+        int32_t js_identifier_depth = 0;
         int max_javascript_whitespaces = 200;
         class HttpJsNorm* js_norm = nullptr;
     };
index 3535d93a8260ee129d7ef50605db38d6895753f3..3d4587a09f5eedd482025955465d7c72d621e3d7 100644 (file)
@@ -41,6 +41,7 @@ HttpMsgRequest::HttpMsgRequest(const uint8_t* buffer, const uint16_t buf_size,
     transaction->set_request(this);
     get_related_sections();
     session_data->release_js_ctx();
+    session_data->reset_js_ident_ctx();
 }
 
 HttpMsgRequest::~HttpMsgRequest()
index ab1ba12aad912fc6045d465b4df606fe96bbae9f..1177839b735332907f672ce47d38765b6cfb5c33 100755 (executable)
@@ -432,6 +432,7 @@ const RuleMap HttpModule::http_events[] =
     { EVENT_JS_CLOSING_TAG,             "unexpected script closing tag in JavaScript" },
     { EVENT_JS_CODE_IN_EXTERNAL,        "JavaScript code under the external script tags" },
     { EVENT_JS_SHORTENED_TAG,           "script opening tag in a short form" },
+    { EVENT_JS_IDENTIFIER_OVERFLOW,     "max number of unique JavaScript identifiers reached" },
     { 0, nullptr }
 };
 
@@ -471,6 +472,9 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] =
     { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" },
     { CountType::SUM, "js_external_scripts", "total number of external JavaScripts processed" },
     { CountType::SUM, "js_bytes", "total number of JavaScript bytes processed" },
+    { CountType::SUM, "js_identifiers", "total number of unique JavaScript identifiers processed" },
+    { CountType::SUM, "js_identifier_overflows", "total number of unique JavaScript identifier "
+        "limit overflows" },
     { CountType::END, nullptr, nullptr }
 };
 
index 23d35c7e0551a420bb55039c57fd28cb017a47eb..134377823218a9233b1cd050b566834318b2e233 100755 (executable)
@@ -64,9 +64,11 @@ int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return
 long HttpTestManager::print_amount {};
 bool HttpTestManager::print_hex {};
 
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
+    int32_t identifier_depth_) :
     uri_param(uri_param_), normalization_depth(normalization_depth_),
-    mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
+    identifier_depth(identifier_depth_), mpse_otag(nullptr), mpse_attr(nullptr),
+    mpse_type(nullptr) {}
 HttpJsNorm::~HttpJsNorm() = default;
 void HttpJsNorm::configure(){}
 int64_t Parameter::get_int(char const*) { return 0; }
index 3982153e4a3fbf33006327cf763fff936cd5cfc8..376e3d1e70d86d2b1dd8c75308be317b47d420af 100755 (executable)
@@ -53,9 +53,11 @@ LiteralSearch* LiteralSearch::instantiate(LiteralSearch::Handle*, const uint8_t*
 void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { }
 void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { }
 
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) :
+HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
+    int32_t identifier_depth_) :
     uri_param(uri_param_), normalization_depth(normalization_depth_),
-    mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
+    identifier_depth(identifier_depth_), mpse_otag(nullptr), mpse_attr(nullptr),
+    mpse_type(nullptr) {}
 HttpJsNorm::~HttpJsNorm() = default;
 void HttpJsNorm::configure() {}
 int64_t Parameter::get_int(char const*) { return 0; }
index 38fc2ddce3816fbacce50b0d89927ad24fe4cd64..632a5f5b48f4343d063f8380b92975cc12d0d5a8 100644 (file)
@@ -32,6 +32,8 @@ add_library ( utils OBJECT
     dnet_header.h
     dyn_array.cc
     dyn_array.h
+    js_identifier_ctx.cc
+    js_identifier_ctx.h
     js_normalizer.cc
     js_normalizer.h
     js_tokenizer.h
diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc
new file mode 100644 (file)
index 0000000..308c7d7
--- /dev/null
@@ -0,0 +1,85 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_identifier_ctx.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "js_identifier_ctx.h"
+
+#ifndef CATCH_TEST_BUILD
+#include "service_inspectors/http_inspect/http_enum.h"
+#include "service_inspectors/http_inspect/http_module.h"
+#else
+namespace HttpEnums
+{
+enum PEG_COUNT
+{
+    PEG_JS_IDENTIFIER
+};
+}
+
+class HttpModule
+{
+public:
+    static void increment_peg_counts(HttpEnums::PEG_COUNT) {}
+};
+#endif // CATCH_TEST_BUILD
+
+// Name space of substituted identifiers: one leading letter followed by a number.
+// FIRST_NAME_SIZE is the number of available leading letters; LAST_NAME_SIZE is
+// the highest numeric suffix (inclusive), so suffixes run from 0 to 9999.
+#define FIRST_NAME_SIZE   26
+#define LAST_NAME_SIZE  9999
+
+// Leading letters of substituted names, indexed by the current first-name position.
+static const char s_ident_first_names[FIRST_NAME_SIZE] =
+{
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
+};
+
+// Maps an original identifier to its unified name ("a0" .. "z9999").
+// An identifier seen before gets its previously assigned name; a new one gets
+// the next free name. Returns nullptr when either the configured depth or the
+// a0..z9999 name space is exhausted.
+const char* JSIdentifierCtx::substitute(const char* identifier)
+{
+    const auto it = ident_names.find(identifier);
+    if (it != ident_names.end())
+        return it->second.c_str();
+
+    // Reject before touching any counter: the overflow path can be hit an
+    // unbounded number of times by a single script, so it must not keep
+    // incrementing (and eventually overflowing) the signed counters.
+    if (unique_ident_cnt >= depth)
+        return nullptr;
+
+    int first_name = ident_first_name;
+    int last_name = ident_last_name + 1;
+
+    if (last_name > LAST_NAME_SIZE)
+    {
+        // numeric suffixes for this letter are used up, move to the next letter
+        if (first_name >= FIRST_NAME_SIZE - 1)
+            return nullptr;
+
+        ++first_name;
+        last_name = 0;
+    }
+
+    // Commit the new state only once a name is actually handed out.
+    ident_first_name = first_name;
+    ident_last_name = last_name;
+    ++unique_ident_cnt;
+
+    // emplace() returns the stored element, which avoids two extra lookups.
+    const auto inserted = ident_names.emplace(identifier,
+        s_ident_first_names[first_name] + std::to_string(last_name));
+
+    HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER);
+    return inserted.first->second.c_str();
+}
+
+// Forgets every identifier mapping and rewinds the name generator and the
+// unique identifier counter to their initial values.
+void JSIdentifierCtx::reset()
+{
+    ident_names.clear();
+    unique_ident_cnt = 0;
+    ident_last_name = -1;
+    ident_first_name = 0;
+}
+
diff --git a/src/utils/js_identifier_ctx.h b/src/utils/js_identifier_ctx.h
new file mode 100644 (file)
index 0000000..6a5add2
--- /dev/null
@@ -0,0 +1,58 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_identifier_ctx.h author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifndef JS_IDENTIFIER_CTX
+#define JS_IDENTIFIER_CTX
+
+#include <string>
+#include <unordered_map>
+
+// Interface of the identifier normalization context used by the JavaScript
+// tokenizer; allows the tests to plug in their own implementation.
+class JSIdentifierCtxBase
+{
+public:
+    virtual ~JSIdentifierCtxBase() = default;
+
+    // Returns the name to emit in place of the given identifier.
+    virtual const char* substitute(const char* identifier) = 0;
+    // Resets the context state.
+    virtual void reset() = 0;
+    // Returns the memory footprint reported for allocation accounting.
+    virtual size_t size() const = 0;
+};
+
+// Default identifier context: hands out unified names and remembers the
+// original-to-normalized mapping until reset() is called.
+class JSIdentifierCtx : public JSIdentifierCtxBase
+{
+public:
+    // depth: max number of unique identifiers to substitute.
+    // explicit, so that an integer never silently converts into a context.
+    explicit JSIdentifierCtx(int32_t depth) : depth(depth) {}
+
+    const char* substitute(const char* identifier) override;
+    void reset() override;
+
+    // approximated to 500 unique mappings insertions
+    size_t size() const override
+    { return (sizeof(JSIdentifierCtx) + (sizeof(std::string) * 2 * 500)); }
+
+private:
+    int ident_first_name = 0;       // position of the current leading letter
+    int ident_last_name = -1;       // last numeric suffix handed out, -1 if none yet
+    int32_t unique_ident_cnt = 0;   // number of unique identifiers seen so far
+    int32_t depth;                  // limit for unique_ident_cnt
+
+    // original identifier -> substituted name
+    std::unordered_map<std::string, std::string> ident_names;
+};
+
+#endif // JS_IDENTIFIER_CTX
+
index 7e4b1d9a24e13e25175f4b5b616e99a64b2b7df6..86d2d9ae51ecbf4e433d5aa6ad12e7d7d7214a4c 100644 (file)
 
 using namespace snort;
 
-JSNormalizer::JSNormalizer()
-    : depth(-1),
-      rem_bytes(-1),
+JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth)
+    : depth(norm_depth),
+      rem_bytes(norm_depth),
       unlim(true),
       src_next(nullptr),
       dst_next(nullptr),
-      tokenizer(in, out)
+      tokenizer(in, out, js_ident_ctx)
 {
-}
-
-void JSNormalizer::set_depth(size_t new_depth)
-{
-    if (depth == new_depth)
-        return;
-
-    depth = new_depth;
-    rem_bytes = depth;
     unlim = depth == (size_t)-1;
 }
 
@@ -68,7 +59,9 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, char
     if (!unlim)
         rem_bytes -= r_bytes;
     src_next = src + r_bytes;
-    dst_next = dst + w_bytes;
+
+    // avoid heap overflow if number of written bytes bigger than accepted dst_len
+    dst_next = (w_bytes <= dst_len) ? dst + w_bytes : dst + dst_len;
 
     return rem_bytes ? ret : JSTokenizer::EOS;
 }
index 75bd407685396e21b31aa9e45ec2d04868297759..13673e4a9470602dea1a7dc9efc6b66a957a18d3 100644 (file)
@@ -32,7 +32,7 @@ namespace snort
 class JSNormalizer
 {
 public:
-    JSNormalizer();
+    JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t depth);
 
     const char* get_src_next() const
     { return src_next; }
@@ -43,8 +43,6 @@ public:
     void reset_depth()
     { rem_bytes = depth; }
 
-    void set_depth(size_t depth);
-
     JSTokenizer::JSRet normalize(const char* src, size_t src_len, char* dst, size_t dst_len);
 
     static size_t size();
index 0e0fd2a27f8edabd0b2246b717f29f57c76ded1f..e2612ac109da7b7be8b51e8aecb580ee41de801b 100644 (file)
@@ -24,6 +24,8 @@
 
 #include "log/messages.h"
 
+class JSIdentifierCtxBase;
+
 class JSTokenizer : public yyFlexLexer
 {
 private:
@@ -46,10 +48,11 @@ public:
         SCRIPT_CONTINUE,
         OPENING_TAG,
         CLOSING_TAG,
-        BAD_TOKEN
+        BAD_TOKEN,
+        IDENTIFIER_OVERFLOW
     };
 
-    JSTokenizer(std::istream& in, std::ostream& out);
+    JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx);
     ~JSTokenizer() override;
 
     // returns JSRet
@@ -65,6 +68,7 @@ private:
     JSRet eval_eof();
     JSRet do_spacing(JSToken cur_token);
     JSRet do_operator_spacing(JSToken cur_token);
+    JSRet do_identifier_substitution(const char* lexeme);
     bool unescape(const char* lexeme);
 
 private:
@@ -73,6 +77,7 @@ private:
     std::stringstream tmp;
 
     JSToken token = UNDEFINED;
+    JSIdentifierCtxBase& ident_ctx;
 };
 
 #endif // JS_TOKENIZER_H
index 8649ff61189ede5b18c5796ca6fb65ca60067a84..8182d4379bcbcacf1a511ffd4adc5c6db8d3aff7 100644 (file)
@@ -30,6 +30,7 @@
     #include "config.h"
     #endif
 
+    #include "utils/js_identifier_ctx.h"
     #include "utils/js_tokenizer.h"
 
     #include <cassert>
@@ -989,7 +990,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 {KEYWORD}                           { EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
 {OPERATOR}                          { EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
 {LITERAL}                           { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); }
-{IDENTIFIER}                        { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) ECHO; } BEGIN(divop); }
+{IDENTIFIER}                        { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); }
 
 .|{ALL_UNICODE}                     { ECHO; token = UNDEFINED; BEGIN(INITIAL); }
 <<EOF>>                             { EXEC(eval_eof()) }
@@ -1072,8 +1073,9 @@ static std::string unescape_unicode(const char* lexeme)
 
 // JSTokenizer members
 
-JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out)
-    : yyFlexLexer(in, out)
+JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx)
+    : yyFlexLexer(in, out),
+      ident_ctx(ident_ctx)
 {
     BEGIN(regst);
 }
@@ -1164,6 +1166,19 @@ JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token)
     return BAD_TOKEN;
 }
 
+JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme)
+{
+    const char* ident = ident_ctx.substitute(lexeme);
+
+    if (ident)
+    {
+        yyout << ident;
+        return EOS;
+    }
+
+    return IDENTIFIER_OVERFLOW;
+}
+
 bool JSTokenizer::unescape(const char* lexeme)
 {
     if ( strstr(lexeme, "\\u") )
index 816907aa4682565c9d6cd56217f8bcb5e08b7cb8..2a092f3231b2e8fce5d0f02952fcb8e5278041d6 100644 (file)
@@ -13,7 +13,13 @@ FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/../js_tokenizer.l
 add_catch_test( js_normalizer_test
     SOURCES
         ${FLEX_js_tokenizer_OUTPUTS}
+        ../js_identifier_ctx.cc
         ../js_normalizer.cc
         ../util_cstring.cc
 )
 
+add_catch_test( js_identifier_ctx_test
+    SOURCES
+        ../js_identifier_ctx.cc
+)
+
diff --git a/src/utils/test/js_identifier_ctx_test.cc b/src/utils/test/js_identifier_ctx_test.cc
new file mode 100644 (file)
index 0000000..89f0252
--- /dev/null
@@ -0,0 +1,93 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_identifier_ctx_test.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "catch/catch.hpp"
+
+#include <cstring>
+#include <vector>
+
+#include "utils/js_identifier_ctx.h"
+
+#define DEPTH 260000
+
+#define FIRST_NAME_SIZE   26
+#define LAST_NAME_SIZE  9999
+
+static const char s_ident_first_names[FIRST_NAME_SIZE] =
+{
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
+};
+
+// Unit tests for JSIdentifierCtx::substitute(): name reuse, sequential
+// assignment, the configured depth limit and name space exhaustion.
+TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
+{
+    SECTION("same name")
+    {
+        JSIdentifierCtx ctx(DEPTH);
+
+        CHECK(strcmp(ctx.substitute("a"), "a0") == 0);
+        CHECK(strcmp(ctx.substitute("a"), "a0") == 0);
+    }
+    SECTION("different names")
+    {
+        JSIdentifierCtx ctx(DEPTH);
+
+        CHECK(strcmp(ctx.substitute("a"), "a0") == 0);
+        CHECK(strcmp(ctx.substitute("b"), "a1") == 0);
+        CHECK(strcmp(ctx.substitute("a"), "a0") == 0);
+    }
+    SECTION("depth reached")
+    {
+        JSIdentifierCtx ctx(2);
+
+        CHECK(strcmp(ctx.substitute("a"), "a0") == 0);
+        CHECK(strcmp(ctx.substitute("b"), "a1") == 0);
+        CHECK(ctx.substitute("c") == nullptr);
+        CHECK(ctx.substitute("d") == nullptr);
+        // already mapped names stay available after the limit is hit
+        CHECK(strcmp(ctx.substitute("a"), "a0") == 0);
+    }
+    SECTION("max names")
+    {
+        JSIdentifierCtx ctx(DEPTH + 2);
+
+        std::vector<std::string> originals, expected;
+        originals.reserve(DEPTH + 2);
+        expected.reserve(DEPTH);
+
+        // two more input names than the name space can hold
+        for (int idx = 0; idx < DEPTH + 2; ++idx)
+            originals.emplace_back("n" + std::to_string(idx));
+
+        // every name from a0 to z9999 in assignment order
+        for (const char first : s_ident_first_names)
+        {
+            for (int last = 0; last <= LAST_NAME_SIZE; ++last)
+                expected.emplace_back(first + std::to_string(last));
+        }
+
+        for (int idx = 0; idx < DEPTH; ++idx)
+            CHECK(strcmp(ctx.substitute(originals[idx].c_str()), expected[idx].c_str()) == 0);
+
+        CHECK(ctx.substitute(originals[DEPTH].c_str()) == nullptr);
+        CHECK(ctx.substitute(originals[DEPTH + 1].c_str()) == nullptr);
+    }
+}
+
index b66d77766be7d7963950c9780245f8a5a6887739..7c27c51a0f506f94a4d844bd72be57a093467b0c 100644 (file)
@@ -25,6 +25,7 @@
 
 #include <cstring>
 
+#include "utils/js_identifier_ctx.h"
 #include "utils/js_normalizer.h"
 
 namespace snort
@@ -34,17 +35,28 @@ namespace snort
 { exit(EXIT_FAILURE); }
 }
 
+// Pass-through identifier context for the normalizer tests: substitute()
+// returns the identifier unchanged.
+class JSIdentifierCtxTest : public JSIdentifierCtxBase
+{
+public:
+    JSIdentifierCtxTest() = default;
+
+    const char* substitute(const char* identifier) override
+    { return identifier; }
+    void reset() override {}
+    // Holds no state; a value must still be returned, since flowing off the
+    // end of a non-void function is undefined behavior.
+    size_t size() const override { return 0; }
+};
+
 using namespace snort;
 
 #define DEPTH 65535
 
-#define NORMALIZE(src, expected)                                    \
-    char dst[sizeof(expected)];                                     \
-    JSNormalizer norm;                                              \
-    norm.set_depth(DEPTH);                                          \
-    auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst));  \
-    const char* ptr = norm.get_src_next();                          \
-    int act_len = norm.get_dst_next() - dst;                        \
+#define NORMALIZE(src, expected)                                   \
+    char dst[sizeof(expected)];                                    \
+    JSIdentifierCtxTest ident_ctx;                                 \
+    JSNormalizer norm(ident_ctx, DEPTH);                           \
+    auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst)); \
+    const char* ptr = norm.get_src_next();                         \
+    int act_len = norm.get_dst_next() - dst;
 
 #define VALIDATE(src, expected)                 \
     CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \
@@ -52,20 +64,20 @@ using namespace snort;
     CHECK(act_len == sizeof(expected) - 1);     \
     CHECK(!memcmp(dst, expected, act_len));
 
-#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset)  \
-    CHECK(ret == ret_code);                                 \
-    CHECK((ptr - src) == ptr_offset);                       \
-    CHECK(act_len == sizeof(expected) - 1);                 \
+#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset) \
+    CHECK(ret == ret_code);                                \
+    CHECK((ptr - src) == ptr_offset);                      \
+    CHECK(act_len == sizeof(expected) - 1);                \
     CHECK(!memcmp(dst, expected, act_len));
 
-#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len)   \
-    {                                                                   \
-        JSNormalizer norm;                                              \
-        norm.set_depth(depth);                                          \
-        ret = norm.normalize(src, src_len, dst, dst_len);               \
-        ptr = norm.get_src_next();                                      \
-        len = norm.get_dst_next() - dst;                                \
-    }                                                                   \
+#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \
+    {                                                                 \
+        JSIdentifierCtxTest ident_ctx;                                \
+        JSNormalizer norm(ident_ctx, depth);                          \
+        ret = norm.normalize(src, src_len, dst, dst_len);             \
+        ptr = norm.get_src_next();                                    \
+        len = norm.get_dst_next() - dst;                              \
+    }
 
 // ClamAV test cases
 static const char clamav_buf0[] =
@@ -869,9 +881,8 @@ TEST_CASE("endings", "[JSNormalizer]")
         const char* ptr;
         int ret;
 
-        JSNormalizer norm;
-
-        norm.set_depth(7);
+        JSIdentifierCtxTest ident_ctx;
+        JSNormalizer norm(ident_ctx, 7);
         ret = norm.normalize(src, sizeof(src), dst, sizeof(dst));
         ptr = norm.get_src_next();
         act_len = norm.get_dst_next() - dst;
@@ -902,7 +913,7 @@ TEST_CASE("endings", "[JSNormalizer]")
 
         CHECK(ret == JSTokenizer::SCRIPT_CONTINUE);
         CHECK(ptr == src + sizeof(src));
-        CHECK(act_len == 12); // size of normalized src
+        CHECK(act_len == 7); // size of normalized src
         CHECK(!memcmp(dst, expected, sizeof(dst)));
     }
 }
@@ -1239,3 +1250,4 @@ TEST_CASE("nested script tags", "[JSNormalizer]")
         VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 39);
     }
 }
+