From: Mike Stepanek (mstepane) Date: Mon, 9 Aug 2021 10:30:22 +0000 (+0000) Subject: Merge pull request #2992 in SNORT/snort3 from ~OSERHIIE/snort3:js_identifier_norm... X-Git-Tag: 3.1.10.0~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c9e33e84aa7ef9d108934ef90496655430f5ecb4;p=thirdparty%2Fsnort3.git Merge pull request #2992 in SNORT/snort3 from ~OSERHIIE/snort3:js_identifier_norm to master Squashed commit of the following: commit 6cbd5f096fd4531ec454edbbadc707919258e847 Author: Oleksandr Serhiienko Date: Mon Jul 12 13:39:20 2021 +0300 http_inspect: add JavaScript identifiers normalization * utils: add identifiers normalization to js_tokenizer and js_identifier_ctx as a context of identifiers normalization * utils: adjust js_normalizer_test unit tests * utils: add js_identifier_ctx_test unit tests * http_inspect: add js_norm_identifier_depth config option * http_inspect: add JS_IDENTIFIER_OVERFLOW built-in alert * http_inspect: add js_identifiers and js_identifier_overflows peg counts * http_inspect: update dev_notes.txt --- diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index ace30ff37..305ab6dd6 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -220,7 +220,7 @@ During message body analysis the Enhanced Normalizer does one of the following: subsequent bytes in a stream mode, until it finds a closing tag. It proceeds and scans the entire message body for inline scripts. -Enhanced Normalizer is a stateful JavaScript whitespace normalizer. +Enhanced Normalizer is a stateful JavaScript whitespace and identifiers normalizer. 
So, the following whitespace codes will be normalized: * \u0009 Tab * \u000B Vertical Tab @@ -231,6 +231,13 @@ So, the following whitespace codes will be normalized: * Any other Unicode “space separator” * Also including new-line and carriage-return line-break characters +All JavaScript identifier names will be substituted with unified names with the +following format: a0 -> z9999. So, the number of unique identifiers available +is 260000 names per HTTP transaction. If Normalizer overruns the configured +limit, a built-in alert is generated. Additionally, there is a config option to +specify the limit manually: + * http_inspect.js_norm_identifier_depth. + Additionally, Normalizer validates the syntax with respect to ECMA-262 Standard, and checks for restrictions for contents of script elements (since, it is HTML-embedded JavaScript). diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 31b60846e..47177cb73 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -64,7 +64,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION, PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY, PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL, - PEG_JS_BYTES, PEG_COUNT_MAX }; + PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_COUNT_MAX }; // Result of scanning by splitter enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE, @@ -271,6 +271,7 @@ enum Infraction INF_JS_CLOSING_TAG, INF_JS_CODE_IN_EXTERNAL, INF_JS_SHORTENED_TAG, + INF_JS_IDENTIFIER_OVERFLOW, INF__MAX_VALUE }; @@ -399,6 +400,7 @@ enum EventSid EVENT_JS_CLOSING_TAG = 267, EVENT_JS_CODE_IN_EXTERNAL = 268, EVENT_JS_SHORTENED_TAG = 269, + EVENT_JS_IDENTIFIER_OVERFLOW = 
270, EVENT__MAX_VALUE }; diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc index 3e090b35f..777b7ab00 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.cc +++ b/src/service_inspectors/http_inspect/http_flow_data.cc @@ -25,6 +25,7 @@ #include "decompress/file_decomp.h" #include "service_inspectors/http2_inspect/http2_flow_data.h" +#include "utils/js_identifier_ctx.h" #include "utils/js_normalizer.h" #include "http_cutter.h" @@ -91,6 +92,11 @@ HttpFlowData::~HttpFlowData() HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS); #ifndef UNIT_TEST_BUILD + if (js_ident_ctx) + { + update_deallocations(js_ident_ctx->size()); + delete js_ident_ctx; + } if (js_normalizer) { update_deallocations(JSNormalizer::size()); @@ -231,12 +237,24 @@ void HttpFlowData::garbage_collect() } #ifndef UNIT_TEST_BUILD -snort::JSNormalizer& HttpFlowData::acquire_js_ctx() +void HttpFlowData::reset_js_ident_ctx() +{ + if (js_ident_ctx) + js_ident_ctx->reset(); +} + +snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t ident_depth, size_t norm_depth) { if (js_normalizer) return *js_normalizer; - js_normalizer = new JSNormalizer(); + if (!js_ident_ctx) + { + js_ident_ctx = new JSIdentifierCtx(ident_depth); + update_allocations(js_ident_ctx->size()); + } + + js_normalizer = new JSNormalizer(*js_ident_ctx, norm_depth); update_allocations(JSNormalizer::size()); return *js_normalizer; @@ -252,7 +270,9 @@ void HttpFlowData::release_js_ctx() js_normalizer = nullptr; } #else -snort::JSNormalizer& HttpFlowData::acquire_js_ctx() { return *js_normalizer; } +void HttpFlowData::reset_js_ident_ctx() {} +snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t, size_t) +{ return *js_normalizer; } void HttpFlowData::release_js_ctx() {} #endif diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h index 38fd4ee71..2a1dfc148 100644 --- 
a/src/service_inspectors/http_inspect/http_flow_data.h +++ b/src/service_inspectors/http_inspect/http_flow_data.h @@ -38,6 +38,7 @@ class HttpJsNorm; class HttpMsgSection; class HttpCutter; class HttpQueryParser; +class JSIdentifierCtxBase; namespace snort { @@ -193,10 +194,12 @@ private: bool ssl_search_abandoned = false; // *** HttpJsNorm + JSIdentifierCtxBase* js_ident_ctx = nullptr; snort::JSNormalizer* js_normalizer = nullptr; bool js_built_in_event = false; - snort::JSNormalizer& acquire_js_ctx(); + void reset_js_ident_ctx(); + snort::JSNormalizer& acquire_js_ctx(int32_t ident_depth, size_t norm_depth); void release_js_ctx(); // *** Transaction management including pipelining diff --git a/src/service_inspectors/http_inspect/http_inspect.cc b/src/service_inspectors/http_inspect/http_inspect.cc index 29b646158..cdcaefac8 100755 --- a/src/service_inspectors/http_inspect/http_inspect.cc +++ b/src/service_inspectors/http_inspect/http_inspect.cc @@ -160,6 +160,7 @@ void HttpInspect::show(const SnortConfig*) const params->js_norm_param.max_javascript_whitespaces); ConfigLogger::log_value("js_normalization_depth", params->js_norm_param.js_normalization_depth); + ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth); ConfigLogger::log_value("bad_characters", bad_chars.c_str()); ConfigLogger::log_value("ignore_unreserved", unreserved_chars.c_str()); ConfigLogger::log_flag("percent_u", params->uri_param.percent_u); diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index a851f85f7..f1536d381 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -47,9 +47,11 @@ static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const char* con return ret; } -HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) : +HttpJsNorm::HttpJsNorm(const 
HttpParaList::UriParam& uri_param_, int64_t normalization_depth_, + int32_t identifier_depth_) : uri_param(uri_param_), normalization_depth(normalization_depth_), + identifier_depth(identifier_depth_), mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) @@ -125,8 +127,7 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output, dst_end = buffer + len; } - auto& ctx = ssn->acquire_js_ctx(); - ctx.set_depth(normalization_depth); + auto& ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth); auto ret = js_normalize(ctx, end, dst_end, ptr, dst); switch (ret) @@ -150,6 +151,12 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output, events->create_event(EVENT_JS_BAD_TOKEN); ssn->js_built_in_event = true; break; + case JSTokenizer::IDENTIFIER_OVERFLOW: + HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW); + *infractions += INF_JS_IDENTIFIER_OVERFLOW; + events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW); + ssn->js_built_in_event = true; + break; default: assert(false); break; @@ -228,8 +235,7 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output, dst_end = buffer + len; } - auto& ctx = ssn->acquire_js_ctx(); - ctx.set_depth(normalization_depth); + auto& ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth); auto dst_before = dst; auto ret = js_normalize(ctx, end, dst_end, ptr, dst); @@ -260,6 +266,12 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output, events->create_event(EVENT_JS_BAD_TOKEN); script_continue = false; break; + case JSTokenizer::IDENTIFIER_OVERFLOW: + HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW); + *infractions += INF_JS_IDENTIFIER_OVERFLOW; + events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW); + script_continue = false; + break; default: assert(false); script_continue = false; diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h index 
38f539984..c21c2462a 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.h +++ b/src/service_inspectors/http_inspect/http_js_norm.h @@ -36,7 +36,8 @@ class HttpJsNorm { public: - HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth); + HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth, + int32_t identifier_depth); ~HttpJsNorm(); void legacy_normalize(const Field& input, Field& output, HttpInfractions*, HttpEventGen*, @@ -59,6 +60,7 @@ private: const HttpParaList::UriParam& uri_param; int64_t normalization_depth; + int32_t identifier_depth; bool configure_once = false; snort::SearchTool* mpse_otag; diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc index ece3f5a6e..c0d8e1184 100755 --- a/src/service_inspectors/http_inspect/http_module.cc +++ b/src/service_inspectors/http_inspect/http_module.cc @@ -74,8 +74,13 @@ const Parameter HttpModule::http_params[] = "use legacy normalizer to normalize JavaScript in response bodies" }, { "js_normalization_depth", Parameter::PT_INT, "-1:max53", "0", - "number of input JavaScript bytes to normalize with enhanced normalizer " - "(-1 max allowed value) (experimental)" }, + "enable enhanced normalizer (0 is disabled); " + "number of input JavaScript bytes to normalize (-1 unlimited) " + "(experimental)" }, + + // range of accepted identifier names is (a0:z9999), so the max is 26 * 10000 = 260000 + { "js_norm_identifier_depth", Parameter::PT_INT, "0:260000", "260000", + "max number of unique JavaScript identifiers to normalize" }, { "max_javascript_whitespaces", Parameter::PT_INT, "1:65535", "200", "maximum consecutive whitespaces allowed within the JavaScript obfuscated data" }, @@ -206,6 +211,10 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*) params->js_norm_param.is_javascript_normalization or params->js_norm_param.normalize_javascript; } + else if (val.is("js_norm_identifier_depth")) + { + 
params->js_norm_param.js_identifier_depth = val.get_int32(); + } else if (val.is("js_normalization_depth")) { int64_t v = val.get_int64(); @@ -400,7 +409,8 @@ bool HttpModule::end(const char*, int, SnortConfig*) ParseError("Cannot use normalize_javascript and js_normalization_depth together."); if ( params->js_norm_param.is_javascript_normalization ) - params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, params->js_norm_param.js_normalization_depth); + params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, + params->js_norm_param.js_normalization_depth, params->js_norm_param.js_identifier_depth); params->script_detection_handle = script_detection_handle; diff --git a/src/service_inspectors/http_inspect/http_module.h b/src/service_inspectors/http_inspect/http_module.h index e716ed93f..e1297abb9 100755 --- a/src/service_inspectors/http_inspect/http_module.h +++ b/src/service_inspectors/http_inspect/http_module.h @@ -56,6 +56,7 @@ public: bool normalize_javascript = false; bool is_javascript_normalization = false; int64_t js_normalization_depth = 0; + int32_t js_identifier_depth = 0; int max_javascript_whitespaces = 200; class HttpJsNorm* js_norm = nullptr; }; diff --git a/src/service_inspectors/http_inspect/http_msg_request.cc b/src/service_inspectors/http_inspect/http_msg_request.cc index 3535d93a8..3d4587a09 100644 --- a/src/service_inspectors/http_inspect/http_msg_request.cc +++ b/src/service_inspectors/http_inspect/http_msg_request.cc @@ -41,6 +41,7 @@ HttpMsgRequest::HttpMsgRequest(const uint8_t* buffer, const uint16_t buf_size, transaction->set_request(this); get_related_sections(); session_data->release_js_ctx(); + session_data->reset_js_ident_ctx(); } HttpMsgRequest::~HttpMsgRequest() diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index ab1ba12aa..1177839b7 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ 
b/src/service_inspectors/http_inspect/http_tables.cc @@ -432,6 +432,7 @@ const RuleMap HttpModule::http_events[] = { EVENT_JS_CLOSING_TAG, "unexpected script closing tag in JavaScript" }, { EVENT_JS_CODE_IN_EXTERNAL, "JavaScript code under the external script tags" }, { EVENT_JS_SHORTENED_TAG, "script opening tag in a short form" }, + { EVENT_JS_IDENTIFIER_OVERFLOW, "max number of unique JavaScript identifiers reached" }, { 0, nullptr } }; @@ -471,6 +472,9 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] = { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" }, { CountType::SUM, "js_external_scripts", "total number of external JavaScripts processed" }, { CountType::SUM, "js_bytes", "total number of JavaScript bytes processed" }, + { CountType::SUM, "js_identifiers", "total number of unique JavaScript identifiers processed" }, + { CountType::SUM, "js_identifier_overflows", "total number of unique JavaScript identifier " + "limit overflows" }, { CountType::END, nullptr, nullptr } }; diff --git a/src/service_inspectors/http_inspect/test/http_module_test.cc b/src/service_inspectors/http_inspect/test/http_module_test.cc index 23d35c7e0..134377823 100755 --- a/src/service_inspectors/http_inspect/test/http_module_test.cc +++ b/src/service_inspectors/http_inspect/test/http_module_test.cc @@ -64,9 +64,11 @@ int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return long HttpTestManager::print_amount {}; bool HttpTestManager::print_hex {}; -HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) : +HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_, + int32_t identifier_depth_) : uri_param(uri_param_), normalization_depth(normalization_depth_), - mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {} + identifier_depth(identifier_depth_), mpse_otag(nullptr), mpse_attr(nullptr), + mpse_type(nullptr) {} HttpJsNorm::~HttpJsNorm() 
= default; void HttpJsNorm::configure(){} int64_t Parameter::get_int(char const*) { return 0; } diff --git a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc index 3982153e4..376e3d1e7 100755 --- a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc +++ b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc @@ -53,9 +53,11 @@ LiteralSearch* LiteralSearch::instantiate(LiteralSearch::Handle*, const uint8_t* void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { } void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { } -HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_) : +HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_, + int32_t identifier_depth_) : uri_param(uri_param_), normalization_depth(normalization_depth_), - mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {} + identifier_depth(identifier_depth_), mpse_otag(nullptr), mpse_attr(nullptr), + mpse_type(nullptr) {} HttpJsNorm::~HttpJsNorm() = default; void HttpJsNorm::configure() {} int64_t Parameter::get_int(char const*) { return 0; } diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 38fc2ddce..632a5f5b4 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -32,6 +32,8 @@ add_library ( utils OBJECT dnet_header.h dyn_array.cc dyn_array.h + js_identifier_ctx.cc + js_identifier_ctx.h js_normalizer.cc js_normalizer.h js_tokenizer.h diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc new file mode 100644 index 000000000..308c7d7fb --- /dev/null +++ b/src/utils/js_identifier_ctx.cc @@ -0,0 +1,85 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved. 
+// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_identifier_ctx.cc author Oleksandr Serhiienko + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "js_identifier_ctx.h" + +#ifndef CATCH_TEST_BUILD +#include "service_inspectors/http_inspect/http_enum.h" +#include "service_inspectors/http_inspect/http_module.h" +#else +namespace HttpEnums +{ +enum PEG_COUNT +{ + PEG_JS_IDENTIFIER +}; +} + +class HttpModule +{ +public: + static void increment_peg_counts(HttpEnums::PEG_COUNT) {} +}; +#endif // CATCH_TEST_BUILD + +#define FIRST_NAME_SIZE 26 +#define LAST_NAME_SIZE 9999 + +static const char s_ident_first_names[FIRST_NAME_SIZE] = +{ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' +}; + +const char* JSIdentifierCtx::substitute(const char* identifier) +{ + const auto it = ident_names.find(identifier); + if (it != ident_names.end()) + return it->second.c_str(); + + if (++ident_last_name > LAST_NAME_SIZE) + { + if (++ident_first_name > FIRST_NAME_SIZE - 1) + return nullptr; + + ident_last_name = 0; + } + + if (++unique_ident_cnt > depth) + return nullptr; + + 
ident_names[identifier] = s_ident_first_names[ident_first_name] + + std::to_string(ident_last_name); + + HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER); + return ident_names[identifier].c_str(); +} + +void JSIdentifierCtx::reset() +{ + ident_first_name = 0; + ident_last_name = -1; + unique_ident_cnt = 0; + ident_names.clear(); +} + diff --git a/src/utils/js_identifier_ctx.h b/src/utils/js_identifier_ctx.h new file mode 100644 index 000000000..6a5add294 --- /dev/null +++ b/src/utils/js_identifier_ctx.h @@ -0,0 +1,58 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+//-------------------------------------------------------------------------- +// js_identifier_ctx.h author Oleksandr Serhiienko + +#ifndef JS_IDENTIFIER_CTX +#define JS_IDENTIFIER_CTX + +#include +#include + +class JSIdentifierCtxBase +{ +public: + virtual ~JSIdentifierCtxBase() = default; + + virtual const char* substitute(const char* identifier) = 0; + virtual void reset() = 0; + virtual size_t size() const = 0; +}; + +class JSIdentifierCtx : public JSIdentifierCtxBase +{ +public: + JSIdentifierCtx(int32_t depth) : depth(depth) {} + + const char* substitute(const char* identifier) override; + void reset() override; + + // approximated to 500 unique mappings insertions + size_t size() const override + { return (sizeof(JSIdentifierCtx) + (sizeof(std::string) * 2 * 500)); } + +private: + int ident_first_name = 0; + int ident_last_name = -1; + int32_t unique_ident_cnt = 0; + int32_t depth; + + std::unordered_map ident_names; +}; + +#endif // JS_IDENTIFIER_CTX + diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc index 7e4b1d9a2..86d2d9ae5 100644 --- a/src/utils/js_normalizer.cc +++ b/src/utils/js_normalizer.cc @@ -25,23 +25,14 @@ using namespace snort; -JSNormalizer::JSNormalizer() - : depth(-1), - rem_bytes(-1), +JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth) + : depth(norm_depth), + rem_bytes(norm_depth), unlim(true), src_next(nullptr), dst_next(nullptr), - tokenizer(in, out) + tokenizer(in, out, js_ident_ctx) { -} - -void JSNormalizer::set_depth(size_t new_depth) -{ - if (depth == new_depth) - return; - - depth = new_depth; - rem_bytes = depth; unlim = depth == (size_t)-1; } @@ -68,7 +59,9 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, char if (!unlim) rem_bytes -= r_bytes; src_next = src + r_bytes; - dst_next = dst + w_bytes; + + // avoid heap overflow if number of written bytes bigger than accepted dst_len + dst_next = (w_bytes <= dst_len) ? 
dst + w_bytes : dst + dst_len; return rem_bytes ? ret : JSTokenizer::EOS; } diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index 75bd40768..13673e4a9 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -32,7 +32,7 @@ namespace snort class JSNormalizer { public: - JSNormalizer(); + JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t depth); const char* get_src_next() const { return src_next; } @@ -43,8 +43,6 @@ public: void reset_depth() { rem_bytes = depth; } - void set_depth(size_t depth); - JSTokenizer::JSRet normalize(const char* src, size_t src_len, char* dst, size_t dst_len); static size_t size(); diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 0e0fd2a27..e2612ac10 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -24,6 +24,8 @@ #include "log/messages.h" +class JSIdentifierCtxBase; + class JSTokenizer : public yyFlexLexer { private: @@ -46,10 +48,11 @@ public: SCRIPT_CONTINUE, OPENING_TAG, CLOSING_TAG, - BAD_TOKEN + BAD_TOKEN, + IDENTIFIER_OVERFLOW }; - JSTokenizer(std::istream& in, std::ostream& out); + JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx); ~JSTokenizer() override; // returns JSRet @@ -65,6 +68,7 @@ private: JSRet eval_eof(); JSRet do_spacing(JSToken cur_token); JSRet do_operator_spacing(JSToken cur_token); + JSRet do_identifier_substitution(const char* lexeme); bool unescape(const char* lexeme); private: @@ -73,6 +77,7 @@ private: std::stringstream tmp; JSToken token = UNDEFINED; + JSIdentifierCtxBase& ident_ctx; }; #endif // JS_TOKENIZER_H diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index 8649ff611..8182d4379 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -30,6 +30,7 @@ #include "config.h" #endif + #include "utils/js_identifier_ctx.h" #include "utils/js_tokenizer.h" #include @@ -989,7 +990,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {KEYWORD} 
{ EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } {OPERATOR} { EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); } {LITERAL} { EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); } -{IDENTIFIER} { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) ECHO; } BEGIN(divop); } +{IDENTIFIER} { if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); } .|{ALL_UNICODE} { ECHO; token = UNDEFINED; BEGIN(INITIAL); } <> { EXEC(eval_eof()) } @@ -1072,8 +1073,9 @@ static std::string unescape_unicode(const char* lexeme) // JSTokenizer members -JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out) - : yyFlexLexer(in, out) +JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx) + : yyFlexLexer(in, out), + ident_ctx(ident_ctx) { BEGIN(regst); } @@ -1164,6 +1166,19 @@ JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token) return BAD_TOKEN; } +JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme) +{ + const char* ident = ident_ctx.substitute(lexeme); + + if (ident) + { + yyout << ident; + return EOS; + } + + return IDENTIFIER_OVERFLOW; +} + bool JSTokenizer::unescape(const char* lexeme) { if ( strstr(lexeme, "\\u") ) diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt index 816907aa4..2a092f323 100644 --- a/src/utils/test/CMakeLists.txt +++ b/src/utils/test/CMakeLists.txt @@ -13,7 +13,13 @@ FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/../js_tokenizer.l add_catch_test( js_normalizer_test SOURCES ${FLEX_js_tokenizer_OUTPUTS} + ../js_identifier_ctx.cc ../js_normalizer.cc ../util_cstring.cc ) +add_catch_test( js_identifier_ctx_test + SOURCES + ../js_identifier_ctx.cc +) + diff --git a/src/utils/test/js_identifier_ctx_test.cc b/src/utils/test/js_identifier_ctx_test.cc new file mode 100644 index 000000000..89f025250 --- /dev/null +++ b/src/utils/test/js_identifier_ctx_test.cc @@ -0,0 +1,93 @@ 
+//-------------------------------------------------------------------------- +// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_identifier_ctx_test.cc author Oleksandr Serhiienko + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "catch/catch.hpp" + +#include +#include + +#include "utils/js_identifier_ctx.h" + +#define DEPTH 260000 + +#define FIRST_NAME_SIZE 26 +#define LAST_NAME_SIZE 9999 + +static const char s_ident_first_names[FIRST_NAME_SIZE] = +{ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' +}; + +TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") +{ + SECTION("same name") + { + JSIdentifierCtx ident_ctx(DEPTH); + + CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + } + SECTION("different names") + { + JSIdentifierCtx ident_ctx(DEPTH); + + CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + CHECK(!strcmp(ident_ctx.substitute("b"), "a1")); + CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + } + SECTION("depth reached") + 
{ + JSIdentifierCtx ident_ctx(2); + + CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + CHECK(!strcmp(ident_ctx.substitute("b"), "a1")); + CHECK(ident_ctx.substitute("c") == nullptr); + CHECK(ident_ctx.substitute("d") == nullptr); + CHECK(!strcmp(ident_ctx.substitute("a"), "a0")); + } + SECTION("max names") + { + JSIdentifierCtx ident_ctx(DEPTH + 2); + + std::vector n, e; + n.reserve(DEPTH + 2); + e.reserve(DEPTH); + + for (int it = 0; it < DEPTH + 2; ++it) + n.push_back("n" + std::to_string(it)); + + for (int it_first = 0; it_first < FIRST_NAME_SIZE; ++it_first) + { + for (int it_last = 0; it_last <= LAST_NAME_SIZE; ++it_last) + e.push_back(s_ident_first_names[it_first] + std::to_string(it_last)); + } + + for (int it = 0; it < DEPTH; ++it) + CHECK(!strcmp(ident_ctx.substitute(n[it].c_str()), e[it].c_str())); + + CHECK(ident_ctx.substitute(n[DEPTH].c_str()) == nullptr); + CHECK(ident_ctx.substitute(n[DEPTH + 1].c_str()) == nullptr); + } +} + diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index b66d77766..7c27c51a0 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -25,6 +25,7 @@ #include +#include "utils/js_identifier_ctx.h" #include "utils/js_normalizer.h" namespace snort @@ -34,17 +35,28 @@ namespace snort { exit(EXIT_FAILURE); } } +class JSIdentifierCtxTest : public JSIdentifierCtxBase +{ +public: + JSIdentifierCtxTest() = default; + + const char* substitute(const char* identifier) override + { return identifier; } + void reset() override {} + size_t size() const override {} +}; + using namespace snort; #define DEPTH 65535 -#define NORMALIZE(src, expected) \ - char dst[sizeof(expected)]; \ - JSNormalizer norm; \ - norm.set_depth(DEPTH); \ - auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst)); \ - const char* ptr = norm.get_src_next(); \ - int act_len = norm.get_dst_next() - dst; \ +#define NORMALIZE(src, expected) \ + char dst[sizeof(expected)]; \ + 
JSIdentifierCtxTest ident_ctx; \ + JSNormalizer norm(ident_ctx, DEPTH); \ + auto ret = norm.normalize(src, sizeof(src), dst, sizeof(dst)); \ + const char* ptr = norm.get_src_next(); \ + int act_len = norm.get_dst_next() - dst; #define VALIDATE(src, expected) \ CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \ @@ -52,20 +64,20 @@ using namespace snort; CHECK(act_len == sizeof(expected) - 1); \ CHECK(!memcmp(dst, expected, act_len)); -#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset) \ - CHECK(ret == ret_code); \ - CHECK((ptr - src) == ptr_offset); \ - CHECK(act_len == sizeof(expected) - 1); \ +#define VALIDATE_FAIL(src, expected, ret_code, ptr_offset) \ + CHECK(ret == ret_code); \ + CHECK((ptr - src) == ptr_offset); \ + CHECK(act_len == sizeof(expected) - 1); \ CHECK(!memcmp(dst, expected, act_len)); -#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \ - { \ - JSNormalizer norm; \ - norm.set_depth(depth); \ - ret = norm.normalize(src, src_len, dst, dst_len); \ - ptr = norm.get_src_next(); \ - len = norm.get_dst_next() - dst; \ - } \ +#define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \ + { \ + JSIdentifierCtxTest ident_ctx; \ + JSNormalizer norm(ident_ctx, depth); \ + ret = norm.normalize(src, src_len, dst, dst_len); \ + ptr = norm.get_src_next(); \ + len = norm.get_dst_next() - dst; \ + } // ClamAV test cases static const char clamav_buf0[] = @@ -869,9 +881,8 @@ TEST_CASE("endings", "[JSNormalizer]") const char* ptr; int ret; - JSNormalizer norm; - - norm.set_depth(7); + JSIdentifierCtxTest ident_ctx; + JSNormalizer norm(ident_ctx, 7); ret = norm.normalize(src, sizeof(src), dst, sizeof(dst)); ptr = norm.get_src_next(); act_len = norm.get_dst_next() - dst; @@ -902,7 +913,7 @@ TEST_CASE("endings", "[JSNormalizer]") CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); CHECK(ptr == src + sizeof(src)); - CHECK(act_len == 12); // size of normalized src + CHECK(act_len == 7); // size of normalized src CHECK(!memcmp(dst, expected, 
sizeof(dst))); } } @@ -1239,3 +1250,4 @@ TEST_CASE("nested script tags", "[JSNormalizer]") VALIDATE_FAIL(unexpected_tag_buf24, unexpected_tag_expected24, JSTokenizer::OPENING_TAG, 39); } } +