From: Steve Chew (stechew) Date: Thu, 17 Nov 2022 00:56:54 +0000 (+0000) Subject: Pull request #3620: Move Enhanced JS Normalizer from NHI to a standalone component X-Git-Tag: 3.1.47.0~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ef14e2217f8c09741db619c7dbfd1855261274e6;p=thirdparty%2Fsnort3.git Pull request #3620: Move Enhanced JS Normalizer from NHI to a standalone component Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_module to master Squashed commit of the following: commit 2678dac41df3f2862e165ccce92ab70598dad0ff Author: Oleksii Shumeiko Date: Mon Oct 10 13:20:11 2022 +0300 http_inspect: move Enhanced JS Normalizer from NHI to a standalone component * http_inspect: remove Enhanced JavaScript Normalizer from NHI * utils: move JavaScript Normalizer to js_norm component, including unit tests * js_norm: implement standalone Enhanced JavaScript Normalizer * ips_options: implement js_data IPS option * lua: remove default_http_inspect, add default_js_norm --- diff --git a/lua/snort.lua b/lua/snort.lua index e2357a150..efbd3f65e 100644 --- a/lua/snort.lua +++ b/lua/snort.lua @@ -84,13 +84,15 @@ ftp_server = default_ftp_server ftp_client = { } ftp_data = { } -http_inspect = default_http_inspect +http_inspect = { } http2_inspect = { } -- see file_magic.rules for file id rules file_id = { rules_file = 'file_magic.rules' } file_policy = { } +js_norm = default_js_norm + -- the following require additional configuration to be fully effective: appid = diff --git a/lua/snort_defaults.lua b/lua/snort_defaults.lua index 721198ec1..519d161a7 100644 --- a/lua/snort_defaults.lua +++ b/lua/snort_defaults.lua @@ -1286,7 +1286,7 @@ default_js_norm_ident_ignore = default_js_norm_prop_ignore = { - -- Object + -- Object 'constructor', 'prototype', '__proto__', '__defineGetter__', '__defineSetter__', '__lookupGetter__', '__lookupSetter__', '__count__', '__noSuchMethod__', '__parent__', 'hasOwnProperty', 'isPrototypeOf', 'propertyIsEnumerable', 
'toLocaleString', 'toString', @@ -1313,7 +1313,7 @@ default_js_norm_prop_ignore = -- Array 'copyWithin', 'entries', 'every', 'fill', 'filter', 'find', 'findIndex', 'flat', 'flatMap', - 'forEach', 'groupBy', 'groupByToMap', 'join', 'keys', 'map', 'pop', 'push', 'reduce', + 'forEach', 'groupBy', 'groupByToMap', 'join', 'keys', 'map', 'pop', 'push', 'reduce', 'reduceRight', 'reverse', 'shift', 'unshift', 'some', 'sort', 'splice', -- Generator @@ -1337,7 +1337,7 @@ default_js_norm_prop_ignore = 'setCapture', 'setHTML', 'setPointerCapture', 'toggleAttribute', -- HTMLElement - 'contentEditable', 'contextMenu', 'dataset', 'dir', 'enterKeyHint', 'hidden', 'inert', + 'contentEditable', 'contextMenu', 'dataset', 'dir', 'enterKeyHint', 'hidden', 'inert', 'innerText', 'lang', 'nonce', 'outerText', 'style', 'tabIndex', 'title', 'attachInternals', @@ -1348,11 +1348,11 @@ default_js_norm_prop_ignore = 'ExportStyle', 'callee' } -default_http_inspect = +default_js_norm = { -- params not specified here get internal defaults - js_norm_ident_ignore = default_js_norm_ident_ignore, - js_norm_prop_ignore = default_js_norm_prop_ignore, + ident_ignore = default_js_norm_ident_ignore, + prop_ignore = default_js_norm_prop_ignore, } --------------------------------------------------------------------------- @@ -1374,8 +1374,7 @@ default_whitelist = ip_hi_dist icmp_low_sweep icmp_med_sweep icmp_hi_sweep default_hi_port_scan default_med_port_scan default_low_port_scan default_variables netflow_versions default_js_norm_ident_ignore - default_js_norm_prop_ignore default_http_inspect + default_js_norm_prop_ignore default_js_norm ]] snort_whitelist_append(default_whitelist) - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eea834190..71ad64cae 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,6 +94,7 @@ add_subdirectory(filters) add_subdirectory(flow) add_subdirectory(framework) add_subdirectory(hash) +add_subdirectory(js_norm) add_subdirectory(latency) add_subdirectory(log) 
add_subdirectory(main) @@ -151,6 +152,7 @@ add_executable( snort $ $ $ + $ $ $ $ diff --git a/src/framework/inspector.h b/src/framework/inspector.h index bcd25be5e..2a7c77e31 100644 --- a/src/framework/inspector.h +++ b/src/framework/inspector.h @@ -47,8 +47,8 @@ struct InspectionBuffer { enum Type { - // this is the only generic rule option - IBT_VBA, + // these are the only generic rule options + IBT_VBA, IBT_JS_DATA, // FIXIT-M all of these should be eliminated IBT_KEY, IBT_HEADER, IBT_BODY, diff --git a/src/ips_options/CMakeLists.txt b/src/ips_options/CMakeLists.txt index bb51945d9..966778899 100644 --- a/src/ips_options/CMakeLists.txt +++ b/src/ips_options/CMakeLists.txt @@ -62,6 +62,7 @@ set (IPS_SOURCES ips_flowbits.cc ips_flowbits.h ips_hash.cc + ips_js_data.cc ips_luajit.cc ips_metadata.cc ips_options.cc diff --git a/src/ips_options/dev_notes.txt b/src/ips_options/dev_notes.txt index 8aa54518e..b3efb7f99 100644 --- a/src/ips_options/dev_notes.txt +++ b/src/ips_options/dev_notes.txt @@ -30,4 +30,9 @@ for the "replace" content the rule will not match. - Only the first occurrence of the content will be replaced. - "replace" works for raw packets only. So, TCP data must either fit under the "pkt_data" buffer requirements or one should enable detection -on TCP payload before reassembly: search_engine.detect_raw_tcp=true. \ No newline at end of file +on TCP payload before reassembly: search_engine.detect_raw_tcp=true. + +"js_data" option is used by Enhanced JavaScript Normalizer to access normalized data. +It's implemented as a generic IPS buffer and follows the JIT approach. +"js_data" buffer must be specified in the list of buffers available for a particular +inspector. 
diff --git a/src/ips_options/ips_js_data.cc b/src/ips_options/ips_js_data.cc new file mode 100644 index 000000000..d8ed0d26b --- /dev/null +++ b/src/ips_options/ips_js_data.cc @@ -0,0 +1,128 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+//-------------------------------------------------------------------------- +// ips_js_data.cc author Oleksandr Serhiienko + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "framework/cursor.h" +#include "framework/inspector.h" +#include "framework/ips_option.h" +#include "framework/module.h" +#include "profiler/profiler.h" +#include "protocols/packet.h" + +using namespace snort; + +static constexpr const char* s_name = "js_data"; +static constexpr const char* s_help = "rule option to set detection cursor to normalized JavaScript data"; + +static THREAD_LOCAL ProfileStats js_data_profile_stats; + +class JSDataModule : public Module +{ +public: + JSDataModule() : Module(s_name, s_help) { } + + Usage get_usage() const override + { return DETECT; } + + ProfileStats* get_profile() const override + { return &js_data_profile_stats; } +}; + +class JSDataOption : public IpsOption +{ +public: + JSDataOption() : IpsOption(s_name) { } + + CursorActionType get_cursor_type() const override + { return CAT_SET_FAST_PATTERN; } + + section_flags get_pdu_section(bool) const override + { return section_to_flag(PS_BODY); } + + EvalStatus eval(Cursor& c, Packet* p) override + { + RuleProfile profile(js_data_profile_stats); + InspectionBuffer buf; + + if (!p->flow or !p->flow->gadget) + return NO_MATCH; + + if (p->flow->gadget->get_fp_buf(buf.IBT_JS_DATA, p, buf)) + { + c.set(s_name, buf.data, buf.len); + return MATCH; + } + + return NO_MATCH; + } +}; + +//------------------------------------------------------------------------- +// api methods +//------------------------------------------------------------------------- + +static Module* mod_ctor() +{ return new JSDataModule; } + +static void mod_dtor(Module* m) +{ delete m; } + +static IpsOption* js_data_ctor(Module*, OptTreeNode*) +{ return new JSDataOption; } + +static void js_data_dtor(IpsOption* opt) +{ delete opt; } + +static const IpsApi js_data_api = +{ + { + PT_IPS_OPTION, + sizeof(IpsApi), + IPSAPI_VERSION, + 
0, + API_RESERVED, + API_OPTIONS, + s_name, + s_help, + mod_ctor, + mod_dtor + }, + OPT_TYPE_DETECTION, + 0, PROTO_BIT__TCP, + nullptr, + nullptr, + nullptr, + nullptr, + js_data_ctor, + js_data_dtor, + nullptr +}; + +#ifdef BUILDING_SO +SO_PUBLIC const BaseApi* snort_plugins[] = +#else +const BaseApi* ips_js_data[] = +#endif +{ + &js_data_api.base, + nullptr +}; diff --git a/src/ips_options/ips_options.cc b/src/ips_options/ips_options.cc index 5886c0a74..a7ab39d8e 100644 --- a/src/ips_options/ips_options.cc +++ b/src/ips_options/ips_options.cc @@ -35,6 +35,7 @@ extern const BaseApi* ips_file_data; extern const BaseApi* ips_file_meta; extern const BaseApi* ips_flow; extern const BaseApi* ips_flowbits; +extern const BaseApi* ips_js_data; extern const BaseApi* ips_md5; extern const BaseApi* ips_metadata; extern const BaseApi* ips_pkt_data; @@ -103,6 +104,7 @@ static const BaseApi* ips_options[] = ips_file_meta, ips_flow, ips_flowbits, + ips_js_data, ips_md5, ips_metadata, ips_pkt_data, diff --git a/src/js_norm/CMakeLists.txt b/src/js_norm/CMakeLists.txt new file mode 100644 index 000000000..30a376c63 --- /dev/null +++ b/src/js_norm/CMakeLists.txt @@ -0,0 +1,24 @@ +FLEX_TARGET ( js_tokenizer + ${CMAKE_CURRENT_SOURCE_DIR}/js_tokenizer.l + ${CMAKE_CURRENT_BINARY_DIR}/js_tokenizer.cc + COMPILE_FLAGS ${FLEX_FLAGS} +) + +set ( JS_SOURCES + ${FLEX_js_tokenizer_OUTPUTS} + js_config.h + js_enum.h + js_identifier_ctx.cc + js_identifier_ctx.h + js_norm.cc + js_norm.h + js_norm_module.cc + js_norm_module.h + js_normalizer.cc + js_normalizer.h + js_tokenizer.h +) + +add_library(js_norm OBJECT ${JS_SOURCES}) + +add_subdirectory(test) diff --git a/src/js_norm/dev_notes.txt b/src/js_norm/dev_notes.txt new file mode 100644 index 000000000..6196aabe6 --- /dev/null +++ b/src/js_norm/dev_notes.txt @@ -0,0 +1,131 @@ +Enhanced JavaScript Normalizer is a stateful JavaScript whitespace and identifiers normalizer. 
+JSNorm is a basic implementation, so other modules can use it right away or provide some +customization to it. Normalizer will remove all extraneous whitespace and newlines, keeping a +single space where syntactically necessary. Comments will be removed, but contents of string +literals will be kept intact. Any string literals joined by the plus operator will be concatenated. +This also works for functions that result in string literals. Semicolons will be inserted, if not +already present, according to ECMAScript automatic semicolon insertion rules. + +All JavaScript identifier names, except those from the ident_ignore or prop_ignore lists, +will be substituted with unified names in the following format: var_0000 -> var_ffff. +So, the number of unique identifiers available is 65536 names per transaction. +If Normalizer overruns the configured limit, a built-in alert is generated. + +A config option to set the limit manually: + * js_norm.identifier_depth. + +Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with +the listed identifier, any chain of dot accessors, brackets and function calls will be kept +intact. +For example: + * console.log("bar") + * document.getElementById("id").text + * eval("script") + * foo["bar"] + +Ignored identifiers are configured via the following config option; +it accepts a list of object and function names: + * js_norm.ident_ignore = { 'console', 'document', 'eval', 'foo' } + +When a variable assignment that 'aliases' an identifier from the list is found, +the assignment will be tracked, and subsequent occurrences of the variable will be +replaced with the stored value. This substitution will follow JavaScript variable scope +limits. + +For example: + + var a = console.log + a("hello") // will be substituted with 'console.log("hello")' + a.foo.bar() // will be normalized as 'console.log.foo.bar()'.
 When a variable is 'de-aliased', + // the following identifiers are not normalized, just like identifiers from ident_ignore + +When an object is created using the 'new' keyword, and the class/constructor is found in the ident_ignore +list, the object will be tracked, and although its own identifier will be converted to normal form, +its property and function calls will be kept intact, as with ignored identifiers. + +For example: + var obj = new Array() + obj.insert(1,2,3) // will be normalized to var_0000.insert(1,2,3) + +For properties and methods of objects that can be created implicitly, there is a +prop_ignore list. All names in the call chain after the first property or +method from the list has occurred will not be normalized. + +Note that identifiers are normalized by name, i.e. an identifier and a property with the same name +will be normalized to the same value. However, the ignore lists act separately on identifiers +and properties. + +For example: + + js_norm.prop_ignore = { 'split' } + + in: "string".toUpperCase().split("").reverse().join(""); + out: "string".var_0000().split("").reverse().join(""); + +In addition to the scope tracking, JS Normalizer specifically tracks unescape-like JavaScript +functions (unescape, decodeURI, decodeURIComponent, String.fromCharCode, String.fromCodePoint). +This allows detection of unescape functions nested within other unescape functions, which is +a potential indicator of multilevel obfuscation. The definition of a function call depends on +identifier substitution, so such identifiers must be included in the ignore list in +order to use this feature. After determining the unescape sequence, it is decoded into the +corresponding string, and the name of the unescape function will not be present in the output. +Single-byte escape sequences within the string and template literals which are arguments of +unescape, decodeURI and decodeURIComponent functions will be decoded according to the ISO/IEC 8859-1 +(Latin-1) charset.
 Except for these cases, escape sequences and code points will be decoded to UTF-8 +format. + +For example: + + unescape('\u0062\u0061\u0072') -> 'bar' + decodeURI('%62%61%72') -> 'bar' + decodeURIComponent('\x62\x61\x72') -> 'bar' + String.fromCharCode(98, 0x0061, 0x72) -> 'bar' + String.fromCodePoint(65600, 65601, 0x10042) -> '𐁀𐁁𐁂' + +The supported formats are: + + \xXX + \uXXXX + \u{XXXX} + %XX + \uXX + %uXXXX + decimal code point + hexadecimal code point + +JS Normalizer is able to decode mixed encoding sequences. However, a built-in alert is raised +in such a case. + +JS Normalizer's syntax parser follows the ECMA-262 standard. For various features, +tracking of variable scope and individual brackets is done in accordance with the standard. +Additionally, Normalizer enforces standard limits on HTML content in JavaScript: + * no nesting tags allowed, i.e. two opening tags in a row + * script closing tag is not allowed in string literals, block comments, regular expression literals, etc. + +If source JavaScript is syntactically incorrect (containing a bad token, brackets mismatch, +HTML-tags, etc.) Normalizer fires the corresponding built-in rule and abandons the current script, +though the already-processed data remains in the output buffer. + +Enhanced JavaScript Normalizer has some trace messages available. Trace options follow: + +* trace.module.js_norm.proc turns on messages from script processing flow. ++ +Verbosity levels: ++ +1. Script opening tag detected (available in release build) +2. Attributes of detected script (available in release build) +3. Normalizer return code (available in release build) +4. Contexts management (debug build only) +5. Parser states (debug build only) +6. Input stream states (debug build only) + +* trace.module.js_norm.dump dumps JavaScript data from processing layers. ++ +Verbosity levels: ++ +1. js_data buffer as it is being passed to detection (available in release build) +2. (no messages available currently) +3.
Payload passed to Normalizer (available in release build) +4. Temporary buffer (debug build only) +5. Matched token (debug build only) +6. Identifier substitution (debug build only) diff --git a/src/js_norm/js_config.h b/src/js_norm/js_config.h new file mode 100644 index 000000000..5cb438b0b --- /dev/null +++ b/src/js_norm/js_config.h @@ -0,0 +1,37 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+//-------------------------------------------------------------------------- +// js_config.h author Danylo Kyrylov + +#ifndef JS_CONFIG_H +#define JS_CONFIG_H + +#include +#include + +struct JSNormConfig +{ + int64_t bytes_depth = -1; + int32_t identifier_depth = 0xffff; + uint8_t max_template_nesting = 32; + uint32_t max_bracket_depth = 256; + uint32_t max_scope_depth = 256; + std::unordered_set ignored_ids; + std::unordered_set ignored_props; +}; + +#endif diff --git a/src/js_norm/js_enum.h b/src/js_norm/js_enum.h new file mode 100644 index 000000000..8f25b66ff --- /dev/null +++ b/src/js_norm/js_enum.h @@ -0,0 +1,65 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+//-------------------------------------------------------------------------- +// js_enum.h authors Danylo Kyrylov , Oleksandr Serhiienko + +#ifndef JS_ENUM_H +#define JS_ENUM_H + +#include "utils/event_gen.h" + +namespace jsn +{ + +static constexpr unsigned js_gid = 154; + +enum +{ + TRACE_PROC = 0, + TRACE_DUMP +}; + +// This enum must be synchronized with JSNormModule::peg_names[] in js_norm_module.cc +enum PEG_COUNT +{ + PEG_BYTES = 0, + PEG_IDENTIFIERS, + PEG_IDENTIFIER_OVERFLOWS, + PEG_COUNT_MAX +}; + +// This enum must be synchronized with JSNormModule::events[] in js_norm_module.cc +enum EventSid +{ + EVENT__NONE = -1, + EVENT_NEST_UNESCAPE_FUNC = 1, + EVENT_MIXED_UNESCAPE_SEQUENCE = 2, + EVENT_BAD_TOKEN = 3, + EVENT_OPENING_TAG = 4, + EVENT_CLOSING_TAG = 5, + EVENT_IDENTIFIER_OVERFLOW = 6, + EVENT_BRACKET_NEST_OVERFLOW = 7, + EVENT_DATA_LOST = 8, + EVENT_SCOPE_NEST_OVERFLOW = 9, + EVENT__MAX_VALUE +}; + +} + +using JSEvents = EventGen; + +#endif diff --git a/src/utils/js_identifier_ctx.cc b/src/js_norm/js_identifier_ctx.cc similarity index 93% rename from src/utils/js_identifier_ctx.cc rename to src/js_norm/js_identifier_ctx.cc index c5d0478a4..bc486d576 100644 --- a/src/utils/js_identifier_ctx.cc +++ b/src/js_norm/js_identifier_ctx.cc @@ -26,24 +26,10 @@ #include #include -#if !defined(CATCH_TEST_BUILD) && !defined(BENCHMARK_TEST) -#include "service_inspectors/http_inspect/http_enum.h" -#include "service_inspectors/http_inspect/http_module.h" -#else -namespace HttpEnums -{ -enum PEG_COUNT -{ - PEG_JS_IDENTIFIER -}; -} +#include "js_enum.h" +#include "js_norm_module.h" -class HttpModule -{ -public: - static void increment_peg_counts(HttpEnums::PEG_COUNT) {} -}; -#endif // CATCH_TEST_BUILD +using namespace jsn; #define NORM_NAME_SIZE 9 // size of the normalized form plus null symbol #define NORM_NAME_CNT 65536 @@ -80,7 +66,7 @@ static int _init_norm_names __attribute__((unused)) = (static_cast(init_no JSIdentifierCtx::JSIdentifierCtx(int32_t depth, uint32_t 
max_scope_depth, const std::unordered_set& ignored_ids_list, const std::unordered_set& ignored_props_list) - : ignored_ids_list(ignored_ids_list), ignored_props_list(ignored_props_list), + : ignored_ids_list(ignored_ids_list), ignored_props_list(ignored_props_list), max_scope_depth(max_scope_depth) { norm_name = norm_names; @@ -133,7 +119,7 @@ const char* JSIdentifierCtx::acquire_norm_name(NormId& id) auto n = norm_name; norm_name += NORM_NAME_SIZE; - HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER); + JSNormModule::increment_peg_counts(PEG_IDENTIFIERS); if (id.prop_name || id.id_name) { diff --git a/src/utils/js_identifier_ctx.h b/src/js_norm/js_identifier_ctx.h similarity index 97% rename from src/utils/js_identifier_ctx.h rename to src/js_norm/js_identifier_ctx.h index 4d648f27e..6092d1bea 100644 --- a/src/utils/js_identifier_ctx.h +++ b/src/js_norm/js_identifier_ctx.h @@ -26,6 +26,9 @@ #include #include +namespace jsn +{ + enum JSProgramScopeType : unsigned int { GLOBAL = 0, // the global scope (the initial one) @@ -34,10 +37,10 @@ enum JSProgramScopeType : unsigned int PROG_SCOPE_TYPE_MAX }; -class JSIdentifierCtxBase +class JSIdentifier { public: - virtual ~JSIdentifierCtxBase() = default; + virtual ~JSIdentifier() = default; virtual const char* substitute(const char* identifier, bool is_property) = 0; virtual void add_alias(const char* alias, const std::string&& value) = 0; @@ -52,7 +55,7 @@ public: virtual size_t size() const = 0; }; -class JSIdentifierCtx : public JSIdentifierCtxBase +class JSIdentifierCtx : public JSIdentifier { public: JSIdentifierCtx(int32_t depth, uint32_t max_scope_depth, @@ -83,7 +86,7 @@ private: const char* prop_name = nullptr; uint8_t type = 0; }; - + using Alias = std::vector; using AliasRef = std::list; using AliasMap = std::unordered_map; @@ -136,4 +139,6 @@ public: #endif // CATCH_TEST_BUILD }; +} + #endif // JS_IDENTIFIER_CTX diff --git a/src/js_norm/js_norm.cc b/src/js_norm/js_norm.cc new file mode 100644 index 
000000000..e21ba8bb4 --- /dev/null +++ b/src/js_norm/js_norm.cc @@ -0,0 +1,185 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_norm.cc author Cisco + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "js_norm.h" + +#include "js_identifier_ctx.h" +#include "js_normalizer.h" + +#include "js_norm_module.h" + +using namespace jsn; +using namespace snort; + +static const char* jsret_codes[] = +{ + "end of stream", + "script ended", + "script continues", + "closing tag", + "bad token", + "identifier overflow", + "template nesting overflow", + "bracket nesting overflow", + "scope nesting overflow", + "wrong closing symbol", + "ended in inner scope", + "unknown" +}; + +const char* jsn::ret2str(int r) +{ + JSTokenizer::JSRet ret = (JSTokenizer::JSRet)r; + + assert(ret < JSTokenizer::JSRet::MAX); + assert(ret < sizeof(jsret_codes)/sizeof(jsret_codes[0])); + + ret = ret < JSTokenizer::JSRet::MAX ? 
ret : JSTokenizer::JSRet::MAX; + + return jsret_codes[ret]; +} + +JSNorm::JSNorm(JSNormConfig* jsn_config, bool ext_script_type) : + alive(true), pdu_cnt(0), src_ptr(nullptr), src_end(nullptr), + idn_ctx(nullptr), jsn_ctx(nullptr), ext_script_type(ext_script_type) +{ + config = jsn_config; + alive = (bool)config; + + if (!alive) + return; + + idn_ctx = new JSIdentifierCtx(config->identifier_depth, + config->max_scope_depth, config->ignored_ids, config->ignored_props); + jsn_ctx = new JSNormalizer(*idn_ctx, config->bytes_depth, + config->max_template_nesting, config->max_bracket_depth); + + debug_log(4, js_trace, TRACE_PROC, nullptr, "context created\n"); +} + +JSNorm::~JSNorm() +{ + delete idn_ctx; + delete jsn_ctx; + + debug_log(4, js_trace, TRACE_PROC, nullptr, "context deleted\n"); +} + +void JSNorm::normalize(const void* in_data, size_t in_len, const void*& data, size_t& len) +{ + if (!alive) + { + len = 0; + data = nullptr; + return; + } + + if (pdu_cnt > 2) + { + len = 0; + data = nullptr; + return; + } + if (pdu_cnt > 1) + { + events.create_event(EVENT_DATA_LOST); + ++pdu_cnt; + len = 0; + data = nullptr; + return; + } + pdu_cnt = 0; + + src_ptr = (const uint8_t*)in_data; + src_end = src_ptr + in_len; + + while (alive and pre_proc()) + { + auto ret = jsn_ctx->normalize((const char*)src_ptr, src_end - src_ptr, ext_script_type); + const uint8_t* next = (const uint8_t*)jsn_ctx->get_src_next(); + + JSNormModule::increment_peg_counts(PEG_BYTES, next - src_ptr); + src_ptr = next; + + alive = post_proc(ret); + } + + len = jsn_ctx->script_size(); + data = jsn_ctx->get_script(); +} + +bool JSNorm::pre_proc() +{ + return src_ptr < src_end; +} + +bool JSNorm::post_proc(int ret) +{ + if (jsn_ctx->is_unescape_nesting_seen()) + events.create_event(EVENT_NEST_UNESCAPE_FUNC); + + if (jsn_ctx->is_mixed_encoding_seen()) + events.create_event(EVENT_MIXED_UNESCAPE_SEQUENCE); + + if (jsn_ctx->is_opening_tag_seen()) + events.create_event(EVENT_OPENING_TAG); + + if 
(jsn_ctx->is_closing_tag_seen()) + events.create_event(EVENT_CLOSING_TAG); + + switch ((JSTokenizer::JSRet)ret) + { + case JSTokenizer::EOS: + case JSTokenizer::SCRIPT_ENDED: + case JSTokenizer::SCRIPT_CONTINUE: + return true; + + case JSTokenizer::CLOSING_TAG: + events.create_event(EVENT_CLOSING_TAG); + return false; + + case JSTokenizer::BAD_TOKEN: + case JSTokenizer::WRONG_CLOSING_SYMBOL: + case JSTokenizer::ENDED_IN_INNER_SCOPE: + events.create_event(EVENT_BAD_TOKEN); + return false; + + case JSTokenizer::IDENTIFIER_OVERFLOW: + JSNormModule::increment_peg_counts(PEG_IDENTIFIER_OVERFLOWS); + events.create_event(EVENT_IDENTIFIER_OVERFLOW); + return false; + + case JSTokenizer::TEMPLATE_NESTING_OVERFLOW: + case JSTokenizer::BRACKET_NESTING_OVERFLOW: + events.create_event(EVENT_BRACKET_NEST_OVERFLOW); + return false; + + case JSTokenizer::SCOPE_NESTING_OVERFLOW: + events.create_event(EVENT_SCOPE_NEST_OVERFLOW); + return false; + + default: + assert(false); + return false; + } +} diff --git a/src/js_norm/js_norm.h b/src/js_norm/js_norm.h new file mode 100644 index 000000000..654faa7e6 --- /dev/null +++ b/src/js_norm/js_norm.h @@ -0,0 +1,71 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_norm.h author Cisco + +#ifndef JS_NORM_H +#define JS_NORM_H + +#include "utils/event_gen.h" + +#include "js_config.h" +#include "js_enum.h" + +namespace jsn +{ +class JSIdentifier; +class JSNormalizer; + +const char* ret2str(int); +} + +namespace snort +{ + +class JSNorm +{ +public: + JSNorm(JSNormConfig*, bool ext_script_type = false); + JSNorm(const JSNorm&) = delete; + virtual ~JSNorm(); + + void tick() + { ++pdu_cnt; } + + void normalize(const void*, size_t, const void*&, size_t&); + +protected: + virtual bool pre_proc(); + virtual bool post_proc(int); + + bool alive; + uint32_t pdu_cnt; + + const uint8_t* src_ptr; + const uint8_t* src_end; + + jsn::JSIdentifier* idn_ctx; + jsn::JSNormalizer* jsn_ctx; + bool ext_script_type; + + JSEvents events; + JSNormConfig* config; +}; + +} + +#endif diff --git a/src/js_norm/js_norm_module.cc b/src/js_norm/js_norm_module.cc new file mode 100644 index 000000000..af573ba37 --- /dev/null +++ b/src/js_norm/js_norm_module.cc @@ -0,0 +1,179 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_norm_module.cc author Danylo Kyrylov + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "js_norm_module.h" + +#include "trace/trace.h" + +#include "js_config.h" +#include "js_enum.h" + +using namespace jsn; +using namespace snort; + +static constexpr char s_name[] = "js_norm"; +static constexpr char s_help[] = "JavaScript normalizer"; + +THREAD_LOCAL const Trace* js_trace = nullptr; + +THREAD_LOCAL PegCount JSNormModule::peg_counts[PEG_COUNT_MAX] = {}; +THREAD_LOCAL ProfileStats JSNormModule::profile_stats; + +static const Parameter ident_ignore_param[] = +{ + { "ident_name", Parameter::PT_STRING, nullptr, nullptr, "name of the identifier to ignore" }, + { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } +}; + +static const Parameter prop_ignore_param[] = +{ + { "prop_name", Parameter::PT_STRING, nullptr, nullptr, "name of the object property to ignore" }, + { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } +}; + +const Parameter JSNormModule::params[] = +{ + { "bytes_depth", Parameter::PT_INT, "-1:max53", "-1", + "number of input JavaScript bytes to normalize (-1 unlimited)" }, + + { "identifier_depth", Parameter::PT_INT, "0:65536", "65536", + "max number of unique JavaScript identifiers to normalize" }, + + { "max_tmpl_nest", Parameter::PT_INT, "0:255", "32", + "maximum depth of template literal nesting that enhanced JavaScript normalizer will process" }, + + { "max_bracket_depth", Parameter::PT_INT, "1:65535", "256", + "maximum depth of bracket nesting that enhanced JavaScript normalizer will process" }, + + { "max_scope_depth", Parameter::PT_INT, "1:65535", "256", + "maximum 
depth of scope nesting that enhanced JavaScript normalizer will process" }, + + { "ident_ignore", Parameter::PT_LIST, ident_ignore_param, nullptr, + "list of JavaScript ignored identifiers which will not be normalized" }, + + { "prop_ignore", Parameter::PT_LIST, prop_ignore_param, nullptr, + "list of JavaScript ignored object properties which will not be normalized" }, + + { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } +}; + +static const TraceOption trace_options[] = +{ + { "proc", TRACE_PROC, "enable processing logging" }, + { "dump", TRACE_DUMP, "enable data logging" }, + { nullptr, 0, nullptr } +}; + +const PegInfo JSNormModule::peg_names[PEG_COUNT_MAX + 1] = +{ + { CountType::SUM, "bytes", "total number of bytes processed" }, + { CountType::SUM, "identifiers", "total number of unique identifiers processed" }, + { CountType::SUM, "identifier_overflows", "total number of unique identifier limit overflows" }, + { CountType::END, nullptr, nullptr } +}; + +const RuleMap JSNormModule::events[] = +{ + { EVENT_NEST_UNESCAPE_FUNC, "nested unescape functions" }, + { EVENT_MIXED_UNESCAPE_SEQUENCE, "mixed unescape sequence" }, + { EVENT_BAD_TOKEN, "bad token" }, + { EVENT_OPENING_TAG, "unexpected HTML script opening tag" }, + { EVENT_CLOSING_TAG, "unexpected HTML script closing tag" }, + { EVENT_IDENTIFIER_OVERFLOW, "max number of unique identifiers reached" }, + { EVENT_BRACKET_NEST_OVERFLOW, "excessive bracket nesting" }, + { EVENT_DATA_LOST, "data gaps during normalization" }, + { EVENT_SCOPE_NEST_OVERFLOW, "excessive scope nesting" }, + { 0, nullptr } +}; + +JSNormModule::JSNormModule() : Module(s_name, s_help, params), config(nullptr) +{ } + +JSNormModule::~JSNormModule() +{ } + +bool JSNormModule::begin(const char* fqn, int, SnortConfig*) +{ + if (strcmp(s_name, fqn)) + return true; + + auto policy = get_inspection_policy(); + assert(policy); + + delete policy->jsn_config; + policy->jsn_config = new JSNormConfig; + config = policy->jsn_config; + + return 
true; +} + +bool JSNormModule::set(const char*, Value& v, SnortConfig*) +{ + assert(config); + + if (v.is("bytes_depth")) + { + config->bytes_depth = v.get_int64(); + } + else if (v.is("identifier_depth")) + { + config->identifier_depth = v.get_int32(); + } + else if (v.is("max_tmpl_nest")) + { + config->max_template_nesting = v.get_uint8(); + } + else if (v.is("max_bracket_depth")) + { + config->max_bracket_depth = v.get_uint32(); + } + else if (v.is("max_scope_depth")) + { + config->max_scope_depth = v.get_uint32(); + } + else if (v.is("ident_name")) + { + config->ignored_ids.insert(v.get_string()); + } + else if (v.is("prop_name")) + { + config->ignored_props.insert(v.get_string()); + } + + return true; +} + +void JSNormModule::set_trace(const Trace* trace) const +{ + js_trace = trace; +} + +const TraceOption* JSNormModule::get_trace_options() const +{ + return trace_options; +} + +unsigned JSNormModule::get_gid() const +{ + return js_gid; +} diff --git a/src/js_norm/js_norm_module.h b/src/js_norm/js_norm_module.h new file mode 100644 index 000000000..cd7d84016 --- /dev/null +++ b/src/js_norm/js_norm_module.h @@ -0,0 +1,86 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_norm_module.h author Danylo Kyrylov + +#ifndef JS_NORM_MODULE_H +#define JS_NORM_MODULE_H + +#include "framework/module.h" +#include "main/policy.h" +#include "profiler/profiler.h" + +#include "js_config.h" +#include "js_enum.h" + +namespace snort +{ +class Trace; +} + +extern THREAD_LOCAL const snort::Trace* js_trace; + +class JSNormModule : public snort::Module +{ +public: + JSNormModule(); + ~JSNormModule() override; + + bool begin(const char*, int, snort::SnortConfig*) override; + bool set(const char*, snort::Value&, snort::SnortConfig*) override; + + void set_trace(const snort::Trace*) const override; + const snort::TraceOption* get_trace_options() const override; + + unsigned get_gid() const override; + + const snort::RuleMap* get_rules() const override + { return events; } + + const PegInfo* get_pegs() const override + { return peg_names; } + + PegCount* get_counts() const override + { return peg_counts; } + + snort::ProfileStats* get_profile() const override + { return &profile_stats; } + + Usage get_usage() const override + { return INSPECT; } + + static void increment_peg_counts(jsn::PEG_COUNT counter) + { peg_counts[counter]++; } + + static void increment_peg_counts(jsn::PEG_COUNT counter, uint64_t value) + { peg_counts[counter] += value; } + + static PegCount get_peg_counts(jsn::PEG_COUNT counter) + { return peg_counts[counter]; } + +private: + static const snort::Parameter params[]; + static const snort::RuleMap events[]; + static const PegInfo peg_names[]; + + static THREAD_LOCAL PegCount peg_counts[]; + static THREAD_LOCAL snort::ProfileStats profile_stats; + + JSNormConfig* config; +}; + +#endif diff --git a/src/utils/js_normalizer.cc 
b/src/js_norm/js_normalizer.cc similarity index 92% rename from src/utils/js_normalizer.cc rename to src/js_norm/js_normalizer.cc index d2ae20955..5407ee2ba 100644 --- a/src/utils/js_normalizer.cc +++ b/src/js_norm/js_normalizer.cc @@ -23,12 +23,14 @@ #include "js_normalizer.h" +#include "js_norm/js_enum.h" + #define BUFF_EXP_FACTOR 1.3 -using namespace snort; +using namespace jsn; using namespace std; -JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth, +JSNormalizer::JSNormalizer(JSIdentifier& js_ident_ctx, size_t norm_depth, uint8_t max_template_nesting, uint32_t max_bracket_depth, int tmp_cap_size) : depth(norm_depth), rem_bytes(norm_depth), @@ -63,14 +65,14 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, bool if (rem_bytes == 0) { - debug_log(5, http_trace, TRACE_JS_PROC, nullptr, + debug_log(5, js_trace, TRACE_PROC, nullptr, "depth limit reached\n"); src_next = src + src_len; return JSTokenizer::EOS; } - debug_logf(4, http_trace, TRACE_JS_DUMP, nullptr, + debug_logf(4, js_trace, TRACE_DUMP, nullptr, "tmp buffer[%zu]: %.*s\n", tmp_buf_size, static_cast(tmp_buf_size), tmp_buf); src_len = min(src_len, rem_bytes); diff --git a/src/utils/js_normalizer.h b/src/js_norm/js_normalizer.h similarity index 92% rename from src/utils/js_normalizer.h rename to src/js_norm/js_normalizer.h index 5de32aee8..2ad8b45c2 100644 --- a/src/utils/js_normalizer.h +++ b/src/js_norm/js_normalizer.h @@ -25,15 +25,16 @@ #include #include "js_tokenizer.h" -#include "streambuf.h" -namespace snort +#include "utils/streambuf.h" + +namespace jsn { class JSNormalizer { public: - JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t depth, + JSNormalizer(JSIdentifier& js_ident_ctx, size_t depth, uint8_t max_template_nesting, uint32_t max_bracket_depth, int tmp_cap_size = JSTOKENIZER_BUF_MAX_SIZE); ~JSNormalizer(); @@ -56,9 +57,6 @@ public: size_t script_size() { return out.tellp(); } - static size_t size() - { return 
sizeof(JSNormalizer) + 16834; /* YY_BUF_SIZE */ } - bool is_unescape_nesting_seen() const { return tokenizer.is_unescape_nesting_seen(); } @@ -97,8 +95,8 @@ private: char* tmp_buf; size_t tmp_buf_size; - istreambuf_glue in_buf; - ostreambuf_infl out_buf; + snort::istreambuf_glue in_buf; + snort::ostreambuf_infl out_buf; std::istream in; std::ostream out; JSTokenizer tokenizer; diff --git a/src/utils/js_tokenizer.h b/src/js_norm/js_tokenizer.h similarity index 97% rename from src/utils/js_tokenizer.h rename to src/js_norm/js_tokenizer.h index a13307409..f5c97de0f 100644 --- a/src/utils/js_tokenizer.h +++ b/src/js_norm/js_tokenizer.h @@ -26,10 +26,9 @@ #include #include "log/messages.h" -#include "service_inspectors/http_inspect/http_enum.h" #include "trace/trace_api.h" -extern THREAD_LOCAL const snort::Trace* http_trace; +extern THREAD_LOCAL const snort::Trace* js_trace; // The longest pattern has 9 characters " < / s c r i p t > ", // 8 of them can reside in 1st chunk @@ -40,9 +39,12 @@ extern THREAD_LOCAL const snort::Trace* http_trace; // To hold potentially long identifiers #define JSTOKENIZER_BUF_MAX_SIZE 256 +namespace jsn +{ + enum JSProgramScopeType : unsigned int; -class JSIdentifierCtxBase; +class JSIdentifier; #if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST) class JSTokenizerTester; class JSTestConfig; @@ -166,7 +168,7 @@ public: }; JSTokenizer() = delete; - explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx, + explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifier& ident_ctx, uint8_t max_template_nesting, uint32_t max_bracket_depth, char*& buf, size_t& buf_size, int cap_size = JSTOKENIZER_BUF_MAX_SIZE); ~JSTokenizer() override; @@ -182,10 +184,6 @@ public: bool is_closing_tag_seen() const; bool is_buffer_adjusted() const; -protected: - [[noreturn]] void LexerError(const char* msg) override - { snort::FatalError("%s", msg); } - private: int yylex() override; @@ -342,7 +340,7 @@ private: VStack 
brace_depth; JSToken token = UNDEFINED; ASIGroup previous_group = ASI_OTHER; - JSIdentifierCtxBase& ident_ctx; + JSIdentifier& ident_ctx; size_t bytes_read; size_t tmp_bytes_read; uint32_t tokens_read; @@ -413,4 +411,6 @@ private: #endif // CATCH_TEST_BUILD || BENCHMARK_TEST }; +} + #endif // JS_TOKENIZER_H diff --git a/src/utils/js_tokenizer.l b/src/js_norm/js_tokenizer.l similarity index 99% rename from src/utils/js_tokenizer.l rename to src/js_norm/js_tokenizer.l index de0bcdd0d..d368dabc6 100644 --- a/src/utils/js_tokenizer.l +++ b/src/js_norm/js_tokenizer.l @@ -30,21 +30,27 @@ #include "config.h" #endif -#include "utils/js_tokenizer.h" #include #include -#include "utils/js_identifier_ctx.h" +#include "js_norm/js_enum.h" +#include "js_norm/js_identifier_ctx.h" +#include "js_norm/js_tokenizer.h" #include "utils/util_cstring.h" +using namespace jsn; + #define YY_NO_UNPUT + +#define YY_FATAL_ERROR(msg) { snort::FatalError("%s", msg); } + #define YY_USER_ACTION \ { \ - debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, \ + debug_logf(5, js_trace, TRACE_PROC, nullptr, \ "pattern #%d, sc %d\n", yy_act, YY_START); \ \ - debug_logf(5, http_trace, TRACE_JS_DUMP, nullptr, \ + debug_logf(5, js_trace, TRACE_DUMP, nullptr, \ "text '%s'\n", YYText()); \ \ if (!states_process()) \ @@ -1365,7 +1371,7 @@ const char* JSTokenizer::p_scope_codes[] = }; JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out, - JSIdentifierCtxBase& mapper, uint8_t max_template_nesting, + JSIdentifier& mapper, uint8_t max_template_nesting, uint32_t max_bracket_depth, char*& buf, size_t& buf_size, int cap_size) : yyFlexLexer(in, out), max_template_nesting(max_template_nesting), @@ -1532,7 +1538,7 @@ JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b if (!name) { - debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr, + debug_logf(6, js_trace, TRACE_DUMP, nullptr, "'%s' => IDENTIFIER_OVERFLOW\n", lexeme); return IDENTIFIER_OVERFLOW; } @@ -1556,15 +1562,15 @@ 
JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b dealias_stored = true; yyout << alias; - debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr, - "'%s' => '%s'\n", lexeme, alias); + debug_logf(6, js_trace, TRACE_DUMP, nullptr, + "'%s' => '%s'\n", lexeme, alias); return EOS; } ignored_id_pos = -1; yyout << name; - debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr, + debug_logf(6, js_trace, TRACE_DUMP, nullptr, "'%s' => '%s'\n", lexeme, name); return EOS; } @@ -2077,7 +2083,7 @@ JSTokenizer::JSRet JSTokenizer::p_scope_push(ScopeMetaType t) if (!ident_ctx.scope_push(m2p(t))) return SCOPE_NESTING_OVERFLOW; - debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, "scope pushed: '%s'\n", m2str(t)); + debug_logf(5, js_trace, TRACE_PROC, nullptr, "scope pushed: '%s'\n", m2str(t)); return EOS; } @@ -2087,7 +2093,7 @@ JSTokenizer::JSRet JSTokenizer::p_scope_pop(ScopeMetaType t) if (!ident_ctx.scope_pop(m2p(t))) return WRONG_CLOSING_SYMBOL; - debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, "scope popped: '%s'\n", m2str(t)); + debug_logf(5, js_trace, TRACE_PROC, nullptr, "scope popped: '%s'\n", m2str(t)); return EOS; } @@ -2398,7 +2404,7 @@ JSTokenizer::JSRet JSTokenizer::literal_regex_g_close() case ')': if (regex_stack.empty()) { - debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, + debug_logf(5, js_trace, TRACE_PROC, nullptr, "no group to close, .. 
%c\n", yytext[0]); return BAD_TOKEN; } diff --git a/src/js_norm/test/CMakeLists.txt b/src/js_norm/test/CMakeLists.txt new file mode 100644 index 000000000..bb4634c96 --- /dev/null +++ b/src/js_norm/test/CMakeLists.txt @@ -0,0 +1,70 @@ +FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/../js_tokenizer.l + ${CMAKE_CURRENT_BINARY_DIR}/../js_tokenizer.cc + COMPILE_FLAGS ${FLEX_FLAGS} +) + +add_catch_test( js_normalizer_test + SOURCES + ${FLEX_js_tokenizer_OUTPUTS} + ../js_identifier_ctx.cc + ../js_normalizer.cc + ${CMAKE_SOURCE_DIR}/src/utils/streambuf.cc + ${CMAKE_SOURCE_DIR}/src/utils/util_cstring.cc + js_test_options.cc + js_test_stubs.cc + js_test_utils.cc +) + +if (ENABLE_BENCHMARK_TESTS) + add_catch_test( js_norm_benchmark + SOURCES + ${FLEX_js_tokenizer_OUTPUTS} + ../js_identifier_ctx.cc + ../js_normalizer.cc + ${CMAKE_SOURCE_DIR}/src/utils/streambuf.cc + ${CMAKE_SOURCE_DIR}/src/utils/util_cstring.cc + js_test_options.cc + js_test_stubs.cc + js_test_utils.cc + ) +endif(ENABLE_BENCHMARK_TESTS) + +add_catch_test( js_dealias_test + SOURCES + ${FLEX_js_tokenizer_OUTPUTS} + ../js_identifier_ctx.cc + ../js_normalizer.cc + ${CMAKE_SOURCE_DIR}/src/utils/streambuf.cc + ${CMAKE_SOURCE_DIR}/src/utils/util_cstring.cc + js_test_options.cc + js_test_stubs.cc + js_test_utils.cc +) + +add_catch_test( js_unescape_test + SOURCES + ${FLEX_js_tokenizer_OUTPUTS} + ../js_identifier_ctx.cc + ../js_normalizer.cc + ${CMAKE_SOURCE_DIR}/src/utils/streambuf.cc + ${CMAKE_SOURCE_DIR}/src/utils/util_cstring.cc + js_test_options.cc + js_test_stubs.cc + js_test_utils.cc +) + +add_catch_test( js_identifier_ctx_test + SOURCES + ../js_identifier_ctx.cc + js_test_stubs.cc +) + +add_catch_test( jsn_test + SOURCES + ${FLEX_js_tokenizer_OUTPUTS} + ../js_identifier_ctx.cc + ../js_norm.cc + ../js_normalizer.cc + ${CMAKE_SOURCE_DIR}/src/utils/streambuf.cc + js_test_stubs.cc +) diff --git a/src/utils/test/dev_notes.txt b/src/js_norm/test/dev_notes.txt similarity index 96% rename from 
src/utils/test/dev_notes.txt rename to src/js_norm/test/dev_notes.txt index bde1f61f2..6ede83787 100644 --- a/src/utils/test/dev_notes.txt +++ b/src/js_norm/test/dev_notes.txt @@ -31,3 +31,6 @@ Checklist to add a new option with an already present type: 5. Add field assignment to the set_overrides test case; 6. Add a named constuctor that returns Config with your option as the type. +js_test_stubs: +Provides stubs for external dependencies. + diff --git a/src/utils/test/js_dealias_test.cc b/src/js_norm/test/js_dealias_test.cc similarity index 99% rename from src/utils/test/js_dealias_test.cc rename to src/js_norm/test/js_dealias_test.cc index 138132e03..85c655cc6 100644 --- a/src/utils/test/js_dealias_test.cc +++ b/src/js_norm/test/js_dealias_test.cc @@ -25,7 +25,7 @@ #include "js_test_utils.h" -using namespace snort; +using namespace jsn; // Unit tests diff --git a/src/utils/test/js_identifier_ctx_test.cc b/src/js_norm/test/js_identifier_ctx_test.cc similarity index 99% rename from src/utils/test/js_identifier_ctx_test.cc rename to src/js_norm/test/js_identifier_ctx_test.cc index 9513f54ca..dafea345b 100644 --- a/src/utils/test/js_identifier_ctx_test.cc +++ b/src/js_norm/test/js_identifier_ctx_test.cc @@ -28,7 +28,9 @@ #include #include -#include "utils/js_identifier_ctx.h" +#include "js_norm/js_identifier_ctx.h" + +using namespace jsn; #define DEPTH 65536 #define SCOPE_DEPTH 256 diff --git a/src/utils/test/js_norm_benchmark.cc b/src/js_norm/test/js_norm_benchmark.cc similarity index 99% rename from src/utils/test/js_norm_benchmark.cc rename to src/js_norm/test/js_norm_benchmark.cc index bc18c66ce..896160ee2 100644 --- a/src/utils/test/js_norm_benchmark.cc +++ b/src/js_norm/test/js_norm_benchmark.cc @@ -28,12 +28,12 @@ #include "catch/catch.hpp" -#include "utils/js_identifier_ctx.h" -#include "utils/js_normalizer.h" +#include "js_norm/js_identifier_ctx.h" +#include "js_norm/js_normalizer.h" #include "js_test_utils.h" -using namespace snort; +using namespace jsn; 
static constexpr const char* s_closing_tag = ""; diff --git a/src/utils/test/js_normalizer_test.cc b/src/js_norm/test/js_normalizer_test.cc similarity index 99% rename from src/utils/test/js_normalizer_test.cc rename to src/js_norm/test/js_normalizer_test.cc index 1550cd3b7..d2b190cd3 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/js_norm/test/js_normalizer_test.cc @@ -25,12 +25,12 @@ #include "catch/catch.hpp" -#include "utils/js_identifier_ctx.h" -#include "utils/js_normalizer.h" +#include "js_norm/js_identifier_ctx.h" +#include "js_norm/js_normalizer.h" #include "js_test_utils.h" -using namespace snort; +using namespace jsn; using namespace std::string_literals; #ifdef CATCH_TEST_BUILD @@ -160,7 +160,6 @@ TEST_CASE("clamav tests", "[JSNormalizer]") "var x='test\u0000test';"s ); } - // FIXIT-L this should be revisited SECTION("test_case_13 - invalid escape sequence") { test_normalization_noident( @@ -5784,8 +5783,6 @@ TEST_CASE("Scope tracking - over multiple PDU", "[JSNormalizer]") test_normalization({ {"long_", "var_0000", {GLOBAL}}, {"variable", "var_0001", {GLOBAL}} - //FIXIT-E: if variable index will be preserved across PDUs, second pdu expected - // will be "var_0000" }); SECTION("general - variable extension: ignored identifier to a regular one") diff --git a/src/utils/test/js_test_options.cc b/src/js_norm/test/js_test_options.cc similarity index 100% rename from src/utils/test/js_test_options.cc rename to src/js_norm/test/js_test_options.cc diff --git a/src/utils/test/js_test_options.h b/src/js_norm/test/js_test_options.h similarity index 97% rename from src/utils/test/js_test_options.h rename to src/js_norm/test/js_test_options.h index e27102d81..e588666c8 100644 --- a/src/utils/test/js_test_options.h +++ b/src/js_norm/test/js_test_options.h @@ -26,8 +26,10 @@ #include #include -#include "utils/js_identifier_ctx.h" -#include "utils/js_normalizer.h" +#include "js_norm/js_identifier_ctx.h" +#include "js_norm/js_normalizer.h" + +using namespace 
jsn; typedef std::unordered_set StringSet; diff --git a/src/js_norm/test/js_test_stubs.cc b/src/js_norm/test/js_test_stubs.cc new file mode 100644 index 000000000..645598b11 --- /dev/null +++ b/src/js_norm/test/js_test_stubs.cc @@ -0,0 +1,40 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_test_stubs.cc author Oleksandr Serhiienko + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "js_norm/js_enum.h" +#include "js_norm/js_norm_module.h" +#include "trace/trace_api.h" + +THREAD_LOCAL const snort::Trace* js_trace = nullptr; +THREAD_LOCAL PegCount JSNormModule::peg_counts[jsn::PEG_COUNT_MAX] = {}; + +namespace snort +{ +[[noreturn]] void FatalError(const char*, ...) 
{ exit(EXIT_FAILURE); } + +void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) { } +uint8_t TraceApi::get_constraints_generation() { return 0; } +void TraceApi::filter(const Packet&) { } + +int DetectionEngine::queue_event(unsigned int, unsigned int) { return 0; } +} diff --git a/src/utils/test/js_test_utils.cc b/src/js_norm/test/js_test_utils.cc similarity index 94% rename from src/utils/test/js_test_utils.cc rename to src/js_norm/test/js_test_utils.cc index 7b415dce0..4eebc7d09 100644 --- a/src/utils/test/js_test_utils.cc +++ b/src/js_norm/test/js_test_utils.cc @@ -25,18 +25,7 @@ #include "catch/catch.hpp" -namespace snort -{ -[[noreturn]] void FatalError(const char*, ...) -{ exit(EXIT_FAILURE); } -void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) { } -uint8_t TraceApi::get_constraints_generation() { return 0; } -void TraceApi::filter(const Packet&) { } -} - -THREAD_LOCAL const snort::Trace* http_trace = nullptr; - -using namespace snort; +using namespace jsn; JSTokenizerTester::JSTokenizerTester(const JSTestConfig& conf) : ident_ctx(conf.identifier_depth, @@ -45,8 +34,8 @@ JSTokenizerTester::JSTokenizerTester(const JSTestConfig& conf) : conf.ignored_properties_list), normalizer( conf.normalize_identifiers ? 
- static_cast(ident_ctx) : - static_cast(ident_ctx_stub), + static_cast(ident_ctx) : + static_cast(ident_ctx_stub), conf.norm_depth, conf.max_template_nesting, conf.max_bracket_depth, diff --git a/src/utils/test/js_test_utils.h b/src/js_norm/test/js_test_utils.h similarity index 92% rename from src/utils/test/js_test_utils.h rename to src/js_norm/test/js_test_utils.h index 1f99ec7a8..40be89ca0 100644 --- a/src/utils/test/js_test_utils.h +++ b/src/js_norm/test/js_test_utils.h @@ -26,20 +26,17 @@ #include #include -#include "utils/js_identifier_ctx.h" -#include "utils/js_normalizer.h" +#include "js_norm/js_identifier_ctx.h" +#include "js_norm/js_normalizer.h" #include "js_test_options.h" constexpr int unlim_depth = -1; -namespace snort +namespace jsn { -[[noreturn]] void FatalError(const char*, ...); -void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list); -} -class JSIdentifierCtxStub : public JSIdentifierCtxBase +class JSIdentifierCtxStub : public JSIdentifier { public: JSIdentifierCtxStub() = default; @@ -72,7 +69,7 @@ public: JSIdentifierCtx ident_ctx; JSIdentifierCtxStub ident_ctx_stub; - snort::JSNormalizer normalizer; + JSNormalizer normalizer; private: const JSTestConfig& config; @@ -95,7 +92,7 @@ public: JSTestConfig(const Overrides& values); JSTestConfig derive(const Overrides& values) const; - snort::JSNormalizer&& make_normalizer() const; + JSNormalizer&& make_normalizer() const; void test_scope(const std::string& context, const std::list& stack) const; void test_scope(const std::string& context, const std::list& stack, @@ -134,7 +131,9 @@ static const JSTestConfig default_config({ normalize_identifiers(true) }); -void test_scope(const std::string& context, const std::list& stack); +} + +void test_scope(const std::string& context, const std::list& stack); void test_normalization(const std::string& source, const std::string& expected, const Overrides& overrides = {}); void test_normalization_noident(const std::string& 
source, const std::string& expected, const Overrides& overrides = {}); diff --git a/src/utils/test/js_unescape_test.cc b/src/js_norm/test/js_unescape_test.cc similarity index 99% rename from src/utils/test/js_unescape_test.cc rename to src/js_norm/test/js_unescape_test.cc index ded5dbec3..270d5e502 100644 --- a/src/utils/test/js_unescape_test.cc +++ b/src/js_norm/test/js_unescape_test.cc @@ -23,11 +23,13 @@ #include "catch/catch.hpp" -#include "utils/js_identifier_ctx.h" -#include "utils/js_normalizer.h" +#include "js_norm/js_identifier_ctx.h" +#include "js_norm/js_normalizer.h" #include "js_test_utils.h" +using namespace jsn; + #ifdef CATCH_TEST_BUILD TEST_CASE("Sequence parsing", "[JSNormalizer]") diff --git a/src/js_norm/test/jsn_test.cc b/src/js_norm/test/jsn_test.cc new file mode 100644 index 000000000..f6c5f9844 --- /dev/null +++ b/src/js_norm/test/jsn_test.cc @@ -0,0 +1,280 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+//-------------------------------------------------------------------------- +// jsn_test.cc author Oleksandr Serhiienko + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "catch/catch.hpp" + +#include "js_norm/js_norm.h" + +using namespace jsn; +using namespace snort; + +#ifdef CATCH_TEST_BUILD + +TEST_CASE("configuration", "[JSNorm]") +{ + const void* dst = nullptr; + size_t dst_len = 0; + + SECTION("no config passed") + { + JSNorm jsn(nullptr); + + const std::string src = "var"; + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + CHECK(dst == nullptr); + CHECK(dst_len == 0); + } + + SECTION("config passed") + { + JSNormConfig config; + JSNorm jsn(&config); + + const std::string src = "var "; + const std::string exp = "var"; + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == exp); + } +} + +TEST_CASE("normalization", "[JSNorm]") +{ + JSNormConfig config; + JSNorm jsn(&config); + + const void* dst = nullptr; + size_t dst_len = 0; + + SECTION("missed input") + { + const std::string src = "var"; + + jsn.tick(); + jsn.tick(); + jsn.tick(); + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + CHECK(dst == nullptr); + CHECK(dst_len == 0); + } + + SECTION("data lost") + { + const std::string src = "var"; + + jsn.tick(); + jsn.tick(); + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + CHECK(dst == nullptr); + CHECK(dst_len == 0); + } + + SECTION("passed") + { + const std::string pdu_1 = "var "; + const std::string pdu_2 = "a = "; + const std::string pdu_3 = "1 ;"; + + // dst buffer is accumulated if no explicit flushing + const std::string norm_pdu_1 = "var"; + const std::string norm_pdu_2 = "var var_0000="; + const std::string norm_pdu_3 = "var var_0000=1;"; + + jsn.tick(); + jsn.normalize(pdu_1.c_str(), pdu_1.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + 
CHECK(std::string((const char*)dst, dst_len) == norm_pdu_1); + + jsn.tick(); + jsn.normalize(pdu_2.c_str(), pdu_2.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == norm_pdu_2); + + jsn.tick(); + jsn.normalize(pdu_3.c_str(), pdu_3.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == norm_pdu_3); + } +} + +TEST_CASE("non-blocking events", "[JSNorm]") +{ + REQUIRE(EventSid::EVENT__MAX_VALUE == 10); + + JSNormConfig config; + config.ignored_ids.insert("unescape"); + + JSNorm jsn(&config, false); + const void* dst = nullptr; + size_t dst_len = 0; + + std::string src = "'bar'"; + std::string exp = "'bar'"; + + SECTION("EVENT_NEST_UNESCAPE_FUNC") + { + src = "unescape(unescape('foo')) ;"; + exp = "'foo';"; + } + + SECTION("EVENT_MIXED_UNESCAPE_SEQUENCE") + { + src = "unescape(\"\\u66%6f\\u6f\") ;"; + exp = "\"foo\";"; + } + + SECTION("EVENT_OPENING_TAG") + { + src = "'' ;"; + std::string exp = "'';"; + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == exp); + } + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == exp); +} + +TEST_CASE("blocking events", "[JSNorm]") +{ + REQUIRE(EventSid::EVENT__MAX_VALUE == 10); + + JSNormConfig config; + JSNorm jsn(&config, false); + + const void* dst = nullptr; + size_t dst_len = 0; + + std::string src = "'bar'"; + std::string exp = "'bar'"; + + SECTION("EVENT_CLOSING_TAG") + { + src = "'' ;"; + exp = "'"; + } + + SECTION("EVENT_BAD_TOKEN") + { + src = "{)"; + exp = "{"; + } + + SECTION("EVENT_IDENTIFIER_OVERFLOW") + { + config.identifier_depth = 0; + + JSNorm jsn(&config, false); + + std::string src = "; a"; + std::string exp = ";"; + + jsn.normalize(src.c_str(), 
src.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == exp); + } + + SECTION("EVENT_BRACKET_NEST_OVERFLOW") + { + config.max_bracket_depth = 0; + + JSNorm jsn(&config, false); + + std::string src = "; {"; + std::string exp = ";"; + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == exp); + } + + SECTION("EVENT_SCOPE_NEST_OVERFLOW") + { + config.max_scope_depth = 0; + + JSNorm jsn(&config, false); + + std::string src = "; function f () {"; + std::string exp = ";function var_0000"; + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == exp); + } + + jsn.normalize(src.c_str(), src.size(), dst, dst_len); + + REQUIRE(dst != nullptr); + REQUIRE(dst_len != 0); + + CHECK(std::string((const char*)dst, dst_len) == exp); +} + +#endif diff --git a/src/main/modules.cc b/src/main/modules.cc index a356fcd88..70489e09c 100644 --- a/src/main/modules.cc +++ b/src/main/modules.cc @@ -42,6 +42,7 @@ #include "framework/module.h" #include "host_tracker/host_tracker_module.h" #include "host_tracker/host_cache_module.h" +#include "js_norm/js_norm_module.h" #include "latency/latency_module.h" #include "log/messages.h" #include "managers/module_manager.h" @@ -2117,6 +2118,7 @@ void module_init() // these modules should be in ips policy ModuleManager::add_module(new EventFilterModule); + ModuleManager::add_module(new JSNormModule); ModuleManager::add_module(new RateFilterModule); ModuleManager::add_module(new SuppressModule); diff --git a/src/main/policy.cc b/src/main/policy.cc index 0f3b07190..284a54eae 100644 --- a/src/main/policy.cc +++ b/src/main/policy.cc @@ -29,6 +29,7 @@ #include "detection/detection_engine.h" #include "framework/file_policy.h" #include "framework/policy_selector.h" +#include 
"js_norm/js_config.h" #include "log/messages.h" #include "main/thread_config.h" #include "managers/inspector_manager.h" @@ -168,6 +169,7 @@ void InspectionPolicy::init(InspectionPolicy* other_inspection_policy) InspectionPolicy::~InspectionPolicy() { InspectorManager::delete_policy(this, cloned); + delete jsn_config; } void InspectionPolicy::configure() diff --git a/src/main/policy.h b/src/main/policy.h index c38a0f049..d6cf1ce65 100644 --- a/src/main/policy.h +++ b/src/main/policy.h @@ -51,6 +51,7 @@ struct SnortConfig; struct _daq_flow_stats; struct _daq_pkt_hdr; +struct JSNormConfig; struct PortTable; struct vartable_t; struct sfip_var_t; @@ -156,6 +157,8 @@ public: snort::DataBus dbus; bool cloned; + JSNormConfig* jsn_config = nullptr; + private: void init(InspectionPolicy* old_inspection_policy); }; diff --git a/src/pub_sub/test/pub_sub_http_request_body_event_test.cc b/src/pub_sub/test/pub_sub_http_request_body_event_test.cc index c7e4e7ccc..098e0b40b 100644 --- a/src/pub_sub/test/pub_sub_http_request_body_event_test.cc +++ b/src/pub_sub/test/pub_sub_http_request_body_event_test.cc @@ -55,7 +55,6 @@ void HttpMsgBody::publish() {} void HttpMsgBody::do_file_processing(const Field&) {} void HttpMsgBody::do_utf_decoding(const Field&, Field&) {} void HttpMsgBody::do_file_decompression(const Field&, Field&) {} -void HttpMsgBody::do_enhanced_js_normalization(const Field&, Field&) {} void HttpMsgBody::clean_partial(uint32_t&, uint32_t&, uint8_t*&, uint32_t&) {} void HttpMsgBody::bookkeeping_regular_flush(uint32_t&, uint8_t*&, uint32_t&, int32_t) {} bool HttpMsgBody::run_detection(snort::Packet*) { return true; } diff --git a/src/service_inspectors/http_inspect/dev_notes_js_norm.txt b/src/service_inspectors/http_inspect/dev_notes_js_norm.txt index 3d9b905c7..42a88673d 100644 --- a/src/service_inspectors/http_inspect/dev_notes_js_norm.txt +++ b/src/service_inspectors/http_inspect/dev_notes_js_norm.txt @@ -1,6 +1,15 @@ HttpJsNorm class serves as a script Normalizer, and 
currently has two implementations: the Legacy Normalizer and the Enhanced Normalizer. +In NHI, there are two JSNorm extensions: + * HttpInlineJSNorm, processes content of HTML script tags. + * HttpExternalJSNorm, processes payload with JavaScript MIME type. + +Normalization context is per transaction. It is created once js_data calls for normalized JS data, +and is deleted once transaction ends. Partial inspections feed data incrementally to JS Normalizer, +but the output is accumulated and concatenated in the right way, presenting more comprehensive block +of data to Detection. + During message body analysis the Enhanced Normalizer does one of the following: 1. If Content-Type says its an external script then Normalizer processes the whole message body as a script text. @@ -8,118 +17,7 @@ During message body analysis the Enhanced Normalizer does one of the following: subsequent bytes in a stream mode, until it finds a closing tag. It proceeds and scans the entire message body for inline scripts. -Enhanced Normalizer is a stateful JavaScript whitespace and identifiers normalizer. -Normalizer will remove all extraneous whitespace and newlines, keeping a single space where -syntactically necessary. Comments will be removed, but contents of string literals will -be kept intact. Any string literals, added by the plus operator, -will be concatenated. This also works for functions that result in string -literals. Semicolons will be inserted, if not already present, according to ECMAScript -automatic semicolon insertion rules. - -All JavaScript identifier names, except those from the ident_ignore or prop_ignore lists, -will be substituted with unified names in the following format: var_0000 -> var_ffff. -The number of unique identifiers available is 65536 names per HTTP transaction. If Normalizer -overruns the configured limit, built-in alert is generated. - -A config option to set the limit manually: - - * http_inspect.js_norm_identifier_depth. 
- -Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with -the listed identifier, any chain of dot accessors, brackets and function calls will be kept -intact. - -For example: - - * console.log("bar") - * document.getElementById("id").text - * eval("script") - * foo["bar"] - -Ignored identifiers are configured via the following config option that accepts a list of object -and function names: - - * http_inspect.js_norm_ident_ignore = { 'console', 'document', 'eval', 'foo' } - -When a variable assignment that 'aliases' an identifier from the list is found, -the assignment will be tracked and subsequent occurrences of the variable will be -replaced with the stored value. This substitution will follow JavaScript variable scope -limits. - -For example: - - var a = console.log - a("hello") // will be substituted to 'console.log("hello")' - a.foo.bar() // will be normalized as 'console.log.foo.bar()'. When variable is 'de-aliased', - // following identifiers are not normalized, just like identifiers from ident_ignore - -When an object is created using a 'new' keyword, and the class/constructor is found in ident_ignore -list, the object will be tracked, and although its own identifier will be converted to normal form -its property and function calls will be kept intact, as with ignored identifiers. - -For example: - - var obj = new Array() - obj.insert(1,2,3) // will be normalized to var_0000.insert(1,2,3) - -For properties and methods of objects that can be created implicitly, there is a -js_norm_prop_ignore list. All names in the call chain after the first property or -method from the list has been occurred will not be normalized. - -Note that identifiers are normalized by name, i.e. an identifier and a property with the same name -will be normalized to the same value. However, the ignore lists act separately on identifiers -and properties. 
- -For example: - - http_inspect.js_norm_prop_ignore = { 'split' } - - in: "string".toUpperCase().split("").reverse().join(""); - out: "string".var_0000().split("").reverse().join(""); - -In addition to the scope tracking, JS Normalizer specifically tracks unescape-like JavaScript -functions (unescape, decodeURI, decodeURIComponent, String.fromCharCode, String.fromCodePoint). -This allows detection of unescape functions nested within other unescape functions, which is -a potential indicator of a multilevel obfuscation. The definition of a function call depends on -identifier substitution, so such identifiers must be included in the ignore list in -order to use this feature. After determining the unescape sequence, it is decoded into the -corresponding string, and the name of unescape function will not be present in the output. -Single-byte escape sequences within the string and template literals which are arguments of -unescape, decodeURI and decodeURIComponent functions will be decoded according to ISO/IEC 8859-1 -(Latin-1) charset. Except these cases, escape sequences and code points will be decoded to UTF-8 -format. - -For example: - - unescape('\u0062\u0061\u0072') -> 'bar' - decodeURI('%62%61%72') -> 'bar' - decodeURIComponent('\x62\x61\x72') -> 'bar' - String.fromCharCode(98, 0x0061, 0x72) -> 'bar' - String.fromCodePoint(65600, 65601, 0x10042) -> '𐁀𐁁𐁂' - -Supported formats follow - - \xXX - \uXXXX - \u{XXXX} - %XX - \uXX - %uXXXX - decimal code point - hexadecimal code point - -JS Normalizer is able to decode mixed encoding sequences. However, a built-in alert rises -in such case. - -JS Normalizer's syntax parser follows ECMA-262 standard. For various features, -tracking of variable scope and individual brackets is done in accordance to the standard. -Additionally, Normalizer enforces standard limits on HTML content in JavaScript: - * no nesting tags allowed, i.e. 
two opening tags in a row - * script closing tag is not allowed in string literals, block comments, regular expression literals, etc. - -If source JavaScript is syntactically incorrect (containing a bad token, brackets mismatch, -HTML-tags, etc) Normalizer fires corresponding built-in rule and abandons the current script, -though the already-processed data remains in the output buffer. +Also, js_data IPS option's buffer is a part of NHI processing in order to start the normalization. Enhanced Normalizer supports scripts over multiple PDUs. So, if the script is not ended, Normalizer's context is saved in HttpFlowData. diff --git a/src/service_inspectors/http_inspect/dev_notes_test_tool.txt b/src/service_inspectors/http_inspect/dev_notes_test_tool.txt index e78785f6a..c15ffa324 100644 --- a/src/service_inspectors/http_inspect/dev_notes_test_tool.txt +++ b/src/service_inspectors/http_inspect/dev_notes_test_tool.txt @@ -88,27 +88,3 @@ developer to get it right. The test tool is designed for single-threaded operation only. The test tool is only available when compiled with REG_TEST. - -NHI has some trace messages available. Trace options follow: - -* trace.module.http_inspect.js_proc turns on messages from script processing flow. -+ -Verbosity levels: -+ -1. Script opening tag detected (available in release build) -2. Attributes of detected script (available in release build) -3. Normalizer return code (available in release build) -4. Contexts management (debug build only) -5. Parser states (debug build only) -6. Input stream states (debug build only) - -* trace.module.http_inspect.js_dump dumps JavaScript data from processing layers. -+ -Verbosity levels: -+ -1. js_data buffer as it is being passed to detection (available in release build) -2. (no messages available currently) -3. Payload passed to Normalizer (available in release build) -4. Temporary buffer (debug build only) -5. Matched token (debug build only) -6. 
Identifier substitution (debug build only) diff --git a/src/service_inspectors/http_inspect/http_api.cc b/src/service_inspectors/http_inspect/http_api.cc index 93064df4c..89cf49e43 100644 --- a/src/service_inspectors/http_inspect/http_api.cc +++ b/src/service_inspectors/http_inspect/http_api.cc @@ -126,7 +126,6 @@ extern const BaseApi* ips_http_true_ip; extern const BaseApi* ips_http_uri; extern const BaseApi* ips_http_version; extern const BaseApi* ips_http_version_match; -extern const BaseApi* ips_js_data; #ifdef BUILDING_SO SO_PUBLIC const BaseApi* snort_plugins[] = @@ -161,7 +160,6 @@ const BaseApi* sin_http[] = ips_http_uri, ips_http_version, ips_http_version_match, - ips_js_data, nullptr }; diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 28f999130..9b2032dc5 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -22,12 +22,6 @@ #include -enum -{ - TRACE_JS_PROC = 0, - TRACE_JS_DUMP -}; - namespace HttpEnums { static const int MAX_OCTETS = 63780; @@ -69,7 +63,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION, PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY, PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL, - PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX }; + PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX }; // Result of scanning by splitter enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE, @@ -281,18 +275,11 @@ enum Infraction INF_MULTIPLE_HOST_HDRS = 118, INF_HTTP2_SETTINGS = 119, INF_UPGRADE_HEADER_HTTP2 = 120, - INF_JS_BAD_TOKEN = 121, - INF_JS_OPENING_TAG = 122, - INF_JS_CLOSING_TAG = 123, INF_JS_CODE_IN_EXTERNAL = 124, INF_JS_SHORTENED_TAG = 125, - 
INF_JS_IDENTIFIER_OVERFLOW = 126, - INF_JS_BRACKET_NEST_OVERFLOW = 127, INF_CHUNK_OVER_MAXIMUM = 128, INF_LONG_HOST_VALUE = 129, INF_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 130, - INF_JS_DATA_LOST = 131, - INF_JS_SCOPE_NEST_OVERFLOW = 132, INF_INVALID_SUBVERSION = 133, INF_VERSION_0 = 134, INF_GZIP_FEXTRA = 135, @@ -423,16 +410,16 @@ enum EventSid EVENT_LONG_SCHEME = 262, EVENT_HTTP2_UPGRADE_REQUEST = 263, EVENT_HTTP2_UPGRADE_RESPONSE = 264, - EVENT_JS_BAD_TOKEN = 265, - EVENT_JS_OPENING_TAG = 266, - EVENT_JS_CLOSING_TAG = 267, + // EVENT_JS_BAD_TOKEN = 265, // Retired. Do not reuse this number + // EVENT_JS_OPENING_TAG = 266, // Retired. Do not reuse this number + // EVENT_JS_CLOSING_TAG = 267, // Retired. Do not reuse this number EVENT_JS_CODE_IN_EXTERNAL = 268, EVENT_JS_SHORTENED_TAG = 269, - EVENT_JS_IDENTIFIER_OVERFLOW = 270, - EVENT_JS_BRACKET_NEST_OVERFLOW = 271, + // EVENT_JS_IDENTIFIER_OVERFLOW = 270, // Retired. Do not reuse this number + // EVENT_JS_BRACKET_NEST_OVERFLOW = 271, // Retired. Do not reuse this number EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 272, - EVENT_JS_DATA_LOST = 273, - EVENT_JS_SCOPE_NEST_OVERFLOW = 274, + // EVENT_JS_DATA_LOST = 273, // Retired. Do not reuse this number + // EVENT_JS_SCOPE_NEST_OVERFLOW = 274, // Retired. 
Do not reuse this number EVENT_INVALID_SUBVERSION = 275, EVENT_VERSION_0 = 276, EVENT_VERSION_HIGHER_THAN_1 = 277, diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc index 60f05c485..c8c7a210d 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.cc +++ b/src/service_inspectors/http_inspect/http_flow_data.cc @@ -26,13 +26,11 @@ #include "decompress/file_decomp.h" #include "mime/file_mime_process.h" #include "service_inspectors/http2_inspect/http2_flow_data.h" -#include "trace/trace_api.h" -#include "utils/js_identifier_ctx.h" -#include "utils/js_normalizer.h" #include "http_cutter.h" #include "http_common.h" #include "http_enum.h" +#include "http_js_norm.h" #include "http_module.h" #include "http_msg_header.h" #include "http_msg_request.h" @@ -98,23 +96,6 @@ HttpFlowData::~HttpFlowData() if (HttpModule::get_peg_counts(PEG_CONCURRENT_SESSIONS) > 0) HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS); -#ifndef UNIT_TEST_BUILD - if (js_ident_ctx) - { - delete js_ident_ctx; - - debug_log(4, http_trace, TRACE_JS_PROC, nullptr, - "js_ident_ctx deleted\n"); - } - if (js_normalizer) - { - delete js_normalizer; - - debug_log(4, http_trace, TRACE_JS_PROC, nullptr, - "js_normalizer deleted\n"); - } -#endif - for (int k=0; k <= 1; k++) { delete infractions[k]; @@ -134,6 +115,7 @@ HttpFlowData::~HttpFlowData() delete utf_state[k]; if (fd_state[k] != nullptr) File_Decomp_StopFree(fd_state[k]); + delete js_ctx[k]; } delete_pipeline(); @@ -236,74 +218,6 @@ void HttpFlowData::garbage_collect() } } -#ifndef UNIT_TEST_BUILD -void HttpFlowData::reset_js_data_idx() -{ - js_data_processed_idx = js_data_idx = 0; - js_data_lost_once = false; -} - -void HttpFlowData::reset_js_ident_ctx() -{ - if (js_ident_ctx) - { - js_ident_ctx->reset(); - debug_log(4, http_trace, TRACE_JS_PROC, nullptr, - "js_ident_ctx reset\n"); - } -} - -snort::JSNormalizer& HttpFlowData::acquire_js_ctx(const 
HttpParaList::JsNormParam& js_norm_param) -{ - if (js_normalizer) - return *js_normalizer; - - if (!js_ident_ctx) - { - js_ident_ctx = new JSIdentifierCtx(js_norm_param.js_identifier_depth, - js_norm_param.max_scope_depth, js_norm_param.ignored_ids, js_norm_param.ignored_props); - - debug_logf(4, http_trace, TRACE_JS_PROC, nullptr, - "js_ident_ctx created (ident_depth %d)\n", js_norm_param.js_identifier_depth); - } - - js_normalizer = new JSNormalizer(*js_ident_ctx, js_norm_param.js_norm_bytes_depth, - js_norm_param.max_template_nesting, js_norm_param.max_bracket_depth); - - debug_logf(4, http_trace, TRACE_JS_PROC, nullptr, - "js_normalizer created (norm_depth %zd, max_template_nesting %d)\n", - js_norm_param.js_norm_bytes_depth, js_norm_param.max_template_nesting); - - return *js_normalizer; -} - -bool HttpFlowData::sync_js_data_idx() -{ - bool data_missed = ((js_data_idx - js_data_processed_idx) > 1); - js_data_processed_idx = js_data_idx; - return data_missed; -} - -void HttpFlowData::release_js_ctx() -{ - js_continue = false; - - if (!js_normalizer) - return; - - delete js_normalizer; - js_normalizer = nullptr; - - debug_log(4, http_trace, TRACE_JS_PROC, nullptr, - "js_normalizer deleted\n"); -} -#else -void HttpFlowData::reset_js_ident_ctx() {} -snort::JSNormalizer& HttpFlowData::acquire_js_ctx(const HttpParaList::JsNormParam&) -{ return *js_normalizer; } -void HttpFlowData::release_js_ctx() {} -#endif - bool HttpFlowData::add_to_pipeline(HttpTransaction* latest) { if (pipeline == nullptr) diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h index 203c08c5c..2ae92e715 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.h +++ b/src/service_inspectors/http_inspect/http_flow_data.h @@ -36,15 +36,13 @@ #include "http_module.h" class HttpTransaction; -class HttpJsNorm; +class HttpJSNorm; class HttpMsgSection; class HttpCutter; class HttpQueryParser; -class JSIdentifierCtxBase; namespace 
snort { -class JSNormalizer; class MimeSession; } @@ -58,7 +56,7 @@ public: friend class HttpBodyCutter; friend class HttpInspect; - friend class HttpJsNorm; + friend class HttpJSNorm; friend class HttpMsgSection; friend class HttpMsgStart; friend class HttpMsgRequest; @@ -209,22 +207,7 @@ private: HttpTransaction* take_from_pipeline(); void delete_pipeline(); - bool js_data_lost_once = false; - uint32_t js_data_idx = 0; - uint32_t js_data_processed_idx = 0; - - // *** HttpJsNorm - JSIdentifierCtxBase* js_ident_ctx = nullptr; - snort::JSNormalizer* js_normalizer = nullptr; - bool js_continue = false; - bool js_built_in_event = false; - - void reset_js_data_idx(); - void reset_js_ident_ctx(); - snort::JSNormalizer& acquire_js_ctx(const HttpParaList::JsNormParam& js_norm_param); - void release_js_ctx(); - bool sync_js_data_idx(); - + HttpJSNorm* js_ctx[2] = { nullptr, nullptr }; bool cutover_on_clear = false; bool ssl_search_abandoned = false; diff --git a/src/service_inspectors/http_inspect/http_inspect.cc b/src/service_inspectors/http_inspect/http_inspect.cc index 677ed8c6e..1ca5d1864 100755 --- a/src/service_inspectors/http_inspect/http_inspect.cc +++ b/src/service_inspectors/http_inspect/http_inspect.cc @@ -140,7 +140,7 @@ HttpInspect::~HttpInspect() bool HttpInspect::configure(SnortConfig* ) { - params->js_norm_param.js_norm->configure(); + params->js_norm_param.configure(); params->mime_decode_conf->sync_all_depths(); return true; @@ -154,14 +154,6 @@ void HttpInspect::show(const SnortConfig*) const auto bad_chars = GetBadChars(params->uri_param.bad_characters); auto xff_headers = GetXFFHeaders(params->xff_headers); - std::string js_norm_ident_ignore; - for (auto s : params->js_norm_param.ignored_ids) - js_norm_ident_ignore += s + " "; - - std::string js_norm_prop_ignore; - for (auto s : params->js_norm_param.ignored_props) - js_norm_prop_ignore += s + " "; - ConfigLogger::log_limit("request_depth", params->request_depth, -1); 
ConfigLogger::log_limit("response_depth", params->response_depth, -1); ConfigLogger::log_flag("unzip", params->unzip); @@ -173,16 +165,8 @@ void HttpInspect::show(const SnortConfig*) const ConfigLogger::log_value("max_mime_attach", params->max_mime_attach); ConfigLogger::log_flag("script_detection", params->script_detection); ConfigLogger::log_flag("normalize_javascript", params->js_norm_param.normalize_javascript); - ConfigLogger::log_value("max_javascript_whitespaces", params->js_norm_param.max_javascript_whitespaces); - ConfigLogger::log_value("js_norm_bytes_depth", params->js_norm_param.js_norm_bytes_depth); - ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth); - ConfigLogger::log_value("js_norm_max_tmpl_nest", params->js_norm_param.max_template_nesting); - ConfigLogger::log_value("js_norm_max_bracket_depth", params->js_norm_param.max_bracket_depth); - ConfigLogger::log_value("js_norm_max_scope_depth", params->js_norm_param.max_scope_depth); - if (!js_norm_ident_ignore.empty()) - ConfigLogger::log_list("js_norm_ident_ignore", js_norm_ident_ignore.c_str()); - if (!js_norm_prop_ignore.empty()) - ConfigLogger::log_list("js_norm_prop_ignore", js_norm_prop_ignore.c_str()); + ConfigLogger::log_value("max_javascript_whitespaces", + params->js_norm_param.max_javascript_whitespaces); ConfigLogger::log_value("bad_characters", bad_chars.c_str()); ConfigLogger::log_value("ignore_unreserved", unreserved_chars.c_str()); ConfigLogger::log_flag("percent_u", params->uri_param.percent_u); @@ -244,6 +228,9 @@ bool HttpInspect::get_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffe case InspectionBuffer::IBT_VBA: return get_buf(BUFFER_VBA_DATA, p, b); + case InspectionBuffer::IBT_JS_DATA: + return get_buf(BUFFER_JS_DATA, p, b); + default: assert(false); return false; @@ -352,7 +339,7 @@ void HttpInspect::set_hx_body_state(snort::Flow* flow, HttpCommon::SourceId sour bool HttpInspect::get_fp_buf(InspectionBuffer::Type ibt, Packet* 
p, InspectionBuffer& b) { - assert(ibt == InspectionBuffer::IBT_VBA); + assert(ibt == InspectionBuffer::IBT_VBA or ibt == InspectionBuffer::IBT_JS_DATA); if (get_latest_is(p) == PS_NONE) return false; diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index 702669558..58b662148 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -16,6 +16,7 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. //-------------------------------------------------------------------------- // http_js_norm.cc author Tom Peters +// http_js_norm.cc author Oleksandr Serhiienko #ifdef HAVE_CONFIG_H #include "config.h" @@ -23,98 +24,63 @@ #include "http_js_norm.h" +#include "js_norm/js_enum.h" +#include "js_norm/js_normalizer.h" #include "trace/trace_api.h" -#include "utils/js_normalizer.h" #include "utils/safec.h" #include "utils/util_jsnorm.h" -#include "http_common.h" -#include "http_enum.h" - using namespace HttpEnums; +using namespace jsn; using namespace snort; -static const char* jsret_codes[] = -{ - "end of stream", - "script ended", - "script continues", - "closing tag", - "bad token", - "identifier overflow", - "template nesting overflow", - "bracket nesting overflow", - "scope nesting overflow", - "wrong closing symbol", - "ended in inner scope", - "unknown" -}; +extern THREAD_LOCAL const snort::Trace* js_trace; -static const char* ret2str(JSTokenizer::JSRet ret) -{ - assert(ret < JSTokenizer::JSRet::MAX); - ret = ret < JSTokenizer::JSRet::MAX ? 
ret : JSTokenizer::JSRet::MAX; - return jsret_codes[ret]; -} +enum AttrId { AID_OPEN, AID_SLASH, AID_GT, AID_SRC, AID_JS, AID_NON_JS, AID_ECMA, AID_VB }; -static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const Packet* current_packet, - const char* const end, const char*& ptr, bool external_script) +struct MatchContext { - trace_logf(3, http_trace, TRACE_JS_DUMP, current_packet, - "original[%zu]: %.*s\n", end - ptr, static_cast(end - ptr), ptr); - - auto ret = ctx.normalize(ptr, end - ptr, external_script); - auto src_next = ctx.get_src_next(); + const uint8_t* next = nullptr; + bool is_javascript = true; + bool is_external = false; + bool is_shortened = false; +}; - trace_logf(3, http_trace, TRACE_JS_PROC, current_packet, - "normalizer returned with %d '%s'\n", ret, ret2str(ret)); +SearchTool* js_create_mpse_open_tag() +{ + constexpr const char* otag_start = " ptr) - HttpModule::increment_peg_counts(PEG_JS_BYTES, src_next - ptr); - else - src_next = end; // Normalizer has failed, thus aborting the remaining input + mpse->add(otag_start, strlen(otag_start), AID_OPEN); - ptr = src_next; + mpse->prep(); - return ret; + return mpse; } -HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, - const HttpParaList::JsNormParam& js_norm_param_) : - uri_param(uri_param_), - js_norm_param(js_norm_param_), - mpse_otag(nullptr), - mpse_attr(nullptr), - mpse_type(nullptr) -{} - -HttpJsNorm::~HttpJsNorm() +SearchTool* js_create_mpse_tag_type() { - delete mpse_otag; - delete mpse_attr; - delete mpse_type; -} + constexpr const char* attr_js = "JAVASCRIPT"; + constexpr const char* attr_ecma = "ECMASCRIPT"; + constexpr const char* attr_vb = "VBSCRIPT"; + SearchTool* mpse = new SearchTool; -void HttpJsNorm::configure() -{ - if (configure_once) - return; + mpse->add(attr_js, strlen(attr_js), AID_JS); + mpse->add(attr_ecma, strlen(attr_ecma), AID_ECMA); + mpse->add(attr_vb, strlen(attr_vb), AID_VB); - mpse_otag = new SearchTool; - mpse_attr = new SearchTool; - 
mpse_type = new SearchTool; + mpse->prep(); - static constexpr const char* otag_start = "add(otag_start, strlen(otag_start), 0); + SearchTool* mpse = new SearchTool; - mpse_attr->add(attr_slash, strlen(attr_slash), AID_SLASH); - mpse_attr->add(attr_gt, strlen(attr_gt), AID_GT); - mpse_attr->add(attr_src, strlen(attr_src), AID_SRC); + mpse->add(attr_slash, strlen(attr_slash), AID_SLASH); + mpse->add(attr_gt, strlen(attr_gt), AID_GT); + mpse->add(attr_src, strlen(attr_src), AID_SRC); for (unsigned i = 0; i < attrs_js_size; ++i) - mpse_attr->add(attrs_js[i], strlen(attrs_js[i]), AID_JS); + mpse->add(attrs_js[i], strlen(attrs_js[i]), AID_JS); for (unsigned i = 0; i < attrs_non_js_size; ++i) - mpse_attr->add(attrs_non_js[i], strlen(attrs_non_js[i]), AID_NON_JS); + mpse->add(attrs_non_js[i], strlen(attrs_non_js[i]), AID_NON_JS); - mpse_type->add(attr_js, strlen(attr_js), AID_JS); - mpse_type->add(attr_ecma, strlen(attr_ecma), AID_ECMA); - mpse_type->add(attr_vb, strlen(attr_vb), AID_VB); + mpse->prep(); - mpse_otag->prep(); - mpse_attr->prep(); - mpse_type->prep(); - - configure_once = true; + return mpse; } -void HttpJsNorm::do_external(const Field& input, Field& output, - HttpInfractions* infractions, HttpFlowData* ssn, bool final_portion) const +static int match_script(void*, void*, int index, void* index_ptr, void*) { - if (ssn->js_built_in_event) - return; - const Packet* current_packet = DetectionEngine::get_current_packet(); - const char* ptr = (const char*)input.start(); - const char* const end = ptr + input.length(); - - HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER]; - - if (!alive_ctx(ssn)) - { - HttpModule::increment_peg_counts(PEG_JS_EXTERNAL); - trace_logf(2, http_trace, TRACE_JS_PROC, current_packet, - "script starts\n"); - } - else - trace_logf(2, http_trace, TRACE_JS_PROC, current_packet, - "script continues\n"); - - auto& js_ctx = ssn->acquire_js_ctx(js_norm_param); - - while (ptr < end) - { - trace_logf(1, http_trace, TRACE_JS_PROC, 
current_packet, - "external script at %zd offset\n", ptr - (const char*)input.start()); - - auto ret = js_normalize(js_ctx, current_packet, end, ptr, true); - - switch (ret) - { - case JSTokenizer::EOS: - case JSTokenizer::SCRIPT_CONTINUE: - break; - case JSTokenizer::SCRIPT_ENDED: - case JSTokenizer::CLOSING_TAG: - assert(false); // should not be present in external - break; - case JSTokenizer::BAD_TOKEN: - case JSTokenizer::WRONG_CLOSING_SYMBOL: - case JSTokenizer::ENDED_IN_INNER_SCOPE: - *infractions += INF_JS_BAD_TOKEN; - events->create_event(EVENT_JS_BAD_TOKEN); - ssn->js_built_in_event = true; - break; - case JSTokenizer::IDENTIFIER_OVERFLOW: - HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW); - *infractions += INF_JS_IDENTIFIER_OVERFLOW; - events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW); - ssn->js_built_in_event = true; - break; - case JSTokenizer::TEMPLATE_NESTING_OVERFLOW: - case JSTokenizer::BRACKET_NESTING_OVERFLOW: - *infractions += INF_JS_BRACKET_NEST_OVERFLOW; - events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW); - ssn->js_built_in_event = true; - break; - case JSTokenizer::SCOPE_NESTING_OVERFLOW: - *infractions += INF_JS_SCOPE_NEST_OVERFLOW; - events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW); - ssn->js_built_in_event = true; - break; - default: - assert(false); - break; - } - - if (js_ctx.is_unescape_nesting_seen()) - { - *infractions += INF_JS_OBFUSCATION_EXCD; - events->create_event(EVENT_JS_OBFUSCATION_EXCD); - } - if (js_ctx.is_mixed_encoding_seen()) - { - *infractions += INF_MIXED_ENCODINGS; - events->create_event(EVENT_MIXED_ENCODINGS); - } - if (js_ctx.is_closing_tag_seen()) - { - *infractions += INF_JS_CLOSING_TAG; - events->create_event(EVENT_JS_CLOSING_TAG); - } - if (js_ctx.is_buffer_adjusted()) - output.set_accumulation(true); - - if (ssn->js_built_in_event) - break; - } - - debug_logf(4, http_trace, TRACE_JS_PROC, current_packet, - "input data was %s\n", final_portion ? 
"last one in PDU" : "a part of PDU"); - - uint32_t data_len = js_ctx.script_size(); - - if (data_len) - { - const char* data = final_portion ? js_ctx.take_script() : js_ctx.get_script(); - - if (data) - { - trace_logf(1, http_trace, TRACE_JS_DUMP, current_packet, - "js_data[%u]: %.*s\n", data_len, data_len, data); - - output.set(data_len, (const uint8_t*)data, final_portion); - } - } + static constexpr int script_start_length = sizeof("events[HttpCommon::SRC_SERVER]; +static int match_otag(void*, void*, int index, void* ptr, void*) +{ + *(uint8_t**)ptr += index; + return 1; +} - bool script_continue = ssn->js_continue; - bool script_external = false; +static int match_attr(void* pid, void*, int index, void* sctx, void*) +{ + MatchContext* ctx = (MatchContext*)sctx; + AttrId id = (AttrId)(uintptr_t)pid; + const char* c; - while (ptr < end) + switch (id) { - if (!script_continue) - { - if (!mpse_otag->find(ptr, end - ptr, match_otag, false, &ptr)) - break; - if (ptr >= end) - break; - - MatchContext sctx = {ptr, true, false, false}; - - if (ptr[0] == '>') - ptr++; - else - { - if (!mpse_attr->find(ptr, end - ptr, match_attr, false, &sctx) || ptr == sctx.next) - break; // the opening tag never ends - ptr = sctx.next; - } - - trace_logf(1, http_trace, TRACE_JS_PROC, current_packet, - "opening tag at %zd offset\n", ptr - (const char*)input.start()); - - trace_logf(2, http_trace, TRACE_JS_PROC, current_packet, - "script attributes [%s, %s, %s]\n", - sctx.is_shortened ? "shortened form" : "full form", - sctx.is_javascript ? "JavaScript type" : "unknown type", - sctx.is_external ? 
"external source" : "inline"); - - if (sctx.is_shortened) - { - *infractions += INF_JS_SHORTENED_TAG; - events->create_event(EVENT_JS_SHORTENED_TAG); - continue; - } - - if (!sctx.is_javascript) - continue; - - script_external = sctx.is_external; - - // script found - if (!script_external) - HttpModule::increment_peg_counts(PEG_JS_INLINE); - } - - auto& js_ctx = ssn->acquire_js_ctx(js_norm_param); - auto output_size_before = js_ctx.script_size(); - - auto ret = js_normalize(js_ctx, current_packet, end, ptr, false); - - switch (ret) - { - case JSTokenizer::EOS: - js_ctx.reset_depth(); - break; - case JSTokenizer::SCRIPT_ENDED: - break; - case JSTokenizer::SCRIPT_CONTINUE: - break; - case JSTokenizer::CLOSING_TAG: - *infractions += INF_JS_CLOSING_TAG; - events->create_event(EVENT_JS_CLOSING_TAG); - break; - case JSTokenizer::BAD_TOKEN: - case JSTokenizer::WRONG_CLOSING_SYMBOL: - case JSTokenizer::ENDED_IN_INNER_SCOPE: - *infractions += INF_JS_BAD_TOKEN; - events->create_event(EVENT_JS_BAD_TOKEN); - break; - case JSTokenizer::IDENTIFIER_OVERFLOW: - HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW); - *infractions += INF_JS_IDENTIFIER_OVERFLOW; - events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW); - break; - case JSTokenizer::TEMPLATE_NESTING_OVERFLOW: - case JSTokenizer::BRACKET_NESTING_OVERFLOW: - *infractions += INF_JS_BRACKET_NEST_OVERFLOW; - events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW); - break; - case JSTokenizer::SCOPE_NESTING_OVERFLOW: - *infractions += INF_JS_SCOPE_NEST_OVERFLOW; - events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW); - break; - default: - assert(false); - break; - } - - if (script_external && output_size_before != js_ctx.script_size()) - { - *infractions += INF_JS_CODE_IN_EXTERNAL; - events->create_event(EVENT_JS_CODE_IN_EXTERNAL); - } - if (js_ctx.is_unescape_nesting_seen()) - { - *infractions += INF_JS_OBFUSCATION_EXCD; - events->create_event(EVENT_JS_OBFUSCATION_EXCD); - } - if (js_ctx.is_mixed_encoding_seen()) + case 
AID_SLASH: + if (*(ctx->next + index) == '>') { - *infractions += INF_MIXED_ENCODINGS; - events->create_event(EVENT_MIXED_ENCODINGS); + ctx->is_shortened = true; + ctx->next += index; + return 1; } - if (js_ctx.is_opening_tag_seen()) + else { - *infractions += INF_JS_OPENING_TAG; - events->create_event(EVENT_JS_OPENING_TAG); + ctx->is_shortened = false; + return 0; } - if (js_ctx.is_buffer_adjusted()) - output.set_accumulation(true); - - script_continue = ret == JSTokenizer::SCRIPT_CONTINUE; - } - - ssn->js_continue = script_continue; - - if (!alive_ctx(ssn)) - return; - debug_logf(4, http_trace, TRACE_JS_PROC, current_packet, - "input data was %s\n", final_portion ? "last one in PDU" : "a part of PDU"); + case AID_GT: + ctx->next += index; + return 1; - auto js_ctx = ssn->js_normalizer; - uint32_t data_len = js_ctx->script_size(); + case AID_SRC: + c = (const char*)ctx->next + index; + while (*c == ' ') c++; + ctx->is_external = ctx->is_external || *c == '='; + return 0; - if (data_len) - { - const char* data = final_portion ? 
js_ctx->take_script() : js_ctx->get_script(); + case AID_JS: + ctx->is_javascript = true; + return 0; - if (data) - { - trace_logf(1, http_trace, TRACE_JS_DUMP, current_packet, - "js_data[%u]: %.*s\n", data_len, data_len, data); + case AID_NON_JS: + ctx->is_javascript = false; + return 0; - output.set(data_len, (const uint8_t*)data, final_portion); - } + default: + assert(false); + return 1; } - - if (!script_continue && final_portion) - ssn->release_js_ctx(); } -void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* infractions, - HttpEventGen* events, int max_javascript_whitespaces) const +void js_normalize(const Field& input, Field& output, + const HttpParaList* params, HttpInfractions* inf, HttpEventGen* events) { + assert(params); + assert(inf); + assert(events); + bool js_present = false; int index = 0; const char* ptr = (const char*)input.start(); const char* const end = ptr + input.length(); + auto mpse_otag = params->js_norm_param.mpse_otag; + auto mpse_type = params->js_norm_param.mpse_type; + auto& uri_param = params->uri_param; JSState js; - js.allowed_spaces = max_javascript_whitespaces; + js.allowed_spaces = params->js_norm_param.max_javascript_whitespaces; js.allowed_levels = MAX_ALLOWED_OBFUSCATION; js.alerts = 0; @@ -448,7 +216,7 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i int mindex; // Search for beginning of a javascript - if (mpse_otag->find(ptr, end-ptr, search_js_found, false, &mindex) > 0) + if (mpse_otag->find(ptr, end-ptr, match_script, false, &mindex) > 0) { const char* js_start = ptr + mindex; const char* const angle_bracket = @@ -461,7 +229,7 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i { int mid; const int script_found = mpse_type->find( - js_start, (angle_bracket-js_start), search_html_found, false, &mid); + js_start, (angle_bracket-js_start), match_html, false, &mid); js_start = angle_bracket + 1; if (script_found > 0) @@ -518,17 
+286,17 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i { if (js.alerts & ALERT_LEVELS_EXCEEDED) { - *infractions += INF_JS_OBFUSCATION_EXCD; + *inf += INF_JS_OBFUSCATION_EXCD; events->create_event(EVENT_JS_OBFUSCATION_EXCD); } if (js.alerts & ALERT_SPACES_EXCEEDED) { - *infractions += INF_JS_EXCESS_WS; + *inf += INF_JS_EXCESS_WS; events->create_event(EVENT_JS_EXCESS_WS); } if (js.alerts & ALERT_MIXED_ENCODINGS) { - *infractions += INF_MIXED_ENCODINGS; + *inf += INF_MIXED_ENCODINGS; events->create_event(EVENT_MIXED_ENCODINGS); } } @@ -541,68 +309,140 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i } } -int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*) +void HttpJSNorm::flush_data(const void*& data, size_t& len) { - static constexpr int script_start_length = sizeof("script_size(); + data = jsn_ctx->take_script(); } -int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*) +bool HttpInlineJSNorm::pre_proc() { - *((int*) id_ptr) = (int)(uintptr_t)id; - return 1; + assert(mpse_otag); + assert(mpse_attr); + assert(http_events); + assert(infractions); + + if ((*infractions & INF_UNKNOWN_ENCODING) or (*infractions & INF_UNSUPPORTED_ENCODING)) + return false; + + if (src_ptr >= src_end) + return false; + + const Packet* packet = DetectionEngine::get_current_packet(); + + if (!script_continue) + { + while (true) + { + if (!mpse_otag->find((const char*)src_ptr, src_end - src_ptr, match_otag, false, &src_ptr) + || src_ptr >= src_end) + { + return false; + } + + MatchContext sctx = {src_ptr, true, false, false}; + + if (!mpse_attr->find((const char*)src_ptr, src_end - src_ptr, match_attr, false, &sctx) + || src_ptr == sctx.next || sctx.next >= src_end) + { + return false; + } + + src_ptr = sctx.next; + + trace_logf(1, js_trace, TRACE_PROC, packet, + "opening tag at %zd offset\n", src_ptr - page_start); + + trace_logf(2, js_trace, TRACE_PROC, packet, + 
"script attributes [%s, %s, %s]\n", + sctx.is_shortened ? "shortened form" : "full form", + sctx.is_javascript ? "JavaScript type" : "unknown type", + sctx.is_external ? "external source" : "inline"); + + if (sctx.is_shortened) + { + *infractions += INF_JS_SHORTENED_TAG; + http_events->create_event(EVENT_JS_SHORTENED_TAG); + continue; + } + + if (!sctx.is_javascript) + continue; + + ext_ref_type = sctx.is_external; + + break; + } + + if (!ext_ref_type) + HttpModule::increment_peg_counts(PEG_JS_INLINE); + } + + ext_script_type = false; + output_size = jsn_ctx->script_size(); + + trace_logf(3, js_trace, TRACE_DUMP, packet, + "original[%zu]: %.*s\n", src_end - src_ptr, (int)(src_end - src_ptr), src_ptr); + + return true; } -int HttpJsNorm::match_otag(void*, void*, int index, void* ptr, void*) +bool HttpInlineJSNorm::post_proc(int ret) { - *(char**)ptr += index; - return 1; + trace_logf(3, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), + "normalizer returned with %d '%s'\n", ret, jsn::ret2str(ret)); + + assert(http_events); + assert(infractions); + + if (ext_ref_type && output_size != jsn_ctx->script_size()) + { + *infractions += INF_JS_CODE_IN_EXTERNAL; + http_events->create_event(EVENT_JS_CODE_IN_EXTERNAL); + } + + script_continue = ret == (int)jsn::JSTokenizer::SCRIPT_CONTINUE; + + if (!script_continue) + jsn_ctx->reset_depth(); + + JSNorm::post_proc(ret); + + return true; // reuse context } -int HttpJsNorm::match_attr(void* pid, void*, int index, void* sctx, void*) +bool HttpExternalJSNorm::pre_proc() { - MatchContext* ctx = (MatchContext*)sctx; - AttrId id = (AttrId)(uintptr_t)pid; - const char* c; + if (src_ptr >= src_end) + return false; - switch (id) + const Packet* packet = DetectionEngine::get_current_packet(); + + if (!ext_script_type) { - case AID_SLASH: - if (*(ctx->next + index) == '>') - { - ctx->is_shortened = true; - ctx->next += index; - return 1; - } - else - { - ctx->is_shortened = false; - return 0; - } + 
HttpModule::increment_peg_counts(PEG_JS_EXTERNAL); + trace_logf(1, js_trace, TRACE_PROC, packet, + "external script starts\n"); + ext_script_type = true; + } + else + { + trace_logf(2, js_trace, TRACE_PROC, packet, + "script continues\n"); + } - case AID_GT: - ctx->next += index; - return 1; + trace_logf(3, js_trace, TRACE_DUMP, packet, + "original[%zu]: %.*s\n", src_end - src_ptr, (int)(src_end - src_ptr), src_ptr); - case AID_SRC: - c = ctx->next + index; - while (*c == ' ') c++; - ctx->is_external = ctx->is_external || *c == '='; - return 0; + return true; +} - case AID_JS: - ctx->is_javascript = true; - return 0; +bool HttpExternalJSNorm::post_proc(int ret) +{ + trace_logf(3, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), + "normalizer returned with %d '%s'\n", ret, jsn::ret2str(ret)); - case AID_NON_JS: - ctx->is_javascript = false; - return 0; + script_continue = ret == (int)jsn::JSTokenizer::SCRIPT_CONTINUE; - default: - assert(false); - ctx->is_external = false; - ctx->is_javascript = false; - return 1; - } + return JSNorm::post_proc(ret); } diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h index 90094afc3..ee61c14e0 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.h +++ b/src/service_inspectors/http_inspect/http_js_norm.h @@ -16,12 +16,14 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
//-------------------------------------------------------------------------- // http_js_norm.h author Tom Peters +// http_js_norm.h author Oleksandr Serhiienko #ifndef HTTP_JS_NORM_H #define HTTP_JS_NORM_H #include +#include "js_norm/js_norm.h" #include "search_engines/search_tool.h" #include "http_field.h" @@ -29,50 +31,61 @@ #include "http_event.h" #include "http_module.h" -//------------------------------------------------------------------------- -// HttpJsNorm class -//------------------------------------------------------------------------- +snort::SearchTool* js_create_mpse_open_tag(); +snort::SearchTool* js_create_mpse_tag_type(); +snort::SearchTool* js_create_mpse_tag_attr(); -class HttpJsNorm +void js_normalize(const Field& input, Field& output, const HttpParaList*, HttpInfractions*, HttpEventGen*); + +class HttpJSNorm : public snort::JSNorm { public: - HttpJsNorm(const HttpParaList::UriParam& uri_param_, - const HttpParaList::JsNormParam& js_norm_param_); - ~HttpJsNorm(); + HttpJSNorm(JSNormConfig* jsn_config) : snort::JSNorm(jsn_config) {} - void do_legacy(const Field& input, Field& output, HttpInfractions*, HttpEventGen*, - int max_javascript_whitespaces) const; - void do_inline(const Field& input, Field& output, HttpInfractions*, HttpFlowData*, bool) const; - void do_external(const Field& input, Field& output, HttpInfractions*, HttpFlowData*, bool) const; + void flush_data(const void*&, size_t&); - void configure(); + void link(const void* page, HttpEventGen* http_events_, HttpInfractions* infs) + { page_start = (const uint8_t*)page; http_events = http_events_; infractions = infs; } -private: - enum AttrId { AID_SLASH, AID_GT, AID_SRC, AID_JS, AID_NON_JS, AID_ECMA, AID_VB }; + uint64_t get_trans_num() const + { return trans_num; } - struct MatchContext - { - const char* next; - bool is_javascript; - bool is_external; - bool is_shortened; - }; +protected: + const uint8_t* page_start = nullptr; + HttpEventGen* http_events = nullptr; + HttpInfractions* 
infractions = nullptr; + uint64_t trans_num = 0; + bool script_continue = false; +}; + +class HttpInlineJSNorm : public HttpJSNorm +{ +public: + HttpInlineJSNorm(JSNormConfig* jsn_config, uint64_t tid, snort::SearchTool* mpse_open_tag, + snort::SearchTool* mpse_tag_attr) : + HttpJSNorm(jsn_config), mpse_otag(mpse_open_tag), mpse_attr(mpse_tag_attr), output_size(0), ext_ref_type(false) + { trans_num = tid; } - const HttpParaList::UriParam& uri_param; - const HttpParaList::JsNormParam& js_norm_param; - bool configure_once = false; +protected: + bool pre_proc() override; + bool post_proc(int) override; +private: snort::SearchTool* mpse_otag; snort::SearchTool* mpse_attr; - snort::SearchTool* mpse_type; // legacy only + size_t output_size; + bool ext_ref_type; +}; - static int search_js_found(void*, void*, int index, void*, void*); // legacy only - static int search_html_found(void* id, void*, int, void*, void*); // legacy only - static int match_otag(void*, void*, int, void*, void*); - static int match_attr(void*, void*, int, void*, void*); +class HttpExternalJSNorm : public HttpJSNorm +{ +public: + HttpExternalJSNorm(JSNormConfig* jsn_config, uint64_t tid) : HttpJSNorm(jsn_config) + { trans_num = tid; } - bool alive_ctx(const HttpFlowData* ssn) const - { return ssn->js_normalizer; } +protected: + bool pre_proc() override; + bool post_proc(int) override; }; #endif diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc index 28153a072..125c0630f 100755 --- a/src/service_inspectors/http_inspect/http_module.cc +++ b/src/service_inspectors/http_inspect/http_module.cc @@ -24,7 +24,6 @@ #include "http_module.h" #include "log/messages.h" -#include "trace/trace.h" #include "http_enum.h" #include "http_js_norm.h" @@ -45,18 +44,6 @@ HttpModule::~HttpModule() LiteralSearch::cleanup(script_detection_handle); } -static const Parameter js_norm_ident_ignore_param[] = -{ - { "ident_name", Parameter::PT_STRING, nullptr, 
nullptr, "name of the identifier to ignore" }, - { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } -}; - -static const Parameter js_norm_prop_ignore_param[] = -{ - { "prop_name", Parameter::PT_STRING, nullptr, nullptr, "name of the object property to ignore" }, - { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } -}; - const Parameter HttpModule::http_params[] = { { "request_depth", Parameter::PT_INT, "-1:max53", "-1", @@ -107,29 +94,6 @@ const Parameter HttpModule::http_params[] = { "normalize_javascript", Parameter::PT_BOOL, nullptr, "false", "use legacy normalizer to normalize JavaScript in response bodies" }, - { "js_norm_bytes_depth", Parameter::PT_INT, "-1:max53", "-1", - "number of input JavaScript bytes to normalize (-1 unlimited)" }, - - // range of accepted identifier names is (var_0000:var_ffff), so the max is 2^16 - { "js_norm_identifier_depth", Parameter::PT_INT, "0:65536", "65536", - "max number of unique JavaScript identifiers to normalize" }, - - { "js_norm_max_tmpl_nest", Parameter::PT_INT, "0:255", "32", - "maximum depth of template literal nesting that enhanced javascript normalizer " - "will process" }, - - { "js_norm_max_bracket_depth", Parameter::PT_INT, "1:65535", "256", - "maximum depth of bracket nesting that enhanced JavaScript normalizer will process" }, - - { "js_norm_max_scope_depth", Parameter::PT_INT, "1:65535", "256", - "maximum depth of scope nesting that enhanced JavaScript normalizer will process" }, - - { "js_norm_ident_ignore", Parameter::PT_LIST, js_norm_ident_ignore_param, nullptr, - "list of JavaScript ignored identifiers which will not be normalized" }, - - { "js_norm_prop_ignore", Parameter::PT_LIST, js_norm_prop_ignore_param, nullptr, - "list of JavaScript ignored object properties which will not be normalized" }, - { "max_javascript_whitespaces", Parameter::PT_INT, "1:65535", "200", "maximum consecutive whitespaces allowed within the JavaScript obfuscated data" }, @@ -217,25 +181,6 @@ ProfileStats* 
HttpModule::get_profile() const THREAD_LOCAL PegCount HttpModule::peg_counts[PEG_COUNT_MAX] = { }; -THREAD_LOCAL const Trace* http_trace = nullptr; - -static const TraceOption http_trace_options[] = -{ - { "js_proc", TRACE_JS_PROC, "enable JavaScript processing logging" }, - { "js_dump", TRACE_JS_DUMP, "enable JavaScript data logging" }, - { nullptr, 0, nullptr } -}; - -void HttpModule::set_trace(const Trace* trace) const -{ - http_trace = trace; -} - -const TraceOption* HttpModule::get_trace_options() const -{ - return http_trace_options; -} - bool HttpModule::begin(const char* fqn, int, SnortConfig*) { if (strcmp(fqn, "http_inspect")) @@ -320,34 +265,6 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*) { params->js_norm_param.normalize_javascript = val.get_bool(); } - else if (val.is("js_norm_identifier_depth")) - { - params->js_norm_param.js_identifier_depth = val.get_int32(); - } - else if (val.is("js_norm_bytes_depth")) - { - params->js_norm_param.js_norm_bytes_depth = val.get_int64(); - } - else if (val.is("js_norm_max_tmpl_nest")) - { - params->js_norm_param.max_template_nesting = val.get_uint8(); - } - else if (val.is("js_norm_max_bracket_depth")) - { - params->js_norm_param.max_bracket_depth = val.get_uint32(); - } - else if (val.is("js_norm_max_scope_depth")) - { - params->js_norm_param.max_scope_depth = val.get_uint32(); - } - else if (val.is("ident_name")) - { - params->js_norm_param.ignored_ids.insert(val.get_string()); - } - else if (val.is("prop_name")) - { - params->js_norm_param.ignored_props.insert(val.get_string()); - } else if (val.is("max_javascript_whitespaces")) { params->js_norm_param.max_javascript_whitespaces = val.get_uint16(); @@ -536,8 +453,6 @@ bool HttpModule::end(const char* fqn, int, SnortConfig*) params->uri_param.iis_unicode_code_page); } - params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, params->js_norm_param); - params->script_detection_handle = script_detection_handle; 
prepare_http_header_list(params); @@ -561,7 +476,16 @@ HttpParaList::~HttpParaList() HttpParaList::JsNormParam::~JsNormParam() { - delete js_norm; + delete mpse_otag; + delete mpse_type; + delete mpse_attr; +} + +void HttpParaList::JsNormParam::configure() const +{ + mpse_otag = js_create_mpse_open_tag(); + mpse_type = js_create_mpse_tag_type(); + mpse_attr = js_create_mpse_tag_attr(); } // Characters that should not be percent-encoded diff --git a/src/service_inspectors/http_inspect/http_module.h b/src/service_inspectors/http_inspect/http_module.h index 46d8834d1..78c878fb5 100755 --- a/src/service_inspectors/http_inspect/http_module.h +++ b/src/service_inspectors/http_inspect/http_module.h @@ -29,6 +29,7 @@ #include "helpers/literal_search.h" #include "mime/file_mime_config.h" #include "profiler/profiler.h" +#include "search_engines/search_tool.h" #include "http_enum.h" #include "http_str_to_code.h" @@ -42,8 +43,6 @@ class Trace; struct SnortConfig; } -extern THREAD_LOCAL const snort::Trace* http_trace; - struct HttpParaList { public: @@ -70,24 +69,21 @@ public: struct JsNormParam { - public: ~JsNormParam(); + + void configure() const; + bool normalize_javascript = false; - int64_t js_norm_bytes_depth = -1; - int32_t js_identifier_depth = 0; - uint8_t max_template_nesting = 32; - uint32_t max_bracket_depth = 256; - uint32_t max_scope_depth = 256; - std::unordered_set ignored_ids; - std::unordered_set ignored_props; int max_javascript_whitespaces = 200; - class HttpJsNorm* js_norm = nullptr; + + mutable snort::SearchTool* mpse_otag = nullptr; + mutable snort::SearchTool* mpse_type = nullptr; + mutable snort::SearchTool* mpse_attr = nullptr; }; JsNormParam js_norm_param; struct UriParam { - public: UriParam(); ~UriParam() { delete[] unicode_map; } @@ -195,9 +191,6 @@ public: bool is_bindable() const override { return true; } - void set_trace(const snort::Trace*) const override; - const snort::TraceOption* get_trace_options() const override; - #ifdef REG_TEST static 
const PegInfo* get_peg_names() { return peg_names; } static const PegCount* get_peg_counts() { return peg_counts; } diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc index d2c46878c..6ab2b765b 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.cc +++ b/src/service_inspectors/http_inspect/http_msg_body.cc @@ -27,6 +27,7 @@ #include "file_api/file_flows.h" #include "file_api/file_service.h" #include "helpers/buffer_data.h" +#include "js_norm/js_enum.h" #include "pub_sub/http_request_body_event.h" #include "http_api.h" @@ -41,6 +42,9 @@ using namespace snort; using namespace HttpCommon; using namespace HttpEnums; +using namespace jsn; + +extern THREAD_LOCAL const snort::Trace* js_trace; HttpMsgBody::HttpMsgBody(const uint8_t* buffer, const uint16_t buf_size, HttpFlowData* session_data_, SourceId source_id_, bool buf_owner, Flow* flow_, @@ -236,6 +240,9 @@ void HttpMsgBody::analyze() { do_file_decompression(decoded_body, decompressed_file_body); + if (decompressed_file_body.length() > 0 and session_data->js_ctx[source_id]) + session_data->js_ctx[source_id]->tick(); + uint32_t& partial_detect_length = session_data->partial_detect_length[source_id]; uint8_t*& partial_detect_buffer = session_data->partial_detect_buffer[source_id]; uint32_t& partial_js_detect_length = session_data->partial_js_detect_length[source_id]; @@ -268,9 +275,6 @@ void HttpMsgBody::analyze() else do_legacy_js_normalization(decompressed_file_body, js_norm_body); - if (decompressed_file_body.length() > 0) - ++session_data->js_data_idx; - const int32_t detect_length = (js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ? 
js_norm_body.length() : session_data->detect_depth_remaining[source_id]; @@ -441,29 +445,37 @@ void HttpMsgBody::fd_event_callback(void* context, int event) } } -void HttpMsgBody::do_enhanced_js_normalization(const Field& input, Field& output) +void HttpMsgBody::do_legacy_js_normalization(const Field& input, Field& output) { - if (session_data->js_data_lost_once) + if (!params->js_norm_param.normalize_javascript || source_id == SRC_CLIENT) + { + output.set(input); return; + } - auto infractions = transaction->get_infractions(source_id); - auto back = !session_data->partial_flush[source_id]; - auto http_header = get_header(source_id); - auto normalizer = params->js_norm_param.js_norm; + js_normalize(input, output, params, + transaction->get_infractions(source_id), session_data->events[source_id]); +} - if ((*infractions & INF_UNKNOWN_ENCODING) or (*infractions & INF_UNSUPPORTED_ENCODING)) - return; +HttpJSNorm* HttpMsgBody::acquire_js_ctx() +{ + HttpJSNorm* js_ctx = session_data->js_ctx[source_id]; - if (session_data->sync_js_data_idx()) + if (js_ctx) { - *infractions += INF_JS_DATA_LOST; - session_data->events[HttpCommon::SRC_SERVER]->create_event(EVENT_JS_DATA_LOST); - session_data->js_data_lost_once = true; - return; + if (js_ctx->get_trans_num() == trans_num) + return js_ctx; + + delete js_ctx; + js_ctx = nullptr; } + auto http_header = get_header(source_id); + if (!http_header) - return; + return nullptr; + + JSNormConfig* jsn_config = get_inspection_policy()->jsn_config; switch(http_header->get_content_type()) { @@ -483,27 +495,20 @@ void HttpMsgBody::do_enhanced_js_normalization(const Field& input, Field& output case CT_TEXT_X_ECMASCRIPT: case CT_TEXT_JSCRIPT: case CT_TEXT_LIVESCRIPT: - normalizer->do_external(input, output, infractions, session_data, back); + // an external script should be processed from the beginning + js_ctx = first_body ? 
new HttpExternalJSNorm(jsn_config, trans_num) : nullptr; break; case CT_APPLICATION_XHTML_XML: case CT_TEXT_HTML: - normalizer->do_inline(input, output, infractions, session_data, back); + js_ctx = new HttpInlineJSNorm(jsn_config, trans_num, params->js_norm_param.mpse_otag, + params->js_norm_param.mpse_attr); break; } -} -void HttpMsgBody::do_legacy_js_normalization(const Field& input, Field& output) -{ - if (!params->js_norm_param.normalize_javascript || source_id == SRC_CLIENT) - { - output.set(input); - return; - } + session_data->js_ctx[source_id] = js_ctx; - params->js_norm_param.js_norm->do_legacy(input, output, - transaction->get_infractions(source_id), session_data->events[source_id], - params->js_norm_param.max_javascript_whitespaces); + return js_ctx; } void HttpMsgBody::do_file_processing(const Field& file_data) @@ -715,10 +720,36 @@ const Field& HttpMsgBody::get_norm_js_data() return norm_js_data; } - do_enhanced_js_normalization(decompressed_file_body, norm_js_data); + auto jsn = acquire_js_ctx(); + + if (!jsn) + { + norm_js_data.set(STAT_NO_SOURCE); + return norm_js_data; + } + + const void* dst = nullptr; + size_t dst_len = HttpCommon::STAT_NOT_PRESENT; + auto back = !session_data->partial_flush[source_id]; + + jsn->link(decompressed_file_body.start(), session_data->events[source_id], transaction->get_infractions(source_id)); + jsn->normalize(decompressed_file_body.start(), decompressed_file_body.length(), dst, dst_len); + + debug_logf(4, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), + "input data was %s\n", back ? 
"last one in PDU" : "a part of PDU"); - if (norm_js_data.length() == STAT_NOT_COMPUTE) + if (!dst or !dst_len) norm_js_data.set(STAT_NOT_PRESENT); + else + { + if (back) + jsn->flush_data(dst, dst_len); + + trace_logf(1, js_trace, TRACE_DUMP, DetectionEngine::get_current_packet(), + "js_data[%u]: %.*s\n", (unsigned)dst_len, (int)dst_len, (const char*)dst); + + norm_js_data.set(dst_len, (const uint8_t*)dst, back); + } return norm_js_data; } diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h index ab10bed5b..10ceb6b17 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.h +++ b/src/service_inspectors/http_inspect/http_msg_body.h @@ -71,8 +71,9 @@ private: void do_file_processing(const Field& file_data); void do_utf_decoding(const Field& input, Field& output); void do_file_decompression(const Field& input, Field& output); - void do_enhanced_js_normalization(const Field& input, Field& output); void do_legacy_js_normalization(const Field& input, Field& output); + HttpJSNorm* acquire_js_ctx(); + void clean_partial(uint32_t& partial_inspected_octets, uint32_t& partial_detect_length, uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length); void bookkeeping_regular_flush(uint32_t& partial_detect_length, diff --git a/src/service_inspectors/http_inspect/http_msg_request.cc b/src/service_inspectors/http_inspect/http_msg_request.cc index 1895673a4..aa9a80fae 100644 --- a/src/service_inspectors/http_inspect/http_msg_request.cc +++ b/src/service_inspectors/http_inspect/http_msg_request.cc @@ -40,9 +40,6 @@ HttpMsgRequest::HttpMsgRequest(const uint8_t* buffer, const uint16_t buf_size, { transaction->set_request(this); get_related_sections(); - session_data->release_js_ctx(); - session_data->reset_js_ident_ctx(); - session_data->reset_js_data_idx(); } HttpMsgRequest::~HttpMsgRequest() diff --git a/src/service_inspectors/http_inspect/http_msg_section.cc 
b/src/service_inspectors/http_inspect/http_msg_section.cc index 842c87eb7..6c7cb7d38 100644 --- a/src/service_inspectors/http_inspect/http_msg_section.cc +++ b/src/service_inspectors/http_inspect/http_msg_section.cc @@ -169,24 +169,24 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf) switch (buf.type) { case HTTP_BUFFER_CLIENT_BODY: - { + { if (source_id != SRC_CLIENT) return Field::FIELD_NULL; return (get_body() != nullptr) ? get_body()->get_classic_client_body() : Field::FIELD_NULL; - } + } case HTTP_BUFFER_COOKIE: case HTTP_BUFFER_RAW_COOKIE: - { + { if (header[buffer_side] == nullptr) return Field::FIELD_NULL; return (buf.type == HTTP_BUFFER_COOKIE) ? header[buffer_side]->get_classic_norm_cookie() : header[buffer_side]->get_classic_raw_cookie(); - } + } case HTTP_BUFFER_HEADER: case HTTP_BUFFER_TRAILER: case HTTP_HEADER_TEST: case HTTP_TRAILER_TEST: - { + { HttpMsgHeadShared* const head = (buf.type == HTTP_BUFFER_HEADER || buf.type == HTTP_HEADER_TEST) ? (HttpMsgHeadShared*)header[buffer_side] : (HttpMsgHeadShared*)trailer[buffer_side]; if (head == nullptr) @@ -194,18 +194,18 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf) if (buf.sub_id == 0) return head->get_classic_norm_header(); return head->get_header_value_norm((HeaderId)buf.sub_id); - } + } case HTTP_BUFFER_METHOD: - { + { return (request != nullptr) ? request->get_method() : Field::FIELD_NULL; - } + } case HTTP_BUFFER_RAW_BODY: - { + { return (get_body() != nullptr) ? get_body()->get_raw_body() : Field::FIELD_NULL; - } + } case HTTP_BUFFER_RAW_HEADER: case HTTP_BUFFER_RAW_TRAILER: - { + { HttpMsgHeadShared* const head = (buf.type == HTTP_BUFFER_RAW_HEADER) ? 
(HttpMsgHeadShared*)header[buffer_side] : (HttpMsgHeadShared*)trailer[buffer_side]; if (head == nullptr) @@ -213,31 +213,31 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf) if (buf.sub_id == 0) return head->msg_text; return head->get_all_header_values_raw((HeaderId)buf.sub_id); - } + } case HTTP_BUFFER_RAW_REQUEST: - { + { return (request != nullptr) ? request->msg_text : Field::FIELD_NULL; - } + } case HTTP_BUFFER_RAW_STATUS: - { + { return (status != nullptr) ? status->msg_text : Field::FIELD_NULL; - } + } case HTTP_BUFFER_STAT_CODE: - { + { return (status != nullptr) ? status->get_status_code() : Field::FIELD_NULL; - } + } case HTTP_BUFFER_STAT_MSG: - { + { return (status != nullptr) ? status->get_reason_phrase() : Field::FIELD_NULL; - } + } case HTTP_BUFFER_TRUE_IP: - { + { return (header[SRC_CLIENT] != nullptr) ? header[SRC_CLIENT]->get_true_ip() : Field::FIELD_NULL; - } + } case HTTP_BUFFER_URI: case HTTP_BUFFER_RAW_URI: - { + { const bool raw = (buf.type == HTTP_BUFFER_RAW_URI); if (request == nullptr) return Field::FIELD_NULL; @@ -263,29 +263,29 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf) } assert(false); return Field::FIELD_NULL; - } + } case HTTP_BUFFER_VERSION: - { + { HttpMsgStart* start = (buffer_side == SRC_CLIENT) ? (HttpMsgStart*)request : (HttpMsgStart*)status; return (start != nullptr) ? 
start->get_version() : Field::FIELD_NULL; - } + } case BUFFER_VBA_DATA: - { + { HttpMsgBody* msg_body = get_body(); if (msg_body) - return msg_body->get_decomp_vba_data(); + return msg_body->get_decomp_vba_data(); else return Field::FIELD_NULL; - } + } case BUFFER_JS_DATA: - { + { HttpMsgBody* msg_body = get_body(); if (msg_body) - return msg_body->get_norm_js_data(); + return msg_body->get_norm_js_data(); else return Field::FIELD_NULL; - } + } default: assert(false); return Field::FIELD_NULL; @@ -519,4 +519,3 @@ void HttpMsgSection::print_peg_counts(FILE* output) const } #endif - diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index 99001b840..1db07a9e3 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -328,16 +328,9 @@ const RuleMap HttpModule::http_events[] = { EVENT_LONG_SCHEME, "HTTP URI scheme longer than 10 characters" }, { EVENT_HTTP2_UPGRADE_REQUEST, "HTTP/1 client requested HTTP/2 upgrade" }, { EVENT_HTTP2_UPGRADE_RESPONSE, "HTTP/1 server granted HTTP/2 upgrade" }, - { EVENT_JS_BAD_TOKEN, "bad token in JavaScript" }, - { EVENT_JS_OPENING_TAG, "unexpected script opening tag in JavaScript" }, - { EVENT_JS_CLOSING_TAG, "unexpected script closing tag in JavaScript" }, { EVENT_JS_CODE_IN_EXTERNAL, "JavaScript code under the external script tags" }, { EVENT_JS_SHORTENED_TAG, "script opening tag in a short form" }, - { EVENT_JS_IDENTIFIER_OVERFLOW, "max number of unique JavaScript identifiers reached" }, - { EVENT_JS_BRACKET_NEST_OVERFLOW, "excessive JavaScript bracket nesting" }, { EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS, "Consecutive commas in HTTP Accept-Encoding header" }, - { EVENT_JS_DATA_LOST, "data gaps during JavaScript normalization" }, - { EVENT_JS_SCOPE_NEST_OVERFLOW, "excessive JavaScript scope nesting" }, { EVENT_INVALID_SUBVERSION, "HTTP/1 version other than 1.0 or 1.1" }, { EVENT_VERSION_0, "HTTP version 
in start line is 0" }, { EVENT_VERSION_HIGHER_THAN_1, "HTTP version in start line is higher than 1" }, @@ -390,9 +383,6 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] = { CountType::SUM, "total_bytes", "total HTTP data bytes inspected" }, { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" }, { CountType::SUM, "js_external_scripts", "total number of external JavaScripts processed" }, - { CountType::SUM, "js_bytes", "total number of JavaScript bytes processed" }, - { CountType::SUM, "js_identifiers", "total number of unique JavaScript identifiers processed" }, - { CountType::SUM, "js_identifier_overflows", "total number of unique JavaScript identifier limit overflows" }, { CountType::SUM, "skip_mime_attach", "total number of HTTP requests with too many MIME attachments to inspect" }, { CountType::END, nullptr, nullptr } }; diff --git a/src/service_inspectors/http_inspect/ips_http_buffer.cc b/src/service_inspectors/http_inspect/ips_http_buffer.cc index 989f4ee34..a3a9fa219 100644 --- a/src/service_inspectors/http_inspect/ips_http_buffer.cc +++ b/src/service_inspectors/http_inspect/ips_http_buffer.cc @@ -1069,46 +1069,6 @@ static const IpsApi version_api = nullptr }; -//------------------------------------------------------------------------- -// js_data -//------------------------------------------------------------------------- -// - -#undef IPS_OPT -#define IPS_OPT "js_data" -#undef IPS_HELP -#define IPS_HELP "rule option to set detection cursor to normalized JavaScript data" -static Module* js_data_mod_ctor() -{ - return new HttpBufferRuleOptModule(IPS_OPT, IPS_HELP, BUFFER_JS_DATA, CAT_SET_FAST_PATTERN, - BUFFER_PSI_JS_DATA); -} - -static const IpsApi js_data_api = -{ - { - PT_IPS_OPTION, - sizeof(IpsApi), - IPSAPI_VERSION, - 1, - API_RESERVED, - API_OPTIONS, - IPS_OPT, - IPS_HELP, - js_data_mod_ctor, - HttpBufferRuleOptModule::mod_dtor - }, - OPT_TYPE_DETECTION, - 0, PROTO_BIT__TCP, - nullptr, - nullptr, - nullptr, - 
nullptr, - HttpBufferIpsOption::opt_ctor, - HttpBufferIpsOption::opt_dtor, - nullptr -}; - //------------------------------------------------------------------------- // plugins //------------------------------------------------------------------------- @@ -1130,4 +1090,4 @@ const BaseApi* ips_http_trailer = &trailer_api.base; const BaseApi* ips_http_true_ip = &true_ip_api.base; const BaseApi* ips_http_uri = &uri_api.base; const BaseApi* ips_http_version = &version_api.base; -const BaseApi* ips_js_data = &js_data_api.base; + diff --git a/src/service_inspectors/http_inspect/test/http_module_test.cc b/src/service_inspectors/http_inspect/test/http_module_test.cc index de5324408..7a7cdb990 100755 --- a/src/service_inspectors/http_inspect/test/http_module_test.cc +++ b/src/service_inspectors/http_inspect/test/http_module_test.cc @@ -58,6 +58,8 @@ void DecodeConfig::set_decompress_pdf(bool) {} void DecodeConfig::set_decompress_swf(bool) {} void DecodeConfig::set_decompress_zip(bool) {} void DecodeConfig::set_decompress_vba(bool) {} + +SearchTool::~SearchTool() {} } void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { } @@ -69,12 +71,10 @@ int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return long HttpTestManager::print_amount {}; bool HttpTestManager::print_hex {}; -HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, - const HttpParaList::JsNormParam& js_norm_param_) : - uri_param(uri_param_), js_norm_param(js_norm_param_), mpse_otag(nullptr), mpse_attr(nullptr), - mpse_type(nullptr) {} -HttpJsNorm::~HttpJsNorm() = default; -void HttpJsNorm::configure(){} +snort::SearchTool* js_create_mpse_open_tag() { return nullptr; } +snort::SearchTool* js_create_mpse_tag_type() { return nullptr; } +snort::SearchTool* js_create_mpse_tag_attr() { return nullptr; } + int64_t Parameter::get_int(char const*) { return 0; } TEST_GROUP(http_peg_count_test) diff --git a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc 
b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc index f6f6e17e5..da3bc6b1c 100755 --- a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc +++ b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc @@ -53,17 +53,16 @@ void DecodeConfig::set_decompress_pdf(bool) {} void DecodeConfig::set_decompress_swf(bool) {} void DecodeConfig::set_decompress_zip(bool) {} void DecodeConfig::set_decompress_vba(bool) {} +SearchTool::~SearchTool() {} } +snort::SearchTool* js_create_mpse_open_tag() { return nullptr; } +snort::SearchTool* js_create_mpse_tag_type() { return nullptr; } +snort::SearchTool* js_create_mpse_tag_attr() { return nullptr; } + void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { } void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { } -HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, - const HttpParaList::JsNormParam& js_norm_param_) : - uri_param(uri_param_), js_norm_param(js_norm_param_), mpse_otag(nullptr), mpse_attr(nullptr), - mpse_type(nullptr) {} -HttpJsNorm::~HttpJsNorm() = default; -void HttpJsNorm::configure() {} int64_t Parameter::get_int(char const*) { return 0; } TEST_GROUP(http_inspect_uri_norm) diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index f69c3f00c..19372c3af 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -18,24 +18,13 @@ set( UTIL_INCLUDES util_utf.h ) -FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/js_tokenizer.l - ${CMAKE_CURRENT_BINARY_DIR}/js_tokenizer.cc - COMPILE_FLAGS ${FLEX_FLAGS} -) - add_library ( utils OBJECT ${UTIL_INCLUDES} ${SNPRINTF_SOURCES} - ${FLEX_js_tokenizer_OUTPUTS} boyer_moore.cc dnet_header.h dyn_array.cc dyn_array.h - js_identifier_ctx.cc - js_identifier_ctx.h - js_normalizer.cc - js_normalizer.h - js_tokenizer.h kmap.cc sflsq.cc snort_bounds.h diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt index 12a694b3b..1c6a91c68 100644 --- 
a/src/utils/test/CMakeLists.txt +++ b/src/utils/test/CMakeLists.txt @@ -5,62 +5,6 @@ add_cpputest( boyer_moore_test add_cpputest( memcap_allocator_test ) -FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/../js_tokenizer.l - ${CMAKE_CURRENT_BINARY_DIR}/../js_tokenizer.cc - COMPILE_FLAGS ${FLEX_FLAGS} -) - -add_catch_test( js_normalizer_test - SOURCES - ${FLEX_js_tokenizer_OUTPUTS} - ../js_identifier_ctx.cc - ../js_normalizer.cc - ../streambuf.cc - ../util_cstring.cc - js_test_options.cc - js_test_utils.cc -) - -if (ENABLE_BENCHMARK_TESTS) - add_catch_test( js_norm_benchmark - SOURCES - ${FLEX_js_tokenizer_OUTPUTS} - ../js_identifier_ctx.cc - ../js_normalizer.cc - ../streambuf.cc - ../util_cstring.cc - js_test_options.cc - js_test_utils.cc - ) -endif(ENABLE_BENCHMARK_TESTS) - -add_catch_test( js_dealias_test - SOURCES - ${FLEX_js_tokenizer_OUTPUTS} - ../js_identifier_ctx.cc - ../js_normalizer.cc - ../streambuf.cc - ../util_cstring.cc - js_test_options.cc - js_test_utils.cc -) - -add_catch_test( js_unescape_test - SOURCES - ${FLEX_js_tokenizer_OUTPUTS} - ../js_identifier_ctx.cc - ../js_normalizer.cc - ../streambuf.cc - ../util_cstring.cc - js_test_options.cc - js_test_utils.cc -) - -add_catch_test( js_identifier_ctx_test - SOURCES - ../js_identifier_ctx.cc -) - add_catch_test( streambuf_test SOURCES ../streambuf.cc