From: Mike Stepanek (mstepane) Date: Wed, 13 Oct 2021 17:59:05 +0000 (+0000) Subject: Merge pull request #3095 in SNORT/snort3 from ~OSERHIIE/snort3:js_built_ins to master X-Git-Tag: 3.1.15.0~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=18d33891b1f964dc86f90fbf9ac7b91b7a3729fc;p=thirdparty%2Fsnort3.git Merge pull request #3095 in SNORT/snort3 from ~OSERHIIE/snort3:js_built_ins to master Squashed commit of the following: commit d253c19d845340b83e7abac8085d07b38b5ebca4 Author: Oleksandr Serhiienko Date: Wed Sep 29 17:00:12 2021 +0300 http_inspect: do not normalize JavaScript built-in identifiers * utils: update JSTokenizer to track the scope * utils: update JSTokenizer to track JavaScript built-in identifiers * utils: update JSIdentifierCtx to check for JavaScript built-in identifiers * utils: add unit tests for scope and identifiers tracking * utils: add benchmarks for scope and identifiers tracking * http_inspect: add js_norm_max_scope_depth config option to limit maximum depth of scope nesting * http_inspect: add js_norm_built_in_ident config option as a list of JavaScript built-in identifiers * http_inspect: update 119:271 rule to alert on both template and scope depth limit reached * http_inspect: update 119:265 rule to alert on the scope mismatch * http_inspect: update dev_notes.txt with info about JavaScript built-in identifiers and scope tracking * lua: update snort_defaults.lua with a default list of JavaScript built-in identifiers * doc: update user/http_inspect.txt with info about JavaScript built-in identifiers and scope tracking * doc: update reference/builtin_stubs.txt with updates in 119:271 rule description --- diff --git a/doc/reference/builtin_stubs.txt b/doc/reference/builtin_stubs.txt index ba29eaf3c..ddf0e8448 100644 --- a/doc/reference/builtin_stubs.txt +++ b/doc/reference/builtin_stubs.txt @@ -1198,9 +1198,11 @@ network traffic and may be an indication that an attacker is trying to exhaust r In JavaScript, template 
literals can have substitutions, that in turn can have nested template literals, which requires a stack to track for proper whitespace normalization. -When the depth of nesting exceeds limit set in http_inspect.js_norm_max_tmpl_nest, -this alert is raised. This alert is not expected for typical network traffic and may be -an indication that an attacker is trying to exhaust resources. +Also, the normalization tracks the current scope, which requires a stack as well. +When the depth of nesting exceeds the limit set in http_inspect.js_norm_max_tmpl_nest or in +http_inspect.js_norm_max_scope_depth, this alert is raised. This alert is not expected +for typical network traffic and may be an indication that an attacker is trying to exhaust +resources. 119:272 diff --git a/doc/user/http_inspect.txt b/doc/user/http_inspect.txt index c03215bb8..6f9fd5f5e 100755 --- a/doc/user/http_inspect.txt +++ b/doc/user/http_inspect.txt @@ -202,6 +202,36 @@ is present to limit the amount of memory dedicated to this tracking. This option is used only when js_normalization_depth is not 0. This feature is currently experimental and still under development. +===== js_norm_max_scope_depth + +js_norm_max_scope_depth = N {0 : 65535} (default 256) is an option of the enhanced +JavaScript normalizer that determines the deepest level of nested scope. The scope +term includes code sections("{}"), parentheses("()") and brackets("[]"). This option +is present to limit the amount of memory dedicated to this tracking. This option is used +only when js_normalization_depth is not 0. This feature is currently experimental and +still under development. + +===== js_norm_built_in_ident + +js_norm_built_in_ident = {<list of built-in identifiers>}. +The default list is present in "snort_defaults.lua". + +The built-in JavaScript identifiers will be placed as is, without substitution. Normalizer +tracks built-in identifier expressions based on the configured list of built-in names.
+The built-in identifier expression is the built-in name (function or object) and the chain +of dot and bracket accessors after it, including the function calls. +For example: + + console.log("bar") + document.getElementById("id").text + eval("script") + foo["bar"] + +The list must contain object and function names only. +For example: + + http_inspect.js_norm_built_in_ident = { 'console', 'document', 'eval', 'foo' } + ===== xff_headers This configuration supports defining custom x-forwarded-for type headers. In a diff --git a/lua/snort_defaults.lua b/lua/snort_defaults.lua index 36f42256f..547cd3781 100644 --- a/lua/snort_defaults.lua +++ b/lua/snort_defaults.lua @@ -1169,6 +1169,106 @@ default_low_port_scan = icmp_sweep = icmp_low_sweep, } +--------------------------------------------------------------------------- +-- ECMAScript Standard Built-in Objects and Functions Names (Identifiers) +-- Also, might include other non-specification identifiers like those +-- are part of WebAPI or frameworks +--------------------------------------------------------------------------- +default_js_norm_built_in_ident = +{ + -- GlobalObject.Functions + 'eval', 'PerformEval', 'HostEnsureCanCompileStrings', 'EvalDeclarationInstantiation', + 'isFinite', 'isNaN', 'parseFloat', 'parseInt', 'Encode', 'Decode', 'decodeURI', + 'decodeURIComponent', 'encodeURI', 'encodeURIComponent', + + -- GlobalObject.Constructors + 'AggregateError', 'Array', 'ArrayBuffer', 'BigInt', 'BitInt64Array', 'BigUint64Array', + 'Boolean', 'DataView', 'Date', 'Error', 'EvalError', 'FinalizationRegistry', + 'Float32Array', 'Float64Array', 'Function', 'Int8Array', 'Int16Array', 'Int32Array', + 'Map', 'NativeError', 'Number', 'Object', 'Promise', 'Proxy', + 'RangeError', 'ReferenceError', 'RegExp', 'Set', 'SharedArrayBuffer', 'String', + 'Symbol', 'SyntaxError', 'TypeError', 'Uint8Array', 'Uint8ClampedArray', 'Uint16Array', + 'Uint32Array', 'URIError', 'WeakMap', 'WeakRef', 'WeakSet', + + -- Atomics + 'Atomics', 
'WaiterList', 'ValidateIntegerTypedArray', 'ValidateAtomicAccess', 'GetWaiterList', + 'EnterCriticalSection', 'LeaveCriticalSection', 'AddWaiter', 'RemoveWaiter', 'RemoveWaiters', + 'SuspendAgent', 'NotifyWaiter', 'AtomicReadModifyWrite', 'ByteListBitwiseOp', 'ByteListEqual', + + -- JSON + 'JSON', 'InternalizeJSONProperty', 'SerializeJSONProperty', 'QuoteJSONString', 'UnicodeEscape', + 'SerializeJSONObject','SerializeJSONArray', + + -- Math + 'Math', + + -- Reflect + 'Reflect', + + -- Date and Time + 'LocalTZA', 'LocalTime', 'UTC', 'MakeTime', 'MakeDay', 'MakeDate', 'TimeClip', 'TimeString', + 'DateString', 'TimeZoneString', 'ToDateString', + + -- String + 'StringPad', 'GetSubstitution', 'SplitMatch', 'TrimString', + + -- RegExp + 'RegExpExec', 'RegExpBuiltinExec', 'AdvanceStringIndex', 'RegExpHasFlag', + + -- TypedArray + 'TypedArray', 'TypedArraySpeciesCreate', 'TypedArrayCreate', 'ValidateTypedArray', + 'AllocateTypedArray', 'InitializeTypedArrayFromTypedArray', + 'InitializeTypedArrayFromArrayBuffer', 'InitializeTypedArrayFromList', + 'InitializeTypedArrayFromArrayLike', 'AllocateTypedArrayBuffer', + + -- ArrayBuffer + 'AllocateArrayBuffer', 'IsDetachedBuffer', 'DetachArrayBuffer', 'CloneArrayBuffer', + 'IsUnsignedElementType', 'IsUnclampedIntegerElementType', 'IsBigIntElementType', + 'IsNoTearConfiguration', 'RawBytesToNumeric', 'GetValueFromBuffer', 'NumericToRawBytes', + 'SetValueInBuffer', 'GetModifySetValueInBuffer', + + -- SharedArrayBuffer + 'AllocateSharedArrayBuffer', 'IsSharedArrayBuffer', + + -- DataView + 'GetViewValue', 'SetViewValue', 'getDataView', + + -- WeakRef + 'WeakRefDeref', + + -- Promise + 'IfAbruptRejectPromise', 'CreateResolvingFunctions', 'FulfillPromise', 'NewPromiseCapability', + 'IsPromise', 'RejectPromise', 'TriggerPromiseReactions', 'HostPromiseRejectionTracker', + 'NewPromiseReactionJob', 'NewPromiseResolveThenableJob', 'GetPromiseResolve', + 'PerformPromiseAll', 'PerformPromiseAllSettled', 'PerformPromiseAny', 
'PerformPromiseRace', + 'PromiseResolve', 'PerformPromiseThen', + + -- GeneratorFunction + 'GeneratorFunction', 'AsyncGeneratorFunction', + + -- Generator + 'Generator', 'GeneratorStart', 'GeneratorValidate', 'GeneratorResume', 'GeneratorResumeAbrupt', + 'GetGeneratorKind', 'GeneratorYield', 'Yield', 'CreateIteratorFromClosure', + + -- AsyncGenerator + 'AsyncGenerator', 'AsyncGeneratorStart', 'AsyncGeneratorValidate', 'AsyncGeneratorResolve', + 'AsyncGeneratorReject', 'AsyncGeneratorResumeNext', 'AsyncGeneratorEnqueue', + 'AsyncGeneratorYield', 'CreateAsyncIteratorFromClosure', + + -- AsyncFunction + 'AsyncFunction', 'AsyncFunctionStart', + + -- WebAPI + 'console', 'document', + + -- Misc + 'CreateDynamicFunction', 'HostHasSourceTextAvailable', 'SymbolDescriptiveString', + 'IsConcatSpreadable', 'FlattenIntoArray', 'SortCompare', 'AddEntriesFromIterable', + 'CreateMapIterator', 'CreateSetIterator', 'EventSet', 'SharedDataBlockEventSet', + 'HostEventSet', 'ComposeWriteEventBytes', 'ValueOfReadEvent', 'escape', 'unescape', + 'CreateHTML' +} + --------------------------------------------------------------------------- -- default whitelist --------------------------------------------------------------------------- @@ -1187,7 +1287,7 @@ default_whitelist = ip_med_sweep ip_med_dist ip_hi_proto ip_hi_decoy ip_hi_sweep ip_hi_dist icmp_low_sweep icmp_med_sweep icmp_hi_sweep default_hi_port_scan default_med_port_scan default_low_port_scan - default_variables netflow_versions + default_variables netflow_versions default_js_norm_built_in_ident ]] snort_whitelist_append(default_whitelist) diff --git a/src/service_inspectors/http_inspect/dev_notes.txt b/src/service_inspectors/http_inspect/dev_notes.txt index c33a6e024..2b95547a8 100755 --- a/src/service_inspectors/http_inspect/dev_notes.txt +++ b/src/service_inspectors/http_inspect/dev_notes.txt @@ -231,15 +231,33 @@ So, the following whitespace codes will be normalized: * Any other Unicode “space separator” * Also including 
new-line and carriage-return line-break characters -All JavaScript identifier names will be substituted to unified names with the -following format: var_0000 -> var_ffff. So, the number of unique identifiers available -is 65536 names per HTTP transaction. If Normalizer overruns the configured -limit, built-in alert is generated. Additionally, there is a config option to -specify the limit manually: +All JavaScript identifier names, except those that are in the list of built-in identifiers, +will be substituted to unified names with the following format: var_0000 -> var_ffff. +So, the number of unique identifiers available is 65536 names per HTTP transaction. +If Normalizer overruns the configured limit, a built-in alert is generated. Additionally, +there is a config option to specify the limit manually: * http_inspect.js_norm_identifier_depth. -Additionally, Normalizer validates the syntax with respect to ECMA-262 Standard, -and checks for restrictions for contents of script elements (since, it is HTML-embedded JavaScript). +The built-in JavaScript identifiers will be placed as is, without substitution. Normalizer +tracks built-in identifier expressions based on the configured list of built-in names. +The built-in identifier expression is the built-in name (function or object) and the chain +of dot and bracket accessors after it, including the function calls. +For example: + * console.log("bar") + * document.getElementById("id").text + * eval("script") + * foo["bar"] + +The list of built-in identifiers should be configured with the following config option: + * http_inspect.js_norm_built_in_ident + +This list must contain object and function names only. +For example: + * http_inspect.js_norm_built_in_ident = { 'console', 'document', 'eval', 'foo' } + +Additionally, Normalizer validates the syntax with respect to ECMA-262 Standard, including +scope tracking, and checks for restrictions for contents of script elements (since it +is HTML-embedded JavaScript).
The following rules applied: * no nesting tags allowed, i.e. two opening tags in a row diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 811c91403..7ccbe6c5f 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -278,7 +278,7 @@ enum Infraction INF_JS_CODE_IN_EXTERNAL = 124, INF_JS_SHORTENED_TAG = 125, INF_JS_IDENTIFIER_OVERFLOW = 126, - INF_JS_TMPL_NEST_OVFLOW = 127, + INF_JS_SCOPE_NEST_OVFLOW = 127, INF_CHUNK_OVER_MAXIMUM = 128, INF_LONG_HOST_VALUE = 129, INF_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 130, @@ -411,7 +411,7 @@ enum EventSid EVENT_JS_CODE_IN_EXTERNAL = 268, EVENT_JS_SHORTENED_TAG = 269, EVENT_JS_IDENTIFIER_OVERFLOW = 270, - EVENT_JS_TMPL_NEST_OVFLOW = 271, + EVENT_JS_SCOPE_NEST_OVFLOW = 271, EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 272, EVENT__MAX_VALUE }; diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc index 8cd3ea751..6dbd3fd55 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.cc +++ b/src/service_inspectors/http_inspect/http_flow_data.cc @@ -255,21 +255,23 @@ void HttpFlowData::reset_js_ident_ctx() } snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t ident_depth, size_t norm_depth, - uint8_t max_template_nesting) + uint8_t max_template_nesting, uint32_t max_scope_depth, + const std::unordered_set& built_in_ident) { if (js_normalizer) return *js_normalizer; if (!js_ident_ctx) { - js_ident_ctx = new JSIdentifierCtx(ident_depth); + js_ident_ctx = new JSIdentifierCtx(ident_depth, built_in_ident); update_allocations(js_ident_ctx->size()); debug_logf(4, http_trace, TRACE_JS_PROC, nullptr, "js_ident_ctx created (ident_depth %d)\n", ident_depth); } - js_normalizer = new JSNormalizer(*js_ident_ctx, norm_depth, max_template_nesting); + js_normalizer = new JSNormalizer(*js_ident_ctx, norm_depth, + max_template_nesting, max_scope_depth); 
update_allocations(JSNormalizer::size()); auto ptr = js_detect_buffer[HttpCommon::SRC_SERVER]; @@ -297,7 +299,8 @@ void HttpFlowData::release_js_ctx() } #else void HttpFlowData::reset_js_ident_ctx() {} -snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t, size_t, uint8_t) +snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t, size_t, uint8_t, uint32_t, + const std::unordered_set&) { return *js_normalizer; } void HttpFlowData::release_js_ctx() {} #endif diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h index 02925e799..415fd4c00 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.h +++ b/src/service_inspectors/http_inspect/http_flow_data.h @@ -218,7 +218,8 @@ private: void reset_js_ident_ctx(); snort::JSNormalizer& acquire_js_ctx(int32_t ident_depth, size_t norm_depth, - uint8_t max_template_nesting); + uint8_t max_template_nesting, uint32_t max_scope_depth, + const std::unordered_set& built_in_ident); void release_js_ctx(); bool cutover_on_clear = false; diff --git a/src/service_inspectors/http_inspect/http_inspect.cc b/src/service_inspectors/http_inspect/http_inspect.cc index ddb9098fc..25249d555 100755 --- a/src/service_inspectors/http_inspect/http_inspect.cc +++ b/src/service_inspectors/http_inspect/http_inspect.cc @@ -147,6 +147,10 @@ void HttpInspect::show(const SnortConfig*) const auto bad_chars = GetBadChars(params->uri_param.bad_characters); auto xff_headers = GetXFFHeaders(params->xff_headers); + std::string js_built_in_ident; + for (auto s : params->js_norm_param.built_in_ident) + js_built_in_ident += s + " "; + ConfigLogger::log_limit("request_depth", params->request_depth, -1LL); ConfigLogger::log_limit("response_depth", params->response_depth, -1LL); ConfigLogger::log_flag("unzip", params->unzip); @@ -159,11 +163,12 @@ void HttpInspect::show(const SnortConfig*) const ConfigLogger::log_flag("normalize_javascript", params->js_norm_param.normalize_javascript); 
ConfigLogger::log_value("max_javascript_whitespaces", params->js_norm_param.max_javascript_whitespaces); - ConfigLogger::log_value("js_normalization_depth", - params->js_norm_param.js_normalization_depth); + ConfigLogger::log_value("js_normalization_depth", params->js_norm_param.js_normalization_depth); ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth); - ConfigLogger::log_value("js_norm_max_tmpl_nest", - params->js_norm_param.max_template_nesting); + ConfigLogger::log_value("js_norm_max_tmpl_nest", params->js_norm_param.max_template_nesting); + ConfigLogger::log_value("js_norm_max_scope_depth", params->js_norm_param.max_scope_depth); + if (!js_built_in_ident.empty()) + ConfigLogger::log_list("js_norm_built_in_ident", js_built_in_ident.c_str()); ConfigLogger::log_value("bad_characters", bad_chars.c_str()); ConfigLogger::log_value("ignore_unreserved", unreserved_chars.c_str()); ConfigLogger::log_flag("percent_u", params->uri_param.percent_u); diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index 453fe7203..036dc7530 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -44,11 +44,15 @@ static const char* jsret_codes[] = "bad token", "identifier overflow", "template nesting overflow", + "scope nesting overflow", + "wrong closing symbol", + "ended in inner scope", "unknown" }; static const char* ret2str(JSTokenizer::JSRet ret) { + assert(ret < JSTokenizer::JSRet::MAX); ret = ret < JSTokenizer::JSRet::MAX ? 
ret : JSTokenizer::JSRet::MAX; return jsret_codes[ret]; } @@ -76,11 +80,14 @@ static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const char* con } HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_, - int32_t identifier_depth_, uint8_t max_template_nesting_) : + int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_scope_depth_, + const std::unordered_set& built_in_ident_) : uri_param(uri_param_), normalization_depth(normalization_depth_), identifier_depth(identifier_depth_), max_template_nesting(max_template_nesting_), + max_scope_depth(max_scope_depth_), + built_in_ident(built_in_ident_), mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) @@ -150,7 +157,8 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output, "script continues\n"); - auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting); + auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting, + max_scope_depth, built_in_ident); while (ptr < end) { @@ -176,6 +184,8 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output, ssn->js_built_in_event = true; break; case JSTokenizer::BAD_TOKEN: + case JSTokenizer::WRONG_CLOSING_SYMBOL: + case JSTokenizer::ENDED_IN_INNER_SCOPE: *infractions += INF_JS_BAD_TOKEN; events->create_event(EVENT_JS_BAD_TOKEN); ssn->js_built_in_event = true; @@ -187,8 +197,9 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output, ssn->js_built_in_event = true; break; case JSTokenizer::TEMPLATE_NESTING_OVERFLOW: - *infractions += INF_JS_TMPL_NEST_OVFLOW; - events->create_event(EVENT_JS_TMPL_NEST_OVFLOW); + case JSTokenizer::SCOPE_NESTING_OVERFLOW: + *infractions += INF_JS_SCOPE_NEST_OVFLOW; + events->create_event(EVENT_JS_SCOPE_NEST_OVFLOW); ssn->js_built_in_event = true; break; default: @@ -270,7 +281,8 @@ void HttpJsNorm::enhanced_inline_normalize(const 
Field& input, Field& output, HttpModule::increment_peg_counts(PEG_JS_INLINE); } - auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting); + auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, + max_template_nesting, max_scope_depth, built_in_ident); auto output_size_before = js_ctx.peek_script_size(); auto ret = js_normalize(js_ctx, end, ptr); @@ -293,6 +305,8 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output, events->create_event(EVENT_JS_CLOSING_TAG); break; case JSTokenizer::BAD_TOKEN: + case JSTokenizer::WRONG_CLOSING_SYMBOL: + case JSTokenizer::ENDED_IN_INNER_SCOPE: *infractions += INF_JS_BAD_TOKEN; events->create_event(EVENT_JS_BAD_TOKEN); break; @@ -302,8 +316,9 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output, events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW); break; case JSTokenizer::TEMPLATE_NESTING_OVERFLOW: - *infractions += INF_JS_TMPL_NEST_OVFLOW; - events->create_event(EVENT_JS_TMPL_NEST_OVFLOW); + case JSTokenizer::SCOPE_NESTING_OVERFLOW: + *infractions += INF_JS_SCOPE_NEST_OVFLOW; + events->create_event(EVENT_JS_SCOPE_NEST_OVFLOW); break; default: assert(false); diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h index 64b27c4e7..851ddcb65 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.h +++ b/src/service_inspectors/http_inspect/http_js_norm.h @@ -37,7 +37,8 @@ class HttpJsNorm { public: HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth, - int32_t identifier_depth, uint8_t max_template_nesting); + int32_t identifier_depth, uint8_t max_template_nesting, uint32_t max_scope_depth, + const std::unordered_set& built_in_ident); ~HttpJsNorm(); void legacy_normalize(const Field& input, Field& output, HttpInfractions*, HttpEventGen*, @@ -62,6 +63,8 @@ private: int64_t normalization_depth; int32_t identifier_depth; uint8_t 
max_template_nesting; + uint32_t max_scope_depth; + const std::unordered_set& built_in_ident; bool configure_once = false; snort::SearchTool* mpse_otag; diff --git a/src/service_inspectors/http_inspect/http_module.cc b/src/service_inspectors/http_inspect/http_module.cc index af0729769..eb75456bd 100755 --- a/src/service_inspectors/http_inspect/http_module.cc +++ b/src/service_inspectors/http_inspect/http_module.cc @@ -45,6 +45,12 @@ HttpModule::~HttpModule() LiteralSearch::cleanup(script_detection_handle); } +static const Parameter js_built_in_ident_param[] = +{ + { "ident_name", Parameter::PT_STRING, nullptr, nullptr, "name of built-in identifier" }, + { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } +}; + const Parameter HttpModule::http_params[] = { { "request_depth", Parameter::PT_INT, "-1:max53", "-1", @@ -96,6 +102,13 @@ const Parameter HttpModule::http_params[] = "maximum depth of template literal nesting that enhanced javascript normalizer " "will process (experimental)" }, + { "js_norm_max_scope_depth", Parameter::PT_INT, "0:65535", "256", + "maximum depth of scope nesting that enhanced JavaScript normalizer will process " + "(experimental)" }, + + { "js_norm_built_in_ident", Parameter::PT_LIST, js_built_in_ident_param, nullptr, + "list of JavaScript built-in identifiers which will not be normalized (experimental)" }, + { "max_javascript_whitespaces", Parameter::PT_INT, "1:65535", "200", "maximum consecutive whitespaces allowed within the JavaScript obfuscated data" }, @@ -196,8 +209,11 @@ const TraceOption* HttpModule::get_trace_options() const return http_trace_options; } -bool HttpModule::begin(const char*, int, SnortConfig*) +bool HttpModule::begin(const char* fqn, int, SnortConfig*) { + if (strcmp(fqn, "http_inspect")) + return true; + delete params; params = new HttpParaList; return true; @@ -271,6 +287,14 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*) { params->js_norm_param.max_template_nesting = val.get_uint8(); } + else 
if (val.is("js_norm_max_scope_depth")) + { + params->js_norm_param.max_scope_depth = val.get_int32(); + } + else if (val.is("ident_name")) + { + params->js_norm_param.built_in_ident.insert(val.get_string()); + } else if (val.is("max_javascript_whitespaces")) { params->js_norm_param.max_javascript_whitespaces = val.get_uint16(); @@ -434,8 +458,11 @@ static void prepare_http_header_list(HttpParaList* params) params->header_list[hdr_idx] = end_header; } -bool HttpModule::end(const char*, int, SnortConfig*) +bool HttpModule::end(const char* fqn, int, SnortConfig*) { + if (strcmp(fqn, "http_inspect")) + return true; + if (!params->uri_param.utf8 && params->uri_param.utf8_bare_byte) { ParseWarning(WARN_CONF, "Meaningless to do bare byte when not doing UTF-8"); @@ -460,7 +487,8 @@ bool HttpModule::end(const char*, int, SnortConfig*) if ( params->js_norm_param.is_javascript_normalization ) params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, params->js_norm_param.js_normalization_depth, params->js_norm_param.js_identifier_depth, - params->js_norm_param.max_template_nesting); + params->js_norm_param.max_template_nesting, params->js_norm_param.max_scope_depth, + params->js_norm_param.built_in_ident); params->script_detection_handle = script_detection_handle; diff --git a/src/service_inspectors/http_inspect/http_module.h b/src/service_inspectors/http_inspect/http_module.h index 01e10c1f2..58569955c 100755 --- a/src/service_inspectors/http_inspect/http_module.h +++ b/src/service_inspectors/http_inspect/http_module.h @@ -20,8 +20,9 @@ #ifndef HTTP_MODULE_H #define HTTP_MODULE_H -#include #include +#include +#include #include "framework/module.h" #include "helpers/literal_search.h" @@ -69,6 +70,8 @@ public: int64_t js_normalization_depth = 0; int32_t js_identifier_depth = 0; uint8_t max_template_nesting = 32; + uint32_t max_scope_depth = 256; + std::unordered_set built_in_ident; int max_javascript_whitespaces = 200; class HttpJsNorm* js_norm = nullptr; }; diff --git 
a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index 22800c5fb..689ab31da 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -330,7 +330,7 @@ const RuleMap HttpModule::http_events[] = { EVENT_JS_CODE_IN_EXTERNAL, "JavaScript code under the external script tags" }, { EVENT_JS_SHORTENED_TAG, "script opening tag in a short form" }, { EVENT_JS_IDENTIFIER_OVERFLOW, "max number of unique JavaScript identifiers reached" }, - { EVENT_JS_TMPL_NEST_OVFLOW, "JavaScript template literal nesting is over capacity" }, + { EVENT_JS_SCOPE_NEST_OVFLOW, "JavaScript scope nesting is over capacity" }, { EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS, "Consecutive commas in HTTP Accept-Encoding " "header" }, { 0, nullptr } diff --git a/src/service_inspectors/http_inspect/test/http_module_test.cc b/src/service_inspectors/http_inspect/test/http_module_test.cc index 584f7d8f9..72a54d655 100755 --- a/src/service_inspectors/http_inspect/test/http_module_test.cc +++ b/src/service_inspectors/http_inspect/test/http_module_test.cc @@ -65,9 +65,11 @@ long HttpTestManager::print_amount {}; bool HttpTestManager::print_hex {}; HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_, - int32_t identifier_depth_, uint8_t max_template_nesting_) : + int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_scope_depth_, + const std::unordered_set& built_in_ident_) : uri_param(uri_param_), normalization_depth(normalization_depth_), identifier_depth(identifier_depth_), max_template_nesting(max_template_nesting_), + max_scope_depth(max_scope_depth_), built_in_ident(built_in_ident_), mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {} HttpJsNorm::~HttpJsNorm() = default; void HttpJsNorm::configure(){} diff --git a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc 
b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc index f285760b4..1c95abcf2 100755 --- a/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc +++ b/src/service_inspectors/http_inspect/test/http_uri_norm_test.cc @@ -54,9 +54,11 @@ void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { } void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { } HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_, - int32_t identifier_depth_, uint8_t max_template_nesting_) : + int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_scope_depth_, + const std::unordered_set& built_in_ident_) : uri_param(uri_param_), normalization_depth(normalization_depth_), identifier_depth(identifier_depth_), max_template_nesting(max_template_nesting_), + max_scope_depth(max_scope_depth_), built_in_ident(built_in_ident_), mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {} HttpJsNorm::~HttpJsNorm() = default; void HttpJsNorm::configure() {} diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc index 35b2b44dd..277f320a5 100644 --- a/src/utils/js_identifier_ctx.cc +++ b/src/utils/js_identifier_ctx.cc @@ -76,6 +76,11 @@ const char* JSIdentifierCtx::substitute(const char* identifier) return ident_names[identifier].c_str(); } +bool JSIdentifierCtx::built_in(const char* identifier) const +{ + return ident_built_in.count(identifier); +} + void JSIdentifierCtx::reset() { ident_last_name = 0; diff --git a/src/utils/js_identifier_ctx.h b/src/utils/js_identifier_ctx.h index b69ec8679..c9824b573 100644 --- a/src/utils/js_identifier_ctx.h +++ b/src/utils/js_identifier_ctx.h @@ -22,6 +22,7 @@ #include #include +#include class JSIdentifierCtxBase { @@ -29,6 +30,7 @@ public: virtual ~JSIdentifierCtxBase() = default; virtual const char* substitute(const char* identifier) = 0; + virtual bool built_in(const char* identifier) const = 0; virtual void reset() = 0; 
virtual size_t size() const = 0; }; @@ -36,9 +38,12 @@ public: class JSIdentifierCtx : public JSIdentifierCtxBase { public: - JSIdentifierCtx(int32_t depth) : depth(depth) {} + JSIdentifierCtx(int32_t depth, const std::unordered_set& ident_built_in) + : depth(depth), ident_built_in(ident_built_in) + {} const char* substitute(const char* identifier) override; + bool built_in(const char* identifier) const override; void reset() override; // approximated to 500 unique mappings insertions @@ -50,6 +55,7 @@ private: int32_t depth; std::unordered_map ident_names; + const std::unordered_set& ident_built_in; }; #endif // JS_IDENTIFIER_CTX diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc index cca5ed758..4e040b76b 100644 --- a/src/utils/js_normalizer.cc +++ b/src/utils/js_normalizer.cc @@ -29,7 +29,7 @@ using namespace snort; using namespace std; JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth, - uint8_t max_template_nesting, int tmp_cap_size) + uint8_t max_template_nesting, uint32_t max_scope_depth, int tmp_cap_size) : depth(norm_depth), rem_bytes(norm_depth), unlim(norm_depth == static_cast(-1)), @@ -38,7 +38,7 @@ JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth, tmp_buf_size(0), in(&in_buf), out(&out_buf), - tokenizer(in, out, js_ident_ctx, max_template_nesting, tmp_buf, tmp_buf_size, tmp_cap_size) + tokenizer(in, out, js_ident_ctx, max_template_nesting, max_scope_depth, tmp_buf, tmp_buf_size, tmp_cap_size) { } diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index c4f30d0e7..f2866d11a 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -34,7 +34,8 @@ class JSNormalizer { public: JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t depth, - uint8_t max_template_nesting, int tmp_cap_size = JSTOKENIZER_BUF_MAX_SIZE); + uint8_t max_template_nesting, uint32_t max_scope_depth, + int tmp_cap_size = JSTOKENIZER_BUF_MAX_SIZE); ~JSNormalizer(); const char* 
get_src_next() const diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 4b9c0fe2b..47239648e 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -52,7 +52,27 @@ private: PUNCTUATOR, OPERATOR, LITERAL, - DIRECTIVE + DIRECTIVE, + DOT, + CLOSING_BRACKET + }; + + enum ScopeType + { + GLOBAL = 0, + BRACES, // {} + PARENTHESES, // () + BRACKETS // [] + }; + struct Scope + { + Scope(ScopeType t) + : type(t), ident_norm(true), func_call(false) + {} + + ScopeType type; + bool ident_norm; + bool func_call; }; enum ASIGroup @@ -84,11 +104,15 @@ public: BAD_TOKEN, IDENTIFIER_OVERFLOW, TEMPLATE_NESTING_OVERFLOW, + SCOPE_NESTING_OVERFLOW, + WRONG_CLOSING_SYMBOL, + ENDED_IN_INNER_SCOPE, MAX }; - JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx, - uint8_t max_template_nesting, char*& buf, size_t& buf_size, + JSTokenizer() = delete; + explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx, + uint8_t max_template_nesting, uint32_t max_scope_depth, char*& buf, size_t& buf_size, int cap_size = JSTOKENIZER_BUF_MAX_SIZE); ~JSTokenizer() override; @@ -106,21 +130,33 @@ private: JSRet do_spacing(JSToken cur_token); JSRet do_operator_spacing(JSToken cur_token); void do_semicolon_insertion(ASIGroup current); - JSRet do_identifier_substitution(const char* lexeme); + JSRet do_identifier_substitution(const char* lexeme, bool id_part); bool unescape(const char* lexeme); void process_punctuator(); - void process_closing_bracket(); + void process_closing_brace(); JSRet process_subst_open(); void states_push(); void states_apply(); void states_correct(int); + // scope stack servicing + JSRet scope_push(ScopeType); + JSRet scope_pop(ScopeType); + Scope& scope_cur(); + + // interactions with the current scope + bool global_scope(); + void set_ident_norm(bool); + bool ident_norm(); + void set_func_call(bool); + bool func_call(); + void* cur_buffer; void* tmp_buffer = nullptr; std::stringstream 
tmp; uint8_t max_template_nesting; - std::stack> bracket_depth; + std::stack> brace_depth; JSToken token = UNDEFINED; ASIGroup previous_group = ASI_OTHER; JSIdentifierCtxBase& ident_ctx; @@ -136,6 +172,7 @@ private: char*& tmp_buf; size_t& tmp_buf_size; const int tmp_cap_size; + bool newline_found = false; constexpr static bool insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX] { @@ -151,6 +188,9 @@ private: {false, true, false, true, false, false, true, true, true, true, true, }, {false, false, false, false, false, false, false, false, false, false, false,} }; + + const uint32_t max_scope_depth; + std::stack scope_stack; }; #endif // JS_TOKENIZER_H diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index 81d8f30fe..f399dc1bc 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -878,13 +878,15 @@ KEYWORD_OTHER case|catch|class|const|default|else|enum|export|extends|finally /* punctuators */ /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.7 */ -CLOSING_PAREN ")" -CLOSING_BRACE "]" -OPEN_BRACKET "{" -CLOSE_BRACKET "}" +OPEN_BRACE "{" +CLOSE_BRACE "}" +OPEN_PARENTHESIS "(" +CLOSE_PARENTHESIS ")" +OPEN_BRACKET "[" +CLOSE_BRACKET "]" +DOT_ACCESSOR "." PUNCTUATOR_PREFIX "~"|"!" 
-OPEN_PAREN_BRACE "("|"[" -PUNCTUATOR ">="|"=="|"!="|"==="|"!=="|"."|";"|","|"<"|">"|"<="|"<<"|">>"|">>>"|"&"|"|"|"^"|"&&"|"||"|"?"|":"|"="|"+="|"-="|"*="|"%="|"<<="|">>="|">>>="|"&="|"|="|"^=" +PUNCTUATOR ">="|"=="|"!="|"==="|"!=="|";"|","|"<"|">"|"<="|"<<"|">>"|">>>"|"&"|"|"|"^"|"&&"|"||"|"?"|":"|"="|"+="|"-="|"*="|"%="|"<<="|">>="|">>>="|"&="|"|="|"^=" OPERATOR_PREFIX "+"|"-" OPERATOR_INCR_DECR "--"|"++" OPERATOR "*"|"%" @@ -966,7 +968,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LINE_TERMINATORS} { BEGIN(regst); newline_found = true; } {HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); return OPENING_TAG; } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return SCRIPT_ENDED; } +{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); if (!global_scope()) return ENDED_IN_INNER_SCOPE; else return SCRIPT_ENDED; } {HTML_COMMENT_OPEN} { BEGIN(lcomm); } {LINE_COMMENT_START} { BEGIN(lcomm); } @@ -986,7 +988,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {BLOCK_COMMENT_SKIP} { } <> { states_apply(); return SCRIPT_CONTINUE; } - {LITERAL_DQ_STRING_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); } + {LITERAL_DQ_STRING_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); set_ident_norm(true); } {LITERAL_DQ_STRING_END} { ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; } \\{CR}{LF} { } @@ -997,7 +999,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_DQ_STRING_TEXT} { ECHO; } <> { states_apply(); return SCRIPT_CONTINUE; } - {LITERAL_SQ_STRING_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); } + {LITERAL_SQ_STRING_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); set_ident_norm(true); } {LITERAL_SQ_STRING_END} { ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; } 
\\{CR}{LF} { } @@ -1008,10 +1010,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_SQ_STRING_TEXT} { ECHO; } <> { states_apply(); return SCRIPT_CONTINUE; } -{OPEN_BRACKET} { do_semicolon_insertion(ASI_GROUP_1); if (!bracket_depth.empty()) bracket_depth.top()++; process_punctuator(); } -{CLOSE_BRACKET} { do_semicolon_insertion(ASI_GROUP_2); process_closing_bracket(); } - - {LITERAL_TEMPLATE_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); } + {LITERAL_TEMPLATE_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); set_ident_norm(true); } (\\\\)*{LITERAL_TEMPLATE_END} { ECHO; BEGIN(divop); } (\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; } @@ -1020,7 +1019,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_TEMPLATE_OTHER} { ECHO; } <> { return SCRIPT_CONTINUE; } -{LITERAL_REGEX_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); } +{LITERAL_REGEX_START} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); set_ident_norm(true); } {LITERAL_REGEX_END} { ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); return CLOSING_TAG; } {LITERAL_REGEX_SKIP} { ECHO; } @@ -1031,28 +1030,33 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 <> { states_apply(); return SCRIPT_CONTINUE; } {DIV_OPERATOR} | -{DIV_ASSIGNMENT_OPERATOR} { previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); } +{DIV_ASSIGNMENT_OPERATOR} { previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); set_ident_norm(true); } + +{OPEN_BRACE} { do_semicolon_insertion(ASI_GROUP_1); EXEC(scope_push(BRACES)) if (!brace_depth.empty()) brace_depth.top()++; 
process_punctuator(); } +{CLOSE_BRACE} { do_semicolon_insertion(ASI_GROUP_2); EXEC(scope_pop(BRACES)) process_closing_brace(); set_ident_norm(true); } +{OPEN_PARENTHESIS} { do_semicolon_insertion(ASI_GROUP_3); EXEC(scope_push(PARENTHESES)) if (token == IDENTIFIER || token == CLOSING_BRACKET || token == KEYWORD) set_func_call(true); process_punctuator(); } +{CLOSE_PARENTHESIS} { do_semicolon_insertion(ASI_GROUP_5); bool f_call = func_call(); bool id_norm = ident_norm(); EXEC(scope_pop(PARENTHESES)) if (!f_call) set_ident_norm(id_norm); ECHO; token = PUNCTUATOR; BEGIN(divop); } +{OPEN_BRACKET} { do_semicolon_insertion(ASI_GROUP_3); do_semicolon_insertion(ASI_GROUP_4); EXEC(scope_push(BRACKETS)) process_punctuator(); } +{CLOSE_BRACKET} { do_semicolon_insertion(ASI_GROUP_4); EXEC(scope_pop(BRACKETS)) ECHO; token = CLOSING_BRACKET; BEGIN(divop); } -{CLOSING_PAREN} { do_semicolon_insertion(ASI_GROUP_5); ECHO; token = PUNCTUATOR; BEGIN(divop); } -{CLOSING_BRACE} { do_semicolon_insertion(ASI_GROUP_4); ECHO; token = PUNCTUATOR; BEGIN(divop); } -{PUNCTUATOR_PREFIX} { do_semicolon_insertion(ASI_GROUP_10); process_punctuator(); } -{OPEN_PAREN_BRACE} { do_semicolon_insertion(ASI_GROUP_3); process_punctuator(); } -{PUNCTUATOR} { previous_group = ASI_OTHER; process_punctuator(); } +{PUNCTUATOR_PREFIX} { do_semicolon_insertion(ASI_GROUP_10); process_punctuator(); set_ident_norm(true); } +{DOT_ACCESSOR} { previous_group = ASI_OTHER; ECHO; token = DOT; BEGIN(regst); } +{PUNCTUATOR} { previous_group = ASI_OTHER; process_punctuator(); set_ident_norm(true); } -{USE_STRICT_DIRECTIVE} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; } -{USE_STRICT_DIRECTIVE_SC} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); } -{KEYWORD_B} { do_semicolon_insertion(ASI_GROUP_10); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } -{KEYWORD_BA} { do_semicolon_insertion(ASI_GROUP_9); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } 
-{KEYWORD_OTHER} { previous_group = ASI_OTHER; EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } +{USE_STRICT_DIRECTIVE} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; set_ident_norm(true); } +{USE_STRICT_DIRECTIVE_SC} { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); set_ident_norm(true); } -{OPERATOR_PREFIX} { do_semicolon_insertion(ASI_GROUP_6); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); } -{OPERATOR_INCR_DECR} { do_semicolon_insertion(ASI_GROUP_8); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); } -{OPERATOR} { previous_group = ASI_OTHER; EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); } +{KEYWORD_B} { do_semicolon_insertion(ASI_GROUP_10); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } +{KEYWORD_BA} { do_semicolon_insertion(ASI_GROUP_9); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } +{KEYWORD_OTHER} { previous_group = ASI_OTHER; if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); } -{LITERAL} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); } -{IDENTIFIER} { do_semicolon_insertion(ASI_GROUP_7); if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); } +{OPERATOR_PREFIX} { do_semicolon_insertion(ASI_GROUP_6); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); set_ident_norm(true); } +{OPERATOR_INCR_DECR} { do_semicolon_insertion(ASI_GROUP_8); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); set_ident_norm(true); } +{OPERATOR} { previous_group = ASI_OTHER; EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); set_ident_norm(true); } +{LITERAL} { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); set_ident_norm(true); } +{IDENTIFIER} { do_semicolon_insertion(ASI_GROUP_7); if (unescape(YYText())) { bool id_part 
= (token == DOT); EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText(), id_part)) } BEGIN(divop); } -.|{ALL_UNICODE} { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); } +.|{ALL_UNICODE} { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); set_ident_norm(true); } <> { EEOF(eval_eof()) } %% @@ -1135,14 +1139,16 @@ static std::string unescape_unicode(const char* lexeme) JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& mapper, uint8_t max_template_nesting, - char*& buf, size_t& buf_size, int cap_size) + uint32_t max_scope_depth, char*& buf, size_t& buf_size, int cap_size) : yyFlexLexer(in, out), max_template_nesting(max_template_nesting), ident_ctx(mapper), tmp_buf(buf), tmp_buf_size(buf_size), - tmp_cap_size(cap_size) + tmp_cap_size(cap_size), + max_scope_depth(max_scope_depth) { + scope_push(GLOBAL); BEGIN(regst); } @@ -1185,7 +1191,6 @@ JSTokenizer::JSRet JSTokenizer::eval_eof() // Normal termination states_apply(); - return SCRIPT_CONTINUE; } @@ -1196,6 +1201,8 @@ JSTokenizer::JSRet JSTokenizer::do_spacing(JSToken cur_token) case PUNCTUATOR: case OPERATOR: case DIRECTIVE: + case DOT: + case CLOSING_BRACKET: case UNDEFINED: token = cur_token; return EOS; @@ -1222,6 +1229,8 @@ JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token) case PUNCTUATOR: case LITERAL: case DIRECTIVE: + case DOT: + case CLOSING_BRACKET: case UNDEFINED: token = cur_token; return EOS; @@ -1237,23 +1246,37 @@ JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token) return BAD_TOKEN; } -JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme) +JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, bool id_part) { + if (!ident_norm() && id_part) + { + yyout << lexeme; + return EOS; + } + else + set_ident_norm(true); + + if (ident_ctx.built_in(lexeme) && !id_part) + { + set_ident_norm(false); + return 
do_identifier_substitution(lexeme, true); + } + const char* ident = ident_ctx.substitute(lexeme); - if (ident) + if (!ident) { debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr, - "'%s' => '%s'\n", lexeme, ident); + "'%s' => IDENTIFIER_OVERFLOW\n", lexeme); - yyout << ident; - return EOS; + return IDENTIFIER_OVERFLOW; } debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr, - "'%s' => IDENTIFIER_OVERFLOW\n", lexeme); + "'%s' => '%s'\n", lexeme, ident); - return IDENTIFIER_OVERFLOW; + yyout << ident; + return EOS; } void JSTokenizer::do_semicolon_insertion(ASIGroup current) @@ -1281,7 +1304,6 @@ bool JSTokenizer::unescape(const char* lexeme) switch_to_temporal(unescaped_lex); return false; } - return true; } @@ -1292,15 +1314,15 @@ void JSTokenizer::process_punctuator() BEGIN(regst); } -void JSTokenizer::process_closing_bracket() +void JSTokenizer::process_closing_brace() { - if (!bracket_depth.empty()) + if (!brace_depth.empty()) { - if (bracket_depth.top()) - bracket_depth.top()--; + if (brace_depth.top()) + brace_depth.top()--; else { - bracket_depth.pop(); + brace_depth.pop(); ECHO; BEGIN(tmpll); return; @@ -1311,13 +1333,13 @@ void JSTokenizer::process_closing_bracket() JSTokenizer::JSRet JSTokenizer::process_subst_open() { - if (bracket_depth.size() >= max_template_nesting) + if (brace_depth.size() >= max_template_nesting) return TEMPLATE_NESTING_OVERFLOW; - bracket_depth.push(0); + brace_depth.push(0); token = PUNCTUATOR; ECHO; BEGIN(divop); - return EOS; + return scope_push(BRACES); } void JSTokenizer::states_push() @@ -1370,4 +1392,78 @@ void JSTokenizer::states_apply() delete[] tmp_buf; tmp_buf = buf; tmp_buf_size = tail_size; + + // Reverse traversal over buffer to adjust scope stack before the next PDU buffer starts + bool is_tmpl = false; + const char* c = tmp_buf + tmp_buf_size; + const char* const s = tmp_buf; + while (c-- > s) + { + switch (*c) + { + case '{': scope_pop(BRACES); if (is_tmpl) brace_depth.pop(); break; + case '}': scope_push(BRACES); if 
(is_tmpl) brace_depth.push(0); break; + case '(': scope_pop(PARENTHESES); break; + case ')': + { + bool id_norm = ident_norm(); + scope_push(PARENTHESES); + if (!id_norm) + set_func_call(true); + break; + } + case '[': scope_pop(BRACKETS); break; + case ']': scope_push(BRACKETS); break; + case '`': is_tmpl = !is_tmpl; break; + } + } +} + +JSTokenizer::JSRet JSTokenizer::scope_push(ScopeType t) +{ + if (scope_stack.size() > max_scope_depth) + return SCOPE_NESTING_OVERFLOW; + + scope_stack.emplace(t); + return EOS; +} + +JSTokenizer::JSRet JSTokenizer::scope_pop(ScopeType t) +{ + if (t != scope_cur().type) + return WRONG_CLOSING_SYMBOL; + + scope_stack.pop(); + return EOS; +} + +JSTokenizer::Scope& JSTokenizer::scope_cur() +{ + assert(!scope_stack.empty()); + return scope_stack.top(); +} + +bool JSTokenizer::global_scope() +{ + return scope_cur().type == GLOBAL; +} + +void JSTokenizer::set_ident_norm(bool f) +{ + scope_cur().ident_norm = f; +} + +bool JSTokenizer::ident_norm() +{ + return scope_cur().ident_norm; +} + +void JSTokenizer::set_func_call(bool f) +{ + scope_cur().func_call = f; +} + +bool JSTokenizer::func_call() +{ + return scope_cur().func_call; } diff --git a/src/utils/test/js_identifier_ctx_test.cc b/src/utils/test/js_identifier_ctx_test.cc index 1baa74a83..2b37036d1 100644 --- a/src/utils/test/js_identifier_ctx_test.cc +++ b/src/utils/test/js_identifier_ctx_test.cc @@ -32,18 +32,20 @@ #define DEPTH 65536 +static const std::unordered_set s_ident_built_in { "console" }; + TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") { SECTION("same name") { - JSIdentifierCtx ident_ctx(DEPTH); + JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); } SECTION("different names") { - JSIdentifierCtx ident_ctx(DEPTH); + JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); 
CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001")); @@ -51,7 +53,7 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") } SECTION("depth reached") { - JSIdentifierCtx ident_ctx(2); + JSIdentifierCtx ident_ctx(2, s_ident_built_in); CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000")); CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001")); @@ -61,7 +63,7 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") } SECTION("max names") { - JSIdentifierCtx ident_ctx(DEPTH + 2); + JSIdentifierCtx ident_ctx(DEPTH + 2, s_ident_built_in); std::vector n, e; n.reserve(DEPTH + 2); @@ -86,3 +88,11 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]") } } +TEST_CASE("JSIdentifierCtx::built_in()", "[JSIdentifierCtx]") +{ + JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); + + SECTION("match") { CHECK(ident_ctx.built_in("console") == true); } + SECTION("no match") { CHECK(ident_ctx.built_in("foo") == false); } +} + diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index 5b059466b..9ac56bf7b 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -48,6 +48,8 @@ public: const char* substitute(const char* identifier) override { return identifier; } + bool built_in(const char*) const override + { return false; } void reset() override {} size_t size() const override { return 0; } }; @@ -57,7 +59,10 @@ public: using namespace snort; #define DEPTH 65535 -#define MAX_TEMPLATE_NESTNIG 4 +#define MAX_TEMPLATE_NESTING 4 +#define MAX_SCOPE_DEPTH 256 + +static const std::unordered_set s_ident_built_in { "console", "eval", "document" }; // Unit tests @@ -67,7 +72,7 @@ using namespace snort; #define NORMALIZE(src) \ JSIdentifierCtxTest ident_ctx; \ - JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ auto ret = norm.normalize(src, sizeof(src)); \ const char* ptr = norm.get_src_next(); \ 
auto result = norm.get_script(); \ @@ -92,7 +97,7 @@ using namespace snort; #define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \ { \ JSIdentifierCtxTest ident_ctx; \ - JSNormalizer norm(ident_ctx, depth, MAX_TEMPLATE_NESTNIG); \ + JSNormalizer norm(ident_ctx, depth, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ ret = norm.normalize(src, src_len); \ ptr = norm.get_src_next(); \ auto result = norm.get_script(); \ @@ -136,13 +141,56 @@ using namespace snort; CHECK(ret == JSTokenizer::SCRIPT_ENDED); \ } +#define NORMALIZE_S(src1, exp1) \ + { \ + char dst1[sizeof(exp1)]; \ + \ + JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ + \ + DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \ + CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ + \ + CLOSE(); \ + } + +#define NORMALIZE_T(src1, src2, exp1, exp2) \ + { \ + char dst1[sizeof(exp1)]; \ + char dst2[sizeof(exp2)]; \ + \ + JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ + \ + DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \ + CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ + \ + DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); \ + CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1)); \ + \ + CLOSE(); \ + } + +#define NORMALIZE_1(src1, exp1) \ + { \ + char dst1[sizeof(exp1)]; \ + \ + JSIdentifierCtxTest ident_ctx; \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ + \ + DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \ + CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ + \ + CLOSE(); \ + } + #define NORMALIZE_2(src1, src2, exp1, exp2) \ { \ char dst1[sizeof(exp1)]; \ char dst2[sizeof(exp2)]; \ \ JSIdentifierCtxTest ident_ctx; \ - JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ \ DO(src1, sizeof(src1) - 1, dst1, 
sizeof(dst1) - 1); \ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ @@ -160,7 +208,7 @@ using namespace snort; char dst3[sizeof(exp3)]; \ \ JSIdentifierCtxTest ident_ctx; \ - JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ \ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ @@ -174,13 +222,24 @@ using namespace snort; CLOSE(); \ } +#define NORM_BAD_1(src1, exp1, code) \ + { \ + char dst1[sizeof(exp1)]; \ + \ + JSIdentifierCtxTest ident_ctx; \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ + \ + TRY(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1, code); \ + CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ + } + #define NORM_BAD_2(src1, src2, exp1, exp2, code) \ { \ char dst1[sizeof(exp1)]; \ char dst2[sizeof(exp2)]; \ \ JSIdentifierCtxTest ident_ctx; \ - JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ \ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ @@ -196,7 +255,7 @@ using namespace snort; char dst3[sizeof(exp3)]; \ \ JSIdentifierCtxTest ident_ctx; \ - JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); \ \ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ @@ -214,7 +273,7 @@ using namespace snort; char dst2[sizeof(exp2)]; \ \ JSIdentifierCtxTest ident_ctx; \ - JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG, limit); \ + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH, limit); \ \ DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1); \ CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); \ @@ -1392,7 +1451,7 @@ TEST_CASE("endings", "[JSNormalizer]") int ret; JSIdentifierCtxTest 
ident_ctx; - JSNormalizer norm(ident_ctx, 7, MAX_TEMPLATE_NESTNIG); + JSNormalizer norm(ident_ctx, 7, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); ret = norm.normalize(src, sizeof(src)); ptr = norm.get_src_next(); auto res1 = norm.get_script(); @@ -2206,6 +2265,1030 @@ TEST_CASE("memcap", "[JSNormalizer]") } } +TEST_CASE("scope tracking", "[JSNormalizer]") +{ + SECTION("parentheses") + { + const char dat1[] = "()"; + const char dat2[] = "()()()"; + const char dat3[] = "((()))"; + const char exp1[] = "()"; + const char exp2[] = "()()()"; + const char exp3[] = "((()))"; + + NORMALIZE_1(dat1, exp1); + NORMALIZE_1(dat2, exp2); + NORMALIZE_1(dat3, exp3); + } + SECTION("curly braces") + { + const char dat1[] = "{}"; + const char dat2[] = "{}{}{}"; + const char dat3[] = "{{{}}}"; + const char exp1[] = "{}"; + const char exp2[] = "{}{}{}"; + const char exp3[] = "{{{}}}"; + + NORMALIZE_1(dat1, exp1); + NORMALIZE_1(dat2, exp2); + NORMALIZE_1(dat3, exp3); + } + SECTION("square brackets") + { + const char dat1[] = "[]"; + const char dat2[] = "[][][]"; + const char dat3[] = "[[[]]]"; + const char exp1[] = "[]"; + const char exp2[] = "[][][]"; + const char exp3[] = "[[[]]]"; + + NORMALIZE_1(dat1, exp1); + NORMALIZE_1(dat2, exp2); + NORMALIZE_1(dat3, exp3); + } + SECTION("mix of brackets") + { + const char dat1[] = "(){}[]"; + const char dat2[] = "({})[]"; + const char dat3[] = "(){[]}"; + const char exp1[] = "(){}[]"; + const char exp2[] = "({})[]"; + const char exp3[] = "(){[]}"; + + NORMALIZE_1(dat1, exp1); + NORMALIZE_1(dat2, exp2); + NORMALIZE_1(dat3, exp3); + } + SECTION("parentheses - wrong closing symbol") + { + const char dat1[] = "({[ (} ]})"; + const char dat2[] = "({[ (] ]})"; + const char exp1[] = "({[("; + const char exp2[] = "({[("; + + NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + } + SECTION("curly braces - wrong closing symbol") + { + const char dat1[] = "({[ {) ]})"; + const char dat2[] = 
"({[ {] ]})"; + const char exp1[] = "({[{"; + const char exp2[] = "({[{"; + + NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + } + SECTION("square brackets - wrong closing symbol") + { + const char dat1[] = "([{ [) }])"; + const char dat2[] = "([{ [} }])"; + const char exp1[] = "([{["; + const char exp2[] = "([{["; + + NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + } + SECTION("parentheses - mismatch") + { + const char dat1[] = ")"; + const char dat2[] = "())"; + const char dat3[] = "({[ ()) ]})"; + const char dat4[] = "("; + const char dat5[] = "(()"; + const char exp1[] = ""; + const char exp2[] = "()"; + const char exp3[] = "({[()"; + const char exp4[] = "("; + const char exp5[] = "(()"; + + NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE); + NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE); + } + SECTION("curly braces - mismatch") + { + const char dat1[] = "}"; + const char dat2[] = "{}}"; + const char dat3[] = "({[ {}} ]})"; + const char dat4[] = "{"; + const char dat5[] = "{{}"; + const char exp1[] = ""; + const char exp2[] = "{}"; + const char exp3[] = "({[{}"; + const char exp4[] = "{"; + const char exp5[] = "{{}"; + + NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE); + NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE); + } + SECTION("square brackets - mismatch") + { + const char dat1[] = "]"; + const char dat2[] = "[]]"; + const char dat3[] = "([{ []] }])"; + const char dat4[] = "["; + const char dat5[] = "[[]"; + 
const char exp1[] = ""; + const char exp2[] = "[]"; + const char exp3[] = "([{[]"; + const char exp4[] = "["; + const char exp5[] = "[[]"; + + NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE); + NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE); + } + SECTION("parentheses - continuation") + { + const char dat1[] = "(("; + const char dat2[] = "))"; + const char exp1[] = "(("; + const char exp2[] = "(())"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + } + SECTION("curly braces - continuation") + { + const char dat1[] = "{{"; + const char dat2[] = "}}"; + const char exp1[] = "{{"; + const char exp2[] = "{{}}"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + } + SECTION("square brackets - continuation") + { + const char dat1[] = "[["; + const char dat2[] = "]]"; + const char exp1[] = "[["; + const char exp2[] = "[[]]"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + } + SECTION("parentheses - mismatch in continuation") + { + const char dat1[] = "("; + const char dat2[] = "))"; + const char dat3[] = "("; + const char dat4[] = " "; + const char exp1[] = "("; + const char exp2[] = "()"; + const char exp3[] = "("; + const char exp4[] = "("; + + NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE); + } + SECTION("curly braces - mismatch in continuation") + { + const char dat1[] = "{"; + const char dat2[] = "}}"; + const char dat3[] = "{"; + const char dat4[] = " "; + const char exp1[] = "{"; + const char exp2[] = "{}"; + const char exp3[] = "{"; + const char exp4[] = "{"; + + NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE); + } + SECTION("square brackets - mismatch in continuation") + { + const char dat1[] = 
"["; + const char dat2[] = "]]"; + const char dat3[] = "["; + const char dat4[] = " "; + const char exp1[] = "["; + const char exp2[] = "[]"; + const char exp3[] = "["; + const char exp4[] = "["; + + NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE); + } +} + +TEST_CASE("scope misc", "[JSNormalizer]") +{ + const int stack_limit = 256; + const char* open = "1+("; + const char* close = "-1)"; + + SECTION("max stack") + { + std::string scr; + + for (int i = 0; i < stack_limit; ++i) + scr += open; + for (int i = 0; i < stack_limit; ++i) + scr += close; + + const char* dat = scr.c_str(); + int dat_len = strlen(dat); + const char* exp = scr.c_str(); + int exp_len = strlen(exp); + char* act = new char[exp_len]; + + JSIdentifierCtxTest ident_ctx; + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); + + DO(dat, dat_len, act, exp_len); + CHECK(!memcmp(exp, act, exp_len)); + + delete[] act; + + CLOSE(); + } + + SECTION("max stack") + { + std::string scr; + std::string nsc; + + for (int i = 0; i < stack_limit + 1; ++i) + scr += open; + for (int i = 0; i < stack_limit + 1; ++i) + scr += close; + for (int i = 0; i < stack_limit; ++i) + nsc += open; + nsc += "1+"; + + const char* dat = scr.c_str(); + int dat_len = strlen(dat); + const char* exp = nsc.c_str(); + int exp_len = strlen(exp); + char* act = new char[exp_len]; + + JSIdentifierCtxTest ident_ctx; + JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); + + TRY(dat, dat_len, act, exp_len, JSTokenizer::SCOPE_NESTING_OVERFLOW); + CHECK(!memcmp(exp, act, exp_len)); + + delete[] act; + } +} + +TEST_CASE("scope tail handling", "[JSNormalizer]") +{ + // Padding ':' symbol has been chosen, since it: + // * forms a single state for Parser + // * doesn't insert white spaces + // * forms a single match, i.e. 
there are no '::' ':::' patterns + // + // Thus, the tail of "::({[]})" will have JSTOKENIZER_MAX_STATES + // and the same number of characters in it. + +#if JSTOKENIZER_MAX_STATES != 8 +#error "scope tail handling" tests are designed for the tail of 8 bytes size +#endif + + SECTION("no scope-symbols in the tail") + { + const char dat1[] = "((((::::::::"; + const char dat2[] = "):):):):"; + const char dat3[] = "{}{{::::::::"; + const char dat4[] = "::{}}}::"; + const char dat5[] = "[][[::::::::"; + const char dat6[] = "::::]][]"; + const char exp1[] = "((((::::::::"; + const char exp2[] = "::::::::):):):):"; + const char exp3[] = "{}{{::::::::"; + const char exp4[] = "::::::::::{}}}::"; + const char exp5[] = "[][[::::::::"; + const char exp6[] = "::::::::::::]][]"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + NORMALIZE_2(dat3, dat4, exp3, exp4); + NORMALIZE_2(dat5, dat6, exp5, exp6); + } + + SECTION("opening scope-symbols in the tail") + { + const char dat1[] = "::::(:::::::"; + const char dat2[] = "):::::::"; + const char dat3[] = ":::::::::::{"; + const char dat4[] = ":::::::}"; + const char dat5[] = "::::[:::::::"; + const char dat6[] = "::::]:::"; + const char exp1[] = "::::(:::::::"; + const char exp2[] = "(:::::::):::::::"; + const char exp3[] = ":::::::::::{"; + const char exp4[] = ":::::::{:::::::}"; + const char exp5[] = "::::[:::::::"; + const char exp6[] = "[:::::::::::]:::"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + NORMALIZE_2(dat3, dat4, exp3, exp4); + NORMALIZE_2(dat5, dat6, exp5, exp6); + } + + SECTION("closing scope-symbols in the tail") + { + const char dat1[] = "(((()::::::)"; + const char dat2[] = "()::::))"; + const char dat3[] = "{{{{:::::::}"; + const char dat4[] = ":::::}}}"; + const char dat5[] = "[::::::::]::"; + const char dat6[] = "::::::::"; + const char exp1[] = "(((()::::::)"; + const char exp2[] = ")::::::)()::::))"; + const char exp3[] = "{{{{:::::::}"; + const char exp4[] = ":::::::}:::::}}}"; + const char exp5[] = "[::::::::]::"; + 
const char exp6[] = ":::::]::::::::::"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + NORMALIZE_2(dat3, dat4, exp3, exp4); + NORMALIZE_2(dat5, dat6, exp5, exp6); + } + + SECTION("newly opening scope-symbols in the tail") + { + const char dat1[] = "(:::(::::::("; + const char dat2[] = "))):::::"; + const char dat3[] = "{:{:{:{:{:{:"; + const char dat4[] = "::}}}}}}"; + const char dat5[] = "[:[:[:::[:::"; + const char dat6[] = "::::]]]]"; + const char exp1[] = "(:::(::::::("; + const char exp2[] = "(::::::())):::::"; + const char exp3[] = "{:{:{:{:{:{:"; + const char exp4[] = "{:{:{:{:::}}}}}}"; + const char exp5[] = "[:[:[:::[:::"; + const char exp6[] = "[:::[:::::::]]]]"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + NORMALIZE_2(dat3, dat4, exp3, exp4); + NORMALIZE_2(dat5, dat6, exp5, exp6); + } + + SECTION("fully closing scope-symbols in the tail") + { + const char dat1[] = "((((::::))))"; + const char dat2[] = "::::::::"; + const char dat3[] = "{{{{}:}:}:}:"; + const char dat4[] = "::::{}{}"; + const char dat5[] = "[[:::::::]:]"; + const char dat6[] = "[::::::]"; + const char exp1[] = "((((::::))))"; + const char exp2[] = "::::))))::::::::"; + const char exp3[] = "{{{{}:}:}:}:"; + const char exp4[] = "}:}:}:}:::::{}{}"; + const char exp5[] = "[[:::::::]:]"; + const char exp6[] = ":::::]:][::::::]"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + NORMALIZE_2(dat3, dat4, exp3, exp4); + NORMALIZE_2(dat5, dat6, exp5, exp6); + } + + SECTION("extra scope-symbols in the tail") + { + const char dat1[] = "(((((((("; + const char dat2[] = ")))))))))"; + const char dat3[] = "{{{{{{{{"; + const char dat4[] = "}}}}}}]}"; + const char dat5[] = "[[[[[[[["; + const char dat6[] = "]]]]]]]"; + const char exp1[] = "(((((((("; + const char exp2[] = "(((((((())))))))"; + const char exp3[] = "{{{{{{{{"; + const char exp4[] = "{{{{{{{{}}}}}}"; + const char exp5[] = "[[[[[[[["; + const char exp6[] = "[[[[[[[[]]]]]]]"; + + NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL); + 
NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::WRONG_CLOSING_SYMBOL); + NORM_BAD_2(dat5, dat6, exp5, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE); + } + + SECTION("overwriting scope-symbols in the tail") + { + const char dat1[] = "(((((((())))"; + const char dat2[] = ":))))"; + const char dat3[] = "({[(:):]{}{}"; + const char dat4[] = "}[]())"; + const char dat5[] = "{{{{}[[]]((("; + const char dat6[] = ")))}}}"; + const char exp1[] = "(((((((())))"; + const char exp2[] = "(((()))):))))"; + const char exp3[] = "({[(:):]{}{}"; + const char exp4[] = ":):]{}{}}[]())"; + const char exp5[] = "{{{{}[[]]((("; + const char exp6[] = "}[[]]((()))}}}"; + + NORMALIZE_2(dat1, dat2, exp1, exp2); + NORMALIZE_2(dat3, dat4, exp3, exp4); + NORMALIZE_2(dat5, dat6, exp5, exp6); + } +} + +TEST_CASE("built-in identifiers syntax", "[JSNormalizer]") +{ + // 'console' 'eval' 'document' are built-in identifiers + + SECTION("a standalone identifier") + { + const char dat1[] = "alpha bravo console delta eval"; + const char dat2[] = "var a = 0; console = 1;"; + const char dat3[] = "var a = 0; var console = 1;"; + const char dat4[] = "foo(0); console(1); bar(2); console1(3); baz(4);"; + const char dat5[] = "foo(0); eval(1); bar(2); evaluate(3); baz(4);"; + const char exp1[] = "var_0000 var_0001 console var_0002 eval"; + const char exp2[] = "var var_0000=0;console=1;"; + const char exp3[] = "var var_0000=0;var console=1;"; + const char exp4[] = "var_0000(0);console(1);var_0001(2);var_0002(3);var_0003(4);"; + const char exp5[] = "var_0000(0);eval(1);var_0001(2);var_0002(3);var_0003(4);"; + + NORMALIZE_S(dat1, exp1); + NORMALIZE_S(dat2, exp2); + NORMALIZE_S(dat3, exp3); + NORMALIZE_S(dat4, exp4); + NORMALIZE_S(dat5, exp5); + } + + SECTION("inner objects") + { + const char dat1[] = "alpha.bravo.charlie.delta"; + const char dat2[] = "alpha.bravo.console.delta"; + const char dat3[] = "eval.alpha.bravo.charlie.delta"; + const char dat4[] = "eval.alpha.bravo.console.delta"; + const char exp1[] = 
"var_0000.var_0001.var_0002.var_0003"; + const char exp2[] = "var_0000.var_0001.var_0002.var_0003"; + const char exp3[] = "eval.alpha.bravo.charlie.delta"; + const char exp4[] = "eval.alpha.bravo.console.delta"; + + NORMALIZE_S(dat1, exp1); + NORMALIZE_S(dat2, exp2); + NORMALIZE_S(dat3, exp3); + NORMALIZE_S(dat4, exp4); + } + + SECTION("function calls") + { + const char dat1[] = "foo.bar.baz()"; + const char dat2[] = "foo.bar().baz"; + const char dat3[] = "foo().bar.baz"; + const char dat4[] = "eval.bar.baz()"; + const char dat5[] = "eval.bar().baz"; + const char dat6[] = "eval().bar.baz"; + const char dat7[] = "foo.eval.baz()"; + const char dat8[] = "foo.eval().baz"; + const char dat9[] = "foo().eval.baz"; + const char dat10[] = "foo.bar.eval()"; + const char dat11[] = "foo.bar().eval"; + const char dat12[] = "var_0000().bar.eval"; + const char exp1[] = "var_0000.var_0001.var_0002()"; + const char exp2[] = "var_0000.var_0001().var_0002"; + const char exp3[] = "var_0000().var_0001.var_0002"; + const char exp4[] = "eval.bar.baz()"; + const char exp5[] = "eval.bar().baz"; + const char exp6[] = "eval().bar.baz"; + const char exp7[] = "var_0000.var_0001.var_0002()"; + const char exp8[] = "var_0000.var_0001().var_0002"; + const char exp9[] = "var_0000().var_0001.var_0002"; + const char exp10[] = "var_0000.var_0001.var_0002()"; + const char exp11[] = "var_0000.var_0001().var_0002"; + const char exp12[] = "var_0000().var_0001.var_0002"; + + NORMALIZE_S(dat1, exp1); + NORMALIZE_S(dat2, exp2); + NORMALIZE_S(dat3, exp3); + NORMALIZE_S(dat4, exp4); + NORMALIZE_S(dat5, exp5); + NORMALIZE_S(dat6, exp6); + NORMALIZE_S(dat7, exp7); + NORMALIZE_S(dat8, exp8); + NORMALIZE_S(dat9, exp9); + NORMALIZE_S(dat10, exp10); + NORMALIZE_S(dat11, exp11); + NORMALIZE_S(dat12, exp12); + } +} + +TEST_CASE("built-in chain tracking", "[JSNormalizer]") +{ + // 'console' 'eval' 'document' are built-in identifiers + + SECTION("chain terminators") + { + const char dat1[] = "eval.foo.bar.baz"; + const 
char dat2[] = "eval.foo bar.baz"; + const char dat3[] = "eval.foo;bar.baz"; + const char dat4[] = "eval.foo,bar.baz"; + const char dat5[] = "eval.foo*bar.baz"; + const char dat6[] = "eval.foo*=bar.baz"; + const char dat7[] = "eval.foo/bar.baz"; + const char dat8[] = "eval.foo/=bar.baz"; + const char dat9[] = "eval.foo%bar.baz"; + const char dat10[] = "eval.foo%=bar.baz"; + const char dat11[] = "eval.foo+bar.baz"; + const char dat12[] = "eval.foo+=bar.baz"; + const char dat13[] = "eval.foo-bar.baz"; + const char dat14[] = "eval.foo-=bar.baz"; + const char dat15[] = "eval.foo< ids{}; + JSIdentifierCtx ident_ctx(DEPTH, ids); + JSNormalizer normalizer_w_ident(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); + REQUIRE(norm_ret(normalizer_w_ident, input) == JSTokenizer::SCRIPT_ENDED); BENCHMARK("with substitution") { normalizer_w_ident.rewind_output(); - return normalizer_w_ident.normalize(src, src_len); + return normalizer_w_ident.normalize(input.c_str(), input.size()); + }; + + const std::unordered_set ids_n { "n" }; + JSIdentifierCtx ident_ctx_ids_n(DEPTH, ids_n); + JSNormalizer normalizer_built_ins(ident_ctx_ids_n, UNLIM_DEPTH, + MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH); + + REQUIRE(norm_ret(normalizer_built_ins, input) == JSTokenizer::SCRIPT_ENDED); + BENCHMARK("with built-ins") + { + normalizer_built_ins.rewind_output(); + return normalizer_built_ins.normalize(input.c_str(), input.size()); + }; +} + +TEST_CASE("benchmarking - ::normalize() - scope", "[JSNormalizer]") +{ + constexpr uint32_t depth = 65535; + JSIdentifierCtxTest ident_ctx; + JSNormalizer normalizer(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTING, depth); + char dst[depth]; + + auto src_ws = make_input("", " ", "", depth); + auto src_brace_rep = make_input_repeat("{}", depth); + auto src_paren_rep = make_input_repeat("()", depth); + auto src_bracket_rep = make_input_repeat("[]", depth); + + BENCHMARK("memcpy - ...{}{}{}... 
- 65535 bytes") + { + return memcpy(dst, src_brace_rep.c_str(), src_brace_rep.size()); + }; + + REQUIRE(norm_ret(normalizer, src_ws) == JSTokenizer::SCRIPT_ENDED); + BENCHMARK("whitespaces - 65535 bytes") + { + normalizer.rewind_output(); + return normalizer.normalize(src_ws.c_str(), src_ws.size()); + }; + + REQUIRE(norm_ret(normalizer, src_brace_rep) == JSTokenizer::SCRIPT_ENDED); + BENCHMARK("...{}{}{}... - 65535 bytes") + { + normalizer.rewind_output(); + return normalizer.normalize(src_brace_rep.c_str(), src_brace_rep.size()); + }; + + REQUIRE(norm_ret(normalizer, src_paren_rep) == JSTokenizer::SCRIPT_ENDED); + BENCHMARK("...()()()... - 65535 bytes") + { + normalizer.rewind_output(); + return normalizer.normalize(src_paren_rep.c_str(), src_paren_rep.size()); + }; + + REQUIRE(norm_ret(normalizer, src_bracket_rep) == JSTokenizer::SCRIPT_ENDED); + BENCHMARK("...[][][]... - 65535 bytes") + { + normalizer.rewind_output(); + return normalizer.normalize(src_bracket_rep.c_str(), src_bracket_rep.size()); }; } TEST_CASE("benchmarking - ::normalize() - automatic semicolon insertion") { - auto w_semicolons = make_input("", "a;\n", s_closing_tag, DEPTH); - auto wo_semicolons = make_input("", "a \n", s_closing_tag, DEPTH); + auto w_semicolons = make_input("", "a;\n", "", DEPTH); + auto wo_semicolons = make_input("", "a \n", "", DEPTH); const char* src_w_semicolons = w_semicolons.c_str(); const char* src_wo_semicolons = wo_semicolons.c_str(); size_t src_len = w_semicolons.size(); JSIdentifierCtxTest ident_ctx_mock; - JSNormalizer normalizer_wo_ident(ident_ctx_mock, UNLIM_DEPTH, MAX_TEMPLATE_NESTNIG); + JSNormalizer normalizer_wo_ident(ident_ctx_mock, UNLIM_DEPTH, MAX_TEMPLATE_NESTING, DEPTH); + REQUIRE(norm_ret(normalizer_wo_ident, w_semicolons) == JSTokenizer::SCRIPT_ENDED); BENCHMARK("without semicolon insertion") { + normalizer_wo_ident.rewind_output(); return normalizer_wo_ident.normalize(src_w_semicolons, src_len); }; + REQUIRE(norm_ret(normalizer_wo_ident, 
wo_semicolons) == JSTokenizer::SCRIPT_ENDED); BENCHMARK("with semicolon insertion") { + normalizer_wo_ident.rewind_output(); return normalizer_wo_ident.normalize(src_wo_semicolons, src_len); }; }