]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Merge pull request #3095 in SNORT/snort3 from ~OSERHIIE/snort3:js_built_ins to master
authorMike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 13 Oct 2021 17:59:05 +0000 (17:59 +0000)
committerMike Stepanek (mstepane) <mstepane@cisco.com>
Wed, 13 Oct 2021 17:59:05 +0000 (17:59 +0000)
Squashed commit of the following:

commit d253c19d845340b83e7abac8085d07b38b5ebca4
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Wed Sep 29 17:00:12 2021 +0300

    http_inspect: do not normalize JavaScript built-in identifiers

        * utils: update JSTokenizer to track the scope
        * utils: update JSTokenizer to track JavaScript built-in identifiers
        * utils: update JSIdentifierCtx to check for JavaScript built-in identifiers
        * utils: add unit tests for scope and identifiers tracking
        * utils: add benchmarks for scope and identifiers tracking
        * http_inspect: add js_norm_max_scope_depth config option to limit maximum depth of scope nesting
        * http_inspect: add js_norm_built_in_ident config option as a list of JavaScript built-in identifiers
        * http_inspect: update 119:271 rule to alert on both template and scope depth limit reached
        * http_inspect: update 119:265 rule to alert on the scope mismatch
        * http_inspect: update dev_notes.txt with info about JavaScript built-in identifiers and scope tracking
        * lua: update snort_defaults.lua with a default list of JavaScript built-in identifiers
        * doc: update user/http_inspect.txt with info about JavaScript built-in identifiers and scope tracking
        * doc: update reference/builtin_stubs.txt with updates in 119:271 rule description

23 files changed:
doc/reference/builtin_stubs.txt
doc/user/http_inspect.txt
lua/snort_defaults.lua
src/service_inspectors/http_inspect/dev_notes.txt
src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_flow_data.cc
src/service_inspectors/http_inspect/http_flow_data.h
src/service_inspectors/http_inspect/http_inspect.cc
src/service_inspectors/http_inspect/http_js_norm.cc
src/service_inspectors/http_inspect/http_js_norm.h
src/service_inspectors/http_inspect/http_module.cc
src/service_inspectors/http_inspect/http_module.h
src/service_inspectors/http_inspect/http_tables.cc
src/service_inspectors/http_inspect/test/http_module_test.cc
src/service_inspectors/http_inspect/test/http_uri_norm_test.cc
src/utils/js_identifier_ctx.cc
src/utils/js_identifier_ctx.h
src/utils/js_normalizer.cc
src/utils/js_normalizer.h
src/utils/js_tokenizer.h
src/utils/js_tokenizer.l
src/utils/test/js_identifier_ctx_test.cc
src/utils/test/js_normalizer_test.cc

index ba29eaf3c5267319c37c27404113135885a18cec..ddf0e84486dfef2697547779f089d364c84900c2 100644 (file)
@@ -1198,9 +1198,11 @@ network traffic and may be an indication that an attacker is trying to exhaust r
 
 In JavaScript, template literals can have substitutions, that in turn can have nested
 template literals, which requires a stack to track for proper whitespace normalization.
-When the depth of nesting exceeds limit set in http_inspect.js_norm_max_tmpl_nest,
-this alert is raised. This alert is not expected for typical network traffic and may be
-an indication that an attacker is trying to exhaust resources.
+Also, the normalization tracks the current scope, which requires a stack as well.
+When the depth of nesting exceeds the limit set in http_inspect.js_norm_max_tmpl_nest or in
+http_inspect.js_norm_max_scope_depth, this alert is raised. This alert is not expected
+for typical network traffic and may be an indication that an attacker is trying to exhaust
+resources.
 
 119:272
 
index c03215bb831092365331733010f6047e9a461321..6f9fd5f5ec8ce8c56a6c11f081f522c417cab7ab 100755 (executable)
@@ -202,6 +202,36 @@ is present to limit the amount of memory dedicated to this tracking. This option
 is used only when js_normalization_depth is not 0. This feature
 is currently experimental and still under development.
 
+===== js_norm_max_scope_depth
+
+js_norm_max_scope_depth = N {0 : 65535} (default 256) is an option of the enhanced
+JavaScript normalizer that determines the deepest level of nested scope. The term
+"scope" includes code sections ("{}"), parentheses ("()") and brackets ("[]"). This option
+is present to limit the amount of memory dedicated to this tracking. This option is used
+only when js_normalization_depth is not 0. This feature is currently experimental and
+still under development.
+
+===== js_norm_built_in_ident
+
+js_norm_built_in_ident = {<the list of built-in JavaScript identifier names>}.
+The default list is present in "snort_defaults.lua".
+
+The built-in JavaScript identifiers will be placed as is, without substitution. Normalizer
+tracks built-in identifier expressions based on the configured list of built-in names.
+The built-in identifier expression is the built-in name (function or object) and the chain
+of dot and bracket accessors after it, including the function calls.
+For example:
+
+    console.log("bar")
+    document.getElementById("id").text
+    eval("script")
+    foo["bar"]
+
+The list must contain object and function names only.
+For example:
+
+    http_inspect.js_norm_built_in_ident = { 'console', 'document', 'eval', 'foo' }
+
 ===== xff_headers
 
 This configuration supports defining custom x-forwarded-for type headers. In a
index 36f42256fed56d5ed0caff618dbed1a4138a4c40..547cd37818a41265de6390bdde710fb736fcde24 100644 (file)
@@ -1169,6 +1169,106 @@ default_low_port_scan =
     icmp_sweep = icmp_low_sweep,
 }
 
+---------------------------------------------------------------------------
+-- ECMAScript Standard Built-in Objects and Functions Names (Identifiers)
+-- It might also include other non-specification identifiers, like those
+-- that are part of WebAPI or frameworks
+---------------------------------------------------------------------------
+default_js_norm_built_in_ident =
+{
+    -- GlobalObject.Functions
+    'eval', 'PerformEval', 'HostEnsureCanCompileStrings', 'EvalDeclarationInstantiation',
+    'isFinite', 'isNaN', 'parseFloat', 'parseInt', 'Encode', 'Decode', 'decodeURI',
+    'decodeURIComponent', 'encodeURI', 'encodeURIComponent',
+
+    -- GlobalObject.Constructors
+    'AggregateError', 'Array', 'ArrayBuffer', 'BigInt', 'BigInt64Array', 'BigUint64Array',
+    'Boolean', 'DataView', 'Date', 'Error', 'EvalError', 'FinalizationRegistry',
+    'Float32Array', 'Float64Array', 'Function', 'Int8Array', 'Int16Array', 'Int32Array',
+    'Map', 'NativeError', 'Number', 'Object', 'Promise', 'Proxy',
+    'RangeError', 'ReferenceError', 'RegExp', 'Set', 'SharedArrayBuffer', 'String',
+    'Symbol', 'SyntaxError', 'TypeError', 'Uint8Array', 'Uint8ClampedArray', 'Uint16Array',
+    'Uint32Array', 'URIError', 'WeakMap', 'WeakRef', 'WeakSet',
+
+    -- Atomics
+    'Atomics', 'WaiterList', 'ValidateIntegerTypedArray', 'ValidateAtomicAccess', 'GetWaiterList',
+    'EnterCriticalSection', 'LeaveCriticalSection', 'AddWaiter', 'RemoveWaiter', 'RemoveWaiters',
+    'SuspendAgent', 'NotifyWaiter', 'AtomicReadModifyWrite', 'ByteListBitwiseOp', 'ByteListEqual',
+
+    -- JSON
+    'JSON', 'InternalizeJSONProperty', 'SerializeJSONProperty', 'QuoteJSONString', 'UnicodeEscape',
+    'SerializeJSONObject','SerializeJSONArray',
+
+    -- Math
+    'Math',
+
+    -- Reflect
+    'Reflect',
+
+    -- Date and Time
+    'LocalTZA', 'LocalTime', 'UTC', 'MakeTime', 'MakeDay', 'MakeDate', 'TimeClip', 'TimeString',
+    'DateString', 'TimeZoneString', 'ToDateString',
+
+    -- String
+    'StringPad', 'GetSubstitution', 'SplitMatch', 'TrimString',
+
+    -- RegExp
+    'RegExpExec', 'RegExpBuiltinExec', 'AdvanceStringIndex', 'RegExpHasFlag',
+
+    -- TypedArray
+    'TypedArray', 'TypedArraySpeciesCreate', 'TypedArrayCreate', 'ValidateTypedArray',
+    'AllocateTypedArray', 'InitializeTypedArrayFromTypedArray',
+    'InitializeTypedArrayFromArrayBuffer', 'InitializeTypedArrayFromList',
+    'InitializeTypedArrayFromArrayLike', 'AllocateTypedArrayBuffer',
+
+    -- ArrayBuffer
+    'AllocateArrayBuffer', 'IsDetachedBuffer', 'DetachArrayBuffer', 'CloneArrayBuffer',
+    'IsUnsignedElementType', 'IsUnclampedIntegerElementType', 'IsBigIntElementType',
+    'IsNoTearConfiguration', 'RawBytesToNumeric', 'GetValueFromBuffer', 'NumericToRawBytes',
+    'SetValueInBuffer', 'GetModifySetValueInBuffer',
+
+    -- SharedArrayBuffer
+    'AllocateSharedArrayBuffer', 'IsSharedArrayBuffer',
+
+    -- DataView
+    'GetViewValue', 'SetViewValue', 'getDataView',
+
+    -- WeakRef
+    'WeakRefDeref',
+
+    -- Promise
+    'IfAbruptRejectPromise', 'CreateResolvingFunctions', 'FulfillPromise', 'NewPromiseCapability',
+    'IsPromise', 'RejectPromise', 'TriggerPromiseReactions', 'HostPromiseRejectionTracker',
+    'NewPromiseReactionJob', 'NewPromiseResolveThenableJob', 'GetPromiseResolve',
+    'PerformPromiseAll', 'PerformPromiseAllSettled', 'PerformPromiseAny', 'PerformPromiseRace',
+    'PromiseResolve', 'PerformPromiseThen',
+
+    -- GeneratorFunction
+    'GeneratorFunction', 'AsyncGeneratorFunction',
+
+    -- Generator
+    'Generator', 'GeneratorStart', 'GeneratorValidate', 'GeneratorResume', 'GeneratorResumeAbrupt',
+    'GetGeneratorKind', 'GeneratorYield', 'Yield', 'CreateIteratorFromClosure',
+
+    -- AsyncGenerator
+    'AsyncGenerator', 'AsyncGeneratorStart', 'AsyncGeneratorValidate', 'AsyncGeneratorResolve',
+    'AsyncGeneratorReject', 'AsyncGeneratorResumeNext', 'AsyncGeneratorEnqueue',
+    'AsyncGeneratorYield', 'CreateAsyncIteratorFromClosure',
+
+    -- AsyncFunction
+    'AsyncFunction', 'AsyncFunctionStart',
+
+    -- WebAPI
+    'console', 'document',
+
+    -- Misc
+    'CreateDynamicFunction', 'HostHasSourceTextAvailable', 'SymbolDescriptiveString',
+    'IsConcatSpreadable', 'FlattenIntoArray', 'SortCompare', 'AddEntriesFromIterable',
+    'CreateMapIterator', 'CreateSetIterator', 'EventSet', 'SharedDataBlockEventSet',
+    'HostEventSet', 'ComposeWriteEventBytes', 'ValueOfReadEvent', 'escape', 'unescape',
+    'CreateHTML'
+}
+
 ---------------------------------------------------------------------------
 -- default whitelist
 ---------------------------------------------------------------------------
@@ -1187,7 +1287,7 @@ default_whitelist =
     ip_med_sweep ip_med_dist ip_hi_proto ip_hi_decoy ip_hi_sweep
     ip_hi_dist icmp_low_sweep icmp_med_sweep icmp_hi_sweep
     default_hi_port_scan default_med_port_scan default_low_port_scan
-    default_variables netflow_versions
+    default_variables netflow_versions default_js_norm_built_in_ident
 ]]
 
 snort_whitelist_append(default_whitelist)
index c33a6e024e6cc3391a092c79bd801c0958756023..2b95547a810b915da828cfbc0459cbb245c26e33 100755 (executable)
@@ -231,15 +231,33 @@ So, the following whitespace codes will be normalized:
  * Any other Unicode “space separator” <USP>
  * Also including new-line and carriage-return line-break characters
 
-All JavaScript identifier names will be substituted to unified names with the
-following format: var_0000 -> var_ffff. So, the number of unique identifiers available
-is 65536 names per HTTP transaction. If Normalizer overruns the configured
-limit, built-in alert is generated. Additionally, there is a config option to
-specify the limit manually:
+All JavaScript identifier names, except those that are in the list of built-in identifiers,
+will be substituted with unified names in the following format: var_0000 -> var_ffff.
+So, the number of unique identifiers available is 65536 names per HTTP transaction.
+If Normalizer overruns the configured limit, built-in alert is generated. Additionally,
+there is a config option to specify the limit manually:
  * http_inspect.js_norm_identifier_depth.
 
-Additionally, Normalizer validates the syntax with respect to ECMA-262 Standard,
-and checks for restrictions for contents of script elements (since, it is HTML-embedded JavaScript).
+The built-in JavaScript identifiers will be placed as is, without substitution. Normalizer
+tracks built-in identifier expressions based on the configured list of built-in names.
+The built-in identifier expression is the built-in name (function or object) and the chain
+of dot and bracket accessors after it, including the function calls.
+For example:
+ * console.log("bar")
+ * document.getElementById("id").text
+ * eval("script")
+ * foo["bar"]
+
+The list of built-in identifiers should be configured with the following config option:
+ * http_inspect.js_norm_built_in_ident
+
+This list must contain object and function names only.
+For example:
+ * http_inspect.js_norm_built_in_ident = { 'console', 'document', 'eval', 'foo' }
+
+Additionally, Normalizer validates the syntax with respect to the ECMA-262 Standard, including
+scope tracking, and checks for restrictions on the contents of script elements (since it
+is HTML-embedded JavaScript).
 
 The following rules applied:
  * no nesting tags allowed, i.e. two opening tags in a row
index 811c91403834d25ba19cdb49b32664e9385ca5d7..7ccbe6c5fbb2b8bf1013980dfde71e45439fc1fb 100755 (executable)
@@ -278,7 +278,7 @@ enum Infraction
     INF_JS_CODE_IN_EXTERNAL = 124,
     INF_JS_SHORTENED_TAG = 125,
     INF_JS_IDENTIFIER_OVERFLOW = 126,
-    INF_JS_TMPL_NEST_OVFLOW = 127,
+    INF_JS_SCOPE_NEST_OVFLOW = 127,
     INF_CHUNK_OVER_MAXIMUM = 128,
     INF_LONG_HOST_VALUE = 129,
     INF_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 130,
@@ -411,7 +411,7 @@ enum EventSid
     EVENT_JS_CODE_IN_EXTERNAL = 268,
     EVENT_JS_SHORTENED_TAG = 269,
     EVENT_JS_IDENTIFIER_OVERFLOW = 270,
-    EVENT_JS_TMPL_NEST_OVFLOW = 271,
+    EVENT_JS_SCOPE_NEST_OVFLOW = 271,
     EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 272,
     EVENT__MAX_VALUE
 };
index 8cd3ea7512993b57124b9f441b51dfa88ad959c9..6dbd3fd55f0edcd96603ce0156f1975aedcaaa6e 100644 (file)
@@ -255,21 +255,23 @@ void HttpFlowData::reset_js_ident_ctx()
 }
 
 snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t ident_depth, size_t norm_depth,
-     uint8_t max_template_nesting)
+    uint8_t max_template_nesting, uint32_t max_scope_depth,
+    const std::unordered_set<std::string>& built_in_ident)
 {
     if (js_normalizer)
         return *js_normalizer;
 
     if (!js_ident_ctx)
     {
-        js_ident_ctx = new JSIdentifierCtx(ident_depth);
+        js_ident_ctx = new JSIdentifierCtx(ident_depth, built_in_ident);
         update_allocations(js_ident_ctx->size());
 
         debug_logf(4, http_trace, TRACE_JS_PROC, nullptr,
             "js_ident_ctx created (ident_depth %d)\n", ident_depth);
     }
 
-    js_normalizer = new JSNormalizer(*js_ident_ctx, norm_depth, max_template_nesting);
+    js_normalizer = new JSNormalizer(*js_ident_ctx, norm_depth,
+        max_template_nesting, max_scope_depth);
     update_allocations(JSNormalizer::size());
 
     auto ptr = js_detect_buffer[HttpCommon::SRC_SERVER];
@@ -297,7 +299,8 @@ void HttpFlowData::release_js_ctx()
 }
 #else
 void HttpFlowData::reset_js_ident_ctx() {}
-snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t, size_t, uint8_t)
+snort::JSNormalizer& HttpFlowData::acquire_js_ctx(int32_t, size_t, uint8_t, uint32_t,
+    const std::unordered_set<std::string>&)
 { return *js_normalizer; }
 void HttpFlowData::release_js_ctx() {}
 #endif
index 02925e799d9d02b7c46e5f279218aebc70b371ad..415fd4c00355bf287c45b24e241091c6fc6d2305 100644 (file)
@@ -218,7 +218,8 @@ private:
 
     void reset_js_ident_ctx();
     snort::JSNormalizer& acquire_js_ctx(int32_t ident_depth, size_t norm_depth,
-        uint8_t max_template_nesting);
+        uint8_t max_template_nesting, uint32_t max_scope_depth,
+        const std::unordered_set<std::string>& built_in_ident);
     void release_js_ctx();
 
     bool cutover_on_clear = false;
index ddb9098fcf73329c7294aa92bd1204ea444d053b..25249d55505741c9bb0ba32b132abf8a483db0af 100755 (executable)
@@ -147,6 +147,10 @@ void HttpInspect::show(const SnortConfig*) const
     auto bad_chars = GetBadChars(params->uri_param.bad_characters);
     auto xff_headers = GetXFFHeaders(params->xff_headers);
 
+    std::string js_built_in_ident;
+    for (auto s : params->js_norm_param.built_in_ident)
+        js_built_in_ident += s + " ";
+
     ConfigLogger::log_limit("request_depth", params->request_depth, -1LL);
     ConfigLogger::log_limit("response_depth", params->response_depth, -1LL);
     ConfigLogger::log_flag("unzip", params->unzip);
@@ -159,11 +163,12 @@ void HttpInspect::show(const SnortConfig*) const
     ConfigLogger::log_flag("normalize_javascript", params->js_norm_param.normalize_javascript);
     ConfigLogger::log_value("max_javascript_whitespaces",
         params->js_norm_param.max_javascript_whitespaces);
-    ConfigLogger::log_value("js_normalization_depth",
-        params->js_norm_param.js_normalization_depth);
+    ConfigLogger::log_value("js_normalization_depth", params->js_norm_param.js_normalization_depth);
     ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth);
-    ConfigLogger::log_value("js_norm_max_tmpl_nest",
-        params->js_norm_param.max_template_nesting);
+    ConfigLogger::log_value("js_norm_max_tmpl_nest", params->js_norm_param.max_template_nesting);
+    ConfigLogger::log_value("js_norm_max_scope_depth", params->js_norm_param.max_scope_depth);
+    if (!js_built_in_ident.empty())
+        ConfigLogger::log_list("js_norm_built_in_ident", js_built_in_ident.c_str());
     ConfigLogger::log_value("bad_characters", bad_chars.c_str());
     ConfigLogger::log_value("ignore_unreserved", unreserved_chars.c_str());
     ConfigLogger::log_flag("percent_u", params->uri_param.percent_u);
index 453fe72031eed74f5cdbaadc11d85262e3142d04..036dc75307b21bfe5c21f416cd159bd06f36151c 100644 (file)
@@ -44,11 +44,15 @@ static const char* jsret_codes[] =
     "bad token",
     "identifier overflow",
     "template nesting overflow",
+    "scope nesting overflow",
+    "wrong closing symbol",
+    "ended in inner scope",
     "unknown"
 };
 
 static const char* ret2str(JSTokenizer::JSRet ret) // maps a tokenizer return code to its string in jsret_codes
 {
+    assert(ret < JSTokenizer::JSRet::MAX); // active unless NDEBUG is defined: flags a code at or beyond MAX
     ret = ret < JSTokenizer::JSRet::MAX ? ret : JSTokenizer::JSRet::MAX; // clamp any out-of-range code to MAX
     return jsret_codes[ret]; // NOTE(review): presumably index MAX is the trailing "unknown" entry - confirm table and enum stay in sync
 }
@@ -76,11 +80,14 @@ static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const char* con
 }
 
 HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
-    int32_t identifier_depth_, uint8_t max_template_nesting_) :
+    int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_scope_depth_,
+    const std::unordered_set<std::string>& built_in_ident_) :
     uri_param(uri_param_),
     normalization_depth(normalization_depth_),
     identifier_depth(identifier_depth_),
     max_template_nesting(max_template_nesting_),
+    max_scope_depth(max_scope_depth_),
+    built_in_ident(built_in_ident_),
     mpse_otag(nullptr),
     mpse_attr(nullptr),
     mpse_type(nullptr)
@@ -150,7 +157,8 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output,
             "script continues\n");
 
 
-    auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting);
+    auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting,
+        max_scope_depth, built_in_ident);
 
     while (ptr < end)
     {
@@ -176,6 +184,8 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output,
             ssn->js_built_in_event = true;
             break;
         case JSTokenizer::BAD_TOKEN:
+        case JSTokenizer::WRONG_CLOSING_SYMBOL:
+        case JSTokenizer::ENDED_IN_INNER_SCOPE:
             *infractions += INF_JS_BAD_TOKEN;
             events->create_event(EVENT_JS_BAD_TOKEN);
             ssn->js_built_in_event = true;
@@ -187,8 +197,9 @@ void HttpJsNorm::enhanced_external_normalize(const Field& input, Field& output,
             ssn->js_built_in_event = true;
             break;
         case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
-            *infractions += INF_JS_TMPL_NEST_OVFLOW;
-            events->create_event(EVENT_JS_TMPL_NEST_OVFLOW);
+        case JSTokenizer::SCOPE_NESTING_OVERFLOW:
+            *infractions += INF_JS_SCOPE_NEST_OVFLOW;
+            events->create_event(EVENT_JS_SCOPE_NEST_OVFLOW);
             ssn->js_built_in_event = true;
             break;
         default:
@@ -270,7 +281,8 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output,
                 HttpModule::increment_peg_counts(PEG_JS_INLINE);
         }
 
-        auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth, max_template_nesting);
+        auto& js_ctx = ssn->acquire_js_ctx(identifier_depth, normalization_depth,
+            max_template_nesting, max_scope_depth, built_in_ident);
         auto output_size_before = js_ctx.peek_script_size();
 
         auto ret = js_normalize(js_ctx, end, ptr);
@@ -293,6 +305,8 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output,
             events->create_event(EVENT_JS_CLOSING_TAG);
             break;
         case JSTokenizer::BAD_TOKEN:
+        case JSTokenizer::WRONG_CLOSING_SYMBOL:
+        case JSTokenizer::ENDED_IN_INNER_SCOPE:
             *infractions += INF_JS_BAD_TOKEN;
             events->create_event(EVENT_JS_BAD_TOKEN);
             break;
@@ -302,8 +316,9 @@ void HttpJsNorm::enhanced_inline_normalize(const Field& input, Field& output,
             events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
             break;
         case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
-            *infractions += INF_JS_TMPL_NEST_OVFLOW;
-            events->create_event(EVENT_JS_TMPL_NEST_OVFLOW);
+        case JSTokenizer::SCOPE_NESTING_OVERFLOW:
+            *infractions += INF_JS_SCOPE_NEST_OVFLOW;
+            events->create_event(EVENT_JS_SCOPE_NEST_OVFLOW);
             break;
         default:
             assert(false);
index 64b27c4e7db507d1f6cd2aa2a3169c3e73dbcad0..851ddcb65b26dd509de7c46a17ad992cc1bae8e3 100644 (file)
@@ -37,7 +37,8 @@ class HttpJsNorm
 {
 public:
     HttpJsNorm(const HttpParaList::UriParam&, int64_t normalization_depth,
-        int32_t identifier_depth, uint8_t max_template_nesting);
+        int32_t identifier_depth, uint8_t max_template_nesting, uint32_t max_scope_depth,
+        const std::unordered_set<std::string>& built_in_ident);
     ~HttpJsNorm();
 
     void legacy_normalize(const Field& input, Field& output, HttpInfractions*, HttpEventGen*,
@@ -62,6 +63,8 @@ private:
     int64_t normalization_depth;
     int32_t identifier_depth;
     uint8_t max_template_nesting;
+    uint32_t max_scope_depth;
+    const std::unordered_set<std::string>& built_in_ident;
     bool configure_once = false;
 
     snort::SearchTool* mpse_otag;
index af0729769488170f5003443faff6dd039b4d53d3..eb75456bda9645e61c2e6822b162026dc861d5e3 100755 (executable)
@@ -45,6 +45,12 @@ HttpModule::~HttpModule()
     LiteralSearch::cleanup(script_detection_handle);
 }
 
+static const Parameter js_built_in_ident_param[] =
+{
+    { "ident_name", Parameter::PT_STRING, nullptr, nullptr, "name of built-in identifier" },
+    { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr }
+};
+
 const Parameter HttpModule::http_params[] =
 {
     { "request_depth", Parameter::PT_INT, "-1:max53", "-1",
@@ -96,6 +102,13 @@ const Parameter HttpModule::http_params[] =
       "maximum depth of template literal nesting that enhanced javascript normalizer "
       "will process (experimental)" },
 
+    { "js_norm_max_scope_depth", Parameter::PT_INT, "0:65535", "256",
+      "maximum depth of scope nesting that enhanced JavaScript normalizer will process "
+      "(experimental)" },
+
+    { "js_norm_built_in_ident", Parameter::PT_LIST, js_built_in_ident_param, nullptr,
+      "list of JavaScript built-in identifiers which will not be normalized (experimental)" },
+
     { "max_javascript_whitespaces", Parameter::PT_INT, "1:65535", "200",
       "maximum consecutive whitespaces allowed within the JavaScript obfuscated data" },
 
@@ -196,8 +209,11 @@ const TraceOption* HttpModule::get_trace_options() const
     return http_trace_options;
 }
 
-bool HttpModule::begin(const char*, int, SnortConfig*)
+bool HttpModule::begin(const char* fqn, int, SnortConfig*)
 {
+    if (strcmp(fqn, "http_inspect"))
+        return true;
+
     delete params;
     params = new HttpParaList;
     return true;
@@ -271,6 +287,14 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*)
     {
         params->js_norm_param.max_template_nesting = val.get_uint8();
     }
+    else if (val.is("js_norm_max_scope_depth"))
+    {
+        params->js_norm_param.max_scope_depth = val.get_int32();
+    }
+    else if (val.is("ident_name"))
+    {
+        params->js_norm_param.built_in_ident.insert(val.get_string());
+    }
     else if (val.is("max_javascript_whitespaces"))
     {
         params->js_norm_param.max_javascript_whitespaces = val.get_uint16();
@@ -434,8 +458,11 @@ static void prepare_http_header_list(HttpParaList* params)
     params->header_list[hdr_idx] = end_header;
 }
 
-bool HttpModule::end(const char*, int, SnortConfig*)
+bool HttpModule::end(const char* fqn, int, SnortConfig*)
 {
+    if (strcmp(fqn, "http_inspect"))
+        return true;
+
     if (!params->uri_param.utf8 && params->uri_param.utf8_bare_byte)
     {
         ParseWarning(WARN_CONF, "Meaningless to do bare byte when not doing UTF-8");
@@ -460,7 +487,8 @@ bool HttpModule::end(const char*, int, SnortConfig*)
     if ( params->js_norm_param.is_javascript_normalization )
         params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param,
         params->js_norm_param.js_normalization_depth, params->js_norm_param.js_identifier_depth,
-        params->js_norm_param.max_template_nesting);
+        params->js_norm_param.max_template_nesting, params->js_norm_param.max_scope_depth,
+        params->js_norm_param.built_in_ident);
 
     params->script_detection_handle = script_detection_handle;
 
index 01e10c1f2b91ac1b8ef83a38ee49aec940a4c1fb..58569955caaad905f1362dd4396fac393a1e65db 100755 (executable)
@@ -20,8 +20,9 @@
 #ifndef HTTP_MODULE_H
 #define HTTP_MODULE_H
 
-#include <string>
 #include <bitset>
+#include <string>
+#include <unordered_set>
 
 #include "framework/module.h"
 #include "helpers/literal_search.h"
@@ -69,6 +70,8 @@ public:
         int64_t js_normalization_depth = 0;
         int32_t js_identifier_depth = 0;
         uint8_t max_template_nesting = 32;
+        uint32_t max_scope_depth = 256;
+        std::unordered_set<std::string> built_in_ident;
         int max_javascript_whitespaces = 200;
         class HttpJsNorm* js_norm = nullptr;
     };
index 22800c5fb3cf2650725afa1753fe030f5aac6967..689ab31dabc839467620f3e1570f77c0de3bb130 100755 (executable)
@@ -330,7 +330,7 @@ const RuleMap HttpModule::http_events[] =
     { EVENT_JS_CODE_IN_EXTERNAL,        "JavaScript code under the external script tags" },
     { EVENT_JS_SHORTENED_TAG,           "script opening tag in a short form" },
     { EVENT_JS_IDENTIFIER_OVERFLOW,     "max number of unique JavaScript identifiers reached" },
-    { EVENT_JS_TMPL_NEST_OVFLOW,        "JavaScript template literal nesting is over capacity" },
+    { EVENT_JS_SCOPE_NEST_OVFLOW,       "JavaScript scope nesting is over capacity" },
     { EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS, "Consecutive commas in HTTP Accept-Encoding "
                                         "header" },
     { 0, nullptr }
index 584f7d8f9a8af056da0ce2c1f83ac24dc726550f..72a54d65577c506e2e2d9ce7ccf7830a5b17907a 100755 (executable)
@@ -65,9 +65,11 @@ long HttpTestManager::print_amount {};
 bool HttpTestManager::print_hex {};
 
 HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
-    int32_t identifier_depth_, uint8_t max_template_nesting_) :
+    int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_scope_depth_,
+    const std::unordered_set<std::string>& built_in_ident_) :
     uri_param(uri_param_), normalization_depth(normalization_depth_),
     identifier_depth(identifier_depth_), max_template_nesting(max_template_nesting_),
+    max_scope_depth(max_scope_depth_), built_in_ident(built_in_ident_),
     mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
 HttpJsNorm::~HttpJsNorm() = default;
 void HttpJsNorm::configure(){}
index f285760b41ff39f2d67e38e937b932ca35387ca6..1c95abcf26cd8a43e44e4d4119353f24512db424 100755 (executable)
@@ -54,9 +54,11 @@ void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { }
 void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { }
 
 HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_, int64_t normalization_depth_,
-    int32_t identifier_depth_, uint8_t max_template_nesting_) :
+    int32_t identifier_depth_, uint8_t max_template_nesting_, uint32_t max_scope_depth_,
+    const std::unordered_set<std::string>& built_in_ident_) :
     uri_param(uri_param_), normalization_depth(normalization_depth_),
     identifier_depth(identifier_depth_), max_template_nesting(max_template_nesting_),
+    max_scope_depth(max_scope_depth_), built_in_ident(built_in_ident_),
     mpse_otag(nullptr), mpse_attr(nullptr), mpse_type(nullptr) {}
 HttpJsNorm::~HttpJsNorm() = default;
 void HttpJsNorm::configure() {}
index 35b2b44dd29eabbd880dbe6a169d6923a780499a..277f320a5f3b5c91d655a4e175498c460a0a5cc7 100644 (file)
@@ -76,6 +76,11 @@ const char* JSIdentifierCtx::substitute(const char* identifier)
     return ident_names[identifier].c_str();
 }
 
+bool JSIdentifierCtx::built_in(const char* identifier) const // reports whether identifier is in ident_built_in
+{
+    return ident_built_in.count(identifier); // count() is converted to bool: non-zero (name present) yields true
+}
+
 void JSIdentifierCtx::reset()
 {
     ident_last_name = 0;
index b69ec867971cd0810f0b4ce2cfd287fa16bd8c27..c9824b573bc9b42c9f0a7eb4cd7ed05b63b6f4de 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 
 class JSIdentifierCtxBase
 {
@@ -29,6 +30,7 @@ public:
     virtual ~JSIdentifierCtxBase() = default;
 
     virtual const char* substitute(const char* identifier) = 0;
+    virtual bool built_in(const char* identifier) const = 0;
     virtual void reset() = 0;
     virtual size_t size() const = 0;
 };
@@ -36,9 +38,12 @@ public:
 class JSIdentifierCtx : public JSIdentifierCtxBase
 {
 public:
-    JSIdentifierCtx(int32_t depth) : depth(depth) {}
+    JSIdentifierCtx(int32_t depth, const std::unordered_set<std::string>& ident_built_in)
+        : depth(depth), ident_built_in(ident_built_in)
+    {}
 
     const char* substitute(const char* identifier) override;
+    bool built_in(const char* identifier) const override;
     void reset() override;
 
     // approximated to 500 unique mappings insertions
@@ -50,6 +55,7 @@ private:
     int32_t depth;
 
     std::unordered_map<std::string, std::string> ident_names;
+    const std::unordered_set<std::string>& ident_built_in;
 };
 
 #endif // JS_IDENTIFIER_CTX
index cca5ed758589504c36c2401a4fb021d678f8fab3..4e040b76bc313d0dac3c8a2bc4b626f03cd8fd12 100644 (file)
@@ -29,7 +29,7 @@ using namespace snort;
 using namespace std;
 
 JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth,
-    uint8_t max_template_nesting, int tmp_cap_size)
+    uint8_t max_template_nesting, uint32_t max_scope_depth, int tmp_cap_size)
     : depth(norm_depth),
       rem_bytes(norm_depth),
       unlim(norm_depth == static_cast<size_t>(-1)),
@@ -38,7 +38,7 @@ JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth,
       tmp_buf_size(0),
       in(&in_buf),
       out(&out_buf),
-      tokenizer(in, out, js_ident_ctx, max_template_nesting, tmp_buf, tmp_buf_size, tmp_cap_size)
+      tokenizer(in, out, js_ident_ctx, max_template_nesting, max_scope_depth, tmp_buf, tmp_buf_size, tmp_cap_size)
 {
 }
 
index c4f30d0e74b23d0438fc36be631d5eb0d9534cdb..f2866d11acc717f07ee5f82d19b17c010a4a5d0c 100644 (file)
@@ -34,7 +34,8 @@ class JSNormalizer
 {
 public:
     JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t depth,
-        uint8_t max_template_nesting, int tmp_cap_size = JSTOKENIZER_BUF_MAX_SIZE);
+        uint8_t max_template_nesting, uint32_t max_scope_depth,
+        int tmp_cap_size = JSTOKENIZER_BUF_MAX_SIZE);
     ~JSNormalizer();
 
     const char* get_src_next() const
index 4b9c0fe2b5ee05bd54701ade2700c35e202353be..47239648e0a2d206185490faa7bc7741b51b0bd5 100644 (file)
@@ -52,7 +52,27 @@ private:
         PUNCTUATOR,
         OPERATOR,
         LITERAL,
-        DIRECTIVE
+        DIRECTIVE,
+        DOT,
+        CLOSING_BRACKET
+    };
+
+    enum ScopeType
+    {
+        GLOBAL = 0,  // pushed once by the JSTokenizer constructor
+        BRACES,      // {}
+        PARENTHESES, // ()
+        BRACKETS     // []
+    };
+    struct Scope
+    {
+        Scope(ScopeType t)
+            : type(t), ident_norm(true), func_call(false)
+        {}
+
+        ScopeType type;
+        bool ident_norm; // false after a built-in identifier: identifiers following '.' are emitted as is
+        bool func_call;  // set when '(' directly follows an identifier, a ']' or a keyword
     };
 
     enum ASIGroup
@@ -84,11 +104,15 @@ public:
         BAD_TOKEN,
         IDENTIFIER_OVERFLOW,
         TEMPLATE_NESTING_OVERFLOW,
+        SCOPE_NESTING_OVERFLOW,
+        WRONG_CLOSING_SYMBOL,
+        ENDED_IN_INNER_SCOPE,
         MAX
     };
 
-    JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx,
-        uint8_t max_template_nesting, char*& buf, size_t& buf_size,
+    JSTokenizer() = delete;
+    explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx,
+        uint8_t max_template_nesting, uint32_t max_scope_depth, char*& buf, size_t& buf_size,
         int cap_size = JSTOKENIZER_BUF_MAX_SIZE);
     ~JSTokenizer() override;
 
@@ -106,21 +130,33 @@ private:
     JSRet do_spacing(JSToken cur_token);
     JSRet do_operator_spacing(JSToken cur_token);
     void do_semicolon_insertion(ASIGroup current);
-    JSRet do_identifier_substitution(const char* lexeme);
+    JSRet do_identifier_substitution(const char* lexeme, bool id_part);
     bool unescape(const char* lexeme);
     void process_punctuator();
-    void process_closing_bracket();
+    void process_closing_brace();
     JSRet process_subst_open();
 
     void states_push();
     void states_apply();
     void states_correct(int);
 
+    // scope stack servicing
+    JSRet scope_push(ScopeType);
+    JSRet scope_pop(ScopeType);
+    Scope& scope_cur();
+
+    // interactions with the current scope
+    bool global_scope();
+    void set_ident_norm(bool);
+    bool ident_norm();
+    void set_func_call(bool);
+    bool func_call();
+
     void* cur_buffer;
     void* tmp_buffer = nullptr;
     std::stringstream tmp;
     uint8_t max_template_nesting;
-    std::stack<uint16_t, std::vector<uint16_t>> bracket_depth;
+    std::stack<uint16_t, std::vector<uint16_t>> brace_depth;
     JSToken token = UNDEFINED;
     ASIGroup previous_group = ASI_OTHER;
     JSIdentifierCtxBase& ident_ctx;
@@ -136,6 +172,7 @@ private:
     char*& tmp_buf;
     size_t& tmp_buf_size;
     const int tmp_cap_size;
+
     bool newline_found = false;
     constexpr static bool insert_semicolon[ASI_GROUP_MAX][ASI_GROUP_MAX]
     {
@@ -151,6 +188,9 @@ private:
         {false, true,  false, true,  false, false, true,  true,  true,  true,  true, },
         {false, false, false, false, false, false, false, false, false, false, false,}
     };
+
+    const uint32_t max_scope_depth;
+    std::stack<Scope> scope_stack;
 };
 
 #endif // JS_TOKENIZER_H
index 81d8f30fec250d953159bdaf3f374be9f581d89e..f399dc1bcac0909560cb793d79dd5f7773580247 100644 (file)
@@ -878,13 +878,15 @@ KEYWORD_OTHER    case|catch|class|const|default|else|enum|export|extends|finally
 
 /* punctuators */
 /* according to https://ecma-international.org/ecma-262/5.1/#sec-7.7 */
-CLOSING_PAREN              ")"
-CLOSING_BRACE              "]"
-OPEN_BRACKET               "{"
-CLOSE_BRACKET              "}"
+OPEN_BRACE                 "{"
+CLOSE_BRACE                "}"
+OPEN_PARENTHESIS           "("
+CLOSE_PARENTHESIS          ")"
+OPEN_BRACKET               "["
+CLOSE_BRACKET              "]"
+DOT_ACCESSOR               "."
 PUNCTUATOR_PREFIX          "~"|"!"
-OPEN_PAREN_BRACE           "("|"["
-PUNCTUATOR                 ">="|"=="|"!="|"==="|"!=="|"."|";"|","|"<"|">"|"<="|"<<"|">>"|">>>"|"&"|"|"|"^"|"&&"|"||"|"?"|":"|"="|"+="|"-="|"*="|"%="|"<<="|">>="|">>>="|"&="|"|="|"^="
+PUNCTUATOR                 ">="|"=="|"!="|"==="|"!=="|";"|","|"<"|">"|"<="|"<<"|">>"|">>>"|"&"|"|"|"^"|"&&"|"||"|"?"|":"|"="|"+="|"-="|"*="|"%="|"<<="|">>="|">>>="|"&="|"|="|"^="
 OPERATOR_PREFIX            "+"|"-"
 OPERATOR_INCR_DECR         "--"|"++"
 OPERATOR                   "*"|"%"
@@ -966,7 +968,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 {LINE_TERMINATORS}                  { BEGIN(regst); newline_found = true; }
 
 <INITIAL,regex,dqstr,regst,sqstr,divop>{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); return OPENING_TAG; }
-{HTML_TAG_SCRIPT_CLOSE}             { BEGIN(regst); return SCRIPT_ENDED; }
+{HTML_TAG_SCRIPT_CLOSE}             { BEGIN(regst); if (!global_scope()) return ENDED_IN_INNER_SCOPE; else return SCRIPT_ENDED; }
 
        {HTML_COMMENT_OPEN}          { BEGIN(lcomm); }
        {LINE_COMMENT_START}         { BEGIN(lcomm); }
@@ -986,7 +988,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <bcomm>{BLOCK_COMMENT_SKIP}         { }
 <bcomm><<EOF>>                      { states_apply(); return SCRIPT_CONTINUE; }
 
-       {LITERAL_DQ_STRING_START}    { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); }
+       {LITERAL_DQ_STRING_START}    { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(dqstr); set_ident_norm(true); }
 <dqstr>{LITERAL_DQ_STRING_END}      { ECHO; BEGIN(divop); }
 <dqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); return CLOSING_TAG; }
 <dqstr>\\{CR}{LF}                   { }
@@ -997,7 +999,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <dqstr>{LITERAL_DQ_STRING_TEXT}     { ECHO; }
 <dqstr><<EOF>>                      { states_apply(); return SCRIPT_CONTINUE; }
 
-       {LITERAL_SQ_STRING_START}    { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); }
+       {LITERAL_SQ_STRING_START}    { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(sqstr); set_ident_norm(true); }
 <sqstr>{LITERAL_SQ_STRING_END}      { ECHO; BEGIN(divop); }
 <sqstr>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); return CLOSING_TAG; }
 <sqstr>\\{CR}{LF}                   { }
@@ -1008,10 +1010,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <sqstr>{LITERAL_SQ_STRING_TEXT}     { ECHO; }
 <sqstr><<EOF>>                      { states_apply(); return SCRIPT_CONTINUE; }
 
-{OPEN_BRACKET}                      { do_semicolon_insertion(ASI_GROUP_1); if (!bracket_depth.empty()) bracket_depth.top()++; process_punctuator(); }
-{CLOSE_BRACKET}                     { do_semicolon_insertion(ASI_GROUP_2); process_closing_bracket(); }
-
-       {LITERAL_TEMPLATE_START}                  { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); }
+       {LITERAL_TEMPLATE_START}                  { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(tmpll); set_ident_norm(true); }
 <tmpll>(\\\\)*{LITERAL_TEMPLATE_END}             { ECHO; BEGIN(divop); }
 <tmpll>(\\\\)*{LITERAL_TEMPLATE_SUBST_START}     { EXEC(process_subst_open()) }
 <tmpll>{HTML_TAG_SCRIPT_CLOSE}                   { BEGIN(regst); return CLOSING_TAG; }
@@ -1020,7 +1019,7 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <tmpll>{LITERAL_TEMPLATE_OTHER}                  { ECHO; }
 <tmpll><<EOF>>                                   { return SCRIPT_CONTINUE; }
 
-<regst>{LITERAL_REGEX_START}        { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); }
+<regst>{LITERAL_REGEX_START}        { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) yyout << '/'; states_correct(1); yyless(1); BEGIN(regex); set_ident_norm(true); }
 <regex>{LITERAL_REGEX_END}          { ECHO; BEGIN(divop); }
 <regex>{HTML_TAG_SCRIPT_CLOSE}      { BEGIN(regst); return CLOSING_TAG; }
 <regex>{LITERAL_REGEX_SKIP}         { ECHO; }
@@ -1031,28 +1030,33 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 <regex><<EOF>>                      { states_apply(); return SCRIPT_CONTINUE; }
 
 <divop>{DIV_OPERATOR}               |
-<divop>{DIV_ASSIGNMENT_OPERATOR}    { previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); }
+<divop>{DIV_ASSIGNMENT_OPERATOR}    { previous_group = ASI_OTHER; ECHO; token = PUNCTUATOR; BEGIN(INITIAL); set_ident_norm(true); }
+
+{OPEN_BRACE}                        { do_semicolon_insertion(ASI_GROUP_1); EXEC(scope_push(BRACES)) if (!brace_depth.empty()) brace_depth.top()++; process_punctuator(); }
+{CLOSE_BRACE}                       { do_semicolon_insertion(ASI_GROUP_2); EXEC(scope_pop(BRACES)) process_closing_brace(); set_ident_norm(true); }
+{OPEN_PARENTHESIS}                  { do_semicolon_insertion(ASI_GROUP_3); EXEC(scope_push(PARENTHESES)) if (token == IDENTIFIER || token == CLOSING_BRACKET || token == KEYWORD) set_func_call(true); process_punctuator(); }
+{CLOSE_PARENTHESIS}                 { do_semicolon_insertion(ASI_GROUP_5); bool f_call = func_call(); bool id_norm = ident_norm(); EXEC(scope_pop(PARENTHESES)) if (!f_call) set_ident_norm(id_norm); ECHO; token = PUNCTUATOR; BEGIN(divop); }
+{OPEN_BRACKET}                      { do_semicolon_insertion(ASI_GROUP_3); do_semicolon_insertion(ASI_GROUP_4); EXEC(scope_push(BRACKETS)) process_punctuator(); }
+{CLOSE_BRACKET}                     { do_semicolon_insertion(ASI_GROUP_4); EXEC(scope_pop(BRACKETS)) ECHO; token = CLOSING_BRACKET; BEGIN(divop); }
 
-{CLOSING_PAREN}                     { do_semicolon_insertion(ASI_GROUP_5); ECHO; token = PUNCTUATOR; BEGIN(divop); }
-{CLOSING_BRACE}                     { do_semicolon_insertion(ASI_GROUP_4); ECHO; token = PUNCTUATOR; BEGIN(divop); }
-{PUNCTUATOR_PREFIX}                 { do_semicolon_insertion(ASI_GROUP_10); process_punctuator(); }
-{OPEN_PAREN_BRACE}                  { do_semicolon_insertion(ASI_GROUP_3); process_punctuator(); }
-{PUNCTUATOR}                        { previous_group = ASI_OTHER; process_punctuator(); }
+{PUNCTUATOR_PREFIX}                 { do_semicolon_insertion(ASI_GROUP_10); process_punctuator(); set_ident_norm(true); }
+{DOT_ACCESSOR}                      { previous_group = ASI_OTHER; ECHO; token = DOT; BEGIN(regst); }
+{PUNCTUATOR}                        { previous_group = ASI_OTHER; process_punctuator(); set_ident_norm(true); }
 
-{USE_STRICT_DIRECTIVE}              { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; }
-{USE_STRICT_DIRECTIVE_SC}           { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); }
-{KEYWORD_B}                         { do_semicolon_insertion(ASI_GROUP_10); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-{KEYWORD_BA}                        { do_semicolon_insertion(ASI_GROUP_9); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
-{KEYWORD_OTHER}                     { previous_group = ASI_OTHER; EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
+{USE_STRICT_DIRECTIVE}              { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); yyout << ';'; set_ident_norm(true); }
+{USE_STRICT_DIRECTIVE_SC}           { previous_group = ASI_OTHER; EXEC(do_spacing(DIRECTIVE)) ECHO; BEGIN(INITIAL); set_ident_norm(true); }
 
-{OPERATOR_PREFIX}                   { do_semicolon_insertion(ASI_GROUP_6); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
-{OPERATOR_INCR_DECR}                { do_semicolon_insertion(ASI_GROUP_8); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
-{OPERATOR}                          { previous_group = ASI_OTHER; EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); }
+{KEYWORD_B}                         { do_semicolon_insertion(ASI_GROUP_10); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
+{KEYWORD_BA}                        { do_semicolon_insertion(ASI_GROUP_9); if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
+{KEYWORD_OTHER}                     { previous_group = ASI_OTHER; if (token != DOT) set_ident_norm(true); EXEC(do_spacing(KEYWORD)) ECHO; BEGIN(regst); }
 
-{LITERAL}                           { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); }
-{IDENTIFIER}                        { do_semicolon_insertion(ASI_GROUP_7); if (unescape(YYText())) { EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText())) } BEGIN(divop); }
+{OPERATOR_PREFIX}                   { do_semicolon_insertion(ASI_GROUP_6); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); set_ident_norm(true); }
+{OPERATOR_INCR_DECR}                { do_semicolon_insertion(ASI_GROUP_8); EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); set_ident_norm(true); }
+{OPERATOR}                          { previous_group = ASI_OTHER; EXEC(do_operator_spacing(OPERATOR)) ECHO; BEGIN(divop); set_ident_norm(true); }
+{LITERAL}                           { do_semicolon_insertion(ASI_GROUP_7); EXEC(do_spacing(LITERAL)) ECHO; BEGIN(divop); set_ident_norm(true); }
+{IDENTIFIER}                        { do_semicolon_insertion(ASI_GROUP_7); if (unescape(YYText())) { bool id_part = (token == DOT); EXEC(do_spacing(IDENTIFIER)) EXEC(do_identifier_substitution(YYText(), id_part)) } BEGIN(divop); }
 
-.|{ALL_UNICODE}                     { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); }
+.|{ALL_UNICODE}                     { previous_group = ASI_OTHER; ECHO; token = UNDEFINED; BEGIN(INITIAL); set_ident_norm(true); }
 <<EOF>>                             { EEOF(eval_eof()) }
 
 %%
@@ -1135,14 +1139,16 @@ static std::string unescape_unicode(const char* lexeme)
 
 JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out,
     JSIdentifierCtxBase& mapper, uint8_t max_template_nesting,
-    char*& buf, size_t& buf_size, int cap_size)
+    uint32_t max_scope_depth, char*& buf, size_t& buf_size, int cap_size)
     : yyFlexLexer(in, out),
       max_template_nesting(max_template_nesting),
       ident_ctx(mapper),
       tmp_buf(buf),
       tmp_buf_size(buf_size),
-      tmp_cap_size(cap_size)
+      tmp_cap_size(cap_size),
+      max_scope_depth(max_scope_depth) // nesting limit checked by scope_push()
 {
+    scope_push(GLOBAL); // seed the scope stack; scope_cur() asserts that it is not empty
     BEGIN(regst);
 }
 
@@ -1185,7 +1191,6 @@ JSTokenizer::JSRet JSTokenizer::eval_eof()
 
     // Normal termination
     states_apply();
-
     return SCRIPT_CONTINUE;
 }
 
@@ -1196,6 +1201,8 @@ JSTokenizer::JSRet JSTokenizer::do_spacing(JSToken cur_token)
     case PUNCTUATOR:
     case OPERATOR:
     case DIRECTIVE:
+    case DOT:
+    case CLOSING_BRACKET:
     case UNDEFINED:
         token = cur_token;
         return EOS;
@@ -1222,6 +1229,8 @@ JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token)
     case PUNCTUATOR:
     case LITERAL:
     case DIRECTIVE:
+    case DOT:
+    case CLOSING_BRACKET:
     case UNDEFINED:
         token = cur_token;
         return EOS;
@@ -1237,23 +1246,37 @@ JSTokenizer::JSRet JSTokenizer::do_operator_spacing(JSToken cur_token)
     return BAD_TOKEN;
 }
 
-JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme)
+JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, bool id_part) // id_part: previous token was '.'
 {
+    if (!ident_norm() && id_part) // member access while normalization is off: emit the name verbatim
+    {
+        yyout << lexeme;
+        return EOS;
+    }
+    else
+        set_ident_norm(true); // any other identifier switches normalization back on for this scope
+
+    if (ident_ctx.built_in(lexeme) && !id_part) // built-in name that is not itself accessed through '.'
+    {
+        set_ident_norm(false); // identifiers that follow it through '.' must stay unchanged
+        return do_identifier_substitution(lexeme, true); // re-enter once to take the verbatim branch above
+    }
+
     const char* ident = ident_ctx.substitute(lexeme);
 
-    if (ident)
+    if (!ident) // no substitute name was returned; reported as an overflow
     {
         debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr,
-            "'%s' => '%s'\n", lexeme, ident);
+            "'%s' => IDENTIFIER_OVERFLOW\n", lexeme);
 
-        yyout << ident;
-        return EOS;
+        return IDENTIFIER_OVERFLOW;
     }
 
     debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr,
-        "'%s' => IDENTIFIER_OVERFLOW\n", lexeme);
+        "'%s' => '%s'\n", lexeme, ident);
 
-    return IDENTIFIER_OVERFLOW;
+    yyout << ident; // write the substituted name instead of the original lexeme
+    return EOS;
 }
 
 void JSTokenizer::do_semicolon_insertion(ASIGroup current)
@@ -1281,7 +1304,6 @@ bool JSTokenizer::unescape(const char* lexeme)
         switch_to_temporal(unescaped_lex);
         return false;
     }
-
     return true;
 }
 
@@ -1292,15 +1314,15 @@ void JSTokenizer::process_punctuator()
     BEGIN(regst);
 }
 
-void JSTokenizer::process_closing_bracket()
+void JSTokenizer::process_closing_brace()
 {
-    if (!bracket_depth.empty())
+    if (!brace_depth.empty())
     {
-        if (bracket_depth.top())
-            bracket_depth.top()--;
+        if (brace_depth.top())
+            brace_depth.top()--;
         else
         {
-            bracket_depth.pop();
+            brace_depth.pop();
             ECHO;
             BEGIN(tmpll);
             return;
@@ -1311,13 +1333,13 @@ void JSTokenizer::process_closing_bracket()
 
 JSTokenizer::JSRet JSTokenizer::process_subst_open()
 {
-    if (bracket_depth.size() >= max_template_nesting)
+    if (brace_depth.size() >= max_template_nesting) // one brace_depth entry exists per open template substitution
         return TEMPLATE_NESTING_OVERFLOW;
-    bracket_depth.push(0);
+    brace_depth.push(0); // counter of plain '{' opened inside this substitution starts at zero
     token = PUNCTUATOR;
     ECHO;
     BEGIN(divop);
-    return EOS;
+    return scope_push(BRACES); // the substitution is tracked as a BRACES scope too; may yield SCOPE_NESTING_OVERFLOW
 }
 
 void JSTokenizer::states_push()
@@ -1370,4 +1392,78 @@ void JSTokenizer::states_apply()
     delete[] tmp_buf;
     tmp_buf = buf;
     tmp_buf_size = tail_size;
+
+    // Reverse traversal over buffer to adjust scope stack before the next PDU buffer starts
+    bool is_tmpl = false;
+    const char* c = tmp_buf + tmp_buf_size;
+    const char* const s = tmp_buf;
+    while (c-- > s)
+    {
+        switch (*c)
+        {
+        case '{': scope_pop(BRACES); if (is_tmpl) brace_depth.pop(); break;
+        case '}': scope_push(BRACES); if (is_tmpl) brace_depth.push(0); break;
+        case '(': scope_pop(PARENTHESES); break;
+        case ')':
+        {
+            bool id_norm = ident_norm();
+            scope_push(PARENTHESES);
+            if (!id_norm)
+                set_func_call(true);
+            break;
+        }
+        case '[': scope_pop(BRACKETS); break;
+        case ']': scope_push(BRACKETS); break;
+        case '`': is_tmpl = !is_tmpl; break;
+        }
+    }
+}
+
+JSTokenizer::JSRet JSTokenizer::scope_push(ScopeType t)
+{
+    if (scope_stack.size() > max_scope_depth) // size() includes the GLOBAL entry pushed by the constructor
+        return SCOPE_NESTING_OVERFLOW;
+
+    scope_stack.emplace(t); // the new Scope starts with ident_norm = true and func_call = false
+    return EOS;
+}
+
+JSTokenizer::JSRet JSTokenizer::scope_pop(ScopeType t)
+{
+    if (t != scope_cur().type) // closing symbol does not match the innermost open scope; nothing is popped
+        return WRONG_CLOSING_SYMBOL;
+
+    scope_stack.pop();
+    return EOS;
+}
+
+JSTokenizer::Scope& JSTokenizer::scope_cur()
+{
+    assert(!scope_stack.empty()); // the constructor pushes GLOBAL, so a top element is expected here
+    return scope_stack.top();
+}
+
+bool JSTokenizer::global_scope()
+{
+    return scope_cur().type == GLOBAL; // true when the innermost scope is the GLOBAL one
+}
+
+void JSTokenizer::set_ident_norm(bool f)
+{
+    scope_cur().ident_norm = f; // changes the flag of the innermost scope only
+}
+
+bool JSTokenizer::ident_norm()
+{
+    return scope_cur().ident_norm; // flag of the innermost scope
+}
+
+void JSTokenizer::set_func_call(bool f)
+{
+    scope_cur().func_call = f; // changes the flag of the innermost scope only
+}
+
+bool JSTokenizer::func_call()
+{
+    return scope_cur().func_call; // the ")" rule reads this before it pops the PARENTHESES scope
 }
index 1baa74a83d7c21215f2a8de1df7589d2298783af..2b37036d141a495c765b00713517f807d0ec524c 100644 (file)
 
 #define DEPTH 65536
 
+static const std::unordered_set<std::string> s_ident_built_in { "console" };
+
 TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
 {
     SECTION("same name")
     {
-        JSIdentifierCtx ident_ctx(DEPTH);
+        JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in);
 
         CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
         CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
     }
     SECTION("different names")
     {
-        JSIdentifierCtx ident_ctx(DEPTH);
+        JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in);
 
         CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
         CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001"));
@@ -51,7 +53,7 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
     }
     SECTION("depth reached")
     {
-        JSIdentifierCtx ident_ctx(2);
+        JSIdentifierCtx ident_ctx(2, s_ident_built_in);
 
         CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
         CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001"));
@@ -61,7 +63,7 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
     }
     SECTION("max names")
     {
-        JSIdentifierCtx ident_ctx(DEPTH + 2);
+        JSIdentifierCtx ident_ctx(DEPTH + 2, s_ident_built_in);
 
         std::vector<std::string> n, e;
         n.reserve(DEPTH + 2);
@@ -86,3 +88,11 @@ TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
     }
 }
 
+TEST_CASE("JSIdentifierCtx::built_in()", "[JSIdentifierCtx]")
+{
+    JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); // in this file s_ident_built_in holds only "console"
+
+    SECTION("match") { CHECK(ident_ctx.built_in("console") == true); }
+    SECTION("no match") { CHECK(ident_ctx.built_in("foo") == false); }
+}
+
index 5b059466b89dad2f7c5484d6ac23b0425879fcb3..9ac56bf7bc4fb3d7e56ee908032cfd1aba272ed9 100644 (file)
@@ -48,6 +48,8 @@ public:
 
     const char* substitute(const char* identifier) override
     { return identifier; }
+    bool built_in(const char*) const override
+    { return false; }
     void reset() override {}
     size_t size() const override { return 0; }
 };
@@ -57,7 +59,10 @@ public:
 using namespace snort;
 
 #define DEPTH 65535
-#define MAX_TEMPLATE_NESTNIG 4
+#define MAX_TEMPLATE_NESTING 4
+#define MAX_SCOPE_DEPTH 256
+
+static const std::unordered_set<std::string> s_ident_built_in { "console", "eval", "document" };
 
 // Unit tests
 
@@ -67,7 +72,7 @@ using namespace snort;
 
 #define NORMALIZE(src)                                             \
     JSIdentifierCtxTest ident_ctx;                                 \
-    JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG);     \
+    JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);     \
     auto ret = norm.normalize(src, sizeof(src));                   \
     const char* ptr = norm.get_src_next();                         \
     auto result = norm.get_script();                               \
@@ -92,7 +97,7 @@ using namespace snort;
 #define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \
     {                                                                 \
         JSIdentifierCtxTest ident_ctx;                                \
-        JSNormalizer norm(ident_ctx, depth, MAX_TEMPLATE_NESTNIG);    \
+        JSNormalizer norm(ident_ctx, depth, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);    \
         ret = norm.normalize(src, src_len);                           \
         ptr = norm.get_src_next();                                    \
         auto result = norm.get_script();                              \
@@ -136,13 +141,56 @@ using namespace snort;
         CHECK(ret == JSTokenizer::SCRIPT_ENDED);                        \
     }
 
+#define NORMALIZE_S(src1, exp1)                                     \
+    {                                                               \
+        char dst1[sizeof(exp1)];                                    \
+                                                                    \
+        JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); /* real context, built-ins from s_ident_built_in */ \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
+                                                                    \
+        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
+        CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); /* compares without the terminating NUL */ \
+                                                                    \
+        CLOSE();                                                    \
+    }
+
+#define NORMALIZE_T(src1, src2, exp1, exp2)                         \
+    {                                                               \
+        char dst1[sizeof(exp1)];                                    \
+        char dst2[sizeof(exp2)];                                    \
+                                                                    \
+        JSIdentifierCtx ident_ctx(DEPTH, s_ident_built_in); /* real context, built-ins from s_ident_built_in */ \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
+                                                                    \
+        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
+        CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
+                                                                    \
+        DO(src2, sizeof(src2) - 1, dst2, sizeof(dst2) - 1); /* second input; ident_ctx and norm are not re-created */ \
+        CHECK(!memcmp(exp2, dst2, sizeof(exp2) - 1));               \
+                                                                    \
+        CLOSE();                                                    \
+    }
+
+#define NORMALIZE_1(src1, exp1)                                     \
+    {                                                               \
+        char dst1[sizeof(exp1)];                                    \
+                                                                    \
+        JSIdentifierCtxTest ident_ctx; /* test context: substitute() returns its argument, built_in() is false */ \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
+                                                                    \
+        DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
+        CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); /* compares without the terminating NUL */ \
+                                                                    \
+        CLOSE();                                                    \
+    }
+
 #define NORMALIZE_2(src1, src2, exp1, exp2)                         \
     {                                                               \
         char dst1[sizeof(exp1)];                                    \
         char dst2[sizeof(exp2)];                                    \
                                                                     \
         JSIdentifierCtxTest ident_ctx;                              \
-        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG);  \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
                                                                     \
         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
@@ -160,7 +208,7 @@ using namespace snort;
         char dst3[sizeof(exp3)];                                    \
                                                                     \
         JSIdentifierCtxTest ident_ctx;                              \
-        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG);  \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
                                                                     \
         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
@@ -174,13 +222,24 @@ using namespace snort;
         CLOSE();                                                    \
     }
 
+#define NORM_BAD_1(src1, exp1, code)                                \
+    {                                                               \
+        char dst1[sizeof(exp1)];                                    \
+                                                                    \
+        JSIdentifierCtxTest ident_ctx; /* test context: substitute() returns its argument, built_in() is false */ \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
+                                                                    \
+        TRY(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1, code); /* code: presumably the expected return code - confirm against TRY */ \
+        CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1)); /* exp1 is the output expected up to the failure point */ \
+    }
+
 #define NORM_BAD_2(src1, src2, exp1, exp2, code)                    \
     {                                                               \
         char dst1[sizeof(exp1)];                                    \
         char dst2[sizeof(exp2)];                                    \
                                                                     \
         JSIdentifierCtxTest ident_ctx;                              \
-        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG);  \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
                                                                     \
         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
@@ -196,7 +255,7 @@ using namespace snort;
         char dst3[sizeof(exp3)];                                    \
                                                                     \
         JSIdentifierCtxTest ident_ctx;                              \
-        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG);  \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);  \
                                                                     \
         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);         \
         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));               \
@@ -214,7 +273,7 @@ using namespace snort;
         char dst2[sizeof(exp2)];                                        \
                                                                         \
         JSIdentifierCtxTest ident_ctx;                                  \
-        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTNIG, limit); \
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH, limit); \
                                                                         \
         DO(src1, sizeof(src1) - 1, dst1, sizeof(dst1) - 1);             \
         CHECK(!memcmp(exp1, dst1, sizeof(exp1) - 1));                   \
@@ -1392,7 +1451,7 @@ TEST_CASE("endings", "[JSNormalizer]")
         int ret;
 
         JSIdentifierCtxTest ident_ctx;
-        JSNormalizer norm(ident_ctx, 7, MAX_TEMPLATE_NESTNIG);
+        JSNormalizer norm(ident_ctx, 7, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);
         ret = norm.normalize(src, sizeof(src));
         ptr = norm.get_src_next();
         auto res1 = norm.get_script();
@@ -2206,6 +2265,1030 @@ TEST_CASE("memcap", "[JSNormalizer]")
     }
 }
 
+TEST_CASE("scope tracking", "[JSNormalizer]")  // bracket scopes (), {}, []: balanced, mismatched and split across two chunks
+{  // NOTE(review): NORMALIZE_*/NORM_BAD_* are macros defined outside this chunk; argument roles (inputs, expected text, expected code) are inferred from the data — confirm
+    SECTION("parentheses")  // balanced input: each exp string equals its dat string
+    {
+        const char dat1[] = "()";
+        const char dat2[] = "()()()";
+        const char dat3[] = "((()))";
+        const char exp1[] = "()";
+        const char exp2[] = "()()()";
+        const char exp3[] = "((()))";
+
+        NORMALIZE_1(dat1, exp1);
+        NORMALIZE_1(dat2, exp2);
+        NORMALIZE_1(dat3, exp3);
+    }
+    SECTION("curly braces")  // balanced input: each exp string equals its dat string
+    {
+        const char dat1[] = "{}";
+        const char dat2[] = "{}{}{}";
+        const char dat3[] = "{{{}}}";
+        const char exp1[] = "{}";
+        const char exp2[] = "{}{}{}";
+        const char exp3[] = "{{{}}}";
+
+        NORMALIZE_1(dat1, exp1);
+        NORMALIZE_1(dat2, exp2);
+        NORMALIZE_1(dat3, exp3);
+    }
+    SECTION("square brackets")  // balanced input: each exp string equals its dat string
+    {
+        const char dat1[] = "[]";
+        const char dat2[] = "[][][]";
+        const char dat3[] = "[[[]]]";
+        const char exp1[] = "[]";
+        const char exp2[] = "[][][]";
+        const char exp3[] = "[[[]]]";
+
+        NORMALIZE_1(dat1, exp1);
+        NORMALIZE_1(dat2, exp2);
+        NORMALIZE_1(dat3, exp3);
+    }
+    SECTION("mix of brackets")  // different bracket kinds in sequence and nested, all balanced
+    {
+        const char dat1[] = "(){}[]";
+        const char dat2[] = "({})[]";
+        const char dat3[] = "(){[]}";
+        const char exp1[] = "(){}[]";
+        const char exp2[] = "({})[]";
+        const char exp3[] = "(){[]}";
+
+        NORMALIZE_1(dat1, exp1);
+        NORMALIZE_1(dat2, exp2);
+        NORMALIZE_1(dat3, exp3);
+    }
+    SECTION("parentheses - wrong closing symbol")  // innermost '(' meets '}' or ']'
+    {
+        const char dat1[] = "({[ (} ]})";
+        const char dat2[] = "({[ (] ]})";
+        const char exp1[] = "({[(";  // expected text stops before the offending closer; the input space is absent
+        const char exp2[] = "({[(";
+
+        NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+    }
+    SECTION("curly braces - wrong closing symbol")  // innermost '{' meets ')' or ']'
+    {
+        const char dat1[] = "({[ {) ]})";
+        const char dat2[] = "({[ {] ]})";
+        const char exp1[] = "({[{";
+        const char exp2[] = "({[{";
+
+        NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+    }
+    SECTION("square brackets - wrong closing symbol")  // innermost '[' meets ')' or '}'
+    {
+        const char dat1[] = "([{ [) }])";
+        const char dat2[] = "([{ [} }])";
+        const char exp1[] = "([{[";
+        const char exp2[] = "([{[";
+
+        NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+    }
+    SECTION("parentheses - mismatch")  // surplus ')' or "</script>" while '(' is still open
+    {
+        const char dat1[] = ")";
+        const char dat2[] = "())";
+        const char dat3[] = "({[ ()) ]})";
+        const char dat4[] = "(</script>";
+        const char dat5[] = "(()</script>";
+        const char exp1[] = "";
+        const char exp2[] = "()";
+        const char exp3[] = "({[()";
+        const char exp4[] = "(";
+        const char exp5[] = "(()";
+
+        NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
+        NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
+    }
+    SECTION("curly braces - mismatch")  // surplus '}' or "</script>" while '{' is still open
+    {
+        const char dat1[] = "}";
+        const char dat2[] = "{}}";
+        const char dat3[] = "({[ {}} ]})";
+        const char dat4[] = "{</script>";
+        const char dat5[] = "{{}</script>";
+        const char exp1[] = "";
+        const char exp2[] = "{}";
+        const char exp3[] = "({[{}";
+        const char exp4[] = "{";
+        const char exp5[] = "{{}";
+
+        NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
+        NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
+    }
+    SECTION("square brackets - mismatch")  // surplus ']' or "</script>" while '[' is still open
+    {
+        const char dat1[] = "]";
+        const char dat2[] = "[]]";
+        const char dat3[] = "([{ []] }])";
+        const char dat4[] = "[</script>";
+        const char dat5[] = "[[]</script>";
+        const char exp1[] = "";
+        const char exp2[] = "[]";
+        const char exp3[] = "([{[]";
+        const char exp4[] = "[";
+        const char exp5[] = "[[]";
+
+        NORM_BAD_1(dat1, exp1, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat2, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat3, exp3, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_1(dat4, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
+        NORM_BAD_1(dat5, exp5, JSTokenizer::ENDED_IN_INNER_SCOPE);
+    }
+    SECTION("parentheses - continuation")  // scopes opened in the first chunk are closed in the second
+    {
+        const char dat1[] = "((";
+        const char dat2[] = "))";
+        const char exp1[] = "((";
+        const char exp2[] = "(())";  // first chunk's output followed by the second chunk's
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+    }
+    SECTION("curly braces - continuation")  // scopes opened in the first chunk are closed in the second
+    {
+        const char dat1[] = "{{";
+        const char dat2[] = "}}";
+        const char exp1[] = "{{";
+        const char exp2[] = "{{}}";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+    }
+    SECTION("square brackets - continuation")  // scopes opened in the first chunk are closed in the second
+    {
+        const char dat1[] = "[[";
+        const char dat2[] = "]]";
+        const char exp1[] = "[[";
+        const char exp2[] = "[[]]";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+    }
+    SECTION("parentheses - mismatch in continuation")  // the error shows up only in the second chunk
+    {
+        const char dat1[] = "(";
+        const char dat2[] = "))";
+        const char dat3[] = "(";
+        const char dat4[] = " </script>";
+        const char exp1[] = "(";
+        const char exp2[] = "()";
+        const char exp3[] = "(";
+        const char exp4[] = "(";
+
+        NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
+    }
+    SECTION("curly braces - mismatch in continuation")  // the error shows up only in the second chunk
+    {
+        const char dat1[] = "{";
+        const char dat2[] = "}}";
+        const char dat3[] = "{";
+        const char dat4[] = " </script>";
+        const char exp1[] = "{";
+        const char exp2[] = "{}";
+        const char exp3[] = "{";
+        const char exp4[] = "{";
+
+        NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
+    }
+    SECTION("square brackets - mismatch in continuation")  // the error shows up only in the second chunk
+    {
+        const char dat1[] = "[";
+        const char dat2[] = "]]";
+        const char dat3[] = "[";
+        const char dat4[] = " </script>";
+        const char exp1[] = "[";
+        const char exp2[] = "[]";
+        const char exp3[] = "[";
+        const char exp4[] = "[";
+
+        NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::ENDED_IN_INNER_SCOPE);
+    }
+}
+
+TEST_CASE("scope misc", "[JSNormalizer]")
+{
+    const int stack_limit = 256;
+    const char* open = "1+(";
+    const char* close = "-1)";
+
+    SECTION("max stack")
+    {
+        std::string scr;
+
+        for (int i = 0; i < stack_limit; ++i)
+            scr += open;
+        for (int i = 0; i < stack_limit; ++i)
+            scr += close;
+
+        const char* dat = scr.c_str();
+        int dat_len = strlen(dat);
+        const char* exp = scr.c_str();
+        int exp_len = strlen(exp);
+        char* act = new char[exp_len];
+
+        JSIdentifierCtxTest ident_ctx;
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);
+
+        DO(dat, dat_len, act, exp_len);
+        CHECK(!memcmp(exp, act, exp_len));
+
+        delete[] act;
+
+        CLOSE();
+    }
+
+    SECTION("max stack")
+    {
+        std::string scr;
+        std::string nsc;
+
+        for (int i = 0; i < stack_limit + 1; ++i)
+            scr += open;
+        for (int i = 0; i < stack_limit + 1; ++i)
+            scr += close;
+        for (int i = 0; i < stack_limit; ++i)
+            nsc += open;
+        nsc += "1+";
+
+        const char* dat = scr.c_str();
+        int dat_len = strlen(dat);
+        const char* exp = nsc.c_str();
+        int exp_len = strlen(exp);
+        char* act = new char[exp_len];
+
+        JSIdentifierCtxTest ident_ctx;
+        JSNormalizer norm(ident_ctx, DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);
+
+        TRY(dat, dat_len, act, exp_len, JSTokenizer::SCOPE_NESTING_OVERFLOW);
+        CHECK(!memcmp(exp, act, exp_len));
+
+        delete[] act;
+    }
+}
+
+TEST_CASE("scope tail handling", "[JSNormalizer]")  // two-chunk inputs; every second expected string = last 8 characters of the first output + the second chunk's output
+{
+    // The ':' symbol has been chosen for padding, since it:
+    //  * forms a single state for Parser
+    //  * doesn't insert white spaces
+    //  * forms a single match, i.e. there are no '::' ':::' patterns
+    //
+    // Thus, a tail such as "::({[]})" holds JSTOKENIZER_MAX_STATES states
+    // and exactly the same number of characters.
+
+#if JSTOKENIZER_MAX_STATES != 8
+#error "scope tail handling" tests are designed for the tail of 8 bytes size
+#endif
+
+    SECTION("no scope-symbols in the tail")  // first chunk ends with 8 ':' characters; its brackets come before them
+    {
+        const char dat1[] = "((((::::::::";
+        const char dat2[] = "):):):):";
+        const char dat3[] = "{}{{::::::::";
+        const char dat4[] = "::{}}}::";
+        const char dat5[] = "[][[::::::::";
+        const char dat6[] = "::::]][]";
+        const char exp1[] = "((((::::::::";
+        const char exp2[] = "::::::::):):):):";
+        const char exp3[] = "{}{{::::::::";
+        const char exp4[] = "::::::::::{}}}::";
+        const char exp5[] = "[][[::::::::";
+        const char exp6[] = "::::::::::::]][]";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORMALIZE_2(dat3, dat4, exp3, exp4);
+        NORMALIZE_2(dat5, dat6, exp5, exp6);
+    }
+
+    SECTION("opening scope-symbols in the tail")  // an opener lies within the last 8 characters of the first chunk and is closed by the second
+    {
+        const char dat1[] = "::::(:::::::";
+        const char dat2[] = "):::::::";
+        const char dat3[] = ":::::::::::{";
+        const char dat4[] = ":::::::}";
+        const char dat5[] = "::::[:::::::";
+        const char dat6[] = "::::]:::";
+        const char exp1[] = "::::(:::::::";
+        const char exp2[] = "(:::::::):::::::";
+        const char exp3[] = ":::::::::::{";
+        const char exp4[] = ":::::::{:::::::}";
+        const char exp5[] = "::::[:::::::";
+        const char exp6[] = "[:::::::::::]:::";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORMALIZE_2(dat3, dat4, exp3, exp4);
+        NORMALIZE_2(dat5, dat6, exp5, exp6);
+    }
+
+    SECTION("closing scope-symbols in the tail")  // a closer lies within the last 8 characters of the first chunk
+    {
+        const char dat1[] = "(((()::::::)";
+        const char dat2[] = "()::::))";
+        const char dat3[] = "{{{{:::::::}";
+        const char dat4[] = ":::::}}}";
+        const char dat5[] = "[::::::::]::";
+        const char dat6[] = "::::::::";
+        const char exp1[] = "(((()::::::)";
+        const char exp2[] = ")::::::)()::::))";
+        const char exp3[] = "{{{{:::::::}";
+        const char exp4[] = ":::::::}:::::}}}";
+        const char exp5[] = "[::::::::]::";
+        const char exp6[] = ":::::]::::::::::";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORMALIZE_2(dat3, dat4, exp3, exp4);
+        NORMALIZE_2(dat5, dat6, exp5, exp6);
+    }
+
+    SECTION("newly opening scope-symbols in the tail")  // openers both before and within the last 8 characters; all are closed by the second chunk
+    {
+        const char dat1[] = "(:::(::::::(";
+        const char dat2[] = "))):::::";
+        const char dat3[] = "{:{:{:{:{:{:";
+        const char dat4[] = "::}}}}}}";
+        const char dat5[] = "[:[:[:::[:::";
+        const char dat6[] = "::::]]]]";
+        const char exp1[] = "(:::(::::::(";
+        const char exp2[] = "(::::::())):::::";
+        const char exp3[] = "{:{:{:{:{:{:";
+        const char exp4[] = "{:{:{:{:::}}}}}}";
+        const char exp5[] = "[:[:[:::[:::";
+        const char exp6[] = "[:::[:::::::]]]]";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORMALIZE_2(dat3, dat4, exp3, exp4);
+        NORMALIZE_2(dat5, dat6, exp5, exp6);
+    }
+
+    SECTION("fully closing scope-symbols in the tail")  // the first chunk is balanced; its final closers lie within the last 8 characters
+    {
+        const char dat1[] = "((((::::))))";
+        const char dat2[] = "::::::::";
+        const char dat3[] = "{{{{}:}:}:}:";
+        const char dat4[] = "::::{}{}";
+        const char dat5[] = "[[:::::::]:]";
+        const char dat6[] = "[::::::]";
+        const char exp1[] = "((((::::))))";
+        const char exp2[] = "::::))))::::::::";
+        const char exp3[] = "{{{{}:}:}:}:";
+        const char exp4[] = "}:}:}:}:::::{}{}";
+        const char exp5[] = "[[:::::::]:]";
+        const char exp6[] = ":::::]:][::::::]";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORMALIZE_2(dat3, dat4, exp3, exp4);
+        NORMALIZE_2(dat5, dat6, exp5, exp6);
+    }
+
+    SECTION("extra scope-symbols in the tail")  // first chunk is 8 openers; the second has a surplus/wrong closer or ends the script one closer short
+    {
+        const char dat1[] = "((((((((";
+        const char dat2[] = ")))))))))";  // nine closers for eight openers
+        const char dat3[] = "{{{{{{{{";
+        const char dat4[] = "}}}}}}]}";  // ']' where a '}' is due
+        const char dat5[] = "[[[[[[[[";
+        const char dat6[] = "]]]]]]]</script>";  // seven closers, then end of script
+        const char exp1[] = "((((((((";
+        const char exp2[] = "(((((((())))))))";
+        const char exp3[] = "{{{{{{{{";
+        const char exp4[] = "{{{{{{{{}}}}}}";
+        const char exp5[] = "[[[[[[[[";
+        const char exp6[] = "[[[[[[[[]]]]]]]";
+
+        NORM_BAD_2(dat1, dat2, exp1, exp2, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_2(dat3, dat4, exp3, exp4, JSTokenizer::WRONG_CLOSING_SYMBOL);
+        NORM_BAD_2(dat5, dat6, exp5, exp6, JSTokenizer::ENDED_IN_INNER_SCOPE);
+    }
+
+    SECTION("overwriting scope-symbols in the tail")  // the last 8 characters of the first chunk contain both openers and closers
+    {
+        const char dat1[] = "(((((((())))";
+        const char dat2[] = ":))))";
+        const char dat3[] = "({[(:):]{}{}";
+        const char dat4[] = "}[]())";
+        const char dat5[] = "{{{{}[[]](((";
+        const char dat6[] = ")))}}}";
+        const char exp1[] = "(((((((())))";
+        const char exp2[] = "(((()))):))))";
+        const char exp3[] = "({[(:):]{}{}";
+        const char exp4[] = ":):]{}{}}[]())";
+        const char exp5[] = "{{{{}[[]](((";
+        const char exp6[] = "}[[]]((()))}}}";
+
+        NORMALIZE_2(dat1, dat2, exp1, exp2);
+        NORMALIZE_2(dat3, dat4, exp3, exp4);
+        NORMALIZE_2(dat5, dat6, exp5, exp6);
+    }
+}
+
+TEST_CASE("built-in identifiers syntax", "[JSNormalizer]")  // which identifiers keep their name and which become var_NNNN
+{  // NOTE(review): NORMALIZE_S is defined outside this chunk; presumably (input, expected output) — confirm
+    // 'console' 'eval' 'document' are built-in identifiers
+
+    SECTION("a standalone identifier")  // built-in names are kept; other names are replaced, including "console1" and "evaluate"
+    {
+        const char dat1[] = "alpha bravo console delta eval";
+        const char dat2[] = "var a = 0;     console = 1;";
+        const char dat3[] = "var a = 0; var console = 1;";
+        const char dat4[] = "foo(0); console(1); bar(2); console1(3); baz(4);";
+        const char dat5[] = "foo(0);    eval(1); bar(2); evaluate(3); baz(4);";
+        const char exp1[] = "var_0000 var_0001 console var_0002 eval";
+        const char exp2[] = "var var_0000=0;console=1;";
+        const char exp3[] = "var var_0000=0;var console=1;";
+        const char exp4[] = "var_0000(0);console(1);var_0001(2);var_0002(3);var_0003(4);";
+        const char exp5[] = "var_0000(0);eval(1);var_0001(2);var_0002(3);var_0003(4);";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+    }
+
+    SECTION("inner objects")  // a chain headed by a built-in is kept whole; a built-in name deeper in another chain is replaced
+    {
+        const char dat1[] = "alpha.bravo.charlie.delta";
+        const char dat2[] = "alpha.bravo.console.delta";
+        const char dat3[] = "eval.alpha.bravo.charlie.delta";
+        const char dat4[] = "eval.alpha.bravo.console.delta";
+        const char exp1[] = "var_0000.var_0001.var_0002.var_0003";
+        const char exp2[] = "var_0000.var_0001.var_0002.var_0003";
+        const char exp3[] = "eval.alpha.bravo.charlie.delta";
+        const char exp4[] = "eval.alpha.bravo.console.delta";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+    }
+
+    SECTION("function calls")  // same rule with "()" placed at each position of a three-member chain
+    {
+        const char dat1[] = "foo.bar.baz()";
+        const char dat2[] = "foo.bar().baz";
+        const char dat3[] = "foo().bar.baz";
+        const char dat4[] = "eval.bar.baz()";
+        const char dat5[] = "eval.bar().baz";
+        const char dat6[] = "eval().bar.baz";
+        const char dat7[] = "foo.eval.baz()";
+        const char dat8[] = "foo.eval().baz";
+        const char dat9[] = "foo().eval.baz";
+        const char dat10[] = "foo.bar.eval()";
+        const char dat11[] = "foo.bar().eval";
+        const char dat12[] = "var_0000().bar.eval";  // NOTE(review): sibling cases (dat3, dat9) use "foo" here — confirm "var_0000" in the input is intended
+        const char exp1[] = "var_0000.var_0001.var_0002()";
+        const char exp2[] = "var_0000.var_0001().var_0002";
+        const char exp3[] = "var_0000().var_0001.var_0002";
+        const char exp4[] = "eval.bar.baz()";
+        const char exp5[] = "eval.bar().baz";
+        const char exp6[] = "eval().bar.baz";
+        const char exp7[] = "var_0000.var_0001.var_0002()";
+        const char exp8[] = "var_0000.var_0001().var_0002";
+        const char exp9[] = "var_0000().var_0001.var_0002";
+        const char exp10[] = "var_0000.var_0001.var_0002()";
+        const char exp11[] = "var_0000.var_0001().var_0002";
+        const char exp12[] = "var_0000().var_0001.var_0002";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+        NORMALIZE_S(dat8, exp8);
+        NORMALIZE_S(dat9, exp9);
+        NORMALIZE_S(dat10, exp10);
+        NORMALIZE_S(dat11, exp11);
+        NORMALIZE_S(dat12, exp12);
+    }
+}
+
+TEST_CASE("built-in chain tracking", "[JSNormalizer]")  // where a member chain that starts with a built-in identifier ends
+{  // NOTE(review): NORMALIZE_S is defined outside this chunk; presumably (input, expected output) — confirm
+    // 'console' 'eval' 'document' are built-in identifiers
+
+    SECTION("chain terminators")  // after "eval.foo", each separator/operator below ends the chain, so "bar.baz" is replaced
+    {
+        const char dat1[] = "eval.foo.bar.baz";  // dots only: the whole chain is kept
+        const char dat2[] = "eval.foo bar.baz";
+        const char dat3[] = "eval.foo;bar.baz";
+        const char dat4[] = "eval.foo,bar.baz";
+        const char dat5[] = "eval.foo*bar.baz";
+        const char dat6[] = "eval.foo*=bar.baz";
+        const char dat7[] = "eval.foo/bar.baz";
+        const char dat8[] = "eval.foo/=bar.baz";
+        const char dat9[] = "eval.foo%bar.baz";
+        const char dat10[] = "eval.foo%=bar.baz";
+        const char dat11[] = "eval.foo+bar.baz";
+        const char dat12[] = "eval.foo+=bar.baz";
+        const char dat13[] = "eval.foo-bar.baz";
+        const char dat14[] = "eval.foo-=bar.baz";
+        const char dat15[] = "eval.foo<<bar.baz";
+        const char dat16[] = "eval.foo<<=bar.baz";
+        const char dat17[] = "eval.foo>>bar.baz";
+        const char dat18[] = "eval.foo>>=bar.baz";
+        const char dat19[] = "eval.foo>>>bar.baz";
+        const char dat20[] = "eval.foo>>>=bar.baz";
+        const char dat21[] = "eval.foo<bar.baz";
+        const char dat22[] = "eval.foo<=bar.baz";
+        const char dat23[] = "eval.foo>bar.baz";
+        const char dat24[] = "eval.foo>=bar.baz";
+        const char dat25[] = "eval.foo instanceof bar.baz";
+        const char dat26[] = "eval.foo==bar.baz";
+        const char dat27[] = "eval.foo!=bar.baz";
+        const char dat28[] = "eval.foo===bar.baz";
+        const char dat29[] = "eval.foo!==bar.baz";
+        const char dat30[] = "eval.foo&bar.baz";
+        const char dat31[] = "eval.foo&=bar.baz";
+        const char dat32[] = "eval.foo&&bar.baz";
+        const char dat33[] = "eval.foo|bar.baz";
+        const char dat34[] = "eval.foo|=bar.baz";
+        const char dat35[] = "eval.foo||bar.baz";
+        const char dat36[] = "eval.foo^bar.baz";
+        const char dat37[] = "eval.foo^=bar.baz";
+        const char dat38[] = "eval.foo?bar.baz";
+        const char dat39[] = "eval.foo(bar.baz)";
+        const char dat40[] = "eval.var.foo";  // keyword used as a member name: chain is kept
+        const char dat41[] = "eval. break() . foo";  // keyword member with a call and spaces: chain is kept, spaces are dropped
+
+        const char exp1[] = "eval.foo.bar.baz";
+        const char exp2[] = "eval.foo var_0000.var_0001";
+        const char exp3[] = "eval.foo;var_0000.var_0001";
+        const char exp4[] = "eval.foo,var_0000.var_0001";
+        const char exp5[] = "eval.foo*var_0000.var_0001";
+        const char exp6[] = "eval.foo*=var_0000.var_0001";
+        const char exp7[] = "eval.foo/var_0000.var_0001";
+        const char exp8[] = "eval.foo/=var_0000.var_0001";
+        const char exp9[] = "eval.foo%var_0000.var_0001";
+        const char exp10[] = "eval.foo%=var_0000.var_0001";
+        const char exp11[] = "eval.foo+var_0000.var_0001";
+        const char exp12[] = "eval.foo+=var_0000.var_0001";
+        const char exp13[] = "eval.foo-var_0000.var_0001";
+        const char exp14[] = "eval.foo-=var_0000.var_0001";
+        const char exp15[] = "eval.foo<<var_0000.var_0001";
+        const char exp16[] = "eval.foo<<=var_0000.var_0001";
+        const char exp17[] = "eval.foo>>var_0000.var_0001";
+        const char exp18[] = "eval.foo>>=var_0000.var_0001";
+        const char exp19[] = "eval.foo>>>var_0000.var_0001";
+        const char exp20[] = "eval.foo>>>=var_0000.var_0001";
+        const char exp21[] = "eval.foo<var_0000.var_0001";
+        const char exp22[] = "eval.foo<=var_0000.var_0001";
+        const char exp23[] = "eval.foo>var_0000.var_0001";
+        const char exp24[] = "eval.foo>=var_0000.var_0001";
+        const char exp25[] = "eval.foo instanceof var_0000.var_0001";
+        const char exp26[] = "eval.foo==var_0000.var_0001";
+        const char exp27[] = "eval.foo!=var_0000.var_0001";
+        const char exp28[] = "eval.foo===var_0000.var_0001";
+        const char exp29[] = "eval.foo!==var_0000.var_0001";
+        const char exp30[] = "eval.foo&var_0000.var_0001";
+        const char exp31[] = "eval.foo&=var_0000.var_0001";
+        const char exp32[] = "eval.foo&&var_0000.var_0001";
+        const char exp33[] = "eval.foo|var_0000.var_0001";
+        const char exp34[] = "eval.foo|=var_0000.var_0001";
+        const char exp35[] = "eval.foo||var_0000.var_0001";
+        const char exp36[] = "eval.foo^var_0000.var_0001";
+        const char exp37[] = "eval.foo^=var_0000.var_0001";
+        const char exp38[] = "eval.foo?var_0000.var_0001";
+        const char exp39[] = "eval.foo(var_0000.var_0001)";
+        const char exp40[] = "eval.var.foo";
+        const char exp41[] = "eval.break().foo";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+        NORMALIZE_S(dat8, exp8);
+        NORMALIZE_S(dat9, exp9);
+        NORMALIZE_S(dat10, exp10);
+        NORMALIZE_S(dat11, exp11);
+        NORMALIZE_S(dat12, exp12);
+        NORMALIZE_S(dat13, exp13);
+        NORMALIZE_S(dat14, exp14);
+        NORMALIZE_S(dat15, exp15);
+        NORMALIZE_S(dat16, exp16);
+        NORMALIZE_S(dat17, exp17);
+        NORMALIZE_S(dat18, exp18);
+        NORMALIZE_S(dat19, exp19);
+        NORMALIZE_S(dat20, exp20);
+        NORMALIZE_S(dat21, exp21);
+        NORMALIZE_S(dat22, exp22);
+        NORMALIZE_S(dat23, exp23);
+        NORMALIZE_S(dat24, exp24);
+        NORMALIZE_S(dat25, exp25);
+        NORMALIZE_S(dat26, exp26);
+        NORMALIZE_S(dat27, exp27);
+        NORMALIZE_S(dat28, exp28);
+        NORMALIZE_S(dat29, exp29);
+        NORMALIZE_S(dat30, exp30);
+        NORMALIZE_S(dat31, exp31);
+        NORMALIZE_S(dat32, exp32);
+        NORMALIZE_S(dat33, exp33);
+        NORMALIZE_S(dat34, exp34);
+        NORMALIZE_S(dat35, exp35);
+        NORMALIZE_S(dat36, exp36);
+        NORMALIZE_S(dat37, exp37);
+        NORMALIZE_S(dat38, exp38);
+        NORMALIZE_S(dat39, exp39);
+        NORMALIZE_S(dat40, exp40);
+        NORMALIZE_S(dat41, exp41);
+    }
+
+    SECTION("over inner scopes")  // "()" and "[]" after a chain member let the chain continue; names inside "(...)" are replaced
+    {
+        const char dat1[] = "eval.foo.bar.baz";
+        const char dat2[] = "eval().foo.bar.baz";
+        const char dat3[] = "eval.foo().bar.baz";
+        const char dat4[] = "eval(foo.bar).baz";
+        const char dat5[] = "eval.foo().bar[].baz";
+        const char dat6[] = "eval.foo{bar[]}.baz";  // '{' ends the chain: "bar" and the trailing "baz" are both replaced
+        const char dat7[] = "eval(foo+bar).baz";
+        const char dat8[] = "eval(foo bar).baz";
+        const char exp1[] = "eval.foo.bar.baz";
+        const char exp2[] = "eval().foo.bar.baz";
+        const char exp3[] = "eval.foo().bar.baz";
+        const char exp4[] = "eval(var_0000.var_0001).baz";
+        const char exp5[] = "eval.foo().bar[].baz";
+        const char exp6[] = "eval.foo{var_0000[]}.var_0001";
+        const char exp7[] = "eval(var_0000+var_0001).baz";
+        const char exp8[] = "eval(var_0000 var_0001).baz";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+        NORMALIZE_S(dat8, exp8);
+    }
+
+    SECTION("spaces and operators")  // whitespace around '.' does not end a chain; '=', '+', ':' and a space between chains do
+    {
+        const char dat1[] = "foo.bar.baz console.log";
+        const char dat2[] = "console.log foo.bar.baz";
+        const char dat3[] = "foo . bar . baz console . log";
+        const char dat4[] = "console . log foo . bar . baz";
+        const char dat5[] = "console . foo . bar . baz";
+        const char dat6[] = "console = foo . bar . baz";
+        const char dat7[] = "console . foo + bar . baz";
+        const char dat8[] = "console . foo . bar : baz";
+        const char dat9[] = "console.\nfoo";  // a newline after '.' keeps the chain as well
+        const char exp1[] = "var_0000.var_0001.var_0002 console.log";
+        const char exp2[] = "console.log var_0000.var_0001.var_0002";
+        const char exp3[] = "var_0000.var_0001.var_0002 console.log";
+        const char exp4[] = "console.log var_0000.var_0001.var_0002";
+        const char exp5[] = "console.foo.bar.baz";
+        const char exp6[] = "console=var_0000.var_0001.var_0002";
+        const char exp7[] = "console.foo+var_0000.var_0001";
+        const char exp8[] = "console.foo.bar:var_0000";
+        const char exp9[] = "console.foo";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+        NORMALIZE_S(dat8, exp8);
+        NORMALIZE_S(dat9, exp9);
+    }
+
+    SECTION("comments")  // a comment between '.' and the member name is dropped and the chain continues
+    {
+        const char dat1[] = "console.<!-- HTML comment\nlog(abc).foo";
+        const char dat2[] = "console.//single-line comment\nlog(abc).foo";
+        const char dat3[] = "console./*multi-line comment*/log(abc).foo";
+        const char exp[] = "console.log(var_0000).foo";  // one expected output shared by all three inputs
+
+        NORMALIZE_S(dat1, exp);
+        NORMALIZE_S(dat2, exp);
+        NORMALIZE_S(dat3, exp);
+    }
+}
+
+TEST_CASE("built-in scope tracking", "[JSNormalizer]")
+{
+    // 'console' 'eval' 'document' are built-in identifiers
+
+    SECTION("basic")
+    {
+        const char dat1[] = "(alpha) bravo console delta eval foxtrot";
+        const char dat2[] = "(alpha bravo) console delta eval foxtrot";
+        const char dat3[] = "(alpha bravo console) delta eval foxtrot";
+        const char dat4[] = "(alpha bravo console delta) eval foxtrot";
+        const char dat5[] = "(alpha bravo console delta eval) foxtrot";
+        const char dat6[] = "(alpha bravo console delta eval foxtrot)";
+        const char dat7[] = "alpha bravo (console) delta (eval) foxtrot";
+        const char exp1[] = "(var_0000)var_0001 console var_0002 eval var_0003";
+        const char exp2[] = "(var_0000 var_0001)console var_0002 eval var_0003";
+        const char exp3[] = "(var_0000 var_0001 console)var_0002 eval var_0003";
+        const char exp4[] = "(var_0000 var_0001 console var_0002)eval var_0003";
+        const char exp5[] = "(var_0000 var_0001 console var_0002 eval)var_0003";
+        const char exp6[] = "(var_0000 var_0001 console var_0002 eval var_0003)";
+        const char exp7[] = "var_0000 var_0001(console)var_0002(eval)var_0003";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+    }
+
+    SECTION("function calls")
+    {
+        const char dat1[] = "foo(bar).baz";
+        const char dat2[] = "foo(bar(baz))";
+        const char dat3[] = "eval(bar).baz";
+        const char dat4[] = "eval(bar(baz))";
+        const char dat5[] = "foo(eval).baz";
+        const char dat6[] = "foo(eval(baz))";
+        const char dat7[] = "foo(bar).eval";
+        const char dat8[] = "foo(bar(eval))";
+        const char dat9[] = "(console).log";
+        const char dat10[] = "((console)).log";
+        const char dat11[] = "((foo, console)).log";
+        const char dat12[] = "((document.foo(bar), console)).log";
+        const char dat13[] = "((document.foo(bar) console)).log";
+        const char dat14[] = "((document.foo(bar) console))log";
+        const char dat15[] = "((document.foo(bar) baz))log";
+        const char dat16[] = "foo(console).log";
+        const char dat17[] = "foo((console).log).log";
+        const char dat18[] = "foo().baz + eval(eval['content'].bar + baz(console['content'].log, baz)).bar";
+        const char dat19[] = "eval['foo']().bar.baz";
+        const char dat20[] = "eval['foo']()['bar'].baz";
+        const char dat21[] = "eval['foo']['bar'].baz()";
+        const char exp1[] = "var_0000(var_0001).var_0002";
+        const char exp2[] = "var_0000(var_0001(var_0002))";
+        const char exp3[] = "eval(var_0000).baz";
+        const char exp4[] = "eval(var_0000(var_0001))";
+        const char exp5[] = "var_0000(eval).var_0001";
+        const char exp6[] = "var_0000(eval(var_0001))";
+        const char exp7[] = "var_0000(var_0001).var_0002";
+        const char exp8[] = "var_0000(var_0001(eval))";
+        const char exp9[] = "(console).log";
+        const char exp10[] = "((console)).log";
+        const char exp11[] = "((var_0000,console)).log";
+        const char exp12[] = "((document.foo(var_0000),console)).log";
+        const char exp13[] = "((document.foo(var_0000)console)).log";
+        const char exp14[] = "((document.foo(var_0000)console))var_0001";
+        const char exp15[] = "((document.foo(var_0000)var_0001))var_0002";
+        const char exp16[] = "var_0000(console).var_0001";
+        const char exp17[] = "var_0000((console).log).var_0001";
+        const char exp18[] = "var_0000().var_0001+eval(eval['content'].bar+var_0001(console['content'].log,var_0001)).bar";
+        const char exp19[] = "eval['foo']().bar.baz";
+        const char exp20[] = "eval['foo']()['bar'].baz";
+        const char exp21[] = "eval['foo']['bar'].baz()";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+        NORMALIZE_S(dat8, exp8);
+        NORMALIZE_S(dat9, exp9);
+        NORMALIZE_S(dat10, exp10);
+        NORMALIZE_S(dat11, exp11);
+        NORMALIZE_S(dat12, exp12);
+        NORMALIZE_S(dat13, exp13);
+        NORMALIZE_S(dat14, exp14);
+        NORMALIZE_S(dat15, exp15);
+        NORMALIZE_S(dat16, exp16);
+        NORMALIZE_S(dat17, exp17);
+        NORMALIZE_S(dat18, exp18);
+        NORMALIZE_S(dat19, exp19);
+        NORMALIZE_S(dat20, exp20);
+        NORMALIZE_S(dat21, exp21);
+    }
+
+    SECTION("eval cases")
+    {
+        const char dat1[] = "eval().bar";
+        const char dat2[] = "eval()['bar']";
+        const char dat3[] = "eval().bar()";
+        const char dat4[] = "eval()['bar']()";
+        const char dat5[] = "eval.bar";
+        const char dat6[] = "eval.bar()";
+        const char dat7[] = "eval['bar']";
+        const char dat8[] = "eval['bar']()";
+        const char dat9[] = "\\u0065\\u0076\\u0061\\u006c(\\u0062\\u0061\\u0072).\\u0062\\u0061\\u007a ;";
+        const char dat10[] = "var foo.bar = 123 ; (\\u0065\\u0076\\u0061\\u006c).\\u0062\\u0061\\u007a ;";
+        const char exp1[] = "eval().bar";
+        const char exp2[] = "eval()['bar']";
+        const char exp3[] = "eval().bar()";
+        const char exp4[] = "eval()['bar']()";
+        const char exp5[] = "eval.bar";
+        const char exp6[] = "eval.bar()";
+        const char exp7[] = "eval['bar']";
+        const char exp8[] = "eval['bar']()";
+        const char exp9[] = "eval(var_0000).baz;";
+        const char exp10[] = "var var_0000.var_0001=123;(eval).baz;";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+        NORMALIZE_S(dat8, exp8);
+        NORMALIZE_S(dat9, exp9);
+        NORMALIZE_S(dat10, exp10);
+    }
+
+    SECTION("console cases")
+    {
+        const char dat1[] = "console.log=abc";
+        const char dat2[] = "console.log().clear()";
+        const char dat3[] = "console.log(\"asd\").foo";
+        const char dat4[] = "console.log[\"asd\"].foo";
+        const char dat5[] = "console.log(`var a = ${ c + b }`).foo";
+        const char dat6[] = "console.log(abc,def,www,document.foo(abc))";
+        const char dat7[] = "console.log(document.getElementById(\"mem\").text).clear(abc)";
+        const char exp1[] = "console.log=var_0000";
+        const char exp2[] = "console.log().clear()";
+        const char exp3[] = "console.log(\"asd\").foo";
+        const char exp4[] = "console.log[\"asd\"].foo";
+        const char exp5[] = "console.log(`var a = ${var_0000+var_0001}`).foo";
+        const char exp6[] = "console.log(var_0000,var_0001,var_0002,document.foo(var_0000))";
+        const char exp7[] = "console.log(document.getElementById(\"mem\").text).clear(var_0000)";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+        NORMALIZE_S(dat6, exp6);
+        NORMALIZE_S(dat7, exp7);
+    }
+
+    SECTION("corner cases")
+    {
+        const char dat1[] = "object = {hidden: eval}";
+        const char dat2[] = "object = {console: \"str\"}";
+        const char dat3[] = "object.hidden = eval";
+        const char dat4[] = "array = [None, eval, document.getElementById]";
+        const char dat5[] = "array[1] = eval";
+        const char exp1[] = "var_0000={var_0001:eval}";
+        const char exp2[] = "var_0000={console:\"str\"}";
+        const char exp3[] = "var_0000.var_0001=eval";
+        const char exp4[] = "var_0000=[var_0001,eval,document.getElementById]";
+        const char exp5[] = "var_0000[1]=eval";
+
+        NORMALIZE_S(dat1, exp1);
+        NORMALIZE_S(dat2, exp2);
+        NORMALIZE_S(dat3, exp3);
+        NORMALIZE_S(dat4, exp4);
+        NORMALIZE_S(dat5, exp5);
+    }
+}
+
+TEST_CASE("built-in identifiers split", "[JSNormalizer]")
+{
+
+#if JSTOKENIZER_MAX_STATES != 8
+#error "built-in identifiers split" tests are designed for 8 states depth
+#endif
+
+    SECTION("a standalone identifier")
+    {
+        const char dat1[] = "con";
+        const char dat2[] = "sole";
+        const char dat3[] = "e";
+        const char dat4[] = "val";
+        const char dat5[] = "console . ";
+        const char dat6[] = "foo";
+        const char dat7[] = "eval ";
+        const char dat8[] = ". bar";
+        const char exp1[] = "var_0000";
+        const char exp2[] = "console";
+        const char exp3[] = "var_0000";
+        const char exp4[] = "eval";
+        const char exp5[] = "console.";
+        const char exp6[] = "console.foo";
+        const char exp7[] = "eval";
+        const char exp8[] = "eval.bar";
+
+        NORMALIZE_T(dat1, dat2, exp1, exp2);
+        NORMALIZE_T(dat3, dat4, exp3, exp4);
+        NORMALIZE_T(dat5, dat6, exp5, exp6);
+        NORMALIZE_T(dat7, dat8, exp7, exp8);
+    }
+
+    SECTION("function calls")
+    {
+        const char dat1[] = "console";
+        const char dat2[] = "().foo";
+        const char dat3[] = "console(";
+        const char dat4[] = ").foo";
+        const char dat5[] = "console()";
+        const char dat6[] = ".foo";
+        const char dat7[] = "console().";
+        const char dat8[] = "foo";
+        const char dat9[] = "console().re";
+        const char dat10[] = "write";
+        const char exp1[] = "console";
+        const char exp2[] = "console().foo";
+        const char exp3[] = "console(";
+        const char exp4[] = "console().foo";
+        const char exp5[] = "console()";
+        const char exp6[] = "console().foo";
+        const char exp7[] = "console().";
+        const char exp8[] = "console().foo";
+        const char exp9[] = "console().re";
+        const char exp10[] = "console().rewrite";
+
+        NORMALIZE_T(dat1, dat2, exp1, exp2);
+        NORMALIZE_T(dat3, dat4, exp3, exp4);
+        NORMALIZE_T(dat5, dat6, exp5, exp6);
+        NORMALIZE_T(dat7, dat8, exp7, exp8);
+        NORMALIZE_T(dat9, dat10, exp9, exp10);
+    }
+
+    SECTION("terminator split")
+    {
+        const char dat1[] = "eval.foo ";
+        const char dat2[] = "bar.baz";
+        const char dat3[] = "eval.foo";
+        const char dat4[] = " bar.baz";
+        const char dat5[] = "eval.foo;";
+        const char dat6[] = "bar.baz";
+        const char dat7[] = "eval.foo";
+        const char dat8[] = ";bar.baz";
+        const char dat9[] = "eval.foo%";
+        const char dat10[] = "=bar.baz";
+        const char exp1[] = "eval.foo";
+        const char exp2[] = "eval.foo var_0000.var_0001";
+        const char exp3[] = "eval.foo";
+        const char exp4[] = "eval.foo var_0000.var_0001";
+        const char exp5[] = "eval.foo;";
+        const char exp6[] = "eval.foo;var_0000.var_0001";
+        const char exp7[] = "eval.foo";
+        const char exp8[] = "eval.foo;var_0000.var_0001";
+        const char exp9[] = "eval.foo%";
+        const char exp10[] = "eval.foo%=var_0000.var_0001";
+
+        NORMALIZE_T(dat1, dat2, exp1, exp2);
+        NORMALIZE_T(dat3, dat4, exp3, exp4);
+        NORMALIZE_T(dat5, dat6, exp5, exp6);
+        NORMALIZE_T(dat7, dat8, exp7, exp8);
+        NORMALIZE_T(dat9, dat10, exp9, exp10);
+    }
+
+    SECTION("scope split")
+    {
+        // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
+        //                   split here ^
+
+        const char dat1[] = "eval(foo,eval(bar,eval(baz['";
+        const char dat2[] = "'].console.check+check).foo).bar).baz+check";
+        const char exp1[] = "eval(var_0000,eval(var_0001,eval(var_0002['";
+        const char exp2[] = "(var_0001,eval(var_0002[''].var_0003.var_0004+var_0004).foo).bar).baz+var_0004";
+
+        NORMALIZE_T(dat1, dat2, exp1, exp2);
+
+        // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
+        //                         split here ^
+
+        const char dat3[] = "eval(foo,eval(bar,eval(baz[''].con";
+        const char dat4[] = "sole.check+check).foo).bar).baz+check";
+        const char exp3[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003";
+        const char exp4[] = "(var_0002[''].var_0004.var_0005+var_0005).foo).bar).baz+var_0005";
+
+        NORMALIZE_T(dat3, dat4, exp3, exp4);
+
+        // "eval(foo,eval(bar,eval(baz[''].console.check+check).foo).bar).baz+check"
+        //                                              split here ^
+
+        const char dat5[] = "eval(foo,eval(bar,eval(baz[''].console.check+check).foo";
+        const char dat6[] = ").bar).baz+check";
+        const char exp5[] = "eval(var_0000,eval(var_0001,eval(var_0002[''].var_0003.var_0004+var_0004).foo";
+        const char exp6[] = "var_0003.var_0004+var_0004).foo).bar).baz+var_0004";
+
+        NORMALIZE_T(dat5, dat6, exp5, exp6);
+    }
+}
+
 #endif // CATCH_TEST_BUILD
 
 // Benchmark tests
@@ -2220,35 +3303,60 @@ static const std::string make_input(const char* begin, const char* mid,
                              const char* end, size_t len) 
 {
     std::string s(begin);
-    int fill = (len - strlen(begin) - strlen(end)) / strlen(mid);
+    int fill = (len - strlen(begin) - strlen(end) - strlen(s_closing_tag)) / strlen(mid);
     for (int i = 0; i < fill; ++i)
         s.append(mid);
     s.append(end);
+    s.append(s_closing_tag);
     return s;
 }
 
+// Builds a benchmark input of at most `depth` bytes: as many whole copies of
+// `pattern` as fit into `depth` minus the closing tag, followed by s_closing_tag.
+// When `depth` is shorter than the closing tag, or `pattern` is empty, only the
+// closing tag is returned.
+static const std::string make_input_repeat(const char* pattern, size_t depth)
+{
+    const size_t tag_len = strlen(s_closing_tag);
+    const size_t pattern_len = strlen(pattern);
+
+    // Both operands are unsigned: without the guards a short depth would wrap
+    // around to a huge repeat count, and an empty pattern would divide by zero.
+    const size_t fill = (pattern_len != 0 && depth > tag_len) ?
+        (depth - tag_len) / pattern_len : 0;
+
+    std::string s;
+    s.reserve(fill * pattern_len + tag_len);
+    for (size_t it = 0; it < fill; ++it)
+        s.append(pattern, pattern_len);
+
+    s.append(s_closing_tag, tag_len);
+    return s;
+}
+
+// Rewinds the normalizer output and runs a single normalize() call over the
+// whole `input`, handing back the status normalize() returned.
+static JSTokenizer::JSRet norm_ret(JSNormalizer& normalizer, const std::string& input)
+{
+    const char* const src = input.c_str();
+    const size_t src_len = input.size();
+
+    normalizer.rewind_output();
+    return normalizer.normalize(src, src_len);
+}
+
 TEST_CASE("benchmarking - ::normalize() - literals", "[JSNormalizer]")
 {
     JSIdentifierCtxTest ident_ctx;
-    JSNormalizer normalizer(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTNIG);
+    JSNormalizer normalizer(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);
     char dst[DEPTH];
     auto whitespace = make_input("", " ", "", DEPTH);
     auto block_comment = make_input("/*", " ", "*/", DEPTH);
     auto double_quote = make_input("\"", " ", "\"", DEPTH);
+
     BENCHMARK("memcpy - whitespaces - 65535 bytes")
     {
         return memcpy(dst, whitespace.c_str(), whitespace.size());
     };
+
+    REQUIRE(norm_ret(normalizer, whitespace) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("whitespaces - 65535 bytes")
     {
         normalizer.rewind_output();
         return normalizer.normalize(whitespace.c_str(), whitespace.size());
     };
+
+    REQUIRE(norm_ret(normalizer, block_comment) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("block comment - 65535 bytes")
     {
         normalizer.rewind_output();
         return normalizer.normalize(block_comment.c_str(), block_comment.size());
     };
+
+    REQUIRE(norm_ret(normalizer, double_quote) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("double quotes string - 65535 bytes")
     {
         normalizer.rewind_output();
@@ -2265,16 +3373,22 @@ TEST_CASE("benchmarking - ::normalize() - literals", "[JSNormalizer]")
     {
         return memcpy(dst, whitespace_8k.c_str(), whitespace_8k.size());
     };
+
+    REQUIRE(norm_ret(normalizer, whitespace_8k) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("whitespaces - 8192 bytes")
     {
         normalizer.rewind_output();
         return normalizer.normalize(whitespace_8k.c_str(), whitespace_8k.size());
     };
+
+    REQUIRE(norm_ret(normalizer, block_comment_8k) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("block comment - 8192 bytes")
     {
         normalizer.rewind_output();
         return normalizer.normalize(block_comment_8k.c_str(), block_comment_8k.size());
     };
+
+    REQUIRE(norm_ret(normalizer, double_quote_8k) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("double quotes string - 8192 bytes")
     {
         normalizer.rewind_output();
@@ -2282,7 +3396,7 @@ TEST_CASE("benchmarking - ::normalize() - literals", "[JSNormalizer]")
     };
 }
 
-TEST_CASE("benchmarking - ::normalize() - identifiers")
+TEST_CASE("benchmarking - ::normalize() - identifiers", "[JSNormalizer]")
 {
     // around 11 000 identifiers
     std::string input;
@@ -2291,46 +3405,110 @@ TEST_CASE("benchmarking - ::normalize() - identifiers")
 
     input.resize(DEPTH - strlen(s_closing_tag));
     input.append(s_closing_tag, strlen(s_closing_tag));
-    const char* src = input.c_str();
-    size_t src_len = input.size();
 
     JSIdentifierCtxTest ident_ctx_mock;
-    JSNormalizer normalizer_wo_ident(ident_ctx_mock, UNLIM_DEPTH, MAX_TEMPLATE_NESTNIG);
+    JSNormalizer normalizer_wo_ident(ident_ctx_mock, UNLIM_DEPTH,
+        MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);
 
+    REQUIRE(norm_ret(normalizer_wo_ident, input) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("without substitution")
     {
         normalizer_wo_ident.rewind_output();
-        return normalizer_wo_ident.normalize(src, src_len);
+        return normalizer_wo_ident.normalize(input.c_str(), input.size());
     };
 
-    JSIdentifierCtx ident_ctx(DEPTH);
-    JSNormalizer normalizer_w_ident(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTNIG);
+    const std::unordered_set<std::string> ids{};
+    JSIdentifierCtx ident_ctx(DEPTH, ids);
+    JSNormalizer normalizer_w_ident(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);
 
+    REQUIRE(norm_ret(normalizer_w_ident, input) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("with substitution")
     {
         normalizer_w_ident.rewind_output();
-        return normalizer_w_ident.normalize(src, src_len);
+        return normalizer_w_ident.normalize(input.c_str(), input.size());
+    };
+
+    const std::unordered_set<std::string> ids_n { "n" };
+    JSIdentifierCtx ident_ctx_ids_n(DEPTH, ids_n);
+    JSNormalizer normalizer_built_ins(ident_ctx_ids_n, UNLIM_DEPTH,
+        MAX_TEMPLATE_NESTING, MAX_SCOPE_DEPTH);
+
+    REQUIRE(norm_ret(normalizer_built_ins, input) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("with built-ins")
+    {
+        normalizer_built_ins.rewind_output();
+        return normalizer_built_ins.normalize(input.c_str(), input.size());
+    };
+}
+
+TEST_CASE("benchmarking - ::normalize() - scope", "[JSNormalizer]")
+{
+    constexpr uint32_t depth = 65535;
+    JSIdentifierCtxTest ident_ctx;
+    JSNormalizer normalizer(ident_ctx, UNLIM_DEPTH, MAX_TEMPLATE_NESTING, depth);
+    char dst[depth];
+
+    auto src_ws = make_input("", " ", "", depth);
+    auto src_brace_rep = make_input_repeat("{}", depth);
+    auto src_paren_rep = make_input_repeat("()", depth);
+    auto src_bracket_rep = make_input_repeat("[]", depth);
+
+    BENCHMARK("memcpy - ...{}{}{}... - 65535 bytes")
+    {
+        return memcpy(dst, src_brace_rep.c_str(), src_brace_rep.size());
+    };
+
+    REQUIRE(norm_ret(normalizer, src_ws) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("whitespaces - 65535 bytes")
+    {
+        normalizer.rewind_output();
+        return normalizer.normalize(src_ws.c_str(), src_ws.size());
+    };
+
+    REQUIRE(norm_ret(normalizer, src_brace_rep) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("...{}{}{}... - 65535 bytes")
+    {
+        normalizer.rewind_output();
+        return normalizer.normalize(src_brace_rep.c_str(), src_brace_rep.size());
+    };
+
+    REQUIRE(norm_ret(normalizer, src_paren_rep) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("...()()()... - 65535 bytes")
+    {
+        normalizer.rewind_output();
+        return normalizer.normalize(src_paren_rep.c_str(), src_paren_rep.size());
+    };
+
+    REQUIRE(norm_ret(normalizer, src_bracket_rep) == JSTokenizer::SCRIPT_ENDED);
+    BENCHMARK("...[][][]... - 65535 bytes")
+    {
+        normalizer.rewind_output();
+        return normalizer.normalize(src_bracket_rep.c_str(), src_bracket_rep.size());
     };
 }
 
 TEST_CASE("benchmarking - ::normalize() - automatic semicolon insertion")
 {
-    auto w_semicolons = make_input("", "a;\n", s_closing_tag, DEPTH); 
-    auto wo_semicolons = make_input("", "a \n", s_closing_tag, DEPTH); 
+    auto w_semicolons = make_input("", "a;\n", "", DEPTH);
+    auto wo_semicolons = make_input("", "a \n", "", DEPTH);
     const char* src_w_semicolons = w_semicolons.c_str();
     const char* src_wo_semicolons = wo_semicolons.c_str();
     size_t src_len = w_semicolons.size();
 
     JSIdentifierCtxTest ident_ctx_mock;
-    JSNormalizer normalizer_wo_ident(ident_ctx_mock, UNLIM_DEPTH, MAX_TEMPLATE_NESTNIG);
+    JSNormalizer normalizer_wo_ident(ident_ctx_mock, UNLIM_DEPTH, MAX_TEMPLATE_NESTING, DEPTH);
 
+    REQUIRE(norm_ret(normalizer_wo_ident, w_semicolons) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("without semicolon insertion")
     {
+        normalizer_wo_ident.rewind_output();
         return normalizer_wo_ident.normalize(src_w_semicolons, src_len);
     };
 
+    REQUIRE(norm_ret(normalizer_wo_ident, wo_semicolons) == JSTokenizer::SCRIPT_ENDED);
     BENCHMARK("with semicolon insertion")
     {
+        normalizer_wo_ident.rewind_output();
         return normalizer_wo_ident.normalize(src_wo_semicolons, src_len);
     };
 }