]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #3620: Move Enhanced JS Normalizer from NHI to a standalone component
author Steve Chew (stechew) <stechew@cisco.com>
Thu, 17 Nov 2022 00:56:54 +0000 (00:56 +0000)
committer Steve Chew (stechew) <stechew@cisco.com>
Thu, 17 Nov 2022 00:56:54 +0000 (00:56 +0000)
Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_module to master

Squashed commit of the following:

commit 2678dac41df3f2862e165ccce92ab70598dad0ff
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Mon Oct 10 13:20:11 2022 +0300

    http_inspect: move Enhanced JS Normalizer from NHI to a standalone component

        * http_inspect: remove Enhanced JavaScript Normalizer from NHI
        * utils: move JavaScript Normalizer to js_norm component, including unit tests
        * js_norm: implement standalone Enhanced JavaScript Normalizer
        * ips_options: implement js_data IPS option
        * lua: remove default_http_inspect, add default_js_norm

60 files changed:
lua/snort.lua
lua/snort_defaults.lua
src/CMakeLists.txt
src/framework/inspector.h
src/ips_options/CMakeLists.txt
src/ips_options/dev_notes.txt
src/ips_options/ips_js_data.cc [new file with mode: 0644]
src/ips_options/ips_options.cc
src/js_norm/CMakeLists.txt [new file with mode: 0644]
src/js_norm/dev_notes.txt [new file with mode: 0644]
src/js_norm/js_config.h [new file with mode: 0644]
src/js_norm/js_enum.h [new file with mode: 0644]
src/js_norm/js_identifier_ctx.cc [moved from src/utils/js_identifier_ctx.cc with 93% similarity]
src/js_norm/js_identifier_ctx.h [moved from src/utils/js_identifier_ctx.h with 97% similarity]
src/js_norm/js_norm.cc [new file with mode: 0644]
src/js_norm/js_norm.h [new file with mode: 0644]
src/js_norm/js_norm_module.cc [new file with mode: 0644]
src/js_norm/js_norm_module.h [new file with mode: 0644]
src/js_norm/js_normalizer.cc [moved from src/utils/js_normalizer.cc with 92% similarity]
src/js_norm/js_normalizer.h [moved from src/utils/js_normalizer.h with 92% similarity]
src/js_norm/js_tokenizer.h [moved from src/utils/js_tokenizer.h with 97% similarity]
src/js_norm/js_tokenizer.l [moved from src/utils/js_tokenizer.l with 99% similarity]
src/js_norm/test/CMakeLists.txt [new file with mode: 0644]
src/js_norm/test/dev_notes.txt [moved from src/utils/test/dev_notes.txt with 96% similarity]
src/js_norm/test/js_dealias_test.cc [moved from src/utils/test/js_dealias_test.cc with 99% similarity]
src/js_norm/test/js_identifier_ctx_test.cc [moved from src/utils/test/js_identifier_ctx_test.cc with 99% similarity]
src/js_norm/test/js_norm_benchmark.cc [moved from src/utils/test/js_norm_benchmark.cc with 99% similarity]
src/js_norm/test/js_normalizer_test.cc [moved from src/utils/test/js_normalizer_test.cc with 99% similarity]
src/js_norm/test/js_test_options.cc [moved from src/utils/test/js_test_options.cc with 100% similarity]
src/js_norm/test/js_test_options.h [moved from src/utils/test/js_test_options.h with 97% similarity]
src/js_norm/test/js_test_stubs.cc [new file with mode: 0644]
src/js_norm/test/js_test_utils.cc [moved from src/utils/test/js_test_utils.cc with 94% similarity]
src/js_norm/test/js_test_utils.h [moved from src/utils/test/js_test_utils.h with 92% similarity]
src/js_norm/test/js_unescape_test.cc [moved from src/utils/test/js_unescape_test.cc with 99% similarity]
src/js_norm/test/jsn_test.cc [new file with mode: 0644]
src/main/modules.cc
src/main/policy.cc
src/main/policy.h
src/pub_sub/test/pub_sub_http_request_body_event_test.cc
src/service_inspectors/http_inspect/dev_notes_js_norm.txt
src/service_inspectors/http_inspect/dev_notes_test_tool.txt
src/service_inspectors/http_inspect/http_api.cc
src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_flow_data.cc
src/service_inspectors/http_inspect/http_flow_data.h
src/service_inspectors/http_inspect/http_inspect.cc
src/service_inspectors/http_inspect/http_js_norm.cc
src/service_inspectors/http_inspect/http_js_norm.h
src/service_inspectors/http_inspect/http_module.cc
src/service_inspectors/http_inspect/http_module.h
src/service_inspectors/http_inspect/http_msg_body.cc
src/service_inspectors/http_inspect/http_msg_body.h
src/service_inspectors/http_inspect/http_msg_request.cc
src/service_inspectors/http_inspect/http_msg_section.cc
src/service_inspectors/http_inspect/http_tables.cc
src/service_inspectors/http_inspect/ips_http_buffer.cc
src/service_inspectors/http_inspect/test/http_module_test.cc
src/service_inspectors/http_inspect/test/http_uri_norm_test.cc
src/utils/CMakeLists.txt
src/utils/test/CMakeLists.txt

index e2357a15090bb70fb81720f30b4d5fb26b2eacf2..efbd3f65efbfa811636baeb029bee70a3197a9a7 100644 (file)
@@ -84,13 +84,15 @@ ftp_server = default_ftp_server
 ftp_client = { }
 ftp_data = { }
 
-http_inspect = default_http_inspect
+http_inspect = { }
 http2_inspect = { }
 
 -- see file_magic.rules for file id rules
 file_id = { rules_file = 'file_magic.rules' }
 file_policy = { }
 
+js_norm = default_js_norm
+
 -- the following require additional configuration to be fully effective:
 
 appid =
index 721198ec10852e40638af76f7d825f8696d85e70..519d161a774949d2e931cf98821eeabe1c5913de 100644 (file)
@@ -1286,7 +1286,7 @@ default_js_norm_ident_ignore =
 
 default_js_norm_prop_ignore =
 {
-    -- Object 
+    -- Object
     'constructor', 'prototype', '__proto__', '__defineGetter__', '__defineSetter__',
     '__lookupGetter__', '__lookupSetter__', '__count__', '__noSuchMethod__', '__parent__',
     'hasOwnProperty', 'isPrototypeOf', 'propertyIsEnumerable', 'toLocaleString', 'toString',
@@ -1313,7 +1313,7 @@ default_js_norm_prop_ignore =
 
     -- Array
     'copyWithin', 'entries', 'every', 'fill', 'filter', 'find', 'findIndex', 'flat', 'flatMap',
-    'forEach', 'groupBy', 'groupByToMap', 'join', 'keys', 'map', 'pop',  'push', 'reduce', 
+    'forEach', 'groupBy', 'groupByToMap', 'join', 'keys', 'map', 'pop',  'push', 'reduce',
     'reduceRight', 'reverse', 'shift', 'unshift', 'some', 'sort', 'splice',
 
     -- Generator
@@ -1337,7 +1337,7 @@ default_js_norm_prop_ignore =
     'setCapture', 'setHTML', 'setPointerCapture', 'toggleAttribute',
 
     -- HTMLElement
-    'contentEditable', 'contextMenu', 'dataset', 'dir', 'enterKeyHint', 'hidden', 'inert', 
+    'contentEditable', 'contextMenu', 'dataset', 'dir', 'enterKeyHint', 'hidden', 'inert',
     'innerText', 'lang', 'nonce', 'outerText', 'style', 'tabIndex', 'title',
     'attachInternals',
 
@@ -1348,11 +1348,11 @@ default_js_norm_prop_ignore =
     'ExportStyle', 'callee'
 }
 
-default_http_inspect =
+default_js_norm =
 {
     -- params not specified here get internal defaults
-    js_norm_ident_ignore = default_js_norm_ident_ignore,
-    js_norm_prop_ignore = default_js_norm_prop_ignore,
+    ident_ignore = default_js_norm_ident_ignore,
+    prop_ignore = default_js_norm_prop_ignore,
 }
 
 ---------------------------------------------------------------------------
@@ -1374,8 +1374,7 @@ default_whitelist =
     ip_hi_dist icmp_low_sweep icmp_med_sweep icmp_hi_sweep
     default_hi_port_scan default_med_port_scan default_low_port_scan
     default_variables netflow_versions default_js_norm_ident_ignore
-    default_js_norm_prop_ignore default_http_inspect
+    default_js_norm_prop_ignore default_js_norm
 ]]
 
 snort_whitelist_append(default_whitelist)
-
index eea8341901b5616d103b30c30c729cdbf38be910..71ad64cae6edec95cc237cb7fee944a91748c37a 100644 (file)
@@ -94,6 +94,7 @@ add_subdirectory(filters)
 add_subdirectory(flow)
 add_subdirectory(framework)
 add_subdirectory(hash)
+add_subdirectory(js_norm)
 add_subdirectory(latency)
 add_subdirectory(log)
 add_subdirectory(main)
@@ -151,6 +152,7 @@ add_executable( snort
     $<TARGET_OBJECTS:host_tracker>
     $<TARGET_OBJECTS:ips_actions>
     $<TARGET_OBJECTS:ips_options>
+    $<TARGET_OBJECTS:js_norm>
     $<TARGET_OBJECTS:latency>
     $<TARGET_OBJECTS:log>
     $<TARGET_OBJECTS:loggers>
index bcd25be5e73452cac41fca555c9fb03e904573e1..2a7c77e319f32f39942670e826bfd9c30fdb06b1 100644 (file)
@@ -47,8 +47,8 @@ struct InspectionBuffer
 {
     enum Type
     {
-        // this is the only generic rule option
-        IBT_VBA,
+        // these are the only generic rule options
+        IBT_VBA, IBT_JS_DATA,
 
         // FIXIT-M all of these should be eliminated
         IBT_KEY, IBT_HEADER, IBT_BODY,
index bb51945d97b32ee8a0275cb6e0ea5dd7cfc20569..9667788995fe609ffe25933f481264e0ab27fb78 100644 (file)
@@ -62,6 +62,7 @@ set (IPS_SOURCES
     ips_flowbits.cc
     ips_flowbits.h
     ips_hash.cc
+    ips_js_data.cc
     ips_luajit.cc
     ips_metadata.cc
     ips_options.cc
index 8aa54518ea522bc693665945f2beeef3d7107a3a..b3efb7f99699b5e731588fab36d104b289dcc897 100644 (file)
@@ -30,4 +30,9 @@ for the "replace" content the rule will not match.
 - Only the first occurrence of the content will be replaced.
 - "replace" works for raw packets only. So, TCP data must either fit
 under the "pkt_data" buffer requirements or one should enable detection
-on TCP payload before reassembly: search_engine.detect_raw_tcp=true.
\ No newline at end of file
+on TCP payload before reassembly: search_engine.detect_raw_tcp=true.
+
+The "js_data" rule option sets the detection cursor to data normalized by Enhanced JavaScript Normalizer.
+It's implemented as a generic IPS buffer and follows the JIT approach.
+"js_data" buffer must be specified in the list of buffers available for a particular
+inspector.
diff --git a/src/ips_options/ips_js_data.cc b/src/ips_options/ips_js_data.cc
new file mode 100644 (file)
index 0000000..d8ed0d2
--- /dev/null
@@ -0,0 +1,128 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// ips_js_data.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "framework/cursor.h"
+#include "framework/inspector.h"
+#include "framework/ips_option.h"
+#include "framework/module.h"
+#include "profiler/profiler.h"
+#include "protocols/packet.h"
+
+using namespace snort;
+
+static constexpr const char* s_name = "js_data";
+static constexpr const char* s_help = "rule option to set detection cursor to normalized JavaScript data";
+
+static THREAD_LOCAL ProfileStats js_data_profile_stats;
+
+class JSDataModule : public Module
+{
+public:
+    JSDataModule() : Module(s_name, s_help) { }
+
+    Usage get_usage() const override
+    { return DETECT; }
+
+    ProfileStats* get_profile() const override
+    { return &js_data_profile_stats; }
+};
+
+class JSDataOption : public IpsOption
+{
+public:
+    JSDataOption() : IpsOption(s_name) { }
+
+    CursorActionType get_cursor_type() const override
+    { return CAT_SET_FAST_PATTERN; }
+
+    section_flags get_pdu_section(bool) const override
+    { return section_to_flag(PS_BODY); }
+
+    EvalStatus eval(Cursor& c, Packet* p) override
+    {
+        RuleProfile profile(js_data_profile_stats);
+        InspectionBuffer buf;
+
+        if (!p->flow or !p->flow->gadget)
+            return NO_MATCH;
+
+        if (p->flow->gadget->get_fp_buf(buf.IBT_JS_DATA, p, buf))
+        {
+            c.set(s_name, buf.data, buf.len);
+            return MATCH;
+        }
+
+        return NO_MATCH;
+    }
+};
+
+//-------------------------------------------------------------------------
+// api methods
+//-------------------------------------------------------------------------
+
+static Module* mod_ctor()
+{ return new JSDataModule; }
+
+static void mod_dtor(Module* m)
+{ delete m; }
+
+static IpsOption* js_data_ctor(Module*, OptTreeNode*)
+{ return new JSDataOption; }
+
+static void js_data_dtor(IpsOption* opt)
+{ delete opt; }
+
+static const IpsApi js_data_api =
+{
+    {
+        PT_IPS_OPTION,
+        sizeof(IpsApi),
+        IPSAPI_VERSION,
+        0,
+        API_RESERVED,
+        API_OPTIONS,
+        s_name,
+        s_help,
+        mod_ctor,
+        mod_dtor
+    },
+    OPT_TYPE_DETECTION,
+    0, PROTO_BIT__TCP,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    js_data_ctor,
+    js_data_dtor,
+    nullptr
+};
+
+#ifdef BUILDING_SO
+SO_PUBLIC const BaseApi* snort_plugins[] =
+#else
+const BaseApi* ips_js_data[] =
+#endif
+{
+    &js_data_api.base,
+    nullptr
+};
index 5886c0a7474ca7a624913cbf098c494f253025d2..a7ab39d8e6fdbf750b34a9dd363434a394839ea9 100644 (file)
@@ -35,6 +35,7 @@ extern const BaseApi* ips_file_data;
 extern const BaseApi* ips_file_meta;
 extern const BaseApi* ips_flow;
 extern const BaseApi* ips_flowbits;
+extern const BaseApi* ips_js_data;
 extern const BaseApi* ips_md5;
 extern const BaseApi* ips_metadata;
 extern const BaseApi* ips_pkt_data;
@@ -103,6 +104,7 @@ static const BaseApi* ips_options[] =
     ips_file_meta,
     ips_flow,
     ips_flowbits,
+    ips_js_data,
     ips_md5,
     ips_metadata,
     ips_pkt_data,
diff --git a/src/js_norm/CMakeLists.txt b/src/js_norm/CMakeLists.txt
new file mode 100644 (file)
index 0000000..30a376c
--- /dev/null
@@ -0,0 +1,24 @@
+FLEX_TARGET ( js_tokenizer
+    ${CMAKE_CURRENT_SOURCE_DIR}/js_tokenizer.l
+    ${CMAKE_CURRENT_BINARY_DIR}/js_tokenizer.cc
+    COMPILE_FLAGS ${FLEX_FLAGS}
+)
+
+set ( JS_SOURCES
+    ${FLEX_js_tokenizer_OUTPUTS}
+    js_config.h
+    js_enum.h
+    js_identifier_ctx.cc
+    js_identifier_ctx.h
+    js_norm.cc
+    js_norm.h
+    js_norm_module.cc
+    js_norm_module.h
+    js_normalizer.cc
+    js_normalizer.h
+    js_tokenizer.h
+)
+
+add_library(js_norm OBJECT ${JS_SOURCES})
+
+add_subdirectory(test)
diff --git a/src/js_norm/dev_notes.txt b/src/js_norm/dev_notes.txt
new file mode 100644 (file)
index 0000000..6196aab
--- /dev/null
@@ -0,0 +1,131 @@
+Enhanced JavaScript Normalizer is a stateful JavaScript whitespace and identifiers normalizer.
+JSNorm is a basic implementation, so other modules can use it right away or provide some
+customization to it. Normalizer will remove all extraneous whitespace and newlines, keeping a
+single space where syntactically necessary. Comments will be removed, but contents of string
+literals will be kept intact. String literals joined by the plus operator will be concatenated.
+This also works for functions that result in string literals. Semicolons will be inserted, if not
+already present, according to ECMAScript automatic semicolon insertion rules.
+
+All JavaScript identifier names, except those from the ident_ignore or prop_ignore lists,
+will be substituted with unified names in the following format: var_0000 -> var_ffff.
+So, the number of unique identifiers available is 65536 names per transaction.
+If Normalizer exceeds the configured limit, a built-in alert is generated.
+
+A config option to set the limit manually:
+ * js_norm.identifier_depth.
+
+Identifiers from the ident_ignore list will be kept as is, without substitution. Starting with
+the listed identifier, any chain of dot accessors, brackets and function calls will be kept
+intact.
+For example:
+ * console.log("bar")
+ * document.getElementById("id").text
+ * eval("script")
+ * foo["bar"]
+
+Ignored identifiers are configured via the following config option,
+it accepts a list of object and function names:
+ * js_norm.ident_ignore = { 'console', 'document', 'eval', 'foo' }
+
+When a variable assignment that 'aliases' an identifier from the list is found,
+the assignment will be tracked, and subsequent occurrences of the variable will be
+replaced with the stored value. This substitution will follow JavaScript variable scope 
+limits.
+
+For example:
+
+    var a = console.log
+    a("hello")  // will be substituted to 'console.log("hello")'
+    a.foo.bar() // will be normalized as 'console.log.foo.bar()'. When variable is 'de-aliased',
+                // following identifiers are not normalized, just like identifiers from ident_ignore
+
+When an object is created using the 'new' keyword, and the class/constructor is found in the ident_ignore
+list, the object will be tracked. Although its own identifier will be converted to normal form,
+its property and function calls will be kept intact, as with ignored identifiers.
+
+For example:
+    var obj = new Array()
+    obj.insert(1,2,3) // will be normalized to var_0000.insert(1,2,3)
+
+For properties and methods of objects that can be created implicitly, there is a
+prop_ignore list. All names in the call chain that follow the first property or
+method from the list will not be normalized.
+
+Note that identifiers are normalized by name, i.e. an identifier and a property with the same name
+will be normalized to the same value. However, the ignore lists act separately on identifiers
+and properties.
+
+For example:
+
+   js_norm.prop_ignore = { 'split' }
+
+   in: "string".toUpperCase().split("").reverse().join("");
+   out: "string".var_0000().split("").reverse().join("");
+
+In addition to the scope tracking, JS Normalizer specifically tracks unescape-like JavaScript
+functions (unescape, decodeURI, decodeURIComponent, String.fromCharCode, String.fromCodePoint).
+This allows detection of unescape functions nested within other unescape functions, which is
+a potential indicator of a multilevel obfuscation. The definition of a function call depends on
+identifier substitution, so such identifiers must be included in the ignore list in
+order to use this feature. After determining the unescape sequence, it is decoded into the
+corresponding string, and the name of unescape function will not be present in the output.
+Single-byte escape sequences within the string and template literals which are arguments of
+unescape, decodeURI and decodeURIComponent functions will be decoded according to ISO/IEC 8859-1
+(Latin-1) charset. Except for these cases, escape sequences and code points will be decoded to UTF-8
+format.
+
+For example:
+
+   unescape('\u0062\u0061\u0072')              -> 'bar'
+   decodeURI('%62%61%72')                      -> 'bar'
+   decodeURIComponent('\x62\x61\x72')          -> 'bar'
+   String.fromCharCode(98, 0x0061, 0x72)       -> 'bar'
+   String.fromCodePoint(65600, 65601, 0x10042) -> '𐁀𐁁𐁂'
+
+The following formats are supported:
+
+   \xXX
+   \uXXXX
+   \u{XXXX}
+   %XX
+   \uXX
+   %uXXXX
+   decimal code point
+   hexadecimal code point
+
+JS Normalizer is able to decode mixed encoding sequences. However, a built-in alert is raised
+in such a case.
+
+JS Normalizer's syntax parser follows the ECMA-262 standard. For various features,
+tracking of variable scope and individual brackets is done in accordance with the standard.
+Additionally, Normalizer enforces standard limits on HTML content in JavaScript:
+ * no nesting tags allowed, i.e. two opening tags in a row
+ * script closing tag is not allowed in string literals, block comments, regular expression literals, etc.
+
+If the source JavaScript is syntactically incorrect (containing a bad token, bracket mismatch,
+HTML tags, etc.), Normalizer fires the corresponding built-in rule and abandons the current script,
+though the already-processed data remains in the output buffer.
+
+Enhanced JavaScript Normalizer has some trace messages available. Trace options follow:
+
+* trace.module.js_norm.proc turns on messages from script processing flow.
++
+Verbosity levels:
++
+1. Script opening tag detected (available in release build)
+2. Attributes of detected script (available in release build)
+3. Normalizer return code (available in release build)
+4. Contexts management (debug build only)
+5. Parser states (debug build only)
+6. Input stream states (debug build only)
+
+* trace.module.js_norm.dump dumps JavaScript data from processing layers.
++
+Verbosity levels:
++
+1. js_data buffer as it is being passed to detection (available in release build)
+2. (no messages available currently)
+3. Payload passed to Normalizer (available in release build)
+4. Temporary buffer (debug build only)
+5. Matched token (debug build only)
+6. Identifier substitution (debug build only)
diff --git a/src/js_norm/js_config.h b/src/js_norm/js_config.h
new file mode 100644 (file)
index 0000000..5cb438b
--- /dev/null
@@ -0,0 +1,37 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_config.h author Danylo Kyrylov <dkyrylov@cisco.com>
+
+#ifndef JS_CONFIG_H
+#define JS_CONFIG_H
+
+#include <string>
+#include <unordered_set>
+
+struct JSNormConfig
+{
+    int64_t bytes_depth = -1;
+    int32_t identifier_depth = 0xffff;
+    uint8_t max_template_nesting = 32;
+    uint32_t max_bracket_depth = 256;
+    uint32_t max_scope_depth = 256;
+    std::unordered_set<std::string> ignored_ids;
+    std::unordered_set<std::string> ignored_props;
+};
+
+#endif
diff --git a/src/js_norm/js_enum.h b/src/js_norm/js_enum.h
new file mode 100644 (file)
index 0000000..8f25b66
--- /dev/null
@@ -0,0 +1,65 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_enum.h authors Danylo Kyrylov <dkyrylov@cisco.com>, Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifndef JS_ENUM_H
+#define JS_ENUM_H
+
+#include "utils/event_gen.h"
+
+namespace jsn
+{
+
+static constexpr unsigned js_gid = 154;
+
+enum
+{
+    TRACE_PROC = 0,
+    TRACE_DUMP
+};
+
+// This enum must be synchronized with JSNormModule::peg_names[] in js_norm_module.cc
+enum PEG_COUNT
+{
+    PEG_BYTES = 0,
+    PEG_IDENTIFIERS,
+    PEG_IDENTIFIER_OVERFLOWS,
+    PEG_COUNT_MAX
+};
+
+// This enum must be synchronized with JSNormModule::events[] in js_norm_module.cc
+enum EventSid
+{
+    EVENT__NONE = -1,
+    EVENT_NEST_UNESCAPE_FUNC = 1,
+    EVENT_MIXED_UNESCAPE_SEQUENCE = 2,
+    EVENT_BAD_TOKEN = 3,
+    EVENT_OPENING_TAG = 4,
+    EVENT_CLOSING_TAG = 5,
+    EVENT_IDENTIFIER_OVERFLOW = 6,
+    EVENT_BRACKET_NEST_OVERFLOW = 7,
+    EVENT_DATA_LOST = 8,
+    EVENT_SCOPE_NEST_OVERFLOW = 9,
+    EVENT__MAX_VALUE
+};
+
+}
+
+using JSEvents = EventGen<jsn::EVENT__MAX_VALUE, jsn::EVENT__NONE, jsn::js_gid>;
+
+#endif
similarity index 93%
rename from src/utils/js_identifier_ctx.cc
rename to src/js_norm/js_identifier_ctx.cc
index c5d0478a4f24e1642a6977182571dae6bc94d385..bc486d5761ba1cb0bae357ed9a77ac08cff27c49 100644 (file)
 #include <cassert>
 #include <memory.h>
 
-#if !defined(CATCH_TEST_BUILD) && !defined(BENCHMARK_TEST)
-#include "service_inspectors/http_inspect/http_enum.h"
-#include "service_inspectors/http_inspect/http_module.h"
-#else
-namespace HttpEnums
-{
-enum PEG_COUNT
-{
-    PEG_JS_IDENTIFIER
-};
-}
+#include "js_enum.h"
+#include "js_norm_module.h"
 
-class HttpModule
-{
-public:
-    static void increment_peg_counts(HttpEnums::PEG_COUNT) {}
-};
-#endif // CATCH_TEST_BUILD
+using namespace jsn;
 
 #define NORM_NAME_SIZE 9 // size of the normalized form plus null symbol
 #define NORM_NAME_CNT 65536
@@ -80,7 +66,7 @@ static int _init_norm_names __attribute__((unused)) = (static_cast<void>(init_no
 JSIdentifierCtx::JSIdentifierCtx(int32_t depth, uint32_t max_scope_depth,
     const std::unordered_set<std::string>& ignored_ids_list,
     const std::unordered_set<std::string>& ignored_props_list)
-    : ignored_ids_list(ignored_ids_list), ignored_props_list(ignored_props_list), 
+    : ignored_ids_list(ignored_ids_list), ignored_props_list(ignored_props_list),
     max_scope_depth(max_scope_depth)
 {
     norm_name = norm_names;
@@ -133,7 +119,7 @@ const char* JSIdentifierCtx::acquire_norm_name(NormId& id)
 
     auto n = norm_name;
     norm_name += NORM_NAME_SIZE;
-    HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER);
+    JSNormModule::increment_peg_counts(PEG_IDENTIFIERS);
 
     if (id.prop_name || id.id_name)
     {
similarity index 97%
rename from src/utils/js_identifier_ctx.h
rename to src/js_norm/js_identifier_ctx.h
index 4d648f27e0af11123e55453eba72948c09ff167e..6092d1bea8fc96d1b62c4688ea54d32127a08adb 100644 (file)
@@ -26,6 +26,9 @@
 #include <unordered_set>
 #include <vector>
 
+namespace jsn
+{
+
 enum JSProgramScopeType : unsigned int
 {
     GLOBAL = 0,     // the global scope (the initial one)
@@ -34,10 +37,10 @@ enum JSProgramScopeType : unsigned int
     PROG_SCOPE_TYPE_MAX
 };
 
-class JSIdentifierCtxBase
+class JSIdentifier
 {
 public:
-    virtual ~JSIdentifierCtxBase() = default;
+    virtual ~JSIdentifier() = default;
 
     virtual const char* substitute(const char* identifier, bool is_property) = 0;
     virtual void add_alias(const char* alias, const std::string&& value) = 0;
@@ -52,7 +55,7 @@ public:
     virtual size_t size() const = 0;
 };
 
-class JSIdentifierCtx : public JSIdentifierCtxBase
+class JSIdentifierCtx : public JSIdentifier
 {
 public:
     JSIdentifierCtx(int32_t depth, uint32_t max_scope_depth,
@@ -83,7 +86,7 @@ private:
         const char* prop_name = nullptr;
         uint8_t type = 0;
     };
-    
+
     using Alias = std::vector<std::string>;
     using AliasRef = std::list<Alias*>;
     using AliasMap = std::unordered_map<std::string, Alias>;
@@ -136,4 +139,6 @@ public:
 #endif // CATCH_TEST_BUILD
 };
 
+}
+
 #endif // JS_IDENTIFIER_CTX
diff --git a/src/js_norm/js_norm.cc b/src/js_norm/js_norm.cc
new file mode 100644 (file)
index 0000000..e21ba8b
--- /dev/null
@@ -0,0 +1,185 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_norm.cc author Cisco
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "js_norm.h"
+
+#include "js_identifier_ctx.h"
+#include "js_normalizer.h"
+
+#include "js_norm_module.h"
+
+using namespace jsn;
+using namespace snort;
+
+static const char* jsret_codes[] =
+{
+    "end of stream",
+    "script ended",
+    "script continues",
+    "closing tag",
+    "bad token",
+    "identifier overflow",
+    "template nesting overflow",
+    "bracket nesting overflow",
+    "scope nesting overflow",
+    "wrong closing symbol",
+    "ended in inner scope",
+    "unknown"
+};
+
+const char* jsn::ret2str(int r)
+{
+    JSTokenizer::JSRet ret = (JSTokenizer::JSRet)r;
+
+    assert(ret < JSTokenizer::JSRet::MAX);
+    assert(ret < sizeof(jsret_codes)/sizeof(jsret_codes[0]));
+
+    ret = ret < JSTokenizer::JSRet::MAX ? ret : JSTokenizer::JSRet::MAX;
+
+    return jsret_codes[ret];
+}
+
+JSNorm::JSNorm(JSNormConfig* jsn_config, bool ext_script_type) :
+    alive(true), pdu_cnt(0), src_ptr(nullptr), src_end(nullptr),
+    idn_ctx(nullptr), jsn_ctx(nullptr), ext_script_type(ext_script_type)
+{
+    config = jsn_config;
+    alive = (bool)config;
+
+    if (!alive)
+        return;
+
+    idn_ctx = new JSIdentifierCtx(config->identifier_depth,
+        config->max_scope_depth, config->ignored_ids, config->ignored_props);
+    jsn_ctx = new JSNormalizer(*idn_ctx, config->bytes_depth,
+        config->max_template_nesting, config->max_bracket_depth);
+
+    debug_log(4, js_trace, TRACE_PROC, nullptr, "context created\n");
+}
+
+JSNorm::~JSNorm()
+{
+    delete idn_ctx;
+    delete jsn_ctx;
+
+    debug_log(4, js_trace, TRACE_PROC, nullptr, "context deleted\n");
+}
+
+void JSNorm::normalize(const void* in_data, size_t in_len, const void*& data, size_t& len)
+{
+    if (!alive)
+    {
+        len = 0;
+        data = nullptr;
+        return;
+    }
+
+    if (pdu_cnt > 2)
+    {
+        len = 0;
+        data = nullptr;
+        return;
+    }
+    if (pdu_cnt > 1)
+    {
+        events.create_event(EVENT_DATA_LOST);
+        ++pdu_cnt;
+        len = 0;
+        data = nullptr;
+        return;
+    }
+    pdu_cnt = 0;
+
+    src_ptr = (const uint8_t*)in_data;
+    src_end = src_ptr + in_len;
+
+    while (alive and pre_proc())
+    {
+        auto ret = jsn_ctx->normalize((const char*)src_ptr, src_end - src_ptr, ext_script_type);
+        const uint8_t* next = (const uint8_t*)jsn_ctx->get_src_next();
+
+        JSNormModule::increment_peg_counts(PEG_BYTES, next - src_ptr);
+        src_ptr = next;
+
+        alive = post_proc(ret);
+    }
+
+    len = jsn_ctx->script_size();
+    data = jsn_ctx->get_script();
+}
+
+bool JSNorm::pre_proc()
+{
+    return src_ptr < src_end;
+}
+
+bool JSNorm::post_proc(int ret)
+{
+    if (jsn_ctx->is_unescape_nesting_seen())
+        events.create_event(EVENT_NEST_UNESCAPE_FUNC);
+
+    if (jsn_ctx->is_mixed_encoding_seen())
+        events.create_event(EVENT_MIXED_UNESCAPE_SEQUENCE);
+
+    if (jsn_ctx->is_opening_tag_seen())
+        events.create_event(EVENT_OPENING_TAG);
+
+    if (jsn_ctx->is_closing_tag_seen())
+        events.create_event(EVENT_CLOSING_TAG);
+
+    switch ((JSTokenizer::JSRet)ret)
+    {
+    case JSTokenizer::EOS:
+    case JSTokenizer::SCRIPT_ENDED:
+    case JSTokenizer::SCRIPT_CONTINUE:
+        return true;
+
+    case JSTokenizer::CLOSING_TAG:
+        events.create_event(EVENT_CLOSING_TAG);
+        return false;
+
+    case JSTokenizer::BAD_TOKEN:
+    case JSTokenizer::WRONG_CLOSING_SYMBOL:
+    case JSTokenizer::ENDED_IN_INNER_SCOPE:
+        events.create_event(EVENT_BAD_TOKEN);
+        return false;
+
+    case JSTokenizer::IDENTIFIER_OVERFLOW:
+        JSNormModule::increment_peg_counts(PEG_IDENTIFIER_OVERFLOWS);
+        events.create_event(EVENT_IDENTIFIER_OVERFLOW);
+        return false;
+
+    case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
+    case JSTokenizer::BRACKET_NESTING_OVERFLOW:
+        events.create_event(EVENT_BRACKET_NEST_OVERFLOW);
+        return false;
+
+    case JSTokenizer::SCOPE_NESTING_OVERFLOW:
+        events.create_event(EVENT_SCOPE_NEST_OVERFLOW);
+        return false;
+
+    default:
+        assert(false);
+        return false;
+    }
+}
diff --git a/src/js_norm/js_norm.h b/src/js_norm/js_norm.h
new file mode 100644 (file)
index 0000000..654faa7
--- /dev/null
@@ -0,0 +1,71 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_norm.h author Cisco
+
+#ifndef JS_NORM_H
+#define JS_NORM_H
+
+#include "utils/event_gen.h"
+
+#include "js_config.h"
+#include "js_enum.h"
+
+namespace jsn
+{
+class JSIdentifier; // forward declaration: this header only uses a pointer to it
+class JSNormalizer; // forward declaration: this header only uses a pointer to it
+
+const char* ret2str(int); // NOTE(review): presumably maps a return code to a printable name - confirm
+}
+
+namespace snort
+{
+
+class JSNorm
+{
+public:
+    JSNorm(JSNormConfig*, bool ext_script_type = false);
+    JSNorm(const JSNorm&) = delete;
+    virtual ~JSNorm();
+
+    void tick()
+    { ++pdu_cnt; }
+
+    void normalize(const void*, size_t, const void*&, size_t&);
+
+protected:
+    virtual bool pre_proc();
+    virtual bool post_proc(int);
+
+    bool alive;
+    uint32_t pdu_cnt;
+
+    const uint8_t* src_ptr;
+    const uint8_t* src_end;
+
+    jsn::JSIdentifier* idn_ctx;
+    jsn::JSNormalizer* jsn_ctx;
+    bool ext_script_type;
+
+    JSEvents events;
+    JSNormConfig* config;
+};
+
+}
+
+#endif
diff --git a/src/js_norm/js_norm_module.cc b/src/js_norm/js_norm_module.cc
new file mode 100644 (file)
index 0000000..af573ba
--- /dev/null
@@ -0,0 +1,179 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_norm_module.cc author Danylo Kyrylov <dkyrylov@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "js_norm_module.h"
+
+#include "trace/trace.h"
+
+#include "js_config.h"
+#include "js_enum.h"
+
+using namespace jsn;
+using namespace snort;
+
+static constexpr char s_name[] = "js_norm";
+static constexpr char s_help[] = "JavaScript normalizer";
+
+THREAD_LOCAL const Trace* js_trace = nullptr;
+
+THREAD_LOCAL PegCount JSNormModule::peg_counts[PEG_COUNT_MAX] = {};
+THREAD_LOCAL ProfileStats JSNormModule::profile_stats;
+
+static const Parameter ident_ignore_param[] = // element schema of the "ident_ignore" list
+{
+    { "ident_name", Parameter::PT_STRING, nullptr, nullptr, "name of the identifier to ignore" },
+    { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } // end-of-table marker
+};
+
+static const Parameter prop_ignore_param[] = // element schema of the "prop_ignore" list
+{
+    { "prop_name", Parameter::PT_STRING, nullptr, nullptr, "name of the object property to ignore" },
+    { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } // end-of-table marker
+};
+
+const Parameter JSNormModule::params[] = // top-level options; values are stored by JSNormModule::set()
+{
+    { "bytes_depth", Parameter::PT_INT, "-1:max53", "-1",
+      "number of input JavaScript bytes to normalize (-1 unlimited)" },
+
+    { "identifier_depth", Parameter::PT_INT, "0:65536", "65536",
+      "max number of unique JavaScript identifiers to normalize" },
+
+    { "max_tmpl_nest", Parameter::PT_INT, "0:255", "32",
+      "maximum depth of template literal nesting that enhanced JavaScript normalizer will process" },
+
+    { "max_bracket_depth", Parameter::PT_INT, "1:65535", "256",
+      "maximum depth of bracket nesting that enhanced JavaScript normalizer will process" },
+
+    { "max_scope_depth", Parameter::PT_INT, "1:65535", "256",
+      "maximum depth of scope nesting that enhanced JavaScript normalizer will process" },
+
+    { "ident_ignore", Parameter::PT_LIST, ident_ignore_param, nullptr, // elements use ident_ignore_param
+      "list of JavaScript ignored identifiers which will not be normalized" },
+
+    { "prop_ignore", Parameter::PT_LIST, prop_ignore_param, nullptr, // elements use prop_ignore_param
+      "list of JavaScript ignored object properties which will not be normalized" },
+
+    { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr } // end-of-table marker
+};
+
+static const TraceOption trace_options[] = // table returned by JSNormModule::get_trace_options()
+{
+    { "proc",  TRACE_PROC,  "enable processing logging" },
+    { "dump",  TRACE_DUMP,  "enable data logging" },
+    { nullptr, 0, nullptr } // end-of-table marker
+};
+
+const PegInfo JSNormModule::peg_names[PEG_COUNT_MAX + 1] = // one extra slot for the END entry
+{
+    { CountType::SUM, "bytes", "total number of bytes processed" }, // NOTE(review): order presumably follows jsn::PEG_COUNT - confirm
+    { CountType::SUM, "identifiers", "total number of unique identifiers processed" },
+    { CountType::SUM, "identifier_overflows", "total number of unique identifier limit overflows" },
+    { CountType::END, nullptr, nullptr } // end-of-table marker
+};
+
+const RuleMap JSNormModule::events[] = // event id -> message; returned by get_rules()
+{
+    { EVENT_NEST_UNESCAPE_FUNC, "nested unescape functions" },
+    { EVENT_MIXED_UNESCAPE_SEQUENCE, "mixed unescape sequence" },
+    { EVENT_BAD_TOKEN, "bad token" },
+    { EVENT_OPENING_TAG, "unexpected HTML script opening tag" },
+    { EVENT_CLOSING_TAG, "unexpected HTML script closing tag" },
+    { EVENT_IDENTIFIER_OVERFLOW, "max number of unique identifiers reached" },
+    { EVENT_BRACKET_NEST_OVERFLOW, "excessive bracket nesting" },
+    { EVENT_DATA_LOST, "data gaps during normalization" },
+    { EVENT_SCOPE_NEST_OVERFLOW, "excessive scope nesting" },
+    { 0, nullptr } // end-of-table marker
+};
+
+// Registers the module under its name with the help text and parameter
+// table; no configuration is attached until begin() runs.
+JSNormModule::JSNormModule()
+    : Module(s_name, s_help, params),
+      config(nullptr)
+{ }
+
+// Nothing to release here.
+JSNormModule::~JSNormModule() = default;
+
+// Called when a configuration table is opened. Only the top-level table
+// (fqn equal to the module name) replaces the inspection policy's
+// JSNormConfig with a fresh one; for any other name the current
+// configuration is left untouched. Always returns true.
+bool JSNormModule::begin(const char* fqn, int, SnortConfig*)
+{
+    const bool is_top_level = (strcmp(s_name, fqn) == 0);
+
+    if (is_top_level)
+    {
+        auto ins_policy = get_inspection_policy();
+        assert(ins_policy);
+
+        // drop whatever configuration the policy held before
+        delete ins_policy->jsn_config;
+        ins_policy->jsn_config = new JSNormConfig;
+        config = ins_policy->jsn_config;
+    }
+
+    return true;
+}
+
+bool JSNormModule::set(const char*, Value& v, SnortConfig*)
+{
+    assert(config);
+
+    if (v.is("bytes_depth"))
+    {
+        config->bytes_depth = v.get_int64();
+    }
+    else if (v.is("identifier_depth"))
+    {
+        config->identifier_depth = v.get_int32();
+    }
+    else if (v.is("max_tmpl_nest"))
+    {
+        config->max_template_nesting = v.get_uint8();
+    }
+    else if (v.is("max_bracket_depth"))
+    {
+        config->max_bracket_depth = v.get_uint32();
+    }
+    else if (v.is("max_scope_depth"))
+    {
+        config->max_scope_depth = v.get_uint32();
+    }
+    else if (v.is("ident_name"))
+    {
+        config->ignored_ids.insert(v.get_string());
+    }
+    else if (v.is("prop_name"))
+    {
+        config->ignored_props.insert(v.get_string());
+    }
+
+    return true;
+}
+
+void JSNormModule::set_trace(const Trace* trace) const // stores the handle in the thread-local js_trace
+{
+    js_trace = trace;
+}
+
+const TraceOption* JSNormModule::get_trace_options() const // exposes the static trace_options table
+{
+    return trace_options;
+}
+
+unsigned JSNormModule::get_gid() const // generator id; js_gid is defined outside this file
+{
+    return js_gid;
+}
diff --git a/src/js_norm/js_norm_module.h b/src/js_norm/js_norm_module.h
new file mode 100644 (file)
index 0000000..cd7d840
--- /dev/null
@@ -0,0 +1,86 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_norm_module.h author Danylo Kyrylov <dkyrylov@cisco.com>
+
+#ifndef JS_NORM_MODULE_H
+#define JS_NORM_MODULE_H
+
+#include "framework/module.h"
+#include "main/policy.h"
+#include "profiler/profiler.h"
+
+#include "js_config.h"
+#include "js_enum.h"
+
+namespace snort
+{
+class Trace;
+}
+
+extern THREAD_LOCAL const snort::Trace* js_trace;
+
+class JSNormModule : public snort::Module // configuration module registered as "js_norm"
+{
+public:
+    JSNormModule();
+    ~JSNormModule() override;
+
+    bool begin(const char*, int, snort::SnortConfig*) override; // creates the policy's JSNormConfig for the top-level table
+    bool set(const char*, snort::Value&, snort::SnortConfig*) override; // stores one parameter value into the config
+
+    void set_trace(const snort::Trace*) const override; // updates the thread-local js_trace
+    const snort::TraceOption* get_trace_options() const override;
+
+    unsigned get_gid() const override;
+
+    const snort::RuleMap* get_rules() const override // event id -> message table
+    { return events; }
+
+    const PegInfo* get_pegs() const override // counter descriptions
+    { return peg_names; }
+
+    PegCount* get_counts() const override // thread-local counter values
+    { return peg_counts; }
+
+    snort::ProfileStats* get_profile() const override
+    { return &profile_stats; }
+
+    Usage get_usage() const override
+    { return INSPECT; }
+
+    static void increment_peg_counts(jsn::PEG_COUNT counter) // adds 1; the index is not range-checked
+    { peg_counts[counter]++; }
+
+    static void increment_peg_counts(jsn::PEG_COUNT counter, uint64_t value) // adds 'value'; the index is not range-checked
+    { peg_counts[counter] += value; }
+
+    static PegCount get_peg_counts(jsn::PEG_COUNT counter) // reads the calling thread's counter
+    { return peg_counts[counter]; }
+
+private:
+    static const snort::Parameter params[];
+    static const snort::RuleMap events[];
+    static const PegInfo peg_names[];
+
+    static THREAD_LOCAL PegCount peg_counts[];
+    static THREAD_LOCAL snort::ProfileStats profile_stats;
+
+    JSNormConfig* config; // set by begin(); points at the inspection policy's jsn_config
+};
+
+#endif
similarity index 92%
rename from src/utils/js_normalizer.cc
rename to src/js_norm/js_normalizer.cc
index d2ae20955c5bbc54be4a243b7f0772132c80a7d6..5407ee2ba57b86ca5be111a16bcb564e0052cad0 100644 (file)
 
 #include "js_normalizer.h"
 
+#include "js_norm/js_enum.h"
+
 #define BUFF_EXP_FACTOR 1.3
 
-using namespace snort;
+using namespace jsn;
 using namespace std;
 
-JSNormalizer::JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t norm_depth,
+JSNormalizer::JSNormalizer(JSIdentifier& js_ident_ctx, size_t norm_depth,
     uint8_t max_template_nesting, uint32_t max_bracket_depth, int tmp_cap_size)
     : depth(norm_depth),
       rem_bytes(norm_depth),
@@ -63,14 +65,14 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, bool
 
     if (rem_bytes == 0)
     {
-        debug_log(5, http_trace, TRACE_JS_PROC, nullptr,
+        debug_log(5, js_trace, TRACE_PROC, nullptr,
             "depth limit reached\n");
 
         src_next = src + src_len;
         return JSTokenizer::EOS;
     }
 
-    debug_logf(4, http_trace, TRACE_JS_DUMP, nullptr,
+    debug_logf(4, js_trace, TRACE_DUMP, nullptr,
         "tmp buffer[%zu]: %.*s\n", tmp_buf_size, static_cast<int>(tmp_buf_size), tmp_buf);
 
     src_len = min(src_len, rem_bytes);
similarity index 92%
rename from src/utils/js_normalizer.h
rename to src/js_norm/js_normalizer.h
index 5de32aee832b792c37c12009b04a9044af085ebe..2ad8b45c26da6d53333b4b51b253a3bc9e41d19c 100644 (file)
 #include <FlexLexer.h>
 
 #include "js_tokenizer.h"
-#include "streambuf.h"
 
-namespace snort
+#include "utils/streambuf.h"
+
+namespace jsn
 {
 
 class JSNormalizer
 {
 public:
-    JSNormalizer(JSIdentifierCtxBase& js_ident_ctx, size_t depth,
+    JSNormalizer(JSIdentifier& js_ident_ctx, size_t depth,
         uint8_t max_template_nesting, uint32_t max_bracket_depth,
         int tmp_cap_size = JSTOKENIZER_BUF_MAX_SIZE);
     ~JSNormalizer();
@@ -56,9 +57,6 @@ public:
     size_t script_size()
     { return out.tellp(); }
 
-    static size_t size()
-    { return sizeof(JSNormalizer) + 16834; /* YY_BUF_SIZE */ }
-
     bool is_unescape_nesting_seen() const
     { return tokenizer.is_unescape_nesting_seen(); }
 
@@ -97,8 +95,8 @@ private:
     char* tmp_buf;
     size_t tmp_buf_size;
 
-    istreambuf_glue in_buf;
-    ostreambuf_infl out_buf;
+    snort::istreambuf_glue in_buf;
+    snort::ostreambuf_infl out_buf;
     std::istream in;
     std::ostream out;
     JSTokenizer tokenizer;
similarity index 97%
rename from src/utils/js_tokenizer.h
rename to src/js_norm/js_tokenizer.h
index a133074093de3ce42b8247a5afa7a386fdb4b022..f5c97de0f09ca68fe67850f7b40c810da1428699 100644 (file)
 #include <vector>
 
 #include "log/messages.h"
-#include "service_inspectors/http_inspect/http_enum.h"
 #include "trace/trace_api.h"
 
-extern THREAD_LOCAL const snort::Trace* http_trace;
+extern THREAD_LOCAL const snort::Trace* js_trace;
 
 // The longest pattern has 9 characters " < / s c r i p t > ",
 // 8 of them can reside in 1st chunk
@@ -40,9 +39,12 @@ extern THREAD_LOCAL const snort::Trace* http_trace;
 // To hold potentially long identifiers
 #define JSTOKENIZER_BUF_MAX_SIZE 256
 
+namespace jsn
+{
+
 enum JSProgramScopeType : unsigned int;
 
-class JSIdentifierCtxBase;
+class JSIdentifier;
 #if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST)
 class JSTokenizerTester;
 class JSTestConfig;
@@ -166,7 +168,7 @@ public:
     };
 
     JSTokenizer() = delete;
-    explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifierCtxBase& ident_ctx,
+    explicit JSTokenizer(std::istream& in, std::ostream& out, JSIdentifier& ident_ctx,
         uint8_t max_template_nesting, uint32_t max_bracket_depth, char*& buf, size_t& buf_size,
         int cap_size = JSTOKENIZER_BUF_MAX_SIZE);
     ~JSTokenizer() override;
@@ -182,10 +184,6 @@ public:
     bool is_closing_tag_seen() const;
     bool is_buffer_adjusted() const;
 
-protected:
-    [[noreturn]] void LexerError(const char* msg) override
-    { snort::FatalError("%s", msg); }
-
 private:
     int yylex() override;
 
@@ -342,7 +340,7 @@ private:
     VStack<uint16_t> brace_depth;
     JSToken token = UNDEFINED;
     ASIGroup previous_group = ASI_OTHER;
-    JSIdentifierCtxBase& ident_ctx;
+    JSIdentifier& ident_ctx;
     size_t bytes_read;
     size_t tmp_bytes_read;
     uint32_t tokens_read;
@@ -413,4 +411,6 @@ private:
 #endif // CATCH_TEST_BUILD || BENCHMARK_TEST
 };
 
+}
+
 #endif // JS_TOKENIZER_H
similarity index 99%
rename from src/utils/js_tokenizer.l
rename to src/js_norm/js_tokenizer.l
index de0bcdd0d32898c0934253afc56735861f30e4ce..d368dabc694b4e51a34703fae190f6035da28a4d 100644 (file)
 #include "config.h"
 #endif
 
-#include "utils/js_tokenizer.h"
 
 #include <algorithm>
 #include <cassert>
 
-#include "utils/js_identifier_ctx.h"
+#include "js_norm/js_enum.h"
+#include "js_norm/js_identifier_ctx.h"
+#include "js_norm/js_tokenizer.h"
 #include "utils/util_cstring.h"
 
+using namespace jsn;
+
 #define YY_NO_UNPUT
+
+#define YY_FATAL_ERROR(msg) { snort::FatalError("%s", msg); }
+
 #define YY_USER_ACTION                                      \
     {                                                       \
-        debug_logf(5, http_trace, TRACE_JS_PROC, nullptr,   \
+        debug_logf(5, js_trace, TRACE_PROC, nullptr,        \
             "pattern #%d, sc %d\n", yy_act, YY_START);      \
                                                             \
-        debug_logf(5, http_trace, TRACE_JS_DUMP, nullptr,   \
+        debug_logf(5, js_trace, TRACE_DUMP, nullptr,        \
             "text '%s'\n", YYText());                       \
                                                             \
         if (!states_process())                              \
@@ -1365,7 +1371,7 @@ const char* JSTokenizer::p_scope_codes[] =
 };
 
 JSTokenizer::JSTokenizer(std::istream& in, std::ostream& out,
-    JSIdentifierCtxBase& mapper, uint8_t max_template_nesting,
+    JSIdentifier& mapper, uint8_t max_template_nesting,
     uint32_t max_bracket_depth, char*& buf, size_t& buf_size, int cap_size)
     : yyFlexLexer(in, out),
       max_template_nesting(max_template_nesting),
@@ -1532,7 +1538,7 @@ JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b
 
     if (!name)
     {
-        debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr,
+        debug_logf(6, js_trace, TRACE_DUMP, nullptr,
             "'%s' => IDENTIFIER_OVERFLOW\n", lexeme);
         return IDENTIFIER_OVERFLOW;
     }
@@ -1556,15 +1562,15 @@ JSTokenizer::JSRet JSTokenizer::do_identifier_substitution(const char* lexeme, b
         dealias_stored = true;
         yyout << alias;
 
-        debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr,
-                   "'%s' => '%s'\n", lexeme, alias);
+        debug_logf(6, js_trace, TRACE_DUMP, nullptr,
+            "'%s' => '%s'\n", lexeme, alias);
         return EOS;
     }
 
     ignored_id_pos = -1;
     yyout << name;
 
-    debug_logf(6, http_trace, TRACE_JS_DUMP, nullptr,
+    debug_logf(6, js_trace, TRACE_DUMP, nullptr,
         "'%s' => '%s'\n", lexeme, name);
     return EOS;
 }
@@ -2077,7 +2083,7 @@ JSTokenizer::JSRet JSTokenizer::p_scope_push(ScopeMetaType t)
     if (!ident_ctx.scope_push(m2p(t)))
         return SCOPE_NESTING_OVERFLOW;
 
-    debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, "scope pushed: '%s'\n", m2str(t));
+    debug_logf(5, js_trace, TRACE_PROC, nullptr, "scope pushed: '%s'\n", m2str(t));
 
     return EOS;
 }
@@ -2087,7 +2093,7 @@ JSTokenizer::JSRet JSTokenizer::p_scope_pop(ScopeMetaType t)
     if (!ident_ctx.scope_pop(m2p(t)))
         return WRONG_CLOSING_SYMBOL;
 
-    debug_logf(5, http_trace, TRACE_JS_PROC, nullptr, "scope popped: '%s'\n", m2str(t));
+    debug_logf(5, js_trace, TRACE_PROC, nullptr, "scope popped: '%s'\n", m2str(t));
 
     return EOS;
 }
@@ -2398,7 +2404,7 @@ JSTokenizer::JSRet JSTokenizer::literal_regex_g_close()
     case ')':
         if (regex_stack.empty())
         {
-            debug_logf(5, http_trace, TRACE_JS_PROC, nullptr,
+            debug_logf(5, js_trace, TRACE_PROC, nullptr,
                 "no group to close, .. %c\n", yytext[0]);
             return BAD_TOKEN;
         }
diff --git a/src/js_norm/test/CMakeLists.txt b/src/js_norm/test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..bb4634c
--- /dev/null
@@ -0,0 +1,70 @@
+# Generate the tokenizer source from the flex grammar one directory up.
+FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/../js_tokenizer.l
+    ${CMAKE_CURRENT_BINARY_DIR}/../js_tokenizer.cc
+    COMPILE_FLAGS ${FLEX_FLAGS}
+)
+
+# Shared utility sources live in src/utils; address them relative to this
+# directory so the path does not depend on the top-level source dir.
+set ( JS_TEST_UTILS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../utils )
+
+# Sources common to all tests that drive JSNormalizer through the test helpers.
+set ( JS_NORMALIZER_TEST_SOURCES
+    ${FLEX_js_tokenizer_OUTPUTS}
+    ../js_identifier_ctx.cc
+    ../js_normalizer.cc
+    ${JS_TEST_UTILS_DIR}/streambuf.cc
+    ${JS_TEST_UTILS_DIR}/util_cstring.cc
+    js_test_options.cc
+    js_test_stubs.cc
+    js_test_utils.cc
+)
+
+add_catch_test( js_normalizer_test
+    SOURCES
+        ${JS_NORMALIZER_TEST_SOURCES}
+)
+
+if (ENABLE_BENCHMARK_TESTS)
+    add_catch_test( js_norm_benchmark
+        SOURCES
+            ${JS_NORMALIZER_TEST_SOURCES}
+    )
+endif()
+
+add_catch_test( js_dealias_test
+    SOURCES
+        ${JS_NORMALIZER_TEST_SOURCES}
+)
+
+add_catch_test( js_unescape_test
+    SOURCES
+        ${JS_NORMALIZER_TEST_SOURCES}
+)
+
+# The identifier context test needs neither the tokenizer nor the helpers.
+add_catch_test( js_identifier_ctx_test
+    SOURCES
+        ../js_identifier_ctx.cc
+        js_test_stubs.cc
+)
+
+# JSNorm test: builds js_norm.cc on top of the normalizer, without the
+# option/utility test helpers.
+add_catch_test( jsn_test
+    SOURCES
+        ${FLEX_js_tokenizer_OUTPUTS}
+        ../js_identifier_ctx.cc
+        ../js_norm.cc
+        ../js_normalizer.cc
+        ${JS_TEST_UTILS_DIR}/streambuf.cc
+        js_test_stubs.cc
+)
similarity index 96%
rename from src/utils/test/dev_notes.txt
rename to src/js_norm/test/dev_notes.txt
index bde1f61f2506f2b28e217bbaea2ac499d37e7974..6ede83787432474f49b26eea57fd061931506b2f 100644 (file)
@@ -31,3 +31,6 @@ Checklist to add a new option with an already present type:
 5. Add field assignment to the set_overrides test case;
 6. Add a named constructor that returns Config with your option as the type.
 
+js_test_stubs:
+Provides stubs for external dependencies.
+
similarity index 99%
rename from src/utils/test/js_dealias_test.cc
rename to src/js_norm/test/js_dealias_test.cc
index 138132e03b8ededaf297ab09d863919b61f14c73..85c655cc6e75e225d7779d160c31bfd7b3f99e4d 100644 (file)
@@ -25,7 +25,7 @@
 
 #include "js_test_utils.h"
 
-using namespace snort;
+using namespace jsn;
 
 // Unit tests
 
similarity index 99%
rename from src/utils/test/js_identifier_ctx_test.cc
rename to src/js_norm/test/js_identifier_ctx_test.cc
index 9513f54cadff73ea56d6208f5f909eebaba0e178..dafea345b9838f8f00bf30475a895c35fad0fb9a 100644 (file)
@@ -28,7 +28,9 @@
 #include <sstream>
 #include <vector>
 
-#include "utils/js_identifier_ctx.h"
+#include "js_norm/js_identifier_ctx.h"
+
+using namespace jsn;
 
 #define DEPTH 65536
 #define SCOPE_DEPTH 256
similarity index 99%
rename from src/utils/test/js_norm_benchmark.cc
rename to src/js_norm/test/js_norm_benchmark.cc
index bc18c66ce5c097dfba8abe95cb1e4ae57e80fb04..896160ee297f1b641a88094bf7fab73693ac06e5 100644 (file)
 
 #include "catch/catch.hpp"
 
-#include "utils/js_identifier_ctx.h"
-#include "utils/js_normalizer.h"
+#include "js_norm/js_identifier_ctx.h"
+#include "js_norm/js_normalizer.h"
 
 #include "js_test_utils.h"
 
-using namespace snort;
+using namespace jsn;
 
 static constexpr const char* s_closing_tag = "</script>";
 
similarity index 99%
rename from src/utils/test/js_normalizer_test.cc
rename to src/js_norm/test/js_normalizer_test.cc
index 1550cd3b74ca55dfaddee8989d672cad58bf03ab..d2b190cd3d187adaee63ed302920efc6872e0630 100644 (file)
 
 #include "catch/catch.hpp"
 
-#include "utils/js_identifier_ctx.h"
-#include "utils/js_normalizer.h"
+#include "js_norm/js_identifier_ctx.h"
+#include "js_norm/js_normalizer.h"
 
 #include "js_test_utils.h"
 
-using namespace snort;
+using namespace jsn;
 using namespace std::string_literals;
 
 #ifdef CATCH_TEST_BUILD
@@ -160,7 +160,6 @@ TEST_CASE("clamav tests", "[JSNormalizer]")
             "var x='test\u0000test';"s
         );
     }
-    // FIXIT-L this should be revisited
     SECTION("test_case_13 - invalid escape sequence")
     {
         test_normalization_noident(
@@ -5784,8 +5783,6 @@ TEST_CASE("Scope tracking - over multiple PDU", "[JSNormalizer]")
         test_normalization({
             {"long_", "var_0000", {GLOBAL}},
             {"variable", "var_0001", {GLOBAL}}
-            //FIXIT-E: if variable index will be preserved across PDUs, second pdu expected
-            // will be "var_0000"
         });
 
     SECTION("general - variable extension: ignored identifier to a regular one")
similarity index 97%
rename from src/utils/test/js_test_options.h
rename to src/js_norm/test/js_test_options.h
index e27102d81dbb14a2f53a3fc98c359f6a2582cfae..e588666c8b33d07d850f9108a068453362307eb3 100644 (file)
 #include <unordered_set>
 #include <vector>
 
-#include "utils/js_identifier_ctx.h"
-#include "utils/js_normalizer.h"
+#include "js_norm/js_identifier_ctx.h"
+#include "js_norm/js_normalizer.h"
+
+using namespace jsn;
 
 typedef std::unordered_set<std::string> StringSet;
 
diff --git a/src/js_norm/test/js_test_stubs.cc b/src/js_norm/test/js_test_stubs.cc
new file mode 100644 (file)
index 0000000..645598b
--- /dev/null
@@ -0,0 +1,40 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// js_test_stubs.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "js_norm/js_enum.h"
+#include "js_norm/js_norm_module.h"
+#include "trace/trace_api.h"
+
+THREAD_LOCAL const snort::Trace* js_trace = nullptr; // test-side definition of the handle declared extern in js_norm_module.h
+THREAD_LOCAL PegCount JSNormModule::peg_counts[jsn::PEG_COUNT_MAX] = {}; // storage for the module's static counters
+
+namespace snort
+{
+[[noreturn]] void FatalError(const char*, ...) { exit(EXIT_FAILURE); } // message is dropped; the process exits with failure
+
+void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) { } // no-op stub
+uint8_t TraceApi::get_constraints_generation() { return 0; } // stub: always 0
+void TraceApi::filter(const Packet&) { } // no-op stub
+
+int DetectionEngine::queue_event(unsigned int, unsigned int) { return 0; } // stub: nothing is queued, returns 0
+}
similarity index 94%
rename from src/utils/test/js_test_utils.cc
rename to src/js_norm/test/js_test_utils.cc
index 7b415dce08ca811ad1862a25bd387152dd685f56..4eebc7d098732ca09ebeac6a4221eacc916915fa 100644 (file)
 
 #include "catch/catch.hpp"
 
-namespace snort
-{
-[[noreturn]] void FatalError(const char*, ...)
-{ exit(EXIT_FAILURE); }
-void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list) { }
-uint8_t TraceApi::get_constraints_generation() { return 0; }
-void TraceApi::filter(const Packet&) { }
-}
-
-THREAD_LOCAL const snort::Trace* http_trace = nullptr;
-
-using namespace snort;
+using namespace jsn;
 
 JSTokenizerTester::JSTokenizerTester(const JSTestConfig& conf) :
     ident_ctx(conf.identifier_depth,
@@ -45,8 +34,8 @@ JSTokenizerTester::JSTokenizerTester(const JSTestConfig& conf) :
         conf.ignored_properties_list),
     normalizer(
         conf.normalize_identifiers ?
-            static_cast<JSIdentifierCtxBase&>(ident_ctx) :
-            static_cast<JSIdentifierCtxBase&>(ident_ctx_stub),
+            static_cast<JSIdentifier&>(ident_ctx) :
+            static_cast<JSIdentifier&>(ident_ctx_stub),
         conf.norm_depth,
         conf.max_template_nesting,
         conf.max_bracket_depth,
similarity index 92%
rename from src/utils/test/js_test_utils.h
rename to src/js_norm/test/js_test_utils.h
index 1f99ec7a8746f81683d06a67f89418fc7ef80a7d..40be89ca04e2452020ad1353c6ed4449eeb6995e 100644 (file)
 #include <utility>
 #include <vector>
 
-#include "utils/js_identifier_ctx.h"
-#include "utils/js_normalizer.h"
+#include "js_norm/js_identifier_ctx.h"
+#include "js_norm/js_normalizer.h"
 
 #include "js_test_options.h"
 
 constexpr int unlim_depth = -1;
 
-namespace snort
+namespace jsn
 {
-[[noreturn]] void FatalError(const char*, ...);
-void trace_vprintf(const char*, TraceLevel, const char*, const Packet*, const char*, va_list);
-}
 
-class JSIdentifierCtxStub : public JSIdentifierCtxBase
+class JSIdentifierCtxStub : public JSIdentifier
 {
 public:
     JSIdentifierCtxStub() = default;
@@ -72,7 +69,7 @@ public:
 
     JSIdentifierCtx ident_ctx;
     JSIdentifierCtxStub ident_ctx_stub;
-    snort::JSNormalizer normalizer;
+    JSNormalizer normalizer;
 
 private:
     const JSTestConfig& config;
@@ -95,7 +92,7 @@ public:
     JSTestConfig(const Overrides& values);
     JSTestConfig derive(const Overrides& values) const;
 
-    snort::JSNormalizer&& make_normalizer() const;
+    JSNormalizer&& make_normalizer() const;
 
     void test_scope(const std::string& context, const std::list<JSProgramScopeType>& stack) const;
     void test_scope(const std::string& context, const std::list<JSProgramScopeType>& stack,
@@ -134,7 +131,9 @@ static const JSTestConfig default_config({
     normalize_identifiers(true)
 });
 
-void test_scope(const std::string& context, const std::list<JSProgramScopeType>& stack);
+}
+
+void test_scope(const std::string& context, const std::list<jsn::JSProgramScopeType>& stack);
 void test_normalization(const std::string& source, const std::string& expected, const Overrides& overrides = {});
 void test_normalization_noident(const std::string& source, const std::string& expected,
     const Overrides& overrides = {});
similarity index 99%
rename from src/utils/test/js_unescape_test.cc
rename to src/js_norm/test/js_unescape_test.cc
index ded5dbec359ab05d2b648f4a919f2f45cc116a8a..270d5e502ab0f9aaab763f5bbc02a89089f7d670 100644 (file)
 
 #include "catch/catch.hpp"
 
-#include "utils/js_identifier_ctx.h"
-#include "utils/js_normalizer.h"
+#include "js_norm/js_identifier_ctx.h"
+#include "js_norm/js_normalizer.h"
 
 #include "js_test_utils.h"
 
+using namespace jsn;
+
 #ifdef CATCH_TEST_BUILD
 
 TEST_CASE("Sequence parsing", "[JSNormalizer]")
diff --git a/src/js_norm/test/jsn_test.cc b/src/js_norm/test/jsn_test.cc
new file mode 100644 (file)
index 0000000..f6c5f98
--- /dev/null
@@ -0,0 +1,280 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// jsn_test.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+
+#include "catch/catch.hpp"
+
+#include "js_norm/js_norm.h"
+
+using namespace jsn;
+using namespace snort;
+
+#ifdef CATCH_TEST_BUILD
+
+TEST_CASE("configuration", "[JSNorm]")
+{
+    const void* dst = nullptr;
+    size_t dst_len = 0;
+
+    SECTION("no config passed")
+    {
+        JSNorm jsn(nullptr);
+
+        const std::string src = "var";
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        CHECK(dst == nullptr);
+        CHECK(dst_len == 0);
+    }
+
+    SECTION("config passed")
+    {
+        JSNormConfig config;
+        JSNorm jsn(&config);
+
+        const std::string src = "var ";
+        const std::string exp = "var";
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == exp);
+    }
+}
+
+TEST_CASE("normalization", "[JSNorm]")
+{
+    JSNormConfig config;
+    JSNorm jsn(&config);
+
+    const void* dst = nullptr;
+    size_t dst_len = 0;
+
+    SECTION("missed input")
+    {
+        const std::string src = "var";
+
+        jsn.tick();
+        jsn.tick();
+        jsn.tick();
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        CHECK(dst == nullptr);
+        CHECK(dst_len == 0);
+    }
+
+    SECTION("data lost")
+    {
+        const std::string src = "var";
+
+        jsn.tick();
+        jsn.tick();
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        CHECK(dst == nullptr);
+        CHECK(dst_len == 0);
+    }
+
+    SECTION("passed")
+    {
+        const std::string pdu_1 = "var ";
+        const std::string pdu_2 = "a = ";
+        const std::string pdu_3 = "1 ;";
+
+        // the dst buffer keeps accumulating output unless it is explicitly flushed
+        const std::string norm_pdu_1 = "var";
+        const std::string norm_pdu_2 = "var var_0000=";
+        const std::string norm_pdu_3 = "var var_0000=1;";
+
+        jsn.tick();
+        jsn.normalize(pdu_1.c_str(), pdu_1.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == norm_pdu_1);
+
+        jsn.tick();
+        jsn.normalize(pdu_2.c_str(), pdu_2.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == norm_pdu_2);
+
+        jsn.tick();
+        jsn.normalize(pdu_3.c_str(), pdu_3.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == norm_pdu_3);
+    }
+}
+
+TEST_CASE("non-blocking events", "[JSNorm]")
+{
+    REQUIRE(EventSid::EVENT__MAX_VALUE == 10);
+
+    JSNormConfig config;
+    config.ignored_ids.insert("unescape");
+
+    JSNorm jsn(&config, false);
+    const void* dst = nullptr;
+    size_t dst_len = 0;
+
+    std::string src = "'bar'";
+    std::string exp = "'bar'";
+
+    SECTION("EVENT_NEST_UNESCAPE_FUNC")
+    {
+        src = "unescape(unescape('foo')) ;";
+        exp = "'foo';";
+    }
+
+    SECTION("EVENT_MIXED_UNESCAPE_SEQUENCE")
+    {
+        src = "unescape(\"\\u66%6f\\u6f\") ;";
+        exp = "\"foo\";";
+    }
+
+    SECTION("EVENT_OPENING_TAG")
+    {
+        src = "'<script>' ;";
+        exp = "'<script>';";
+    }
+
+    SECTION("EVENT_CLOSING_TAG")
+    {
+        JSNorm jsn(&config, true);
+
+        std::string src = "'</script>' ;";
+        std::string exp = "'</script>';";
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == exp);
+    }
+
+    jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+    REQUIRE(dst != nullptr);
+    REQUIRE(dst_len != 0);
+
+    CHECK(std::string((const char*)dst, dst_len) == exp);
+}
+
+TEST_CASE("blocking events", "[JSNorm]")
+{
+    REQUIRE(EventSid::EVENT__MAX_VALUE == 10);
+
+    JSNormConfig config;
+    JSNorm jsn(&config, false);
+
+    const void* dst = nullptr;
+    size_t dst_len = 0;
+
+    std::string src = "'bar'";
+    std::string exp = "'bar'";
+
+    SECTION("EVENT_CLOSING_TAG")
+    {
+        src = "'</script>' ;";
+        exp = "'";
+    }
+
+    SECTION("EVENT_BAD_TOKEN")
+    {
+        src = "{)";
+        exp = "{";
+    }
+
+    SECTION("EVENT_IDENTIFIER_OVERFLOW")
+    {
+        config.identifier_depth = 0;
+
+        JSNorm jsn(&config, false);
+
+        std::string src = "; a";
+        std::string exp = ";";
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == exp);
+    }
+
+    SECTION("EVENT_BRACKET_NEST_OVERFLOW")
+    {
+        config.max_bracket_depth = 0;
+
+        JSNorm jsn(&config, false);
+
+        std::string src = "; {";
+        std::string exp = ";";
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == exp);
+    }
+
+    SECTION("EVENT_SCOPE_NEST_OVERFLOW")
+    {
+        config.max_scope_depth = 0;
+
+        JSNorm jsn(&config, false);
+
+        std::string src = "; function f () {";
+        std::string exp = ";function var_0000";
+
+        jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+        REQUIRE(dst != nullptr);
+        REQUIRE(dst_len != 0);
+
+        CHECK(std::string((const char*)dst, dst_len) == exp);
+    }
+
+    jsn.normalize(src.c_str(), src.size(), dst, dst_len);
+
+    REQUIRE(dst != nullptr);
+    REQUIRE(dst_len != 0);
+
+    CHECK(std::string((const char*)dst, dst_len) == exp);
+}
+
+#endif
index a356fcd884c55b46531502e424624142d43b51fd..70489e09ceeaa65f667480a3fa8a8765938d9f3a 100644 (file)
@@ -42,6 +42,7 @@
 #include "framework/module.h"
 #include "host_tracker/host_tracker_module.h"
 #include "host_tracker/host_cache_module.h"
+#include "js_norm/js_norm_module.h"
 #include "latency/latency_module.h"
 #include "log/messages.h"
 #include "managers/module_manager.h"
@@ -2117,6 +2118,7 @@ void module_init()
 
     // these modules should be in ips policy
     ModuleManager::add_module(new EventFilterModule);
+    ModuleManager::add_module(new JSNormModule);
     ModuleManager::add_module(new RateFilterModule);
     ModuleManager::add_module(new SuppressModule);
 
index 0f3b0719010cd7691fe1772a91dacae519a64cc5..284a54eae6eed6b940e9e1a3564d350f33fd430c 100644 (file)
@@ -29,6 +29,7 @@
 #include "detection/detection_engine.h"
 #include "framework/file_policy.h"
 #include "framework/policy_selector.h"
+#include "js_norm/js_config.h"
 #include "log/messages.h"
 #include "main/thread_config.h"
 #include "managers/inspector_manager.h"
@@ -168,6 +169,7 @@ void InspectionPolicy::init(InspectionPolicy* other_inspection_policy)
 InspectionPolicy::~InspectionPolicy()
 {
     InspectorManager::delete_policy(this, cloned);
+    delete jsn_config;
 }
 
 void InspectionPolicy::configure()
index c38a0f049abbc674469746f81ab7d9ec943f38dc..d6cf1ce6515c24f02fd0405c6e5a3384997b583a 100644 (file)
@@ -51,6 +51,7 @@ struct SnortConfig;
 
 struct _daq_flow_stats;
 struct _daq_pkt_hdr;
+struct JSNormConfig;
 struct PortTable;
 struct vartable_t;
 struct sfip_var_t;
@@ -156,6 +157,8 @@ public:
     snort::DataBus dbus;
     bool cloned;
 
+    JSNormConfig* jsn_config = nullptr;
+
 private:
     void init(InspectionPolicy* old_inspection_policy);
 };
index c7e4e7cccf0c4b4fa8155f706c404cb1a1021d3c..098e0b40b05068839a9e7040c515300d1520035b 100644 (file)
@@ -55,7 +55,6 @@ void HttpMsgBody::publish() {}
 void HttpMsgBody::do_file_processing(const Field&) {}
 void HttpMsgBody::do_utf_decoding(const Field&, Field&) {}
 void HttpMsgBody::do_file_decompression(const Field&, Field&) {}
-void HttpMsgBody::do_enhanced_js_normalization(const Field&, Field&) {}
 void HttpMsgBody::clean_partial(uint32_t&, uint32_t&, uint8_t*&, uint32_t&) {}
 void HttpMsgBody::bookkeeping_regular_flush(uint32_t&, uint8_t*&, uint32_t&, int32_t) {}
 bool HttpMsgBody::run_detection(snort::Packet*) { return true; }
index 3d9b905c7e78cfa2dc3c06f62456b4b6f9438bb6..42a88673d964db87fa46eb79f1bd01be34ae2821 100644 (file)
@@ -1,6 +1,15 @@
 HttpJsNorm class serves as a script Normalizer, and currently has two implementations:
 the Legacy Normalizer and the Enhanced Normalizer.
 
+In NHI, there are two JSNorm extensions:
+ * HttpInlineJSNorm processes the content of HTML script tags.
+ * HttpExternalJSNorm processes payloads with a JavaScript MIME type.
+
+The normalization context is per transaction. It is created once js_data calls for normalized JS
+data, and is deleted once the transaction ends. Partial inspections feed data incrementally to the
+JS Normalizer, but the output is accumulated and concatenated in the right way, presenting a more
+comprehensive block of data to Detection.
+
 During message body analysis the Enhanced Normalizer does one of the following:
 1. If Content-Type says its an external script then Normalizer processes the
    whole message body as a script text.
@@ -8,118 +17,7 @@ During message body analysis the Enhanced Normalizer does one of the following:
    subsequent bytes in a stream mode, until it finds a closing tag.
    It proceeds and scans the entire message body for inline scripts.
 
-Enhanced Normalizer is a stateful JavaScript whitespace and identifiers normalizer.
-Normalizer will remove all extraneous whitespace and newlines, keeping a single space where 
-syntactically necessary. Comments will be removed, but contents of string literals will
-be kept intact. Any string literals, added by the plus operator,
-will be concatenated. This also works for functions that result in string
-literals. Semicolons will be inserted, if not already present, according to ECMAScript
-automatic semicolon insertion rules.
-
-All JavaScript identifier names, except those from the ident_ignore or prop_ignore lists,
-will be substituted with unified names in the following format: var_0000 -> var_ffff.
-The number of unique identifiers available is 65536 names per HTTP transaction.  If Normalizer
-overruns the configured limit, built-in alert is generated.
-
-A config option to set the limit manually:
-
- * http_inspect.js_norm_identifier_depth.
-
-Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with
-the listed identifier, any chain of dot accessors, brackets and function calls will be kept
-intact.
-
-For example:
-
- * console.log("bar")
- * document.getElementById("id").text
- * eval("script")
- * foo["bar"]
-
-Ignored identifiers are configured via the following config option that accepts a list of object
-and function names:
-
- * http_inspect.js_norm_ident_ignore = { 'console', 'document', 'eval', 'foo' }
-
-When a variable assignment that 'aliases' an identifier from the list is found,
-the assignment will be tracked and subsequent occurrences of the variable will be
-replaced with the stored value. This substitution will follow JavaScript variable scope 
-limits.
-
-For example:
-
-    var a = console.log
-    a("hello")  // will be substituted to 'console.log("hello")'
-    a.foo.bar() // will be normalized as 'console.log.foo.bar()'. When variable is 'de-aliased',
-                // following identifiers are not normalized, just like identifiers from ident_ignore
-
-When an object is created using a 'new' keyword, and the class/constructor is found in ident_ignore
-list, the object will be tracked, and although its own identifier will be converted to normal form
-its property and function calls will be kept intact, as with ignored identifiers. 
-
-For example:
-
-    var obj = new Array()
-    obj.insert(1,2,3) // will be normalized to var_0000.insert(1,2,3)
-
-For properties and methods of objects that can be created implicitly, there is a
-js_norm_prop_ignore list. All names in the call chain after the first property or
-method from the list has been occurred will not be normalized.
-
-Note that identifiers are normalized by name, i.e. an identifier and a property with the same name
-will be normalized to the same value. However, the ignore lists act separately on identifiers
-and properties.
-
-For example:
-
-   http_inspect.js_norm_prop_ignore = { 'split' }
-
-   in: "string".toUpperCase().split("").reverse().join("");
-   out: "string".var_0000().split("").reverse().join("");
-
-In addition to the scope tracking, JS Normalizer specifically tracks unescape-like JavaScript
-functions (unescape, decodeURI, decodeURIComponent, String.fromCharCode, String.fromCodePoint).
-This allows detection of unescape functions nested within other unescape functions, which is
-a potential indicator of a multilevel obfuscation. The definition of a function call depends on
-identifier substitution, so such identifiers must be included in the ignore list in
-order to use this feature. After determining the unescape sequence, it is decoded into the
-corresponding string, and the name of unescape function will not be present in the output.
-Single-byte escape sequences within the string and template literals which are arguments of
-unescape, decodeURI and decodeURIComponent functions will be decoded according to ISO/IEC 8859-1
-(Latin-1) charset. Except these cases, escape sequences and code points will be decoded to UTF-8
-format.
-
-For example:
-
-   unescape('\u0062\u0061\u0072')              -> 'bar'
-   decodeURI('%62%61%72')                      -> 'bar'
-   decodeURIComponent('\x62\x61\x72')          -> 'bar'
-   String.fromCharCode(98, 0x0061, 0x72)       -> 'bar'
-   String.fromCodePoint(65600, 65601, 0x10042) -> '𐁀𐁁𐁂'
-
-Supported formats follow
-
-   \xXX
-   \uXXXX
-   \u{XXXX}
-   %XX
-   \uXX
-   %uXXXX
-   decimal code point
-   hexadecimal code point
-
-JS Normalizer is able to decode mixed encoding sequences. However, a built-in alert rises
-in such case.
-
-JS Normalizer's syntax parser follows ECMA-262 standard. For various features,
-tracking of variable scope and individual brackets is done in accordance to the standard.
-Additionally, Normalizer enforces standard limits on HTML content in JavaScript:
- * no nesting tags allowed, i.e. two opening tags in a row
- * script closing tag is not allowed in string literals, block comments, regular expression literals, etc.
-
-If source JavaScript is syntactically incorrect (containing a bad token, brackets mismatch,
-HTML-tags, etc) Normalizer fires corresponding built-in rule and abandons the current script,
-though the already-processed data remains in the output buffer.
+Also, the js_data IPS option's buffer is part of NHI processing in order to start the normalization.
 
 Enhanced Normalizer supports scripts over multiple PDUs.
 So, if the script is not ended, Normalizer's context is saved in HttpFlowData.
index e78785f6aaccb6337feae1226f0d72b7f27b6f08..c15ffa32452081423b22a8120b39327702db95f8 100644 (file)
@@ -88,27 +88,3 @@ developer to get it right.
 The test tool is designed for single-threaded operation only.
 
 The test tool is only available when compiled with REG_TEST.
-
-NHI has some trace messages available. Trace options follow:
-
-* trace.module.http_inspect.js_proc turns on messages from script processing flow.
-+
-Verbosity levels:
-+
-1. Script opening tag detected (available in release build)
-2. Attributes of detected script (available in release build)
-3. Normalizer return code (available in release build)
-4. Contexts management (debug build only)
-5. Parser states (debug build only)
-6. Input stream states (debug build only)
-
-* trace.module.http_inspect.js_dump dumps JavaScript data from processing layers.
-+
-Verbosity levels:
-+
-1. js_data buffer as it is being passed to detection (available in release build)
-2. (no messages available currently)
-3. Payload passed to Normalizer (available in release build)
-4. Temporary buffer (debug build only)
-5. Matched token (debug build only)
-6. Identifier substitution (debug build only)
index 93064df4cbe9220950710d2acc517d9736beec59..89cf49e43e3fab1b004d57f881f58e3349455a80 100644 (file)
@@ -126,7 +126,6 @@ extern const BaseApi* ips_http_true_ip;
 extern const BaseApi* ips_http_uri;
 extern const BaseApi* ips_http_version;
 extern const BaseApi* ips_http_version_match;
-extern const BaseApi* ips_js_data;
 
 #ifdef BUILDING_SO
 SO_PUBLIC const BaseApi* snort_plugins[] =
@@ -161,7 +160,6 @@ const BaseApi* sin_http[] =
     ips_http_uri,
     ips_http_version,
     ips_http_version_match,
-    ips_js_data,
     nullptr
 };
 
index 28f999130f1af4419d4458df1122556c27e2e5d8..9b2032dc5c4d5a24dfb9ec6cf553f85226a19a75 100755 (executable)
 
 #include <cstdint>
 
-enum
-{
-    TRACE_JS_PROC = 0,
-    TRACE_JS_DUMP
-};
-
 namespace HttpEnums
 {
 static const int MAX_OCTETS = 63780;
@@ -69,7 +63,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES
     PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION,
     PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY,
     PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL,
-    PEG_JS_BYTES, PEG_JS_IDENTIFIER, PEG_JS_IDENTIFIER_OVERFLOW, PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX };
+    PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX };
 
 // Result of scanning by splitter
 enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE,
@@ -281,18 +275,11 @@ enum Infraction
     INF_MULTIPLE_HOST_HDRS = 118,
     INF_HTTP2_SETTINGS = 119,
     INF_UPGRADE_HEADER_HTTP2 = 120,
-    INF_JS_BAD_TOKEN = 121,
-    INF_JS_OPENING_TAG = 122,
-    INF_JS_CLOSING_TAG = 123,
     INF_JS_CODE_IN_EXTERNAL = 124,
     INF_JS_SHORTENED_TAG = 125,
-    INF_JS_IDENTIFIER_OVERFLOW = 126,
-    INF_JS_BRACKET_NEST_OVERFLOW = 127,
     INF_CHUNK_OVER_MAXIMUM = 128,
     INF_LONG_HOST_VALUE = 129,
     INF_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 130,
-    INF_JS_DATA_LOST = 131,
-    INF_JS_SCOPE_NEST_OVERFLOW = 132,
     INF_INVALID_SUBVERSION = 133,
     INF_VERSION_0 = 134,
     INF_GZIP_FEXTRA = 135,
@@ -423,16 +410,16 @@ enum EventSid
     EVENT_LONG_SCHEME = 262,
     EVENT_HTTP2_UPGRADE_REQUEST = 263,
     EVENT_HTTP2_UPGRADE_RESPONSE = 264,
-    EVENT_JS_BAD_TOKEN = 265,
-    EVENT_JS_OPENING_TAG = 266,
-    EVENT_JS_CLOSING_TAG = 267,
+    // EVENT_JS_BAD_TOKEN = 265,               // Retired. Do not reuse this number
+    // EVENT_JS_OPENING_TAG = 266,             // Retired. Do not reuse this number
+    // EVENT_JS_CLOSING_TAG = 267,             // Retired. Do not reuse this number
     EVENT_JS_CODE_IN_EXTERNAL = 268,
     EVENT_JS_SHORTENED_TAG = 269,
-    EVENT_JS_IDENTIFIER_OVERFLOW = 270,
-    EVENT_JS_BRACKET_NEST_OVERFLOW = 271,
+    // EVENT_JS_IDENTIFIER_OVERFLOW = 270,     // Retired. Do not reuse this number
+    // EVENT_JS_BRACKET_NEST_OVERFLOW = 271,   // Retired. Do not reuse this number
     EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS = 272,
-    EVENT_JS_DATA_LOST = 273,
-    EVENT_JS_SCOPE_NEST_OVERFLOW = 274,
+    // EVENT_JS_DATA_LOST = 273,               // Retired. Do not reuse this number
+    // EVENT_JS_SCOPE_NEST_OVERFLOW = 274,     // Retired. Do not reuse this number
     EVENT_INVALID_SUBVERSION = 275,
     EVENT_VERSION_0 = 276,
     EVENT_VERSION_HIGHER_THAN_1 = 277,
index 60f05c4850bb134b847731ce7159fe8270db8f02..c8c7a210d85323c0b4dfa33a325849ff1776ed96 100644 (file)
 #include "decompress/file_decomp.h"
 #include "mime/file_mime_process.h"
 #include "service_inspectors/http2_inspect/http2_flow_data.h"
-#include "trace/trace_api.h"
-#include "utils/js_identifier_ctx.h"
-#include "utils/js_normalizer.h"
 
 #include "http_cutter.h"
 #include "http_common.h"
 #include "http_enum.h"
+#include "http_js_norm.h"
 #include "http_module.h"
 #include "http_msg_header.h"
 #include "http_msg_request.h"
@@ -98,23 +96,6 @@ HttpFlowData::~HttpFlowData()
     if (HttpModule::get_peg_counts(PEG_CONCURRENT_SESSIONS) > 0)
         HttpModule::decrement_peg_counts(PEG_CONCURRENT_SESSIONS);
 
-#ifndef UNIT_TEST_BUILD
-    if (js_ident_ctx)
-    {
-        delete js_ident_ctx;
-
-        debug_log(4, http_trace, TRACE_JS_PROC, nullptr,
-            "js_ident_ctx deleted\n");
-    }
-    if (js_normalizer)
-    {
-        delete js_normalizer;
-
-        debug_log(4, http_trace, TRACE_JS_PROC, nullptr,
-            "js_normalizer deleted\n");
-    }
-#endif
-
     for (int k=0; k <= 1; k++)
     {
         delete infractions[k];
@@ -134,6 +115,7 @@ HttpFlowData::~HttpFlowData()
         delete utf_state[k];
         if (fd_state[k] != nullptr)
             File_Decomp_StopFree(fd_state[k]);
+        delete js_ctx[k];
     }
 
     delete_pipeline();
@@ -236,74 +218,6 @@ void HttpFlowData::garbage_collect()
     }
 }
 
-#ifndef UNIT_TEST_BUILD
-void HttpFlowData::reset_js_data_idx()
-{
-    js_data_processed_idx = js_data_idx = 0;
-    js_data_lost_once = false;
-}
-
-void HttpFlowData::reset_js_ident_ctx()
-{
-    if (js_ident_ctx)
-    {
-        js_ident_ctx->reset();
-        debug_log(4, http_trace, TRACE_JS_PROC, nullptr,
-            "js_ident_ctx reset\n");
-    }
-}
-
-snort::JSNormalizer& HttpFlowData::acquire_js_ctx(const HttpParaList::JsNormParam& js_norm_param)
-{
-    if (js_normalizer)
-        return *js_normalizer;
-
-    if (!js_ident_ctx)
-    {
-        js_ident_ctx = new JSIdentifierCtx(js_norm_param.js_identifier_depth,
-            js_norm_param.max_scope_depth, js_norm_param.ignored_ids, js_norm_param.ignored_props);
-
-        debug_logf(4, http_trace, TRACE_JS_PROC, nullptr,
-            "js_ident_ctx created (ident_depth %d)\n", js_norm_param.js_identifier_depth);
-    }
-
-    js_normalizer = new JSNormalizer(*js_ident_ctx, js_norm_param.js_norm_bytes_depth,
-        js_norm_param.max_template_nesting, js_norm_param.max_bracket_depth);
-
-    debug_logf(4, http_trace, TRACE_JS_PROC, nullptr,
-        "js_normalizer created (norm_depth %zd, max_template_nesting %d)\n",
-        js_norm_param.js_norm_bytes_depth, js_norm_param.max_template_nesting);
-
-    return *js_normalizer;
-}
-
-bool HttpFlowData::sync_js_data_idx()
-{
-    bool data_missed = ((js_data_idx - js_data_processed_idx) > 1);
-    js_data_processed_idx = js_data_idx;
-    return data_missed;
-}
-
-void HttpFlowData::release_js_ctx()
-{
-    js_continue = false;
-
-    if (!js_normalizer)
-        return;
-
-    delete js_normalizer;
-    js_normalizer = nullptr;
-
-    debug_log(4, http_trace, TRACE_JS_PROC, nullptr,
-        "js_normalizer deleted\n");
-}
-#else
-void HttpFlowData::reset_js_ident_ctx() {}
-snort::JSNormalizer& HttpFlowData::acquire_js_ctx(const HttpParaList::JsNormParam&)
-{ return *js_normalizer; }
-void HttpFlowData::release_js_ctx() {}
-#endif
-
 bool HttpFlowData::add_to_pipeline(HttpTransaction* latest)
 {
     if (pipeline == nullptr)
index 203c08c5cd4a00ac37f574a69c0848e0c1613c86..2ae92e7153ac8e23d9be217e2465f118a6dc816e 100644 (file)
 #include "http_module.h"
 
 class HttpTransaction;
-class HttpJsNorm;
+class HttpJSNorm;
 class HttpMsgSection;
 class HttpCutter;
 class HttpQueryParser;
-class JSIdentifierCtxBase;
 
 namespace snort
 {
-class JSNormalizer;
 class MimeSession;
 }
 
@@ -58,7 +56,7 @@ public:
 
     friend class HttpBodyCutter;
     friend class HttpInspect;
-    friend class HttpJsNorm;
+    friend class HttpJSNorm;
     friend class HttpMsgSection;
     friend class HttpMsgStart;
     friend class HttpMsgRequest;
@@ -209,22 +207,7 @@ private:
     HttpTransaction* take_from_pipeline();
     void delete_pipeline();
 
-    bool js_data_lost_once = false;
-    uint32_t js_data_idx = 0;
-    uint32_t js_data_processed_idx = 0;
-
-    // *** HttpJsNorm
-    JSIdentifierCtxBase* js_ident_ctx = nullptr;
-    snort::JSNormalizer* js_normalizer = nullptr;
-    bool js_continue = false;
-    bool js_built_in_event = false;
-
-    void reset_js_data_idx();
-    void reset_js_ident_ctx();
-    snort::JSNormalizer& acquire_js_ctx(const HttpParaList::JsNormParam& js_norm_param);
-    void release_js_ctx();
-    bool sync_js_data_idx();
-
+    HttpJSNorm* js_ctx[2] = { nullptr, nullptr };
     bool cutover_on_clear = false;
     bool ssl_search_abandoned = false;
 
index 677ed8c6e302a8b19c4bf3b18bdfe1ee0dce7cf1..1ca5d18642e9c08f481473a8980f716a27593e36 100755 (executable)
@@ -140,7 +140,7 @@ HttpInspect::~HttpInspect()
 
 bool HttpInspect::configure(SnortConfig* )
 {
-    params->js_norm_param.js_norm->configure();
+    params->js_norm_param.configure();
     params->mime_decode_conf->sync_all_depths();
 
     return true;
@@ -154,14 +154,6 @@ void HttpInspect::show(const SnortConfig*) const
     auto bad_chars = GetBadChars(params->uri_param.bad_characters);
     auto xff_headers = GetXFFHeaders(params->xff_headers);
 
-    std::string js_norm_ident_ignore;
-    for (auto s : params->js_norm_param.ignored_ids)
-        js_norm_ident_ignore += s + " ";
-
-    std::string js_norm_prop_ignore;
-    for (auto s : params->js_norm_param.ignored_props)
-        js_norm_prop_ignore += s + " ";
-
     ConfigLogger::log_limit("request_depth", params->request_depth, -1);
     ConfigLogger::log_limit("response_depth", params->response_depth, -1);
     ConfigLogger::log_flag("unzip", params->unzip);
@@ -173,16 +165,8 @@ void HttpInspect::show(const SnortConfig*) const
     ConfigLogger::log_value("max_mime_attach", params->max_mime_attach);
     ConfigLogger::log_flag("script_detection", params->script_detection);
     ConfigLogger::log_flag("normalize_javascript", params->js_norm_param.normalize_javascript);
-    ConfigLogger::log_value("max_javascript_whitespaces", params->js_norm_param.max_javascript_whitespaces);
-    ConfigLogger::log_value("js_norm_bytes_depth", params->js_norm_param.js_norm_bytes_depth);
-    ConfigLogger::log_value("js_norm_identifier_depth", params->js_norm_param.js_identifier_depth);
-    ConfigLogger::log_value("js_norm_max_tmpl_nest", params->js_norm_param.max_template_nesting);
-    ConfigLogger::log_value("js_norm_max_bracket_depth", params->js_norm_param.max_bracket_depth);
-    ConfigLogger::log_value("js_norm_max_scope_depth", params->js_norm_param.max_scope_depth);
-    if (!js_norm_ident_ignore.empty())
-        ConfigLogger::log_list("js_norm_ident_ignore", js_norm_ident_ignore.c_str());
-    if (!js_norm_prop_ignore.empty())
-        ConfigLogger::log_list("js_norm_prop_ignore", js_norm_prop_ignore.c_str());
+    ConfigLogger::log_value("max_javascript_whitespaces",
+        params->js_norm_param.max_javascript_whitespaces);
     ConfigLogger::log_value("bad_characters", bad_chars.c_str());
     ConfigLogger::log_value("ignore_unreserved", unreserved_chars.c_str());
     ConfigLogger::log_flag("percent_u", params->uri_param.percent_u);
@@ -244,6 +228,9 @@ bool HttpInspect::get_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffe
     case InspectionBuffer::IBT_VBA:
         return get_buf(BUFFER_VBA_DATA, p, b);
 
+    case InspectionBuffer::IBT_JS_DATA:
+        return get_buf(BUFFER_JS_DATA, p, b);
+
     default:
         assert(false);
         return false;
@@ -352,7 +339,7 @@ void HttpInspect::set_hx_body_state(snort::Flow* flow, HttpCommon::SourceId sour
 
 bool HttpInspect::get_fp_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffer& b)
 {
-    assert(ibt == InspectionBuffer::IBT_VBA);
+    assert(ibt == InspectionBuffer::IBT_VBA or ibt == InspectionBuffer::IBT_JS_DATA);
 
     if (get_latest_is(p) == PS_NONE)
         return false;
index 702669558114a19e50bf679d87dffb22df739bd9..58b66214807bbfad18d633b5121c2e275561d57c 100644 (file)
@@ -16,6 +16,7 @@
 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 //--------------------------------------------------------------------------
 // http_js_norm.cc author Tom Peters <thopeter@cisco.com>
+// http_js_norm.cc author Oleksandr Serhiienko <oserhiie@cisco.com>
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 
 #include "http_js_norm.h"
 
+#include "js_norm/js_enum.h"
+#include "js_norm/js_normalizer.h"
 #include "trace/trace_api.h"
-#include "utils/js_normalizer.h"
 #include "utils/safec.h"
 #include "utils/util_jsnorm.h"
 
-#include "http_common.h"
-#include "http_enum.h"
-
 using namespace HttpEnums;
+using namespace jsn;
 using namespace snort;
 
-static const char* jsret_codes[] =
-{
-    "end of stream",
-    "script ended",
-    "script continues",
-    "closing tag",
-    "bad token",
-    "identifier overflow",
-    "template nesting overflow",
-    "bracket nesting overflow",
-    "scope nesting overflow",
-    "wrong closing symbol",
-    "ended in inner scope",
-    "unknown"
-};
+extern THREAD_LOCAL const snort::Trace* js_trace;
 
-static const char* ret2str(JSTokenizer::JSRet ret)
-{
-    assert(ret < JSTokenizer::JSRet::MAX);
-    ret = ret < JSTokenizer::JSRet::MAX ? ret : JSTokenizer::JSRet::MAX;
-    return jsret_codes[ret];
-}
+enum AttrId { AID_OPEN, AID_SLASH, AID_GT, AID_SRC, AID_JS, AID_NON_JS, AID_ECMA, AID_VB };
 
-static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const Packet* current_packet,
-    const char* const end, const char*& ptr, bool external_script)
+struct MatchContext
 {
-    trace_logf(3, http_trace, TRACE_JS_DUMP, current_packet,
-        "original[%zu]: %.*s\n", end - ptr, static_cast<int>(end - ptr), ptr);
-
-    auto ret = ctx.normalize(ptr, end - ptr, external_script);
-    auto src_next = ctx.get_src_next();
+    const uint8_t* next = nullptr;
+    bool is_javascript = true;
+    bool is_external = false;
+    bool is_shortened = false;
+};
 
-    trace_logf(3, http_trace, TRACE_JS_PROC, current_packet,
-        "normalizer returned with %d '%s'\n", ret, ret2str(ret));
+SearchTool* js_create_mpse_open_tag()
+{
+    constexpr const char* otag_start = "<SCRIPT";
+    SearchTool* mpse = new SearchTool;
 
-    if (src_next > ptr)
-        HttpModule::increment_peg_counts(PEG_JS_BYTES, src_next - ptr);
-    else
-        src_next = end; // Normalizer has failed, thus aborting the remaining input
+    mpse->add(otag_start, strlen(otag_start), AID_OPEN);
 
-    ptr = src_next;
+    mpse->prep();
 
-    return ret;
+    return mpse;
 }
 
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_,
-    const HttpParaList::JsNormParam& js_norm_param_) :
-    uri_param(uri_param_),
-    js_norm_param(js_norm_param_),
-    mpse_otag(nullptr),
-    mpse_attr(nullptr),
-    mpse_type(nullptr)
-{}
-
-HttpJsNorm::~HttpJsNorm()
+SearchTool* js_create_mpse_tag_type()
 {
-    delete mpse_otag;
-    delete mpse_attr;
-    delete mpse_type;
-}
+    constexpr const char* attr_js = "JAVASCRIPT";
+    constexpr const char* attr_ecma = "ECMASCRIPT";
+    constexpr const char* attr_vb = "VBSCRIPT";
+    SearchTool* mpse = new SearchTool;
 
-void HttpJsNorm::configure()
-{
-    if (configure_once)
-        return;
+    mpse->add(attr_js, strlen(attr_js), AID_JS);
+    mpse->add(attr_ecma, strlen(attr_ecma), AID_ECMA);
+    mpse->add(attr_vb, strlen(attr_vb), AID_VB);
 
-    mpse_otag = new SearchTool;
-    mpse_attr = new SearchTool;
-    mpse_type = new SearchTool;
+    mpse->prep();
 
-    static constexpr const char* otag_start = "<SCRIPT";
-    static constexpr const char* attr_slash = "/";
-    static constexpr const char* attr_gt = ">";
-    static constexpr const char* attr_src = "SRC";
+    return mpse;
+}
 
-    static constexpr const char* attr_js = "JAVASCRIPT";    // legacy only
-    static constexpr const char* attr_ecma = "ECMASCRIPT";  // legacy only
-    static constexpr const char* attr_vb = "VBSCRIPT";      // legacy only
+SearchTool* js_create_mpse_tag_attr()
+{
+    constexpr const char* attr_slash = "/";
+    constexpr const char* attr_gt = ">";
+    constexpr const char* attr_src = "SRC";
 
-    static constexpr const size_t attrs_js_size = 15;
-    static constexpr const char* attrs_js[attrs_js_size] =
+    constexpr const char* attrs_js[] =
     {
         "APPLICATION/JAVASCRIPT",
         "APPLICATION/ECMASCRIPT",
@@ -132,311 +98,113 @@ void HttpJsNorm::configure()
         "TEXT/X-ECMASCRIPT",
         "TEXT/JSCRIPT"
     };
+    constexpr const size_t attrs_js_size = sizeof(attrs_js) / sizeof(attrs_js[0]);
 
-    static constexpr const size_t attrs_non_js_size = 2;
-    static constexpr const char* attrs_non_js[attrs_non_js_size] =
+    constexpr const char* attrs_non_js[] =
     {
         "TEXT/VBSCRIPT",
         "APPLICATION/JSON"
     };
+    constexpr const size_t attrs_non_js_size = sizeof(attrs_non_js) / sizeof(attrs_non_js[0]);
 
-    mpse_otag->add(otag_start, strlen(otag_start), 0);
+    SearchTool* mpse = new SearchTool;
 
-    mpse_attr->add(attr_slash, strlen(attr_slash), AID_SLASH);
-    mpse_attr->add(attr_gt, strlen(attr_gt), AID_GT);
-    mpse_attr->add(attr_src, strlen(attr_src), AID_SRC);
+    mpse->add(attr_slash, strlen(attr_slash), AID_SLASH);
+    mpse->add(attr_gt, strlen(attr_gt), AID_GT);
+    mpse->add(attr_src, strlen(attr_src), AID_SRC);
 
     for (unsigned i = 0; i < attrs_js_size; ++i)
-        mpse_attr->add(attrs_js[i], strlen(attrs_js[i]), AID_JS);
+        mpse->add(attrs_js[i], strlen(attrs_js[i]), AID_JS);
 
     for (unsigned i = 0; i < attrs_non_js_size; ++i)
-        mpse_attr->add(attrs_non_js[i], strlen(attrs_non_js[i]), AID_NON_JS);
+        mpse->add(attrs_non_js[i], strlen(attrs_non_js[i]), AID_NON_JS);
 
-    mpse_type->add(attr_js, strlen(attr_js), AID_JS);
-    mpse_type->add(attr_ecma, strlen(attr_ecma), AID_ECMA);
-    mpse_type->add(attr_vb, strlen(attr_vb), AID_VB);
+    mpse->prep();
 
-    mpse_otag->prep();
-    mpse_attr->prep();
-    mpse_type->prep();
-
-    configure_once = true;
+    return mpse;
 }
 
-void HttpJsNorm::do_external(const Field& input, Field& output,
-    HttpInfractions* infractions, HttpFlowData* ssn, bool final_portion) const
+static int match_script(void*, void*, int index, void* index_ptr, void*)
 {
-    if (ssn->js_built_in_event)
-        return;
-    const Packet* current_packet = DetectionEngine::get_current_packet();
-    const char* ptr = (const char*)input.start();
-    const char* const end = ptr + input.length();
-
-    HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
-
-    if (!alive_ctx(ssn))
-    {
-        HttpModule::increment_peg_counts(PEG_JS_EXTERNAL);
-        trace_logf(2, http_trace, TRACE_JS_PROC, current_packet,
-            "script starts\n");
-    }
-    else
-        trace_logf(2, http_trace, TRACE_JS_PROC, current_packet,
-            "script continues\n");
-
-    auto& js_ctx = ssn->acquire_js_ctx(js_norm_param);
-
-    while (ptr < end)
-    {
-        trace_logf(1, http_trace, TRACE_JS_PROC, current_packet,
-            "external script at %zd offset\n", ptr - (const char*)input.start());
-
-        auto ret = js_normalize(js_ctx, current_packet, end, ptr, true);
-
-        switch (ret)
-        {
-        case JSTokenizer::EOS:
-        case JSTokenizer::SCRIPT_CONTINUE:
-            break;
-        case JSTokenizer::SCRIPT_ENDED:
-        case JSTokenizer::CLOSING_TAG:
-            assert(false); // should not be present in external
-            break;
-        case JSTokenizer::BAD_TOKEN:
-        case JSTokenizer::WRONG_CLOSING_SYMBOL:
-        case JSTokenizer::ENDED_IN_INNER_SCOPE:
-            *infractions += INF_JS_BAD_TOKEN;
-            events->create_event(EVENT_JS_BAD_TOKEN);
-            ssn->js_built_in_event = true;
-            break;
-        case JSTokenizer::IDENTIFIER_OVERFLOW:
-            HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
-            *infractions += INF_JS_IDENTIFIER_OVERFLOW;
-            events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
-            ssn->js_built_in_event = true;
-            break;
-        case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
-        case JSTokenizer::BRACKET_NESTING_OVERFLOW:
-            *infractions += INF_JS_BRACKET_NEST_OVERFLOW;
-            events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW);
-            ssn->js_built_in_event = true;
-            break;
-        case JSTokenizer::SCOPE_NESTING_OVERFLOW:
-            *infractions += INF_JS_SCOPE_NEST_OVERFLOW;
-            events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW);
-            ssn->js_built_in_event = true;
-            break;
-        default:
-            assert(false);
-            break;
-        }
-
-        if (js_ctx.is_unescape_nesting_seen())
-        {
-            *infractions += INF_JS_OBFUSCATION_EXCD;
-            events->create_event(EVENT_JS_OBFUSCATION_EXCD);
-        }
-        if (js_ctx.is_mixed_encoding_seen())
-        {
-            *infractions += INF_MIXED_ENCODINGS;
-            events->create_event(EVENT_MIXED_ENCODINGS);
-        }
-        if (js_ctx.is_closing_tag_seen())
-        {
-            *infractions += INF_JS_CLOSING_TAG;
-            events->create_event(EVENT_JS_CLOSING_TAG);
-        }
-        if (js_ctx.is_buffer_adjusted())
-            output.set_accumulation(true);
-
-        if (ssn->js_built_in_event)
-            break;
-    }
-
-    debug_logf(4, http_trace, TRACE_JS_PROC, current_packet,
-        "input data was %s\n", final_portion ? "last one in PDU" : "a part of PDU");
-
-    uint32_t data_len = js_ctx.script_size();
-
-    if (data_len)
-    {
-        const char* data = final_portion ? js_ctx.take_script() : js_ctx.get_script();
-
-        if (data)
-        {
-            trace_logf(1, http_trace, TRACE_JS_DUMP, current_packet,
-                       "js_data[%u]: %.*s\n", data_len, data_len, data);
-
-            output.set(data_len, (const uint8_t*)data, final_portion);
-        }
-    }
+    static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
+    *((int*) index_ptr) = index - script_start_length;
+    return 1;
 }
 
-void HttpJsNorm::do_inline(const Field& input, Field& output,
-    HttpInfractions* infractions, HttpFlowData* ssn, bool final_portion) const
+static int match_html(void* id, void*, int, void* id_ptr, void*)
 {
-    const Packet* current_packet = DetectionEngine::get_current_packet();
-    const char* ptr = (const char*)input.start();
-    const char* const end = ptr + input.length();
+    *((int*) id_ptr)  = (int)(uintptr_t)id;
+    return 1;
+}
 
-    HttpEventGen* events = ssn->events[HttpCommon::SRC_SERVER];
+static int match_otag(void*, void*, int index, void* ptr, void*)
+{
+    *(uint8_t**)ptr += index;
+    return 1;
+}
 
-    bool script_continue = ssn->js_continue;
-    bool script_external = false;
+static int match_attr(void* pid, void*, int index, void* sctx, void*)
+{
+    MatchContext* ctx = (MatchContext*)sctx;
+    AttrId id = (AttrId)(uintptr_t)pid;
+    const char* c;
 
-    while (ptr < end)
+    switch (id)
     {
-        if (!script_continue)
-        {
-            if (!mpse_otag->find(ptr, end - ptr, match_otag, false, &ptr))
-                break;
-            if (ptr >= end)
-                break;
-
-            MatchContext sctx = {ptr, true, false, false};
-
-            if (ptr[0] == '>')
-                ptr++;
-            else
-            {
-                if (!mpse_attr->find(ptr, end - ptr, match_attr, false, &sctx) || ptr == sctx.next)
-                    break; // the opening tag never ends
-                ptr = sctx.next;
-            }
-
-            trace_logf(1, http_trace, TRACE_JS_PROC, current_packet,
-                "opening tag at %zd offset\n", ptr - (const char*)input.start());
-
-            trace_logf(2, http_trace, TRACE_JS_PROC, current_packet,
-                "script attributes [%s, %s, %s]\n",
-                sctx.is_shortened ? "shortened form" : "full form",
-                sctx.is_javascript ? "JavaScript type" : "unknown type",
-                sctx.is_external ? "external source" : "inline");
-
-            if (sctx.is_shortened)
-            {
-                *infractions += INF_JS_SHORTENED_TAG;
-                events->create_event(EVENT_JS_SHORTENED_TAG);
-                continue;
-            }
-
-            if (!sctx.is_javascript)
-                continue;
-
-            script_external = sctx.is_external;
-
-            // script found
-            if (!script_external)
-                HttpModule::increment_peg_counts(PEG_JS_INLINE);
-        }
-
-        auto& js_ctx = ssn->acquire_js_ctx(js_norm_param);
-        auto output_size_before = js_ctx.script_size();
-
-        auto ret = js_normalize(js_ctx, current_packet, end, ptr, false);
-
-        switch (ret)
-        {
-        case JSTokenizer::EOS:
-            js_ctx.reset_depth();
-            break;
-        case JSTokenizer::SCRIPT_ENDED:
-            break;
-        case JSTokenizer::SCRIPT_CONTINUE:
-            break;
-        case JSTokenizer::CLOSING_TAG:
-            *infractions += INF_JS_CLOSING_TAG;
-            events->create_event(EVENT_JS_CLOSING_TAG);
-            break;
-        case JSTokenizer::BAD_TOKEN:
-        case JSTokenizer::WRONG_CLOSING_SYMBOL:
-        case JSTokenizer::ENDED_IN_INNER_SCOPE:
-            *infractions += INF_JS_BAD_TOKEN;
-            events->create_event(EVENT_JS_BAD_TOKEN);
-            break;
-        case JSTokenizer::IDENTIFIER_OVERFLOW:
-            HttpModule::increment_peg_counts(PEG_JS_IDENTIFIER_OVERFLOW);
-            *infractions += INF_JS_IDENTIFIER_OVERFLOW;
-            events->create_event(EVENT_JS_IDENTIFIER_OVERFLOW);
-            break;
-        case JSTokenizer::TEMPLATE_NESTING_OVERFLOW:
-        case JSTokenizer::BRACKET_NESTING_OVERFLOW:
-            *infractions += INF_JS_BRACKET_NEST_OVERFLOW;
-            events->create_event(EVENT_JS_BRACKET_NEST_OVERFLOW);
-            break;
-        case JSTokenizer::SCOPE_NESTING_OVERFLOW:
-            *infractions += INF_JS_SCOPE_NEST_OVERFLOW;
-            events->create_event(EVENT_JS_SCOPE_NEST_OVERFLOW);
-            break;
-        default:
-            assert(false);
-            break;
-        }
-
-        if (script_external && output_size_before != js_ctx.script_size())
-        {
-            *infractions += INF_JS_CODE_IN_EXTERNAL;
-            events->create_event(EVENT_JS_CODE_IN_EXTERNAL);
-        }
-        if (js_ctx.is_unescape_nesting_seen())
-        {
-            *infractions += INF_JS_OBFUSCATION_EXCD;
-            events->create_event(EVENT_JS_OBFUSCATION_EXCD);
-        }
-        if (js_ctx.is_mixed_encoding_seen())
+    case AID_SLASH:
+        if (*(ctx->next + index) == '>')
         {
-            *infractions += INF_MIXED_ENCODINGS;
-            events->create_event(EVENT_MIXED_ENCODINGS);
+            ctx->is_shortened = true;
+            ctx->next += index;
+            return 1;
         }
-        if (js_ctx.is_opening_tag_seen())
+        else
         {
-            *infractions += INF_JS_OPENING_TAG;
-            events->create_event(EVENT_JS_OPENING_TAG);
+            ctx->is_shortened = false;
+            return 0;
         }
-        if (js_ctx.is_buffer_adjusted())
-            output.set_accumulation(true);
-
-        script_continue = ret == JSTokenizer::SCRIPT_CONTINUE;
-    }
-
-    ssn->js_continue = script_continue;
-
-    if (!alive_ctx(ssn))
-        return;
 
-    debug_logf(4, http_trace, TRACE_JS_PROC, current_packet,
-        "input data was %s\n", final_portion ? "last one in PDU" : "a part of PDU");
+    case AID_GT:
+        ctx->next += index;
+        return 1;
 
-    auto js_ctx = ssn->js_normalizer;
-    uint32_t data_len = js_ctx->script_size();
+    case AID_SRC:
+        c = (const char*)ctx->next + index;
+        while (*c == ' ') c++;
+        ctx->is_external = ctx->is_external || *c == '=';
+        return 0;
 
-    if (data_len)
-    {
-        const char* data = final_portion ? js_ctx->take_script() : js_ctx->get_script();
+    case AID_JS:
+        ctx->is_javascript = true;
+        return 0;
 
-        if (data)
-        {
-            trace_logf(1, http_trace, TRACE_JS_DUMP, current_packet,
-                       "js_data[%u]: %.*s\n", data_len, data_len, data);
+    case AID_NON_JS:
+        ctx->is_javascript = false;
+        return 0;
 
-            output.set(data_len, (const uint8_t*)data, final_portion);
-        }
+    default:
+        assert(false);
+        return 1;
     }
-
-    if (!script_continue && final_portion)
-        ssn->release_js_ctx();
 }
 
-void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* infractions,
-    HttpEventGen* events, int max_javascript_whitespaces) const
+void js_normalize(const Field& input, Field& output,
+    const HttpParaList* params, HttpInfractions* inf, HttpEventGen* events)
 {
+    assert(params);
+    assert(inf);
+    assert(events);
+
     bool js_present = false;
     int index = 0;
     const char* ptr = (const char*)input.start();
     const char* const end = ptr + input.length();
+    auto mpse_otag = params->js_norm_param.mpse_otag;
+    auto mpse_type = params->js_norm_param.mpse_type;
+    auto& uri_param = params->uri_param;
 
     JSState js;
-    js.allowed_spaces = max_javascript_whitespaces;
+    js.allowed_spaces = params->js_norm_param.max_javascript_whitespaces;
     js.allowed_levels = MAX_ALLOWED_OBFUSCATION;
     js.alerts = 0;
 
@@ -448,7 +216,7 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i
         int mindex;
 
         // Search for beginning of a javascript
-        if (mpse_otag->find(ptr, end-ptr, search_js_found, false, &mindex) > 0)
+        if (mpse_otag->find(ptr, end-ptr, match_script, false, &mindex) > 0)
         {
             const char* js_start = ptr + mindex;
             const char* const angle_bracket =
@@ -461,7 +229,7 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i
             {
                 int mid;
                 const int script_found = mpse_type->find(
-                    js_start, (angle_bracket-js_start), search_html_found, false, &mid);
+                    js_start, (angle_bracket-js_start), match_html, false, &mid);
 
                 js_start = angle_bracket + 1;
                 if (script_found > 0)
@@ -518,17 +286,17 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i
         {
             if (js.alerts & ALERT_LEVELS_EXCEEDED)
             {
-                *infractions += INF_JS_OBFUSCATION_EXCD;
+                *inf += INF_JS_OBFUSCATION_EXCD;
                 events->create_event(EVENT_JS_OBFUSCATION_EXCD);
             }
             if (js.alerts & ALERT_SPACES_EXCEEDED)
             {
-                *infractions += INF_JS_EXCESS_WS;
+                *inf += INF_JS_EXCESS_WS;
                 events->create_event(EVENT_JS_EXCESS_WS);
             }
             if (js.alerts & ALERT_MIXED_ENCODINGS)
             {
-                *infractions += INF_MIXED_ENCODINGS;
+                *inf += INF_MIXED_ENCODINGS;
                 events->create_event(EVENT_MIXED_ENCODINGS);
             }
         }
@@ -541,68 +309,140 @@ void HttpJsNorm::do_legacy(const Field& input, Field& output, HttpInfractions* i
     }
 }
 
-int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*)
+void HttpJSNorm::flush_data(const void*& data, size_t& len)
 {
-    static constexpr int script_start_length = sizeof("<SCRIPT") - 1;
-    *((int*) index_ptr) = index - script_start_length;
-    return 1;
+    len = jsn_ctx->script_size();
+    data = jsn_ctx->take_script();
 }
 
-int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*)
+bool HttpInlineJSNorm::pre_proc()
 {
-    *((int*) id_ptr)  = (int)(uintptr_t)id;
-    return 1;
+    assert(mpse_otag);
+    assert(mpse_attr);
+    assert(http_events);
+    assert(infractions);
+
+    if ((*infractions & INF_UNKNOWN_ENCODING) or (*infractions & INF_UNSUPPORTED_ENCODING))
+        return false;
+
+    if (src_ptr >= src_end)
+        return false;
+
+    const Packet* packet = DetectionEngine::get_current_packet();
+
+    if (!script_continue)
+    {
+        while (true)
+        {
+            if (!mpse_otag->find((const char*)src_ptr, src_end - src_ptr, match_otag, false, &src_ptr)
+                || src_ptr >= src_end)
+            {
+                return false;
+            }
+
+            MatchContext sctx = {src_ptr, true, false, false};
+
+            if (!mpse_attr->find((const char*)src_ptr, src_end - src_ptr, match_attr, false, &sctx)
+                || src_ptr == sctx.next || sctx.next >= src_end)
+            {
+                return false;
+            }
+
+            src_ptr = sctx.next;
+
+            trace_logf(1, js_trace, TRACE_PROC, packet,
+                "opening tag at %zd offset\n", src_ptr - page_start);
+
+            trace_logf(2, js_trace, TRACE_PROC, packet,
+                "script attributes [%s, %s, %s]\n",
+                sctx.is_shortened ? "shortened form" : "full form",
+                sctx.is_javascript ? "JavaScript type" : "unknown type",
+                sctx.is_external ? "external source" : "inline");
+
+            if (sctx.is_shortened)
+            {
+                *infractions += INF_JS_SHORTENED_TAG;
+                http_events->create_event(EVENT_JS_SHORTENED_TAG);
+                continue;
+            }
+
+            if (!sctx.is_javascript)
+                continue;
+
+            ext_ref_type = sctx.is_external;
+
+            break;
+        }
+
+        if (!ext_ref_type)
+            HttpModule::increment_peg_counts(PEG_JS_INLINE);
+    }
+
+    ext_script_type = false;
+    output_size = jsn_ctx->script_size();
+
+    trace_logf(3, js_trace, TRACE_DUMP, packet,
+        "original[%zu]: %.*s\n", src_end - src_ptr, (int)(src_end - src_ptr), src_ptr);
+
+    return true;
 }
 
-int HttpJsNorm::match_otag(void*, void*, int index, void* ptr, void*)
+bool HttpInlineJSNorm::post_proc(int ret)
 {
-    *(char**)ptr += index;
-    return 1;
+    trace_logf(3, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(),
+        "normalizer returned with %d '%s'\n", ret, jsn::ret2str(ret));
+
+    assert(http_events);
+    assert(infractions);
+
+    if (ext_ref_type && output_size != jsn_ctx->script_size())
+    {
+        *infractions += INF_JS_CODE_IN_EXTERNAL;
+        http_events->create_event(EVENT_JS_CODE_IN_EXTERNAL);
+    }
+
+    script_continue = ret == (int)jsn::JSTokenizer::SCRIPT_CONTINUE;
+
+    if (!script_continue)
+        jsn_ctx->reset_depth();
+
+    JSNorm::post_proc(ret);
+
+    return true; // reuse context
 }
 
-int HttpJsNorm::match_attr(void* pid, void*, int index, void* sctx, void*)
+bool HttpExternalJSNorm::pre_proc()
 {
-    MatchContext* ctx = (MatchContext*)sctx;
-    AttrId id = (AttrId)(uintptr_t)pid;
-    const char* c;
+    if (src_ptr >= src_end)
+        return false;
 
-    switch (id)
+    const Packet* packet = DetectionEngine::get_current_packet();
+
+    if (!ext_script_type)
     {
-    case AID_SLASH:
-        if (*(ctx->next + index) == '>')
-        {
-            ctx->is_shortened = true;
-            ctx->next += index;
-            return 1;
-        }
-        else
-        {
-            ctx->is_shortened = false;
-            return 0;
-        }
+        HttpModule::increment_peg_counts(PEG_JS_EXTERNAL);
+        trace_logf(1, js_trace, TRACE_PROC, packet,
+            "external script starts\n");
+        ext_script_type = true;
+    }
+    else
+    {
+        trace_logf(2, js_trace, TRACE_PROC, packet,
+            "script continues\n");
+    }
 
-    case AID_GT:
-        ctx->next += index;
-        return 1;
+    trace_logf(3, js_trace, TRACE_DUMP, packet,
+        "original[%zu]: %.*s\n", src_end - src_ptr, (int)(src_end - src_ptr), src_ptr);
 
-    case AID_SRC:
-        c = ctx->next + index;
-        while (*c == ' ') c++;
-        ctx->is_external = ctx->is_external || *c == '=';
-        return 0;
+    return true;
+}
 
-    case AID_JS:
-        ctx->is_javascript = true;
-        return 0;
+bool HttpExternalJSNorm::post_proc(int ret)
+{
+    trace_logf(3, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(),
+        "normalizer returned with %d '%s'\n", ret, jsn::ret2str(ret));
 
-    case AID_NON_JS:
-        ctx->is_javascript = false;
-        return 0;
+    script_continue = ret == (int)jsn::JSTokenizer::SCRIPT_CONTINUE;
 
-    default:
-        assert(false);
-        ctx->is_external = false;
-        ctx->is_javascript = false;
-        return 1;
-    }
+    return JSNorm::post_proc(ret);
 }
index 90094afc34ebdac844e684bcc391b455da9e32f6..ee61c14e0b8c5d6914e8d6b04eea6cd386e26f10 100644 (file)
 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 //--------------------------------------------------------------------------
 // http_js_norm.h author Tom Peters <thopeter@cisco.com>
+// http_js_norm.h author Oleksandr Serhiienko <oserhiie@cisco.com>
 
 #ifndef HTTP_JS_NORM_H
 #define HTTP_JS_NORM_H
 
 #include <cstring>
 
+#include "js_norm/js_norm.h"
 #include "search_engines/search_tool.h"
 
 #include "http_field.h"
 #include "http_event.h"
 #include "http_module.h"
 
-//-------------------------------------------------------------------------
-// HttpJsNorm class
-//-------------------------------------------------------------------------
+snort::SearchTool* js_create_mpse_open_tag();
+snort::SearchTool* js_create_mpse_tag_type();
+snort::SearchTool* js_create_mpse_tag_attr();
 
-class HttpJsNorm
+void js_normalize(const Field& input, Field& output, const HttpParaList*, HttpInfractions*, HttpEventGen*);
+
+class HttpJSNorm : public snort::JSNorm
 {
 public:
-    HttpJsNorm(const HttpParaList::UriParam& uri_param_,
-        const HttpParaList::JsNormParam& js_norm_param_);
-    ~HttpJsNorm();
+    HttpJSNorm(JSNormConfig* jsn_config) : snort::JSNorm(jsn_config) {}
 
-    void do_legacy(const Field& input, Field& output, HttpInfractions*, HttpEventGen*,
-        int max_javascript_whitespaces) const;
-    void do_inline(const Field& input, Field& output, HttpInfractions*, HttpFlowData*, bool) const;
-    void do_external(const Field& input, Field& output, HttpInfractions*, HttpFlowData*, bool) const;
+    void flush_data(const void*&, size_t&);
 
-    void configure();
+    void link(const void* page, HttpEventGen* http_events_, HttpInfractions* infs)
+    { page_start = (const uint8_t*)page; http_events = http_events_; infractions = infs; }
 
-private:
-    enum AttrId { AID_SLASH, AID_GT, AID_SRC, AID_JS, AID_NON_JS, AID_ECMA, AID_VB };
+    uint64_t get_trans_num() const
+    { return trans_num; }
 
-    struct MatchContext
-    {
-        const char* next;
-        bool is_javascript;
-        bool is_external;
-        bool is_shortened;
-    };
+protected:
+    const uint8_t* page_start = nullptr;
+    HttpEventGen* http_events = nullptr;
+    HttpInfractions* infractions = nullptr;
+    uint64_t trans_num = 0;
+    bool script_continue = false;
+};
+
+class HttpInlineJSNorm : public HttpJSNorm
+{
+public:
+    HttpInlineJSNorm(JSNormConfig* jsn_config, uint64_t tid, snort::SearchTool* mpse_open_tag,
+        snort::SearchTool* mpse_tag_attr) :
+        HttpJSNorm(jsn_config), mpse_otag(mpse_open_tag), mpse_attr(mpse_tag_attr), output_size(0), ext_ref_type(false)
+    { trans_num = tid; }
 
-    const HttpParaList::UriParam& uri_param;
-    const HttpParaList::JsNormParam& js_norm_param;
-    bool configure_once = false;
+protected:
+    bool pre_proc() override;
+    bool post_proc(int) override;
 
+private:
     snort::SearchTool* mpse_otag;
     snort::SearchTool* mpse_attr;
-    snort::SearchTool* mpse_type; // legacy only
+    size_t output_size;
+    bool ext_ref_type;
+};
 
-    static int search_js_found(void*, void*, int index, void*, void*);  // legacy only
-    static int search_html_found(void* id, void*, int, void*, void*); // legacy only
-    static int match_otag(void*, void*, int, void*, void*);
-    static int match_attr(void*, void*, int, void*, void*);
+class HttpExternalJSNorm : public HttpJSNorm
+{
+public:
+    HttpExternalJSNorm(JSNormConfig* jsn_config, uint64_t tid) : HttpJSNorm(jsn_config)
+    { trans_num = tid; }
 
-    bool alive_ctx(const HttpFlowData* ssn) const
-    { return ssn->js_normalizer; }
+protected:
+    bool pre_proc() override;
+    bool post_proc(int) override;
 };
 
 #endif
index 28153a072462a72536bbad53e12572ed1096bb18..125c0630f1ed66e6bda28db63057301a18dc54ab 100755 (executable)
@@ -24,7 +24,6 @@
 #include "http_module.h"
 
 #include "log/messages.h"
-#include "trace/trace.h"
 
 #include "http_enum.h"
 #include "http_js_norm.h"
@@ -45,18 +44,6 @@ HttpModule::~HttpModule()
     LiteralSearch::cleanup(script_detection_handle);
 }
 
-static const Parameter js_norm_ident_ignore_param[] =
-{
-    { "ident_name", Parameter::PT_STRING, nullptr, nullptr, "name of the identifier to ignore" },
-    { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr }
-};
-
-static const Parameter js_norm_prop_ignore_param[] =
-{
-    { "prop_name", Parameter::PT_STRING, nullptr, nullptr, "name of the object property to ignore" },
-    { nullptr, Parameter::PT_MAX, nullptr, nullptr, nullptr }
-};
-
 const Parameter HttpModule::http_params[] =
 {
     { "request_depth", Parameter::PT_INT, "-1:max53", "-1",
@@ -107,29 +94,6 @@ const Parameter HttpModule::http_params[] =
     { "normalize_javascript", Parameter::PT_BOOL, nullptr, "false",
       "use legacy normalizer to normalize JavaScript in response bodies" },
 
-    { "js_norm_bytes_depth", Parameter::PT_INT, "-1:max53", "-1",
-      "number of input JavaScript bytes to normalize (-1 unlimited)" },
-
-    // range of accepted identifier names is (var_0000:var_ffff), so the max is 2^16
-    { "js_norm_identifier_depth", Parameter::PT_INT, "0:65536", "65536",
-      "max number of unique JavaScript identifiers to normalize" },
-
-    { "js_norm_max_tmpl_nest", Parameter::PT_INT, "0:255", "32",
-      "maximum depth of template literal nesting that enhanced javascript normalizer "
-      "will process" },
-
-    { "js_norm_max_bracket_depth", Parameter::PT_INT, "1:65535", "256",
-      "maximum depth of bracket nesting that enhanced JavaScript normalizer will process" },
-
-    { "js_norm_max_scope_depth", Parameter::PT_INT, "1:65535", "256",
-      "maximum depth of scope nesting that enhanced JavaScript normalizer will process" },
-
-    { "js_norm_ident_ignore", Parameter::PT_LIST, js_norm_ident_ignore_param, nullptr,
-      "list of JavaScript ignored identifiers which will not be normalized" },
-
-    { "js_norm_prop_ignore", Parameter::PT_LIST, js_norm_prop_ignore_param, nullptr,
-      "list of JavaScript ignored object properties which will not be normalized" },
-
     { "max_javascript_whitespaces", Parameter::PT_INT, "1:65535", "200",
       "maximum consecutive whitespaces allowed within the JavaScript obfuscated data" },
 
@@ -217,25 +181,6 @@ ProfileStats* HttpModule::get_profile() const
 
 THREAD_LOCAL PegCount HttpModule::peg_counts[PEG_COUNT_MAX] = { };
 
-THREAD_LOCAL const Trace* http_trace = nullptr;
-
-static const TraceOption http_trace_options[] =
-{
-    { "js_proc",  TRACE_JS_PROC,  "enable JavaScript processing logging" },
-    { "js_dump",  TRACE_JS_DUMP,  "enable JavaScript data logging" },
-    { nullptr, 0, nullptr }
-};
-
-void HttpModule::set_trace(const Trace* trace) const
-{
-    http_trace = trace;
-}
-
-const TraceOption* HttpModule::get_trace_options() const
-{
-    return http_trace_options;
-}
-
 bool HttpModule::begin(const char* fqn, int, SnortConfig*)
 {
     if (strcmp(fqn, "http_inspect"))
@@ -320,34 +265,6 @@ bool HttpModule::set(const char*, Value& val, SnortConfig*)
     {
         params->js_norm_param.normalize_javascript = val.get_bool();
     }
-    else if (val.is("js_norm_identifier_depth"))
-    {
-        params->js_norm_param.js_identifier_depth = val.get_int32();
-    }
-    else if (val.is("js_norm_bytes_depth"))
-    {
-        params->js_norm_param.js_norm_bytes_depth = val.get_int64();
-    }
-    else if (val.is("js_norm_max_tmpl_nest"))
-    {
-        params->js_norm_param.max_template_nesting = val.get_uint8();
-    }
-    else if (val.is("js_norm_max_bracket_depth"))
-    {
-        params->js_norm_param.max_bracket_depth = val.get_uint32();
-    }
-    else if (val.is("js_norm_max_scope_depth"))
-    {
-        params->js_norm_param.max_scope_depth = val.get_uint32();
-    }
-    else if (val.is("ident_name"))
-    {
-        params->js_norm_param.ignored_ids.insert(val.get_string());
-    }
-    else if (val.is("prop_name"))
-    {
-        params->js_norm_param.ignored_props.insert(val.get_string());
-    }
     else if (val.is("max_javascript_whitespaces"))
     {
         params->js_norm_param.max_javascript_whitespaces = val.get_uint16();
@@ -536,8 +453,6 @@ bool HttpModule::end(const char* fqn, int, SnortConfig*)
                 params->uri_param.iis_unicode_code_page);
     }
 
-    params->js_norm_param.js_norm = new HttpJsNorm(params->uri_param, params->js_norm_param);
-
     params->script_detection_handle = script_detection_handle;
 
     prepare_http_header_list(params);
@@ -561,7 +476,16 @@ HttpParaList::~HttpParaList()
 
 HttpParaList::JsNormParam::~JsNormParam()
 {
-    delete js_norm;
+    delete mpse_otag;
+    delete mpse_type;
+    delete mpse_attr;
+}
+
+void HttpParaList::JsNormParam::configure() const
+{
+    mpse_otag = js_create_mpse_open_tag();
+    mpse_type = js_create_mpse_tag_type();
+    mpse_attr = js_create_mpse_tag_attr();
 }
 
 // Characters that should not be percent-encoded
index 46d8834d1c3e7791f45b66c64b3e5bfb05fd707d..78c878fb5398e746f821dbc1d18952acef138223 100755 (executable)
@@ -29,6 +29,7 @@
 #include "helpers/literal_search.h"
 #include "mime/file_mime_config.h"
 #include "profiler/profiler.h"
+#include "search_engines/search_tool.h"
 
 #include "http_enum.h"
 #include "http_str_to_code.h"
@@ -42,8 +43,6 @@ class Trace;
 struct SnortConfig;
 }
 
-extern THREAD_LOCAL const snort::Trace* http_trace;
-
 struct HttpParaList
 {
 public:
@@ -70,24 +69,21 @@ public:
 
     struct JsNormParam
     {
-    public:
         ~JsNormParam();
+
+        void configure() const;
+
         bool normalize_javascript = false;
-        int64_t js_norm_bytes_depth = -1;
-        int32_t js_identifier_depth = 0;
-        uint8_t max_template_nesting = 32;
-        uint32_t max_bracket_depth = 256;
-        uint32_t max_scope_depth = 256;
-        std::unordered_set<std::string> ignored_ids;
-        std::unordered_set<std::string> ignored_props;
         int max_javascript_whitespaces = 200;
-        class HttpJsNorm* js_norm = nullptr;
+
+        mutable snort::SearchTool* mpse_otag = nullptr;
+        mutable snort::SearchTool* mpse_type = nullptr;
+        mutable snort::SearchTool* mpse_attr = nullptr;
     };
     JsNormParam js_norm_param;
 
     struct UriParam
     {
-    public:
         UriParam();
         ~UriParam() { delete[] unicode_map; }
 
@@ -195,9 +191,6 @@ public:
     bool is_bindable() const override
     { return true; }
 
-    void set_trace(const snort::Trace*) const override;
-    const snort::TraceOption* get_trace_options() const override;
-
 #ifdef REG_TEST
     static const PegInfo* get_peg_names() { return peg_names; }
     static const PegCount* get_peg_counts() { return peg_counts; }
index d2c46878c67312e11289d6060599a9e5e749201f..6ab2b765b42b692c1507c2a913b9722f1e542e18 100644 (file)
@@ -27,6 +27,7 @@
 #include "file_api/file_flows.h"
 #include "file_api/file_service.h"
 #include "helpers/buffer_data.h"
+#include "js_norm/js_enum.h"
 #include "pub_sub/http_request_body_event.h"
 
 #include "http_api.h"
@@ -41,6 +42,9 @@
 using namespace snort;
 using namespace HttpCommon;
 using namespace HttpEnums;
+using namespace jsn;
+
+extern THREAD_LOCAL const snort::Trace* js_trace;
 
 HttpMsgBody::HttpMsgBody(const uint8_t* buffer, const uint16_t buf_size,
     HttpFlowData* session_data_, SourceId source_id_, bool buf_owner, Flow* flow_,
@@ -236,6 +240,9 @@ void HttpMsgBody::analyze()
         {
             do_file_decompression(decoded_body, decompressed_file_body);
 
+            if (decompressed_file_body.length() > 0 and session_data->js_ctx[source_id])
+                session_data->js_ctx[source_id]->tick();
+
             uint32_t& partial_detect_length = session_data->partial_detect_length[source_id];
             uint8_t*& partial_detect_buffer = session_data->partial_detect_buffer[source_id];
             uint32_t& partial_js_detect_length = session_data->partial_js_detect_length[source_id];
@@ -268,9 +275,6 @@ void HttpMsgBody::analyze()
             else
                 do_legacy_js_normalization(decompressed_file_body, js_norm_body);
 
-            if (decompressed_file_body.length() > 0)
-                ++session_data->js_data_idx;
-
             const int32_t detect_length =
                 (js_norm_body.length() <= session_data->detect_depth_remaining[source_id]) ?
                 js_norm_body.length() : session_data->detect_depth_remaining[source_id];
@@ -441,29 +445,37 @@ void HttpMsgBody::fd_event_callback(void* context, int event)
     }
 }
 
-void HttpMsgBody::do_enhanced_js_normalization(const Field& input, Field& output)
+void HttpMsgBody::do_legacy_js_normalization(const Field& input, Field& output)
 {
-    if (session_data->js_data_lost_once)
+    if (!params->js_norm_param.normalize_javascript || source_id == SRC_CLIENT)
+    {
+        output.set(input);
         return;
+    }
 
-    auto infractions = transaction->get_infractions(source_id);
-    auto back = !session_data->partial_flush[source_id];
-    auto http_header = get_header(source_id);
-    auto normalizer = params->js_norm_param.js_norm;
+    js_normalize(input, output, params,
+        transaction->get_infractions(source_id), session_data->events[source_id]);
+}
 
-    if ((*infractions & INF_UNKNOWN_ENCODING) or (*infractions & INF_UNSUPPORTED_ENCODING))
-        return;
+HttpJSNorm* HttpMsgBody::acquire_js_ctx()
+{
+    HttpJSNorm* js_ctx = session_data->js_ctx[source_id];
 
-    if (session_data->sync_js_data_idx())
+    if (js_ctx)
     {
-        *infractions += INF_JS_DATA_LOST;
-        session_data->events[HttpCommon::SRC_SERVER]->create_event(EVENT_JS_DATA_LOST);
-        session_data->js_data_lost_once = true;
-        return;
+        if (js_ctx->get_trans_num() == trans_num)
+            return js_ctx;
+
+        delete js_ctx;
+        js_ctx = nullptr;
     }
 
+    auto http_header = get_header(source_id);
+
     if (!http_header)
-        return;
+        return nullptr;
+
+    JSNormConfig* jsn_config = get_inspection_policy()->jsn_config;
 
     switch(http_header->get_content_type())
     {
@@ -483,27 +495,20 @@ void HttpMsgBody::do_enhanced_js_normalization(const Field& input, Field& output
     case CT_TEXT_X_ECMASCRIPT:
     case CT_TEXT_JSCRIPT:
     case CT_TEXT_LIVESCRIPT:
-        normalizer->do_external(input, output, infractions, session_data, back);
+        // an external script should be processed from the beginning
+        js_ctx = first_body ? new HttpExternalJSNorm(jsn_config, trans_num) : nullptr;
         break;
 
     case CT_APPLICATION_XHTML_XML:
     case CT_TEXT_HTML:
-        normalizer->do_inline(input, output, infractions, session_data, back);
+        js_ctx = new HttpInlineJSNorm(jsn_config, trans_num, params->js_norm_param.mpse_otag,
+            params->js_norm_param.mpse_attr);
         break;
     }
-}
 
-void HttpMsgBody::do_legacy_js_normalization(const Field& input, Field& output)
-{
-    if (!params->js_norm_param.normalize_javascript || source_id == SRC_CLIENT)
-    {
-        output.set(input);
-        return;
-    }
+    session_data->js_ctx[source_id] = js_ctx;
 
-    params->js_norm_param.js_norm->do_legacy(input, output,
-        transaction->get_infractions(source_id), session_data->events[source_id],
-        params->js_norm_param.max_javascript_whitespaces);
+    return js_ctx;
 }
 
 void HttpMsgBody::do_file_processing(const Field& file_data)
@@ -715,10 +720,36 @@ const Field& HttpMsgBody::get_norm_js_data()
         return norm_js_data;
     }
 
-    do_enhanced_js_normalization(decompressed_file_body, norm_js_data);
+    auto jsn = acquire_js_ctx();
+
+    if (!jsn)
+    {
+        norm_js_data.set(STAT_NO_SOURCE);
+        return norm_js_data;
+    }
+
+    const void* dst = nullptr;
+    size_t dst_len = HttpCommon::STAT_NOT_PRESENT;
+    auto back = !session_data->partial_flush[source_id];
+
+    jsn->link(decompressed_file_body.start(), session_data->events[source_id], transaction->get_infractions(source_id));
+    jsn->normalize(decompressed_file_body.start(), decompressed_file_body.length(), dst, dst_len);
+
+    debug_logf(4, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(),
+        "input data was %s\n", back ? "last one in PDU" : "a part of PDU");
 
-    if (norm_js_data.length() == STAT_NOT_COMPUTE)
+    if (!dst or !dst_len)
         norm_js_data.set(STAT_NOT_PRESENT);
+    else
+    {
+        if (back)
+            jsn->flush_data(dst, dst_len);
+
+        trace_logf(1, js_trace, TRACE_DUMP, DetectionEngine::get_current_packet(),
+            "js_data[%u]: %.*s\n", (unsigned)dst_len, (int)dst_len, (const char*)dst);
+
+        norm_js_data.set(dst_len, (const uint8_t*)dst, back);
+    }
 
     return norm_js_data;
 }
index ab10bed5b777d3cab406309f6629e7efe7434eba..10ceb6b17b9266c889a3b24370844908ec57a5eb 100644 (file)
@@ -71,8 +71,9 @@ private:
     void do_file_processing(const Field& file_data);
     void do_utf_decoding(const Field& input, Field& output);
     void do_file_decompression(const Field& input, Field& output);
-    void do_enhanced_js_normalization(const Field& input, Field& output);
     void do_legacy_js_normalization(const Field& input, Field& output);
+    HttpJSNorm* acquire_js_ctx();
+
     void clean_partial(uint32_t& partial_inspected_octets, uint32_t& partial_detect_length,
         uint8_t*& partial_detect_buffer,  uint32_t& partial_js_detect_length);
     void bookkeeping_regular_flush(uint32_t& partial_detect_length,
index 1895673a48c1f0d8d2e907895f05c2c339209476..aa9a80fae5e0f47610678d7bcfb1d51b4be1fb07 100644 (file)
@@ -40,9 +40,6 @@ HttpMsgRequest::HttpMsgRequest(const uint8_t* buffer, const uint16_t buf_size,
 {
     transaction->set_request(this);
     get_related_sections();
-    session_data->release_js_ctx();
-    session_data->reset_js_ident_ctx();
-    session_data->reset_js_data_idx();
 }
 
 HttpMsgRequest::~HttpMsgRequest()
index 842c87eb7e79e0e175fae270826ea74090a38717..6c7cb7d389dd6c7b6b0e7cc0ef214fe69fedde1c 100644 (file)
@@ -169,24 +169,24 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf)
     switch (buf.type)
     {
     case HTTP_BUFFER_CLIENT_BODY:
-      {
+    {
         if (source_id != SRC_CLIENT)
             return Field::FIELD_NULL;
         return (get_body() != nullptr) ? get_body()->get_classic_client_body() : Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_COOKIE:
     case HTTP_BUFFER_RAW_COOKIE:
-      {
+    {
         if (header[buffer_side] == nullptr)
             return Field::FIELD_NULL;
         return (buf.type == HTTP_BUFFER_COOKIE) ? header[buffer_side]->get_classic_norm_cookie() :
             header[buffer_side]->get_classic_raw_cookie();
-      }
+    }
     case HTTP_BUFFER_HEADER:
     case HTTP_BUFFER_TRAILER:
     case HTTP_HEADER_TEST:
     case HTTP_TRAILER_TEST:
-      {
+    {
         HttpMsgHeadShared* const head = (buf.type == HTTP_BUFFER_HEADER || buf.type == HTTP_HEADER_TEST) ?
             (HttpMsgHeadShared*)header[buffer_side] : (HttpMsgHeadShared*)trailer[buffer_side];
         if (head == nullptr)
@@ -194,18 +194,18 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf)
         if (buf.sub_id == 0)
             return head->get_classic_norm_header();
         return head->get_header_value_norm((HeaderId)buf.sub_id);
-      }
+    }
     case HTTP_BUFFER_METHOD:
-      {
+    {
         return (request != nullptr) ? request->get_method() : Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_RAW_BODY:
-      {
+    {
         return (get_body() != nullptr) ? get_body()->get_raw_body() : Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_RAW_HEADER:
     case HTTP_BUFFER_RAW_TRAILER:
-      {
+    {
         HttpMsgHeadShared* const head = (buf.type == HTTP_BUFFER_RAW_HEADER) ?
             (HttpMsgHeadShared*)header[buffer_side] : (HttpMsgHeadShared*)trailer[buffer_side];
         if (head == nullptr)
@@ -213,31 +213,31 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf)
         if (buf.sub_id == 0)
             return head->msg_text;
         return head->get_all_header_values_raw((HeaderId)buf.sub_id);
-      }
+    }
     case HTTP_BUFFER_RAW_REQUEST:
-      {
+    {
         return (request != nullptr) ? request->msg_text : Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_RAW_STATUS:
-      {
+    {
         return (status != nullptr) ? status->msg_text : Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_STAT_CODE:
-      {
+    {
         return (status != nullptr) ? status->get_status_code() : Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_STAT_MSG:
-      {
+    {
         return (status != nullptr) ? status->get_reason_phrase() : Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_TRUE_IP:
-      {
+    {
         return (header[SRC_CLIENT] != nullptr) ? header[SRC_CLIENT]->get_true_ip() :
             Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_URI:
     case HTTP_BUFFER_RAW_URI:
-      {
+    {
         const bool raw = (buf.type == HTTP_BUFFER_RAW_URI);
         if (request == nullptr)
             return Field::FIELD_NULL;
@@ -263,29 +263,29 @@ const Field& HttpMsgSection::get_classic_buffer(const HttpBufferInfo& buf)
         }
         assert(false);
         return Field::FIELD_NULL;
-      }
+    }
     case HTTP_BUFFER_VERSION:
-      {
+    {
         HttpMsgStart* start = (buffer_side == SRC_CLIENT) ?
             (HttpMsgStart*)request : (HttpMsgStart*)status;
         return (start != nullptr) ? start->get_version() : Field::FIELD_NULL;
-      }
+    }
     case BUFFER_VBA_DATA:
-      {
+    {
         HttpMsgBody* msg_body = get_body();
         if (msg_body)
-            return msg_body->get_decomp_vba_data(); 
+            return msg_body->get_decomp_vba_data();
         else
             return Field::FIELD_NULL;
-      }
+    }
     case BUFFER_JS_DATA:
-      {
+    {
         HttpMsgBody* msg_body = get_body();
         if (msg_body)
-            return msg_body->get_norm_js_data(); 
+            return msg_body->get_norm_js_data();
         else
             return Field::FIELD_NULL;
-      }
+    }
     default:
         assert(false);
         return Field::FIELD_NULL;
@@ -519,4 +519,3 @@ void HttpMsgSection::print_peg_counts(FILE* output) const
 }
 
 #endif
-
index 99001b840fd87ac835660923df1e5ff5287288dc..1db07a9e368f81f2819861fc611750d682b953e3 100755 (executable)
@@ -328,16 +328,9 @@ const RuleMap HttpModule::http_events[] =
     { EVENT_LONG_SCHEME,                "HTTP URI scheme longer than 10 characters" },
     { EVENT_HTTP2_UPGRADE_REQUEST,      "HTTP/1 client requested HTTP/2 upgrade" },
     { EVENT_HTTP2_UPGRADE_RESPONSE,     "HTTP/1 server granted HTTP/2 upgrade" },
-    { EVENT_JS_BAD_TOKEN,               "bad token in JavaScript" },
-    { EVENT_JS_OPENING_TAG,             "unexpected script opening tag in JavaScript" },
-    { EVENT_JS_CLOSING_TAG,             "unexpected script closing tag in JavaScript" },
     { EVENT_JS_CODE_IN_EXTERNAL,        "JavaScript code under the external script tags" },
     { EVENT_JS_SHORTENED_TAG,           "script opening tag in a short form" },
-    { EVENT_JS_IDENTIFIER_OVERFLOW,     "max number of unique JavaScript identifiers reached" },
-    { EVENT_JS_BRACKET_NEST_OVERFLOW,   "excessive JavaScript bracket nesting" },
     { EVENT_ACCEPT_ENCODING_CONSECUTIVE_COMMAS, "Consecutive commas in HTTP Accept-Encoding header" },
-    { EVENT_JS_DATA_LOST,               "data gaps during JavaScript normalization" },
-    { EVENT_JS_SCOPE_NEST_OVERFLOW,     "excessive JavaScript scope nesting" },
     { EVENT_INVALID_SUBVERSION,         "HTTP/1 version other than 1.0 or 1.1" },
     { EVENT_VERSION_0,                  "HTTP version in start line is 0" },
     { EVENT_VERSION_HIGHER_THAN_1,      "HTTP version in start line is higher than 1" },
@@ -390,9 +383,6 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] =
     { CountType::SUM, "total_bytes", "total HTTP data bytes inspected" },
     { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" },
     { CountType::SUM, "js_external_scripts", "total number of external JavaScripts processed" },
-    { CountType::SUM, "js_bytes", "total number of JavaScript bytes processed" },
-    { CountType::SUM, "js_identifiers", "total number of unique JavaScript identifiers processed" },
-    { CountType::SUM, "js_identifier_overflows", "total number of unique JavaScript identifier limit overflows" },
     { CountType::SUM, "skip_mime_attach", "total number of HTTP requests with too many MIME attachments to inspect" },
     { CountType::END, nullptr, nullptr }
 };
index 989f4ee34203fa94f181604118abc64753e94146..a3a9fa219386d60fc32cd1c2cee3e36683db27de 100644 (file)
@@ -1069,46 +1069,6 @@ static const IpsApi version_api =
     nullptr
 };
 
-//-------------------------------------------------------------------------
-// js_data
-//-------------------------------------------------------------------------
-//
-
-#undef IPS_OPT
-#define IPS_OPT "js_data"
-#undef IPS_HELP
-#define IPS_HELP "rule option to set detection cursor to normalized JavaScript data"
-static Module* js_data_mod_ctor()
-{
-    return new HttpBufferRuleOptModule(IPS_OPT, IPS_HELP, BUFFER_JS_DATA, CAT_SET_FAST_PATTERN,
-        BUFFER_PSI_JS_DATA);
-}
-
-static const IpsApi js_data_api =
-{
-    {
-        PT_IPS_OPTION,
-        sizeof(IpsApi),
-        IPSAPI_VERSION,
-        1,
-        API_RESERVED,
-        API_OPTIONS,
-        IPS_OPT,
-        IPS_HELP,
-        js_data_mod_ctor,
-        HttpBufferRuleOptModule::mod_dtor
-    },
-    OPT_TYPE_DETECTION,
-    0, PROTO_BIT__TCP,
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,
-    HttpBufferIpsOption::opt_ctor,
-    HttpBufferIpsOption::opt_dtor,
-    nullptr
-};
-
 //-------------------------------------------------------------------------
 // plugins
 //-------------------------------------------------------------------------
@@ -1130,4 +1090,4 @@ const BaseApi* ips_http_trailer = &trailer_api.base;
 const BaseApi* ips_http_true_ip = &true_ip_api.base;
 const BaseApi* ips_http_uri = &uri_api.base;
 const BaseApi* ips_http_version = &version_api.base;
-const BaseApi* ips_js_data = &js_data_api.base;
+
index de53244082dee98e0bd386c88011a9d097c348c6..7a7cdb990151e47257ce2f7ff5c7e6ec1846a493 100755 (executable)
@@ -58,6 +58,8 @@ void DecodeConfig::set_decompress_pdf(bool) {}
 void DecodeConfig::set_decompress_swf(bool) {}
 void DecodeConfig::set_decompress_zip(bool) {}
 void DecodeConfig::set_decompress_vba(bool) {}
+
+SearchTool::~SearchTool() {}
 }
 
 void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { }
@@ -69,12 +71,10 @@ int32_t substr_to_code(const uint8_t*, const int32_t, const StrCode []) { return
 long HttpTestManager::print_amount {};
 bool HttpTestManager::print_hex {};
 
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_,
-    const HttpParaList::JsNormParam& js_norm_param_) :
-    uri_param(uri_param_), js_norm_param(js_norm_param_), mpse_otag(nullptr), mpse_attr(nullptr),
-    mpse_type(nullptr) {}
-HttpJsNorm::~HttpJsNorm() = default;
-void HttpJsNorm::configure(){}
+snort::SearchTool* js_create_mpse_open_tag() { return nullptr; }
+snort::SearchTool* js_create_mpse_tag_type() { return nullptr; }
+snort::SearchTool* js_create_mpse_tag_attr() { return nullptr; }
+
 int64_t Parameter::get_int(char const*) { return 0; }
 
 TEST_GROUP(http_peg_count_test)
index f6f6e17e5943699f220f86059a68e5f574e1db8f..da3bc6b1cafffb893a160289ae06ce09226f5add 100755 (executable)
@@ -53,17 +53,16 @@ void DecodeConfig::set_decompress_pdf(bool) {}
 void DecodeConfig::set_decompress_swf(bool) {}
 void DecodeConfig::set_decompress_zip(bool) {}
 void DecodeConfig::set_decompress_vba(bool) {}
+SearchTool::~SearchTool() {}
 }
 
+snort::SearchTool* js_create_mpse_open_tag() { return nullptr; }
+snort::SearchTool* js_create_mpse_tag_type() { return nullptr; }
+snort::SearchTool* js_create_mpse_tag_attr() { return nullptr; }
+
 void show_stats(PegCount*, const PegInfo*, unsigned, const char*) { }
 void show_stats(PegCount*, const PegInfo*, const IndexVec&, const char*, FILE*) { }
 
-HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_,
-    const HttpParaList::JsNormParam& js_norm_param_) :
-    uri_param(uri_param_), js_norm_param(js_norm_param_), mpse_otag(nullptr), mpse_attr(nullptr),
-    mpse_type(nullptr) {}
-HttpJsNorm::~HttpJsNorm() = default;
-void HttpJsNorm::configure() {}
 int64_t Parameter::get_int(char const*) { return 0; }
 
 TEST_GROUP(http_inspect_uri_norm)
index f69c3f00c29324c74bbb61ff69900f0852427b50..19372c3afad863cca32184dc61d2bd82c8bed412 100644 (file)
@@ -18,24 +18,13 @@ set( UTIL_INCLUDES
     util_utf.h
 )
 
-FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/js_tokenizer.l
-    ${CMAKE_CURRENT_BINARY_DIR}/js_tokenizer.cc
-    COMPILE_FLAGS ${FLEX_FLAGS}
-)
-
 add_library ( utils OBJECT
     ${UTIL_INCLUDES}
     ${SNPRINTF_SOURCES}
-    ${FLEX_js_tokenizer_OUTPUTS}
     boyer_moore.cc
     dnet_header.h
     dyn_array.cc
     dyn_array.h
-    js_identifier_ctx.cc
-    js_identifier_ctx.h
-    js_normalizer.cc
-    js_normalizer.h
-    js_tokenizer.h
     kmap.cc
     sflsq.cc
     snort_bounds.h
index 12a694b3b8190a248869d2947dd3737fe129e2aa..1c6a91c68f60a70bf208b0dbe2e41dc679c2d50b 100644 (file)
@@ -5,62 +5,6 @@ add_cpputest( boyer_moore_test
 
 add_cpputest( memcap_allocator_test )
 
-FLEX_TARGET ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/../js_tokenizer.l
-    ${CMAKE_CURRENT_BINARY_DIR}/../js_tokenizer.cc
-    COMPILE_FLAGS ${FLEX_FLAGS}
-)
-
-add_catch_test( js_normalizer_test
-    SOURCES
-        ${FLEX_js_tokenizer_OUTPUTS}
-        ../js_identifier_ctx.cc
-        ../js_normalizer.cc
-        ../streambuf.cc
-        ../util_cstring.cc
-        js_test_options.cc
-        js_test_utils.cc
-)
-
-if (ENABLE_BENCHMARK_TESTS)
-    add_catch_test( js_norm_benchmark
-        SOURCES
-            ${FLEX_js_tokenizer_OUTPUTS}
-            ../js_identifier_ctx.cc
-            ../js_normalizer.cc
-            ../streambuf.cc
-            ../util_cstring.cc
-            js_test_options.cc
-            js_test_utils.cc
-    )
-endif(ENABLE_BENCHMARK_TESTS)
-
-add_catch_test( js_dealias_test
-    SOURCES
-        ${FLEX_js_tokenizer_OUTPUTS}
-        ../js_identifier_ctx.cc
-        ../js_normalizer.cc
-        ../streambuf.cc
-        ../util_cstring.cc
-        js_test_options.cc
-        js_test_utils.cc
-)
-
-add_catch_test( js_unescape_test
-    SOURCES
-        ${FLEX_js_tokenizer_OUTPUTS}
-        ../js_identifier_ctx.cc
-        ../js_normalizer.cc
-        ../streambuf.cc
-        ../util_cstring.cc
-        js_test_options.cc
-        js_test_utils.cc
-)
-
-add_catch_test( js_identifier_ctx_test
-    SOURCES
-        ../js_identifier_ctx.cc
-)
-
 add_catch_test( streambuf_test
     SOURCES
         ../streambuf.cc