From: Mike Stepanek (mstepane) Date: Wed, 21 Apr 2021 17:39:43 +0000 (+0000) Subject: Merge pull request #2848 in SNORT/snort3 from ~OSERHIIE/snort3:js_inline_scripts... X-Git-Tag: 3.1.4.0~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e4ddccd8b6dde2ef2b3c436fbe55158a4d58f78c;p=thirdparty%2Fsnort3.git Merge pull request #2848 in SNORT/snort3 from ~OSERHIIE/snort3:js_inline_scripts to master Squashed commit of the following: commit 8d130d92807ecc480c3832e7e85697883bf1ae42 Author: Oleksandr Serhiienko Date: Fri Apr 16 12:48:49 2021 +0300 http_inspect: add js_inline_scripts peg count commit 07beb04a28389e09bc0e77f672e86f58e5ef4194 Author: Oleksandr Serhiienko Date: Fri Apr 16 10:31:01 2021 +0300 http_inspect: identify external javascripts commit b4e77cf2d524ecc076eb6007d9e1f4743b2852e4 Author: Oleksandr Serhiienko Date: Fri Apr 16 01:34:30 2021 +0300 http_inspect: add built-in alert for unexpected tags within inline javascript --- diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 0002e4bbf..d3cf7817b 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -62,7 +62,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES PEG_OTHER_METHOD, PEG_REQUEST_BODY, PEG_CHUNKED, PEG_URI_NORM, PEG_URI_PATH, PEG_URI_CODING, PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION, PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY, - PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_COUNT_MAX }; + PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_COUNT_MAX }; // Result of scanning by splitter enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE, @@ -264,6 +264,7 @@ enum Infraction INF_MULTIPLE_HOST_HDRS, INF_HTTP2_SETTINGS, INF_UPGRADE_HEADER_HTTP2, + INF_JS_UNEXPECTED_TAG, INF__MAX_VALUE }; @@ -322,6 +323,7 @@ enum EventSid EVENT_PDF_UNSUP_COMP_TYPE = 115, EVENT_PDF_CASC_COMP = 116, EVENT_PDF_PARSE_FAILURE = 117, + EVENT_JS_UNEXPECTED_TAG = 118, EVENT_LOSS_OF_SYNC = 201, EVENT_CHUNK_ZEROS = 202, diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index 2ce7fb0f4..8aad96222 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -23,6 +23,7 @@ #include "http_js_norm.h" +#include "utils/js_norm_state.h" #include "utils/js_normalizer.h" #include "utils/safec.h" #include "utils/util_jsnorm.h" @@ -40,6 +41,7 @@ HttpJsNorm::HttpJsNorm(const HttpParaList::UriParam& uri_param_) : HttpJsNorm::~HttpJsNorm() { delete javascript_search_mpse; + delete js_src_attr_search_mpse; delete htmltype_search_mpse; } @@ -49,11 +51,15 @@ void HttpJsNorm::configure() return; javascript_search_mpse = new SearchTool; + js_src_attr_search_mpse = new SearchTool; htmltype_search_mpse = new SearchTool; javascript_search_mpse->add(script_start, script_start_length, JS_JAVASCRIPT); javascript_search_mpse->prep(); + js_src_attr_search_mpse->add(script_src_attr, script_src_attr_length, JS_ATTR_SRC); + js_src_attr_search_mpse->prep(); + struct HiSearchToken { const char* name; @@ -78,8 +84,8 @@ void HttpJsNorm::configure() configure_once = true; } -void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, - int64_t js_normalization_depth) const +void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions, + HttpEventGen* events, int64_t js_normalization_depth) const { bool js_present = false; int index = 0; @@ -88,6 +94,10 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, uint8_t* buffer = new uint8_t[input.length()]; + JSNormState state; + state.norm_depth = js_normalization_depth; + state.alerts = 0; + while (ptr < end) { int bytes_copied = 0; @@ -103,12 +113,15 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, break; bool type_js = false; + bool external_js = false; if (angle_bracket > js_start) { int mid; const int script_found = htmltype_search_mpse->find( js_start, (angle_bracket-js_start), search_html_found, false, &mid); + external_js = is_external_script(js_start, angle_bracket); + js_start = angle_bracket + 1; if (script_found > 0) { @@ -138,11 +151,13 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, } ptr = js_start; - if (!type_js) + if (!type_js or external_js) continue; JSNormalizer::normalize(js_start, (uint16_t)(end-js_start), (char*)buffer+index, - (uint16_t)(input.length() - index), &ptr, &bytes_copied, js_normalization_depth); + (uint16_t)(input.length() - index), &ptr, &bytes_copied, state); + + HttpModule::increment_peg_counts(PEG_JS_INLINE); index += bytes_copied; } @@ -151,7 +166,14 @@ void HttpJsNorm::enhanced_normalize(const Field& input, Field& output, } if (js_present) + { + if (state.alerts & ALERT_UNEXPECTED_TAG) + { + *infractions += INF_JS_UNEXPECTED_TAG; + events->create_event(EVENT_JS_UNEXPECTED_TAG); + } output.set(index, buffer, true); + } else delete[] buffer; } @@ -276,9 +298,36 @@ int HttpJsNorm::search_js_found(void*, void*, int index, void* index_ptr, void*) *((int*) index_ptr) = index - script_start_length; return 1; } +int HttpJsNorm::search_js_src_attr_found(void*, void*, int index, void* index_ptr, void*) +{ + *((int*) index_ptr) = index - script_src_attr_length; + return 1; +} int HttpJsNorm::search_html_found(void* id, void*, int, void* id_ptr, void*) { *((int*) id_ptr) = (int)(uintptr_t)id; return 1; } +bool HttpJsNorm::is_external_script(const char* it, const char* script_tag_end) const +{ + int src_pos; + + while (js_src_attr_search_mpse->find(it, (script_tag_end - it), + search_js_src_attr_found, false, &src_pos)) + { + it += (src_pos + script_src_attr_length - 1); + while (++it < script_tag_end) + { + if (*it == ' ') + continue; + else if (*it == '=') + return true; + else + break; + } + } + + return false; +} + diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h index 5f083349a..f48ec40d4 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.h +++ b/src/service_inspectors/http_inspect/http_js_norm.h @@ -39,26 +39,33 @@ public: ~HttpJsNorm(); void legacy_normalize(const Field& input, Field& output, HttpInfractions* infractions, HttpEventGen* events, int max_javascript_whitespaces) const; - void enhanced_normalize(const Field& input, Field& output, - int64_t js_normalization_depth) const; + void enhanced_normalize(const Field& input, Field& output, HttpInfractions* infractions, + HttpEventGen* events, int64_t js_normalization_depth) const; void configure(); private: bool configure_once = false; enum JsSearchId { JS_JAVASCRIPT }; + enum JsSrcAttrSearchId { JS_ATTR_SRC }; enum HtmlSearchId { HTML_JS, HTML_EMA, HTML_VB }; static constexpr const char* script_start = "js_norm_param.js_norm->enhanced_normalize(input, enhanced_js_norm_body, + transaction->get_infractions(source_id), session_data->events[source_id], params->js_norm_param.js_normalization_depth); const int32_t norm_length = diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index 022b13815..ad7f3c869 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -357,6 +357,7 @@ const RuleMap HttpModule::http_events[] = { EVENT_PDF_UNSUP_COMP_TYPE, "PDF file unsupported compression type" }, { EVENT_PDF_CASC_COMP, "PDF file cascaded compression" }, { EVENT_PDF_PARSE_FAILURE, "PDF file parse failure" }, + { EVENT_JS_UNEXPECTED_TAG, "unexpected script tag within inline javascript" }, { EVENT_LOSS_OF_SYNC, "not HTTP traffic" }, { EVENT_CHUNK_ZEROS, "chunk length has excessive leading zeros" }, { EVENT_WS_BETWEEN_MSGS, "white space before or between messages" }, @@ -463,6 +464,7 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] = { CountType::SUM, "pipelined_flows", "total HTTP connections containing pipelined requests" }, { CountType::SUM, "pipelined_requests", "total requests placed in a pipeline" }, { CountType::SUM, "total_bytes", "total HTTP data bytes inspected" }, + { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" }, { CountType::END, nullptr, nullptr } }; diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 38fc2ddce..d42b18936 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -32,6 +32,7 @@ add_library ( utils OBJECT dnet_header.h dyn_array.cc dyn_array.h + js_norm_state.h js_normalizer.cc js_normalizer.h js_tokenizer.h diff --git a/src/utils/js_norm_state.h b/src/utils/js_norm_state.h new file mode 100644 index 000000000..764edb3d3 --- /dev/null +++ b/src/utils/js_norm_state.h @@ -0,0 +1,37 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2021-2021 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_norm_state.h author Oleksandr Serhiienko + +#ifndef JS_NORM_STATE_H +#define JS_NORM_STATE_H + +#include "main/snort_types.h" + +namespace snort +{ +#define ALERT_UNEXPECTED_TAG 0x1 + +struct JSNormState +{ + int64_t norm_depth; + uint16_t alerts; +}; +} + +#endif // JS_NORM_STATE_H + diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc index 1c41eaddc..a5868fe05 100644 --- a/src/utils/js_normalizer.cc +++ b/src/utils/js_normalizer.cc @@ -30,13 +30,13 @@ using namespace snort; int JSNormalizer::normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen, - const char** ptr, int* bytes_copied, int64_t norm_depth) + const char** ptr, int* bytes_copied, JSNormState& state) { std::stringstream in, out; + in.rdbuf()->pubsetbuf(const_cast(srcbuf), + (state.norm_depth >= srclen) ? srclen : state.norm_depth); - in.rdbuf()->pubsetbuf(const_cast(srcbuf), (norm_depth >= srclen) ? srclen : norm_depth); - JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied); - + JSTokenizer tokenizer(in, out, dstbuf, dstlen, ptr, bytes_copied, state); return tokenizer.yylex(); } diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index 9152e2142..2e562bb1b 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -22,13 +22,15 @@ #include "main/snort_types.h" +#include "js_norm_state.h" + namespace snort { class JSNormalizer { public: static int normalize(const char* srcbuf, uint16_t srclen, char* dstbuf, uint16_t dstlen, - const char** ptr, int* bytes_copied, int64_t norm_depth); + const char** ptr, int* bytes_copied, JSNormState& state); }; } diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 892fdc425..2e284ef44 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -24,6 +24,8 @@ #include "log/messages.h" +#include "js_norm_state.h" + class JSTokenizer : public yyFlexLexer { private: @@ -35,14 +37,13 @@ private: PUNCTUATOR, OPERATOR, LITERAL, - DIRECTIVE, - TAG_SCRIPT_OPEN + DIRECTIVE }; public: // we need an out stream because yyFlexLexer API strongly requires that JSTokenizer(std::stringstream& in, std::stringstream& out, char* dstbuf, - const uint16_t dstlen, const char** ptr, int* bytes_copied); + const uint16_t dstlen, const char** ptr, int* bytes_copied, snort::JSNormState& state); ~JSTokenizer() override; // so, Flex will treat this class as yyclass @@ -66,11 +67,11 @@ private: bool eval_string_literal(const char* match_prefix, const char quotes); bool eval_regex_literal(const char* match_prefix); bool eval_eof(); - void skip_single_line_comment(); - void skip_multi_line_comment(); + bool eval_single_line_comment(); + bool eval_multi_line_comment(); bool parse_literal(const std::string& match_prefix, const char sentinel_ch, - std::string& result, bool is_regex = false); + std::string& result, bool& is_alert, bool is_regex = false); // main lexeme handler // all scanned tokens must pass here @@ -80,7 +81,6 @@ private: bool normalize_punctuator(const JSToken prev_tok, const char* lexeme); bool normalize_operator(const JSToken prev_tok, const char* lexeme); bool normalize_directive(const JSToken prev_tok, const char* lexeme); - bool normalize_tag_script_open(const JSToken prev_tok, const char* lexeme); bool normalize_undefined(const JSToken prev_tok, const char* lexeme); bool normalize_lexeme(const JSToken prev_tok, const char* lexeme); @@ -100,6 +100,8 @@ private: JSToken prev_tok = UNDEFINED; + snort::JSNormState& state; + }; #endif // JS_TOKENIZER_H diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index af06087af..84e5ef6ea 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -31,6 +31,10 @@ #endif #include "utils/js_tokenizer.h" + + #include + + #include "utils/util_cstring.h" %} /* The following grammar was created based on ECMAScript specification */ @@ -869,9 +873,9 @@ LITERAL_INFINITY Infinity|\xE2\x88\x9E LITERAL_NAN NaN LITERAL {LITERAL_NULL}|{LITERAL_BOOLEAN}|{LITERAL_DECIMAL}|{LITERAL_HEX_INTEGER}|{LITERAL_UNDEFINED}|{LITERAL_INFINITY}|{LITERAL_NAN} -HTML_COMMENT_OPEN \n" + "var b = 2;\r\n"; + +static const char unexpected_tag_expected17[] = + "var a=1;"; + +static const char unexpected_tag_buf18[] = + "var a = 1;\n" + "//something \n" + "var b = 2;\r\n"; + +static const char unexpected_tag_expected18[] = + "var a=1;"; + +static const char unexpected_tag_buf19[] = + "var a = 1;\n" + "/*\n" + "something*/\n" + "var b = 2;\r\n"; + +static const char unexpected_tag_expected19[] = + "var a=1;"; + +static const char unexpected_tag_buf20[] = + "var a = 1;\n" + "/*something\n" + "\n" + "something*/\n" + "var b = 2;\r\n"; + +static const char unexpected_tag_expected20[] = + "var a=1;"; + +static const char unexpected_tag_buf21[] = + "var a = 1;\n" + "/*something\n" + "*/\n" + "var b = 2;\r\n"; + +static const char unexpected_tag_expected21[] = + "var a=1;"; + +static const char unexpected_tag_buf22[] = + "var a = 1;\n" + "var str = 'script somescript /script something';\n" + "var b = 2;\r\n"; + +static const char unexpected_tag_expected22[] = + "var a=1;var str='script somescript /script something';var b=2;"; + +static const char unexpected_tag_buf23[] = + "var a = 1;\n" + "var str = 'script somescript /script something