From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Fri, 19 Aug 2022 12:48:59 +0000 (+0000) Subject: Pull request #3550: utils: Add ext_script checks to tokens X-Git-Tag: 3.1.40.0~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b63c5c4c52e033e5edde2f1794118938dd623c93;p=thirdparty%2Fsnort3.git Pull request #3550: utils: Add ext_script checks to tokens Merge in SNORT/snort3 from ~DKYRYLOV/snort3:js_norm_end_tag to master Squashed commit of the following: commit 0450c203be60a18457f4cab5882b80e0cacfc256 Author: dkyrylov Date: Wed Aug 10 15:06:34 2022 +0300 utils: allow closing tag in external scripts An appropriate built-in alert will be generated without stopping the normalization. --- diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index ce7a7c09d..10d99a470 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -200,9 +200,7 @@ void HttpJsNorm::do_external(const Field& input, Field& output, break; case JSTokenizer::SCRIPT_ENDED: case JSTokenizer::CLOSING_TAG: - *infractions += INF_JS_CLOSING_TAG; - events->create_event(EVENT_JS_CLOSING_TAG); - ssn->js_built_in_event = true; + assert(false); // should not be present in external break; case JSTokenizer::BAD_TOKEN: case JSTokenizer::WRONG_CLOSING_SYMBOL: @@ -243,6 +241,11 @@ void HttpJsNorm::do_external(const Field& input, Field& output, *infractions += INF_MIXED_ENCODINGS; events->create_event(EVENT_MIXED_ENCODINGS); } + if (js_ctx.is_closing_tag_seen()) + { + *infractions += INF_JS_CLOSING_TAG; + events->create_event(EVENT_JS_CLOSING_TAG); + } if (ssn->js_built_in_event) break; diff --git a/src/utils/js_identifier_ctx.cc b/src/utils/js_identifier_ctx.cc index a56bb05a0..c5d0478a4 100644 --- a/src/utils/js_identifier_ctx.cc +++ b/src/utils/js_identifier_ctx.cc @@ -245,7 +245,7 @@ bool JSIdentifierCtx::scope_check(const std::list& 
compare) const std::list JSIdentifierCtx::get_types() const { std::list return_list; - for(const auto& scope:scopes) + for (const auto& scope : scopes) { return_list.push_back(scope.type()); } diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index db1b13760..5243faaa2 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -68,6 +68,9 @@ public: bool is_opening_tag_seen() const { return tokenizer.is_opening_tag_seen(); } + bool is_closing_tag_seen() const + { return tokenizer.is_closing_tag_seen(); } + #if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST) const char* get_tmp_buf() const { return tmp_buf; } diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 697b76d49..e69636055 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -176,6 +176,8 @@ public: bool is_unescape_nesting_seen() const; bool is_mixed_encoding_seen() const; bool is_opening_tag_seen() const; + bool is_closing_tag_seen() const; + protected: [[noreturn]] void LexerError(const char* msg) override { snort::FatalError("%s", msg); } @@ -307,6 +309,7 @@ private: void lit_int_code_point(int base); void char_code_no_match(); void explicit_otag(); + void ctag_in_regex(); static const char* p_scope_codes[]; @@ -323,6 +326,7 @@ private: bool unescape_nest_seen = false; bool mixed_encoding_seen = false; bool opening_tag_seen = false; + bool closing_tag_seen = false; uint8_t max_template_nesting; VStack brace_depth; diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index 2a6820968..a1b7dcb25 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -1106,7 +1106,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) } {LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } -{HTML_TAG_SCRIPT_CLOSE} { if (ext_script) { ECHO; } else 
{ BEGIN(regst); RETURN(CLOSING_TAG) } } +{HTML_TAG_SCRIPT_CLOSE} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } else { ECHO; } } \\{CR}{LF} { /* skip */ } \\{LF} { /* skip */ } \\{CR} { /* skip */ } @@ -1128,7 +1128,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) } {LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } {HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } -{HTML_TAG_SCRIPT_CLOSE} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } } +{HTML_TAG_SCRIPT_CLOSE} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } else { ECHO; } } \\{CR}{LF} { /* skip */ } \\{LF} { /* skip */ } \\{CR} { /* skip */ } @@ -1151,7 +1151,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 (\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); } (\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); } {HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } +{HTML_TAG_SCRIPT_CLOSE} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } else { ECHO; } } <> { RETURN(SCRIPT_CONTINUE) } {UNICODE_ESCAPE_SEQUENCE} | {HEX_ESCAPE_SEQUENCE} { escaped_unicode_utf_8(); } @@ -1169,7 +1169,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_REGEX_START} { EXEC(literal_regex_start()) } {LITERAL_REGEX_END} { EXEC(literal_regex_end()) } {HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } +{HTML_TAG_SCRIPT_CLOSE} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } else { ctag_in_regex(); } } \\{CR} { BEGIN(regst); RETURN(BAD_TOKEN) } \\{LF} { BEGIN(regst); RETURN(BAD_TOKEN) } {CR} { BEGIN(regst); RETURN(BAD_TOKEN) } @@ -2036,6 +2036,11 @@ 
bool JSTokenizer::is_opening_tag_seen() const return opening_tag_seen; } +bool JSTokenizer::is_closing_tag_seen() const +{ + return closing_tag_seen; +} + void JSTokenizer::set_block_param(bool f) { scope_cur().block_param = f; @@ -2242,7 +2247,17 @@ void JSTokenizer::dealias_finalize() } JSTokenizer::JSRet JSTokenizer::html_closing_script_tag() -{ return global_scope() ? SCRIPT_ENDED : ENDED_IN_INNER_SCOPE; } +{ + if (!ext_script) + return global_scope() ? SCRIPT_ENDED : ENDED_IN_INNER_SCOPE; + else + { + closing_tag_seen = true; + states_correct(1); + operator_comparison(); + return EOS; + } +} JSTokenizer::JSRet JSTokenizer::literal_dq_string_start() { @@ -2976,6 +2991,13 @@ void JSTokenizer::explicit_otag() operator_comparison(); } +void JSTokenizer::ctag_in_regex() +{ + // out of '</script>', consume only the leading '<' and renormalize the rest + states_correct(1); + ECHO; +} + JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in, bool external_script) { yy_flush_buffer(YY_CURRENT_BUFFER); diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index c32c87e2a..3171828d8 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -1929,6 +1929,33 @@ static const char unexpected_tag_buf26[] = static const char unexpected_tag_expected26[] = "var regex=/ `;"; + +static const char unexpected_tag_expected27[] = + "var template=` "; + +static const char unexpected_tag_expected27_ext[] = + "var template=` `;"; + +static const char unexpected_tag_buf28[] = + "var regex = / /;/"; + +static const char unexpected_tag_expected28[] = + "var regex=/ "; + +static const char unexpected_tag_expected28_ext[] = + "var regex=/ /;/"; + +static const char unexpected_tag_buf29[] = + "var a = 5 /"; + +static const char unexpected_tag_expected29[] = + "var a=5"; + +static const char unexpected_tag_expected29_ext[] = + "var a=5/"; + TEST_CASE("nested script tags", "[JSNormalizer]") { SECTION("explicit open tag - simple") 
@@ -2082,6 +2109,21 @@ TEST_CASE("nested script tags", "[JSNormalizer]") VALIDATE(unexpected_tag_buf26, unexpected_tag_expected26); CHECK_OTAG(true); } + SECTION("closing tag within template literal") + { + NORMALIZE(unexpected_tag_buf27); + VALIDATE_FAIL(unexpected_tag_buf27, unexpected_tag_expected27, JSTokenizer::CLOSING_TAG, 26); + } + SECTION("closing tag within regex literal") + { + NORMALIZE(unexpected_tag_buf28); + VALIDATE_FAIL(unexpected_tag_buf28, unexpected_tag_expected28, JSTokenizer::CLOSING_TAG, 23); + } + SECTION("closing tag from regex literal expression") + { + NORMALIZE(unexpected_tag_buf29); + VALIDATE_FAIL(unexpected_tag_buf29, unexpected_tag_expected29, JSTokenizer::SCRIPT_ENDED, 19); + } } TEST_CASE("opening tag sequence", "[JSNormalizer]") @@ -2322,6 +2364,22 @@ TEST_CASE("nested script tags in an external script", "[JSNormalizer]") VALIDATE(unexpected_tag_buf26, unexpected_tag_expected26); CHECK_OTAG(false); } + SECTION("closing tag within template literal") + { + NORMALIZE_EXT(unexpected_tag_buf27); + VALIDATE(unexpected_tag_buf27, unexpected_tag_expected27_ext); + } + SECTION("closing tag within regex literal") + { + NORMALIZE_EXT(unexpected_tag_buf28); + VALIDATE(unexpected_tag_buf28, unexpected_tag_expected28_ext); + } + SECTION("closing tag from regex literal expression") + { + NORMALIZE_EXT(unexpected_tag_buf29); + CHECK(norm.is_closing_tag_seen()); + VALIDATE(unexpected_tag_buf29, unexpected_tag_expected29_ext); + } } TEST_CASE("split between tokens", "[JSNormalizer]")