From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Mon, 25 Jul 2022 08:29:27 +0000 (+0000) Subject: Pull request #3527: JavaScript Normalizer: normalize JavaScript after opening tag X-Git-Tag: 3.1.38.0~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b65fc8aee4f5e949cca04c4a7f972cc2ec381875;p=thirdparty%2Fsnort3.git Pull request #3527: JavaScript Normalizer: normalize JavaScript after opening tag Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_fix_otag to master Squashed commit of the following: commit 28534c108a56e40b76310a6076820739b82e7e4a Author: Oleksandr Serhiienko Date: Tue Jul 19 20:53:56 2022 +0300 utils: continue JS normalization after opening tag seen * utils: normalize JavaScript after any opening tag seen * utils: re-normalize explicit opening tag by common rules * utils: throw opening tag built-in alert for inline scripts only * utils: remove opening tag return code * http_inspect: do not stop normalization in case of opening script tag * http_inspect: update trace messages --- diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index 196220ddb..5ce25cead 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -39,7 +39,6 @@ static const char* jsret_codes[] = "end of stream", "script ended", "script continues", - "opening tag", "closing tag", "bad token", "identifier overflow", @@ -173,11 +172,6 @@ void HttpJsNorm::do_external(const Field& input, Field& output, events->create_event(EVENT_JS_CLOSING_TAG); ssn->js_built_in_event = true; break; - case JSTokenizer::OPENING_TAG: - *infractions += INF_JS_OPENING_TAG; - events->create_event(EVENT_JS_OPENING_TAG); - ssn->js_built_in_event = true; - break; case JSTokenizer::BAD_TOKEN: case JSTokenizer::WRONG_CLOSING_SYMBOL: case JSTokenizer::ENDED_IN_INNER_SCOPE: @@ -313,10 +307,6 @@ void HttpJsNorm::do_inline(const Field& input, Field& output, break; case JSTokenizer::SCRIPT_CONTINUE: break; - case JSTokenizer::OPENING_TAG: - *infractions += INF_JS_OPENING_TAG; - events->create_event(EVENT_JS_OPENING_TAG); - break; case JSTokenizer::CLOSING_TAG: *infractions += INF_JS_CLOSING_TAG; events->create_event(EVENT_JS_CLOSING_TAG); @@ -361,6 +351,11 @@ void HttpJsNorm::do_inline(const Field& input, Field& output, *infractions += INF_MIXED_ENCODINGS; events->create_event(EVENT_MIXED_ENCODINGS); } + if (js_ctx.is_opening_tag_seen()) + { + *infractions += INF_JS_OPENING_TAG; + events->create_event(EVENT_JS_OPENING_TAG); + } script_continue = ret == JSTokenizer::SCRIPT_CONTINUE; } diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index dcfb1e94f..db1b13760 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -65,6 +65,9 @@ public: bool is_mixed_encoding_seen() const { return tokenizer.is_mixed_encoding_seen(); } + bool is_opening_tag_seen() const + { return tokenizer.is_opening_tag_seen(); } + #if defined(CATCH_TEST_BUILD) || defined(BENCHMARK_TEST) const char* get_tmp_buf() const { return tmp_buf; } diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index fd985c6c8..18c8ce392 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -151,7 +151,6 @@ public: EOS = 0, SCRIPT_ENDED, SCRIPT_CONTINUE, - OPENING_TAG, CLOSING_TAG, BAD_TOKEN, IDENTIFIER_OVERFLOW, @@ -176,6 +175,7 @@ public: bool is_unescape_nesting_seen() const; bool is_mixed_encoding_seen() const; + bool is_opening_tag_seen() const; protected: [[noreturn]] void LexerError(const char* msg) override { snort::FatalError("%s", msg); } @@ -306,6 +306,7 @@ private: void escaped_url_sequence_latin_1(); void lit_int_code_point(int base); void char_code_no_match(); + void explicit_otag(); static const char* p_scope_codes[]; @@ -321,6 +322,7 @@ private: bool dealias_stored = false; bool unescape_nest_seen = false; bool mixed_encoding_seen = false; + bool opening_tag_seen = false; uint8_t max_template_nesting; VStack brace_depth; diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index d5e06b373..ca5821a12 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -1075,7 +1075,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LINE_TERMINATORS} { BEGIN(regst); newline_found = true; } {LINE_TERMINATORS} { newline_found = true; } -{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) } +{HTML_TAG_SCRIPT_OPEN} { explicit_otag(); } {HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) } {HTML_COMMENT_OPEN} { BEGIN(lcomm); } @@ -1088,7 +1088,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LINE_COMMENT_END1} { BEGIN(char_code); newline_found = true; } {LINE_COMMENT_END2} { BEGIN(char_code); newline_found = true; } {LINE_COMMENT_END4} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } } -{LINE_COMMENT_END3} { if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } } +{LINE_COMMENT_END3} { if (!ext_script) { opening_tag_seen = true; } } {LINE_COMMENT_SKIP} { /* skip */ } <> { RETURN(SCRIPT_CONTINUE) } @@ -1096,7 +1096,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {BLOCK_COMMENT_START} { BEGIN(char_code_bcomm); } {BLOCK_COMMENT_END1} { BEGIN(regst); } {BLOCK_COMMENT_END1} { BEGIN(char_code); } -{BLOCK_COMMENT_END2} { if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } } +{BLOCK_COMMENT_END2} { if (!ext_script) { opening_tag_seen = true; } } {BLOCK_COMMENT_END3} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } } {BLOCK_COMMENT_LINE1} | {BLOCK_COMMENT_LINE2} { newline_found = true; } @@ -1105,7 +1105,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) } {LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } -{HTML_TAG_SCRIPT_OPEN} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } } +{HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } {HTML_TAG_SCRIPT_CLOSE} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } } \\{CR}{LF} { /* skip */ } \\{LF} { /* skip */ } @@ -1127,7 +1127,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) } {LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } -{HTML_TAG_SCRIPT_OPEN} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } } +{HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } {HTML_TAG_SCRIPT_CLOSE} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } } \\{CR}{LF} { /* skip */ } \\{LF} { /* skip */ } @@ -1150,6 +1150,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_TEMPLATE_START} { EXEC(literal_template_start()) } (\\\\)*{LITERAL_TEMPLATE_END} { dealias_append(); ECHO; BEGIN(divop); } (\\\\)*{LITERAL_TEMPLATE_SUBST_START} { EXEC(process_subst_open()) dealias_reset(); } +{HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } <> { RETURN(SCRIPT_CONTINUE) } {UNICODE_ESCAPE_SEQUENCE} | @@ -1167,6 +1168,7 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_REGEX_START} { EXEC(literal_regex_start()) } {LITERAL_REGEX_END} { EXEC(literal_regex_end()) } +{HTML_TAG_SCRIPT_OPEN} { if (!ext_script) { opening_tag_seen = true; } ECHO; } {HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } \\{CR} { BEGIN(regst); RETURN(BAD_TOKEN) } \\{LF} { BEGIN(regst); RETURN(BAD_TOKEN) } @@ -2006,6 +2008,11 @@ bool JSTokenizer::is_mixed_encoding_seen() const return mixed_encoding_seen; } +bool JSTokenizer::is_opening_tag_seen() const +{ + return opening_tag_seen; +} + void JSTokenizer::set_block_param(bool f) { scope_cur().block_param = f; @@ -2935,6 +2942,19 @@ void JSTokenizer::char_code_no_match() memset((void*)(states + sp), 0, sizeof(states[0])); } +void JSTokenizer::explicit_otag() +{ + if (!ext_script) + opening_tag_seen = true; + + // discard match of the script tag and scan again without leading '<' + states_correct(1); + yyless(1); + + // process leading '<' as a comparison operator + operator_comparison(); +} + JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in, bool external_script) { yy_flush_buffer(YY_CURRENT_BUFFER); diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index 926f6f1b4..7f6cd4218 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -67,6 +67,12 @@ using namespace snort; CHECK(!memcmp(dst, expected, act_len)); \ delete[] dst; +#define CHECK_OTAG(is_set) \ + if (is_set) \ + CHECK(norm.is_opening_tag_seen()); \ + else \ + CHECK(!norm.is_opening_tag_seen()); + #define NORMALIZE_L(src, src_len, dst, dst_len, depth, ret, ptr, len) \ { \ @@ -1683,10 +1689,7 @@ static const char unexpected_tag_buf0[] = "var b = 2;\r\n"; static const char unexpected_tag_expected0[] = - "var a=1;"; - -static const char unexpected_tag_expected0_ext[] = - "var a=1;"; + "var a=1;