From: Mike Stepanek (mstepane) Date: Thu, 31 Mar 2022 16:31:37 +0000 (+0000) Subject: Pull request #3334: Opening/closing tags in external scripts. X-Git-Tag: 3.1.27.0~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b9d04a42f60ec889e9516aa9792c09750d44523;p=thirdparty%2Fsnort3.git Pull request #3334: Opening/closing tags in external scripts. Merge in SNORT/snort3 from ~OSHUMEIK/snort3:js_oc_tags to master Squashed commit of the following: commit 0ee5e10bae28eaed6ef387cb487cf51d102e1b84 Author: Oleksii Shumeiko Date: Wed Mar 30 18:38:41 2022 +0300 utils: allow opening/closing tags in external scripts --- diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index deb0722f1..096413252 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -59,12 +59,12 @@ static const char* ret2str(JSTokenizer::JSRet ret) } static inline JSTokenizer::JSRet js_normalize(JSNormalizer& ctx, const Packet* current_packet, - const char* const end, const char*& ptr) + const char* const end, const char*& ptr, bool external_script) { trace_logf(3, http_trace, TRACE_JS_DUMP, current_packet, "original[%zu]: %.*s\n", end - ptr, static_cast(end - ptr), ptr); - auto ret = ctx.normalize(ptr, end - ptr); + auto ret = ctx.normalize(ptr, end - ptr, external_script); auto src_next = ctx.get_src_next(); trace_logf(3, http_trace, TRACE_JS_PROC, current_packet, @@ -167,7 +167,7 @@ void HttpJsNorm::do_external(const Field& input, Field& output, trace_logf(1, http_trace, TRACE_JS_PROC, current_packet, "external script at %zd offset\n", ptr - (const char*)input.start()); - auto ret = js_normalize(js_ctx, current_packet, end, ptr); + auto ret = js_normalize(js_ctx, current_packet, end, ptr, true); switch (ret) { @@ -310,7 +310,7 @@ void HttpJsNorm::do_inline(const Field& input, Field& output, max_template_nesting, max_bracket_depth, max_scope_depth, ignored_ids); auto output_size_before = js_ctx.script_size(); - auto ret = js_normalize(js_ctx, current_packet, end, ptr); + auto ret = js_normalize(js_ctx, current_packet, end, ptr, false); switch (ret) { diff --git a/src/utils/js_normalizer.cc b/src/utils/js_normalizer.cc index 1cce7fa7f..d2ae20955 100644 --- a/src/utils/js_normalizer.cc +++ b/src/utils/js_normalizer.cc @@ -49,7 +49,7 @@ JSNormalizer::~JSNormalizer() tmp_buf_size = 0; } -JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len) +JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len, bool external_script) { assert(src); @@ -81,7 +81,7 @@ JSTokenizer::JSRet JSNormalizer::normalize(const char* src, size_t src_len) out_buf.reserve(src_len * BUFF_EXP_FACTOR); size_t r_bytes = in_buf.last_chunk_offset(); - auto ret = tokenizer.process(r_bytes); + auto ret = tokenizer.process(r_bytes, external_script); rem_bytes -= r_bytes; src_next = src + r_bytes; diff --git a/src/utils/js_normalizer.h b/src/utils/js_normalizer.h index fa53bb6e3..dcfb1e94f 100644 --- a/src/utils/js_normalizer.h +++ b/src/utils/js_normalizer.h @@ -38,7 +38,8 @@ public: int tmp_cap_size = JSTOKENIZER_BUF_MAX_SIZE); ~JSNormalizer(); - JSTokenizer::JSRet normalize(const char* src, size_t src_len); + JSTokenizer::JSRet normalize(const char* src, size_t src_len, + bool external_script = false); const char* get_src_next() const { return src_next; } diff --git a/src/utils/js_tokenizer.h b/src/utils/js_tokenizer.h index 21a1fd725..c263a1a2c 100644 --- a/src/utils/js_tokenizer.h +++ b/src/utils/js_tokenizer.h @@ -164,7 +164,7 @@ public: int cap_size = JSTOKENIZER_BUF_MAX_SIZE); ~JSTokenizer() override; - JSRet process(size_t& bytes_in); + JSRet process(size_t& bytes_in, bool external_script = false); void reset_output() { ignored_id_pos = -1; } @@ -320,6 +320,7 @@ private: JSIdentifierCtxBase& ident_ctx; size_t bytes_read; size_t tmp_bytes_read; + bool ext_script; struct { diff --git a/src/utils/js_tokenizer.l b/src/utils/js_tokenizer.l index 263a91786..d61b21555 100644 --- a/src/utils/js_tokenizer.l +++ b/src/utils/js_tokenizer.l @@ -1055,14 +1055,14 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 %% -{WHITESPACES} { /* skip */ } -{CHAR_ESCAPE_SEQUENCES} { /* skip */ } +{WHITESPACES} { /* skip */ } +{CHAR_ESCAPE_SEQUENCES} { /* skip */ } -{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; } -{LINE_TERMINATORS} { newline_found = true; } +{LINE_TERMINATORS} { BEGIN(regst); newline_found = true; } +{LINE_TERMINATORS} { newline_found = true; } -{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) } -{HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) } +{HTML_TAG_SCRIPT_OPEN} { BEGIN(regst); RETURN(OPENING_TAG) } +{HTML_TAG_SCRIPT_CLOSE} { EXEC(html_closing_script_tag()) } {HTML_COMMENT_OPEN} { BEGIN(lcomm); } {LINE_COMMENT_START} { BEGIN(lcomm); } @@ -1072,25 +1072,26 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LINE_COMMENT_END2} { BEGIN(regst); newline_found = true; } {LINE_COMMENT_END1} { BEGIN(char_code); newline_found = true; } {LINE_COMMENT_END2} { BEGIN(char_code); newline_found = true; } -{LINE_COMMENT_END3} { BEGIN(regst); RETURN(OPENING_TAG) } -{LINE_COMMENT_END4} { BEGIN(regst); RETURN(CLOSING_TAG) } +{LINE_COMMENT_END3} { if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } } +{LINE_COMMENT_END4} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } } {LINE_COMMENT_SKIP} { /* skip */ } <> { RETURN(SCRIPT_CONTINUE) } - {BLOCK_COMMENT_START} { BEGIN(bcomm); } -{BLOCK_COMMENT_START} { BEGIN(char_code_bcomm); } -{BLOCK_COMMENT_END1} { BEGIN(regst); } -{BLOCK_COMMENT_END1} { BEGIN(char_code); } -{BLOCK_COMMENT_END2} { BEGIN(regst); RETURN(OPENING_TAG) } -{BLOCK_COMMENT_END3} { BEGIN(regst); RETURN(CLOSING_TAG) } -{BLOCK_COMMENT_LINE1} | -{BLOCK_COMMENT_LINE2} { newline_found = true; } -{BLOCK_COMMENT_SKIP} { /* skip */ } -<> { RETURN(SCRIPT_CONTINUE) } + {BLOCK_COMMENT_START} { BEGIN(bcomm); } +{BLOCK_COMMENT_START} { BEGIN(char_code_bcomm); } +{BLOCK_COMMENT_END1} { BEGIN(regst); } +{BLOCK_COMMENT_END1} { BEGIN(char_code); } +{BLOCK_COMMENT_END2} { if (!ext_script) { BEGIN(regst); RETURN(OPENING_TAG) } } +{BLOCK_COMMENT_END3} { if (!ext_script) { BEGIN(regst); RETURN(CLOSING_TAG) } } +{BLOCK_COMMENT_LINE1} | +{BLOCK_COMMENT_LINE2} { newline_found = true; } +{BLOCK_COMMENT_SKIP} { /* skip */ } +<> { RETURN(SCRIPT_CONTINUE) } {LITERAL_DQ_STRING_START} { EXEC(literal_dq_string_start()) } {LITERAL_DQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } +{HTML_TAG_SCRIPT_OPEN} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } } +{HTML_TAG_SCRIPT_CLOSE} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } } \\{CR}{LF} { /* skip */ } \\{LF} { /* skip */ } \\{CR} { /* skip */ } @@ -1110,7 +1111,8 @@ ALL_UNICODE [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8 {LITERAL_SQ_STRING_START} { EXEC(literal_sq_string_start()) } {LITERAL_SQ_STRING_END} { dealias_append(); ECHO; BEGIN(divop); } -{HTML_TAG_SCRIPT_CLOSE} { BEGIN(regst); RETURN(CLOSING_TAG) } +{HTML_TAG_SCRIPT_OPEN} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(OPENING_TAG) } } +{HTML_TAG_SCRIPT_CLOSE} { if (ext_script) { ECHO; } else { BEGIN(regst); RETURN(CLOSING_TAG) } } \\{CR}{LF} { /* skip */ } \\{LF} { /* skip */ } \\{CR} { /* skip */ } @@ -2818,11 +2820,12 @@ void JSTokenizer::char_code_no_match() memset((void*)(states + sp), 0, sizeof(states[0])); } -JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in) +JSTokenizer::JSRet JSTokenizer::process(size_t& bytes_in, bool external_script) { yy_flush_buffer(YY_CURRENT_BUFFER); unescape_nest_seen = false; mixed_encoding_seen = false; + ext_script = external_script; auto r = yylex(); diff --git a/src/utils/test/js_normalizer_test.cc b/src/utils/test/js_normalizer_test.cc index d17044a0a..d6a04bc55 100644 --- a/src/utils/test/js_normalizer_test.cc +++ b/src/utils/test/js_normalizer_test.cc @@ -45,6 +45,14 @@ using namespace snort; int act_len = norm.script_size(); \ const char* dst = norm.take_script(); +#define NORMALIZE_EXT(src) \ + JSIdentifierCtxStub ident_ctx; \ + JSNormalizer norm(ident_ctx, norm_depth, max_template_nesting, max_bracket_depth); \ + auto ret = norm.normalize(src, sizeof(src), true); \ + const char* ptr = norm.get_src_next(); \ + int act_len = norm.script_size(); \ + const char* dst = norm.take_script(); + #define VALIDATE(src, expected) \ CHECK(ret == JSTokenizer::SCRIPT_CONTINUE); \ CHECK((ptr - src) == sizeof(src)); \ @@ -1574,6 +1582,9 @@ static const char unexpected_tag_buf0[] = static const char unexpected_tag_expected0[] = "var a=1;"; +static const char unexpected_tag_expected0_ext[] = + "var a=1;"; + static const char unexpected_tag_buf1[] = "var a = 1;\n" " something';\n" @@ -1614,6 +1637,9 @@ static const char unexpected_tag_buf5[] = static const char unexpected_tag_expected5[] = "var a=1;var str='"; +static const char unexpected_tag_expected5_ext[] = + "var a=1;var str=' something';var b=2;"; + static const char unexpected_tag_buf6[] = "var a = 1;\n" "var str = 'something something';\n" @@ -1622,6 +1648,9 @@ static const char unexpected_tag_buf6[] = static const char unexpected_tag_expected6[] = "var a=1;var str='something "; +static const char unexpected_tag_expected6_ext[] = + "var a=1;var str='something something';var b=2;"; + static const char unexpected_tag_buf7[] = "var a = 1;\n" "var str = 'something ';\n" @@ -1630,6 +1659,9 @@ static const char unexpected_tag_buf7[] = static const char unexpected_tag_expected7[] = "var a=1;var str='something "; +static const char unexpected_tag_expected7_ext[] = + "var a=1;var str='something ';var b=2;"; + static const char unexpected_tag_buf8[] = "var a = 1;\n" "var str = 'something \\ something';\n" @@ -1638,6 +1670,9 @@ static const char unexpected_tag_buf8[] = static const char unexpected_tag_expected8[] = "var a=1;var str='something \\"; +static const char unexpected_tag_expected8_ext[] = + "var a=1;var str='something \\ something';var b=2;"; + static const char unexpected_tag_buf9[] = "var a = 1;\n" "var str = 'something \\<\\/script\\> something';\n" @@ -1646,6 +1681,9 @@ static const char unexpected_tag_buf9[] = static const char unexpected_tag_expected9[] = "var a=1;var str='something \\<\\/script\\> something';var b=2;"; +static const char unexpected_tag_expected9_ext[] = + "var a=1;var str='something \\<\\/script\\> something';var b=2;"; + static const char unexpected_tag_buf10[] = "var a = 1;\n" "// something\n" @@ -1702,6 +1758,9 @@ static const char unexpected_tag_buf16[] = static const char unexpected_tag_expected16[] = "var a=1;"; +static const char unexpected_tag_expected16_ext[] = + "var a=1;var b=2;"; + static const char unexpected_tag_buf17[] = "var a = 1;\n" "\n" @@ -1710,6 +1769,9 @@ static const char unexpected_tag_buf17[] = static const char unexpected_tag_expected17[] = "var a=1;"; +static const char unexpected_tag_expected17_ext[] = + "var a=1;var b=2;"; + static const char unexpected_tag_buf18[] = "var a = 1;\n" "//something \n" @@ -1718,6 +1780,9 @@ static const char unexpected_tag_buf18[] = static const char unexpected_tag_expected18[] = "var a=1;"; +static const char unexpected_tag_expected18_ext[] = + "var a=1;var b=2;"; + static const char unexpected_tag_buf19[] = "var a = 1;\n" "/*\n" @@ -1727,6 +1792,9 @@ static const char unexpected_tag_buf19[] = static const char unexpected_tag_expected19[] = "var a=1;"; +static const char unexpected_tag_expected19_ext[] = + "var a=1;var b=2;"; + static const char unexpected_tag_buf20[] = "var a = 1;\n" "/*something\n" @@ -1737,6 +1805,9 @@ static const char unexpected_tag_buf20[] = static const char unexpected_tag_expected20[] = "var a=1;"; +static const char unexpected_tag_expected20_ext[] = + "var a=1;var b=2;"; + static const char unexpected_tag_buf21[] = "var a = 1;\n" "/*something\n" @@ -1746,6 +1817,9 @@ static const char unexpected_tag_buf21[] = static const char unexpected_tag_expected21[] = "var a=1;"; +static const char unexpected_tag_expected21_ext[] = + "var a=1;var b=2;"; + static const char unexpected_tag_buf22[] = "var a = 1;\n" "var str = 'script somescript /script something';\n" @@ -1754,6 +1828,9 @@ static const char unexpected_tag_buf22[] = static const char unexpected_tag_expected22[] = "var a=1;var str='script somescript /script something';var b=2;"; +static const char unexpected_tag_expected22_ext[] = + "var a=1;var str='script somescript /script something';var b=2;"; + static const char unexpected_tag_buf23[] = "var a = 1;\n" "var str = 'script somescript /script something