From: Danylo Kyrylov -X (dkyrylov - SOFTSERVE INC at Cisco)
Date: Fri, 24 May 2024 08:31:12 +0000 (+0000)
Subject: Pull request #4304: js_norm: abort PDF norm on syntax failure
X-Git-Tag: 3.2.2.0~10
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2223f430ac39e40f1d6c39629a7ba086a8e17cd3;p=thirdparty%2Fsnort3.git

Pull request #4304: js_norm: abort PDF norm on syntax failure

Merge in SNORT/snort3 from ~DKYRYLOV/snort3:pdf_no_inf_tokens to master

Squashed commit of the following:

commit abc86c1343f5f3b7ce2675b2742306e3c29289e9
Author: dkyrylov
Date:   Tue Apr 30 18:02:22 2024 +0300

    js_norm: stop PDF processing on syntax error

    Stop processing if buffer size is extended to fit a token over 16k in length;
    Abort further normalization after PDF tokenizer failure;
    Process last remaining JavaScript in PDU up to normalization failure.
---

diff --git a/src/js_norm/js_norm.cc b/src/js_norm/js_norm.cc
index 07ed4983a..38da1e92d 100644
--- a/src/js_norm/js_norm.cc
+++ b/src/js_norm/js_norm.cc
@@ -130,7 +130,7 @@ void JSNorm::normalize(const void* in_data, size_t in_len, const void*& data, si
         JSNormModule::increment_peg_counts(PEG_BYTES, next - src_ptr);
         src_ptr = next;

-        alive = post_proc(ret);
+        alive = alive and post_proc(ret);
     }

     if (jsn_ctx != nullptr)
diff --git a/src/js_norm/js_pdf_norm.cc b/src/js_norm/js_pdf_norm.cc
index 0147bb124..ad3d8df79 100644
--- a/src/js_norm/js_pdf_norm.cc
+++ b/src/js_norm/js_pdf_norm.cc
@@ -60,7 +60,7 @@ bool PDFJSNorm::pre_proc()
     {
         trace_logf(2, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(),
             "pdf processing failed: %d\n", (int)r);
-        return false;
+        alive = false;
     }

     src_ptr = (const uint8_t*)buf_pdf_out.data();
diff --git a/src/js_norm/pdf_tokenizer.h b/src/js_norm/pdf_tokenizer.h
index b520c64b2..4aca27c91 100644
--- a/src/js_norm/pdf_tokenizer.h
+++ b/src/js_norm/pdf_tokenizer.h
@@ -41,6 +41,7 @@ public:
         INCOMPLETE_ARRAY_IN_DICTIONARY,
         STREAM_NO_LENGTH,
         UNEXPECTED_SYMBOL,
+        TOKEN_TOO_LONG,
         MAX
     };

diff --git a/src/js_norm/pdf_tokenizer.l b/src/js_norm/pdf_tokenizer.l
index 7be9fd698..68bae2847 100644
--- a/src/js_norm/pdf_tokenizer.l
+++ b/src/js_norm/pdf_tokenizer.l
@@ -79,7 +79,9 @@ GRP_DELIMITER     [\(\)\<\>\[\]\{\}\/\%]
 GRP_REGULAR       [^\x00\x09\x0a\x0c\x0d\x20\(\)\<\>\[\]\{\}\/\%]

 /* 7.2.3 Comments */
-COMMENT           %{GRP_NOT_NEWLINE}*{EOL_MARKER}
+COMMENT_START     %
+COMMENT_CONTENT   {GRP_NOT_NEWLINE}{1,16}
+COMMENT_END       {EOL_MARKER}

 /* 7.3.2 Boolean Objects */
 OBJ_BOOLEAN       true|false
@@ -145,17 +147,20 @@ OBJ_REFERENCE     {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}
 SKIP              [^[:digit:]%]{1,16}|.
 WHITESPACE        {GRP_WHITESPACE}{1,16}

-/* Start conditions: INITIAL or inside dictionary, literal string, hexadecimal string, stream */
+/* Start conditions: structures: comment, indirect object, dictionary or array */
+%x comment
 %x indobj
-%x stream
 %x dictnr
+
+/* Start conditions: literals: regular, hexadecimal, stream */
 %x litstr
 %x hexstr
+%x stream
 %x jslstr
 %x jshstr
 %x jsstream

-/* Start conditions: UTF-16BE BOM, UTF-16BE literal string, UTF-16BE hexadecimal string, UTF-16BE stream */
+/* Start conditions: UTF-16BE: BOM, hex BOM, regular, hexadecimal, stream */
 %x u16
 %x u16hex
 %x jsstru16
@@ -164,11 +169,12 @@ WHITESPACE        {GRP_WHITESPACE}{1,16}

 %%

-{SKIP}                                     { }
-{COMMENT}                                  { }
+
+<INITIAL,indobj,dictnr>{COMMENT_START}     { PUSH(comment); }
+<comment>{COMMENT_CONTENT}                 { }
+<comment>{COMMENT_END}                     { POP(); }

 {INDIRECT_OBJ_OPEN}                        { PUSH(indobj); h_ind_obj_open(); }
-<indobj>{COMMENT}                          { }
 <indobj>{WHITESPACE}                       { }
 <indobj>{INDIRECT_OBJ_CLOSE}               { POP(); h_ind_obj_close(); }

@@ -183,7 +189,6 @@ WHITESPACE        {GRP_WHITESPACE}{1,16}
 <indobj>{OBJ_DICT_OPEN}                    { PUSH(dictnr); EXEC(h_dict_open()) }
 <dictnr>{OBJ_DICT_OPEN}                    { PUSH(dictnr); EXEC(h_dict_open()) }
 <dictnr>{OBJ_DICT_CLOSE}                   { POP(); EXEC(h_dict_close()) }
-<dictnr>{COMMENT}                          { }
 <dictnr>{WHITESPACE}                       { }
 <dictnr>{OBJ_REFERENCE}                    { EXEC(h_dict_other()) h_ref(); }
 <dictnr>{OBJ_BOOLEAN}                      { EXEC(h_dict_other()) }
@@ -238,6 +243,7 @@ WHITESPACE        {GRP_WHITESPACE}{1,16}

 <*><<EOF>>                                 { return PDFRet::EOS; }

+{SKIP}                                     { }
 <*>.|\n                                    { return PDFRet::UNEXPECTED_SYMBOL; }

 %%
@@ -619,6 +625,16 @@ PDFTokenizer::~PDFTokenizer()

 PDFTokenizer::PDFRet PDFTokenizer::process()
 {
-    auto r = yylex();
-    return static_cast<PDFTokenizer::PDFRet>(r);
+    auto r = static_cast<PDFTokenizer::PDFRet>(yylex());
+
+    if (!yy_buffer_stack or !YY_CURRENT_BUFFER_LVALUE)
+        return r;
+
+    if (YY_CURRENT_BUFFER_LVALUE->yy_buf_size > YY_BUF_SIZE)
+        r = PDFTokenizer::TOKEN_TOO_LONG;
+
+    if (r != PDFTokenizer::EOS)
+        yy_flush_buffer(YY_CURRENT_BUFFER);
+
+    return r;
 }
diff --git a/src/js_norm/test/pdf_tokenizer_test.cc b/src/js_norm/test/pdf_tokenizer_test.cc
index fd1df1cf3..f6b53b874 100644
--- a/src/js_norm/test/pdf_tokenizer_test.cc
+++ b/src/js_norm/test/pdf_tokenizer_test.cc
@@ -279,6 +279,15 @@ TEST_CASE("basic", "[PDFTokenizer]")
             "", PDFTokenizer::PDFRet::INCOMPLETE_ARRAY_IN_DICTIONARY
         );
     }
+    SECTION("token too long")
+    {
+        test_pdf_proc(
+            "1"s + std::string(16 * 1024,' ') + " 0 obj"
+            "<< >>"
+            "endobj"s,
+            "", PDFTokenizer::PDFRet::TOKEN_TOO_LONG
+        );
+    }
 }

 TEST_CASE("JS location", "[PDFTokenizer]")
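
Note for readers following the change: the core idea is that a tokenizer failure (including the new 16k token cap) is made sticky, so later chunks of the same PDU are not re-scanned. Below is a minimal standalone sketch of that pattern; it is not Snort code, and all names (Normalizer, Status, tokenize) are hypothetical simplifications of the `alive = alive and post_proc(ret)` logic above.

    // sticky_abort_sketch.cc -- illustrative only, not part of the patch
    #include <cstdio>
    #include <string>
    #include <vector>

    enum class Status { EOS, TOKEN_TOO_LONG };

    struct Normalizer
    {
        bool alive = true;

        // Stand-in tokenizer: fails on any chunk longer than 16k,
        // loosely mirroring the TOKEN_TOO_LONG buffer-size check.
        static Status tokenize(const std::string& chunk)
        {
            return chunk.size() > 16 * 1024 ? Status::TOKEN_TOO_LONG : Status::EOS;
        }

        void normalize(const std::string& chunk)
        {
            if (!alive)
                return;                              // aborted earlier: skip further work

            Status r = tokenize(chunk);
            alive = alive and (r == Status::EOS);    // failure is sticky across chunks

            std::printf("%zu bytes -> %s\n", chunk.size(),
                alive ? "normalized" : "normalization aborted");
        }
    };

    int main()
    {
        Normalizer n;
        std::vector<std::string> pdu =
            { "var a = 1;", std::string(16 * 1024 + 1, 'A'), "var b = 2;" };

        for (const auto& chunk : pdu)
            n.normalize(chunk);                      // third chunk is skipped: context stays dead
    }

Running the sketch shows the first chunk normalized, the second aborted on the oversized token, and the third skipped entirely, which is the behavior the commit message describes for a PDU after a PDF tokenizer failure.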