From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Mon, 12 Dec 2022 19:22:15 +0000 (+0000) Subject: Pull request #3700: js_norm: add support for email protocols X-Git-Tag: 3.1.49.0~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1f7945c13aeaabb594a423174c555e092886ea72;p=thirdparty%2Fsnort3.git Pull request #3700: js_norm: add support for email protocols Merge in SNORT/snort3 from ~OSERHIIE/snort3:jsn_others to master Squashed commit of the following: commit ca987f6324421b17f3fd2d0bdd39c6a65e4cda8c Author: Oleksii Shumeiko Date: Fri Dec 2 16:11:01 2022 +0200 js_norm: add support for email protocols * js_norm: move JS PDF normalizer to a common directory * js_norm: turn API classes to SO PUBLIC * http_inspect: update js_pdf_scripts peg description * imap: add JSN for PDF attachments * pop: add JSN for PDF attachments * smtp: add JSN for PDF attachments * update dev_notes --- diff --git a/src/js_norm/CMakeLists.txt b/src/js_norm/CMakeLists.txt index 15aa9c20c..64646124f 100644 --- a/src/js_norm/CMakeLists.txt +++ b/src/js_norm/CMakeLists.txt @@ -21,6 +21,8 @@ set ( JS_SOURCES js_norm_module.h js_normalizer.cc js_normalizer.h + js_pdf_norm.cc + js_pdf_norm.h js_tokenizer.h pdf_tokenizer.h ) diff --git a/src/js_norm/js_norm.cc b/src/js_norm/js_norm.cc index a7e023002..e685ed3f6 100644 --- a/src/js_norm/js_norm.cc +++ b/src/js_norm/js_norm.cc @@ -135,6 +135,27 @@ void JSNorm::normalize(const void* in_data, size_t in_len, const void*& data, si len = jsn_ctx->script_size(); data = jsn_ctx->get_script(); + + if (data and len) + trace_logf(1, js_trace, TRACE_DUMP, packet, + "js_data[%u]: %.*s\n", (unsigned)len, (int)len, (const char*)data); +} + +void JSNorm::flush_data(const void*& data, size_t& len) +{ + len = jsn_ctx->script_size(); + data = jsn_ctx->take_script(); +} + +void JSNorm::flush_data() +{ + delete[] jsn_ctx->take_script(); +} + +void JSNorm::get_data(const void*& data, size_t& len) +{ + len = jsn_ctx->script_size(); + data = jsn_ctx->get_script(); } bool JSNorm::pre_proc() diff --git a/src/js_norm/js_norm.h b/src/js_norm/js_norm.h index 654faa7e6..9bc688237 100644 --- a/src/js_norm/js_norm.h +++ b/src/js_norm/js_norm.h @@ -36,7 +36,7 @@ const char* ret2str(int); namespace snort { -class JSNorm +class SO_PUBLIC JSNorm { public: JSNorm(JSNormConfig*, bool ext_script_type = false); @@ -47,6 +47,9 @@ public: { ++pdu_cnt; } void normalize(const void*, size_t, const void*&, size_t&); + void get_data(const void*&, size_t&); + void flush_data(const void*&, size_t&); + void flush_data(); protected: virtual bool pre_proc(); diff --git a/src/js_norm/js_pdf_norm.cc b/src/js_norm/js_pdf_norm.cc new file mode 100644 index 000000000..20d9dfca0 --- /dev/null +++ b/src/js_norm/js_pdf_norm.cc @@ -0,0 +1,81 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_pdf_norm.cc author Cisco + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "js_pdf_norm.h" + +#include "trace/trace_api.h" + +#include "js_norm_module.h" + +using namespace jsn; +using namespace snort; + +bool PDFJSNorm::pre_proc() +{ + if (src_ptr >= src_end) + return false; + + const Packet* packet = DetectionEngine::get_current_packet(); + + if (!ext_script_type) + { + trace_logf(1, js_trace, TRACE_PROC, packet, + "PDF starts\n"); + ext_script_type = true; + } + else + { + trace_logf(2, js_trace, TRACE_PROC, packet, + "PDF continues\n"); + } + + buf_pdf_in.pubsetbuf(nullptr, 0) + ->pubsetbuf(const_cast((const char*)src_ptr), src_end - src_ptr); + pdf_out.clear(); + delete[] buf_pdf_out.take_data(); + + auto r = extractor.process(); + + if (r != PDFTokenizer::PDFRet::EOS) + { + trace_logf(2, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), + "pdf processing failed: %d\n", (int)r); + return false; + } + + src_ptr = (const uint8_t*)buf_pdf_out.data(); + src_end = src_ptr + buf_pdf_out.data_len(); + + // script object not found + if (!src_ptr) + return false; + + return true; +} + +bool PDFJSNorm::post_proc(int ret) +{ + src_ptr = src_end; // one time per PDU, even if JS Normalizer has not finished + + return JSNorm::post_proc(ret); +} diff --git a/src/js_norm/js_pdf_norm.h b/src/js_norm/js_pdf_norm.h new file mode 100644 index 000000000..547c19f6c --- /dev/null +++ b/src/js_norm/js_pdf_norm.h @@ -0,0 +1,61 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// js_pdf_norm.h author Cisco + +#ifndef JS_PDF_NORM_H +#define JS_PDF_NORM_H + +#include +#include + +#include "js_norm/js_norm.h" +#include "js_norm/pdf_tokenizer.h" +#include "utils/streambuf.h" + +namespace snort +{ + +class SO_PUBLIC PDFJSNorm : public JSNorm +{ +public: + static bool is_pdf(const void* data, size_t len) + { + constexpr char magic[] = "%PDF-1."; + constexpr int magic_len = sizeof(magic) - 1; + return magic_len < len and !strncmp((const char*)data, magic, magic_len); + } + + PDFJSNorm(JSNormConfig* cfg) : + JSNorm(cfg), pdf_in(&buf_pdf_in), pdf_out(&buf_pdf_out), extractor(pdf_in, pdf_out) + { } + +protected: + bool pre_proc() override; + bool post_proc(int) override; + +private: + snort::istreambuf_glue buf_pdf_in; + snort::ostreambuf_infl buf_pdf_out; + std::istream pdf_in; + std::ostream pdf_out; + jsn::PDFTokenizer extractor; +}; + +} + +#endif diff --git a/src/js_norm/js_tokenizer.h b/src/js_norm/js_tokenizer.h index 8eba08683..bbdf78596 100644 --- a/src/js_norm/js_tokenizer.h +++ b/src/js_norm/js_tokenizer.h @@ -45,7 +45,7 @@ class JSTokenizerTester; class JSTestConfig; #endif // CATCH_TEST_BUILD || BENCHMARK_TEST -class JSTokenizer : public yyFlexLexer +class SO_PUBLIC JSTokenizer : public yyFlexLexer { private: enum JSToken diff --git a/src/js_norm/pdf_tokenizer.h b/src/js_norm/pdf_tokenizer.h index 1b80be7da..9a31841c7 100644 --- a/src/js_norm/pdf_tokenizer.h +++ b/src/js_norm/pdf_tokenizer.h @@ -26,12 +26,14 @@ #include #include +#include "main/snort_types.h" + #define PDFTOKENIZER_NAME_MAX_SIZE 16 namespace jsn { -class PDFTokenizer : public yyFlexLexer +class SO_PUBLIC PDFTokenizer : public yyFlexLexer { public: enum PDFRet diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index 1936047eb..14967d21b 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -309,12 +309,6 @@ void js_normalize(const Field& input, Field& output, } } -void HttpJSNorm::flush_data(const void*& data, size_t& len) -{ - len = jsn_ctx->script_size(); - data = jsn_ctx->take_script(); -} - bool HttpInlineJSNorm::pre_proc() { assert(mpse_otag); @@ -437,52 +431,15 @@ bool HttpPDFJSNorm::pre_proc() if (src_ptr >= src_end) return false; - const Packet* packet = DetectionEngine::get_current_packet(); - if (!ext_script_type) - { HttpModule::increment_peg_counts(PEG_JS_PDF); - trace_logf(1, js_trace, TRACE_PROC, packet, - "PDF starts\n"); - ext_script_type = true; - } - else - { - trace_logf(2, js_trace, TRACE_PROC, packet, - "PDF continues\n"); - } - // an input stream should not write to its buffer - buf_pdf_in.pubsetbuf(nullptr, 0) - ->pubsetbuf(const_cast((const char*)src_ptr), src_end - src_ptr); - - pdf_out.clear(); - delete[] buf_pdf_out.take_data(); - - auto r = extractor.process(); - - if (r != PDFTokenizer::PDFRet::EOS) - { - trace_logf(2, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), - "pdf processing failed: %d\n", (int)r); - return false; - } - - src_ptr = (const uint8_t*)buf_pdf_out.data(); - src_end = src_ptr + buf_pdf_out.data_len(); - - // script object not found - if (!src_ptr) - return false; - - return true; + return PDFJSNorm::pre_proc(); } bool HttpPDFJSNorm::post_proc(int ret) { - src_ptr = src_end; // one time per PDU, even if JS Normalizer has not finished - script_continue = ret == (int)jsn::JSTokenizer::SCRIPT_CONTINUE; - return JSNorm::post_proc(ret); + return PDFJSNorm::post_proc(ret); } diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h index ff6945e47..1db1ca0b5 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.h +++ b/src/service_inspectors/http_inspect/http_js_norm.h @@ -22,12 +22,10 @@ #define HTTP_JS_NORM_H #include -#include #include "js_norm/js_norm.h" -#include "js_norm/pdf_tokenizer.h" +#include "js_norm/js_pdf_norm.h" #include "search_engines/search_tool.h" -#include "utils/streambuf.h" #include "http_field.h" #include "http_flow_data.h" @@ -40,12 +38,12 @@ snort::SearchTool* js_create_mpse_tag_attr(); void js_normalize(const Field& input, Field& output, const HttpParaList*, HttpInfractions*, HttpEventGen*); -class HttpJSNorm : public snort::JSNorm +class HttpJSNorm { public: - HttpJSNorm(JSNormConfig* jsn_config) : snort::JSNorm(jsn_config) {} + virtual ~HttpJSNorm() {} - void flush_data(const void*&, size_t&); + virtual snort::JSNorm& ctx() = 0; void link(const void* page, HttpEventGen* http_events_, HttpInfractions* infs) { page_start = (const uint8_t*)page; http_events = http_events_; infractions = infs; } @@ -61,14 +59,17 @@ protected: bool script_continue = false; }; -class HttpInlineJSNorm : public HttpJSNorm +class HttpInlineJSNorm : public snort::JSNorm, public HttpJSNorm { public: HttpInlineJSNorm(JSNormConfig* jsn_config, uint64_t tid, snort::SearchTool* mpse_open_tag, snort::SearchTool* mpse_tag_attr) : - HttpJSNorm(jsn_config), mpse_otag(mpse_open_tag), mpse_attr(mpse_tag_attr), output_size(0), ext_ref_type(false) + JSNorm(jsn_config), mpse_otag(mpse_open_tag), mpse_attr(mpse_tag_attr), output_size(0), ext_ref_type(false) { trans_num = tid; } + snort::JSNorm& ctx() override + { return *this; } + protected: bool pre_proc() override; bool post_proc(int) override; @@ -80,41 +81,33 @@ private: bool ext_ref_type; }; -class HttpExternalJSNorm : public HttpJSNorm +class HttpExternalJSNorm : public snort::JSNorm, public HttpJSNorm { public: - HttpExternalJSNorm(JSNormConfig* jsn_config, uint64_t tid) : HttpJSNorm(jsn_config) + HttpExternalJSNorm(JSNormConfig* jsn_config, uint64_t tid) : JSNorm(jsn_config) { trans_num = tid; } + snort::JSNorm& ctx() override + { return *this; } + protected: bool pre_proc() override; bool post_proc(int) override; }; -class HttpPDFJSNorm : public HttpJSNorm +class HttpPDFJSNorm : public snort::PDFJSNorm, public HttpJSNorm { public: - static bool is_pdf(const void* data, size_t len) - { - constexpr char magic[] = "%PDF-1."; - constexpr int magic_len = sizeof(magic) - 1; - return magic_len < len and !strncmp((const char*)data, magic, magic_len); - } - HttpPDFJSNorm(JSNormConfig* jsn_config, uint64_t tid) : - HttpJSNorm(jsn_config), pdf_in(&buf_pdf_in), pdf_out(&buf_pdf_out), extractor(pdf_in, pdf_out) + PDFJSNorm(jsn_config) { trans_num = tid; } + snort::JSNorm& ctx() override + { return *this; } + protected: bool pre_proc() override; bool post_proc(int) override; - -private: - snort::istreambuf_glue buf_pdf_in; - snort::ostreambuf_infl buf_pdf_out; - std::istream pdf_in; - std::ostream pdf_out; - jsn::PDFTokenizer extractor; }; #endif diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc index fe2b29579..7c7446134 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.cc +++ b/src/service_inspectors/http_inspect/http_msg_body.cc @@ -255,7 +255,7 @@ void HttpMsgBody::analyze() do_file_decompression(decoded_body, decompressed_file_body); if (decompressed_file_body.length() > 0 and session_data->js_ctx[source_id]) - session_data->js_ctx[source_id]->tick(); + session_data->js_ctx[source_id]->ctx().tick(); uint32_t& partial_detect_length = session_data->partial_detect_length[source_id]; uint8_t*& partial_detect_buffer = session_data->partial_detect_buffer[source_id]; @@ -789,7 +789,9 @@ const Field& HttpMsgBody::get_norm_js_data() return norm_js_data; } - if (decompressed_file_body.length() <= 0) + int src_len = decompressed_file_body.length(); + + if (src_len <= 0) { norm_js_data.set(STAT_NO_SOURCE); return norm_js_data; @@ -803,12 +805,13 @@ const Field& HttpMsgBody::get_norm_js_data() return norm_js_data; } + const void* src = decompressed_file_body.start(); const void* dst = nullptr; size_t dst_len = HttpCommon::STAT_NOT_PRESENT; - auto back = !session_data->partial_flush[source_id]; + bool back = !session_data->partial_flush[source_id]; - jsn->link(decompressed_file_body.start(), session_data->events[source_id], infractions); - jsn->normalize(decompressed_file_body.start(), decompressed_file_body.length(), dst, dst_len); + jsn->link(src, session_data->events[source_id], infractions); + jsn->ctx().normalize(src, src_len, dst, dst_len); debug_logf(4, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), "input data was %s\n", back ? "last one in PDU" : "a part of PDU"); @@ -818,11 +821,7 @@ const Field& HttpMsgBody::get_norm_js_data() else { if (back) - jsn->flush_data(dst, dst_len); - - trace_logf(1, js_trace, TRACE_DUMP, DetectionEngine::get_current_packet(), - "js_data[%u]: %.*s\n", (unsigned)dst_len, (int)dst_len, (const char*)dst); - + jsn->ctx().flush_data(dst, dst_len); norm_js_data.set(dst_len, (const uint8_t*)dst, back); } diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index ab2bb07ef..c8cbe6efd 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -385,7 +385,7 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] = { CountType::SUM, "total_bytes", "total HTTP data bytes inspected" }, { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" }, { CountType::SUM, "js_external_scripts", "total number of external JavaScripts processed" }, - { CountType::SUM, "js_pdf_scripts", "total number of PDF JavaScripts processed" }, + { CountType::SUM, "js_pdf_scripts", "total number of PDF files processed" }, { CountType::SUM, "skip_mime_attach", "total number of HTTP requests with too many MIME attachments to inspect" }, { CountType::END, nullptr, nullptr } }; diff --git a/src/service_inspectors/imap/dev_notes.txt b/src/service_inspectors/imap/dev_notes.txt index 45797ce52..5bc5d490c 100644 --- a/src/service_inspectors/imap/dev_notes.txt +++ b/src/service_inspectors/imap/dev_notes.txt @@ -5,3 +5,9 @@ are parsed and the MIME attachments in IMAP responses are processed using the file API. The file API extracts and decodes the attachments. file_data is then set to the start of these extracted/decoded attachments. This inspector also identifies and whitelists the IMAPS traffic. + +IMAP inspector uses PDFJSNorm class to extract and normalize JavaScript +in PDF files. The normalized JavaScript is then available in the js_data buffer. +The js_data buffer follows the JIT approach, thus, to perform the normalization, +the rule with the js_data IPS option must be present as well as the js_norm module +is configured. diff --git a/src/service_inspectors/imap/imap.cc b/src/service_inspectors/imap/imap.cc index 0331258ad..6ad8c13f7 100644 --- a/src/service_inspectors/imap/imap.cc +++ b/src/service_inspectors/imap/imap.cc @@ -25,6 +25,7 @@ #include "imap.h" #include "detection/detection_engine.h" +#include "js_norm/js_pdf_norm.h" #include "log/messages.h" #include "profiler/profiler.h" #include "protocols/packet.h" @@ -140,6 +141,7 @@ const PegInfo imap_peg_names[] = { CountType::SUM, "uu_decoded_bytes", "total uu decoded bytes" }, { CountType::SUM, "non_encoded_attachments", "total non-encoded attachments extracted" }, { CountType::SUM, "non_encoded_bytes", "total non-encoded extracted bytes" }, + { CountType::SUM, "js_pdf_scripts", "total number of PDF files processed" }, { CountType::END, nullptr, nullptr } }; @@ -154,20 +156,36 @@ ImapFlowData::ImapFlowData() : FlowData(inspector_id) ImapFlowData::~ImapFlowData() { - if(session.mime_ssn) - delete(session.mime_ssn); + delete session.mime_ssn; + delete session.jsn; assert(imapstats.concurrent_sessions > 0); imapstats.concurrent_sessions--; } unsigned ImapFlowData::inspector_id = 0; + static IMAPData* get_session_data(Flow* flow) { ImapFlowData* fd = (ImapFlowData*)flow->get_flow_data(ImapFlowData::inspector_id); return fd ? &fd->session : nullptr; } +static inline PDFJSNorm* acquire_js_ctx(IMAPData& imap_ssn, const void* data, size_t len) +{ + if (imap_ssn.jsn) + return imap_ssn.jsn; + + JSNormConfig* cfg = get_inspection_policy()->jsn_config; + if (cfg and PDFJSNorm::is_pdf(data, len)) + { + imap_ssn.jsn = new PDFJSNorm(cfg); + ++imapstats.js_pdf_scripts; + } + + return imap_ssn.jsn; +} + static IMAPData* SetNewIMAPData(IMAP_PROTO_CONF* config, Packet* p) { IMAPData* imap_ssn; @@ -228,6 +246,9 @@ static void IMAP_ResetState(Flow* ssn) imap_ssn->state = STATE_COMMAND; imap_ssn->state_flags = 0; imap_ssn->body_read = imap_ssn->body_len = 0; + + delete imap_ssn->jsn; + imap_ssn->jsn = nullptr; } static void IMAP_GetEOL(const uint8_t* ptr, const uint8_t* end, @@ -436,11 +457,11 @@ static void IMAP_ProcessServerPacket(Packet* p, IMAPData* imap_ssn) { if (imap_ssn->state == STATE_DATA) { - if ( imap_ssn->body_len > imap_ssn->body_read) + if (imap_ssn->body_len > imap_ssn->body_read) { int len = imap_ssn->body_len - imap_ssn->body_read; - if ( (end - ptr) < len ) + if ((end - ptr) < len) { data_end = end; len = data_end - ptr; @@ -449,11 +470,18 @@ static void IMAP_ProcessServerPacket(Packet* p, IMAPData* imap_ssn) data_end = ptr + len; FilePosition position = get_file_position(p); - int data_len = end - ptr; - ptr = imap_ssn->mime_ssn->process_mime_data(p, ptr, data_len, false, - position); - if ( ptr < data_end) + + if (isFileStart(position)) + { + delete imap_ssn->jsn; + imap_ssn->jsn = nullptr; + } + + ptr = imap_ssn->mime_ssn->process_mime_data(p, ptr, data_len, false, position); + if (imap_ssn->jsn) + imap_ssn->jsn->tick(); + if (ptr < data_end) len = len - (data_end - ptr); imap_ssn->body_read += len; @@ -522,13 +550,13 @@ static void IMAP_ProcessServerPacket(Packet* p, IMAPData* imap_ssn) if (imap_ssn->state == STATE_DATA) { body_start = (const uint8_t*)memchr((const char*)ptr, '{', (eol - ptr)); - if ( body_start == nullptr ) + if (body_start == nullptr) { imap_ssn->state = STATE_UNKNOWN; } else { - if ( (body_start + 1) < eol ) + if ((body_start + 1) < eol) { uint32_t len = (uint32_t)SnortStrtoul((const char*)(body_start + 1), &eptr, 10); @@ -557,7 +585,7 @@ static void IMAP_ProcessServerPacket(Packet* p, IMAPData* imap_ssn) { imap_ssn->session_flags &= ~IMAP_FLAG_CHECK_SSL; } - if ( (*ptr != '*') && (*ptr !='+') && (*ptr != '\r') && (*ptr != '\n') ) + if ((*ptr != '*') && (*ptr != '+') && (*ptr != '\r') && (*ptr != '\n')) { DetectionEngine::queue_event(GID_IMAP, IMAP_UNKNOWN_RESP); } @@ -590,6 +618,9 @@ static void snort_imap(IMAP_PROTO_CONF* config, Packet* p) int pkt_dir = IMAP_Setup(p, imap_ssn); + if (imap_ssn->jsn) + imap_ssn->jsn->flush_data(); + if (pkt_dir == IMAP_PKT_FROM_CLIENT) { /* This packet should be a tls client hello */ @@ -775,33 +806,46 @@ void Imap::eval(Packet* p) bool Imap::get_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffer& b) { - switch (ibt) - { - case InspectionBuffer::IBT_VBA: - { - IMAPData* imap_ssn = get_session_data(p->flow); + IMAPData* imap_ssn = get_session_data(p->flow); + assert(imap_ssn); - if (!imap_ssn) - return false; + const void* dst = nullptr; + size_t dst_len = 0; - const BufferData& vba_buf = imap_ssn->mime_ssn->get_vba_inspect_buf(); + switch (ibt) + { + case InspectionBuffer::IBT_VBA: + { + const BufferData& vba_buf = imap_ssn->mime_ssn->get_vba_inspect_buf(); + dst = vba_buf.data_ptr(); + dst_len = vba_buf.length(); + break; + } - if (vba_buf.data_ptr() && vba_buf.length()) - { - b.data = vba_buf.data_ptr(); - b.len = vba_buf.length(); - return true; - } - else - return false; + case InspectionBuffer::IBT_JS_DATA: + { + auto& dp = DetectionEngine::get_file_data(p->context); + auto jsn = acquire_js_ctx(*imap_ssn, dp.data, dp.len); + if (jsn) + { + jsn->get_data(dst, dst_len); + if (dst and dst_len) + break; + jsn->normalize(dp.data, dp.len, dst, dst_len); } + break; + } - default: - break; + default: + return false; } - return false; + b.data = (const uint8_t*)dst; + b.len = dst_len; + + return dst && dst_len; } + bool Imap::get_fp_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffer& b) { // Fast pattern buffers only supplied at specific times @@ -843,6 +887,7 @@ static const char* imap_bufs[] = { "file_data", "vba_data", + "js_data", nullptr }; @@ -883,4 +928,3 @@ SO_PUBLIC const BaseApi* snort_plugins[] = #else const BaseApi* sin_imap = &imap_api.base; #endif - diff --git a/src/service_inspectors/imap/imap.h b/src/service_inspectors/imap/imap.h index 613f189f0..52ac783d9 100644 --- a/src/service_inspectors/imap/imap.h +++ b/src/service_inspectors/imap/imap.h @@ -156,6 +156,11 @@ private: bool is_end_of_data(snort::Flow* ssn) override; }; +namespace snort +{ +class PDFJSNorm; +} + struct IMAPData { int state; @@ -164,6 +169,7 @@ struct IMAPData uint32_t body_len; uint32_t body_read; ImapMime* mime_ssn; + snort::PDFJSNorm* jsn; }; class ImapFlowData : public snort::FlowData diff --git a/src/service_inspectors/imap/imap_config.h b/src/service_inspectors/imap/imap_config.h index 0b7e5022d..db4b30c3c 100644 --- a/src/service_inspectors/imap/imap_config.h +++ b/src/service_inspectors/imap/imap_config.h @@ -39,6 +39,7 @@ struct ImapStats PegCount ssl_search_abandoned; PegCount ssl_srch_abandoned_early; snort::MimeStats mime_stats; + PegCount js_pdf_scripts; }; extern const PegInfo imap_peg_names[]; diff --git a/src/service_inspectors/pop/dev_notes.txt b/src/service_inspectors/pop/dev_notes.txt index d13a9e30c..a5deafcea 100644 --- a/src/service_inspectors/pop/dev_notes.txt +++ b/src/service_inspectors/pop/dev_notes.txt @@ -6,3 +6,9 @@ parsed and the MIME attachments in POP responses are processed using the file API. The file API extracts and decodes the attachments. file_data is then set to the start of these extracted/decoded attachments. This inspector also identifies and whitelists the POPS traffic. + +POP inspector uses PDFJSNorm class to extract and normalize JavaScript +in PDF files. The normalized JavaScript is then available in the js_data buffer. +The js_data buffer follows the JIT approach, thus, to perform the normalization, +the rule with the js_data IPS option must be present as well as the js_norm module +is configured. diff --git a/src/service_inspectors/pop/pop.cc b/src/service_inspectors/pop/pop.cc index 9001bac47..657501a23 100644 --- a/src/service_inspectors/pop/pop.cc +++ b/src/service_inspectors/pop/pop.cc @@ -25,6 +25,7 @@ #include "pop.h" #include "detection/detection_engine.h" +#include "js_norm/js_pdf_norm.h" #include "log/messages.h" #include "profiler/profiler.h" #include "protocols/packet.h" @@ -96,6 +97,7 @@ const PegInfo pop_peg_names[] = { CountType::SUM, "uu_decoded_bytes", "total uu decoded bytes" }, { CountType::SUM, "non_encoded_attachments", "total non-encoded attachments extracted" }, { CountType::SUM, "non_encoded_bytes", "total non-encoded extracted bytes" }, + { CountType::SUM, "js_pdf_scripts", "total number of PDF files processed" }, { CountType::END, nullptr, nullptr } }; @@ -111,20 +113,36 @@ PopFlowData::PopFlowData() : FlowData(inspector_id) PopFlowData::~PopFlowData() { - if (session.mime_ssn) - delete(session.mime_ssn); + delete session.mime_ssn; + delete session.jsn; assert(popstats.concurrent_sessions > 0); popstats.concurrent_sessions--; } unsigned PopFlowData::inspector_id = 0; + static POPData* get_session_data(Flow* flow) { PopFlowData* fd = (PopFlowData*)flow->get_flow_data(PopFlowData::inspector_id); return fd ? &fd->session : nullptr; } +static inline PDFJSNorm* acquire_js_ctx(POPData& pop_ssn, const void* data, size_t len) +{ + if (pop_ssn.jsn) + return pop_ssn.jsn; + + JSNormConfig* cfg = get_inspection_policy()->jsn_config; + if (cfg and PDFJSNorm::is_pdf(data, len)) + { + pop_ssn.jsn = new PDFJSNorm(cfg); + ++popstats.js_pdf_scripts; + } + + return pop_ssn.jsn; +} + static POPData* SetNewPOPData(POP_PROTO_CONF* config, Packet* p) { POPData* pop_ssn; @@ -185,6 +203,9 @@ static void POP_ResetState(Flow* ssn) pop_ssn->state = STATE_COMMAND; pop_ssn->prev_response = 0; pop_ssn->state_flags = 0; + + delete pop_ssn->jsn; + pop_ssn->jsn = nullptr; } static void POP_GetEOL(const uint8_t* ptr, const uint8_t* end, @@ -418,7 +439,16 @@ static void POP_ProcessServerPacket(Packet* p, POPData* pop_ssn) //ptr = POP_HandleData(p, ptr, end); FilePosition position = get_file_position(p); int len = end - ptr; + + if (isFileStart(position)) + { + delete pop_ssn->jsn; + pop_ssn->jsn = nullptr; + } + ptr = pop_ssn->mime_ssn->process_mime_data(p, ptr, len, false, position); + if (pop_ssn->jsn) + pop_ssn->jsn->tick(); continue; } POP_GetEOL(ptr, end, &eol, &eolm); @@ -527,6 +557,9 @@ static void snort_pop(POP_PROTO_CONF* config, Packet* p) popstats.total_bytes += p->dsize; int pkt_dir = POP_Setup(p, pop_ssn); + if (pop_ssn->jsn) + pop_ssn->jsn->flush_data(); + if (pkt_dir == POP_PKT_FROM_CLIENT) { /* This packet should be a tls client hello */ @@ -711,32 +744,44 @@ void Pop::eval(Packet* p) bool Pop::get_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffer& b) { - // Fast pattern buffers only supplied at specific times - switch (ibt) - { - case InspectionBuffer::IBT_VBA: - { - POPData* pop_ssn = get_session_data(p->flow); + POPData* pop_ssn = get_session_data(p->flow); + assert(pop_ssn); - if (!pop_ssn) - return false; + const void* dst = nullptr; + size_t dst_len = 0; - const BufferData& vba_buf = pop_ssn->mime_ssn->get_vba_inspect_buf(); + switch (ibt) + { + case InspectionBuffer::IBT_VBA: + { + const BufferData& vba_buf = pop_ssn->mime_ssn->get_vba_inspect_buf(); + dst = vba_buf.data_ptr(); + dst_len = vba_buf.length(); + break; + } - if (vba_buf.data_ptr() && vba_buf.length()) - { - b.data = vba_buf.data_ptr(); - b.len = vba_buf.length(); - return true; - } - else - return false; + case InspectionBuffer::IBT_JS_DATA: + { + auto& dp = DetectionEngine::get_file_data(p->context); + auto jsn = acquire_js_ctx(*pop_ssn, dp.data, dp.len); + if (jsn) + { + jsn->get_data(dst, dst_len); + if (dst and dst_len) + break; + jsn->normalize(dp.data, dp.len, dst, dst_len); } + break; + } - default: - break; + default: + return false; } - return false; + + b.data = (const uint8_t*)dst; + b.len = dst_len; + + return dst && dst_len; } bool Pop::get_fp_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffer& b) @@ -779,6 +824,7 @@ static const char* pop_bufs[] = { "file_data", "vba_data", + "js_data", nullptr }; @@ -819,4 +865,3 @@ SO_PUBLIC const BaseApi* snort_plugins[] = #else const BaseApi* sin_pop = &pop_api.base; #endif - diff --git a/src/service_inspectors/pop/pop.h b/src/service_inspectors/pop/pop.h index 7cfa4112f..e149b9144 100644 --- a/src/service_inspectors/pop/pop.h +++ b/src/service_inspectors/pop/pop.h @@ -111,6 +111,11 @@ private: bool is_end_of_data(snort::Flow* ssn) override; }; +namespace snort +{ +class PDFJSNorm; +} + struct POPData { int state; @@ -118,6 +123,7 @@ struct POPData int state_flags; int session_flags; PopMime* mime_ssn; + snort::PDFJSNorm* jsn; }; class PopFlowData : public snort::FlowData diff --git a/src/service_inspectors/pop/pop_config.h b/src/service_inspectors/pop/pop_config.h index 790152af9..5ab4d276a 100644 --- a/src/service_inspectors/pop/pop_config.h +++ b/src/service_inspectors/pop/pop_config.h @@ -40,6 +40,7 @@ struct PopStats PegCount ssl_search_abandoned; PegCount ssl_srch_abandoned_early; snort::MimeStats mime_stats; + PegCount js_pdf_scripts; }; extern const PegInfo pop_peg_names[]; diff --git a/src/service_inspectors/smtp/dev_notes.txt b/src/service_inspectors/smtp/dev_notes.txt index 730f4fd95..5c961b969 100644 --- a/src/service_inspectors/smtp/dev_notes.txt +++ b/src/service_inspectors/smtp/dev_notes.txt @@ -10,3 +10,9 @@ traffic. SMTP inspector logs the filename, email addresses, attachment names when configured. The SMTP commands are also normalized based on the config. + +SMTP inspector uses PDFJSNorm class to extract and normalize JavaScript +in PDF files. The normalized JavaScript is then available in the js_data buffer. +The js_data buffer follows the JIT approach, thus, to perform the normalization, +the rule with the js_data IPS option must be present as well as the js_norm module +is configured. diff --git a/src/service_inspectors/smtp/smtp.cc b/src/service_inspectors/smtp/smtp.cc index 22d680865..c2f879ac1 100644 --- a/src/service_inspectors/smtp/smtp.cc +++ b/src/service_inspectors/smtp/smtp.cc @@ -26,6 +26,7 @@ #include "detection/detection_engine.h" #include "detection/detection_util.h" +#include "js_norm/js_pdf_norm.h" #include "log/messages.h" #include "log/unified2.h" #include "profiler/profiler.h" @@ -181,6 +182,7 @@ const PegInfo smtp_peg_names[] = { CountType::SUM, "uu_decoded_bytes", "total uu decoded bytes" }, { CountType::SUM, "non_encoded_attachments", "total non-encoded attachments extracted" }, { CountType::SUM, "non_encoded_bytes", "total non-encoded extracted bytes" }, + { CountType::SUM, "js_pdf_scripts", "total number of PDF files processed" }, { CountType::END, nullptr, nullptr } }; @@ -208,23 +210,37 @@ SmtpFlowData::SmtpFlowData() : FlowData(inspector_id) SmtpFlowData::~SmtpFlowData() { - if ( session.mime_ssn ) - delete session.mime_ssn; - - if ( session.auth_name ) - snort_free(session.auth_name); + delete session.mime_ssn; + delete session.jsn; + snort_free(session.auth_name); assert(smtpstats.concurrent_sessions > 0); smtpstats.concurrent_sessions--; } unsigned SmtpFlowData::inspector_id = 0; + static SMTPData* get_session_data(Flow* flow) { SmtpFlowData* fd = (SmtpFlowData*)flow->get_flow_data(SmtpFlowData::inspector_id); return fd ? &fd->session : nullptr; } +static inline PDFJSNorm* acquire_js_ctx(SMTPData& smtp_ssn, const void* data, size_t len) +{ + if (smtp_ssn.jsn) + return smtp_ssn.jsn; + + JSNormConfig* cfg = get_inspection_policy()->jsn_config; + if (cfg and PDFJSNorm::is_pdf(data, len)) + { + smtp_ssn.jsn = new PDFJSNorm(cfg); + ++smtpstats.js_pdf_scripts; + } + + return smtp_ssn.jsn; +} + static SMTPData* SetNewSMTPData(SmtpProtoConf* config, Packet* p) { SMTPData* smtp_ssn; @@ -526,6 +542,9 @@ static void SMTP_ResetState(Flow* ssn) SMTPData* smtp_ssn = get_session_data(ssn); smtp_ssn->state = STATE_COMMAND; smtp_ssn->state_flags = (smtp_ssn->state_flags & SMTP_FLAG_ABANDON_EVT) ? SMTP_FLAG_ABANDON_EVT : 0; + + delete smtp_ssn->jsn; + smtp_ssn->jsn = nullptr; } static inline int InspectPacket(Packet* p) @@ -851,7 +870,7 @@ static const uint8_t* SMTP_HandleCommand(SmtpProtoConf* config, Packet* p, SMTPD smtp_ssn->state = STATE_TLS_CLIENT_PEND; smtp_ssn->client_requested_starttls = true; } - + break; case CMD_X_LINK2STATE: @@ -1018,9 +1037,7 @@ static void SMTP_ProcessClientPacket(SmtpProtoConf* config, Packet* p, SMTPData* const uint8_t* end = p->data + p->dsize; if (smtp_ssn->state == STATE_CONNECT) - { smtp_ssn->state = STATE_COMMAND; - } while ((ptr != nullptr) && (ptr < end)) { @@ -1035,8 +1052,15 @@ static void SMTP_ProcessClientPacket(SmtpProtoConf* config, Packet* p, SMTPData* case STATE_DATA: case STATE_BDATA: position = get_file_position(p); + if (isFileStart(position)) + { + delete smtp_ssn->jsn; + smtp_ssn->jsn = nullptr; + } ptr = smtp_ssn->mime_ssn->process_mime_data(p, ptr, len, true, position); //ptr = SMTP_HandleData(p, ptr, end, &(smtp_ssn->mime_ssn)); + if (smtp_ssn->jsn) + smtp_ssn->jsn->tick(); break; case STATE_XEXCH50: if (smtp_normalizing) @@ -1155,7 +1179,7 @@ static void SMTP_ProcessServerPacket( else { smtp_ssn->server_accepted_starttls = true; - + OpportunisticTlsEvent event(p, p->flow->service); DataBus::publish(OPPORTUNISTIC_TLS_EVENT, event, p->flow); ++smtpstats.starttls; @@ -1216,6 +1240,8 @@ static void snort_smtp(SmtpProtoConf* config, Packet* p) /* reset normalization stuff */ smtp_normalizing = false; smtpstats.total_bytes += p->dsize; + if (smtp_ssn->jsn) + smtp_ssn->jsn->flush_data(); if (pkt_dir == SMTP_PKT_FROM_SERVER) { @@ -1237,7 +1263,7 @@ static void snort_smtp(SmtpProtoConf* config, Packet* p) smtp_ssn->state = STATE_TLS_SERVER_PEND; } } - + if(smtp_ssn->state == STATE_TLS_CLIENT_PEND) smtp_ssn->state = STATE_COMMAND; @@ -1556,23 +1582,44 @@ void Smtp::ProcessSmtpCmdsList(const SmtpCmd* sc) bool Smtp::get_fp_buf(InspectionBuffer::Type ibt, Packet* p, InspectionBuffer& b) { - if ( ibt != InspectionBuffer::IBT_VBA ) - return false; - SMTPData* smtp_ssn = get_session_data(p->flow); + assert(smtp_ssn); - if (!smtp_ssn) - return false; + const void* dst = nullptr; + size_t dst_len = 0; - const BufferData& vba_buf = smtp_ssn->mime_ssn->get_vba_inspect_buf(); + switch (ibt) + { + case InspectionBuffer::IBT_VBA: + { + const BufferData& vba_buf = smtp_ssn->mime_ssn->get_vba_inspect_buf(); + dst = vba_buf.data_ptr(); + dst_len = vba_buf.length(); + break; + } - if ( vba_buf.data_ptr() && vba_buf.length() ) + case InspectionBuffer::IBT_JS_DATA: { - b.data = vba_buf.data_ptr(); - b.len = vba_buf.length(); - return true; + auto& dp = DetectionEngine::get_file_data(p->context); + auto jsn = acquire_js_ctx(*smtp_ssn, dp.data, dp.len); + if (jsn) + { + jsn->get_data(dst, dst_len); + if (dst and dst_len) + break; + jsn->normalize(dp.data, dp.len, dst, dst_len); + } + break; } - return false; + + default: + return false; + } + + b.data = (const uint8_t*)dst; + b.len = dst_len; + + return dst && dst_len; } //------------------------------------------------------------------------- @@ -1621,6 +1668,7 @@ static const char* smtp_bufs[] = { "file_data", "vba_data", + "js_data", nullptr }; @@ -1726,4 +1774,3 @@ TEST_CASE("normalize_data", "[smtp]") delete p.context; } #endif - diff --git a/src/service_inspectors/smtp/smtp.h b/src/service_inspectors/smtp/smtp.h index 0272a049b..87072e221 100644 --- a/src/service_inspectors/smtp/smtp.h +++ b/src/service_inspectors/smtp/smtp.h @@ -156,6 +156,11 @@ private: bool is_end_of_data(snort::Flow* ssn) override; }; +namespace snort +{ +class PDFJSNorm; +} + struct SMTPData { //Initialize structure with default values @@ -164,6 +169,7 @@ struct SMTPData session_flags{0}, dat_chunk{0}, mime_ssn{nullptr}, + jsn{nullptr}, auth_name{nullptr}, client_requested_starttls{false}, pipelined_command_counter{0}, @@ -175,6 +181,7 @@ struct SMTPData int session_flags; uint32_t dat_chunk; SmtpMime* mime_ssn; + snort::PDFJSNorm* jsn; SMTPAuthName* auth_name; bool client_requested_starttls; size_t pipelined_command_counter; diff --git a/src/service_inspectors/smtp/smtp_config.h b/src/service_inspectors/smtp/smtp_config.h index b06b08a07..079f30819 100644 --- a/src/service_inspectors/smtp/smtp_config.h +++ b/src/service_inspectors/smtp/smtp_config.h @@ -157,6 +157,7 @@ struct SmtpStats PegCount ssl_search_abandoned; PegCount ssl_search_abandoned_too_soon; snort::MimeStats mime_stats; + PegCount js_pdf_scripts; }; extern const PegInfo smtp_peg_names[]; diff --git a/src/service_inspectors/smtp/smtp_paf.cc b/src/service_inspectors/smtp/smtp_paf.cc index 07fd04963..3136ca44d 100644 --- a/src/service_inspectors/smtp/smtp_paf.cc +++ b/src/service_inspectors/smtp/smtp_paf.cc @@ -263,7 +263,7 @@ static inline bool flush_based_length(SmtpPafData* pfdata) * * Process data boundary and flush each file based on boundary*/ static inline bool process_data(SmtpPafData* pfdata, uint8_t data) { - if (flush_based_length(pfdata)|| check_data_end(&(pfdata->data_end_state), data)) + if (flush_based_length(pfdata) || check_data_end(&(pfdata->data_end_state), data)) { /*Clean up states*/ pfdata->smtp_state = SMTP_PAF_CMD_STATE; diff --git a/src/utils/streambuf.h b/src/utils/streambuf.h index acf81b47c..85f9b04d5 100644 --- a/src/utils/streambuf.h +++ b/src/utils/streambuf.h @@ -24,13 +24,15 @@ #include #include +#include "main/snort_types.h" + namespace snort { // an input stream over set of buffers, // the buffer doesn't take ownership over the memory, // no intermediate buffering between chunks -class istreambuf_glue : public std::streambuf +class SO_PUBLIC istreambuf_glue : public std::streambuf { public: istreambuf_glue(); @@ -65,7 +67,7 @@ protected: }; // an output stream over extensible array -class ostreambuf_infl : public std::streambuf +class SO_PUBLIC ostreambuf_infl : public std::streambuf { public: static constexpr size_t size_limit = 1 << 20;