]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #5194: add extractor file logging
author Anna Norokh -X (anorokh - SOFTSERVE INC at Cisco) <anorokh@cisco.com>
Mon, 16 Mar 2026 16:16:18 +0000 (16:16 +0000)
committer Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Mon, 16 Mar 2026 16:16:18 +0000 (16:16 +0000)
Merge in SNORT/snort3 from ~ANOROKH/snort3:extractor_file to master

Squashed commit of the following:

commit 1068a08b3e05ae1905c62afc7e8a1a9e5b135f08
Author: anorokh <anorokh@cisco.com>
Date:   Thu Mar 5 20:53:55 2026 +0200

    extractor: add FILE logging

    * update file log condition;

22 files changed:
doc/user/extractor.txt
src/file_api/file_lib.cc
src/file_api/file_lib.h
src/network_inspectors/extractor/CMakeLists.txt
src/network_inspectors/extractor/extractor.cc
src/network_inspectors/extractor/extractor_conn.cc
src/network_inspectors/extractor/extractor_csv_logger.cc
src/network_inspectors/extractor/extractor_csv_logger.h
src/network_inspectors/extractor/extractor_enums.h
src/network_inspectors/extractor/extractor_file.cc [new file with mode: 0644]
src/network_inspectors/extractor/extractor_file.h [new file with mode: 0644]
src/network_inspectors/extractor/extractor_json_logger.cc
src/network_inspectors/extractor/extractor_json_logger.h
src/network_inspectors/extractor/extractor_logger.h
src/network_inspectors/extractor/extractor_quic.cc
src/network_inspectors/extractor/extractor_service.cc
src/network_inspectors/extractor/extractor_service.h
src/network_inspectors/extractor/extractors.cc
src/network_inspectors/extractor/extractors.h
src/pub_sub/file_events.cc
src/pub_sub/file_events.h
src/pub_sub/test/pub_sub_file_events_test.cc

index 1037ca9fc7d0769f62460de70d95fe82036f469f..8a75169b989b5da239423d513d2a61d3c944222c 100644 (file)
@@ -65,6 +65,8 @@ Services and their events:
   ** `response`
 * connection (conn)
   ** `eof` (end of flow)
+* file
+  ** `eof` (end of file)
 * internal built-in checks which failed (weird)
   ** 'builtin' (internally-detected infraction is queued for further processing)
 * triggered IPS rule, whether built-in or text or SO (notice)
@@ -209,6 +211,23 @@ UDP Events: d: Packet with payload.
 
 TCP Events: s: SYN, h: SYN-ACK, a: Pure ACK or PUSH, d: Packet with payload, f: FIN, r: Reset.
 
+Fields supported for file:
+
+* `filename` - filename from headers in network protocols
+* `fuid` - unique file identifier
+* `source` - a protocol associated with the file
+* `analyzers` - inspector associated with the file analysis
+* `mime_type` - mime attachment type (or file type identified by file magic)
+* `is_orig` - if sender was originator of the file transfer
+* `seen_bytes` - number of bytes processed for analysis
+* `total_bytes` - total file size in bytes
+* `duration` - duration the file was analyzed for, in seconds
+* `timedout` - if file analysis timed out
+* `sha256` - SHA256 digest of the file contents
+* `extracted` - name of captured file
+* `extracted_size` - number of bytes captured
+* `extracted_cutoff` - true if the file being captured was cut off so the whole file was not logged
+
 Fields supported for 'weird' and 'notice' logs:
 
 * `sid` - unique signature number of the rule
index 70e2c56ac6b50eee82fd8224950f1065744dd49c..1d8fc0910c38a42c3b10cd1c54c1aaad1d01bc7a 100644 (file)
@@ -537,16 +537,16 @@ inline void FileContext::finalize_file_type()
     file_type_context = nullptr;
 }
 
-std::string FileContext::get_mime_type() const
+const char* FileContext::get_mime_type() const
 {
     const FileConfig* conf = get_file_config();
     if (SNORT_FILE_TYPE_UNKNOWN != file_type_id and SNORT_FILE_TYPE_CONTINUE != file_type_id and conf)
     {
         const FileMeta* info = conf->get_rule_from_id(file_type_id);
-        return info != nullptr ? info->type : std::string();
+        return info != nullptr ? info->type.c_str() : "";
     }
 
-    return std::string();
+    return "";
 }
 
 void FileContext::set_source(Flow *flow)
@@ -588,7 +588,7 @@ void FileContext::log_file_event(Flow* flow, FilePolicyBase* policy)
 
         user_file_data_mutex.unlock();
 
-        if (policy and log_needed)
+        if (processing_complete or log_needed)
         {
             hr_time now = SnortClock::now();
             duration = (TO_USECS(now - start_time)) / 1000000.0;  // Convert microseconds to seconds
@@ -602,11 +602,11 @@ void FileContext::log_file_event(Flow* flow, FilePolicyBase* policy)
             FileCharEncoding encoding = get_character_encoding(filename.c_str(), fname_len);
 
             FILE_DEBUG(file_trace, DEFAULT_TRACE_OPTION_ID, TRACE_DEBUG_LEVEL, GET_CURRENT_PACKET,
-                "File advance log: fuid-%s, source-%s, mime type-%s, file name-%s,"
-                " duration-%f, is orig-%d, seen bytes-%lu, total bytes-%lu,"
+                "File advance log: fuid-%" PRIu64 ", source-%s, mime type-%s, file name-%s,"
+                " duration-%f, is orig-%d, seen bytes-%" PRIu64 ", total bytes-%" PRIu64 ","
                 " timedout-%d, sha256-%s, extracted name-%s, extracted cutoff-%d,"
-                " extracted size-%lu\n", file_event.get_fuid().c_str(),
-                file_event.get_source().c_str(), file_event.get_mime_type().c_str(),
+                " extracted size-%" PRIu64 "\n", file_event.get_fuid(),
+                file_event.get_source().c_str(), file_event.get_mime_type(),
                 (encoding == SNORT_CHAR_ENCODING_UTF_16LE) ? "" : filename.c_str(), file_event.get_duration(),
                 file_event.get_is_orig(), file_event.get_seen_bytes(),
                 file_event.get_total_bytes(), file_event.get_timedout(),
@@ -1101,9 +1101,8 @@ void FileContext::update_file_size(int data_size, FilePosition position)
 {
     processed_bytes += data_size;
 
-    FILE_DEBUG(file_trace, DEFAULT_TRACE_OPTION_ID, TRACE_DEBUG_LEVEL,
-        GET_CURRENT_PACKET,
-        "Updating file size of file_id %lu at position %d with processed_bytes %lu\n",
+    FILE_DEBUG(file_trace, DEFAULT_TRACE_OPTION_ID, TRACE_DEBUG_LEVEL, GET_CURRENT_PACKET,
+        "Updating file size of file_id %" PRIu64 " at position %d with processed_bytes %" PRIu64 "\n",
         file_id, position, processed_bytes);
     if ((position == SNORT_FILE_END)or (position == SNORT_FILE_FULL))
     {
index 0f8125accc59f8c3b5dc11948e3ec628b9626c80..f3a650b83189d96ec546e3e9729e49d35c2dbab5 100644 (file)
@@ -151,7 +151,7 @@ protected:
     bool re_eval = false;
     // Indicates that file transmission goes through 206 HTTP Partial Content
     bool is_partial = false;
-    bool extracted_cutoff = true;
+    bool extracted_cutoff = false;
     uint64_t extracted_size = 0;
     std::string extracted_file_name;
 };
@@ -203,7 +203,7 @@ public:
     FileInspect* get_inspector() { return inspector; }
     double get_duration() const { return duration; }  // Duration in seconds (fractional)
     void set_duration(double d) { duration = d; }
-    std::string get_mime_type() const;
+    const char* get_mime_type() const;
     const std::string& get_source() const { return source; }
     void set_source(Flow *flow);
     bool get_timedout() const { return timedout; }
index 911e60a046b273f256f638adeb6200ab8c5cd430..7ec874e965d986fa34b811e473930096916ff688 100644 (file)
@@ -16,6 +16,8 @@ set( FILE_LIST
     extractor_detection.h
     extractor_dns.cc
     extractor_enums.h
+    extractor_file.cc
+    extractor_file.h
     extractor_flow_data.cc
     extractor_flow_data.h
     extractor_ftp.cc
index 13caed67cdb7471c90e5f76c461d22e159f3b8b6..1b4eef9af7551e629f82b497af941c7ade7531d7 100644 (file)
@@ -50,7 +50,7 @@ THREAD_LOCAL ExtractorLogger* Extractor::logger = nullptr;
 
 static const Parameter extractor_proto_params[] =
 {
-    { "service", Parameter::PT_ENUM, "http | ftp | ssl | conn | dns | quic | weird | notice", nullptr,
+    { "service", Parameter::PT_ENUM, "http | ftp | ssl | conn | dns | quic | file | weird | notice", nullptr,
       "service to extract from" },
 
     { "tenant_id", Parameter::PT_INT, "0:max32", "0",
index 19f5cf69881dfcccad8cbb81cd87e5e83f5b5cd8..e46a0547c19af5c7032d2e5a37043be1b5c088c8 100644 (file)
@@ -88,7 +88,7 @@ static uint64_t get_resp_bytes(const DataEvent*, const Flow* f)
 {
     if (f->session == nullptr)
         return 0;
-    
+
     if (f->pkt_type == PktType::TCP)
         return get_resp_bytes_tcp((const TcpSession*)f->session);
     else if (f->pkt_type == PktType::UDP)
index 64b6ccda395b6286bdd0f076e88b04161a0a1119..7eb06b19a23d945f19cd407b8862c4f1cb00d0ca 100644 (file)
@@ -114,6 +114,18 @@ void CsvExtractorLogger::add_field(const char*, uint64_t v)
     record.append(to_string(v));
 }
 
+void CsvExtractorLogger::add_field(const char*, double v)
+{
+    const unsigned precision = 6;
+
+    // 20 digits for integer part + '.' + precision digits + '\0'
+    char buf[20 + 1 + precision + 1];
+    snort::SnortSnprintf(buf, sizeof(buf), "%.*f", (int)precision, v);
+
+    record.push_back(delimiter);
+    record.append(buf);
+}
+
 void CsvExtractorLogger::add_field(const char*, const snort::SfIp& v)
 {
     record.push_back(delimiter);
index 3f0cc24922e9b9de9bb29db9f53d03868ce2e547..d1a5e4fc97dab7f593f2f76dc4c0de7e760ae6f8 100644 (file)
@@ -34,6 +34,7 @@ public:
     void add_field(const char*, const char*) override;
     void add_field(const char*, const char*, size_t) override;
     void add_field(const char*, uint64_t) override;
+    void add_field(const char*, double) override;
     void add_field(const char*, struct timeval) override;
     void add_field(const char*, const snort::SfIp&) override;
     void add_field(const char*, bool) override;
index bf8399817ded3933ccf1d8965d4034ad66db9bce..de1099220f28f90918d3ff1b3ce6ae061ad2ff84 100644 (file)
@@ -33,6 +33,7 @@ public:
         CONN,
         DNS,
         QUIC,
+        FILE,
         IPS_BUILTIN,
         IPS_USER,
         ANY,
@@ -62,6 +63,8 @@ public:
             return "dns";
         case QUIC:
             return "quic";
+        case FILE:
+            return "file";
         case IPS_BUILTIN:
             return "weird";
         case IPS_USER:
diff --git a/src/network_inspectors/extractor/extractor_file.cc b/src/network_inspectors/extractor/extractor_file.cc
new file mode 100644 (file)
index 0000000..f3dac48
--- /dev/null
@@ -0,0 +1,197 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2026-2026 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractor_file.cc author Anna Norokh <anorokh@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "extractor_file.h"
+
+#include "pub_sub/file_events.h"
+#include "pub_sub/file_events_ids.h"
+
+#include "extractor.h"
+#include "extractor_enums.h"
+
+using namespace snort;
+using namespace std;
+
+
+static uint64_t get_seen_bytes(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_seen_bytes();
+}
+
+static uint64_t get_total_bytes(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_total_bytes();
+}
+
+static uint64_t get_extracted_size(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_extracted_size();
+}
+
+static double get_duration(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_duration();
+}
+
+static bool get_timedout(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_timedout();
+}
+
+static bool get_is_orig(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_is_orig();
+}
+
+static bool get_is_extracted_cutoff(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_extracted_cutoff();
+}
+
+static const char* get_extracted_name(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_extracted_name().c_str();
+}
+
+static uint64_t get_fuid(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_fuid();
+}
+
+static const char* get_analyzer(const DataEvent*, const Flow* flow)
+{
+    if (flow->gadget)
+        return flow->gadget->get_name();
+
+    return "";
+}
+
+static const char* get_source(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_source().c_str();
+}
+
+static const char* get_mime_type(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_mime_type();
+}
+
+static const char* get_filename(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_filename().c_str();
+}
+
+static const char* get_sha256(const DataEvent* event, const Flow*)
+{
+    return ((const FileEvent*)event)->get_sha256().c_str();
+}
+
+static const map<string, ExtractorEvent::NumGetFn> sub_num_getters =
+{
+    {"fuid", get_fuid},
+    {"seen_bytes", get_seen_bytes},
+    {"total_bytes", get_total_bytes},
+    {"extracted_size", get_extracted_size},
+};
+
+static const map<string, ExtractorEvent::BufGetFn> sub_buf_getters =
+{
+    {"analyzers", get_analyzer},
+    {"source", get_source},
+    {"filename", get_filename},
+    {"mime_type", get_mime_type},
+    {"extracted", get_extracted_name},
+    {"sha256", get_sha256},
+};
+
+static const map<string, ExtractorEvent::DblGetFn> sub_dbl_getters =
+{
+    {"duration", get_duration},
+};
+
+static const map<string, FileExtractor::SubGetFn> sub_getters =
+{
+    {"timedout", get_timedout},
+    {"is_orig", get_is_orig},
+    {"extracted_cutoff", get_is_extracted_cutoff},
+};
+
+THREAD_LOCAL const snort::Connector::ID* FileExtractor::log_id = nullptr;
+
+FileExtractor::FileExtractor(Extractor& i, uint32_t t, const vector<string>& fields)
+    : ExtractorEvent(ServiceType::FILE, i, t)
+{
+    for (const auto& f : fields)
+    {
+        if (append(nts_fields, nts_getters, f))
+            continue;
+        if (append(sip_fields, sip_getters, f))
+            continue;
+        if (append(num_fields, num_getters, f))
+            continue;
+        if (append(num_fields, sub_num_getters, f))
+            continue;
+        if (append(buf_fields, sub_buf_getters, f))
+            continue;
+        if (append(dbl_fields, sub_dbl_getters, f))
+            continue;
+        if (append(sub_fields, sub_getters, f))
+            continue;
+    }
+
+    DataBus::subscribe_global(file_adv_pub_key, FileEventIds::FILE_COMPLETE,
+        new Eof(*this, S_NAME), i.get_snort_config());
+}
+
+void FileExtractor::internal_tinit(const snort::Connector::ID* service_id)
+{ log_id = service_id; }
+
+void FileExtractor::handle(DataEvent& event, Flow* flow)
+{
+    // cppcheck-suppress unreadVariable
+    Profile profile(extractor_perf_stats);
+
+    if (!filter(flow))
+        return;
+
+    extractor_stats.total_events++;
+    // Values are written in the order get_field_names() lists them: bool sub_fields come last.
+    logger->open_record();
+    log(nts_fields, &event, flow);
+    log(sip_fields, &event, flow);
+    log(num_fields, &event, flow);
+    log(buf_fields, &event, flow);
+    log(dbl_fields, &event, flow);
+    log(sub_fields, &event, flow);  // moved after buf/dbl so positional (CSV) values match their names
+    logger->close_record(*log_id);
+}
+
+vector<const char*> FileExtractor::get_field_names() const
+{
+    vector<const char*> res = ExtractorEvent::get_field_names();
+
+    for (auto& f : sub_fields)
+        res.push_back(f.name);
+
+    return res;
+}
diff --git a/src/network_inspectors/extractor/extractor_file.h b/src/network_inspectors/extractor/extractor_file.h
new file mode 100644 (file)
index 0000000..c2fa75c
--- /dev/null
@@ -0,0 +1,45 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2026-2026 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractor_file.h author Anna Norokh <anorokh@cisco.com>
+
+#ifndef EXTRACTOR_FILE_H
+#define EXTRACTOR_FILE_H
+
+#include "extractors.h"
+
+class FileExtractor : public ExtractorEvent
+{
+public:
+    using SubGetFn = bool (*) (const DataEvent*, const Flow*);
+    using SubField = DataField<bool, const DataEvent*, const Flow*>;
+
+    FileExtractor(Extractor&, uint32_t tenant, const std::vector<std::string>& fields);
+
+    void handle(DataEvent&, Flow*);
+    std::vector<const char*> get_field_names() const override;
+
+private:
+    using Eof = Handler<FileExtractor>;
+
+    void internal_tinit(const snort::Connector::ID*) override;
+
+    std::vector<SubField> sub_fields;
+    static THREAD_LOCAL const snort::Connector::ID* log_id;
+};
+
+#endif
index 75b1d184084d56d69b47b0086aa0b8f4ccdb93b6..4f1ec45564679040d6fc955a05d5faf950b65592 100644 (file)
@@ -117,6 +117,18 @@ void JsonExtractorLogger::add_field(const char* f, uint64_t v)
     out_buffer += std::to_string(v);
 }
 
+void JsonExtractorLogger::add_field(const char* f, double v)
+{
+    const unsigned precision = 6;
+
+    // 20 digits for integer part + '.' + precision digits + '\0'
+    char buf[20 + 1 + precision + 1];
+    std::snprintf(buf, sizeof(buf), "%.*f", (int)precision, v);
+
+    write_key(f);
+    out_buffer += buf;
+}
+
 void JsonExtractorLogger::add_field(const char* f, const snort::SfIp& v)
 {
     snort::SfIpString buf;
index 8d65f78473c19211aa67e9d45b5a3e489cdb749c..2162b3d28a33a5e5953f9052d10b37a7bdae9852 100644 (file)
@@ -34,6 +34,7 @@ public:
     void add_field(const char*, const char*) override;
     void add_field(const char*, const char*, size_t) override;
     void add_field(const char*, uint64_t) override;
+    void add_field(const char*, double) override;
     void add_field(const char*, struct timeval) override;
     void add_field(const char*, const snort::SfIp&) override;
     void add_field(const char*, bool) override;
index edda0c30f31ab7d17c2877f6f89ef0c6b481af62..2a9ecc7b62e87f54150e4d67d32a8514a2da3b13 100644 (file)
@@ -51,6 +51,7 @@ public:
     virtual void add_field(const char*, const char*) {}
     virtual void add_field(const char*, const char*, size_t) {}
     virtual void add_field(const char*, uint64_t) {}
+    virtual void add_field(const char*, double) {}
     virtual void add_field(const char*, struct timeval) {}
     virtual void add_field(const char*, const snort::SfIp&) {}
     virtual void add_field(const char*, bool) {}
index 4bc29cfd33c1e0ec5475d08fd5a4f999d31de1d9..d5288c659019841b1f5edee9981f9dd64d7067a5 100644 (file)
@@ -47,7 +47,7 @@ public:
 
     QuicExtractorFlowData(QuicExtractor& owner)
         : ExtractorFlowData(type_id), owner(owner) { }
-    
+
     ~QuicExtractorFlowData() override
     {
         if (has_data)
@@ -199,7 +199,7 @@ void QuicExtractor::dump(const QuicExtractorFlowData& fd)
         logger->add_field(f.name, (uint64_t)0);
 
     log(fd_str_fields, &fd, logger->is_strict());
-    
+
     logger->close_record(*log_id);
 }
 
index e209731398aa6a3162208fe08e34128dd5d285c2..715d68c5a9059d9c655ce0d194197dce7f2431e3 100644 (file)
@@ -29,6 +29,7 @@
 #include "extractor_conn.h"
 #include "extractor_detection.h"
 #include "extractor_dns.h"
+#include "extractor_file.h"
 #include "extractor_ftp.h"
 #include "extractor_http.h"
 #include "extractor_quic.h"
@@ -140,6 +141,10 @@ ExtractorService* ExtractorService::make_service(Extractor& ins, const ServiceCo
         srv = new QuicExtractorService(cfg.tenant_id, cfg.fields, cfg.on_events, cfg.service, ins);
         break;
 
+    case ServiceType::FILE:
+        srv = new FileExtractorService(cfg.tenant_id, cfg.fields, cfg.on_events, cfg.service, ins);
+        break;
+
     case ServiceType::IPS_BUILTIN:
         srv = new BuiltinExtractorService(cfg.tenant_id, cfg.fields, cfg.on_events, cfg.service, ins);
         break;
@@ -255,6 +260,11 @@ void ExtractorService::validate(const ServiceConfig& cfg)
         validate_fields(QuicExtractorService::blueprint, cfg.fields);
         break;
 
+    case ServiceType::FILE:
+        validate_events(FileExtractorService::blueprint, cfg.on_events);
+        validate_fields(FileExtractorService::blueprint, cfg.fields);
+        break;
+
     case ServiceType::IPS_BUILTIN:
         validate_fields(BuiltinExtractorService::blueprint, cfg.fields);
         validate_events(BuiltinExtractorService::blueprint, cfg.on_events);
@@ -553,6 +563,54 @@ const snort::Connector::ID& QuicExtractorService::internal_tinit()
 const snort::Connector::ID& QuicExtractorService::get_log_id()
 { return log_id; }
 
+//-------------------------------------------------------------------------
+//  FileExtractorService
+//-------------------------------------------------------------------------
+
+const ServiceBlueprint FileExtractorService::blueprint =
+{
+    // events
+    {
+        "eof",
+    },
+    // fields
+    {
+        "fuid",
+        "source",
+        "analyzers",
+        "mime_type",
+        "filename",
+        "is_orig",
+        "seen_bytes",
+        "total_bytes",
+        "duration",
+        "timedout",
+        "extracted",
+        "extracted_cutoff",
+        "extracted_size",
+        "sha256"
+    },
+};
+
+THREAD_LOCAL Connector::ID FileExtractorService::log_id;
+
+FileExtractorService::FileExtractorService(uint32_t tenant, const std::vector<std::string>& srv_fields,
+    const std::vector<std::string>& srv_events, ServiceType s_type, Extractor& ins)
+    : ExtractorService(tenant, srv_fields, srv_events, blueprint, s_type, ins)
+{
+    for (const auto& event : get_events())
+    {
+        if (!strcmp("eof", event.c_str()))
+            handlers.push_back(new FileExtractor(ins, tenant_id, get_fields()));
+    }
+}
+
+const snort::Connector::ID& FileExtractorService::internal_tinit()
+{ return log_id = logger->get_id(type.c_str()); }
+
+const snort::Connector::ID& FileExtractorService::get_log_id()
+{ return log_id; }
+
 //-------------------------------------------------------------------------
 //  IpsUserExtractorService
 //-------------------------------------------------------------------------
@@ -656,6 +714,7 @@ TEST_CASE("Service Type", "[extractor]")
         ServiceType conn = ServiceType::CONN;
         ServiceType dns = ServiceType::DNS;
         ServiceType quic = ServiceType::QUIC;
+        ServiceType file = ServiceType::FILE;
         ServiceType weird = ServiceType::IPS_BUILTIN;
         ServiceType notice = ServiceType::IPS_USER;
         ServiceType any = ServiceType::ANY;
@@ -665,6 +724,7 @@ TEST_CASE("Service Type", "[extractor]")
         CHECK_FALSE(strcmp("ftp", ftp.c_str()));
         CHECK_FALSE(strcmp("ssl", ssl.c_str()));
         CHECK_FALSE(strcmp("conn", conn.c_str()));
+        CHECK_FALSE(strcmp("file", file.c_str()));
         CHECK_FALSE(strcmp("dns", dns.c_str()));
         CHECK_FALSE(strcmp("quic", quic.c_str()));
         CHECK_FALSE(strcmp("weird", weird.c_str()));
index 72f7b36205e844b9e01c4b339a386aa86175cc2e..29c098f5241483450d0b01c6538b3db3f485d154 100644 (file)
@@ -181,6 +181,21 @@ private:
     static THREAD_LOCAL snort::Connector::ID log_id;
 };
 
+class FileExtractorService : public ExtractorService
+{
+public:
+    static const ServiceBlueprint blueprint;
+
+    FileExtractorService(uint32_t tenant, const std::vector<std::string>& fields,
+        const std::vector<std::string>& events, ServiceType, Extractor&);
+
+private:
+    const snort::Connector::ID& internal_tinit() override;
+    const snort::Connector::ID& get_log_id() override;
+
+    static THREAD_LOCAL snort::Connector::ID log_id;
+};
+
 class BuiltinExtractorService : public ExtractorService
 {
 public:
index 745aa18c4b544952cf1de476b191b98663af0932..ac56cb2fc2436dbd1fa596f7cd906cdccdd637d4 100644 (file)
@@ -53,6 +53,9 @@ vector<const char*> ExtractorEvent::get_field_names() const
     for (auto& f : str_fields)
         res.push_back(f.name);
 
+    for (auto& f : dbl_fields)
+        res.push_back(f.name);
+
     return res;
 }
 
index 09f1542876e316f530400750f88d9da4db6a2779..85a014fd00a36aeb20c096c9391941b905c8768e 100644 (file)
@@ -60,6 +60,8 @@ public:
     using SipField = DataField<const SfIp&, const DataEvent*, const Flow*>;
     using NumGetFn = uint64_t (*) (const DataEvent*, const Flow*);
     using NumField = DataField<uint64_t, const DataEvent*, const Flow*>;
+    using DblGetFn = double (*) (const DataEvent*, const Flow*);
+    using DblField = DataField<double, const DataEvent*, const Flow*>;
     using NtsGetFn = struct timeval (*) (const DataEvent*, const Flow*);
     using NtsField = DataField<struct timeval, const DataEvent*, const Flow*>;
     using StrGetFn = std::pair<const char*, uint16_t> (*) (const DataEvent*, const Flow*);
@@ -184,6 +186,7 @@ protected:
     std::vector<NtsField> nts_fields;
     std::vector<SipField> sip_fields;
     std::vector<NumField> num_fields;
+    std::vector<DblField> dbl_fields;
     std::vector<BufField> buf_fields;
     std::vector<StrField> str_fields;
 
index 8ad8719d00c48c11b06d3609a40861051a4ddabb..8361a2a476fc5be31bddfb01f2b339156cd0e295 100644 (file)
 
 #include "file_events.h"
 
+#include <optional>
+
+#include "file_api/file_lib.h"
+#include "utils/util.h"
+
 using namespace snort;
 
-std::string FileEvent::get_fuid() const
-{ return std::to_string(file_ctx.get_file_id()); }
+uint64_t FileEvent::get_fuid() const
+{ return file_ctx.get_file_id(); }
 
 const std::string& FileEvent::get_source() const
 { return file_ctx.get_source(); }
 
-const std::string FileEvent::get_mime_type() const
-{ return file_ctx.get_mime_type(); }
+const char* FileEvent::get_mime_type() const
+{
+    return file_ctx.get_mime_type();
+}
 
 const std::string& FileEvent::get_filename() const
-{ return file_ctx.get_file_name(); }
+{
+    if (!filename.has_value())
+    {
+        size_t fname_len = file_ctx.get_file_name().length();
+        filename = std::string();
+
+        if (fname_len)
+        {
+            char* outbuf = const_cast<FileContext&>(file_ctx).get_UTF8_fname(&fname_len);
+            const char* fname = (outbuf != nullptr) ? outbuf : file_ctx.get_file_name().c_str();
+
+            size_t pos = 0;
+            while (pos < fname_len)
+            {
+                if (isprint((int)fname[pos]))
+                {
+                    (*filename) += fname[pos++];
+                }
+                else
+                {
+                    (*filename) += '|';
+                    bool add_space = false;
+                    while ((pos < fname_len) && !isprint((int)fname[pos]))
+                    {
+                        if (add_space)
+                            (*filename) += ' ';
+                        else
+                            add_space = true;
+
+                        int ch = 0xff & fname[pos];
+                        char buf[3];
+                        snprintf(buf, sizeof(buf), "%02X", ch);
+                        (*filename) += buf;
+                        pos++;
+                    }
+                    (*filename) += '|';
+                }
+            }
+
+            snort_free(outbuf);
+        }
+    }
+
+    return *filename;
+}
 
 double FileEvent::get_duration() const
 { return file_ctx.get_duration(); }
@@ -60,10 +111,16 @@ uint64_t FileEvent::get_total_bytes() const
 bool FileEvent::get_timedout() const
 { return file_ctx.get_timedout(); }
 
-const std::string FileEvent::get_sha256() const
-{ return (file_ctx.get_file_sig_sha256() ? file_ctx.sha_to_string(file_ctx.get_file_sig_sha256()) : std::string()); }
+const std::string& FileEvent::get_sha256() const
+{
+    if (!sha256.has_value())
+        sha256 = file_ctx.get_file_sig_sha256() ?
+            file_ctx.sha_to_string(file_ctx.get_file_sig_sha256()) : std::string();
+
+    return *sha256;
+}
 
-const std::string FileEvent::get_extracted_name() const
+const std::string& FileEvent::get_extracted_name() const
 { return file_ctx.get_extracted_name(); }
 
 bool FileEvent::get_extracted_cutoff() const
index 70b2cacc4d499b487fc545fa22360fd3f4bef263..f4b73a2ca73aa9f02b9c5677e1c077035dbefcae 100644 (file)
@@ -23,6 +23,8 @@
 #ifndef FILE_EVENTS_H
 #define FILE_EVENTS_H
 
+#include <optional>
+
 #include "file_api/file_cache.h"
 #include "file_events_ids.h"
 #include "framework/mp_data_bus.h"
@@ -128,22 +130,24 @@ class SO_PUBLIC FileEvent : public DataEvent
 public:
     FileEvent(const FileContext& data) : file_ctx(data) { }
 
-    std::string get_fuid() const;
+    uint64_t get_fuid() const;
     const std::string& get_source() const;
-    const std::string get_mime_type() const;
+    const char* get_mime_type() const;
     const std::string& get_filename() const;
     double get_duration() const;  // Returns duration in seconds (fractional)
     bool get_is_orig() const;
     uint64_t get_seen_bytes() const;
     uint64_t get_total_bytes() const;
     bool get_timedout() const;
-    const std::string get_sha256() const;
-    const std::string get_extracted_name() const;
+    const std::string& get_sha256() const;
+    const std::string& get_extracted_name() const;
     bool get_extracted_cutoff() const;
     uint64_t get_extracted_size() const;
 
 private:
     const FileContext& file_ctx;
+    mutable std::optional<std::string> sha256;
+    mutable std::optional<std::string> filename;
 };
 
 }
index c10caf5a5717b864424eefd67819030996b9156e..29b915c959e34feea2ca85f3ef012f9a5b131fbd 100644 (file)
@@ -54,7 +54,9 @@ uint64_t FileInfo::get_file_id() const { return file_id; }
 
 const std::string& FileInfo::get_file_name() const { return file_name; }
 
-std::string FileContext::get_mime_type() const { return std::string(); }
+const char* FileContext::get_mime_type() const { return ""; }
+
+char* FileContext::get_UTF8_fname(size_t*) { return nullptr; }
 
 FileDirection FileInfo::get_file_direction() const { return direction; }
 
@@ -97,7 +99,7 @@ TEST(pub_sub_file_events_test, file_event)
 
     FileEvent event(file_ctx);
 
-    CHECK(event.get_fuid() == std::to_string(fuid));
+    CHECK(event.get_fuid() == fuid);
     CHECK(event.get_source() == std::string());
     CHECK(event.get_mime_type() == std::string());
     CHECK(event.get_filename() == std::string(filename));
@@ -108,7 +110,7 @@ TEST(pub_sub_file_events_test, file_event)
     CHECK(event.get_timedout() == false);
     CHECK(event.get_sha256() == std::string());
     CHECK(event.get_extracted_name() == std::string());
-    CHECK(event.get_extracted_cutoff() == true);
+    CHECK(event.get_extracted_cutoff() == false);
     CHECK(event.get_extracted_size() == 0);
 }