]> git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #4505: extractor: add ftp logging
authorAnna Norokh -X (anorokh - SOFTSERVE INC at Cisco) <anorokh@cisco.com>
Thu, 7 Nov 2024 16:09:17 +0000 (16:09 +0000)
committerOleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) <oshumeik@cisco.com>
Thu, 7 Nov 2024 16:09:17 +0000 (16:09 +0000)
Merge in SNORT/snort3 from ~ANOROKH/snort3:extractor_ftp to master

Squashed commit of the following:

commit 56210e0e89a4ab1cafb2fa6f03f5ec8d5a4105c9
Author: anorokh <anorokh@cisco.com>
Date:   Thu Oct 31 12:27:46 2024 -0400

    extractor: address review comments

commit 11c34c621d2d08318c663dd049c3e6823fb47db6
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Thu Oct 31 14:08:31 2024 +0200

    extractor: move internal stuff out of snort namespace

commit 6b9bc7780c3badafb317158e1f0f27cbff1a3da5
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Thu Oct 31 12:00:58 2024 +0200

    extractor: fix memory management

    The inspector owns service/event extractor instances.
    Data handlers are split from instances and managed by data bus only.
    Flow data bumps the inspector's reference count.

commit ae80500b23ba88b835e0560c1ccbf8e99c7c041f
Author: anorokh <anorokh@cisco.com>
Date:   Tue Oct 29 08:13:53 2024 -0400

    ftp: reset cmd_size when reset cmd_str

commit 9ceac98772e6bb86404976162f3ca8ea6dcdf67e
Author: anorokh <anorokh@cisco.com>
Date:   Mon Oct 28 11:58:00 2024 -0400

    extractor: log on last response

commit 4b21cebdd076b810b4c11f1606cf47fd163f045c
Author: anorokh <anorokh@cisco.com>
Date:   Thu Oct 24 05:14:09 2024 -0400

    extractor: refactor code

    * rename field types
    * keep flow data intact to log multiple responses
    * reorder list of commands for more effective search

commit b279b45af550dcf7f671fdc88817f5476376afc5
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Mon Oct 21 17:43:52 2024 +0300

    extractor: enable logging for FTP aggregated event

commit e025bf510a92e4eca3da7cdd69cb520373a6c43d
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Mon Oct 21 17:19:48 2024 +0300

    extractor: delete unused headers

commit 5578678ba65ddadb06ef8ec2229318635fbdee2a
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Mon Oct 21 14:07:34 2024 +0300

    extractor: event handlers subscribe by themselves

    Flow data augmented with a callback to dump data whenever the flow gets deleted.

commit a67039d4d80d81e60f9d3c3e50b68756e9f83e61
Author: anorokh <anorokh@cisco.com>
Date:   Tue Oct 15 06:07:49 2024 -0400

    extractor: add user field

commit ade23b33e79dc1489b3e1c66c328b895584ef3cf
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Mon Oct 14 15:00:59 2024 +0300

    extractor: add imaginary transaction event to FTP

commit ea5869b7ff24e5426b7a0e0b97fc52f8e489fba0
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Mon Oct 14 10:49:02 2024 -0400

    extractor: update dev_notes.txt

commit c342f3d43fec88f1969128f52468664ba5707da9
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Wed Oct 23 18:00:50 2024 +0300

    doc: add a page about data logging feature

commit 349a85e29ed832050aa4e7661e66929e06f07fc5
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Mon Oct 14 11:14:22 2024 +0300

    extractor: rearrange source files

commit b17b1e5720e4843b2b4137a529dc1291f8282dbd
Author: Oleksii Shumeiko <oshumeik@cisco.com>
Date:   Fri Oct 11 12:30:28 2024 +0300

    extractor: introduce flow data

    Move enums to common place.

commit 7892d2a5c53166e29fbf4f373855085d8cdbf43f
Author: anorokh <anorokh@cisco.com>
Date:   Wed Oct 9 14:29:00 2024 -0400

    extractor: add ftp service implementation

31 files changed:
doc/user/CMakeLists.txt
doc/user/extractor.txt [new file with mode: 0644]
doc/user/features.txt
src/network_inspectors/extractor/CMakeLists.txt
src/network_inspectors/extractor/dev_notes.txt
src/network_inspectors/extractor/extractor.cc
src/network_inspectors/extractor/extractor.h
src/network_inspectors/extractor/extractor_csv_logger.cc
src/network_inspectors/extractor/extractor_csv_logger.h
src/network_inspectors/extractor/extractor_enums.h [new file with mode: 0644]
src/network_inspectors/extractor/extractor_event_handlers.h [deleted file]
src/network_inspectors/extractor/extractor_flow_data.cc [moved from src/network_inspectors/extractor/extractor_event.cc with 68% similarity]
src/network_inspectors/extractor/extractor_flow_data.h [new file with mode: 0644]
src/network_inspectors/extractor/extractor_ftp.cc [new file with mode: 0644]
src/network_inspectors/extractor/extractor_ftp.h [new file with mode: 0644]
src/network_inspectors/extractor/extractor_http.cc [moved from src/network_inspectors/extractor/extractor_http_event_handler.cc with 69% similarity]
src/network_inspectors/extractor/extractor_http.h [new file with mode: 0644]
src/network_inspectors/extractor/extractor_json_logger.cc
src/network_inspectors/extractor/extractor_json_logger.h
src/network_inspectors/extractor/extractor_logger.cc
src/network_inspectors/extractor/extractor_logger.h
src/network_inspectors/extractor/extractor_service.cc
src/network_inspectors/extractor/extractor_service.h
src/network_inspectors/extractor/extractor_writer.cc
src/network_inspectors/extractor/extractor_writer.h
src/network_inspectors/extractor/extractors.cc [new file with mode: 0644]
src/network_inspectors/extractor/extractors.h [new file with mode: 0644]
src/pub_sub/ftp_events.h
src/service_inspectors/ftp_telnet/ftpp_si.cc
src/service_inspectors/ftp_telnet/ftpp_si.h
src/service_inspectors/ftp_telnet/pp_ftp.cc

index 502cb2e921241776dbb9eea09a5c486631459a6c..2ce1d8cd5e2a7331ec17e6c83549f45ec6253757 100644 (file)
@@ -15,6 +15,7 @@ set (
     daq.txt
     dcerpc.txt
     errors.txt
+    extractor.txt
     features.txt
     file_processing.txt
     ftp.txt
diff --git a/doc/user/extractor.txt b/doc/user/extractor.txt
new file mode 100644 (file)
index 0000000..3a4da61
--- /dev/null
@@ -0,0 +1,145 @@
+Snort 3 can log IPS events with some meta data and dump packets. The Data
+Logging feature extends that ability to log protocol-specific data, sniffing
+traffic alongside normal inspection.
+
+
+==== Configurations
+
+The module's configuration consists of two parts:
+
+* global parameters
+  ** `formatting` - log record format
+  ** `output` - where to write logs
+* protocol-targeted parameters bind the targeted service and events with
+  filters and a set of fields to log
+  ** `service` - protocol name
+  ** `tenant_id` - a filter, apply the binding only for traffic marked with
+      the tenant ID
+  ** `on_events` - events in a protocol session to be logged
+  ** `fields` - data fields to log (if a field is not supported it will be ignored)
+
+Configurations from different bindings do not interfere. Among other
+things it allows tenants to get independent data logging configurations.
+
+    extractor =
+    {
+        formatting = 'csv',
+        output = 'stdout',
+
+        protocols =
+        {
+            { service = 'http', tenant_id = 1, on_events = 'eot', fields = 'ts, uri, host, method' },
+            { service = 'ftp', tenant_id = 1, on_events = 'request', fields = 'ts, command, arg' },
+            { service = 'http', tenant_id = 2, on_events = 'eot', fields = 'ts, uri' }
+        }
+    }
+
+==== Supported Parameters
+
+Services and their events:
+
+* HTTP, HTTP2
+  ** eot (request-response pair)
+* FTP
+  ** request
+  ** response
+  ** eot (a session defined by the following commands: APPE, DELE, RETR, STOR, STOU, ACCT, PORT, PASV, EPRT, EPSV)
+
+Common fields available for every service:
+
+* `ts` - timestamp of the current packet, which triggers logging
+* `uid` - connection id, to correlate log records related to the same flow
+* `id.orig_h` - client IP address
+* `id.orig_p` - client TCP port
+* `id.resp_h` - server IP address
+* `id.resp_p` - server TCP port
+* `pkt_num` - packet number
+
+Fields supported for HTTP:
+
+* `method` - verb used in HTTP request
+* `host` - Host header
+* `uri` - URI from request
+* `user_agent` - User-Agent header from client
+* `referrer` - Referer header
+* `origin` - Origin header from client
+* `version` - Version from request
+* `status_code` - status code returned by server
+* `status_msg` - status message returned by server
+* `trans_depth` - number of request-response pairs seen in the session
+
+Fields supported for FTP:
+
+* `command` - last command seen in a session
+* `arg` - request parameters
+* `user` - user name set for a session
+* `reply_code` - reply code from server in response to command
+* `reply_msg` - reply message from server in response to command
+* `file_size` - size of the file transferred
+* `data_channel.passive` - data channel mode
+* `data_channel.orig_h` - IP address of data channel originator
+* `data_channel.resp_h` - IP address of data channel receiving point
+* `data_channel.resp_p` - TCP port of data channel receiving point
+
+==== Example
+
+Adding the following lines to a default snort configuration (which supports FTP
+inspection) would print some FTP logs to standard output in CSV format.
+
+FTP sessions with basic fields:
+
+    extractor =
+    {
+        formatting = 'csv',
+        output = 'stdout',
+        protocols =
+        {
+            {service = 'ftp', on_events = 'eot', fields = 'ts, command, user'}
+        }
+    }
+
+Output:
+
+    #ts,command,user
+    946684800.000014,PORT,ftptest
+    946684800.000016,RETR,
+    946684800.000034,PORT,anonymous
+    946684800.000036,RETR,
+    946684800.000053,PORT,sfuser
+    946684800.000055,RETR,
+
+Or FTP requests with the same set of fields:
+
+    extractor =
+    {
+        formatting = 'csv',
+        output = 'stdout',
+        protocols =
+        {
+            {service = 'ftp', on_events = 'request', fields = 'ts, command, user'}
+        }
+    }
+
+Output:
+
+    #ts,command,user
+    946684800.000005,USER,ftptest
+    946684800.000007,PASS,
+    946684800.000009,SYST,
+    946684800.000011,TYPE,
+    946684800.000013,PORT,
+    946684800.000015,RETR,
+    946684800.000018,QUIT,
+    946684800.000027,USER,anonymous
+    946684800.000029,PASS,
+    946684800.000031,TYPE,
+    946684800.000033,PORT,
+    946684800.000035,RETR,
+    946684800.000037,SYST,
+    946684800.000039,QUIT,
+    946684800.000048,USER,sfuser
+    946684800.000050,PASS,
+    946684800.000052,PORT,
+    946684800.000054,RETR,
+    946684800.000057,QUIT,
+
index 1bcb323d558242668026ca23716eb15a0c451cf9..1db7db9ce2c985682f6745c12065e51e5279e28d 100644 (file)
@@ -97,6 +97,10 @@ include::pop_imap.txt[]
 
 include::port_scan.txt[]
 
+=== Protocol Data Logging
+
+include::extractor.txt[]
+
 === Sensitive Data Filtering
 
 include::sensitive_data.txt[]
index a7909775928b486994a0c6967bcfbf0a1a3cdcdb..e8c0f6a1d947c47f00fc49fa58dbd1e41ceecacb 100644 (file)
@@ -3,9 +3,11 @@ set( FILE_LIST
     extractor.h
     extractor_csv_logger.cc
     extractor_csv_logger.h
-    extractor_event.cc
-    extractor_event_handlers.h
-    extractor_http_event_handler.cc
+    extractor_enums.h
+    extractor_flow_data.cc
+    extractor_flow_data.h
+    extractor_ftp.cc
+    extractor_http.cc
     extractor_json_logger.cc
     extractor_json_logger.h
     extractor_logger.cc
@@ -14,6 +16,8 @@ set( FILE_LIST
     extractor_service.h
     extractor_writer.cc
     extractor_writer.h
+    extractors.cc
+    extractors.h
 )
 
 add_library(extractor OBJECT ${FILE_LIST})
index d3acffaa388b59c26bd2f309cb4114c7f8c0d69a..5fa9099de81f2e8ec29479fd8df8225051d97f38 100644 (file)
+This directory contains the data logger framework and targeted service extractors.
+
 Extractor is a global network inspector that logs flow data upon receiving
 a flow event.
 
-Supported services:
- * HTTP
- * HTTP2
-
-Supported events:
- * end of HTTP transaction (request-response pair)
-
-An example configuration follows:
-
-    extractor =
-    {
-        protocols =
-        {
-            service = 'http',
-            tenant_id = 1,
-            on_events = 'eot',
-            fields = 'ts, uri, host, method'
-        }
-        {
-            service = 'http',
-            tenant_id = 2,
-            on_events = 'eot',
-            fields = 'ts, uri'
-        }
-    }
-
-Each tenant can have its own protocol configuration.
-
-A list of common fields which are logged:
- * ts (timestamp)
- * uid (connection id)
- * id.orig_h (client IP address)
- * id.orig_p (client TCP port)
- * id.resp_h (server IP address)
- * id.resp_p (server TCP port)
- * pkt_num (packet number)
-
-The following fields are supported for HTTP:
- * method
- * host
- * uri
- * user_agent
- * referrer
- * origin
- * version
- * status_code
- * status_msg
- * trans_depth
- * request_body_len
- * response_body_len
- * info_code
- * info_msg
- * proxied
- * orig_filenames
- * resp_filenames
+==== General Design
+
+As one can notice from extractor's configuration, the targeted service and
+events, filters and a set of fields are bound together as a single item of
+`extractor.protocols` array. Configurations from different bindings do not
+interfere. Among other things it allows tenants to get independent data
+logging configurations.
+
+The module's configuration scheme reflects how extractor works under the hood.
+Global settings (like `formatting` and `output`) are configured just once.
+In contrast, every logging rule in the `protocols` array creates a new logging
+context, a service extractor.
+
+==== Logger
+
+`ExtractorLogger` is a base class which accepts data from a service extractor,
+transforms and pushes data further out of Snort. It has two purposes:
+
+* formatting transformation
+* writing to a configured destination
+
+Formatting is performed within a given `ExtractorLogger` class.
+Interface methods of the base class accept a fixed set of data types only.
+Thus, every specialization of `ExtractorLogger` must know how to fit those
+types in the targeted formatting. Namely:
+
+* null-terminated string (`char*`)
+* sub-string, without null symbol (`char*`, `size_t`)
+* number (`uint64_t`)
+* timestamp (`struct timeval`)
+* IP address (`snort::SfIp`)
+* flag (`bool`)
+
+The idea is to keep this set to a bare minimum (so that a new specialization
+won't need to support a large range of types). Yet, big enough to cover common
+data types.
+
+A log unit is a log record. It is enclosed by `ExtractorLogger::open_record` and
+`ExtractorLogger::close_record` calls. A header (or a footer) can
+be added. They prepend (append) the set of log records with meta info.
+
+`ExtractorWriter` interface defines the set of methods to print out formatted
+data.
+
+Additionally, `StdExtractorWriter` (a writer to standard output) implements
+synchronization between threads. But that is not required, and is done just for
+user convenience. `StdExtractorWriter` is not a main writer, nor performant.
+An `ExtractorWriter` specialization may do things in an asynchronous way.
+
+There are plans to convert `ExtractorWriter` to `Connector` type.
+
+==== Logging Context
+
+A processing unit of Extractor is a service extractor.
+Each configuration entry in `extractor.protocols` array instantiates a service
+extractor of `ExtractorEvent` type.
+
+`ExtractorEvent` contains an entire logging context and does the following:
+
+* subscribes to events of the targeted protocol (`service` and `on_events`)
+* provides data field extracting functions
+* accepts a configured set of fields for logging (from the module's parser)
+* handles an event from data bus:
+  ** writes data out immediately via `ExtractorLogger`
+  ** accumulates data on the flow for further aggregation with subsequent events
+
+Also, each specific class of a service extractor spawns a specific
+`snort::DataHandler` (which is immediately subscribed to data bus events). The
+handler memory is solely managed by data bus. It is guaranteed that service
+extractor lives longer than a handler. This allows safe callbacks from event
+handler to a service extractor. So, memory management is split between data
+bus (event handlers) and the inspector (service extractors).
+
+===== Logging Context Over Reload
+With the memory management split between inspector manager and data bus, there
+is a kind of overlap occurring during configuration reload scenario.
+
+[options="header"]
+|===============================================================================
+| Inspector | Data Bus | Flow and Flow Data
+| **Before reload:**
+  1st instance of extractor is active
+| 1st event handler is alive and references the 1st instance
+| events from a flow go via 1st event handler right to 1st inspector
+| **During reload:**
+  2nd instance is created, 1st instance is moved to trash and loses its event handler
+| 1st event handler is deleted completely, 2nd event handler becomes active
+| flow is not ended, but packets are not processed here
+| **After reload:**
+  1st instance in the trash still may process callbacks from the original flow data,
+  2nd instance sees new events and processes them, but using the original
+  flow data; if 2nd instance swaps flow data (creates its own) it will start
+  receiving callback from the new flow data
+| 2nd event handler is active and redirects all events to 2nd inspector
+| events from a flow go to 2nd inspector; however, if the original flow
+  data persists the 2nd inspector has to deal with it
+|===============================================================================
+
+
+==== Filtering
+
+Filtering helps to decrease the amount of traffic being logged. The goal is to
+keep performance overhead low. The check action is performed as early as
+possible, at the beginning of each event handling function.
+
+Currently, filtering by tenant ID is supported.
+
+*(Filtering by IP and port ranges yet to be implemented)*
+
+==== Extracting Data
+
+Data path from an inspector up to a writing function should conform to the
+following targets:
+
+* be performant (make the path short in terms of number of stack frames and
+  other service function calls)
+* be configurable (to adjust fields set for logging)
+* be extensible, so that any new service/event/field can be added preserving
+  all static/dynamic checks
+
+The general path includes an inspector's protocol event, an event's getter
+functions, data extracting functions, formatting and writing functions.
+
+[options="header"]
+|===============================================================================
+| Layer               | Data types       | Notes
+| 1. Inspector        | (any type)       | generates an event (calling `DataEvent` constructor)
+| 2. Data Event       | (any type)       | provides getter functions, resides in snort3/src/pub_sub/
+| 3. Extractor Event  | `char*` `uint64_t` `timeval` `SfIp` `bool` | converts any type to a fixed type
+| 4. Extractor Logger | `char*` `uint64_t` `timeval` `SfIp` `bool` | decorates a fixed type to fit formatting
+| 5. Extractor Writer | `char*` `uint64_t` | accepts basic types: a number and a text string
+|===============================================================================
+
+_Inspector layer_ focuses on performance. It means we seek a minimal overhead
+on throwing an event (to add a bare minimum of new conditions and checks).
+Also, an event's constructor stores a reference to inspector's processing
+context. Ideally, it would be just inspector's flow data, which means no
+savings in the event itself (since flow data can be retrieved directly from the
+flow).
+
+A _Data Event_ specialization implements getter methods. They can present a
+general piece of aggregated data (say, a whole request or a transaction) or a
+specific property (like, a flag from protocol's state). A getter may cache an
+intermediate result of extra computations if any. Having _Data Event_
+implemented as usual in snort3/src/pub_sub/*.cc is just fine. It doesn't
+affect performance much (compared to a header-only implementation).
+
+_Extractor Event_ extracts data from an event according to the configured set
+of fields. `ExtractorEvent` class carries the following entities:
+
+* a generic data type with an extract function, `DataField` template and its
+  common instances define how to convert any type (a provided context) to a
+  given fixed type:
+    ** `const char*` for null-terminated strings
+    ** `snort::SfIp` for IP addresses
+    ** `uint64_t` for numbers
+    ** `struct timeval` for timestamps
+    ** `std::pair<const char*, uint16_t>` for sub-strings, which have just
+       length without null symbol
+* common extracting functions (IP address, port number, packet timestamp,
+  etc.)
+* a generic logging function, which can log a configured set of fields right
+  out of the provided context (any type)
+
+Template implementation of the generic data type `DataField`, its extracting
+function `Ret (*get)(Context...)` and the logging function `log(const T& fields,
+Context... context)` ensure:
+
+* static type checks are performed during compilation and everything matches on the
+  data path
+* the data path is easily extensible with a new data type, context types,
+  extracting or logging function (and customizable as well)
+
+Since _Extractor Logger_ interface accepts just a limited set of (basic) types,
+it should be able to decorate a data field and put it into a targeted format.
+
+_Extractor Writer_ is the final layer before data leaves Snort. It may
+implement output stream and external resource management (like, file
+rotation, socket operations), synchronization, buffering, queuing if needed.
+
+==== Flow Data
+
+Extractor's flow data is a bit different from the usual approach for other
+inspectors. There is a need to store multiple actual types on the flow. Each
+service extractor may get its own context or no context at all.
+
+`ExtractorFlowData` is the basic flow data type which:
+
+* complies with snort's flow framework, which expects one `FlowData` type per
+  inspector
+* can be easily extended with a new service simply by providing `ServiceType type_id`
+  constant in the derived class
+* does static and dynamic type checks
+    ** if the underlying flow changes its service type, the actual type
+       of flow data changes as well (both are `ExtractorFlowData`, but distinct
+       derivatives)
+    ** `T* ExtractorFlowData::get(snort::Flow* f)` method ensures that
+       retrieved flow data is of the desired type
+* the virtual destructor allows a derived class to make a final callback to
+  the owning service extractor whenever the flow gets deleted (to make a
+  partial log record of an incomplete or abandoned session)
+
+Flow Data instance must bump reference count of the corresponding Extractor
+inspector. This is to make sure that the inspector's extractor service
+instance is always available for a callback from the flow data.
 
+For reasons mentioned in Logging Context Over Reload section, flow data
+content must be understood and processed properly (accepted or rejected) by
+service extractors of different generations (before and after configuration
+reload).
index f2f6fddff598e469a68d0088c42c29ed39824d7f..b90471abf54c4b9bd3c634f59eae1539e8ed1d47 100644 (file)
@@ -31,9 +31,8 @@
 #include "log/messages.h"
 #include "main/snort_config.h"
 #include "protocols/packet.h"
-#include "pub_sub/http_events.h"
 
-#include "extractor_event_handlers.h"
+#include "extractors.h"
 #include "extractor_logger.h"
 #include "extractor_service.h"
 
@@ -48,7 +47,7 @@ THREAD_LOCAL ProfileStats extractor_perf_stats;
 
 static const Parameter extractor_proto_params[] =
 {
-    { "service", Parameter::PT_ENUM, "http", nullptr,
+    { "service", Parameter::PT_ENUM, "http | ftp", nullptr,
       "service to extract from" },
 
     { "tenant_id", Parameter::PT_INT, "0:max32", "0",
@@ -180,7 +179,7 @@ Extractor::Extractor(ExtractorModule* m)
 
     for (const auto& p : cfg.protocols)
     {
-        auto s = ExtractorService::make_service(p, format, output);
+        auto s = ExtractorService::make_service(*this, p, format, output);
 
         if (s)
             services.push_back(s);
index f4f492097fe3513223e7d1ef85ac855135854568..17f14dbe90989dc4e87258f74c38e60770561c22 100644 (file)
@@ -29,9 +29,8 @@
 #include "main/snort_config.h"
 #include "profiler/profiler.h"
 
-#include "extractor_logger.h"
+#include "extractor_enums.h"
 #include "extractor_service.h"
-#include "extractor_writer.h"
 
 #define S_NAME "extractor"
 #define s_help "extracts protocol specific data"
@@ -57,7 +56,7 @@ struct ExtractorConfig
 
 static const PegInfo extractor_pegs[] =
 {
-    { CountType::SUM, "total_events", "total extractor events" },
+    { CountType::SUM, "total_events", "total events processed by extractor" },
     { CountType::END, nullptr, nullptr }
 };
 
index a4f95f7223b09b5df9b4fbb09713ded964f3284d..adf6b8ed576e70784a344b1ba5e65c898100bf04 100644 (file)
@@ -98,6 +98,13 @@ void CsvExtractorLogger::add_field(const char*, const snort::SfIp& v)
     writer->write(buf);
 }
 
+void CsvExtractorLogger::add_field(const char*, bool v)
+{
+    first_write ? []() { first_write = false; } () : writer->write(",");
+
+    writer->write(v ? "true" : "false");
+}
+
 CsvExtractorLogger::~CsvExtractorLogger()
 {
     delete writer;
index 9a104cedd11c67d6c5f1ebe274794d38b4235d44..5c44b8b45451c181b3b7c45471d2845f6f09e60c 100644 (file)
@@ -42,6 +42,7 @@ public:
     void add_field(const char*, uint64_t) override;
     void add_field(const char*, struct timeval) override;
     void add_field(const char*, const snort::SfIp&) override;
+    void add_field(const char*, bool) override;
     void open_record() override;
     void close_record() override;
 
diff --git a/src/network_inspectors/extractor/extractor_enums.h b/src/network_inspectors/extractor/extractor_enums.h
new file mode 100644 (file)
index 0000000..57f7b5e
--- /dev/null
@@ -0,0 +1,128 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractor_enums.h author Cisco
+
+#ifndef EXTRACTOR_ENUMS_H
+#define EXTRACTOR_ENUMS_H
+
+class ServiceType
+{
+public:
+    enum Value : uint8_t
+    {
+        HTTP,
+        FTP,
+        UNDEFINED,
+        MAX
+    };
+
+    ServiceType() = default;
+    constexpr ServiceType(Value a) : v(a) {}
+    template<typename T> constexpr ServiceType(T a) : v(Value(a)) {}
+
+    constexpr operator Value() const { return v; }
+    explicit operator bool() const = delete;
+
+    const char* c_str() const
+    {
+        switch (v)
+        {
+        case HTTP:
+            return "http";
+        case FTP:
+            return "ftp";
+        case UNDEFINED: // fallthrough
+        case MAX:       // fallthrough
+        default:
+            return "(not set)";
+        }
+    }
+
+private:
+    Value v = UNDEFINED;
+};
+
+class FormatType
+{
+public:
+    enum Value : uint8_t
+    {
+        CSV,
+        JSON,
+        MAX
+    };
+
+    FormatType() = default;
+    constexpr FormatType(Value a) : v(a) {}
+    template<typename T> constexpr FormatType(T a) : v((Value)a) {}
+
+    constexpr operator Value() const { return v; }
+    explicit operator bool() const = delete;
+
+    const char* c_str() const
+    {
+        switch (v)
+        {
+        case CSV:
+            return "csv";
+        case JSON:
+            return "json";
+        case MAX: // fallthrough
+        default:
+            return "(not set)";
+        }
+    }
+
+private:
+    Value v = CSV;
+};
+
+class OutputType
+{
+public:
+    enum Value : uint8_t
+    {
+        STD,
+        MAX
+    };
+
+    OutputType() = default;
+    constexpr OutputType(Value a) : v(a) {}
+    template<typename T> constexpr OutputType(T a) : v((Value)a) {}
+
+    constexpr operator Value() const { return v; }
+    explicit operator bool() const = delete;
+
+    const char* c_str() const
+    {
+        switch (v)
+        {
+        case STD:
+            return "stdout";
+        case MAX: // fallthrough
+        default:
+            return "(not set)";
+        }
+    }
+
+private:
+    Value v = STD;
+};
+
+
+#endif
diff --git a/src/network_inspectors/extractor/extractor_event_handlers.h b/src/network_inspectors/extractor/extractor_event_handlers.h
deleted file mode 100644 (file)
index 5bc2ff2..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-//--------------------------------------------------------------------------
-// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License Version 2 as published
-// by the Free Software Foundation.  You may not use, modify or distribute
-// this program under any other version of the GNU General Public License.
-//
-// This program is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-//--------------------------------------------------------------------------
-// extractor_event_handlers.h author Maya Dagon <mdagon@cisco.com>
-
-#ifndef EXTRACTOR_EVENT_HANDLERS_H
-#define EXTRACTOR_EVENT_HANDLERS_H
-
-#include <sys/time.h>
-#include <vector>
-
-#include "flow/flow_key.h"
-#include "framework/data_bus.h"
-#include "sfip/sf_ip.h"
-
-#include "extractor.h"
-#include "extractor_logger.h"
-
-template <typename Ret, class... Context>
-struct DataField
-{
-    DataField(const char* name, Ret (*get)(Context...)) : name(name), get(get) { }
-
-    const char* name;
-    Ret (*get)(Context...);
-};
-
-class Field;
-
-namespace snort
-{
-
-class ExtractorEvent
-{
-public:
-    using StrGetFn = const char* (*) (const DataEvent*, const Packet*, const Flow*);
-    using StrField = DataField<const char*, const DataEvent*, const Packet*, const Flow*>;
-    using SipGetFn = const SfIp& (*) (const DataEvent*, const Packet*, const Flow*);
-    using SipField = DataField<const SfIp&, const DataEvent*, const Packet*, const Flow*>;
-    using NumGetFn = uint64_t (*) (const DataEvent*, const Packet*, const Flow*);
-    using NumField = DataField<uint64_t, const DataEvent*, const Packet*, const Flow*>;
-    using NtsGetFn = struct timeval (*) (const DataEvent*, const Packet*, const Flow*);
-    using NtsField = DataField<struct timeval, const DataEvent*, const Packet*, const Flow*>;
-
-    static FlowHashKeyOps& get_hash()
-    {
-        static thread_local FlowHashKeyOps flow_key_ops(0);
-        return flow_key_ops;
-    }
-
-    virtual std::vector<const char*> get_field_names() const;
-
-protected:
-    ExtractorEvent(uint32_t tid, ExtractorLogger& l)
-        : tenant_id(tid), logger(l) {}
-
-    template<typename T, class... Context>
-    void log(const T& fields, Context... context)
-    {
-        for (const auto& f : fields)
-            logger.add_field(f.name, f.get(context...));
-    }
-
-    uint32_t tenant_id;
-    ExtractorLogger& logger;
-
-    std::vector<NtsField> nts_fields;
-    std::vector<SipField> sip_fields;
-    std::vector<NumField> num_fields;
-    std::vector<StrField> str_fields;
-};
-
-class HttpExtractorEventHandler : public DataHandler, public ExtractorEvent
-{
-public:
-    using SubGetFn = const Field& (*) (const DataEvent*, const Packet*, const Flow*);
-    using SubField = DataField<const Field&, const DataEvent*, const Packet*, const Flow*>;
-
-    HttpExtractorEventHandler(uint32_t tenant, const std::vector<std::string>& flds, ExtractorLogger& l);
-
-    void handle(DataEvent&, Flow*) override;
-    std::vector<const char*> get_field_names() const override;
-
-private:
-    std::vector<SubField> sub_fields;
-};
-
-}
-
-#endif
similarity index 68%
rename from src/network_inspectors/extractor/extractor_event.cc
rename to src/network_inspectors/extractor/extractor_flow_data.cc
index f9353e30557d1e07ea2520b169b5f1f9184da1a7..6850f88c214dcb23a84a0a041b6c104e7a03ddb5 100644 (file)
 // with this program; if not, write to the Free Software Foundation, Inc.,
 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 //--------------------------------------------------------------------------
-// extractor_event.cc author Cisco
+// extractor_flow_data.cc author Cisco
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
-#include "extractor_event_handlers.h"
+#include "extractor_flow_data.h"
 
-using namespace snort;
-using namespace std;
+const unsigned ExtractorFlowData::data_id = snort::FlowData::create_flow_data_id();
 
-vector<const char*> ExtractorEvent::get_field_names() const
-{
-    vector<const char*> res;
-
-    for (auto& f : nts_fields)
-        res.push_back(f.name);
-
-    for (auto& f : sip_fields)
-        res.push_back(f.name);
-
-    for (auto& f : num_fields)
-        res.push_back(f.name);
-
-    for (auto& f : str_fields)
-        res.push_back(f.name);
-
-    return res;
-}
diff --git a/src/network_inspectors/extractor/extractor_flow_data.h b/src/network_inspectors/extractor/extractor_flow_data.h
new file mode 100644 (file)
index 0000000..ea22955
--- /dev/null
@@ -0,0 +1,55 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractor_flow_data.h author Cisco
+
+#ifndef EXTRACTOR_FLOW_DATA_H
+#define EXTRACTOR_FLOW_DATA_H
+
+#include "flow/flow.h"
+#include "flow/flow_data.h"
+
+#include "extractor_enums.h"
+
+class ExtractorFlowData : public snort::FlowData
+{
+public:
+    ~ExtractorFlowData() override {}
+
+    template<typename T>
+    static T* get(snort::Flow* f)
+    {
+        auto fd = reinterpret_cast<ExtractorFlowData*>(f->get_flow_data(data_id));
+
+        if (fd and T::type_id == fd->type)
+            return reinterpret_cast<T*>(fd);
+
+        f->free_flow_data(data_id);
+
+        return nullptr;
+    }
+
+protected:
+    ExtractorFlowData(ServiceType type, snort::Inspector& insp)
+        : FlowData(data_id, &insp), type(type) {}
+
+private:
+    const ServiceType type;
+    static const unsigned data_id;
+};
+
+#endif
diff --git a/src/network_inspectors/extractor/extractor_ftp.cc b/src/network_inspectors/extractor/extractor_ftp.cc
new file mode 100644 (file)
index 0000000..e3e75b2
--- /dev/null
@@ -0,0 +1,665 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractor_ftp.cc author Anna Norokh <anorokh@cisco.com>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "extractor_ftp.h"
+
+#include <sys/time.h>
+
+#include "detection/detection_engine.h"
+#include "flow/flow_key.h"
+#include "profiler/profiler.h"
+#include "pub_sub/ftp_events.h"
+#include "service_inspectors/ftp_telnet/ftpp_si.h"
+#include "sfip/sf_ip.h"
+#include "utils/util.h"
+#include "utils/util_net.h"
+
+#include "extractor.h"
+#include "extractor_enums.h"
+#include "extractor_flow_data.h"
+
+#define FILE_STATUS_OK "150"
+
+using namespace snort;
+using namespace std;
+
+namespace req
+{
+static pair<const char*, uint16_t> get_cmd(const DataEvent* event, const Packet*, const Flow*)
+{
+    const auto& req = ((const FtpRequestEvent*)event)->get_request();
+    return {req.cmd_begin, req.cmd_size};
+}
+
+static pair<const char*, uint16_t> get_arg(const DataEvent* event, const Packet*, const Flow*)
+{
+    const auto& req = ((const FtpRequestEvent*)event)->get_request();
+    return {req.param_begin, req.param_size};
+}
+
+static pair<const char*, uint16_t> get_user(const DataEvent* event, const Packet*, const Flow*)
+{
+    const auto& req = ((const FtpRequestEvent*)event)->get_request();
+    const auto cmd = string(req.cmd_begin, req.cmd_size);
+    if (cmd == "USER")
+        return {req.param_begin, req.param_size};
+
+    return {};
+}
+
+static const map<string, ExtractorEvent::StrGetFn> sub_str_getters =
+{
+    {"command", get_cmd},
+    {"arg", get_arg},
+    {"user", get_user},
+};
+}
+
+FtpRequestExtractor::FtpRequestExtractor(Extractor& i, ExtractorLogger& l,
+    uint32_t t, const vector<string>& fields) : ExtractorEvent(i, l, t)
+{
+    for (const auto& f : fields)
+    {
+        if (append(nts_fields, nts_getters, f))
+            continue;
+        if (append(sip_fields, sip_getters, f))
+            continue;
+        if (append(num_fields, num_getters, f))
+            continue;
+        if (append(str_fields, req::sub_str_getters, f))
+            continue;
+    }
+
+    DataBus::subscribe(ftp_pub_key, FtpEventIds::FTP_REQUEST, new Req(*this, S_NAME));
+}
+
+void FtpRequestExtractor::handle(DataEvent& event, Flow* flow)
+{
+    // cppcheck-suppress unreadVariable
+    Profile profile(extractor_perf_stats);
+
+    uint32_t tid = 0;
+
+#ifndef DISABLE_TENANT_ID
+    tid = flow->key->tenant_id;
+#endif
+
+    if (tenant_id != tid)
+        return;
+
+    extractor_stats.total_event++;
+
+    Packet* packet = DetectionEngine::get_current_packet();
+
+    logger.open_record();
+    log(nts_fields, &event, packet, flow);
+    log(sip_fields, &event, packet, flow);
+    log(num_fields, &event, packet, flow);
+    log(str_fields, &event, packet, flow, logger.is_strict());
+    logger.close_record();
+}
+
+static uint64_t parse_last_num(const char *str, uint16_t size)
+{
+    constexpr uint8_t max_digits = 20;
+    char num_str[max_digits + 1] = {};
+    uint8_t pos = max_digits;
+
+    for (size_t i = size; i > 0; --i)
+    {
+        char c = str[i - 1];
+        if (isdigit(c))
+        {
+            num_str[--pos] = c;
+        }
+        else if (pos < max_digits)
+            break;
+    }
+
+    return (pos < max_digits) ? stoull(&num_str[pos]) : 0;
+}
+
+namespace resp
+{
+static pair<const char*, uint16_t> get_code(const DataEvent* event, const Packet*, const Flow*)
+{
+    const auto& response = ((const FtpResponseEvent*)event)->get_response();
+    return {response.rsp_begin, response.rsp_size};
+}
+
+static pair<const char*, uint16_t> get_msg(const DataEvent* event, const Packet*, const Flow*)
+{
+    const auto& response = ((const FtpResponseEvent*)event)->get_response();
+    return {response.msg_begin, response.msg_size};
+}
+
+static const SfIp& get_orig_ip(const DataEvent* event, const Packet*, const Flow*)
+{
+    if (((const FtpResponseEvent*)event)->is_passive())
+        return ((const FtpResponseEvent*)event)->get_client_ip();
+    else
+        return ((const FtpResponseEvent*)event)->get_server_ip();
+}
+
+static const SfIp& get_resp_ip(const DataEvent* event, const Packet*, const Flow*)
+{
+    if (((const FtpResponseEvent*)event)->is_passive())
+        return ((const FtpResponseEvent*)event)->get_server_ip();
+    else
+        return ((const FtpResponseEvent*)event)->get_client_ip();
+}
+
+static uint64_t get_resp_port(const DataEvent* event, const Packet*, const Flow*)
+{
+    if (((const FtpResponseEvent*)event)->is_passive())
+        return (uint64_t)((const FtpResponseEvent*)event)->get_server_port();
+    else
+        return (uint64_t)((const FtpResponseEvent*)event)->get_client_port();
+}
+
+static uint64_t get_file_size(const DataEvent* event, const Packet*, const Flow*)
+{
+    const auto& resp = ((const FtpResponseEvent*)event)->get_response();
+    const auto& code = string(resp.rsp_begin, resp.rsp_size);
+
+    if (code == FILE_STATUS_OK)
+        return parse_last_num(resp.msg_begin, resp.msg_size);
+
+    return 0;
+}
+
+static int8_t get_mode(const DataEvent* event, const Packet*, const Flow*)
+{
+    return ((const FtpResponseEvent*)event)->get_mode();
+}
+
+static const map<string, ExtractorEvent::StrGetFn> sub_str_getters =
+{
+    {"reply_code", get_code},
+    {"reply_msg", get_msg},
+};
+
+static const map<string, ExtractorEvent::NumGetFn> sub_num_getters =
+{
+    {"file_size", get_file_size},
+    {"data_channel.resp_p", get_resp_port}
+};
+
+static const map<string, ExtractorEvent::SipGetFn> sub_sip_getters =
+{
+    {"data_channel.orig_h", get_orig_ip},
+    {"data_channel.resp_h", get_resp_ip}
+};
+
+static const map<string, FtpResponseExtractor::SubGetFn> sub_getters =
+{
+    {"data_channel.passive", get_mode},
+};
+}
+
+FtpResponseExtractor::FtpResponseExtractor(Extractor& i, ExtractorLogger& l,
+    uint32_t t, const vector<string>& fields) : ExtractorEvent(i, l, t)
+{
+    for (const auto& f : fields)
+    {
+        if (append(nts_fields, nts_getters, f))
+            continue;
+        if (append(sip_fields, sip_getters, f))
+            continue;
+        if (append(sip_fields, resp::sub_sip_getters, f))
+            continue;
+        if (append(num_fields, num_getters, f))
+            continue;
+        if (append(num_fields, resp::sub_num_getters, f))
+            continue;
+        if (append(str_fields, resp::sub_str_getters, f))
+            continue;
+        if (append(sub_fields, resp::sub_getters, f))
+            continue;
+    }
+
+    DataBus::subscribe(ftp_pub_key, FtpEventIds::FTP_RESPONSE, new Resp(*this, S_NAME));
+}
+
+template<>
+void ExtractorEvent::log<vector<FtpResponseExtractor::SubField>, DataEvent*, Packet*, Flow*, bool>(
+    const vector<FtpResponseExtractor::SubField>& fields, DataEvent* event, Packet* pkt, Flow* flow, bool strict)
+{
+    for (const auto& f : fields)
+    {
+        const auto mode = f.get(event, pkt, flow);
+        if (mode != FTPP_XFER_NOT_SET)
+            mode == FTPP_XFER_PASSIVE ? logger.add_field(f.name, true) : logger.add_field(f.name, false);
+        else if (strict)
+            logger.add_field(f.name, "");
+    }
+}
+
+void FtpResponseExtractor::handle(DataEvent& event, Flow* flow)
+{
+    // cppcheck-suppress unreadVariable
+    Profile profile(extractor_perf_stats);
+
+    uint32_t tid = 0;
+
+#ifndef DISABLE_TENANT_ID
+    tid = flow->key->tenant_id;
+#endif
+
+    if (tenant_id != tid)
+        return;
+
+    extractor_stats.total_event++;
+
+    Packet* packet = DetectionEngine::get_current_packet();
+
+    logger.open_record();
+    log(nts_fields, &event, packet, flow);
+    log(sip_fields, &event, packet, flow);
+    log(num_fields, &event, packet, flow);
+    log(str_fields, &event, packet, flow, logger.is_strict());
+    log(sub_fields, &event, packet, flow, logger.is_strict());
+    logger.close_record();
+}
+
+// Names of the base-class fields followed by the transfer-mode fields.
+vector<const char*> FtpResponseExtractor::get_field_names() const
+{
+    vector<const char*> names = ExtractorEvent::get_field_names();
+
+    for (const auto& field : sub_fields)
+        names.emplace_back(field.name);
+
+    return names;
+}
+
+// Per-flow state of the aggregated FTP extractor: the request being tracked
+// together with what was collected from its responses.
+class FtpExtractorFlowData : public ExtractorFlowData
+{
+public:
+    // Tag compared by ExtractorFlowData::get<FtpExtractorFlowData>()
+    static constexpr ServiceType type_id = ServiceType::FTP;
+
+    FtpExtractorFlowData(FtpExtractor& owner)
+        : ExtractorFlowData(type_id, owner.get_inspector()), owner(owner) {}
+
+    // Data not yet logged is handed to the owner when the flow data goes away
+    ~FtpExtractorFlowData() override
+    {
+        if (has_data)
+            owner.dump(*this);
+    }
+
+    void reset();
+
+    // request
+    string cmd;
+    string arg;
+    string usr;
+    // response
+    string code;
+    string msg;
+    uint64_t file_size = 0;
+
+    // data channel
+    int8_t mode = FTPP_XFER_NOT_SET;
+    SfIp orig_h = {};
+    SfIp resp_h = {};
+    uint64_t resp_p = 0;
+
+    // timestamp of the packet that carried the stored command
+    struct timeval ts = {};
+    // true once a loggable command is stored; checked by the destructor
+    bool has_data = false;
+
+private:
+    FtpExtractor& owner;
+};
+
+// Accessors over FtpExtractorFlowData and the field-name tables that map
+// configured names to them. String accessors return pointers into the flow
+// data's own std::string members.
+namespace flow
+{
+static const char* get_cmd(const FtpExtractorFlowData& fd)
+{
+    return fd.cmd.c_str();
+}
+
+static const char* get_arg(const FtpExtractorFlowData& fd)
+{
+    return fd.arg.c_str();
+}
+
+static const char* get_user(const FtpExtractorFlowData& fd)
+{
+    return fd.usr.c_str();
+}
+
+static const char* get_code(const FtpExtractorFlowData& fd)
+{
+    return fd.code.c_str();
+}
+
+static const char* get_msg(const FtpExtractorFlowData& fd)
+{
+    return fd.msg.c_str();
+}
+
+static const SfIp& get_orig_ip(const FtpExtractorFlowData& fd)
+{
+    return fd.orig_h;
+}
+
+static const SfIp& get_resp_ip(const FtpExtractorFlowData& fd)
+{
+    return fd.resp_h;
+}
+
+static uint64_t get_file_size(const FtpExtractorFlowData& fd)
+{
+    return fd.file_size;
+}
+
+static uint64_t get_resp_port(const FtpExtractorFlowData& fd)
+{
+    return fd.resp_p;
+}
+
+static int8_t get_mode(const FtpExtractorFlowData& fd)
+{
+    return fd.mode;
+}
+
+// string fields
+static const map<string, FtpExtractor::FdBufGetFn> fd_buf_getters =
+{
+    {"command", get_cmd},
+    {"arg", get_arg},
+    {"user", get_user},
+    {"reply_code", get_code},
+    {"reply_msg", get_msg}
+};
+
+// address fields
+static const map<string, FtpExtractor::FdSipGetFn> fd_sip_getters =
+{
+    {"data_channel.orig_h", get_orig_ip},
+    {"data_channel.resp_h", get_resp_ip}
+};
+
+// numeric fields
+static const map<string, FtpExtractor::FdNumGetFn> fd_num_getters =
+{
+    {"file_size", get_file_size},
+    {"data_channel.resp_p", get_resp_port}
+};
+
+// transfer mode, logged as a boolean by the FdSubField log() specialization
+static const map<string, FtpExtractor::FdSubGetFn> fd_sub_getters =
+{
+    {"data_channel.passive", get_mode},
+};
+}
+
+FtpExtractor::FtpExtractor(Extractor& i, ExtractorLogger& l,
+    uint32_t t, const vector<string>& fields) : ExtractorEvent(i, l, t)
+{
+    for (const auto& f : fields)
+    {
+        if (append(fd_buf_fields, flow::fd_buf_getters, f))
+            continue;
+        if (append(fd_sip_fields, flow::fd_sip_getters, f))
+            continue;
+        if (append(fd_num_fields, flow::fd_num_getters, f))
+            continue;
+        if (append(fd_sub_fields, flow::fd_sub_getters, f))
+            continue;
+        if (append(nts_fields, nts_getters, f))
+            continue;
+        if (append(sip_fields, sip_getters, f))
+            continue;
+        if (append(num_fields, num_getters, f))
+            continue;
+    }
+
+    DataBus::subscribe(ftp_pub_key, FtpEventIds::FTP_REQUEST, new Req(*this, S_NAME));
+    DataBus::subscribe(ftp_pub_key, FtpEventIds::FTP_RESPONSE, new Resp(*this, S_NAME));
+}
+
+// Names of the base-class fields followed by the flow-data fields, grouped
+// as string, address, numeric and transfer-mode fields.
+vector<const char*> FtpExtractor::get_field_names() const
+{
+    vector<const char*> names = ExtractorEvent::get_field_names();
+
+    for (const auto& field : fd_buf_fields)
+        names.emplace_back(field.name);
+
+    for (const auto& field : fd_sip_fields)
+        names.emplace_back(field.name);
+
+    for (const auto& field : fd_num_fields)
+        names.emplace_back(field.name);
+
+    for (const auto& field : fd_sub_fields)
+        names.emplace_back(field.name);
+
+    return names;
+}
+
+// Drops the tracked command and everything collected for it.
+// The user name (usr) is left untouched.
+void FtpExtractorFlowData::reset()
+{
+    has_data = false;
+    ts = {};
+
+    // request and response
+    cmd.clear();
+    arg.clear();
+    code.clear();
+    msg.clear();
+    file_size = 0;
+
+    // data channel
+    mode = FTPP_XFER_NOT_SET;
+    orig_h.clear();
+    resp_h.clear();
+    resp_p = 0;
+}
+
+template<>
+// Logs every string field taken from the flow data.
+// The flow data is passed as a pointer: template expansion is confused
+// when an object (a reference) is passed.
+void ExtractorEvent::log<vector<FtpExtractor::FdBufField>, const FtpExtractorFlowData*>(
+    const vector<FtpExtractor::FdBufField>& fields, const FtpExtractorFlowData* fd)
+{
+    for (auto it = fields.begin(); it != fields.end(); ++it)
+        logger.add_field(it->name, it->get(*fd));
+}
+
+// Logs every address field taken from the flow data.
+template<>
+void ExtractorEvent::log<vector<FtpExtractor::FdSipField>, const FtpExtractorFlowData*>(
+    const vector<FtpExtractor::FdSipField>& fields, const FtpExtractorFlowData* fd)
+{
+    for (auto it = fields.begin(); it != fields.end(); ++it)
+        logger.add_field(it->name, it->get(*fd));
+}
+
+// Logs every numeric field taken from the flow data.
+template<>
+void ExtractorEvent::log<vector<FtpExtractor::FdNumField>, const FtpExtractorFlowData*>(
+    const vector<FtpExtractor::FdNumField>& fields, const FtpExtractorFlowData* fd)
+{
+    for (auto it = fields.begin(); it != fields.end(); ++it)
+        logger.add_field(it->name, it->get(*fd));
+}
+
+template<>
+void ExtractorEvent::log<vector<FtpExtractor::FdSubField>, const FtpExtractorFlowData*, bool>(
+    const vector<FtpExtractor::FdSubField>& fields, const FtpExtractorFlowData* fd, bool strict)
+{
+    for (const auto& f : fields)
+    {
+        const auto mode = f.get(*fd);
+        if (mode != FTPP_XFER_NOT_SET)
+            mode == FTPP_XFER_PASSIVE ? logger.add_field(f.name, true) : logger.add_field(f.name, false);
+        else if (strict)
+            logger.add_field(f.name, "");
+    }
+}
+
+static const string commands_to_log = "RETR, STOR, PASV, PORT, DELE, APPE, EPRT, EPSV, STOU, ACCT";
+
+void FtpExtractor::Req::handle(DataEvent& event, Flow* flow)
+{
+    // cppcheck-suppress unreadVariable
+    Profile profile(extractor_perf_stats);
+
+    uint32_t tid = 0;
+
+#ifndef DISABLE_TENANT_ID
+    tid = flow->key->tenant_id;
+#endif
+
+    if (owner.tenant_id != tid)
+        return;
+
+    extractor_stats.total_event++;
+
+    Packet* p = DetectionEngine::get_current_packet();
+    auto fd = ExtractorFlowData::get<FtpExtractorFlowData>(flow);
+
+    if (!fd)
+        flow->set_flow_data(fd = new FtpExtractorFlowData(owner));
+    else if (!fd->cmd.empty())
+    {
+        // log existing flow data
+        owner.logger.open_record();
+        owner.log(owner.nts_fields, &event, p, flow);
+        owner.log(owner.sip_fields, &event, p, flow);
+        owner.log(owner.num_fields, &event, p, flow);
+        owner.log(owner.fd_buf_fields, (const FtpExtractorFlowData*)fd);
+        owner.log(owner.fd_sip_fields, (const FtpExtractorFlowData*)fd);
+        owner.log(owner.fd_num_fields, (const FtpExtractorFlowData*)fd);
+        owner.log(owner.fd_sub_fields, (const FtpExtractorFlowData*)fd, owner.logger.is_strict());
+        owner.logger.close_record();
+
+        fd->reset();
+    }
+
+    const auto& req = ((FtpRequestEvent*)&event)->get_request();
+    const auto cmd = string(req.cmd_begin, req.cmd_size);
+
+    if (cmd == "USER")
+    {
+        fd->usr = string(req.param_begin, req.param_size);
+        return;
+    }
+
+    if (string::npos == commands_to_log.find(cmd))
+        // no need to save it
+        return;
+
+    fd->cmd = cmd;
+    fd->arg = string(req.param_begin, req.param_size);
+
+    fd->ts = p->pkth->ts;
+    fd->has_data = true;
+}
+
+void FtpExtractor::Resp::handle(DataEvent& event, Flow* flow)
+{
+    // cppcheck-suppress unreadVariable
+    Profile profile(extractor_perf_stats);
+
+    uint32_t tid = 0;
+
+#ifndef DISABLE_TENANT_ID
+    tid = flow->key->tenant_id;
+#endif
+
+    if (owner.tenant_id != tid)
+        return;
+
+    extractor_stats.total_event++;
+
+    auto fd = ExtractorFlowData::get<FtpExtractorFlowData>(flow);
+
+    if (!fd or fd->cmd.empty())
+        // no need to save this response
+        return;
+
+    const auto ftp_event = (FtpResponseEvent*)&event;
+    const auto& resp = ftp_event->get_response();
+    const auto code = string(resp.rsp_begin, resp.rsp_size);
+    const auto msg = string(resp.msg_begin, resp.msg_size);
+
+    fd->code = code;
+    fd->msg = msg;
+
+    if (code == FILE_STATUS_OK)
+        fd->file_size = parse_last_num(resp.msg_begin, resp.msg_size);
+
+    if (FTPP_XFER_NOT_SET == ftp_event->get_mode())
+        return;
+
+    if (ftp_event->is_passive())
+    {
+        fd->mode = FTPP_XFER_PASSIVE;
+        fd->orig_h = ftp_event->get_client_ip();
+        fd->resp_h = ftp_event->get_server_ip();
+        fd->resp_p = ftp_event->get_server_port();
+    }
+    else
+    {
+        fd->mode = FTPP_XFER_ACTIVE;
+        fd->orig_h = ftp_event->get_server_ip();
+        fd->resp_h = ftp_event->get_client_ip();
+        fd->resp_p = ftp_event->get_client_port();
+    }
+}
+
+void FtpExtractor::dump(const FtpExtractorFlowData& fd)
+{
+    // cppcheck-suppress unreadVariable
+    Profile profile(extractor_perf_stats);
+
+    logger.open_record();
+
+    for (const auto& f : nts_fields)
+        logger.add_field(f.name, fd.ts);
+    for (const auto& f : sip_fields)
+        logger.add_field(f.name, "");
+    for (const auto& f : num_fields)
+        logger.add_field(f.name, (uint64_t)0);
+
+    log(fd_buf_fields, &fd);
+    log(fd_sip_fields, &fd);
+    log(fd_num_fields, &fd);
+    log(fd_sub_fields, &fd, logger.is_strict());
+
+    logger.close_record();
+}
+
+
+#ifdef UNIT_TEST
+
+#include "catch/snort_catch.h"
+
+TEST_CASE("Parse file size", "[extractor]")
+{
+    // Arrays rather than pointers, so each length is taken from the literal
+    // itself (sizeof minus the terminating NUL) instead of being hand-counted.
+    const char resp_msg1[] = "Here comes the directory listing (total size 04096 bytes).";
+    const char resp_msg2[] = "Opening data connection for log10.txt, size 0 bytes";
+    const char resp_msg3[] = "Opening BINARY mode data connection for \"files-1.3-1.txt\" (218850 bytes).";
+
+    CHECK(4096 == parse_last_num(resp_msg1, sizeof(resp_msg1) - 1));
+    CHECK(0 == parse_last_num(resp_msg2, sizeof(resp_msg2) - 1));
+    CHECK(218850 == parse_last_num(resp_msg3, sizeof(resp_msg3) - 1));
+}
+
+#endif
diff --git a/src/network_inspectors/extractor/extractor_ftp.h b/src/network_inspectors/extractor/extractor_ftp.h
new file mode 100644 (file)
index 0000000..ae32b25
--- /dev/null
@@ -0,0 +1,95 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractor_ftp.h author Anna Norokh <anorokh@cisco.com>
+
+#ifndef EXTRACTOR_FTP_H
+#define EXTRACTOR_FTP_H
+
+#include <cassert>
+
+#include "extractors.h"
+
+class FtpExtractorFlowData;
+
+// Extractor that writes one log record for each FTP request event.
+class FtpRequestExtractor : public ExtractorEvent
+{
+public:
+    // fields: names of the fields to log
+    FtpRequestExtractor(Extractor&, ExtractorLogger&, uint32_t tenant, const std::vector<std::string>& fields);
+
+    void handle(DataEvent&, Flow*);
+
+private:
+    // data-bus handler type subscribed by the constructor
+    using Req = Handler<FtpRequestExtractor>;
+};
+
+// Extractor that writes one log record for each FTP response event.
+class FtpResponseExtractor : public ExtractorEvent
+{
+public:
+    // Getter and field types for values returned as int8_t (the transfer mode)
+    using SubGetFn = int8_t (*) (const DataEvent*, const Packet*, const Flow*);
+    using SubField = DataField<int8_t, const DataEvent*, const Packet*, const Flow*>;
+
+    // fields: names of the fields to log
+    FtpResponseExtractor(Extractor&, ExtractorLogger&, uint32_t tenant, const std::vector<std::string>& fields);
+
+    std::vector<const char*> get_field_names() const override;
+    void handle(DataEvent&, Flow*);
+
+private:
+    // data-bus handler type subscribed by the constructor
+    using Resp = Handler<FtpResponseExtractor>;
+
+    std::vector<SubField> sub_fields;
+};
+
+// Aggregating FTP extractor: request and response events are collected in
+// per-flow data (FtpExtractorFlowData) and logged from there.
+class FtpExtractor : public ExtractorEvent
+{
+public:
+    // Getter/field type pairs for values read from the flow data:
+    // strings, addresses, numbers and the transfer mode.
+    using FdBufGetFn = const char* (*) (const FtpExtractorFlowData&);
+    using FdBufField = DataField<const char*, const FtpExtractorFlowData&>;
+    using FdSipGetFn = const SfIp& (*) (const FtpExtractorFlowData&);
+    using FdSipField = DataField<const SfIp&, const FtpExtractorFlowData&>;
+    using FdNumGetFn = uint64_t (*) (const FtpExtractorFlowData&);
+    using FdNumField = DataField<uint64_t, const FtpExtractorFlowData&>;
+    using FdSubGetFn = int8_t (*) (const FtpExtractorFlowData&);
+    using FdSubField = DataField<int8_t, const FtpExtractorFlowData&>;
+
+    // fields: names of the fields to log
+    FtpExtractor(Extractor&, ExtractorLogger&, uint32_t tenant, const std::vector<std::string>& fields);
+
+    std::vector<const char*> get_field_names() const override;
+    // Writes a record from flow data only; called by the flow data destructor
+    void dump(const FtpExtractorFlowData&);
+
+private:
+    // Handler subscribed to FTP request events
+    struct Req : public DataHandler
+    {
+        Req(FtpExtractor& owner, const char* name) : DataHandler(name), owner(owner) {}
+        void handle(DataEvent&, Flow*) override;
+        FtpExtractor& owner;
+    };
+
+    // Handler subscribed to FTP response events
+    struct Resp : public DataHandler
+    {
+        Resp(FtpExtractor& owner, const char* name) : DataHandler(name), owner(owner) {}
+        void handle(DataEvent&, Flow*) override;
+        FtpExtractor& owner;
+    };
+
+    std::vector<FdBufField> fd_buf_fields;
+    std::vector<FdSipField> fd_sip_fields;
+    std::vector<FdNumField> fd_num_fields;
+    std::vector<FdSubField> fd_sub_fields;
+};
+
+#endif
similarity index 69%
rename from src/network_inspectors/extractor/extractor_http_event_handler.cc
rename to src/network_inspectors/extractor/extractor_http.cc
index 46482a732fcfaf9abd829e0676d604a849dee821..8633b79db15b93365702960957a50e91006f0c04 100644 (file)
 // with this program; if not, write to the Free Software Foundation, Inc.,
 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 //--------------------------------------------------------------------------
-// extractor_http_event_handler.cc author Maya Dagon <mdagon@cisco.com>
+// extractor_http.cc author Maya Dagon <mdagon@cisco.com>
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
-#include "extractor_event_handlers.h"
+#include "extractor_http.h"
 
 #include "detection/detection_engine.h"
 #include "flow/flow_key.h"
@@ -32,6 +32,8 @@
 #include "utils/util.h"
 #include "utils/util_net.h"
 
+#include "extractor.h"
+
 using namespace snort;
 using namespace std;
 
@@ -123,53 +125,7 @@ static const char* get_resp_filenames(const DataEvent* event, const Packet*, con
     return ((const HttpTransactionEndEvent*)event)->get_filename(HttpCommon::SRC_SERVER).c_str();
 }
 
-static struct timeval get_timestamp(const DataEvent*, const Packet* p, const Flow*)
-{
-    return p->pkth->ts;
-}
-
-static const SfIp& get_ip_src(const DataEvent*, const Packet*, const Flow* flow)
-{
-    return flow->flags.client_initiated ? flow->client_ip : flow->server_ip;
-}
-
-static const SfIp& get_ip_dst(const DataEvent*, const Packet*, const Flow* flow)
-{
-    return flow->flags.client_initiated ? flow->server_ip : flow->client_ip;
-}
-
-static uint64_t get_ip_src_port(const DataEvent*, const Packet*, const Flow* flow)
-{
-    return flow->client_port;
-}
-
-static uint64_t get_ip_dst_port(const DataEvent*, const Packet*, const Flow* flow)
-{
-    return flow->server_port;
-}
-
-static uint64_t get_pkt_num(const DataEvent*, const Packet* p, const Flow*)
-{
-    return p->context->packet_number;
-}
-
-static uint64_t get_uid(const DataEvent*, const Packet*, const Flow* flow)
-{
-    return ExtractorEvent::get_hash().do_hash((const unsigned char*)flow->key, 0);
-}
-
-static const map<string, ExtractorEvent::NtsGetFn> nts_getters =
-{
-    {"ts", get_timestamp},
-};
-
-static const map<string, ExtractorEvent::SipGetFn> sip_getters =
-{
-    {"id.orig_h", get_ip_src},
-    {"id.resp_h", get_ip_dst}
-};
-
-static const map<string, ExtractorEvent::StrGetFn> str_getters =
+static const map<string, ExtractorEvent::BufGetFn> sub_buf_getters =
 {
     {"version", get_version},
     {"proxied", get_proxied},
@@ -177,19 +133,15 @@ static const map<string, ExtractorEvent::StrGetFn> str_getters =
     {"resp_filenames", get_resp_filenames}
 };
 
-static const map<string, ExtractorEvent::NumGetFn> num_getters =
+static const map<string, ExtractorEvent::NumGetFn> sub_num_getters =
 {
-    {"id.orig_p", get_ip_src_port},
-    {"id.resp_p", get_ip_dst_port},
-    {"uid", get_uid},
-    {"pkt_num", get_pkt_num},
     {"trans_depth", get_trans_depth},
     {"request_body_len", get_request_body_len},
     {"response_body_len", get_response_body_len},
     {"info_code", get_info_code}
 };
 
-static const map<string, HttpExtractorEventHandler::SubGetFn> sub_getters =
+static const map<string, HttpExtractor::SubGetFn> sub_getters =
 {
     {"method", get_method},
     {"host", get_host},
@@ -202,21 +154,8 @@ static const map<string, HttpExtractorEventHandler::SubGetFn> sub_getters =
     {"info_msg", get_info_msg}
 };
 
-template<class T, class U, class V>
-static inline bool append(T& cont, const U& map, const V& key)
-{
-    auto it = map.find(key);
-
-    if (it == map.end())
-        return false;
-
-    cont.emplace_back(it->first.c_str(), it->second);
-
-    return true;
-}
-
-HttpExtractorEventHandler::HttpExtractorEventHandler(uint32_t t, const vector<string>& fields, ExtractorLogger& l)
-    : DataHandler(S_NAME), ExtractorEvent(t, l)
+HttpExtractor::HttpExtractor(Extractor& i, ExtractorLogger& l, uint32_t t, const vector<string>& fields)
+    : ExtractorEvent(i, l, t)
 {
     for (const auto& f : fields)
     {
@@ -226,56 +165,59 @@ HttpExtractorEventHandler::HttpExtractorEventHandler(uint32_t t, const vector<st
             continue;
         if (append(num_fields, num_getters, f))
             continue;
-        if (append(str_fields, str_getters, f))
+        if (append(num_fields, sub_num_getters, f))
+            continue;
+        if (append(buf_fields, sub_buf_getters, f))
             continue;
         if (append(sub_fields, sub_getters, f))
             continue;
     }
+
+    DataBus::subscribe(http_pub_key, HttpEventIds::END_OF_TRANSACTION, new Eot(*this, S_NAME));
 }
 
 template<>
-void ExtractorEvent::log<vector<HttpExtractorEventHandler::SubField>, DataEvent*, Packet*, Flow*, bool>(
-    const vector<HttpExtractorEventHandler::SubField>& fields, DataEvent* event, Packet* pkt, Flow* flow, bool strict)
+void ExtractorEvent::log<vector<HttpExtractor::SubField>, DataEvent*, Packet*, Flow*, bool>(
+    const vector<HttpExtractor::SubField>& fields, DataEvent* event, Packet* pkt, Flow* flow, bool strict)
 {
     for (const auto& f : fields)
     {
-        const auto& d = f.get(event, pkt, flow);
-        if (d.length() > 0)
-            logger.add_field(f.name, (const char*)d.start(), d.length());
+        const auto& field = f.get(event, pkt, flow);
+        if (field.length() > 0)
+            logger.add_field(f.name, (const char*)field.start(), field.length());
         else if (strict)
             logger.add_field(f.name, "");
     }
 }
 
-void HttpExtractorEventHandler::handle(DataEvent& event, Flow* flow)
+void HttpExtractor::handle(DataEvent& event, Flow* flow)
 {
     // cppcheck-suppress unreadVariable
     Profile profile(extractor_perf_stats);
-    uint32_t tid;
+
+    uint32_t tid = 0;
 
 #ifndef DISABLE_TENANT_ID
     tid = flow->key->tenant_id;
-#else
-    tid = 0;
 #endif
 
     if (tenant_id != tid)
         return;
 
+    extractor_stats.total_event++;
+
     Packet* packet = DetectionEngine::get_current_packet();
 
     logger.open_record();
     log(nts_fields, &event, packet, flow);
     log(sip_fields, &event, packet, flow);
     log(num_fields, &event, packet, flow);
-    log(str_fields, &event, packet, flow);
+    log(buf_fields, &event, packet, flow);
     log(sub_fields, &event, packet, flow, logger.is_strict());
     logger.close_record();
-
-    extractor_stats.total_event++;
 }
 
-vector<const char*> HttpExtractorEventHandler::get_field_names() const
+vector<const char*> HttpExtractor::get_field_names() const
 {
     vector<const char*> res = ExtractorEvent::get_field_names();
 
diff --git a/src/network_inspectors/extractor/extractor_http.h b/src/network_inspectors/extractor/extractor_http.h
new file mode 100644 (file)
index 0000000..aa5b999
--- /dev/null
@@ -0,0 +1,44 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractor_http.h author Maya Dagon <mdagon@cisco.com>
+
+#ifndef EXTRACTOR_HTTP_H
+#define EXTRACTOR_HTTP_H
+
+#include "extractors.h"
+
+class Field;
+
+// Extractor for HTTP events. On top of the field groups inherited from
+// ExtractorEvent it keeps sub_fields, whose getters return a Field.
+class HttpExtractor : public ExtractorEvent
+{
+public:
+    // Getter signature and name/getter pair for fields whose value is a Field.
+    using SubGetFn = const Field& (*) (const DataEvent*, const Packet*, const Flow*);
+    using SubField = DataField<const Field&, const DataEvent*, const Packet*, const Flow*>;
+
+    // Arguments: owning inspector, logger to write to, tenant id, requested field names.
+    HttpExtractor(Extractor&, ExtractorLogger&, uint32_t tenant, const std::vector<std::string>& fields);
+
+    std::vector<const char*> get_field_names() const override;
+
+    // Writes one log record for the event; returns without logging when the
+    // flow's tenant id differs from this extractor's tenant id.
+    // Not virtual: it hides ExtractorEvent::handle and is reached through Handler<HttpExtractor>.
+    void handle(DataEvent&, Flow*);
+
+private:
+    // DataHandler adapter that forwards handle() to this class.
+    // NOTE(review): "Eot" presumably stands for the end-of-transaction event - confirm in the .cc.
+    using Eot = Handler<HttpExtractor>;
+
+    std::vector<SubField> sub_fields;
+};
+
+#endif
index 68ec7c147caecb4f7d25529aa01b1ea7f29c5711..7da0b5ecf3ae307fb1195a67bf7a83b734b45d54 100644 (file)
@@ -76,3 +76,8 @@ void JsonExtractorLogger::add_field(const char* f, const snort::SfIp& v)
     v.ntop(buf);
     js.put(f, buf);
 }
+
+// Writes a boolean field through js.put_true() or js.put_false(), depending on v.
+void JsonExtractorLogger::add_field(const char* f, bool v)
+{
+    if (v)
+        js.put_true(f);
+    else
+        js.put_false(f);
+}
index 857bed8d6e9935002f52df050fa5d29443ccc714..c009181c887dd0c472d8ad1f2e905d3f1a40d896 100644 (file)
@@ -42,6 +42,7 @@ public:
     void add_field(const char*, uint64_t) override;
     void add_field(const char*, struct timeval) override;
     void add_field(const char*, const snort::SfIp&) override;
+    void add_field(const char*, bool) override;
     void open_record() override;
     void close_record() override;
 
index a96642ae36056054d0eaa29b586c2858d3badba4..44e708de5795075ff477c081784e3bb96b8a2b15 100644 (file)
@@ -56,8 +56,6 @@ ExtractorLogger* ExtractorLogger::make_logger(FormatType f_type, OutputType o_ty
 
 #include <memory.h>
 
-using namespace snort;
-
 TEST_CASE("Format Type", "[extractor]")
 {
     SECTION("to string")
index f5b10869f568fb4b18b6ba5744b0521e3f105fdf..23b8a9ecc0b813659b473a59fd529d5c7738c410 100644 (file)
 
 #include "sfip/sf_ip.h"
 
+#include "extractor_enums.h"
 #include "extractor_writer.h"
 
-class FormatType
-{
-public:
-    enum Value : uint8_t
-    {
-        CSV,
-        JSON,
-        MAX
-    };
-
-    FormatType() = default;
-    constexpr FormatType(Value a) : v(a) {}
-    template<typename T> constexpr FormatType(T a) : v((Value)a) {}
-
-    constexpr operator Value() const { return v; }
-    explicit operator bool() const = delete;
-
-    const char* c_str() const
-    {
-        switch (v)
-        {
-        case CSV:
-            return "csv";
-        case JSON:
-            return "json";
-        case MAX: // fallthrough
-        default:
-            return "(not set)";
-        }
-    }
-
-private:
-    Value v = CSV;
-};
-
 class ExtractorLogger
 {
 public:
@@ -86,6 +52,7 @@ public:
     virtual void add_field(const char*, uint64_t) {}
     virtual void add_field(const char*, struct timeval) {}
     virtual void add_field(const char*, const snort::SfIp&) {}
+    virtual void add_field(const char*, bool) {}
 
     virtual void open_record() {}
     virtual void close_record() {}
index bfe3b04566ca89cc615a934987f76035654050e6..840c1eb982a345e75b5ecd3f0e052f95e18a00ef 100644 (file)
 
 #include "extractor_service.h"
 
-#include "framework/data_bus.h"
 #include "log/messages.h"
-#include "pub_sub/http_events.h"
 
 #include "extractor.h"
-#include "extractor_event_handlers.h"
+#include "extractor_ftp.h"
+#include "extractor_http.h"
 
 using namespace snort;
 
-
-//////////////////////////////////////////////////////////////////////
-////  ExtractorService
-//////////////////////////////////////////////////////////////////////
+//-------------------------------------------------------------------------
+// ExtractorService
+//-------------------------------------------------------------------------
 
 std::vector<std::string> ExtractorService::common_fields =
 {
@@ -48,16 +46,23 @@ std::vector<std::string> ExtractorService::common_fields =
     "pkt_num"
 };
 
-
 ExtractorService::ExtractorService(uint32_t tenant, const std::vector<std::string>& srv_fields,
-    const std::vector<std::string>& srv_events, const ServiceBlueprint& srv_bp,
-    ServiceType s_type, FormatType f_type, OutputType o_type) : tenant_id(tenant), sbp(srv_bp), type(s_type)
+    const std::vector<std::string>& srv_events, const ServiceBlueprint& srv_bp, ServiceType s_type,
+    FormatType f_type, OutputType o_type, Extractor& ins) : tenant_id(tenant), inspector(ins), sbp(srv_bp), type(s_type)
 {
     add_fields(srv_fields);
     add_events(srv_events);
     logger = ExtractorLogger::make_logger(f_type, o_type);
 }
 
+// Releases every extractor instance stored in handlers, then the logger.
+ExtractorService::~ExtractorService()
+{
+    for (ExtractorEvent* extractor : handlers)
+        delete extractor;
+
+    delete logger;
+}
+
 void ExtractorService::add_events(const std::vector<std::string>& vals)
 {
     for (const auto& val : vals)
@@ -80,7 +85,8 @@ void ExtractorService::add_fields(const std::vector<std::string>& vals)
     }
 }
 
-ExtractorService* ExtractorService::make_service(const ServiceConfig& cfg, FormatType f_type, OutputType o_type)
+ExtractorService* ExtractorService::make_service(Extractor& ins, const ServiceConfig& cfg,
+    FormatType f_type, OutputType o_type)
 {
     if (cfg.on_events.empty())
     {
@@ -93,7 +99,11 @@ ExtractorService* ExtractorService::make_service(const ServiceConfig& cfg, Forma
     switch (cfg.service)
     {
     case ServiceType::HTTP:
-        srv = new HttpExtractorService(cfg.tenant_id, cfg.fields, cfg.on_events, cfg.service, f_type, o_type);
+        srv = new HttpExtractorService(cfg.tenant_id, cfg.fields, cfg.on_events, cfg.service, f_type, o_type, ins);
+        break;
+
+    case ServiceType::FTP:
+        srv = new FtpExtractorService(cfg.tenant_id, cfg.fields, cfg.on_events, cfg.service, f_type, o_type, ins);
         break;
 
     case ServiceType::UNDEFINED: // fallthrough
@@ -138,9 +148,9 @@ void ExtractorService::show(std::string& str) const
     str += " }";
 }
 
-//////////////////////////////////////////////////////////////////////
-////  HttpExtractorService
-//////////////////////////////////////////////////////////////////////
+//-------------------------------------------------------------------------
+//  HttpExtractorService
+//-------------------------------------------------------------------------
 
 ServiceBlueprint HttpExtractorService::blueprint =
 {
@@ -171,44 +181,107 @@ ServiceBlueprint HttpExtractorService::blueprint =
 };
 
 HttpExtractorService::HttpExtractorService(uint32_t tenant, const std::vector<std::string>& srv_fields,
-    const std::vector<std::string>& srv_events, ServiceType s_type, FormatType f_type, OutputType o_type)
-    : ExtractorService(tenant, srv_fields, srv_events, blueprint, s_type, f_type, o_type)
+    const std::vector<std::string>& srv_events, ServiceType s_type, FormatType f_type, OutputType o_type, Extractor& ins)
+    : ExtractorService(tenant, srv_fields, srv_events, blueprint, s_type, f_type, o_type, ins)
 {
     if (!logger)
         return;
 
     for (const auto& event : get_events())
     {
+        ExtractorEvent* eh;
+
         if (!strcmp("eot", event.c_str()))
-        {
-            auto eh = new HttpExtractorEventHandler(tenant_id, get_fields(), *logger);
+            eh = new HttpExtractor(ins, *logger, tenant_id, get_fields());
 
-            DataBus::subscribe(http_pub_key, HttpEventIds::END_OF_TRANSACTION, eh);
+        else
+            continue;
 
-            auto names_set = eh->get_field_names();
-            logger->set_fields(names_set);
-            logger->add_header();
-        }
+        auto names = eh->get_field_names();
+        logger->set_fields(names);
+        logger->add_header();
+        handlers.push_back(eh);
     }
 }
 
+//-------------------------------------------------------------------------
+//  FtpExtractorService
+//-------------------------------------------------------------------------
+
+// Event and field names listed for the FTP service. The constructor passes
+// this blueprint to the ExtractorService base class.
+ServiceBlueprint FtpExtractorService::blueprint =
+{
+    // events: each of these names has a matching extractor in the FtpExtractorService constructor
+    {
+      "request",
+      "response",
+      "eot",
+    },
+    // fields: FTP-specific field names
+    {
+      "command",
+      "arg",
+      "user",
+      "reply_code",
+      "reply_msg",
+      "file_size",
+      "data_channel.passive",
+      "data_channel.orig_h",
+      "data_channel.resp_h",
+      "data_channel.resp_p"
+    },
+};
+
+// Builds the FTP service: the base class records tenant, fields, events and
+// creates the logger; this constructor then creates one extractor per
+// recognised event name ("request", "response", "eot") and stores it in handlers.
+FtpExtractorService::FtpExtractorService(uint32_t tenant, const std::vector<std::string>& srv_fields,
+    const std::vector<std::string>& srv_events, ServiceType s_type, FormatType f_type, OutputType o_type, Extractor& ins)
+    : ExtractorService(tenant, srv_fields, srv_events, blueprint, s_type, f_type, o_type, ins)
+{
+    // The base constructor did not produce a logger: leave the service without handlers.
+    if (!logger)
+        return;
+
+    for (const auto& event_name : get_events())
+    {
+        const char* const id = event_name.c_str();
+        ExtractorEvent* extractor = nullptr;
+
+        if (!strcmp("request", id))
+            extractor = new FtpRequestExtractor(ins, *logger, tenant_id, get_fields());
+        else if (!strcmp("response", id))
+            extractor = new FtpResponseExtractor(ins, *logger, tenant_id, get_fields());
+        else if (!strcmp("eot", id))
+            extractor = new FtpExtractor(ins, *logger, tenant_id, get_fields());
+
+        // Any other event name gets no extractor.
+        if (!extractor)
+            continue;
+
+        // Hand the extractor's field names to the logger, emit the header, keep the instance.
+        auto field_names = extractor->get_field_names();
+        logger->set_fields(field_names);
+        logger->add_header();
+        handlers.push_back(extractor);
+    }
+}
+
+//-------------------------------------------------------------------------
+//  Unit Tests
+//-------------------------------------------------------------------------
+
 #ifdef UNIT_TEST
 
 #include "catch/snort_catch.h"
 
 #include <memory.h>
 
-using namespace snort;
-
 TEST_CASE("Service Type", "[extractor]")
 {
     SECTION("to string")
     {
         ServiceType http = ServiceType::HTTP;
+        ServiceType ftp = ServiceType::FTP;
         ServiceType undef = ServiceType::UNDEFINED;
         ServiceType max = ServiceType::MAX;
 
         CHECK_FALSE(strcmp("http", http.c_str()));
+        CHECK_FALSE(strcmp("ftp", ftp.c_str()));
         CHECK_FALSE(strcmp("(not set)", undef.c_str()));
         CHECK_FALSE(strcmp("(not set)", max.c_str()));
     }
index 6725f462f13466e992f1af24c31b99f3a784bdcd..9ba24d3395b063d108a2fec433e11cdd5451ab12 100644 (file)
 #include <string>
 #include <vector>
 
+#include "extractor_enums.h"
 #include "extractor_logger.h"
 
+class Extractor;
 class ServiceConfig;
 
-class ServiceType
-{
-public:
-    enum Value : uint8_t
-    {
-        HTTP,
-        UNDEFINED,
-        MAX
-    };
-
-    ServiceType() = default;
-    constexpr ServiceType(Value a) : v(a) {}
-    template<typename T> constexpr ServiceType(T a) : v(Value(a)) {}
-
-    constexpr operator Value() const { return v; }
-    explicit operator bool() const = delete;
-
-    const char* c_str() const
-    {
-        switch (v)
-        {
-        case HTTP:
-            return "http";
-        case UNDEFINED: // fallthrough
-        case MAX:       // fallthrough
-        default:
-            return "(not set)";
-        }
-    }
-
-private:
-    Value v = UNDEFINED;
-};
+class ExtractorEvent;
 
 struct ServiceBlueprint
 {
@@ -68,19 +38,16 @@ struct ServiceBlueprint
     std::vector<std::string> supported_fields;
 };
 
-// FIXIT-P: make a template with Logger and Writer as parameters
 class ExtractorService
 {
 public:
-    static ExtractorService* make_service(const ServiceConfig&, FormatType, OutputType);
+    static ExtractorService* make_service(Extractor&, const ServiceConfig&, FormatType, OutputType);
 
     ExtractorService() = delete;
     ExtractorService(const ExtractorService&) = delete;
     ExtractorService& operator=(const ExtractorService&) = delete;
     ExtractorService(ExtractorService&&) = delete;
-
-    virtual ~ExtractorService()
-    { delete logger; }
+    virtual ~ExtractorService();
 
     void show(std::string&) const;
     uint32_t get_tenant() const { return tenant_id; }
@@ -89,7 +56,7 @@ public:
 
 protected:
     ExtractorService(uint32_t tenant, const std::vector<std::string>& fields, const std::vector<std::string>& events,
-        const ServiceBlueprint& srv_bp, ServiceType, FormatType, OutputType);
+        const ServiceBlueprint& srv_bp, ServiceType, FormatType, OutputType, Extractor&);
     void add_events(const std::vector<std::string>& vals);
     void add_fields(const std::vector<std::string>& vals);
     bool find_event(const std::string&) const;
@@ -102,6 +69,8 @@ protected:
     std::vector<std::string> events;
 
     ExtractorLogger* logger = nullptr;
+    Extractor& inspector;
+    std::vector<ExtractorEvent*> handlers;
 
     const ServiceBlueprint& sbp;
     const ServiceType type;
@@ -111,7 +80,17 @@ class HttpExtractorService : public ExtractorService
 {
 public:
     HttpExtractorService(uint32_t tenant, const std::vector<std::string>& fields,
-    const std::vector<std::string>& events, ServiceType, FormatType, OutputType);
+        const std::vector<std::string>& events, ServiceType, FormatType, OutputType, Extractor&);
+
+private:
+    static ServiceBlueprint blueprint;
+};
+
+class FtpExtractorService : public ExtractorService
+{
+public:
+    FtpExtractorService(uint32_t tenant, const std::vector<std::string>& fields,
+        const std::vector<std::string>& events, ServiceType, FormatType, OutputType, Extractor&);
 
 private:
     static ServiceBlueprint blueprint;
index 4f9da48c974e55dc15954ea82e97d7dac184e507..a5eee7d64e9b5fd157b20c677161f8f6e042a58a 100644 (file)
@@ -77,8 +77,6 @@ void StdExtractorWriter::unlock()
 
 #include <memory.h>
 
-using namespace snort;
-
 TEST_CASE("Output Type", "[extractor]")
 {
     SECTION("to string")
index 6b234b19aa319095e70673656be054763e596322..4ac441e6bfc2d7f5bd9c448fc615bda5d51afff4 100644 (file)
 #define EXTRACTOR_WRITER_H
 
 #include <mutex>
-#include <string>
 
 #include "log/text_log.h"
 #include "main/snort_types.h"
 
-class OutputType
-{
-public:
-    enum Value : uint8_t
-    {
-        STD,
-        MAX
-    };
-
-    OutputType() = default;
-    constexpr OutputType(Value a) : v(a) {}
-    template<typename T> constexpr OutputType(T a) : v((Value)a) {}
-
-    constexpr operator Value() const { return v; }
-    explicit operator bool() const = delete;
-
-    const char* c_str() const
-    {
-        switch (v)
-        {
-        case STD:
-            return "stdout";
-        case MAX: // fallthrough
-        default:
-            return "(not set)";
-        }
-    }
-
-private:
-    Value v = STD;
-};
+#include "extractor_enums.h"
 
 class ExtractorWriter
 {
diff --git a/src/network_inspectors/extractor/extractors.cc b/src/network_inspectors/extractor/extractors.cc
new file mode 100644 (file)
index 0000000..1db66b8
--- /dev/null
@@ -0,0 +1,67 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractors.cc author Cisco
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "extractors.h"
+
+using namespace std;
+
+// Returns the names of all selected fields, group by group, in the order
+// nts, sip, num, buf, str.
+vector<const char*> ExtractorEvent::get_field_names() const
+{
+    vector<const char*> names;
+
+    // Appends the name of every field in one group, preserving its order.
+    auto collect = [&names](const auto& group)
+    {
+        for (const auto& field : group)
+            names.push_back(field.name);
+    };
+
+    collect(nts_fields);
+    collect(sip_fields);
+    collect(num_fields);
+    collect(buf_fields);
+    collect(str_fields);
+
+    return names;
+}
+
+// Lookup tables from field name to the static ExtractorEvent getter that
+// produces the value, one table per value type. Getter names are written
+// unqualified in all three tables: inside the initializer of a static member
+// they are looked up in class scope.
+
+// Timestamp fields.
+const std::map<std::string, ExtractorEvent::NtsGetFn> ExtractorEvent::nts_getters =
+{
+    {"ts", get_timestamp},
+};
+
+// IP address fields.
+const std::map<std::string, ExtractorEvent::SipGetFn> ExtractorEvent::sip_getters =
+{
+    {"id.orig_h", get_ip_src},
+    {"id.resp_h", get_ip_dst},
+};
+
+// Numeric fields.
+const std::map<std::string, ExtractorEvent::NumGetFn> ExtractorEvent::num_getters =
+{
+    {"id.orig_p", get_ip_src_port},
+    {"id.resp_p", get_ip_dst_port},
+    {"uid", get_uid},
+    {"pkt_num", get_pkt_num},
+};
diff --git a/src/network_inspectors/extractor/extractors.h b/src/network_inspectors/extractor/extractors.h
new file mode 100644 (file)
index 0000000..9f5dcdb
--- /dev/null
@@ -0,0 +1,153 @@
+//--------------------------------------------------------------------------
+// Copyright (C) 2024-2024 Cisco and/or its affiliates. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License Version 2 as published
+// by the Free Software Foundation.  You may not use, modify or distribute
+// this program under any other version of the GNU General Public License.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//--------------------------------------------------------------------------
+// extractors.h author Maya Dagon <mdagon@cisco.com>
+
+#ifndef EXTRACTORS_H
+#define EXTRACTORS_H
+
+#include <sys/time.h>
+#include <vector>
+
+#include "detection/detection_engine.h"
+#include "flow/flow_key.h"
+#include "framework/data_bus.h"
+#include "sfip/sf_ip.h"
+
+#include "extractor_logger.h"
+
+class Extractor;
+
+// Couples a field name with the function that produces the field's value.
+// Ret is the getter's return type; Context... are the getter's argument types.
+template <typename Ret, class... Context>
+struct DataField
+{
+    using Getter = Ret (*)(Context...);
+
+    DataField(const char* field_name, Getter getter) : name(field_name), get(getter) { }
+
+    const char* name;
+    Getter get;
+};
+
+// Base class for extractors. Holds the tenant id, the logger and the owning
+// inspector, the selected fields grouped by value type, and the getters and
+// logging helpers shared by the derived extractors.
+class ExtractorEvent
+{
+public:
+    using DataEvent = snort::DataEvent;
+    using DataHandler = snort::DataHandler;
+    using Flow = snort::Flow;
+    using Packet = snort::Packet;
+    using SfIp = snort::SfIp;
+
+    // For each value type: XxxGetFn is the getter signature and XxxField the
+    // matching name/getter pair (Buf: C string, Sip: IP address, Num: number,
+    // Nts: timeval, Str: pointer plus length).
+    using BufGetFn = const char* (*) (const DataEvent*, const Packet*, const Flow*);
+    using BufField = DataField<const char*, const DataEvent*, const Packet*, const Flow*>;
+    using SipGetFn = const SfIp& (*) (const DataEvent*, const Packet*, const Flow*);
+    using SipField = DataField<const SfIp&, const DataEvent*, const Packet*, const Flow*>;
+    using NumGetFn = uint64_t (*) (const DataEvent*, const Packet*, const Flow*);
+    using NumField = DataField<uint64_t, const DataEvent*, const Packet*, const Flow*>;
+    using NtsGetFn = struct timeval (*) (const DataEvent*, const Packet*, const Flow*);
+    using NtsField = DataField<struct timeval, const DataEvent*, const Packet*, const Flow*>;
+    using StrGetFn = std::pair<const char*, uint16_t> (*) (const DataEvent*, const Packet*, const Flow*);
+    using StrField = DataField<std::pair<const char*, uint16_t>, const DataEvent*, const Packet*, const Flow*>;
+
+    // Returns a per-thread FlowHashKeyOps instance (constructed with 0); used by get_uid().
+    static snort::FlowHashKeyOps& get_hash()
+    {
+        static thread_local snort::FlowHashKeyOps flow_key_ops(0);
+        return flow_key_ops;
+    }
+
+    virtual ~ExtractorEvent() {}
+
+    Extractor& get_inspector() const { return inspector; }
+
+    // Names of the selected fields; derived classes may override to add their own groups.
+    virtual std::vector<const char*> get_field_names() const;
+
+    // No-op in the base class. Not virtual: Handler<T> calls T::handle through
+    // its typed owner reference, so derived classes simply declare their own handle().
+    void handle(DataEvent&, Flow*) {}
+
+protected:
+    // DataHandler adapter: forwards handle() to owner.handle().
+    template<typename T>
+    struct Handler : public DataHandler
+    {
+        Handler(T& owner, const char* name) : DataHandler(name), owner(owner) {}
+        void handle(DataEvent& e, Flow* f) override { owner.handle(e, f); }
+        T& owner;
+    };
+
+    // Packet header timestamp.
+    static struct timeval get_timestamp(const DataEvent*, const Packet* p, const Flow*)
+    { return p->pkth->ts; }
+
+    // client_ip when flags.client_initiated is set, otherwise server_ip.
+    static const SfIp& get_ip_src(const DataEvent*, const Packet*, const Flow* flow)
+    { return flow->flags.client_initiated ? flow->client_ip : flow->server_ip; }
+
+    // server_ip when flags.client_initiated is set, otherwise client_ip.
+    static const SfIp& get_ip_dst(const DataEvent*, const Packet*, const Flow* flow)
+    { return flow->flags.client_initiated ? flow->server_ip : flow->client_ip; }
+
+    // Always the flow's client_port (flags.client_initiated is not consulted here).
+    static uint64_t get_ip_src_port(const DataEvent*, const Packet*, const Flow* flow)
+    { return flow->client_port; }
+
+    // Always the flow's server_port (flags.client_initiated is not consulted here).
+    static uint64_t get_ip_dst_port(const DataEvent*, const Packet*, const Flow* flow)
+    { return flow->server_port; }
+
+    // Packet number taken from the packet's context.
+    static uint64_t get_pkt_num(const DataEvent*, const Packet* p, const Flow*)
+    { return p->context->packet_number; }
+
+    // Hash of the flow key, computed with the per-thread hash ops from get_hash().
+    static uint64_t get_uid(const DataEvent*, const Packet*, const Flow* flow)
+    { return ExtractorEvent::get_hash().do_hash((const unsigned char*)flow->key, 0); }
+
+    // Adds every field of the group to the logger; context is passed unchanged to each getter.
+    template<typename T, class... Context>
+    void log(const T& fields, Context... context)
+    {
+        for (const auto& f : fields)
+            logger.add_field(f.name, f.get(context...));
+    }
+
+    // String fields: a value with non-zero length is added together with its length;
+    // a zero-length value is added as an empty string only when strict is set,
+    // otherwise the field is omitted.
+    void log(const std::vector<StrField>& fields, DataEvent* event, Packet* pkt, Flow* flow, bool strict)
+    {
+        for (const auto& f : fields)
+        {
+            const auto& str = f.get(event, pkt, flow);
+            if (str.second > 0)
+                logger.add_field(f.name, (const char*)str.first, str.second);
+            else if (strict)
+                logger.add_field(f.name, "");
+        }
+    }
+
+    // Looks key up in map; on a hit appends (name, getter) to cont and returns true,
+    // otherwise returns false and leaves cont untouched. The stored name is the
+    // map key's c_str(), so it stays valid only as long as that map entry exists.
+    template<class T, class U, class V>
+    bool append(T& cont, const U& map, const V& key)
+    {
+        auto it = map.find(key);
+        if (it != map.end())
+            cont.emplace_back(it->first.c_str(), it->second);
+        return it != map.end();
+    }
+
+    // Protected: instances are created through derived classes only.
+    ExtractorEvent(Extractor& i, ExtractorLogger& l, uint32_t tid)
+        : tenant_id(tid), logger(l), inspector(i) { }
+
+    uint32_t tenant_id;
+    ExtractorLogger& logger;
+    Extractor& inspector;
+
+    // Selected fields, grouped by value type.
+    std::vector<NtsField> nts_fields;
+    std::vector<SipField> sip_fields;
+    std::vector<NumField> num_fields;
+    std::vector<BufField> buf_fields;
+    std::vector<StrField> str_fields;
+
+    // Field name -> getter tables for the common fields.
+    static const std::map<std::string, ExtractorEvent::NtsGetFn> nts_getters;
+    static const std::map<std::string, ExtractorEvent::SipGetFn> sip_getters;
+    static const std::map<std::string, ExtractorEvent::NumGetFn> num_getters;
+};
+
+#endif
index 1d0fab6c6a83da12a8bc6531dac2a5098d73a9a9..9bf267262673dcc4bd36e7f1c0b0462df12c2689 100644 (file)
@@ -77,6 +77,12 @@ public:
     const snort::SfIp& get_server_ip() const
     { return session.serverIP; }
 
+    // Returns session.mode unchanged (the FTP code in this change assigns it FTPP_XFER_* values).
+    int8_t get_mode() const
+    { return session.mode; }
+
+    // True only when session.mode is FTPP_XFER_PASSIVE; any other value,
+    // including FTPP_XFER_NOT_SET, yields false.
+    bool is_passive() const
+    { return session.mode == FTPP_XFER_PASSIVE; }
+
 private:
     const FTP_SESSION& session;
 };
index 67409cd7d74eeab0a0c4a888058bd23b2e3bccb6..b10625bc06ce18ec96a604e7c87d08373f70c854 100644 (file)
@@ -405,6 +405,7 @@ static inline int FTPResetsession(FTP_SESSION* Ftpsession)
     Ftpsession->clientPort = 0;
     Ftpsession->serverIP.clear();
     Ftpsession->serverPort = 0;
+    Ftpsession->mode = FTPP_XFER_NOT_SET;
     Ftpsession->data_chan_state = NO_STATE;
     Ftpsession->data_chan_index = -1;
     Ftpsession->data_xfer_index = -1;
index f5b400e5b25d5190a807331ed44940a6a49a7b6e..9f7753a6c87ea1fb9bbd9dd82b8afe1dfdfda53e 100644 (file)
@@ -167,6 +167,7 @@ struct FTP_SESSION
     uint16_t clientPort;
     snort::SfIp serverIP;
     uint16_t serverPort;
+    int8_t mode;
 
     /* A file is being transferred on ftp-data channel */
     char* filename;
@@ -198,6 +199,7 @@ public:
 /* FTP-Data Transfer Modes */
 enum
 {
+    FTPP_XFER_NOT_SET = -1,
     FTPP_XFER_PASSIVE = 0,
     FTPP_XFER_ACTIVE  = 1
 };
index 01468c5bb1b254e6d7277fdb764456fa61327e18..1820d2e81ee7372906fd978a05644d47148f2905 100644 (file)
@@ -1082,6 +1082,7 @@ static int do_stateful_checks(FTP_SESSION* session, Packet* p,
                                 int result;
                                 /* This is a passive data transfer */
                                 ftpdata->mode = FTPP_XFER_PASSIVE;
+                                session->mode = FTPP_XFER_PASSIVE;
                                 ftpdata->data_chan = session->server_conf->data_chan;
                                 if (session->flags & FTP_FLG_MALWARE)
                                     session->datassn = ftpdata;
@@ -1192,6 +1193,7 @@ static int do_stateful_checks(FTP_SESSION* session, Packet* p,
                             int result;
                             /* This is a active data transfer */
                             ftpdata->mode = FTPP_XFER_ACTIVE;
+                            session->mode = FTPP_XFER_ACTIVE;
                             ftpdata->data_chan = session->server_conf->data_chan;
                             if (session->flags & FTP_FLG_MALWARE)
                                 session->datassn = ftpdata;
@@ -1603,6 +1605,7 @@ int check_ftp(FTP_SESSION* ftpssn, Packet* p, int iMode)
                     {
                         req->cmd_begin = nullptr;
                         req->cmd_end = nullptr;
+                        req->cmd_size = 0;
                         if (*read_ptr != SP && read_ptr != p->data)
                             read_ptr--;
                         state = FTP_RESPONSE_CONT;