From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Tue, 6 Sep 2022 13:49:49 +0000 (+0000) Subject: Pull request #3575: sd_pattern: add and improve built-in patterns X-Git-Tag: 3.1.41.0~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=11006919f537946ce78e82e5d2579da9327f8524;p=thirdparty%2Fsnort3.git Pull request #3575: sd_pattern: add and improve built-in patterns Merge in SNORT/snort3 from ~VHORBATO/snort3:sd_new_patterns to master Squashed commit of the following: commit 7671add3259b33398e783c5b58c3c262737824f4 Author: Vitalii Date: Fri Aug 26 19:58:51 2022 +0300 sd_pattern: add and improve built-in patterns --- diff --git a/doc/user/sensitive_data.txt b/doc/user/sensitive_data.txt index ce7a599c9..391b6c7e2 100644 --- a/doc/user/sensitive_data.txt +++ b/doc/user/sensitive_data.txt @@ -1,7 +1,7 @@ The `sd_pattern` IPS option provides detection and filtering of Personally Identifiable Information (PII). This information includes credit card -numbers, U.S. Social Security numbers, and email addresses. A rich regular -expression syntax is available for defining your own PII. +numbers, U.S. Social Security numbers, phone numbers, and email addresses. +A rich regular expression syntax is available for defining your own PII. ==== Hyperscan @@ -20,24 +20,35 @@ overhead. The Rule option takes the following syntax. ===== Pattern Pattern is the most important and is the only required parameter to -`sd_pattern`. It supports 3 built in patterns which are configured by name: -"credit_card", "us_social" and "us_social_nodashes", as well as user -defined regular expressions of the Hyperscan dialect (see +`sd_pattern`. It supports 5 built-in patterns which are configured by name: +"credit_card", "us_social", "us_social_nodashes", "email", and "us_phone" as +well as user defined regular expressions of the Hyperscan dialect (see https://intel.github.io/hyperscan/dev-reference/compilation.html#pattern-support). 
sd_pattern:"credit_card"; -When configured, Snort will replace the pattern 'credit_card' with the built in +When configured, Snort will replace the pattern 'credit_card' with the built-in pattern. In addition to pattern matching, Snort will validate that the matched -digits will pass the Luhn-check algorithm. Currently the only pattern that -performs extra verification. +digits will pass the Luhn-check algorithm. sd_pattern:"us_social"; sd_pattern:"us_social_nodashes"; -These special patterns will also be replaced with a built in pattern. +These special patterns will also be replaced with a built-in pattern. Naturally, "us_social" is a pattern of 9 digits separated by `-`'s in the -canonical form. +canonical form. For both patterns, some validation of compliance with the +Social Security Number randomization rules is also performed. + + sd_pattern:"email"; + +This pattern will be replaced with a built-in pattern created to match email addresses. +The regex implements the "preferred" syntax from RFC 1035 which is one of the +recommendations in RFC 5322. + + sd_pattern:"us_phone"; + +This pattern will match U.S. phone numbers in different formats with or without +a country code. sd_pattern:"\b\w+@ourdomain\.com\b" @@ -55,7 +66,7 @@ correctly formatted emails. ===== Threshold -Threshold is an optional parameter allowing you to change built in default +Threshold is an optional parameter allowing you to change the built-in default value (default value is '1'). The following two instances are identical. The first will assume the default value of '1' the second declaration explicitly sets the threshold to '1'. @@ -71,12 +82,12 @@ This example requires 300 matches of the pattern "This is a string literal" to qualify as a positive match. That is, if the string only occurred 299 times in a packet, you will not see an event. 
-===== Obfuscating Credit Cards and Social Security Numbers +===== Obfuscating built-in patterns -Snort provides discreet logging for the built in patterns "credit_card", -"us_social" and "us_social_nodashes". Enabling `ips.obfuscate_pii` makes -Snort obfuscate the suspect packet payload which was matched by the -patterns. This configuration is disabled by default. +Snort provides discreet logging for the built-in patterns "credit_card", +"us_social", "us_social_nodashes", "us_phone" and "email". Enabling +`ips.obfuscate_pii` makes Snort obfuscate the suspect packet payload which +was matched by the patterns. This configuration is disabled by default. ips = { @@ -109,7 +120,7 @@ Logged output when running Snort in "cmg" alert format. 2. Log obfuscation is only applicable to CMG and Unified2 logging formats. 3. Log obfuscation doesn't support user defined PII patterns. It is -currently only supported for the built in patterns for Credit Cards and US +currently only supported for the built-in patterns for Credit Cards, emails, U.S. phone numbers and U.S. Social Security numbers. 4. Log obfuscation doesn't work with stream rebuilt packet payloads. 
(This diff --git a/src/ips_options/ips_sd_pattern.cc b/src/ips_options/ips_sd_pattern.cc index cf1789a62..17f853e57 100644 --- a/src/ips_options/ips_sd_pattern.cc +++ b/src/ips_options/ips_sd_pattern.cc @@ -23,6 +23,7 @@ #endif #include +#include #include #include @@ -47,9 +48,11 @@ using namespace snort; #define s_name "sd_pattern" #define s_help "rule option for detecting sensitive data" -#define SD_SOCIAL_PATTERN R"(\d{3}-\d{2}-\d{4})" -#define SD_SOCIAL_NODASHES_PATTERN R"(\d{9})" +#define SD_SOCIAL_PATTERN R"([0-8]\d{2}-\d{2}-\d{4})" +#define SD_SOCIAL_NODASHES_PATTERN R"([0-8]\d{8})" #define SD_CREDIT_PATTERN_ALL R"(\d{4}\D?\d{4}\D?\d{2}\D?\d{2}\D?\d{3,4})" +#define SD_EMAIL_PATTERN R"([a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)" +#define SD_US_PHONE_PATTERN R"((?:\+?1[-\.\s]?)?\(?([2-9][0-8]\d)\)?[-\.\s]([2-9]\d{2})[-\.\s](\d{4}))" static HyperScratchAllocator* scratcher = nullptr; @@ -103,6 +106,56 @@ struct SdPatternConfig static THREAD_LOCAL ProfileStats sd_pattern_perf_stats; +//------------------------------------------------------------------------- +// SSN validation functions +//------------------------------------------------------------------------- + +#pragma pack(push, 0) + +struct ssn_no_dashes +{ + char area[3]; + char group[2]; + char serial[4]; +}; + +struct ssn_with_dashes +{ + char area[3]; + char dash_1; + char group[2]; + char dash_2; + char serial[4]; +}; + +#pragma pack(pop) + +static int validate_us_ssn_nodashes(const uint8_t* buf, unsigned long long len) +{ + if (len != sizeof(ssn_no_dashes)) + return false; + + const ssn_no_dashes* ssn = (const ssn_no_dashes*)buf; + + return strncmp(ssn->area, "000", 3) + and strncmp(ssn->area, "666", 3) + and strncmp(ssn->group, "00", 2) + and strncmp(ssn->serial, "0000", 4); +} + +static int validate_us_ssn(const uint8_t* buf, unsigned long long len) +{ + if (len != 
sizeof(ssn_with_dashes)) + return false; + + const ssn_with_dashes* ssn = (const ssn_with_dashes*)buf; + + return strncmp(ssn->area, "000", 3) + and strncmp(ssn->area, "666", 3) + and strncmp(ssn->group, "00", 2) + and strncmp(ssn->serial, "0000", 4); +} + //------------------------------------------------------------------------- // option //------------------------------------------------------------------------- @@ -201,6 +254,8 @@ struct hsContext return left and right; } + unsigned long long last_match_from_pos = ULLONG_MAX; + unsigned long long last_match_to_pos = 0; unsigned int count = 0; SdPatternConfig config; @@ -227,7 +282,15 @@ static int hs_match(unsigned int /*id*/, unsigned long long from, if ( ctx->config.validate && ctx->config.validate(ctx->buf+from, len) != 1 ) return 0; - ctx->count++; + if (from >= ctx->last_match_to_pos) + { + ctx->last_match_from_pos = from; + ctx->count++; + } + else if (from != ctx->last_match_from_pos) + return 0; + + ctx->last_match_to_pos = to; IpsPolicy* p = get_ips_policy(); @@ -362,12 +425,25 @@ bool SdPatternModule::end(const char*, int, SnortConfig*) else if (config.pii == "us_social") { config.pii = SD_SOCIAL_PATTERN; + config.validate = validate_us_ssn; config.can_be_obfuscated = true; config.forced_boundary = true; } else if (config.pii == "us_social_nodashes") { config.pii = SD_SOCIAL_NODASHES_PATTERN; + config.validate = validate_us_ssn_nodashes; + config.can_be_obfuscated = true; + config.forced_boundary = true; + } + else if (config.pii == "email") + { + config.pii = SD_EMAIL_PATTERN; + config.can_be_obfuscated = true; + } + else if (config.pii == "us_phone") + { + config.pii = SD_US_PHONE_PATTERN; config.can_be_obfuscated = true; config.forced_boundary = true; } diff --git a/src/log/obfuscator.h b/src/log/obfuscator.h index 3ef60bd0d..d9370b037 100644 --- a/src/log/obfuscator.h +++ b/src/log/obfuscator.h @@ -57,7 +57,15 @@ public: using iterator = ObSet::iterator; void push(uint32_t offset, uint32_t length) 
- { blocks.emplace(offset, length); } + { + const auto push_res = blocks.emplace(offset, length); + + if (!push_res.second and length > push_res.first->length) + { + blocks.erase(push_res.first); + blocks.emplace(offset, length); + } + } const_iterator begin() const { return blocks.cbegin(); }