The `sd_pattern` IPS option provides detection and filtering of Personally
Identifiable Information (PII). This information includes credit card
-numbers, U.S. Social Security numbers, and email addresses. A rich regular
-expression syntax is available for defining your own PII.
+numbers, U.S. Social Security numbers, phone numbers, and email addresses.
+A rich regular expression syntax is available for defining your own PII.
==== Hyperscan
===== Pattern
Pattern is the most important and is the only required parameter to
-`sd_pattern`. It supports 3 built in patterns which are configured by name:
-"credit_card", "us_social" and "us_social_nodashes", as well as user
-defined regular expressions of the Hyperscan dialect (see
+`sd_pattern`. It supports 5 built-in patterns which are configured by name:
+"credit_card", "us_social", "us_social_nodashes", "email" and "us_phone", as
+well as user defined regular expressions of the Hyperscan dialect (see
https://intel.github.io/hyperscan/dev-reference/compilation.html#pattern-support).
sd_pattern:"credit_card";
-When configured, Snort will replace the pattern 'credit_card' with the built in
+When configured, Snort will replace the pattern 'credit_card' with the built-in
pattern. In addition to pattern matching, Snort will validate that the matched
-digits will pass the Luhn-check algorithm. Currently the only pattern that
-performs extra verification.
+digits will pass the Luhn-check algorithm.
sd_pattern:"us_social";
sd_pattern:"us_social_nodashes";
-These special patterns will also be replaced with a built in pattern.
+These special patterns will also be replaced with a built-in pattern.
Naturally, "us_social" is a pattern of 9 digits separated by `-`'s in the
-canonical form.
+canonical form. For this pattern, Snort also checks the match against the
+Social Security number randomization rules: area numbers 000, 666 and 900-999,
+group number 00 and serial number 0000 are rejected.
+
+ sd_pattern:"email";
+
+This pattern will be replaced with a built-in pattern created to match email
+addresses. The regex implements the "preferred" syntax from RFC 1035, which is
+one of the recommendations in RFC 5322.
+
+ sd_pattern:"us_phone";
+
+This pattern will match U.S. phone numbers in different formats with or without
+country code.
sd_pattern:"\b\w+@ourdomain\.com\b"
===== Threshold
-Threshold is an optional parameter allowing you to change built in default
+Threshold is an optional parameter allowing you to change the built-in default
value (default value is '1'). The following two instances are identical.
The first will assume the default value of '1' the second declaration
explicitly sets the threshold to '1'.
to qualify as a positive match. That is, if the string only occurred 299 times
in a packet, you will not see an event.
-===== Obfuscating Credit Cards and Social Security Numbers
+===== Obfuscating built-in patterns
-Snort provides discreet logging for the built in patterns "credit_card",
-"us_social" and "us_social_nodashes". Enabling `ips.obfuscate_pii` makes
-Snort obfuscate the suspect packet payload which was matched by the
-patterns. This configuration is disabled by default.
+Snort provides discreet logging for the built-in patterns "credit_card",
+"us_social", "us_social_nodashes", "us_phone" and "email". Enabling
+`ips.obfuscate_pii` makes Snort obfuscate the suspect packet payload which
+was matched by the patterns. This configuration is disabled by default.
ips =
{
2. Log obfuscation is only applicable to CMG and Unified2 logging formats.
3. Log obfuscation doesn't support user defined PII patterns. It is
-currently only supported for the built in patterns for Credit Cards and US
+currently only supported for the built-in patterns for Credit Cards, email
+addresses, U.S. phone numbers and U.S.
Social Security numbers.
4. Log obfuscation doesn't work with stream rebuilt packet payloads. (This
#endif
#include <cctype>
+#include <climits>
#include <hs_compile.h>
#include <hs_runtime.h>
#define s_name "sd_pattern"
#define s_help "rule option for detecting sensitive data"
-#define SD_SOCIAL_PATTERN R"(\d{3}-\d{2}-\d{4})"
-#define SD_SOCIAL_NODASHES_PATTERN R"(\d{9})"
+#define SD_SOCIAL_PATTERN R"([0-8]\d{2}-\d{2}-\d{4})"  // SSN written as AAA-GG-SSSS; the leading digit is limited to 0-8, so areas 900-999 never match
+#define SD_SOCIAL_NODASHES_PATTERN R"([0-8]\d{8})"  // the same nine digits with no separators; leading digit again limited to 0-8
#define SD_CREDIT_PATTERN_ALL R"(\d{4}\D?\d{4}\D?\d{2}\D?\d{2}\D?\d{3,4})"
+#define SD_EMAIL_PATTERN R"([a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)"  // dot-separated local part, an at-sign, then at least two dot-separated domain labels; each label starts and ends alphanumeric and may contain hyphens inside
+#define SD_US_PHONE_PATTERN R"((?:\+?1[-\.\s]?)?\(?([2-9][0-8]\d)\)?[-\.\s]([2-9]\d{2})[-\.\s](\d{4}))"  // optional country code 1 (with optional plus sign and separator), optionally parenthesized area code [2-9][0-8]d, exchange [2-9]dd, four-digit line number; a dash, dot or whitespace is required between the three groups
static HyperScratchAllocator* scratcher = nullptr;
static THREAD_LOCAL ProfileStats sd_pattern_perf_stats;
+//-------------------------------------------------------------------------
+// SSN validation functions
+//-------------------------------------------------------------------------
+
+#pragma pack(push, 0)  // NOTE(review): 0 is not among the pack values MSVC documents (1, 2, 4, 8, 16); every member below is char, so packing presumably has no effect here - confirm whether 1 was intended
+
+struct ssn_no_dashes  // field layout of a 9-character SSN without separators (sizeof is used as the expected match length)
+{
+ char area[3];  // first three characters
+ char group[2];  // next two characters
+ char serial[4];  // last four characters
+};
+
+struct ssn_with_dashes  // field layout of an 11-character SSN written as AAA-GG-SSSS
+{
+ char area[3];  // first three characters
+ char dash_1;  // separator position between area and group
+ char group[2];  // two characters after the first separator
+ char dash_2;  // separator position between group and serial
+ char serial[4];  // last four characters
+};
+
+#pragma pack(pop)
+
+static int validate_us_ssn_nodashes(const uint8_t* buf, unsigned long long len)  // checks a separator-less SSN candidate of len bytes at buf; returns 1 when accepted, 0 when rejected
+{
+ if (len != sizeof(ssn_no_dashes))  // the candidate must be exactly as long as the layout struct
+ return false;
+
+ const ssn_no_dashes* ssn = (const ssn_no_dashes*)buf;  // view the raw bytes through the field layout
+
+ return strncmp(ssn->area, "000", 3)  // strncmp is nonzero on mismatch, so each term reads: field differs from this value
+ and strncmp(ssn->area, "666", 3)  // area 666 is rejected
+ and strncmp(ssn->group, "00", 2)  // group 00 is rejected
+ and strncmp(ssn->serial, "0000", 4);  // serial 0000 is rejected
+}
+
+static int validate_us_ssn(const uint8_t* buf, unsigned long long len)  // checks a dash-separated SSN candidate of len bytes at buf; returns 1 when accepted, 0 when rejected
+{
+ if (len != sizeof(ssn_with_dashes))  // the candidate must be exactly as long as the layout struct, separators included
+ return false;
+
+ const ssn_with_dashes* ssn = (const ssn_with_dashes*)buf;  // view the raw bytes through the field layout; the separator bytes themselves are not inspected here
+
+ return strncmp(ssn->area, "000", 3)  // strncmp is nonzero on mismatch, so each term reads: field differs from this value
+ and strncmp(ssn->area, "666", 3)  // area 666 is rejected
+ and strncmp(ssn->group, "00", 2)  // group 00 is rejected
+ and strncmp(ssn->serial, "0000", 4);  // serial 0000 is rejected
+}
+
//-------------------------------------------------------------------------
// option
//-------------------------------------------------------------------------
return left and right;
}
+ unsigned long long last_match_from_pos = ULLONG_MAX;
+ unsigned long long last_match_to_pos = 0;
unsigned int count = 0;
SdPatternConfig config;
if ( ctx->config.validate && ctx->config.validate(ctx->buf+from, len) != 1 )
return 0;
- ctx->count++;
+ if (from >= ctx->last_match_to_pos)
+ {
+ ctx->last_match_from_pos = from;
+ ctx->count++;
+ }
+ else if (from != ctx->last_match_from_pos)
+ return 0;
+
+ ctx->last_match_to_pos = to;
IpsPolicy* p = get_ips_policy();
else if (config.pii == "us_social")
{
config.pii = SD_SOCIAL_PATTERN;
+ config.validate = validate_us_ssn;
config.can_be_obfuscated = true;
config.forced_boundary = true;
}
else if (config.pii == "us_social_nodashes")
{
config.pii = SD_SOCIAL_NODASHES_PATTERN;
+ config.validate = validate_us_ssn_nodashes;
+ config.can_be_obfuscated = true;
+ config.forced_boundary = true;
+ }
+ else if (config.pii == "email")
+ {
+ config.pii = SD_EMAIL_PATTERN;
+ config.can_be_obfuscated = true;
+ }
+ else if (config.pii == "us_phone")
+ {
+ config.pii = SD_US_PHONE_PATTERN;
config.can_be_obfuscated = true;
config.forced_boundary = true;
}
using iterator = ObSet::iterator;
void push(uint32_t offset, uint32_t length)
- { blocks.emplace(offset, length); }
+ {  // records a block; when the container already holds an equivalent entry, the longer of the two is kept
+ const auto push_res = blocks.emplace(offset, length);  // presumably std::set-style result: first is the element, second is false when nothing was inserted - confirm against ObSet
+
+ if (!push_res.second and length > push_res.first->length)  // an equivalent entry exists and it is shorter than the new block
+ {
+ blocks.erase(push_res.first);  // remove the shorter entry first
+ blocks.emplace(offset, length);  // then store the longer block in its place
+ }
+ }
const_iterator begin() const
{ return blocks.cbegin(); }