+++ /dev/null
-# HTML Fuzzy Hashing Configuration Example
-#
-# This configuration demonstrates how to use HTML fuzzy hashing for:
-# 1. Detecting spam campaigns with similar HTML structure
-# 2. Phishing detection (similar structure, different CTA domains)
-# 3. Brand protection (legitimate templates vs. fake emails)
-
-fuzzy_check {
- # Example rule for HTML structure matching
- rule "HTML_FUZZY" {
- # Standard fuzzy storage configuration
- servers = "localhost:11335";
-
- # Encryption (optional, recommended for production)
- # encryption_key = "your_base32_encoded_public_key";
- # fuzzy_key = "your_hashing_key";
- # fuzzy_shingles_key = "your_shingles_key";
-
- # Algorithm for shingles (mumhash recommended for HTML)
- algorithm = "mumhash";
-
- # Enable HTML fuzzy hashing
- html_shingles = true;
-
- # Minimum number of HTML tags to generate hash
- # (prevents hashing of trivial HTML snippets)
- min_html_tags = 15;
-
- # Weight multiplier for HTML fuzzy matches
- # Can be < 1.0 to reduce impact, or > 1.0 to increase
- html_weight = 1.0;
-
- # Regular fuzzy check settings
- symbol = "FUZZY_HTML";
- max_score = 20.0;
-
- # Fuzzy flag mappings
- fuzzy_map = {
- # Whitelist: known legitimate HTML structures
- "FUZZY_HTML_WHITELIST" {
- flag = 1;
- max_score = 20.0;
- }
- # Blacklist: known spam/phishing HTML structures
- "FUZZY_HTML_BLACKLIST" {
- flag = 2;
- max_score = 20.0;
- }
- }
-
- # Optional: skip specific hashes
- # skip_hashes = "${LOCAL_CONFDIR}/local.d/fuzzy_skip_html.map";
- }
-
- # Example: Combined text + HTML rule
- rule "COMBINED_FUZZY" {
- servers = "localhost:11335";
- algorithm = "mumhash";
-
- # Enable both text and HTML fuzzy hashing
- html_shingles = true;
- min_html_tags = 10;
-
- # This rule will generate:
- # - Text fuzzy hashes (from content)
- # - HTML fuzzy hashes (from structure)
- # Both sent to same storage with same flag
-
- symbol = "FUZZY_COMBINED";
- max_score = 30.0;
-
- fuzzy_map = {
- "FUZZY_COMBINED_WHITE" {
- flag = 10;
- max_score = 30.0;
- }
- "FUZZY_COMBINED_SPAM" {
- flag = 11;
- max_score = 30.0;
- }
- }
- }
-
- # Example: Phishing detection rule (higher weight for HTML)
- rule "PHISHING_DETECTION" {
- servers = "localhost:11335";
- algorithm = "mumhash";
-
- html_shingles = true;
- min_html_tags = 20;
-
- # Higher weight for HTML matches = prioritize structure over content
- html_weight = 1.5;
-
- symbol = "FUZZY_PHISHING";
- max_score = 25.0;
-
- fuzzy_map = {
- # Known phishing HTML templates
- "FUZZY_PHISHING_HTML" {
- flag = 20;
- max_score = 25.0;
- }
- # Known legitimate brands (for comparison)
- "FUZZY_LEGIT_BRANDS" {
- flag = 21;
- max_score = -25.0; # Negative score = whitelist
- }
- }
- }
-}
-
-# Additional configuration for phishing detection rules
-# See rules/fuzzy_html_phishing.lua for Lua-based detection logic
scan_archives = true,
short_text_direct_hash = true,
text_shingles = true,
+ text_hashes = true,
skip_images = false,
}
}
scan_archives = ts.boolean,
short_text_direct_hash = ts.boolean,
text_shingles = ts.boolean,
+ text_hashes = ts.boolean,
skip_images = ts.boolean,
}
local policy_schema = ts.shape(schema_fields)
local id = part:get_id()
lua_util.debugm(N, task, 'check text part %s', id)
+
+ if rule.text_hashes == false then
+ lua_util.debugm(N, task, 'text hashes disabled, relying on HTML for part %s', id)
+ return rule.html_shingles == true, false
+ end
+
local wcnt = text:get_words_count()
if rule.text_shingles then
gboolean no_share;
gboolean no_subject;
gboolean html_shingles; /* Enable HTML fuzzy hashing */
+ gboolean text_hashes; /* Enable/disable generation of text hashes */
unsigned int min_html_tags; /* Minimum tags for HTML hash */
int learn_condition_cb;
uint32_t retransmits;
rule->weight_threshold = NAN;
rule->html_weight = 1.0;
rule->html_shingles = FALSE;
+ rule->text_hashes = TRUE;
rule->min_html_tags = 10;
return rule;
rule->weight_threshold = ucl_object_todouble(value);
}
+ /* text_hashes keeps its initial TRUE value unless the rule overrides it */
+ if ((value = ucl_object_lookup(obj, "text_hashes")) != NULL) {
+ rule->text_hashes = ucl_object_toboolean(value);
+ }
+
if ((value = ucl_object_lookup(obj, "html_shingles")) != NULL) {
rule->html_shingles = ucl_object_toboolean(value);
}
0,
"true",
0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "fuzzy_check.rule",
+ "Enable hashing of text content (set to false to disable text hashes)",
+ "text_hashes",
+ UCL_BOOLEAN,
+ NULL,
+ 0,
+ "true",
+ 0);
+ rspamd_rcl_add_doc_by_path(cfg,
+ "fuzzy_check.rule",
+ "Enable HTML structure hashing for this rule",
+ "html_shingles",
+ UCL_BOOLEAN,
+ NULL,
+ 0,
+ "false",
+ 0);
+ /* Documented default matches the initial rule->min_html_tags value */
+ rspamd_rcl_add_doc_by_path(cfg,
+ "fuzzy_check.rule",
+ "Minimum number of HTML tags required to generate HTML hashes",
+ "min_html_tags",
+ UCL_INT,
+ NULL,
+ 0,
+ "10",
+ 0);
+ /* Documented default matches the initial rule->html_weight value */
+ rspamd_rcl_add_doc_by_path(cfg,
+ "fuzzy_check.rule",
+ "Multiplier applied to HTML fuzzy matches",
+ "html_weight",
+ UCL_FLOAT,
+ NULL,
+ 0,
+ "1.0",
+ 0);
rspamd_rcl_add_doc_by_path(cfg,
"fuzzy_check.rule",
"Override module default min bytes for this rule",
g_ptr_array_add(res, io);
}
- goto end;
+ return res;
}
else if (c == FUZZY_PING) {
res = g_ptr_array_sized_new(1);
g_ptr_array_add(res, io);
}
- goto end;
+ return res;
}
if (task->message == NULL) {
- goto end;
+ return res;
}
res = g_ptr_array_sized_new(MESSAGE_FIELD(task, parts)->len + 1);
if (mime_part->part_type == RSPAMD_MIME_PART_TEXT &&
!(flags & FUZZY_CHECK_FLAG_NOTEXT)) {
part = mime_part->specific.txt;
+ gboolean allow_html = rule->html_shingles &&
+ !(flags & FUZZY_CHECK_FLAG_NOHTML) &&
+ (check_part || !rule->text_hashes);
+
+ if (check_part && rule->text_hashes) {
+ io = fuzzy_cmd_from_text_part(task, rule,
+ c,
+ flag,
+ value,
+ !fuzzy_check,
+ part,
+ mime_part);
+ }
- io = fuzzy_cmd_from_text_part(task, rule,
- c,
- flag,
- value,
- !fuzzy_check,
- part,
- mime_part);
-
- /* Try HTML fuzzy hash if enabled and text hash generation succeeded/failed */
- if (rule->html_shingles && !(flags & FUZZY_CHECK_FLAG_NOHTML)) {
+ if (allow_html && part != NULL) {
struct fuzzy_cmd_io *html_io;
html_io = fuzzy_cmd_from_html_part(task, rule, c, flag, value,
- part, mime_part);
+ part, mime_part);
if (html_io) {
/* Add HTML hash as separate command */
}
}
}
- else if (mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&
- !(flags & FUZZY_CHECK_FLAG_NOIMAGES)) {
+ else if (check_part && mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&
+ !(flags & FUZZY_CHECK_FLAG_NOIMAGES)) {
image = mime_part->specific.img;
io = fuzzy_cmd_from_data_part(rule, c, flag, value,
- task,
- image->parent->digest,
- mime_part);
+ task,
+ image->parent->digest,
+ mime_part);
io->flags |= FUZZY_CMD_FLAG_IMAGE;
}
- else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
+ else if (check_part && mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
const struct rspamd_lua_specific_part *lua_spec;
lua_spec = &mime_part->specific.lua_specific;
if (hlen == rspamd_cryptobox_HASHBYTES) {
io = fuzzy_cmd_from_data_part(rule, c,
- flag, value,
- task,
- (unsigned char *) h,
- mime_part);
+ flag, value,
+ task,
+ (unsigned char *) h,
+ mime_part);
if (io) {
io->flags |= FUZZY_CMD_FLAG_CONTENT;
* Add part itself as well
*/
io = fuzzy_cmd_from_data_part(rule, c,
- flag, value,
- task,
- mime_part->digest,
- mime_part);
+ flag, value,
+ task,
+ mime_part->digest,
+ mime_part);
}
}
- else {
+ else if (check_part) {
io = fuzzy_cmd_from_data_part(rule, c, flag, value,
- task,
- mime_part->digest, mime_part);
+ task,
+ mime_part->digest, mime_part);
}
if (io) {
PTR_ARRAY_FOREACH(res, j, cur)
{
if (memcmp(cur->cmd.digest, io->cmd.digest,
- sizeof(io->cmd.digest)) == 0) {
+ sizeof(io->cmd.digest)) == 0) {
skip_existing = TRUE;
break;
}
}
}
-end:
if (res && res->len == 0) {
g_ptr_array_free(res, TRUE);