--- /dev/null
-
+ # HTML Fuzzy Hashing Configuration Example
+ #
+ # This configuration demonstrates how to use HTML fuzzy hashing for:
+ # 1. Detecting spam campaigns with similar HTML structure
+ # 2. Phishing detection (similar structure, different CTA domains)
+ # 3. Brand protection (legitimate templates vs. fake emails)
+
+ fuzzy_check {
+ # Example rule for HTML structure matching
+ rule "HTML_FUZZY" {
+ # Standard fuzzy storage configuration
+ servers = "localhost:11335";
-
++
+ # Encryption (optional, recommended for production)
+ # encryption_key = "your_base32_encoded_public_key";
+ # fuzzy_key = "your_hashing_key";
+ # fuzzy_shingles_key = "your_shingles_key";
-
++
+ # Algorithm for shingles (mumhash recommended for HTML)
+ algorithm = "mumhash";
-
++
+ # Enable HTML fuzzy hashing
+ html_shingles = true;
-
++
+ # Minimum number of HTML tags required to generate a hash
+ # (prevents hashing of trivial HTML snippets)
+ min_html_tags = 15;
-
++
+ # Weight multiplier for HTML fuzzy matches
+ # Can be < 1.0 to reduce the impact of HTML matches, or > 1.0 to increase it
+ html_weight = 1.0;
-
++
+ # Regular fuzzy check settings
+ symbol = "FUZZY_HTML";
+ max_score = 20.0;
-
++
+ # Fuzzy flag mappings
+ fuzzy_map = {
+ # Whitelist: known legitimate HTML structures
+ "FUZZY_HTML_WHITELIST" {
+ flag = 1;
+ max_score = 20.0;
+ }
+ # Blacklist: known spam/phishing HTML structures
+ "FUZZY_HTML_BLACKLIST" {
+ flag = 2;
+ max_score = 20.0;
+ }
+ }
-
++
+ # Optional: skip specific hashes
+ # skip_hashes = "${LOCAL_CONFDIR}/local.d/fuzzy_skip_html.map";
+ }
-
++
+ # Example: Combined text + HTML rule
+ rule "COMBINED_FUZZY" {
+ servers = "localhost:11335";
+ algorithm = "mumhash";
-
++
+ # Enable both text and HTML fuzzy hashing
+ html_shingles = true;
+ min_html_tags = 10;
-
++
+ # This rule will generate:
+ # - Text fuzzy hashes (from content)
+ # - HTML fuzzy hashes (from structure)
+ # Both are sent to the same storage with the same flag
-
++
+ symbol = "FUZZY_COMBINED";
+ max_score = 30.0;
-
++
+ fuzzy_map = {
+ "FUZZY_COMBINED_WHITE" {
+ flag = 10;
+ max_score = 30.0;
+ }
+ "FUZZY_COMBINED_SPAM" {
+ flag = 11;
+ max_score = 30.0;
+ }
+ }
+ }
-
++
+ # Example: Phishing detection rule (higher weight for HTML)
+ rule "PHISHING_DETECTION" {
+ servers = "localhost:11335";
+ algorithm = "mumhash";
-
++
+ html_shingles = true;
+ min_html_tags = 20;
-
++
+ # Higher weight for HTML matches = prioritize structure over content
+ html_weight = 1.5;
-
++
+ symbol = "FUZZY_PHISHING";
+ max_score = 25.0;
++
+ fuzzy_map = {
+ # Known phishing HTML templates
+ "FUZZY_PHISHING_HTML" {
+ flag = 20;
+ max_score = 25.0;
+ }
+ # Known legitimate brands (for comparison)
+ "FUZZY_LEGIT_BRANDS" {
+ flag = 21;
+ max_score = -25.0; # Negative score = whitelist
+ }
+ }
+ }
+ }
+
+ # Additional configuration for phishing detection rules
+ # See rules/fuzzy_html_phishing.lua for Lua-based detection logic
* We must ignore User header in case of spamc, as SA has
* different meaning of this header
*/
- msg_debug_protocol("read user header, value: %T", hv_tok);
- if (!RSPAMD_TASK_IS_SPAMC(task)) {
- task->auth_user = rspamd_mempool_ftokdup(task->task_pool,
- hv_tok);
- }
- else {
- msg_info_protocol("ignore user header: legacy SA protocol");
+ msg_debug_protocol("read user header, value: %T", hv_tok);
+ if (!RSPAMD_TASK_IS_SPAMC(task)) {
+ task->auth_user = rspamd_mempool_ftokdup(task->task_pool,
+ hv_tok);
+ }
+ else {
+ msg_info_protocol("ignore user header: legacy SA protocol");
+ }
}
- }
- IF_HEADER(URLS_HEADER)
- {
- msg_debug_protocol("read urls header, value: %T", hv_tok);
-
- srch.begin = "extended";
- srch.len = 8;
+ IF_HEADER(URLS_HEADER)
+ {
+ msg_debug_protocol("read urls header, value: %T", hv_tok);
- if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS;
- msg_debug_protocol("extended urls information");
- }
+ srch.begin = "extended";
+ srch.len = 8;
- /* TODO: add more formats there */
- }
- IF_HEADER(USER_AGENT_HEADER)
- {
- msg_debug_protocol("read user-agent header, value: %T", hv_tok);
+ if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS;
+ msg_debug_protocol("extended urls information");
+ }
- if (hv_tok->len == 6 &&
- rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) {
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT;
+ /* TODO: add more formats there */
}
- }
- break;
- case 'l':
- case 'L':
- IF_HEADER(NO_LOG_HEADER)
- {
- msg_debug_protocol("read log header, value: %T", hv_tok);
- srch.begin = "no";
- srch.len = 2;
+ IF_HEADER(USER_AGENT_HEADER)
+ {
+ msg_debug_protocol("read user-agent header, value: %T", hv_tok);
- if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
- task->flags |= RSPAMD_TASK_FLAG_NO_LOG;
+ if (hv_tok->len == 6 &&
+ rspamd_lc_cmp(hv_tok->begin, "rspamc", 6) == 0) {
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT;
+ }
}
- }
- IF_HEADER(LOG_TAG_HEADER)
- {
- msg_debug_protocol("read log-tag header, value: %T", hv_tok);
- /* Ensure that a tag is valid */
- if (rspamd_fast_utf8_validate(hv_tok->begin, hv_tok->len) == 0) {
- memcpy(task->task_pool->tag.uid, hv_tok->begin,
- MIN(hv_tok->len, sizeof(task->task_pool->tag.uid)));
+ break;
+ case 'l':
+ case 'L':
+ IF_HEADER(NO_LOG_HEADER)
+ {
+ msg_debug_protocol("read log header, value: %T", hv_tok);
+ srch.begin = "no";
+ srch.len = 2;
+
+ if (rspamd_ftok_casecmp(hv_tok, &srch) == 0) {
+ task->flags |= RSPAMD_TASK_FLAG_NO_LOG;
+ }
}
- }
- break;
- case 'm':
- case 'M':
- IF_HEADER(MTA_TAG_HEADER)
- {
- char *mta_tag;
- mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
- rspamd_mempool_set_variable(task->task_pool,
- RSPAMD_MEMPOOL_MTA_TAG,
- mta_tag, NULL);
- msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag);
- }
- IF_HEADER(MTA_NAME_HEADER)
- {
- char *mta_name;
- mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
- rspamd_mempool_set_variable(task->task_pool,
- RSPAMD_MEMPOOL_MTA_NAME,
- mta_name, NULL);
- msg_debug_protocol("read MTA-Name header, value: %s", mta_name);
- }
- IF_HEADER(MILTER_HEADER)
- {
- task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER;
- msg_debug_protocol("read Milter header, value: %T", hv_tok);
- }
- break;
- case 't':
- case 'T':
- IF_HEADER(TLS_CIPHER_HEADER)
- {
- task->flags |= RSPAMD_TASK_FLAG_SSL;
- msg_debug_protocol("read TLS cipher header, value: %T", hv_tok);
- }
- break;
- case 'x':
- case 'X':
- IF_HEADER("X-Rspamd-Mail-Esmtp-Args")
- {
- /* Parse MAIL ESMTP arguments from HTTP header */
- if (!task->mail_esmtp_args) {
- task->mail_esmtp_args = g_hash_table_new_full(
- rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal,
- rspamd_fstring_mapped_ftok_free,
- rspamd_fstring_mapped_ftok_free);
- }
-
- /* Parse KEY=VALUE format */
- const char *p = hv_tok->begin;
- const char *end = hv_tok->begin + hv_tok->len;
- const char *eq = memchr(p, '=', hv_tok->len);
-
- if (eq && eq > p) {
- rspamd_fstring_t *key = rspamd_fstring_new_init(p, eq - p);
- rspamd_fstring_t *value = rspamd_fstring_new_init(eq + 1, end - eq - 1);
- rspamd_ftok_t *key_tok = rspamd_ftok_map(key);
- rspamd_ftok_t *value_tok = rspamd_ftok_map(value);
-
- g_hash_table_replace(task->mail_esmtp_args, key_tok, value_tok);
- msg_debug_protocol("parsed mail ESMTP arg: %T=%T", key_tok, value_tok);
+ IF_HEADER(LOG_TAG_HEADER)
+ {
+ msg_debug_protocol("read log-tag header, value: %T", hv_tok);
+ /* Ensure that a tag is valid */
+ if (rspamd_fast_utf8_validate(hv_tok->begin, hv_tok->len) == 0) {
+ memcpy(task->task_pool->tag.uid, hv_tok->begin,
+ MIN(hv_tok->len, sizeof(task->task_pool->tag.uid)));
+ }
}
- }
- IF_HEADER("X-Rspamd-Rcpt-Esmtp-Args")
- {
- /* Parse RCPT ESMTP arguments from HTTP header */
- if (!task->rcpt_esmtp_args) {
- task->rcpt_esmtp_args = g_ptr_array_new();
- }
-
- /* Parse IDX:KEY=VALUE format */
- const char *p = hv_tok->begin;
- const char *end = hv_tok->begin + hv_tok->len;
- const char *colon = memchr(p, ':', hv_tok->len);
-
- if (colon && colon > p) {
- char *endptr;
- int rcpt_idx = strtol(p, &endptr, 10);
-
- if (endptr == colon) {
- /* Ensure we have enough entries in the array */
- while (task->rcpt_esmtp_args->len <= rcpt_idx) {
- g_ptr_array_add(task->rcpt_esmtp_args, NULL);
- }
+ break;
+ case 'm':
+ case 'M':
+ IF_HEADER(MTA_TAG_HEADER)
+ {
+ char *mta_tag;
+ mta_tag = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
+ rspamd_mempool_set_variable(task->task_pool,
+ RSPAMD_MEMPOOL_MTA_TAG,
+ mta_tag, NULL);
+ msg_debug_protocol("read MTA-Tag header, value: %s", mta_tag);
+ }
+ IF_HEADER(MTA_NAME_HEADER)
+ {
+ char *mta_name;
+ mta_name = rspamd_mempool_ftokdup(task->task_pool, hv_tok);
+ rspamd_mempool_set_variable(task->task_pool,
+ RSPAMD_MEMPOOL_MTA_NAME,
+ mta_name, NULL);
+ msg_debug_protocol("read MTA-Name header, value: %s", mta_name);
+ }
+ IF_HEADER(MILTER_HEADER)
+ {
+ task->protocol_flags |= RSPAMD_TASK_PROTOCOL_FLAG_MILTER;
+ msg_debug_protocol("read Milter header, value: %T", hv_tok);
+ }
+ break;
+ case 't':
+ case 'T':
+ IF_HEADER(TLS_CIPHER_HEADER)
+ {
+ task->flags |= RSPAMD_TASK_FLAG_SSL;
+ msg_debug_protocol("read TLS cipher header, value: %T", hv_tok);
+ }
+ break;
++ case 'x':
++ case 'X':
++ IF_HEADER("X-Rspamd-Mail-Esmtp-Args")
++ {
++ /* Parse MAIL ESMTP arguments from HTTP header */
++ if (!task->mail_esmtp_args) {
++ task->mail_esmtp_args = g_hash_table_new_full(
++ rspamd_ftok_icase_hash,
++ rspamd_ftok_icase_equal,
++ rspamd_fstring_mapped_ftok_free,
++ rspamd_fstring_mapped_ftok_free);
++ }
+
- /* Get or create hash table for this recipient */
- GHashTable *rcpt_args = g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx);
- if (!rcpt_args) {
- rcpt_args = g_hash_table_new_full(
- rspamd_ftok_icase_hash,
- rspamd_ftok_icase_equal,
- rspamd_fstring_mapped_ftok_free,
- rspamd_fstring_mapped_ftok_free);
- g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx) = rcpt_args;
- }
++ /* Parse KEY=VALUE format */
++ const char *p = hv_tok->begin;
++ const char *end = hv_tok->begin + hv_tok->len;
++ const char *eq = memchr(p, '=', hv_tok->len);
+
- /* Parse KEY=VALUE */
- p = colon + 1;
- const char *eq = memchr(p, '=', end - p);
++ if (eq && eq > p) {
++ rspamd_fstring_t *key = rspamd_fstring_new_init(p, eq - p);
++ rspamd_fstring_t *value = rspamd_fstring_new_init(eq + 1, end - eq - 1);
++ rspamd_ftok_t *key_tok = rspamd_ftok_map(key);
++ rspamd_ftok_t *value_tok = rspamd_ftok_map(value);
+
- if (eq && eq > p) {
- rspamd_fstring_t *key = rspamd_fstring_new_init(p, eq - p);
- rspamd_fstring_t *value = rspamd_fstring_new_init(eq + 1, end - eq - 1);
- rspamd_ftok_t *key_tok = rspamd_ftok_map(key);
- rspamd_ftok_t *value_tok = rspamd_ftok_map(value);
++ g_hash_table_replace(task->mail_esmtp_args, key_tok, value_tok);
++ msg_debug_protocol("parsed mail ESMTP arg: %T=%T", key_tok, value_tok);
++ }
++ }
++ IF_HEADER("X-Rspamd-Rcpt-Esmtp-Args")
++ {
++ /* Parse RCPT ESMTP arguments from HTTP header */
++ if (!task->rcpt_esmtp_args) {
++ task->rcpt_esmtp_args = g_ptr_array_new();
++ }
+
- g_hash_table_replace(rcpt_args, key_tok, value_tok);
- msg_debug_protocol("parsed rcpt ESMTP arg for idx %d: %T=%T", rcpt_idx, key_tok, value_tok);
++ /* Parse IDX:KEY=VALUE format */
++ const char *p = hv_tok->begin;
++ const char *end = hv_tok->begin + hv_tok->len;
++ const char *colon = memchr(p, ':', hv_tok->len);
++
++ if (colon && colon > p) {
++ char *endptr;
++ int rcpt_idx = strtol(p, &endptr, 10);
++
++ if (endptr == colon) {
++ /* Ensure we have enough entries in the array */
++ while (task->rcpt_esmtp_args->len <= rcpt_idx) {
++ g_ptr_array_add(task->rcpt_esmtp_args, NULL);
++ }
++
++ /* Get or create hash table for this recipient */
++ GHashTable *rcpt_args = g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx);
++ if (!rcpt_args) {
++ rcpt_args = g_hash_table_new_full(
++ rspamd_ftok_icase_hash,
++ rspamd_ftok_icase_equal,
++ rspamd_fstring_mapped_ftok_free,
++ rspamd_fstring_mapped_ftok_free);
++ g_ptr_array_index(task->rcpt_esmtp_args, rcpt_idx) = rcpt_args;
++ }
++
++ /* Parse KEY=VALUE */
++ p = colon + 1;
++ const char *eq = memchr(p, '=', end - p);
++
++ if (eq && eq > p) {
++ rspamd_fstring_t *key = rspamd_fstring_new_init(p, eq - p);
++ rspamd_fstring_t *value = rspamd_fstring_new_init(eq + 1, end - eq - 1);
++ rspamd_ftok_t *key_tok = rspamd_ftok_map(key);
++ rspamd_ftok_t *value_tok = rspamd_ftok_map(value);
++
++ g_hash_table_replace(rcpt_args, key_tok, value_tok);
++ msg_debug_protocol("parsed rcpt ESMTP arg for idx %d: %T=%T", rcpt_idx, key_tok, value_tok);
++ }
+ }
+ }
+ }
- }
- break;
- default:
- msg_debug_protocol("generic header: %T", hn_tok);
- break;
++ break;
+ default:
+ msg_debug_protocol("generic header: %T", hn_tok);
+ break;
}
rspamd_task_add_request_header (task, hn_tok, hv_tok);