git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add dual-mode HTML fuzzy: template matching + phishing detection
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 13 Feb 2026 16:47:13 +0000 (16:47 +0000)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 13 Feb 2026 16:47:13 +0000 (16:47 +0000)
When html_ignore_domains is enabled, generate two HTML fuzzy commands per
part: a template command (domains ignored) and a domain-sensitive command
(domains included). If the template hash matches but the domain-sensitive
hash does not, the new FUZZY_HTML_PHISHING symbol fires, detecting reused
HTML templates with swapped phishing links.

rules/fuzzy_html_phishing.lua
src/plugins/fuzzy_check.c

index a4486ec5aff51ef999700a66dde4fcf328a0e35a..4f3d318de2d6fc0a916859a121e8bc08336fc298 100644 (file)
@@ -109,6 +109,12 @@ local function check_fuzzy_mismatch(task)
       lua_util.debugm(N, task, 'html matched but text did not for %s', sym.name)
     end
 
+    -- Phishing detection: HTML template matches but domains differ
+    if matched['html'] and not matched['htmld'] then
+      task:insert_result('FUZZY_HTML_PHISHING', 1.0, sym.name)
+      lua_util.debugm(N, task, 'html template matched but domains differ for %s (possible phishing)', sym.name)
+    end
+
     ::continue::
   end
 end
@@ -140,4 +146,13 @@ rspamd_config:register_symbol{
   description = 'HTML structure fuzzy matches but text content does not',
 }
 
+rspamd_config:register_symbol{
+  name = 'FUZZY_HTML_PHISHING',
+  type = 'virtual',
+  score = 6.0,
+  parent = cb_id,
+  group = 'fuzzy',
+  description = 'HTML template matches but link domains differ (possible phishing)',
+}
+
 rspamd_config:register_dependency('FUZZY_MISMATCH_CHECK', 'FUZZY_CALLBACK')
index b87a44681d9472067316a7080b3fb994c0a44737..69daf4a51fd519bae6d247de3c888fc98e0897b0 100644 (file)
@@ -279,6 +279,7 @@ struct fuzzy_tcp_connection {
 #define FUZZY_CMD_FLAG_IMAGE (1 << 2)
 #define FUZZY_CMD_FLAG_CONTENT (1 << 3)
 #define FUZZY_CMD_FLAG_HTML (1 << 4)
+#define FUZZY_CMD_FLAG_HTML_DOMAINS (1 << 5)
 
 #define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
 #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
@@ -3713,7 +3714,8 @@ fuzzy_cmd_from_html_part(struct rspamd_task *task,
                                                 int flag,
                                                 uint32_t weight,
                                                 struct rspamd_mime_text_part *part,
-                                                struct rspamd_mime_part *mp)
+                                                struct rspamd_mime_part *mp,
+                                                gboolean ignore_link_domains)
 {
        struct rspamd_fuzzy_shingle_cmd *shcmd = NULL;
        struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd = NULL;
@@ -3774,7 +3776,7 @@ fuzzy_cmd_from_html_part(struct rspamd_task *task,
        memcpy(&key_part, rule->shingles_key->str, sizeof(key_part));
        rspamd_snprintf(html_cache_key, sizeof(html_cache_key), "%s%d_html%s",
                                        rule->algorithm_str, key_part,
-                                       rule->html_ignore_domains ? "_nd" : "");
+                                       ignore_link_domains ? "_nd" : "");
 
        html_cached_ptr = (struct rspamd_cached_shingles **) rspamd_mempool_get_variable(
                task->task_pool, html_cache_key);
@@ -3817,7 +3819,7 @@ fuzzy_cmd_from_html_part(struct rspamd_task *task,
                html_sh = rspamd_shingles_from_html(part->html,
                                                                                        (const unsigned char *) rule->shingles_key->str, task->task_pool,
                                                                                        rspamd_shingles_default_filter, NULL,
-                                                                                       rule->alg, rule->html_ignore_domains);
+                                                                                       rule->alg, ignore_link_domains);
 
                if (html_sh != NULL) {
                        /* Use structure shingles for fuzzy matching */
@@ -4311,8 +4313,16 @@ fuzzy_insert_result(struct fuzzy_client_session *session,
                        type = "img";
                        res->type = FUZZY_RESULT_IMG;
                }
+               else if ((io->flags & FUZZY_CMD_FLAG_HTML_DOMAINS)) {
+                       /* HTML domain-sensitive hash (structure + domains) */
+                       nval *= sqrtf(rep->v1.prob);
+                       nval *= session->rule->html_weight;
+
+                       type = "htmld";
+                       res->type = FUZZY_RESULT_HTML;
+               }
                else if ((io->flags & FUZZY_CMD_FLAG_HTML)) {
-                       /* HTML structural hash */
+                       /* HTML structural hash (template mode, domains ignored) */
                        nval *= sqrtf(rep->v1.prob);
                        /* Apply HTML weight multiplier from rule config */
                        nval *= session->rule->html_weight;
@@ -5047,6 +5057,9 @@ fuzzy_controller_io_callback(int fd, short what, void *arg)
                                        if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
                                                ftype = "img";
                                        }
+                                       else if ((io->flags & FUZZY_CMD_FLAG_HTML_DOMAINS)) {
+                                               ftype = "htmld";
+                                       }
                                        else if ((io->flags & FUZZY_CMD_FLAG_HTML)) {
                                                ftype = "html";
                                        }
@@ -5305,12 +5318,28 @@ fuzzy_generate_commands(struct rspamd_task *task, struct fuzzy_rule *rule,
                                                struct fuzzy_cmd_io *html_io;
 
                                                html_io = fuzzy_cmd_from_html_part(task, rule, c, flag, value,
-                                                                                                                  part, mime_part);
+                                                                                                                  part, mime_part,
+                                                                                                                  rule->html_ignore_domains);
 
                                                if (html_io) {
-                                                       /* Add HTML hash as separate command */
+                                                       /* Add HTML hash as separate command (template mode) */
                                                        g_ptr_array_add(res, html_io);
                                                }
+
+                                               /* Generate domain-sensitive command when ignore_domains is on */
+                                               if (rule->html_ignore_domains) {
+                                                       struct fuzzy_cmd_io *htmld_io;
+
+                                                       htmld_io = fuzzy_cmd_from_html_part(task, rule, c, flag, value,
+                                                                                                                               part, mime_part,
+                                                                                                                               FALSE);
+
+                                                       if (htmld_io) {
+                                                               /* Mark as domain-sensitive HTML command */
+                                                               htmld_io->flags |= FUZZY_CMD_FLAG_HTML_DOMAINS;
+                                                               g_ptr_array_add(res, htmld_io);
+                                                       }
+                                               }
                                        }
                                }
                                else if (check_part && mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&