lua_util.debugm(N, task, 'html matched but text did not for %s', sym.name)
end
+ -- Phishing detection: the html (template) hash matched but the htmld (domain-sensitive) hash did not. NOTE(review): htmld commands appear to be generated only for rules with html_ignore_domains enabled; confirm that other rules cannot reach this branch, otherwise every html match would be reported as phishing.
+ if matched['html'] and not matched['htmld'] then
+ task:insert_result('FUZZY_HTML_PHISHING', 1.0, sym.name)
+ lua_util.debugm(N, task, 'html template matched but domains differ for %s (possible phishing)', sym.name)
+ end
+
::continue::
end
end
description = 'HTML structure fuzzy matches but text content does not',
}
+rspamd_config:register_symbol{
+ name = 'FUZZY_HTML_PHISHING',
+ type = 'virtual',
+ score = 6.0,
+ parent = cb_id,
+ group = 'fuzzy',
+ description = 'HTML template matches but link domains differ (possible phishing)',
+}
+
rspamd_config:register_dependency('FUZZY_MISMATCH_CHECK', 'FUZZY_CALLBACK')
#define FUZZY_CMD_FLAG_IMAGE (1 << 2)
#define FUZZY_CMD_FLAG_CONTENT (1 << 3)
#define FUZZY_CMD_FLAG_HTML (1 << 4)
+#define FUZZY_CMD_FLAG_HTML_DOMAINS (1 << 5)
#define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
#define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
int flag,
uint32_t weight,
struct rspamd_mime_text_part *part,
- struct rspamd_mime_part *mp)
+ struct rspamd_mime_part *mp,
+ gboolean ignore_link_domains)
{
struct rspamd_fuzzy_shingle_cmd *shcmd = NULL;
struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd = NULL;
memcpy(&key_part, rule->shingles_key->str, sizeof(key_part));
rspamd_snprintf(html_cache_key, sizeof(html_cache_key), "%s%d_html%s",
rule->algorithm_str, key_part,
- rule->html_ignore_domains ? "_nd" : "");
+ ignore_link_domains ? "_nd" : "");
html_cached_ptr = (struct rspamd_cached_shingles **) rspamd_mempool_get_variable(
task->task_pool, html_cache_key);
html_sh = rspamd_shingles_from_html(part->html,
(const unsigned char *) rule->shingles_key->str, task->task_pool,
rspamd_shingles_default_filter, NULL,
- rule->alg, rule->html_ignore_domains);
+ rule->alg, ignore_link_domains);
if (html_sh != NULL) {
/* Use structure shingles for fuzzy matching */
type = "img";
res->type = FUZZY_RESULT_IMG;
}
+ else if ((io->flags & FUZZY_CMD_FLAG_HTML_DOMAINS)) {
+ /* HTML domain-sensitive hash (structure + link domains): weighted like the plain HTML hash but labelled htmld. Checked before FUZZY_CMD_FLAG_HTML; presumably such commands carry both flags (TODO confirm) */
+ nval *= sqrtf(rep->v1.prob);
+ nval *= session->rule->html_weight;
+
+ type = "htmld";
+ res->type = FUZZY_RESULT_HTML;
+ }
else if ((io->flags & FUZZY_CMD_FLAG_HTML)) {
- /* HTML structural hash */
+ /* HTML structural hash (template mode, i.e. link domains ignored, only when the rule sets html_ignore_domains) */
nval *= sqrtf(rep->v1.prob);
/* Apply HTML weight multiplier from rule config */
nval *= session->rule->html_weight;
if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
ftype = "img";
}
+ else if ((io->flags & FUZZY_CMD_FLAG_HTML_DOMAINS)) {
+ ftype = "htmld";
+ }
else if ((io->flags & FUZZY_CMD_FLAG_HTML)) {
ftype = "html";
}
struct fuzzy_cmd_io *html_io;
html_io = fuzzy_cmd_from_html_part(task, rule, c, flag, value,
- part, mime_part);
+ part, mime_part,
+ rule->html_ignore_domains);
if (html_io) {
- /* Add HTML hash as separate command */
+ /* Add HTML hash as separate command (template mode only when rule->html_ignore_domains is set) */
g_ptr_array_add(res, html_io);
}
+
+ /* When the rule ignores link domains for the template hash, also build a second command with ignore_link_domains = FALSE so that structure and link domains are hashed together */
+ if (rule->html_ignore_domains) {
+ struct fuzzy_cmd_io *htmld_io;
+
+ htmld_io = fuzzy_cmd_from_html_part(task, rule, c, flag, value,
+ part, mime_part,
+ FALSE);
+
+ if (htmld_io) {
+ /* Tag the command so reply handling labels it htmld instead of html */
+ htmld_io->flags |= FUZZY_CMD_FLAG_HTML_DOMAINS;
+ g_ptr_array_add(res, htmld_io);
+ }
+ }
}
}
else if (check_part && mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&