From: Vsevolod Stakhov Date: Sat, 4 Oct 2025 20:30:53 +0000 (+0100) Subject: [Minor] Add debug logging to HTML fuzzy hash generation X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3c19c2465dd267f79ec3f7ba8ce0eecd694df406;p=thirdparty%2Frspamd.git [Minor] Add debug logging to HTML fuzzy hash generation Add detailed debug messages to track HTML fuzzy hash generation flow: - Log when fuzzy_cmd_from_html_part is called - Log HTML shingles enabled/disabled status - Log HTML part detection - Log tag count checks - Log successful/failed hash generation This helps diagnose issues with HTML fuzzy matching in tests. --- diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 0afdfba67a..8d87ff1e35 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -2116,13 +2116,17 @@ fuzzy_cmd_from_html_part(struct rspamd_task *task, unsigned int additional_length; unsigned char *additional_data; + msg_debug_fuzzy_check("fuzzy_cmd_from_html_part called for rule %s", rule->name); + /* Check if HTML shingles are enabled for this rule */ if (!rule->html_shingles) { + msg_debug_fuzzy_check("HTML shingles disabled for rule %s", rule->name); return NULL; } /* Check if this is an HTML part */ if (!IS_TEXT_PART_HTML(part) || part->html == NULL) { + msg_debug_fuzzy_check("Part is not HTML or html is NULL"); return NULL; } @@ -2133,6 +2137,9 @@ fuzzy_cmd_from_html_part(struct rspamd_task *task, return NULL; } + msg_debug_fuzzy_check("Proceeding to generate HTML fuzzy hash, tags_count=%d", + part->html_features ? part->html_features->tags_count : 0); + cached = fuzzy_cmd_get_cached(rule, task, mp); if (cached) { @@ -3512,13 +3519,22 @@ fuzzy_generate_commands(struct rspamd_task *task, struct fuzzy_rule *rule, if (rule->html_shingles && !(flags & FUZZY_CHECK_FLAG_NOHTML)) { struct fuzzy_cmd_io *html_io; + msg_debug_fuzzy_check("Attempting HTML fuzzy hash for rule %s", rule->name); html_io = fuzzy_cmd_from_html_part(task, rule, c, flag, value, part, mime_part); if (html_io) { /* Add HTML hash as separate command */ + msg_debug_fuzzy_check("HTML fuzzy hash generated and added to commands"); g_ptr_array_add(res, html_io); } + else { + msg_debug_fuzzy_check("HTML fuzzy hash generation returned NULL"); + } + } + else { + msg_debug_fuzzy_check("HTML fuzzy skipped: html_shingles=%d, NOHTML flag=%d", + rule->html_shingles, !!(flags & FUZZY_CHECK_FLAG_NOHTML)); } } else if (mime_part->part_type == RSPAMD_MIME_PART_IMAGE && diff --git a/test/functional/cases/120_fuzzy/html-fuzzy.robot b/test/functional/cases/120_fuzzy/html-fuzzy.robot index ee63c445d3..26be91392a 100644 --- a/test/functional/cases/120_fuzzy/html-fuzzy.robot +++ b/test/functional/cases/120_fuzzy/html-fuzzy.robot @@ -46,12 +46,10 @@ HTML Fuzzy Phishing Test IF ${RSPAMD_FUZZY_HTML_ADD} == 0 Fail "HTML Fuzzy Add was not run" END - Scan File ${HTML_PHISHING} # Structure similar but CTA domains different # Might match with lower score or not match depending on CTA weight - # For now just verify no crash - ${result} = Scan Message With Rspamc ${HTML_PHISHING} - Should Be Equal As Numbers ${result.returncode} 0 + # Just verify scanning works without crash + Scan File ${HTML_PHISHING} HTML Fuzzy Delete Test [Documentation] Delete HTML fuzzy hash