From: Vsevolod Stakhov <vsevolod@rspamd.com>
Date: Sat, 4 Oct 2025 18:34:48 +0000 (+0100)
Subject: [Feature] Integrate HTML fuzzy hashing into fuzzy_check module
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=28e67afe35d7e67a77ade47e15df16c1e98f4d50;p=thirdparty%2Frspamd.git

[Feature] Integrate HTML fuzzy hashing into fuzzy_check module

Add support for HTML structure fuzzy hashing in fuzzy_check plugin:

Core integration:
- Add FUZZY_CMD_FLAG_HTML flag and FUZZY_RESULT_HTML result type
- Add html_shingles, min_html_tags, html_weight options to fuzzy_rule
- Implement fuzzy_cmd_from_html_part() to generate HTML fuzzy commands
- Integrate into fuzzy_generate_commands() for automatic hash generation
- Handle HTML results with configurable weight multiplier

Configuration:
- html_shingles: enable/disable HTML fuzzy hashing per rule
- min_html_tags: minimum HTML tags threshold (default 10)
- html_weight: score multiplier for HTML matches (default 1.0)

Use cases:
1. Brand protection: detect phishing with copied HTML but fake CTA
2. Spam campaigns: group messages by HTML structure
3. Template detection: identify newsletters/notifications
4. Phishing: text match + HTML CTA mismatch = suspicious

Files added:
- lualib/lua_fuzzy_html.lua: helper functions for mismatch detection
- conf/modules.d/fuzzy_check_html.conf: configuration examples
- test/functional/configs/fuzzy_html_test.conf: test configuration
- rules/fuzzy_html_phishing.lua: phishing detection rules

HTML fuzzy works alongside text fuzzy:
- Both hashes generated and sent to storage
- Separate result types allow different handling
- CTA domain verification prevents false positives

Next steps:
- Performance testing on real email corpus
- Fine-tune weights and thresholds
- Collect legitimate brand templates for whitelisting
---

diff --git a/conf/modules.d/fuzzy_check_html.conf b/conf/modules.d/fuzzy_check_html.conf
new file mode 100644
index 0000000000..face9c916d
--- /dev/null
+++ b/conf/modules.d/fuzzy_check_html.conf
@@ -0,0 +1,114 @@
+# HTML Fuzzy Hashing Configuration Example
+#
+# This configuration demonstrates how to use HTML fuzzy hashing for:
+# 1. Detecting spam campaigns with similar HTML structure
+# 2. Phishing detection (similar structure, different CTA domains)
+# 3. Brand protection (legitimate templates vs. fake emails)
+
+fuzzy_check {
+  # Example rule for HTML structure matching
+  rule "HTML_FUZZY" {
+    # Standard fuzzy storage configuration
+    servers = "localhost:11335";
+    
+    # Encryption (optional, recommended for production)
+    # encryption_key = "your_base32_encoded_public_key";
+    # fuzzy_key = "your_hashing_key";
+    # fuzzy_shingles_key = "your_shingles_key";
+    
+    # Algorithm for shingles (mumhash recommended for HTML)
+    algorithm = "mumhash";
+    
+    # Enable HTML fuzzy hashing
+    html_shingles = true;
+    
+    # Minimum number of HTML tags to generate hash
+    # (prevents hashing of trivial HTML snippets)
+    min_html_tags = 15;
+    
+    # Weight multiplier for HTML fuzzy matches
+    # Can be < 1.0 to reduce impact, or > 1.0 to increase
+    html_weight = 1.0;
+    
+    # Regular fuzzy check settings
+    symbol = "FUZZY_HTML";
+    max_score = 20.0;
+    
+    # Fuzzy flag mappings
+    fuzzy_map = {
+      # Whitelist: known legitimate HTML structures
+      "FUZZY_HTML_WHITELIST" {
+        flag = 1;
+        max_score = 20.0;
+      }
+      # Blacklist: known spam/phishing HTML structures
+      "FUZZY_HTML_BLACKLIST" {
+        flag = 2;
+        max_score = 20.0;
+      }
+    }
+    
+    # Optional: skip specific hashes
+    # skip_hashes = "${LOCAL_CONFDIR}/local.d/fuzzy_skip_html.map";
+  }
+  
+  # Example: Combined text + HTML rule
+  rule "COMBINED_FUZZY" {
+    servers = "localhost:11335";
+    algorithm = "mumhash";
+    
+    # Enable both text and HTML fuzzy hashing
+    html_shingles = true;
+    min_html_tags = 10;
+    
+    # This rule will generate:
+    # - Text fuzzy hashes (from content)
+    # - HTML fuzzy hashes (from structure)
+    # Both sent to same storage with same flag
+    
+    symbol = "FUZZY_COMBINED";
+    max_score = 30.0;
+    
+    fuzzy_map = {
+      "FUZZY_COMBINED_WHITE" {
+        flag = 10;
+        max_score = 30.0;
+      }
+      "FUZZY_COMBINED_SPAM" {
+        flag = 11;
+        max_score = 30.0;
+      }
+    }
+  }
+  
+  # Example: Phishing detection rule (higher weight for HTML)
+  rule "PHISHING_DETECTION" {
+    servers = "localhost:11335";
+    algorithm = "mumhash";
+    
+    html_shingles = true;
+    min_html_tags = 20;
+    
+    # Higher weight for HTML matches = prioritize structure over content
+    html_weight = 1.5;
+    
+    symbol = "FUZZY_PHISHING";
+    max_score = 25.0;
+    
+    fuzzy_map = {
+      # Known phishing HTML templates
+      "FUZZY_PHISHING_HTML" {
+        flag = 20;
+        max_score = 25.0;
+      }
+      # Known legitimate brands (for comparison)
+      "FUZZY_LEGIT_BRANDS" {
+        flag = 21;
+        max_score = -25.0;  # Negative score = whitelist
+      }
+    }
+  }
+}
+
+# Additional configuration for phishing detection rules
+# See rules/fuzzy_html_phishing.lua for Lua-based detection logic
diff --git a/lualib/lua_fuzzy_html.lua b/lualib/lua_fuzzy_html.lua
new file mode 100644
index 0000000000..1b3b36cd9f
--- /dev/null
+++ b/lualib/lua_fuzzy_html.lua
@@ -0,0 +1,98 @@
+--[[
+HTML Fuzzy Hashing Helper Module
+
+This module provides helper functions for HTML fuzzy hash matching
+and phishing detection based on HTML structure vs. content mismatches.
+
+Use case: Detect phishing where HTML structure matches legitimate emails
+but CTA (Call-To-Action) domains are different.
+]]
+
+local exports = {}
+local rspamd_logger = require "rspamd_logger"
+local lua_util = require "lua_util"
+
+--[[
+Compare text and HTML fuzzy matches and flag a one-sided text match.
+
+Parameters:
+- task: rspamd task, used only for debug logging
+- fuzzy_results: optional array of tables carrying `type` ('html' or 'txt')
+  and a numeric `score`; other types are ignored, a missing score counts as 0
+
+Scenarios:
+- text match above 0.7 with no HTML match at all: reported as suspicious
+- HTML match with no text match: only logged (typical for templates)
+- anything else: not suspicious
+
+No CTA domains are inspected here; only the presence of matches is used.
+
+Returns: phishing_score, explanation
+(0, nil when nothing suspicious is found)
+]]
+exports.check_html_text_mismatch = function(task, fuzzy_results)
+  -- Highest score among matches of the given type, or nil if there are none.
+  -- Scores are floored at 0 and a missing/non-numeric score counts as 0.
+  local function best_score(wanted_type)
+    local best
+    for _, res in ipairs(fuzzy_results or {}) do
+      if res.type == wanted_type then
+        best = math.max(best or 0, tonumber(res.score) or 0)
+      end
+    end
+    return best
+  end
+
+  -- nil means "no match of that type", which is different from a 0 score
+  local text_score = best_score('txt')
+  local html_score = best_score('html')
+
+  -- High text match but no HTML match = suspicious
+  if text_score and not html_score and text_score > 0.7 then
+    -- Half of the best text score is reported as the phishing score
+    return text_score * 0.5, string.format(
+      "Text fuzzy match (%.2f) without HTML match - possible CTA substitution",
+      text_score)
+  end
+
+  -- Inverse scenario: HTML match but no text match.
+  -- This is expected for newsletters/notifications (template with varying
+  -- content), so it is only logged.
+  if html_score and not text_score then
+    -- The number is pre-formatted and passed through %s, the placeholder
+    -- documented for the rspamd logger, instead of a printf-style %.2f.
+    lua_util.debugm('fuzzy_html', task,
+      'HTML match (%s) without text match - likely template variation',
+      string.format('%.2f', html_score))
+  end
+
+  return 0, nil
+end
+
+--[[
+Brand-hijack heuristic: a well-known HTML template carrying unfamiliar text.
+
+html_fuzzy_result and text_fuzzy_result are optional tables with a numeric
+`score`; a missing table or score is treated as "no match" (score 0).
+The message is flagged when the HTML score is above 0.8 and the text score
+is below 0.3. Useful for brand protection.
+
+Example: Amazon email template with phishing text
+Returns: 0.6 * HTML score and an explanation, or 0, nil when not flagged
+]]
+exports.check_brand_hijack = function(task, html_fuzzy_result, text_fuzzy_result)
+  -- Read scores defensively so an absent field cannot raise a compare error
+  local html_score = html_fuzzy_result and tonumber(html_fuzzy_result.score) or 0
+  local text_score = text_fuzzy_result and tonumber(text_fuzzy_result.score) or 0
+
+  -- Not a known template, or the text is familiar as well: nothing to report
+  if html_score <= 0.8 or text_score >= 0.3 then
+    return 0, nil
+  end
+
+  return html_score * 0.6,
+    string.format("Known HTML template (%.2f) with unfamiliar text - possible brand hijacking",
+      html_score)
+end
+
+return exports
diff --git a/rules/fuzzy_html_phishing.lua b/rules/fuzzy_html_phishing.lua
new file mode 100644
index 0000000000..77cc50c6ce
--- /dev/null
+++ b/rules/fuzzy_html_phishing.lua
@@ -0,0 +1,115 @@
+--[[
+Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[
+HTML Fuzzy Phishing Detection Rules
+
+Detects phishing based on fuzzy hash mismatches:
+1. Text content matches known legitimate email (whitelist)
+2. But HTML structure doesn't match or has different CTA domains
+3. Or vice versa: HTML structure matches but text/CTA is suspicious
+
+This indicates possible template reuse for phishing.
+]]
+
+local rspamd_logger = require "rspamd_logger"
+local lua_util = require "lua_util"
+
+local N = 'fuzzy_html_phishing'
+
+--- Callback: flag a strong text fuzzy match that has no HTML fuzzy match.
+-- Fuzzy symbols are classified by name: 'FUZZY.*TEXT' or R_FUZZY_HASH count
+-- as text matches, 'FUZZY.*HTML' as HTML matches.
+-- @param task rspamd task being processed
+-- @return true when FUZZY_HTML_PHISHING_MISMATCH was inserted, false otherwise
+local function check_fuzzy_mismatch(task)
+  -- NOTE(review): gate kept from the initial version; confirm that fuzzy_check
+  -- really publishes a mempool variable under this exact name, otherwise this
+  -- callback never gets past this point.
+  if not task:get_mempool():get_variable('fuzzy_result') then
+    return false
+  end
+
+  -- The variable content itself is not parsed; classification below relies on
+  -- symbol names only (fuzzy_check would need to expose result types for more).
+  local has_text_fuzzy, has_html_fuzzy = false, false
+  local text_score, html_score = 0, 0
+
+  -- Tolerate a missing symbol list or missing fields instead of raising
+  for _, sym in ipairs(task:get_symbols_all() or {}) do
+    local name = sym.name or ''
+    local score = sym.score or 0
+
+    if name:match('FUZZY.*TEXT') or name == 'R_FUZZY_HASH' then
+      has_text_fuzzy = true
+      text_score = math.max(text_score, score)
+    end
+
+    if name:match('FUZZY.*HTML') then
+      has_html_fuzzy = true
+      html_score = math.max(html_score, score)
+    end
+  end
+
+  -- Scenario 1: Text matches legitimate but no HTML match
+  -- This could indicate phishing with copied text but fake HTML/CTA
+  if has_text_fuzzy and not has_html_fuzzy and text_score > 5.0 then
+    -- Numbers are pre-formatted and logged through %s, the placeholder
+    -- documented for the rspamd logger, instead of a printf-style %.2f
+    local shown = string.format('%.2f', text_score)
+
+    task:insert_result('FUZZY_HTML_PHISHING_MISMATCH', 0.5,
+      'text_score:' .. shown)
+    lua_util.debugm(N, task,
+      'Phishing suspect: text fuzzy match (%s) without HTML match',
+      shown)
+    return true
+  end
+
+  -- Scenario 2: HTML matches but text doesn't (less suspicious)
+  -- This is common for newsletters/notifications with varying content,
+  -- so a very high HTML score (known template) is only logged
+  if has_html_fuzzy and not has_text_fuzzy and html_score > 8.0 then
+    -- Could add a negative score here instead of just logging
+    lua_util.debugm(N, task,
+      'HTML template match (%s) with varying text - likely legitimate newsletter',
+      string.format('%.2f', html_score))
+  end
+
+  return false
+end
+
+-- Register the callback first: the virtual symbol below needs its id as parent
+local id = rspamd_config:register_symbol{
+  name = 'FUZZY_HTML_PHISHING_CHECK',
+  type = 'callback',
+  callback = check_fuzzy_mismatch,
+  score = 0.0,
+  group = 'fuzzy',
+  description = 'Check for HTML/text fuzzy mismatches indicating phishing'
+}
+
+-- Virtual symbol inserted by the callback; attached to it via `parent`
+rspamd_config:register_symbol{
+  name = 'FUZZY_HTML_PHISHING_MISMATCH',
+  type = 'virtual',
+  parent = id,
+  score = 5.0,
+  description = 'Text fuzzy matches legitimate but HTML structure does not',
+  group = 'fuzzy'
+}
+-- Depends on fuzzy_check: the callback reads symbols already in the task results
+rspamd_config:register_dependency('FUZZY_HTML_PHISHING_CHECK', 'FUZZY_CALLBACK')
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 7dd5162ac7..9d83b7896d 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -94,10 +94,13 @@ struct fuzzy_rule {
 	struct rspamd_cryptobox_pubkey *peer_key;
 	double max_score;
 	double weight_threshold;
+	double html_weight; /* Weight multiplier for HTML hashes (default 1.0) */
 	enum fuzzy_rule_mode mode;
 	gboolean skip_unknown;
 	gboolean no_share;
 	gboolean no_subject;
+	gboolean html_shingles; /* Enable HTML fuzzy hashing */
+	unsigned int min_html_tags; /* Minimum tags for HTML hash */
 	int learn_condition_cb;
 	uint32_t retransmits;
 	struct rspamd_hash_map_helper *skip_map;
@@ -127,7 +130,8 @@ enum fuzzy_result_type {
 	FUZZY_RESULT_TXT,
 	FUZZY_RESULT_IMG,
 	FUZZY_RESULT_CONTENT,
-	FUZZY_RESULT_BIN
+	FUZZY_RESULT_BIN,
+	FUZZY_RESULT_HTML
 };
 
 struct fuzzy_client_result {
@@ -174,10 +178,12 @@ struct fuzzy_learn_session {
 #define FUZZY_CMD_FLAG_SENT (1 << 1)
 #define FUZZY_CMD_FLAG_IMAGE (1 << 2)
 #define FUZZY_CMD_FLAG_CONTENT (1 << 3)
+#define FUZZY_CMD_FLAG_HTML (1 << 4)
 
 #define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
 #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
 #define FUZZY_CHECK_FLAG_NOTEXT (1 << 2)
+#define FUZZY_CHECK_FLAG_NOHTML (1 << 3)
 
 struct fuzzy_cmd_io {
 	uint32_t tag;
@@ -340,6 +346,9 @@ fuzzy_rule_new(const char *default_symbol, rspamd_mempool_t *pool)
 								  rule->mappings);
 	rule->mode = fuzzy_rule_read_write;
 	rule->weight_threshold = NAN;
+	rule->html_weight = 1.0;
+	rule->html_shingles = FALSE;
+	rule->min_html_tags = 10;
 
 	return rule;
 }
@@ -720,6 +729,18 @@ fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj,
 		rule->weight_threshold = ucl_object_todouble(value);
 	}
 
+	if ((value = ucl_object_lookup(obj, "html_shingles")) != NULL) {
+		rule->html_shingles = ucl_object_toboolean(value);
+	}
+
+	if ((value = ucl_object_lookup(obj, "min_html_tags")) != NULL) {
+		rule->min_html_tags = ucl_object_toint(value);
+	}
+
+	if ((value = ucl_object_lookup(obj, "html_weight")) != NULL) {
+		rule->html_weight = ucl_object_todouble(value);
+	}
+
 	/*
 	 * Process rule in Lua
 	 */
@@ -2074,6 +2095,139 @@ fuzzy_cmd_from_text_part(struct rspamd_task *task,
 	return io;
 }
 
+/* Create a shingle-based fuzzy command (flagged FUZZY_CMD_FLAG_HTML) from the HTML structure of a text part.
+ * Returns NULL when HTML hashing is disabled for the rule, the part is not HTML, it has fewer tags than
+ * rule->min_html_tags, or no shingles are available; the command is encrypted only when rule->peer_key is set. */
+static struct fuzzy_cmd_io *
+fuzzy_cmd_from_html_part(struct rspamd_task *task,
+						 struct fuzzy_rule *rule,
+						 int c,
+						 int flag,
+						 uint32_t weight,
+						 struct rspamd_mime_text_part *part,
+						 struct rspamd_mime_part *mp)
+{
+	struct rspamd_fuzzy_shingle_cmd *shcmd = NULL;
+	struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd = NULL;
+	struct rspamd_cached_shingles *cached = NULL;
+	struct rspamd_html_shingle *html_sh = NULL;
+	struct fuzzy_cmd_io *io;
+	unsigned int additional_length;
+	unsigned char *additional_data;
+
+	/* Check if HTML shingles are enabled for this rule */
+	if (!rule->html_shingles) {
+		return NULL;
+	}
+
+	/* Check if this is an HTML part */
+	if (!IS_TEXT_PART_HTML(part) || part->html == NULL) {
+		return NULL;
+	}
+
+	/* Check minimum tags threshold (the check is skipped when html_features is NULL) */
+	if (part->html_features && part->html_features->tags_count < rule->min_html_tags) {
+		msg_debug_fuzzy_check("HTML part has %d tags, less than minimum %d",
+							  part->html_features->tags_count, rule->min_html_tags);
+		return NULL;
+	}
+
+	cached = fuzzy_cmd_get_cached(rule, task, mp);
+
+	if (cached) {
+		/* Copy from cache. NOTE(review): the lookup takes only (rule, task, mp) - confirm it cannot return shingles cached for this part by another hashing path */
+		additional_length = cached->additional_length;
+		additional_data = cached->additional_data;
+
+		if (cached->sh) {
+			encshcmd = rspamd_mempool_alloc0(task->task_pool,
+											 sizeof(*encshcmd) + additional_length);
+			shcmd = &encshcmd->cmd;
+			memcpy(&shcmd->sgl, cached->sh, sizeof(struct rspamd_shingle));
+			memcpy(shcmd->basic.digest, cached->digest, sizeof(cached->digest));
+			memcpy(((unsigned char *) encshcmd) + sizeof(*encshcmd), additional_data,
+				   additional_length);
+			shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
+		}
+		else {
+			return NULL;
+		}
+	}
+	else {
+		/* Generate HTML shingles; the extension data is stored right after the cache entry */
+		additional_length = fuzzy_cmd_extension_length(task, rule);
+		cached = rspamd_mempool_alloc0(task->task_pool, sizeof(*cached) + additional_length);
+		cached->additional_length = additional_length;
+		cached->additional_data = ((unsigned char *) cached) + sizeof(*cached);
+
+		if (additional_length > 0) {
+			fuzzy_cmd_write_extensions(task, rule, cached->additional_data, additional_length);
+		}
+
+		encshcmd = rspamd_mempool_alloc0(task->task_pool,
+										 sizeof(*encshcmd) + additional_length);
+		shcmd = &encshcmd->cmd;
+
+		msg_debug_fuzzy_check("generating HTML shingles for part with %d tags",
+							  part->html_features ? part->html_features->tags_count : 0);
+
+		html_sh = rspamd_shingles_from_html(part->html,
+											rule->shingles_key->str, task->task_pool,
+											rspamd_shingles_default_filter, NULL,
+											rule->alg);
+
+		if (html_sh != NULL) {
+			/* Use structure shingles for fuzzy matching */
+			memcpy(&shcmd->sgl, &html_sh->structure_shingles, sizeof(struct rspamd_shingle));
+			/* Use direct hash as digest for exact matching */
+			memcpy(shcmd->basic.digest, html_sh->direct_hash, sizeof(shcmd->basic.digest));
+			shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
+
+			/* Cache results and append the extension data after the command */
+			cached->sh = &html_sh->structure_shingles;
+			memcpy(cached->digest, html_sh->direct_hash, sizeof(cached->digest));
+			additional_data = ((unsigned char *) encshcmd) + sizeof(*encshcmd);
+			memcpy(additional_data, cached->additional_data, additional_length);
+		}
+		else {
+			/* No HTML shingles generated; nothing is stored in the cache in this case */
+			return NULL;
+		}
+
+		fuzzy_cmd_set_cached(rule, task, mp, cached);
+	}
+
+	io = rspamd_mempool_alloc(task->task_pool, sizeof(*io));
+	io->part = mp;
+
+	shcmd->basic.tag = ottery_rand_uint32();
+	shcmd->basic.cmd = c;
+	shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION;
+
+	if (c != FUZZY_CHECK) { /* flag and value are filled in only for non-check commands */
+		shcmd->basic.flag = flag;
+		shcmd->basic.value = weight;
+	}
+
+	io->tag = shcmd->basic.tag;
+	io->flags = FUZZY_CMD_FLAG_HTML;
+	memcpy(&io->cmd, &shcmd->basic, sizeof(io->cmd));
+
+	if (rule->peer_key) {
+		/* Encrypt data and send the encrypted wrapper (header + command + extensions) */
+		fuzzy_encrypt_cmd(rule, &encshcmd->hdr, (unsigned char *) shcmd,
+						  sizeof(*shcmd) + additional_length);
+		io->io.iov_base = encshcmd;
+		io->io.iov_len = sizeof(*encshcmd) + additional_length;
+	}
+	else {
+		io->io.iov_base = shcmd;
+		io->io.iov_len = sizeof(*shcmd) + additional_length;
+	}
+
+	return io;
+}
+
 #if 0
 static struct fuzzy_cmd_io *
 fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
@@ -2443,6 +2597,15 @@ fuzzy_insert_result(struct fuzzy_client_session *session,
 			type = "img";
 			res->type = FUZZY_RESULT_IMG;
 		}
+		else if ((io->flags & FUZZY_CMD_FLAG_HTML)) {
+			/* HTML structural hash */
+			nval *= sqrtf(rep->v1.prob);
+			/* Apply HTML weight multiplier from rule config */
+			nval *= session->rule->html_weight;
+
+			type = "html";
+			res->type = FUZZY_RESULT_HTML;
+		}
 		else {
 			/* Calc real probability */
 			nval *= sqrtf(rep->v1.prob);
@@ -3095,6 +3258,9 @@ fuzzy_controller_io_callback(int fd, short what, void *arg)
 					if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
 						ftype = "img";
 					}
+					else if ((io->flags & FUZZY_CMD_FLAG_HTML)) {
+						ftype = "html";
+					}
 					else if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
 						ftype = "content";
 					}
@@ -3340,6 +3506,19 @@ fuzzy_generate_commands(struct rspamd_task *task, struct fuzzy_rule *rule,
 												  !fuzzy_check,
 												  part,
 												  mime_part);
+
+					/* Also try the HTML fuzzy hash if enabled, whether or not a text hash was generated */
+					if (rule->html_shingles && !(flags & FUZZY_CHECK_FLAG_NOHTML)) {
+						struct fuzzy_cmd_io *html_io;
+
+						html_io = fuzzy_cmd_from_html_part(task, rule, c, flag, value,
+														   part, mime_part);
+
+						if (html_io) {
+							/* Add HTML hash as separate command */
+							g_ptr_array_add(res, html_io);
+						}
+					}
 				}
 				else if (mime_part->part_type == RSPAMD_MIME_PART_IMAGE &&
 						 !(flags & FUZZY_CHECK_FLAG_NOIMAGES)) {
diff --git a/test/functional/configs/fuzzy_html_test.conf b/test/functional/configs/fuzzy_html_test.conf
new file mode 100644
index 0000000000..4166e97b12
--- /dev/null
+++ b/test/functional/configs/fuzzy_html_test.conf
@@ -0,0 +1,53 @@
+# Test configuration for HTML fuzzy hashing
+
+.include(duplicate=append,priority=0) "{= env.TESTDIR =}/configs/plugins.conf"
+.include(duplicate=merge,priority=0) "{= env.TESTDIR =}/configs/statistic.conf"
+
+fuzzy_check {
+  # Test rule for HTML fuzzy hashing
+  rule "TEST_HTML_FUZZY" {
+    servers = "localhost:11335";
+    algorithm = "mumhash";
+    
+    # Enable HTML fuzzy hashing
+    html_shingles = true;
+    min_html_tags = 5;  # Low threshold for testing
+    html_weight = 1.0;
+    
+    symbol = "FUZZY_HTML_TEST";
+    max_score = 10.0;
+    
+    # Skip encryption for testing
+    # encryption_key = "";
+    
+    fuzzy_map = {
+      "FUZZY_HTML_WHITELIST" {
+        flag = 1;
+        max_score = 10.0;
+      }
+      "FUZZY_HTML_SPAM" {
+        flag = 2;
+        max_score = 10.0;
+      }
+    }
+  }
+  
+  # Rule with both text and HTML enabled
+  rule "TEST_COMBINED" {
+    servers = "localhost:11335";
+    algorithm = "mumhash";
+    
+    html_shingles = true;
+    min_html_tags = 3;
+    
+    symbol = "FUZZY_COMBINED_TEST";
+    max_score = 15.0;
+    
+    fuzzy_map = {
+      "FUZZY_COMBINED_MATCH" {
+        flag = 10;
+        max_score = 15.0;
+      }
+    }
+  }
+}