From: Vsevolod Stakhov Date: Sun, 28 Dec 2025 18:45:05 +0000 (+0000) Subject: [Feature] Rename fuzzy_check max_score to hits_limit for clarity X-Git-Tag: 3.14.3~24 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7fd47dad2f9e8b63b5bc6d9576e96c8a329b4737;p=thirdparty%2Frspamd.git [Feature] Rename fuzzy_check max_score to hits_limit for clarity The option name max_score was confusing as it doesn't refer to the symbol score but rather the number of fuzzy hash hits at which the normalized score reaches ~1.0 (formula: tanh(e * hits / hits_limit)). - Rename max_score -> hits_limit in fuzzy_check.c and default config - Add backward compatibility: max_score is still accepted as an alias - Add lua_cfg_transform to handle legacy configs (max_score overrides hits_limit to ensure local.d overrides work correctly) - Add explanatory comments in config and documentation --- diff --git a/conf/modules.d/fuzzy_check.conf b/conf/modules.d/fuzzy_check.conf index 9aef488e0f..031a4ecf5f 100644 --- a/conf/modules.d/fuzzy_check.conf +++ b/conf/modules.d/fuzzy_check.conf @@ -22,22 +22,24 @@ fuzzy_check { encryption_key = "icy63itbhhni8bq15ntp5n5symuixf73s1kpjh6skaq4e7nx5fiy"; symbol = "FUZZY_UNKNOWN"; mime_types = ["*"]; - max_score = 20.0; + # Number of hits at which the normalized score reaches ~1.0 + # Score formula: tanh(e * hits / hits_limit) + hits_limit = 20.0; read_only = yes; skip_unknown = yes; short_text_direct_hash = true; # If less than min_length then use direct hash min_length = 64; # Minimum words count to consider shingles fuzzy_map = { FUZZY_DENIED { - max_score = 20.0; + hits_limit = 20.0; flag = 1; } FUZZY_PROB { - max_score = 10.0; + hits_limit = 10.0; flag = 2; } FUZZY_WHITE { - max_score = 2.0; + hits_limit = 2.0; flag = 3; } } @@ -56,11 +58,11 @@ fuzzy_check { # image { enabled = false; } # optional: do not hash images # } # symbol = "FUZZY_HTML_STRUCTURE"; - # max_score = 25.0; + # hits_limit = 25.0; # fuzzy_map = { # FUZZY_HTML_SPAM { # flag = 200; - # max_score = 25.0; + # hits_limit = 25.0; # } # } # } diff --git a/lualib/lua_cfg_transform.lua b/lualib/lua_cfg_transform.lua index 55477a6fa8..691f57f5cb 100644 --- a/lualib/lua_cfg_transform.lua +++ b/lualib/lua_cfg_transform.lua @@ -523,5 +523,39 @@ return function(cfg) cfg.options.upstream = cfg.options.upstreams end + -- fuzzy_check: max_score -> hits_limit rename with backward compatibility + -- If max_score is specified (e.g., from local.d overrides), it should override hits_limit + if cfg.fuzzy_check then + try_transform('fuzzy_check', function() + local dominated_options = function(rule) + -- max_score overrides hits_limit for backward compatibility + -- This ensures local.d overrides using legacy max_score still work + if rule.max_score then + rule.hits_limit = rule.max_score + rule.max_score = nil + end + -- Also transform fuzzy_map entries + if rule.fuzzy_map then + for _, map_entry in pairs(rule.fuzzy_map) do + if type(map_entry) == 'table' and map_entry.max_score then + map_entry.hits_limit = map_entry.max_score + map_entry.max_score = nil + end + end + end + end + + if cfg.fuzzy_check.rule then + for _, rule in pairs(cfg.fuzzy_check.rule) do + if type(rule) == 'table' then + dominated_options(rule) + end + end + end + -- Handle case where fuzzy_check itself has these options (single rule syntax) + dominated_options(cfg.fuzzy_check) + end) + end + return ret, cfg end diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 75da5640da..77f838ad14 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -18,7 +18,8 @@ * * Allowed options: * - symbol (string): symbol to insert (default: 'R_FUZZY') - * - max_score (double): maximum score to that weights of hashes would be normalized (default: 0 - no normalization) + * - hits_limit (double): number of fuzzy hash hits at which the normalized score reaches ~1.0 (default: 0 - no normalization) + * The score is calculated as tanh(e * hits / hits_limit). Legacy name: max_score * * - fuzzy_map (string): a string that contains map in format { fuzzy_key => [ symbol, weight ] } where fuzzy_key is number of * fuzzy list. This string itself should be in format 1:R_FUZZY_SAMPLE1:10,2:R_FUZZY_SAMPLE2:1 etc, where first number is fuzzy @@ -107,7 +108,7 @@ struct fuzzy_rule { struct rspamd_cryptobox_pubkey *read_peer_key; struct rspamd_cryptobox_keypair *write_local_key; struct rspamd_cryptobox_pubkey *write_peer_key; - double max_score; + double hits_limit; double weight_threshold; double html_weight; /* Weight multiplier for HTML hashes (default 1.0) */ enum fuzzy_rule_mode mode; @@ -365,13 +366,16 @@ parse_flags(struct fuzzy_rule *rule, if (elt != NULL) { map->fuzzy_flag = ucl_obj_toint(elt); - elt = ucl_object_lookup(val, "max_score"); + elt = ucl_object_lookup(val, "hits_limit"); + if (elt == NULL) { + elt = ucl_object_lookup(val, "max_score"); + } if (elt != NULL) { map->weight = ucl_obj_todouble(elt); } else { - map->weight = rule->max_score; + map->weight = rule->hits_limit; } /* Add flag to hash table */ g_hash_table_insert(rule->mappings, @@ -1715,8 +1719,13 @@ fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj, } - if ((value = ucl_object_lookup(obj, "max_score")) != NULL) { - rule->max_score = ucl_obj_todouble(value); + /* hits_limit: number of fuzzy hits at which normalized score reaches ~1.0 + * Legacy name: max_score (supported for backward compatibility) */ + if ((value = ucl_object_lookup(obj, "hits_limit")) != NULL) { + rule->hits_limit = ucl_obj_todouble(value); + } + else if ((value = ucl_object_lookup(obj, "max_score")) != NULL) { + rule->hits_limit = ucl_obj_todouble(value); } if ((value = ucl_object_lookup(obj, "retransmits")) != NULL) { @@ -2362,8 +2371,10 @@ int fuzzy_check_module_init(struct rspamd_config *cfg, struct module_ctx **ctx) 0); rspamd_rcl_add_doc_by_path(cfg, "fuzzy_check.rule", - "Maximum value for fuzzy hash when weight of symbol is exactly 1.0 (if value is higher then score is still 1.0)", - "max_score", + "Number of fuzzy hash hits at which the normalized score reaches ~1.0. " + "Score is calculated as tanh(e * hits / hits_limit). " + "Legacy name: max_score", + "hits_limit", UCL_INT, NULL, 0, @@ -2553,8 +2564,9 @@ int fuzzy_check_module_init(struct rspamd_config *cfg, struct module_ctx **ctx) /* Fuzzy map doc strings */ rspamd_rcl_add_doc_by_path(cfg, "fuzzy_check.rule.fuzzy_map", - "Maximum score for this flag", - "max_score", + "Number of fuzzy hash hits at which the normalized score reaches ~1.0 for this flag. " + "Legacy name: max_score", + "hits_limit", UCL_INT, NULL, 0, @@ -4251,7 +4263,7 @@ fuzzy_insert_result(struct fuzzy_client_session *session, GINT_TO_POINTER(rep->v1.flag))) == NULL) { /* Default symbol and default weight */ symbol = session->rule->symbol; - weight = session->rule->max_score; + weight = session->rule->hits_limit; } else { /* Get symbol and weight from map */