git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Rename fuzzy_check max_score to hits_limit for clarity
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 28 Dec 2025 18:45:05 +0000 (18:45 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sun, 28 Dec 2025 18:45:05 +0000 (18:45 +0000)
The option name max_score was confusing as it doesn't refer to the
symbol score but rather the number of fuzzy hash hits at which the
normalized score reaches ~1.0 (formula: tanh(e * hits / hits_limit)).

- Rename max_score -> hits_limit in fuzzy_check.c and default config
- Add backward compatibility: max_score is still accepted as an alias
- Add lua_cfg_transform to handle legacy configs (max_score overrides
  hits_limit to ensure local.d overrides work correctly)
- Add explanatory comments in config and documentation

conf/modules.d/fuzzy_check.conf
lualib/lua_cfg_transform.lua
src/plugins/fuzzy_check.c

index 9aef488e0f7e761438e06b9ac1f940cdff517f7f..031a4ecf5f392c3b11fb84103ab25b7c4453eda2 100644 (file)
@@ -22,22 +22,24 @@ fuzzy_check {
     encryption_key = "icy63itbhhni8bq15ntp5n5symuixf73s1kpjh6skaq4e7nx5fiy";
     symbol = "FUZZY_UNKNOWN";
     mime_types = ["*"];
-    max_score = 20.0;
+    # Number of hits at which the normalized score reaches ~1.0
+    # Score formula: tanh(e * hits / hits_limit)
+    hits_limit = 20.0;
     read_only = yes;
     skip_unknown = yes;
     short_text_direct_hash = true; # If less than min_length then use direct hash
     min_length = 64; # Minimum words count to consider shingles
     fuzzy_map = {
       FUZZY_DENIED {
-        max_score = 20.0;
+        hits_limit = 20.0;
         flag = 1;
       }
       FUZZY_PROB {
-        max_score = 10.0;
+        hits_limit = 10.0;
         flag = 2;
       }
       FUZZY_WHITE {
-        max_score = 2.0;
+        hits_limit = 2.0;
         flag = 3;
       }
     }
@@ -56,11 +58,11 @@ fuzzy_check {
   #     image { enabled = false; }   # optional: do not hash images
   #   }
   #   symbol = "FUZZY_HTML_STRUCTURE";
-  #   max_score = 25.0;
+  #   hits_limit = 25.0;
   #   fuzzy_map = {
   #     FUZZY_HTML_SPAM {
   #       flag = 200;
-  #       max_score = 25.0;
+  #       hits_limit = 25.0;
   #     }
   #   }
   # }
index 55477a6fa8721e3dbf3403d6c6c8b36cb1b9825c..691f57f5cba910f19939859e526597b821e4fa14 100644 (file)
@@ -523,5 +523,39 @@ return function(cfg)
     cfg.options.upstream = cfg.options.upstreams
   end
 
+  -- fuzzy_check: rename legacy max_score to hits_limit, keeping old configs working
+  -- A max_score that is present (e.g., from local.d overrides) replaces hits_limit, even if both are set
+  if cfg.fuzzy_check then
+    try_transform('fuzzy_check', function()
+      local dominated_options = function(rule)
+        -- Copy the legacy max_score into hits_limit (overwriting any existing
+        -- hits_limit) and drop the old key, so only the new name remains
+        if rule.max_score then
+          rule.hits_limit = rule.max_score
+          rule.max_score = nil
+        end
+        -- Apply the same rename to every table-valued fuzzy_map entry
+        if rule.fuzzy_map then
+          for _, map_entry in pairs(rule.fuzzy_map) do
+            if type(map_entry) == 'table' and map_entry.max_score then
+              map_entry.hits_limit = map_entry.max_score
+              map_entry.max_score = nil
+            end
+          end
+        end
+      end
+
+      if cfg.fuzzy_check.rule then
+        for _, rule in pairs(cfg.fuzzy_check.rule) do
+          if type(rule) == 'table' then
+            dominated_options(rule)
+          end
+        end
+      end
+      -- Also rename options set directly on fuzzy_check itself (single rule syntax)
+      dominated_options(cfg.fuzzy_check)
+    end)
+  end
+
   return ret, cfg
 end
index 75da5640da9b0785595566b4760952366a6bc594..77f838ad14f0ed2c72f7d63e76eded4f9b0f016c 100644 (file)
@@ -18,7 +18,8 @@
  *
  * Allowed options:
  * - symbol (string): symbol to insert (default: 'R_FUZZY')
- * - max_score (double): maximum score to that weights of hashes would be normalized (default: 0 - no normalization)
+ * - hits_limit (double): number of fuzzy hash hits at which the normalized score reaches ~1.0 (default: 0 - no normalization)
+ *   The score is calculated as tanh(e * hits / hits_limit). Legacy name: max_score
  *
  * - fuzzy_map (string): a string that contains map in format { fuzzy_key => [ symbol, weight ] } where fuzzy_key is number of
  *   fuzzy list. This string itself should be in format 1:R_FUZZY_SAMPLE1:10,2:R_FUZZY_SAMPLE2:1 etc, where first number is fuzzy
@@ -107,7 +108,7 @@ struct fuzzy_rule {
        struct rspamd_cryptobox_pubkey *read_peer_key;
        struct rspamd_cryptobox_keypair *write_local_key;
        struct rspamd_cryptobox_pubkey *write_peer_key;
-       double max_score;
+       double hits_limit;
        double weight_threshold;
        double html_weight; /* Weight multiplier for HTML hashes (default 1.0) */
        enum fuzzy_rule_mode mode;
@@ -365,13 +366,16 @@ parse_flags(struct fuzzy_rule *rule,
                        if (elt != NULL) {
                                map->fuzzy_flag = ucl_obj_toint(elt);
 
-                               elt = ucl_object_lookup(val, "max_score");
+                               elt = ucl_object_lookup(val, "hits_limit");
+                               if (elt == NULL) {
+                                       elt = ucl_object_lookup(val, "max_score");
+                               }
 
                                if (elt != NULL) {
                                        map->weight = ucl_obj_todouble(elt);
                                }
                                else {
-                                       map->weight = rule->max_score;
+                                       map->weight = rule->hits_limit;
                                }
                                /* Add flag to hash table */
                                g_hash_table_insert(rule->mappings,
@@ -1715,8 +1719,13 @@ fuzzy_parse_rule(struct rspamd_config *cfg, const ucl_object_t *obj,
        }
 
 
-       if ((value = ucl_object_lookup(obj, "max_score")) != NULL) {
-               rule->max_score = ucl_obj_todouble(value);
+       /* hits_limit: number of fuzzy hits at which normalized score reaches ~1.0
+        * Legacy name: max_score (supported for backward compatibility) */
+       if ((value = ucl_object_lookup(obj, "hits_limit")) != NULL) {
+               rule->hits_limit = ucl_obj_todouble(value);
+       }
+       else if ((value = ucl_object_lookup(obj, "max_score")) != NULL) {
+               rule->hits_limit = ucl_obj_todouble(value);
        }
 
        if ((value = ucl_object_lookup(obj, "retransmits")) != NULL) {
@@ -2362,8 +2371,10 @@ int fuzzy_check_module_init(struct rspamd_config *cfg, struct module_ctx **ctx)
                                                           0);
        rspamd_rcl_add_doc_by_path(cfg,
                                                           "fuzzy_check.rule",
-                                                          "Maximum value for fuzzy hash when weight of symbol is exactly 1.0 (if value is higher then score is still 1.0)",
-                                                          "max_score",
+                                                          "Number of fuzzy hash hits at which the normalized score reaches ~1.0. "
+                                                          "Score is calculated as tanh(e * hits / hits_limit). "
+                                                          "Legacy name: max_score",
+                                                          "hits_limit",
                                                           UCL_INT,
                                                           NULL,
                                                           0,
@@ -2553,8 +2564,9 @@ int fuzzy_check_module_init(struct rspamd_config *cfg, struct module_ctx **ctx)
        /* Fuzzy map doc strings */
        rspamd_rcl_add_doc_by_path(cfg,
                                                           "fuzzy_check.rule.fuzzy_map",
-                                                          "Maximum score for this flag",
-                                                          "max_score",
+                                                          "Number of fuzzy hash hits at which the normalized score reaches ~1.0 for this flag. "
+                                                          "Legacy name: max_score",
+                                                          "hits_limit",
                                                           UCL_INT,
                                                           NULL,
                                                           0,
@@ -4251,7 +4263,7 @@ fuzzy_insert_result(struct fuzzy_client_session *session,
                                                                 GINT_TO_POINTER(rep->v1.flag))) == NULL) {
                /* Default symbol and default weight */
                symbol = session->rule->symbol;
-               weight = session->rule->max_score;
+               weight = session->rule->hits_limit;
        }
        else {
                /* Get symbol and weight from map */