git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Auto-load shipped fasttext model when present 6067/head
author Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 29 May 2026 08:05:46 +0000 (09:05 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Fri, 29 May 2026 08:09:14 +0000 (09:09 +0100)
When no fasttext_model is configured, fall back to the model shipped at
$SHAREDIR/languages/fasttext_model.ftz: if the file is readable, load
it via the existing direct-load path; otherwise stay silent (debug
only) so stock installs without the model behave exactly as before.

This lets images that ship the model file drop the explicit
fasttext_model config override. The success path reuses
load_model_direct (the same code used for an explicit fasttext_model),
and the absent-file case produces no error and leaves the detector
reporting 'fasttext model is not loaded' as before.

conf/lang_detection.inc
src/libmime/lang_detection_fasttext.cxx

index 16bfa3f69d93d29050b7f0c95732c5e4afe6c188..7cbd2ffb141ada7169090cc0a74efebee0acf968 100644 (file)
@@ -20,7 +20,9 @@
 # Disable specific languages
 # languages_disable = ["fr", "es"]
 
-# Use the following fasttext model for language detection (if Fasttext support is compiled in)
+# Use the following fasttext model for language detection (if Fasttext support is compiled in).
+# When this is unset, a model shipped at "${SHAREDIR}/languages/fasttext_model.ftz"
+# is auto-loaded if present; set this explicitly to use a different model or a map.
 # fasttext_model = "${SHAREDIR}/languages/fasttext_model.ftz"
 
 # Prefer fasttext over all other methods
index a491397e1d61bbed44f1c2bcc3c5b75d5bc0cbf6..e05526b608696106279c0f446405675857857a06 100644 (file)
@@ -90,6 +90,32 @@ private:
                }
        }
 
+       /*
+        * Fallback used when no fasttext_model is configured: load the model that
+        * ships in the languages data dir if it exists. Stays silent (debug only)
+        * when the file is absent so stock installs without the model behave as
+        * before.
+        */
+       void try_load_default_model()
+       {
+               auto *cfg = cfg_;
+               static const char default_model_path[] =
+                       RSPAMD_SHAREDIR "/languages/fasttext_model.ftz";
+
+               if (access(default_model_path, R_OK) != 0) {
+                       msg_debug_config("no default fasttext model at %s: %s",
+                                                        default_model_path, strerror(errno));
+                       return;
+               }
+
+               load_model_direct(default_model_path);
+
+               if (owned_model_) {
+                       msg_info_config("loaded default fasttext model from %s",
+                                                       default_model_path);
+               }
+       }
+
        void load_model_map(const char *model_path)
        {
                auto *cfg = cfg_;
@@ -193,21 +219,26 @@ public:
        {
                const auto *ucl_obj = cfg->cfg_ucl_obj;
                const auto *opts_section = ucl_object_find_key(ucl_obj, "lang_detection");
+               const ucl_object_t *model = nullptr;
 
                if (opts_section) {
-                       const auto *model = ucl_object_find_key(opts_section, "fasttext_model");
+                       model = ucl_object_find_key(opts_section, "fasttext_model");
+               }
 
-                       if (model) {
-                               const char *model_path = ucl_object_tostring(model);
+               if (model) {
+                       const char *model_path = ucl_object_tostring(model);
 
-                               if (rspamd_map_is_map(model_path)) {
-                                       load_model_map(model_path);
-                               }
-                               else {
-                                       load_model_direct(model_path);
-                               }
+                       if (rspamd_map_is_map(model_path)) {
+                               load_model_map(model_path);
+                       }
+                       else {
+                               load_model_direct(model_path);
                        }
                }
+               else {
+                       /* No explicit model configured: try the shipped default */
+                       try_load_default_model();
+               }
        }
 
        /* Disallow multiple initialisation */