git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Show fasttext info
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 29 Apr 2023 13:46:55 +0000 (14:46 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 29 Apr 2023 13:46:55 +0000 (14:46 +0100)
src/libmime/lang_detection.c
src/libmime/lang_detection_fasttext.cxx
src/libmime/lang_detection_fasttext.h

index 57d2f301da01451a365d5ad36e51c98ed1cc2073..09591438e2fd4d13059902b2a66f94842baafbfb 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include "lang_detection.h"
+#include "lang_detection_fasttext.h"
 #include "libserver/logger.h"
 #include "libcryptobox/cryptobox.h"
 #include "libutil/multipattern.h"
@@ -181,6 +182,7 @@ struct rspamd_lang_detector {
        UConverter *uchar_converter;
        gsize short_text_limit;
        gsize total_occurrences; /* number of all languages found */
+       gpointer fasttext_detector;
        ref_entry_t ref;
 };
 
@@ -766,6 +768,7 @@ rspamd_language_detector_dtor (struct rspamd_lang_detector *d)
                }
 
                kh_destroy (rspamd_stopwords_hash, d->stop_words_norm);
+               rspamd_lang_detection_fasttext_destroy(d->fasttext_detector);
        }
 }
 
@@ -886,10 +889,14 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
                total += kh_size (ret->trigrams[i]);
        }
 
+       ret->fasttext_detector = rspamd_lang_detection_fasttext_init(cfg);
+       char *fasttext_status = rspamd_lang_detection_fasttext_show_info(ret->fasttext_detector);
+
        msg_info_config ("loaded %d languages, "
-                       "%d trigrams",
+                       "%d trigrams; %s",
                        (gint)ret->languages->len,
-                       (gint)total);
+                       (gint)total, fasttext_status);
+       g_free (fasttext_status);
 
        if (stop_words) {
                ucl_object_unref (stop_words);
index cf6b5c85295a99e3703fac61de3d49e89f832e46..d6bd96ca16e294ff31cf62593d3570bdd27a7e81 100644 (file)
@@ -33,6 +33,7 @@ namespace rspamd::langdet {
 class fasttext_langdet {
 private:
        fasttext::FastText ft;
+       std::string model_fname;
        bool loaded;
 
        struct one_shot_buf : public std::streambuf {
@@ -53,6 +54,7 @@ public:
                                try {
                                        ft.loadModel(ucl_object_tostring(model));
                                        loaded = true;
+                                       model_fname = std::string{ucl_object_tostring(model)};
                                }
                                catch (std::exception &e) {
                                        auto err_message = fmt::format("cannot load fasttext model: {}", e.what());
@@ -93,6 +95,16 @@ public:
 
                return nullptr;
        }
+
+       auto model_info(void) const -> std::string {
+               if (!loaded) {
+                       return "fasttext model is not loaded";
+               }
+               else {
+                       return fmt::format("fasttext model {}: {} languages, {} tokens", model_fname,
+                               ft.getDictionary()->nlabels(), ft.getDictionary()->ntokens());
+               }
+       }
 };
 }
 #endif
@@ -112,6 +124,17 @@ void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg)
 #endif
 }
 
+/* Returns a g_strdup'ed status line for the detector `ud`; the caller releases it with g_free */
+char *rspamd_lang_detection_fasttext_show_info(void *ud)
+{
+#ifdef WITH_FASTTEXT
+       const auto summary = FASTTEXT_MODEL_TO_C_API(ud)->model_info();
+       return g_strdup(summary.c_str());
+#else
+       return g_strdup("fasttext is not compiled in");
+#endif
+}
+
 rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
                                                                                           const char *in, size_t len, int k)
 {
index 44bc8bf711facae302522e6082b31b2983cd02b6..71e253940b3859199f7b7936247c94a567bb2dfc 100644 (file)
@@ -27,6 +27,13 @@ struct rspamd_config;
  */
 void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg);
 
+/**
+ * Show info about fasttext language detector
+ * @param ud opaque detector handle as returned by rspamd_lang_detection_fasttext_init
+ * @return newly allocated (g_strdup) status string; the caller must release it with g_free
+ */
+char *rspamd_lang_detection_fasttext_show_info(void *ud);
+
 
 typedef  void * rspamd_fasttext_predict_result_t;
 /**