git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Add some more debug to the fasttext classifier
author Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 3 May 2023 11:08:01 +0000 (12:08 +0100)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Wed, 3 May 2023 11:08:01 +0000 (12:08 +0100)
src/libmime/lang_detection.c
src/libmime/lang_detection_fasttext.cxx
src/libmime/lang_detection_fasttext.h
src/libserver/logger.h

index 7696c4aedc79204f788e7b60c0252de32afa55c8..4d9e1ae683d10d20e418530cc8da73c078f6782c 100644 (file)
@@ -122,7 +122,7 @@ struct rspamd_stop_word_elt {
         G_STRFUNC, \
         __VA_ARGS__)
 
-INIT_LOG_MODULE(langdet)
+INIT_LOG_MODULE_PUBLIC(langdet)
 
 static const struct rspamd_language_unicode_match *
 rspamd_language_search_unicode_match (const gchar *key,
@@ -1843,7 +1843,7 @@ rspamd_language_detector_detect (struct rspamd_task *task,
                unsigned ndetected = 0;
                if (rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector)) {
                        rspamd_fasttext_predict_result_t fasttext_predict_result =
-                               rspamd_lang_detection_fasttext_detect(d->fasttext_detector,
+                               rspamd_lang_detection_fasttext_detect(d->fasttext_detector, task,
                                        part->utf_words, 4);
 
                        ndetected = rspamd_lang_detection_fasttext_get_nlangs(fasttext_predict_result);
index b75668670baf34c875ba34c7393975128f066a98..d9e4e7192c35c318eed6e5ad90ace721b2f95093 100644 (file)
 #include "fmt/core.h"
 #include "stat_api.h"
 #include <exception>
-#include <string>
 #include <string_view>
 #include <vector>
 #endif
 
 #ifdef WITH_FASTTEXT
+
+EXTERN_LOG_MODULE_DEF(langdet);
+#define msg_debug_lang_det(...)  rspamd_conditional_debug_fast (nullptr, nullptr, \
+        rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \
+        __FUNCTION__, \
+        __VA_ARGS__)
+
 namespace rspamd::langdet {
 class fasttext_langdet {
 private:
@@ -167,6 +173,7 @@ bool rspamd_lang_detection_fasttext_is_enabled(void *ud)
 }
 
 rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
+                                                                                                                                          struct rspamd_task *task,
                                                                                                                                           GArray *utf_words,
                                                                                                                                           int k)
 {
@@ -186,6 +193,8 @@ rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
                }
        }
 
+       msg_debug_lang_det("fasttext: got %z word tokens from %ud words", words_vec.size(), utf_words->len);
+
        auto *res = real_model->detect_language(words_vec, k);
 
        return (rspamd_fasttext_predict_result_t)res;
index 9fb1db222d5cb79fd56ae64a6dd981be07df6ae3..4a9f45c21d9ea04cbe490ae31e63b721f7a9641e 100644 (file)
@@ -20,6 +20,7 @@
 
 G_BEGIN_DECLS
 struct rspamd_config;
+struct rspamd_task; /* for logging */
 /**
  * Initialize fasttext language detector
  * @param cfg
@@ -52,7 +53,7 @@ typedef  void * rspamd_fasttext_predict_result_t;
  * @return TRUE if language is detected
  */
 rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
-               GArray *utf_words, int k);
+               struct rspamd_task *task, GArray *utf_words, int k);
 
 /**
  * Get number of languages detected
index 164e0b97e2437ecb349a134861aff6acdf36865a..f6abec658910f486270a9fc80538d7993dda6061 100644 (file)
@@ -168,6 +168,9 @@ gint rspamd_logger_add_debug_module (const gchar *mod);
         rspamd_##mname##_log_id = rspamd_logger_add_debug_module(#mname); \
 }
 
+#define EXTERN_LOG_MODULE_DEF(mname) \
+       extern gint rspamd_##mname##_log_id
+
 void rspamd_logger_configure_modules (GHashTable *mods_enabled);
 
 /**