*/
#include "lang_detection.h"
+#include "lang_detection_fasttext.h"
#include "libserver/logger.h"
#include "libcryptobox/cryptobox.h"
#include "libutil/multipattern.h"
UConverter *uchar_converter;
gsize short_text_limit;
gsize total_occurrences; /* number of all languages found */
+ gpointer fasttext_detector;
ref_entry_t ref;
};
}
kh_destroy (rspamd_stopwords_hash, d->stop_words_norm);
+ rspamd_lang_detection_fasttext_destroy(d->fasttext_detector);
}
}
total += kh_size (ret->trigrams[i]);
}
+ ret->fasttext_detector = rspamd_lang_detection_fasttext_init(cfg);
+ char *fasttext_status = rspamd_lang_detection_fasttext_show_info(ret->fasttext_detector);
+
msg_info_config ("loaded %d languages, "
- "%d trigrams",
+ "%d trigrams; %s",
(gint)ret->languages->len,
- (gint)total);
+ (gint)total, fasttext_status);
+ g_free (fasttext_status);
if (stop_words) {
ucl_object_unref (stop_words);
class fasttext_langdet {
private:
fasttext::FastText ft;
+ std::string model_fname;
bool loaded;
struct one_shot_buf : public std::streambuf {
try {
ft.loadModel(ucl_object_tostring(model));
loaded = true;
+ model_fname = std::string{ucl_object_tostring(model)};
}
catch (std::exception &e) {
auto err_message = fmt::format("cannot load fasttext model: {}", e.what());
return nullptr;
}
+
+ /**
+  * Describe the current model state as a single human-readable line
+  * @return a fixed notice when no model is loaded; otherwise the model file
+  *         name followed by the label and token counts taken from the
+  *         model dictionary
+  */
+ auto model_info(void) const -> std::string {
+     if (loaded) {
+         /* Fetch the dictionary handle once and reuse it for both counters */
+         const auto dict = ft.getDictionary();
+
+         return fmt::format("fasttext model {}: {} languages, {} tokens", model_fname,
+             dict->nlabels(), dict->ntokens());
+     }
+
+     return "fasttext model is not loaded";
+ }
};
}
#endif
#endif
}
+/* Build a freshly allocated status line describing the fasttext detector */
+char *rspamd_lang_detection_fasttext_show_info(void *ud)
+{
+#ifdef WITH_FASTTEXT
+    /* g_strdup copies the text before the temporary std::string goes away */
+    return g_strdup(FASTTEXT_MODEL_TO_C_API(ud)->model_info().c_str());
+#else
+    /* Built without fasttext support: report that instead of touching ud */
+    return g_strdup("fasttext is not compiled in");
+#endif
+}
+
rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
const char *in, size_t len, int k)
{
*/
void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg);
+/**
+ * Show info about fasttext language detector
+ * @param ud opaque detector handle, as returned by rspamd_lang_detection_fasttext_init()
+ * @return newly allocated string with the detector status; the caller releases it with g_free()
+ */
+char *rspamd_lang_detection_fasttext_show_info(void *ud);
+
typedef void * rspamd_fasttext_predict_result_t;
/**