git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Store occurrence counts for languages
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 23 Jan 2018 19:09:40 +0000 (19:09 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 23 Jan 2018 19:09:40 +0000 (19:09 +0000)
src/libmime/lang_detection.c

index 3eeb9829f5b678c11e3f5e83ae00d3937770fd76..2bdda30049a847cd64ed7eaceb999bd9fef71842 100644 (file)
@@ -40,6 +40,7 @@ struct rspamd_language_elt {
        guint unigramms_total; /* total frequencies for unigramms */
        guint bigramms_total; /* total frequencies for bigramms */
        guint trigramms_total; /* total frequencies for trigramms */
+       guint occurencies; /* total number of parts with this language */
 };
 
 struct rspamd_ngramm_elt {
@@ -54,6 +55,7 @@ struct rspamd_lang_detector {
        GHashTable *trigramms; /* trigramms frequencies */
        UConverter *uchar_converter;
        gsize short_text_limit;
+       gsize total_occurencies; /* number of all languages found */
 };
 
 #define msg_debug_lang_det(...)  rspamd_conditional_debug_fast (NULL, NULL, \
@@ -372,7 +374,7 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
                goto end;
        }
 
-       ret = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (*ret));
+       ret = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*ret));
        ret->languages = g_ptr_array_sized_new (gl.gl_pathc);
        ret->uchar_converter = ucnv_open ("UTF-8", &uc_err);
        ret->short_text_limit = short_text_limit;
@@ -908,5 +910,11 @@ rspamd_language_detector_detect (struct rspamd_task *task,
        g_ptr_array_sort (result, rspamd_language_detector_cmp);
        g_hash_table_unref (candidates);
 
+       if (result->len > 0) {
+               cand = g_ptr_array_index (result, 0);
+               cand->elt->occurencies ++;
+               d->total_occurencies ++;
+       }
+
        return result;
 }
\ No newline at end of file