guint unigramms_total; /* total frequencies for unigramms */
guint bigramms_total; /* total frequencies for bigramms */
guint trigramms_total; /* total frequencies for trigramms */
+ guint occurencies; /* total number of parts with this language */
};
struct rspamd_ngramm_elt {
GHashTable *trigramms; /* trigramms frequencies */
UConverter *uchar_converter;
gsize short_text_limit;
+ gsize total_occurencies; /* number of all languages found */
};
#define msg_debug_lang_det(...) rspamd_conditional_debug_fast (NULL, NULL, \
goto end;
}
- ret = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (*ret));
+ ret = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*ret));
ret->languages = g_ptr_array_sized_new (gl.gl_pathc);
ret->uchar_converter = ucnv_open ("UTF-8", &uc_err);
ret->short_text_limit = short_text_limit;
g_ptr_array_sort (result, rspamd_language_detector_cmp);
g_hash_table_unref (candidates);
+ if (result->len > 0) {
+ cand = g_ptr_array_index (result, 0);
+ cand->elt->occurencies ++;
+ d->total_occurencies ++;
+ }
+
return result;
}
\ No newline at end of file