/*
* Top languages
*/
/*
 * Tier-0 languages: an element whose name is found in this list gets the
 * RS_LANGUAGE_TIER0 flag, and candidates carrying that flag receive a
 * cbd->std * tier0_adjustment bonus when two candidates are compared.
 */
+static const gchar *tier0_langs[] = {
+ "en",
+};
/*
 * Tier-1 languages. This change drops "en" from the list; "en" is now the
 * sole entry of tier0_langs.
 * NOTE(review): presumably a match against this list is what sets
 * RS_LANGUAGE_TIER1 - the lookup is not visible in this fragment; confirm.
 * NOTE(review): "tk" is the ISO 639-1 code for Turkmen; if Turkish was
 * intended the code would be "tr" - confirm against the shipped language
 * profiles.
 */
static const gchar *tier1_langs[] = {
- "en", "fr", "it", "de", "es", "nl", "zh-CN", "zh-TW", "ja",
+ "fr", "it", "de", "es", "nl", "zh-CN", "zh-TW", "ja",
"ko", "pt", "ru", "pl", "tk", "th", "ar"
};
RS_LANGUAGE_UNISCRIPT = (1 << 1),
RS_LANGUAGE_UNIGRAMM = (1 << 2),
RS_LANGUAGE_TIER1 = (1 << 3),
- RS_LANGUAGE_TIER2 = (1 << 4),
+ RS_LANGUAGE_TIER0 = (1 << 4),
};
struct rspamd_language_elt {
nelt->flags |= RS_LANGUAGE_TIER1;
}
+ if (rspamd_language_search_str (nelt->name, tier0_langs,
+ G_N_ELEMENTS (tier0_langs))) {
+ nelt->flags |= RS_LANGUAGE_TIER0;
+ }
+
it = NULL;
ngramms = g_ptr_array_sized_new (freqs->len);
gdouble mean;
};
/*
 * Factor multiplied by cbd->std and added to the adjusted probability of a
 * candidate whose element carries RS_LANGUAGE_TIER0.
 */
+static const gdouble tier0_adjustment = 1.2;
/*
 * Factor multiplied by cbd->std and added to a candidate's adjusted
 * probability.
 * NOTE(review): the guarding condition is not visible in this fragment;
 * presumably it is the RS_LANGUAGE_TIER1 flag - confirm.
 */
static const gdouble tier1_adjustment = 0.8;
/* NOTE(review): usage is not visible in this fragment - confirm its role. */
static const gdouble frequency_adjustment = 0.8;
probb_adjusted += cbd->std * tier1_adjustment;
}
+ if (canda->elt->flags & RS_LANGUAGE_TIER0) {
+ proba_adjusted += cbd->std * tier0_adjustment;
+ }
+
+ if (candb->elt->flags & RS_LANGUAGE_TIER0) {
+ probb_adjusted += cbd->std * tier0_adjustment;
+ }
+
if (proba_adjusted > probb_adjusted) {
return -1;
} else if (probb_adjusted > proba_adjusted) {