From: Vsevolod Stakhov
Date: Sun, 14 Jan 2018 23:05:08 +0000 (+0000)
Subject: [Minor] Lowercase ucs data
X-Git-Tag: 1.7.0~278
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f1a9346bf6e537e8819ff3d27cbdbfe1a2f906f8;p=thirdparty%2Frspamd.git

[Minor] Lowercase ucs data
---

diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index f73bc7aeaa..374e38f833 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 
 static const gsize default_short_text_limit = 200;
@@ -80,6 +81,16 @@ rspamd_trigram_equal (gconstpointer v, gconstpointer v2)
 	return memcmp (v, v2, 3 * sizeof (UChar)) == 0;
 }
 
+static void
+rspamd_language_detector_ucs_lowercase (UChar *s, gsize len)
+{
+	gsize i;
+
+	for (i = 0; i < len; i ++) {
+		s[i] = u_tolower (s[i]);
+	}
+}
+
 static void
 rspamd_language_detector_read_file (struct rspamd_config *cfg,
 		struct rspamd_lang_detector *d,
@@ -149,6 +160,8 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg,
 				continue;
 			}
 
+			rspamd_language_detector_ucs_lowercase (ucs_key, nsym);
+
 			if (nsym == 2) {
 				/* We have a digraph */
 				g_hash_table_insert (nelt->bigramms, ucs_key,
@@ -270,6 +283,7 @@ rspamd_language_detector_to_ucs (struct rspamd_lang_detector *d,
 			utf_token->begin, utf_token->len, &uc_err);
 
 	if (nsym >= 0) {
+		rspamd_language_detector_ucs_lowercase (out, nsym);
 		ucs_token->begin = (const gchar *) out;
 		ucs_token->len = nsym;
 	}