From f1a9346bf6e537e8819ff3d27cbdbfe1a2f906f8 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Sun, 14 Jan 2018 23:05:08 +0000
Subject: [PATCH] [Minor] Lowercase ucs data

---
 src/libmime/lang_detection.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index f73bc7aeaa..374e38f833 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 
 static const gsize default_short_text_limit = 200;
@@ -80,6 +81,16 @@ rspamd_trigram_equal (gconstpointer v, gconstpointer v2)
 	return memcmp (v, v2, 3 * sizeof (UChar)) == 0;
 }
 
+static void
+rspamd_language_detector_ucs_lowercase (UChar *s, gsize len)
+{
+	gsize i;
+
+	for (i = 0; i < len; i ++) {
+		s[i] = u_tolower (s[i]);
+	}
+}
+
 static void
 rspamd_language_detector_read_file (struct rspamd_config *cfg,
 		struct rspamd_lang_detector *d,
@@ -149,6 +160,8 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg,
 				continue;
 			}
 
+			rspamd_language_detector_ucs_lowercase (ucs_key, nsym);
+
 			if (nsym == 2) {
 				/* We have a digraph */
 				g_hash_table_insert (nelt->bigramms, ucs_key,
@@ -270,6 +283,7 @@ rspamd_language_detector_to_ucs (struct rspamd_lang_detector *d,
 			utf_token->begin, utf_token->len, &uc_err);
 
 	if (nsym >= 0) {
+		rspamd_language_detector_ucs_lowercase (out, nsym);
 		ucs_token->begin = (const gchar *) out;
 		ucs_token->len = nsym;
 	}
-- 
2.47.3