#include <glob.h>
#include <unicode/utf8.h>
#include <unicode/ucnv.h>
+#include <unicode/uchar.h>
#include <math.h>
static const gsize default_short_text_limit = 200;
return memcmp (v, v2, 3 * sizeof (UChar)) == 0;
}
+/**
+ * Lowercase a UTF-16 buffer in place using ICU simple case mapping.
+ *
+ * The buffer is walked by code point rather than by 16-bit code unit, so
+ * that surrogate pairs are decoded before being passed to u_tolower ();
+ * feeding a lone surrogate to u_tolower () never maps, which would leave
+ * supplementary-plane letters untouched.
+ *
+ * @param s buffer of UTF-16 code units, modified in place
+ * @param len number of code units (not code points) in `s`
+ */
+static void
+rspamd_language_detector_ucs_lowercase (UChar *s, gsize len)
+{
+	gsize i = 0, start;
+	UChar32 c, lc;
+
+	while (i < len) {
+		start = i;
+		/* Reads one code point and advances i by one or two code units */
+		U16_NEXT (s, i, len, c);
+		lc = u_tolower (c);
+
+		/*
+		 * Write back only when the mapping keeps the same UTF-16 length,
+		 * so the in-place update can never overrun or shift the buffer.
+		 */
+		if (lc != c && U16_LENGTH (lc) == U16_LENGTH (c)) {
+			U16_APPEND_UNSAFE (s, start, lc);
+		}
+	}
+}
+
static void
rspamd_language_detector_read_file (struct rspamd_config *cfg,
struct rspamd_lang_detector *d,
continue;
}
+ rspamd_language_detector_ucs_lowercase (ucs_key, nsym);
+
if (nsym == 2) {
/* We have a digraph */
g_hash_table_insert (nelt->bigramms, ucs_key,
utf_token->begin, utf_token->len, &uc_err);
if (nsym >= 0) {
+ rspamd_language_detector_ucs_lowercase (out, nsym);
ucs_token->begin = (const gchar *) out;
ucs_token->len = nsym;
}