From ad33efe0d2a35eee8a3a47a6fdffcae6d6b8e185 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 8 Nov 2018 09:40:08 +0000 Subject: [PATCH] [Minor] This step is not done yet, apply another approach Issue: #2623 --- src/libmime/message.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 0604f0ce77..5f9373a9ad 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -206,12 +206,30 @@ rspamd_mime_part_create_words (struct rspamd_task *task, #if U_ICU_VERSION_MAJOR_NUM < 50 /* Hack to prevent hang with Thai in old libicu */ - if (part->unicode_scripts & RSPAMD_UNICODE_THAI) { - msg_info_task ("enable workaround for Thai characters for old libicu") - tok_type = RSPAMD_TOKENIZE_RAW; - } - else { - tok_type = RSPAMD_TOKENIZE_UTF; + const gchar *p = part->utf_stripped_content->data, *end; + guint i = 0; + end = p + part->utf_stripped_content->len; + gint32 uc, sc; + + tok_type = RSPAMD_TOKENIZE_UTF; + + while (p + i < end) { + U8_NEXT (p, i, part->utf_stripped_content->len, uc); + + if (((gint32) uc) < 0) { + tok_type = RSPAMD_TOKENIZE_RAW; + break; + } + + if (u_isalpha (uc)) { + sc = ublock_getCode (uc); + + if (sc == UBLOCK_THAI) { + msg_info_task ("enable workaround for Thai characters for old libicu"); + tok_type = RSPAMD_TOKENIZE_RAW; + break; + } + } } #else tok_type = RSPAMD_TOKENIZE_UTF; -- 2.47.3