From: Vsevolod Stakhov Date: Thu, 8 Nov 2018 09:40:08 +0000 (+0000) Subject: [Minor] This step is not done yet, apply another approach X-Git-Tag: 1.8.2~66 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ad33efe0d2a35eee8a3a47a6fdffcae6d6b8e185;p=thirdparty%2Frspamd.git [Minor] This step is not done yet, apply another approach Issue: #2623 --- diff --git a/src/libmime/message.c b/src/libmime/message.c index 0604f0ce77..5f9373a9ad 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -206,12 +206,30 @@ rspamd_mime_part_create_words (struct rspamd_task *task, #if U_ICU_VERSION_MAJOR_NUM < 50 /* Hack to prevent hang with Thai in old libicu */ - if (part->unicode_scripts & RSPAMD_UNICODE_THAI) { - msg_info_task ("enable workaround for Thai characters for old libicu") - tok_type = RSPAMD_TOKENIZE_RAW; - } - else { - tok_type = RSPAMD_TOKENIZE_UTF; + const gchar *p = part->utf_stripped_content->data, *end; + guint i = 0; + end = p + part->utf_stripped_content->len; + gint32 uc, sc; + + tok_type = RSPAMD_TOKENIZE_UTF; + + while (p + i < end) { + U8_NEXT (p, i, part->utf_stripped_content->len, uc); + + if (((gint32) uc) < 0) { + tok_type = RSPAMD_TOKENIZE_RAW; + break; + } + + if (u_isalpha (uc)) { + sc = ublock_getCode (uc); + + if (sc == UBLOCK_THAI) { + msg_info_task ("enable workaround for Thai characters for old libicu"); + tok_type = RSPAMD_TOKENIZE_RAW; + break; + } + } } #else tok_type = RSPAMD_TOKENIZE_UTF;