git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] This step is not done yet, apply another approach
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 8 Nov 2018 09:40:08 +0000 (09:40 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 8 Nov 2018 09:41:57 +0000 (09:41 +0000)
Issue: #2623

src/libmime/message.c

index 0604f0ce774a930fb61a1328d60a6e2cf0e53ce3..5f9373a9adb591831bd95df8c06066cf11b753a5 100644 (file)
@@ -206,12 +206,30 @@ rspamd_mime_part_create_words (struct rspamd_task *task,
 
 #if U_ICU_VERSION_MAJOR_NUM < 50
                /* Hack to prevent hang with Thai in old libicu */
-               if (part->unicode_scripts & RSPAMD_UNICODE_THAI) {
-                       msg_info_task ("enable workaround for Thai characters for old libicu")
-                       tok_type = RSPAMD_TOKENIZE_RAW;
-               }
-               else {
-                       tok_type = RSPAMD_TOKENIZE_UTF;
+               const gchar *p = part->utf_stripped_content->data, *end;
+               guint i = 0;
+               end = p + part->utf_stripped_content->len;
+               gint32 uc, sc;
+
+               tok_type = RSPAMD_TOKENIZE_UTF;
+
+               while (p + i < end) {
+                       U8_NEXT (p, i, part->utf_stripped_content->len, uc);
+
+                       if (((gint32) uc) < 0) {
+                               tok_type = RSPAMD_TOKENIZE_RAW;
+                               break;
+                       }
+
+                       if (u_isalpha (uc)) {
+                               sc = ublock_getCode (uc);
+
+                               if (sc == UBLOCK_THAI) {
+                                       msg_info_task ("enable workaround for Thai characters for old libicu");
+                                       tok_type = RSPAMD_TOKENIZE_RAW;
+                                       break;
+                               }
+                       }
                }
 #else
                tok_type = RSPAMD_TOKENIZE_UTF;