#if U_ICU_VERSION_MAJOR_NUM < 50
/* Hack to prevent hang with Thai in old libicu */
- if (part->unicode_scripts & RSPAMD_UNICODE_THAI) {
- msg_info_task ("enable workaround for Thai characters for old libicu")
- tok_type = RSPAMD_TOKENIZE_RAW;
- }
- else {
- tok_type = RSPAMD_TOKENIZE_UTF;
+ const gchar *p = part->utf_stripped_content->data, *end;
+ guint i = 0;
+ end = p + part->utf_stripped_content->len;
+ gint32 uc, sc;
+
+ tok_type = RSPAMD_TOKENIZE_UTF;
+
+ while (p + i < end) {
+ U8_NEXT (p, i, part->utf_stripped_content->len, uc);
+
+ if (((gint32) uc) < 0) {
+ tok_type = RSPAMD_TOKENIZE_RAW;
+ break;
+ }
+
+ if (u_isalpha (uc)) {
+ sc = ublock_getCode (uc);
+
+ if (sc == UBLOCK_THAI) {
+ msg_info_task ("enable workaround for Thai characters for old libicu");
+ tok_type = RSPAMD_TOKENIZE_RAW;
+ break;
+ }
+ }
}
#else
tok_type = RSPAMD_TOKENIZE_UTF;