From: Alexander Moisseev
Date: Mon, 13 Apr 2026 15:39:24 +0000 (+0300)
Subject: [Fix] Skip ICU conversion for x-binaryenc charset in all detection paths
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0ef6ab9b33273a698fa836cddcbe0f22c80c9ae6;p=thirdparty%2Frspamd.git

[Fix] Skip ICU conversion for x-binaryenc charset in all detection paths

x-binaryenc is a synthetic name returned by CED (Google Compact Encoding Detection) to signal binary content, not a real text encoding. ICU has no converter for it and always fails with U_FILE_ACCESS_ERROR, producing a misleading warning in the logs.

Add an early exit in rspamd_mime_text_part_maybe_convert() for all three detection paths: announced-charset missing, announced-charset unknown, and rspamd_mime_charset_utf_check content-heuristic. When the detected charset is x-binaryenc, mark the part as raw binary immediately, consistent with what the existing fallback already does.

Extract set_part_binary() helper and RSPAMD_BINARYENC_CHARSET constant to eliminate the resulting code duplication.

Fixes the spurious "cannot open converter for x-binaryenc" warning seen when processing messages with binary MIME parts.
--- diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index 995706d10c..b24d9c036e 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -31,6 +31,7 @@ #include #define UTF8_CHARSET "UTF-8" +#define RSPAMD_BINARYENC_CHARSET "x-binaryenc" #define RSPAMD_CHARSET_FLAG_UTF (1 << 0) #define RSPAMD_CHARSET_FLAG_ASCII (1 << 1) @@ -725,6 +726,18 @@ rspamd_mime_charset_utf_check(rspamd_ftok_t *charset, return FALSE; } +static void +set_part_binary(struct rspamd_task *task, + struct rspamd_mime_text_part *text_part, + GByteArray *part_content, + const char *charset) +{ + msg_debug_task("text part contains binary data (detected charset: %s), skip conversion", + charset); + SET_PART_RAW(text_part); + text_part->utf_raw_content = part_content; +} + void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task, struct rspamd_mime_text_part *text_part) { @@ -771,6 +784,10 @@ void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task, text_part->parsed.len); if (charset != NULL) { + if (g_ascii_strcasecmp(charset, RSPAMD_BINARYENC_CHARSET) == 0) { + set_part_binary(task, text_part, part_content, charset); + return; + } msg_info_task("detected charset %s", charset); } @@ -794,6 +811,10 @@ void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task, if (need_charset_heuristic) { charset = rspamd_mime_charset_find_by_content_maybe_split(part_content->data, part_content->len); + if (charset != NULL && g_ascii_strcasecmp(charset, RSPAMD_BINARYENC_CHARSET) == 0) { + set_part_binary(task, text_part, part_content, charset); + return; + } msg_info_task("detected charset: %s", charset); checked = TRUE; text_part->real_charset = charset; @@ -840,6 +861,11 @@ void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task, else { charset = charset_tok.begin; + if (g_ascii_strcasecmp(charset, RSPAMD_BINARYENC_CHARSET) == 0) { + set_part_binary(task, text_part, part_content, charset); + return; + } + if 
(!rspamd_mime_text_part_utf8_convert(task, text_part, part_content, charset, &err)) { msg_warn_task("<%s>: cannot convert from %s to utf8: %s",