git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Skip ICU conversion for x-binaryenc charset in all detection paths
author: Alexander Moisseev <moiseev@mezonplus.ru>
Mon, 13 Apr 2026 15:39:24 +0000 (18:39 +0300)
committer: Alexander Moisseev <moiseev@mezonplus.ru>
Mon, 13 Apr 2026 16:43:00 +0000 (19:43 +0300)
x-binaryenc is a synthetic name returned by CED (Google Compact
Encoding Detection) to signal binary content, not a real text
encoding.  ICU has no converter for it and always fails with
U_FILE_ACCESS_ERROR, producing a misleading warning in the logs.

Add an early exit in rspamd_mime_text_part_maybe_convert() for all
three detection paths: announced-charset missing, announced-charset
unknown, and rspamd_mime_charset_utf_check content-heuristic. When
the detected charset is x-binaryenc, mark the part as raw binary
immediately, consistent with what the existing fallback already does.

Extract set_part_binary() helper and RSPAMD_BINARYENC_CHARSET constant
to eliminate the resulting code duplication.

Fixes the spurious "cannot open converter for x-binaryenc" warning
seen when processing messages with binary MIME parts.

src/libmime/mime_encoding.c

index 995706d10c0f9018fa9ffaa77813970e160e8a07..b24d9c036e5f15b05971dc984fd9a0c46d383e19 100644 (file)
@@ -31,6 +31,7 @@
 #include <math.h>
 
 #define UTF8_CHARSET "UTF-8"
+#define RSPAMD_BINARYENC_CHARSET "x-binaryenc"
 
 #define RSPAMD_CHARSET_FLAG_UTF (1 << 0)
 #define RSPAMD_CHARSET_FLAG_ASCII (1 << 1)
@@ -725,6 +726,18 @@ rspamd_mime_charset_utf_check(rspamd_ftok_t *charset,
        return FALSE;
 }
 
+static void
+set_part_binary(struct rspamd_task *task,
+                               struct rspamd_mime_text_part *text_part,
+                               GByteArray *part_content,
+                               const char *charset)
+{ /* Treat the part as binary: skip charset conversion and keep part_content as-is. */
+       msg_debug_task("text part contains binary data (detected charset: %s), skip conversion",
+                                  charset); /* charset is used only for this log message */
+       SET_PART_RAW(text_part); /* macro defined outside this hunk; presumably flags the part as raw */
+       text_part->utf_raw_content = part_content; /* pointer assignment only, the bytes are not copied */
+}
+
 void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task,
                                                                                 struct rspamd_mime_text_part *text_part)
 {
@@ -771,6 +784,10 @@ void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task,
                                                                                                                                          text_part->parsed.len);
 
                        if (charset != NULL) {
+                               if (g_ascii_strcasecmp(charset, RSPAMD_BINARYENC_CHARSET) == 0) {
+                                       set_part_binary(task, text_part, part_content, charset);
+                                       return;
+                               }
                                msg_info_task("detected charset %s", charset);
                        }
 
@@ -794,6 +811,10 @@ void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task,
                        if (need_charset_heuristic) {
                                charset = rspamd_mime_charset_find_by_content_maybe_split(part_content->data,
                                                                                                                                                  part_content->len);
+                               if (charset != NULL && g_ascii_strcasecmp(charset, RSPAMD_BINARYENC_CHARSET) == 0) {
+                                       set_part_binary(task, text_part, part_content, charset);
+                                       return;
+                               }
                                msg_info_task("detected charset: %s", charset);
                                checked = TRUE;
                                text_part->real_charset = charset;
@@ -840,6 +861,11 @@ void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task,
                else {
                        charset = charset_tok.begin;
 
+                       if (g_ascii_strcasecmp(charset, RSPAMD_BINARYENC_CHARSET) == 0) {
+                               set_part_binary(task, text_part, part_content, charset);
+                               return;
+                       }
+
                        if (!rspamd_mime_text_part_utf8_convert(task, text_part,
                                                                                                        part_content, charset, &err)) {
                                msg_warn_task("<%s>: cannot convert from %s to utf8: %s",