lib-mail: Fix potential assert-crash when parsing illegal charset translation sequence

author Timo Sirainen <timo.sirainen@open-xchange.com>

Wed, 18 Mar 2026 11:45:35 +0000 (13:45 +0200)

committer aki.tuomi <aki.tuomi@open-xchange.com>

Fri, 20 Mar 2026 05:44:33 +0000 (05:44 +0000)
author Timo Sirainen <timo.sirainen@open-xchange.com>
Wed, 18 Mar 2026 11:45:35 +0000 (13:45 +0200)
committer aki.tuomi <aki.tuomi@open-xchange.com>
Fri, 20 Mar 2026 05:44:33 +0000 (05:44 +0000)
diff --git a/src/lib-mail/message-decoder.c b/src/lib-mail/message-decoder.c

index bec5a50f6d0109d72d1d264ea3cace7201cb10f4..db602bf315d7c99b59a0e9110820df10efe16f19 100644 (file)
--- a/src/lib-mail/message-decoder.c
+++ b/src/lib-mail/message-decoder.c
@@ -224,19 +224,33 @@ static void translation_buf_decode(struct message_decoder_context *ctx,
         memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
  
         orig_size = trans_size = ctx->translation_size + data_wanted;
-       (void)charset_to_utf8(ctx->charset_trans, trans_buf,
-                             &trans_size, ctx->buf2);
-
-       if (trans_size <= ctx->translation_size) {
+       enum charset_result result =
+               charset_to_utf8(ctx->charset_trans, trans_buf,
+                               &trans_size, ctx->buf2);
+
+       if (trans_size > ctx->translation_size) {
+               /* more input was processed */
+       } else if (trans_size > 0) {
+               /* We get here when:
+                  1. Incomplete sequence has been sent to charset_to_utf()
+                     previously, e.g. 3 bytes for a utf-32be character.
+                  2. We have 1 new byte to complete the character, which
+                     is found to be invalid.
+                  3. charset_to_utf() internally skips 1 byte, hoping to find
+                     valid output from the next byte. But now we again have
+                     only 3 bytes left, so the input is incomplete to get
+                     forward.
+
+                  So we'll assert here that result is
+                  CHARSET_RET_INVALID_INPUT. Any other situation is not
+                  expected to happen. */
+               i_assert(result == CHARSET_RET_INVALID_INPUT);
+               trans_size = ctx->translation_size + 1;
+       } else {
                 /* We should get here only if we didn't get forward in our
                    charset translation, i.e. we need more input to continue
-                  translation.
-
-                  If trans_size > 0 but <= translation_size, it means that the
-                  input we already fed to charset_to_utf8() previously returned
-                  "more data needed" but this time it succeeded. This shouldn't
-                  happen, so we'll have an assert against it. */
-               i_assert(trans_size == 0);
+                  translation. */
+               i_assert(result == CHARSET_RET_INCOMPLETE_INPUT);
                 /* This assert triggers if translation_buf is too small and
                    we can't get forward with the translation. */
                 i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE);
diff --git a/src/lib-mail/test-message-decoder.c b/src/lib-mail/test-message-decoder.c

index b362b5493e38f99df133e8e6b17b490e46e2e0d9..fcf2a4cfae19731177558f0ad13e9552d963d9ee 100644 (file)
--- a/src/lib-mail/test-message-decoder.c
+++ b/src/lib-mail/test-message-decoder.c
@@ -77,6 +77,68 @@ static void test_message_decoder(void)
         test_end();
  }
  
+static void test_message_decoder_partial_illegal_sequence(void)
+{
+       struct message_decoder_context *ctx;
+       struct message_part part;
+       struct message_header_line hdr;
+       struct message_block input, output;
+
+       test_begin("message decoder partial illegal sequence");
+
+       i_zero(&part);
+       i_zero(&input);
+       memset(&output, 0xff, sizeof(output));
+       input.part = &part;
+
+       ctx = message_decoder_init(NULL, 0);
+
+       i_zero(&hdr);
+       hdr.name = "Content-Type";
+       hdr.name_len = strlen(hdr.name);
+       hdr.full_value = (const void *)"text/plain; charset=utf-32be";
+       hdr.full_value_len = strlen((const char *)hdr.full_value);
+       input.hdr = &hdr;
+       test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+       test_assert(output.size == 0);
+
+       input.hdr = NULL;
+       test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+
+       input.data = (const void *)"M--";
+       input.size = strlen((const char *)input.data);
+       test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+       test_assert(output.size == 0);
+
+       /* This completes the 32bit character, making it illegal */
+       input.data = (const void *)"X";
+       input.size = 1;
+       test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+       test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN);
+       test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8,
+                          output.size) == 0);
+
+       /* Valid character followed by earlier invalid input. */
+       input.data = (const void *)"\000\000\000\x61";
+       input.size = 4;
+       test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+       test_assert_cmp(output.size, ==, 1);
+       test_assert(memcmp(output.data, "a", output.size) == 0);
+
+       /* Try also the code path where illegal input is followed by a valid
+          character is followed by */
+       input.data = (const void *)"M--\000\000\000\x62";
+       input.size = 3 + 4;
+       test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+       test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN + 1);
+       test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8"b",
+                          output.size) == 0);
+
+       message_decoder_deinit(&ctx);
+
+       test_end();
+}
+
  static void test_message_decoder_multipart(void)
  {
         static const char test_message_input[] =
@@ -546,6 +608,7 @@ int main(void)
  {
         static void (*const test_functions[])(void) = {
                 test_message_decoder,
+               test_message_decoder_partial_illegal_sequence,
                 test_message_decoder_multipart,
                 test_message_decoder_current_content_type,
                 test_message_decoder_content_transfer_encoding,
author	Timo Sirainen <timo.sirainen@open-xchange.com>
	Wed, 18 Mar 2026 11:45:35 +0000 (13:45 +0200)
committer	aki.tuomi <aki.tuomi@open-xchange.com>
	Fri, 20 Mar 2026 05:44:33 +0000 (05:44 +0000)
src/lib-mail/message-decoder.c		patch \| blob \| blame \| history
src/lib-mail/test-message-decoder.c		patch \| blob \| blame \| history