From: Timo Sirainen <timo.sirainen@open-xchange.com>
Date: Wed, 18 Mar 2026 11:45:35 +0000 (+0200)
Subject: lib-mail: Fix potential assert-crash when parsing illegal charset translation sequence
X-Git-Tag: 2.4.3~56
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=110c19e44e95be6b6d2b09cf994ce5b502c8dd8c;p=thirdparty%2Fdovecot%2Fcore.git

lib-mail: Fix potential assert-crash when parsing illegal charset translation sequence

The assert was added by 7aad885a21e7b3832fa98f41613097383603929f
---

diff --git a/src/lib-mail/message-decoder.c b/src/lib-mail/message-decoder.c
index bec5a50f6d..db602bf315 100644
--- a/src/lib-mail/message-decoder.c
+++ b/src/lib-mail/message-decoder.c
@@ -224,19 +224,33 @@ static void translation_buf_decode(struct message_decoder_context *ctx,
 	memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
 
 	orig_size = trans_size = ctx->translation_size + data_wanted;
-	(void)charset_to_utf8(ctx->charset_trans, trans_buf,
-			      &trans_size, ctx->buf2);
-
-	if (trans_size <= ctx->translation_size) {
+	enum charset_result result =
+		charset_to_utf8(ctx->charset_trans, trans_buf,
+				&trans_size, ctx->buf2);
+
+	if (trans_size > ctx->translation_size) {
+		/* more input was processed */
+	} else if (trans_size > 0) {
+		/* We get here when:
+		   1. Incomplete sequence has been sent to charset_to_utf()
+		      previously, e.g. 3 bytes for a utf-32be character.
+		   2. We have 1 new byte to complete the character, which
+		      is found to be invalid.
+		   3. charset_to_utf() internally skips 1 byte, hoping to find
+		      valid output from the next byte. But now we again have
+		      only 3 bytes left, so the input is incomplete to get
+		      forward.
+
+		   So we'll assert here that result is
+		   CHARSET_RET_INVALID_INPUT. Any other situation is not
+		   expected to happen. */
+		i_assert(result == CHARSET_RET_INVALID_INPUT);
+		trans_size = ctx->translation_size + 1;
+	} else {
 		/* We should get here only if we didn't get forward in our
 		   charset translation, i.e. we need more input to continue
-		   translation.
-
-		   If trans_size > 0 but <= translation_size, it means that the
-		   input we already fed to charset_to_utf8() previously returned
-		   "more data needed" but this time it succeeded. This shouldn't
-		   happen, so we'll have an assert against it. */
-		i_assert(trans_size == 0);
+		   translation. */
+		i_assert(result == CHARSET_RET_INCOMPLETE_INPUT);
 		/* This assert triggers if translation_buf is too small and
 		   we can't get forward with the translation. */
 		i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE);
diff --git a/src/lib-mail/test-message-decoder.c b/src/lib-mail/test-message-decoder.c
index b362b5493e..fcf2a4cfae 100644
--- a/src/lib-mail/test-message-decoder.c
+++ b/src/lib-mail/test-message-decoder.c
@@ -77,6 +77,68 @@ static void test_message_decoder(void)
 	test_end();
 }
 
+static void test_message_decoder_partial_illegal_sequence(void)
+{
+	struct message_decoder_context *ctx;
+	struct message_part part;
+	struct message_header_line hdr;
+	struct message_block input, output;
+
+	test_begin("message decoder partial illegal sequence");
+
+	i_zero(&part);
+	i_zero(&input);
+	memset(&output, 0xff, sizeof(output));
+	input.part = &part;
+
+	ctx = message_decoder_init(NULL, 0);
+
+	i_zero(&hdr);
+	hdr.name = "Content-Type";
+	hdr.name_len = strlen(hdr.name);
+	hdr.full_value = (const void *)"text/plain; charset=utf-32be";
+	hdr.full_value_len = strlen((const char *)hdr.full_value);
+	input.hdr = &hdr;
+	test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+	test_assert(output.size == 0);
+
+	input.hdr = NULL;
+	test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+
+	input.data = (const void *)"M--";
+	input.size = strlen((const char *)input.data);
+	test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+	test_assert(output.size == 0);
+
+	/* This completes the 32bit character, making it illegal */
+	input.data = (const void *)"X";
+	input.size = 1;
+	test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+	test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN);
+	test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8,
+			   output.size) == 0);
+
+	/* Valid character followed by earlier invalid input. */
+	input.data = (const void *)"\000\000\000\x61";
+	input.size = 4;
+	test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+	test_assert_cmp(output.size, ==, 1);
+	test_assert(memcmp(output.data, "a", output.size) == 0);
+
+	/* Try also the code path where illegal input is followed by a valid
+	   character is followed by */
+	input.data = (const void *)"M--\000\000\000\x62";
+	input.size = 3 + 4;
+	test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+	test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN + 1);
+	test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8"b",
+			   output.size) == 0);
+
+	message_decoder_deinit(&ctx);
+
+	test_end();
+}
+
 static void test_message_decoder_multipart(void)
 {
 	static const char test_message_input[] =
@@ -546,6 +608,7 @@ int main(void)
 {
 	static void (*const test_functions[])(void) = {
 		test_message_decoder,
+		test_message_decoder_partial_illegal_sequence,
 		test_message_decoder_multipart,
 		test_message_decoder_current_content_type,
 		test_message_decoder_content_transfer_encoding,