From: Timo Sirainen Date: Wed, 18 Mar 2026 11:45:35 +0000 (+0200) Subject: lib-mail: Fix potential assert-crash when parsing illegal charset translation sequence X-Git-Tag: 2.4.3~56 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=110c19e44e95be6b6d2b09cf994ce5b502c8dd8c;p=thirdparty%2Fdovecot%2Fcore.git lib-mail: Fix potential assert-crash when parsing illegal charset translation sequence The assert was added by 7aad885a21e7b3832fa98f41613097383603929f --- diff --git a/src/lib-mail/message-decoder.c b/src/lib-mail/message-decoder.c index bec5a50f6d..db602bf315 100644 --- a/src/lib-mail/message-decoder.c +++ b/src/lib-mail/message-decoder.c @@ -224,19 +224,33 @@ static void translation_buf_decode(struct message_decoder_context *ctx, memcpy(trans_buf + ctx->translation_size, *data, data_wanted); orig_size = trans_size = ctx->translation_size + data_wanted; - (void)charset_to_utf8(ctx->charset_trans, trans_buf, - &trans_size, ctx->buf2); - - if (trans_size <= ctx->translation_size) { + enum charset_result result = + charset_to_utf8(ctx->charset_trans, trans_buf, + &trans_size, ctx->buf2); + + if (trans_size > ctx->translation_size) { + /* more input was processed */ + } else if (trans_size > 0) { + /* We get here when: + 1. Incomplete sequence has been sent to charset_to_utf() + previously, e.g. 3 bytes for a utf-32be character. + 2. We have 1 new byte to complete the character, which + is found to be invalid. + 3. charset_to_utf() internally skips 1 byte, hoping to find + valid output from the next byte. But now we again have + only 3 bytes left, so the input is incomplete to get + forward. + + So we'll assert here that result is + CHARSET_RET_INVALID_INPUT. Any other situation is not + expected to happen. */ + i_assert(result == CHARSET_RET_INVALID_INPUT); + trans_size = ctx->translation_size + 1; + } else { /* We should get here only if we didn't get forward in our charset translation, i.e. we need more input to continue - translation. - - If trans_size > 0 but <= translation_size, it means that the - input we already fed to charset_to_utf8() previously returned - "more data needed" but this time it succeeded. This shouldn't - happen, so we'll have an assert against it. */ - i_assert(trans_size == 0); + translation. */ + i_assert(result == CHARSET_RET_INCOMPLETE_INPUT); /* This assert triggers if translation_buf is too small and we can't get forward with the translation. */ i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE); diff --git a/src/lib-mail/test-message-decoder.c b/src/lib-mail/test-message-decoder.c index b362b5493e..fcf2a4cfae 100644 --- a/src/lib-mail/test-message-decoder.c +++ b/src/lib-mail/test-message-decoder.c @@ -77,6 +77,68 @@ static void test_message_decoder(void) test_end(); } +static void test_message_decoder_partial_illegal_sequence(void) +{ + struct message_decoder_context *ctx; + struct message_part part; + struct message_header_line hdr; + struct message_block input, output; + + test_begin("message decoder partial illegal sequence"); + + i_zero(&part); + i_zero(&input); + memset(&output, 0xff, sizeof(output)); + input.part = ∂ + + ctx = message_decoder_init(NULL, 0); + + i_zero(&hdr); + hdr.name = "Content-Type"; + hdr.name_len = strlen(hdr.name); + hdr.full_value = (const void *)"text/plain; charset=utf-32be"; + hdr.full_value_len = strlen((const char *)hdr.full_value); + input.hdr = &hdr; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 0); + + input.hdr = NULL; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + + input.data = (const void *)"M--"; + input.size = strlen((const char *)input.data); + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 0); + + /* This completes the 32bit character, making it illegal */ + input.data = (const void *)"X"; + input.size = 1; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN); + test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8, + output.size) == 0); + + /* Valid character followed by earlier invalid input. */ + input.data = (const void *)"\000\000\000\x61"; + input.size = 4; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert_cmp(output.size, ==, 1); + test_assert(memcmp(output.data, "a", output.size) == 0); + + /* Try also the code path where illegal input is followed by a valid + character is followed by */ + input.data = (const void *)"M--\000\000\000\x62"; + input.size = 3 + 4; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN + 1); + test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8"b", + output.size) == 0); + + message_decoder_deinit(&ctx); + + test_end(); +} + static void test_message_decoder_multipart(void) { static const char test_message_input[] = @@ -546,6 +608,7 @@ int main(void) { static void (*const test_functions[])(void) = { test_message_decoder, + test_message_decoder_partial_illegal_sequence, test_message_decoder_multipart, test_message_decoder_current_content_type, test_message_decoder_content_transfer_encoding,