memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
orig_size = trans_size = ctx->translation_size + data_wanted;
- (void)charset_to_utf8(ctx->charset_trans, trans_buf,
- &trans_size, ctx->buf2);
-
- if (trans_size <= ctx->translation_size) {
+ enum charset_result result =
+ charset_to_utf8(ctx->charset_trans, trans_buf,
+ &trans_size, ctx->buf2);
+
+ if (trans_size > ctx->translation_size) {
+ /* more input was processed */
+ } else if (trans_size > 0) {
+ /* We get here when:
+ 1. Incomplete sequence has been sent to charset_to_utf()
+ previously, e.g. 3 bytes for a utf-32be character.
+ 2. We have 1 new byte to complete the character, which
+ is found to be invalid.
+ 3. charset_to_utf() internally skips 1 byte, hoping to find
+ valid output from the next byte. But now we again have
+ only 3 bytes left, so the input is incomplete to get
+ forward.
+
+ So we'll assert here that result is
+ CHARSET_RET_INVALID_INPUT. Any other situation is not
+ expected to happen. */
+ i_assert(result == CHARSET_RET_INVALID_INPUT);
+ trans_size = ctx->translation_size + 1;
+ } else {
/* We should get here only if we didn't get forward in our
charset translation, i.e. we need more input to continue
- translation.
-
- If trans_size > 0 but <= translation_size, it means that the
- input we already fed to charset_to_utf8() previously returned
- "more data needed" but this time it succeeded. This shouldn't
- happen, so we'll have an assert against it. */
- i_assert(trans_size == 0);
+ translation. */
+ i_assert(result == CHARSET_RET_INCOMPLETE_INPUT);
/* This assert triggers if translation_buf is too small and
we can't get forward with the translation. */
i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE);
test_end();
}
+static void test_message_decoder_partial_illegal_sequence(void)
+{
+ struct message_decoder_context *ctx;
+ struct message_part part;
+ struct message_header_line hdr;
+ struct message_block input, output;
+
+ test_begin("message decoder partial illegal sequence");
+
+ i_zero(&part);
+ i_zero(&input);
+ memset(&output, 0xff, sizeof(output));
+ input.part = ∂
+
+ ctx = message_decoder_init(NULL, 0);
+
+ i_zero(&hdr);
+ hdr.name = "Content-Type";
+ hdr.name_len = strlen(hdr.name);
+ hdr.full_value = (const void *)"text/plain; charset=utf-32be";
+ hdr.full_value_len = strlen((const char *)hdr.full_value);
+ input.hdr = &hdr;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+
+ input.hdr = NULL;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+
+ input.data = (const void *)"M--";
+ input.size = strlen((const char *)input.data);
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+
+ /* This completes the 32bit character, making it illegal */
+ input.data = (const void *)"X";
+ input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN);
+ test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8,
+ output.size) == 0);
+
+ /* Valid character followed by earlier invalid input. */
+ input.data = (const void *)"\000\000\000\x61";
+ input.size = 4;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert_cmp(output.size, ==, 1);
+ test_assert(memcmp(output.data, "a", output.size) == 0);
+
+ /* Try also the code path where illegal input is followed by a valid
+ character is followed by */
+ input.data = (const void *)"M--\000\000\000\x62";
+ input.size = 3 + 4;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert_cmp(output.size, ==, UNICODE_REPLACEMENT_CHAR_UTF8_LEN + 1);
+ test_assert(memcmp(output.data, UNICODE_REPLACEMENT_CHAR_UTF8"b",
+ output.size) == 0);
+
+ message_decoder_deinit(&ctx);
+
+ test_end();
+}
+
static void test_message_decoder_multipart(void)
{
static const char test_message_input[] =
{
static void (*const test_functions[])(void) = {
test_message_decoder,
+ test_message_decoder_partial_illegal_sequence,
test_message_decoder_multipart,
test_message_decoder_current_content_type,
test_message_decoder_content_transfer_encoding,