From: Stephan Bosch <stephan.bosch@open-xchange.com>
Date: Wed, 16 Sep 2020 01:21:30 +0000 (+0200)
Subject: lib-mail: Properly handle bad UTF-8 in message_header_encode_b().
X-Git-Tag: 2.3.13~109
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=617b1beaa3de6cac7b404f329c1a5814866b0907;p=thirdparty%2Fdovecot%2Fcore.git

lib-mail: Properly handle bad UTF-8 in message_header_encode_b().

Broken characters are substituted with a replacement character.
---

diff --git a/src/lib-mail/message-header-encode.c b/src/lib-mail/message-header-encode.c
index d124b1f302..a9410a86fa 100644
--- a/src/lib-mail/message-header-encode.c
+++ b/src/lib-mail/message-header-encode.c
@@ -176,48 +176,124 @@ void message_header_encode_q(const unsigned char *input, size_t len,
 void message_header_encode_b(const unsigned char *input, size_t len,
 			     string_t *output, size_t first_line_len)
 {
-	size_t line_len, line_len_left, max;
+	static const unsigned char *rep_char =
+		(const unsigned char *)UNICODE_REPLACEMENT_CHAR_UTF8;
+	static const unsigned int rep_char_len =
+		UNICODE_REPLACEMENT_CHAR_UTF8_LEN;
+	struct base64_encoder b64enc;
+	size_t line_len_left;
 
 	if (len == 0)
 		return;
 
-	line_len = first_line_len;
-	if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN) {
+	line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN;
+
+	if (first_line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - 3) {
 		str_append(output, "\n\t");
-		line_len = 1;
+		line_len_left--;
+	} else {
+		line_len_left -= first_line_len;
 	}
 
+	str_append(output, "=?utf-8?b?");
+	base64_encode_init(&b64enc, &base64_scheme, 0, 0);
 	for (;;) {
-		line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len;
-		max = MAX_BASE64_DECODED_SIZE(line_len_left);
-		do {
-			max--;
-			if (max > len)
-				max = len;
-			else {
-				/* all of it doesn't fit. find a character where we
-				   can split it from. */
-				while (max > 0 && (input[max] & 0xc0) == 0x80)
-					max--;
-			}
-		} while (MAX_BASE64_ENCODED_SIZE(max) > line_len_left &&
-			 max > 0);
+		unichar_t ch;
+		size_t space, max, old_bufsize, n_in, n_out;
+		int nch = 1;
+
+		/* Determine how many octets can be encoded on (the remainder
+		   of) this line */
+		space = base64_encode_get_full_space(&b64enc, line_len_left);
+		max = I_MIN(space, len);
+
+		/* Check UTF-8 code points in the input and determine a proper
+		   boundary for the end of this fragment if the encoded size
+		   exceeds the maximum (remaining) line length. */
+		for (n_in = 0; n_in < max;) {
+			nch = uni_utf8_get_char_n(&input[n_in],
+						  len - n_in, &ch);
+			if (nch <= 0)
+				break;
+			if ((n_in + nch) > max)
+				break;
+			n_in += nch;
+		}
+
+		/* Encode this fragment up until the maximum fragment size or
+		   the first invalid UTF-8 code point in the input. */
+		if (n_in > 0) {
+			old_bufsize = output->used;
+			if (!base64_encode_more(&b64enc, input, n_in,
+						  &n_in, output))
+				i_unreached();
+			n_out = output->used - old_bufsize;
+
+			/* Update sizes and pointers */
+			i_assert(len >= n_in);
+			i_assert(line_len_left >= n_out);
+			input += n_in;
+			len -= n_in;
+			line_len_left -= n_out;
+		}
 
-		if (max > 0) {
-			str_append(output, "=?utf-8?b?");
-			base64_encode(input, max, output);
-			str_append(output, "?=");
+		/* Determine whether a replacement character needs to be written
+		   and how much space there is left for it on the current line.
+		 */
+		space = 0;
+		if (nch <= 0) {
+			space = base64_encode_get_full_space(
+				&b64enc, line_len_left);
 		}
 
-		input += max;
-		len -= max;
+		/* Start a new line once insufficient space is available. */
+		if ((nch > 0 && len > 0) ||
+		    (nch <= 0 && space < rep_char_len)) {
+			old_bufsize = output->used;
+			if (!base64_encode_finish(&b64enc, output))
+				i_unreached();
+			n_out = output->used - old_bufsize;
+			i_assert(line_len_left >= n_out);
+
+			str_append(output, "?=\n\t=?utf-8?b?");
+			line_len_left = MIME_MAX_LINE_LEN -
+				MIME_WRAPPER_LEN - 1;
+			base64_encode_reset(&b64enc);
+		}
+
+		/* Write replacement character if needed. */
+		n_in = 0;
+		n_out = 0;
+		if (nch <= 0) {
+			old_bufsize = output->used;
+			if (!base64_encode_more(&b64enc, rep_char, rep_char_len,
+						NULL, output))
+				i_unreached();
+
+			n_in = 1;
+			n_out = output->used - old_bufsize;
+
+			/* Skip more invalid characters in the input. */
+			for (; n_in < len; n_in++) {
+				nch = uni_utf8_get_char_n(&input[n_in],
+							  len - n_in, &ch);
+				if (nch > 0)
+					break;
+			}
+		}
+
+		/* Update sizes and pointers */
+		i_assert(line_len_left >= n_out);
+		input += n_in;
+		len -= n_in;
+		line_len_left -= n_out;
 
 		if (len == 0)
 			break;
-
-		str_append(output, "\n\t");
-		line_len = 1;
 	}
+	if (!base64_encode_finish(&b64enc, output))
+		i_unreached();
+	str_append(output, "?=");
 }
 
 void message_header_encode(const char *input, string_t *output)