From: Timo Sirainen <tss@iki.fi>
Date: Mon, 5 May 2014 12:39:58 +0000 (+0300)
Subject: lib-mail: Added message_header_encode_data() to support encoding also NUL characters.
X-Git-Tag: 2.2.13.rc1~44
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a5f1628ad97ad649bfef86c84c3afc1d9cb0f326;p=thirdparty%2Fdovecot%2Fcore.git

lib-mail: Added message_header_encode_data() to support encoding also NUL characters.
---

diff --git a/src/lib-mail/Makefile.am b/src/lib-mail/Makefile.am
index 8de4488584..5907747dad 100644
--- a/src/lib-mail/Makefile.am
+++ b/src/lib-mail/Makefile.am
@@ -134,7 +134,7 @@ test_message_decoder_LDADD = message-decoder.lo rfc822-parser.lo rfc2231-parser.
 test_message_decoder_DEPENDENCIES = $(test_deps)
 
 test_message_header_decode_SOURCES = test-message-header-decode.c
-test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo $(test_libs)
+test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo message-header-encode.lo $(test_libs)
 test_message_header_decode_DEPENDENCIES = $(test_deps)
 
 test_message_header_encode_SOURCES = test-message-header-encode.c
diff --git a/src/lib-mail/message-header-encode.c b/src/lib-mail/message-header-encode.c
index fa387f0e57..6ee3cf6ddb 100644
--- a/src/lib-mail/message-header-encode.c
+++ b/src/lib-mail/message-header-encode.c
@@ -11,7 +11,8 @@
 #define IS_LWSP(c) \
 	((c) == ' ' || (c) == '\t' || (c) == '\n')
 
-static bool input_idx_need_encoding(const unsigned char *input, unsigned int i)
+static bool input_idx_need_encoding(const unsigned char *input,
+				    unsigned int i, unsigned int len)
 {
 	/* 8bit chars */
 	if ((input[i] & 0x80) != 0)
@@ -21,7 +22,7 @@ static bool input_idx_need_encoding(const unsigned char *input, unsigned int i)
 		return TRUE;
 
 	/* <LWSP>=? */
-	if (input[i] == '=' && input[i+1] == '?' &&
+	if (input[i] == '=' && i+1 < len && input[i+1] == '?' &&
 	    (i == 0 || IS_LWSP(input[i-1])))
 		return TRUE;
 	return FALSE;
@@ -130,21 +131,26 @@ void message_header_encode_b(const unsigned char *input, unsigned int len,
 	}
 }
 
-void message_header_encode(const char *_input, string_t *output)
+void message_header_encode(const char *input, string_t *output) /* NUL-terminated string front end for message_header_encode_data() */
+{
+	message_header_encode_data((const void *)input, strlen(input), output); /* length comes from strlen(), so input ends at its first NUL byte */
+}
+
+void message_header_encode_data(const unsigned char *input, unsigned int len,
+				string_t *output)
 {
-	const unsigned char *input = (const unsigned char *)_input;
 	unsigned int i, first_idx, last_idx;
 	unsigned int enc_chars, enc_len, base64_len, q_len;
 	bool use_q;
 
 	/* find the first word that needs encoding */
-	for (i = 0; input[i] != '\0'; i++) {
-		if (input_idx_need_encoding(input, i))
+	for (i = 0; i < len; i++) {
+		if (input_idx_need_encoding(input, i, len))
 			break;
 	}
-	if (input[i] == '\0') {
+	if (i == len) {
 		/* no encoding necessary */
-		str_append(output, _input);
+		str_append_data(output, input, len);
 		return;
 	}
 	first_idx = i;
@@ -153,13 +159,13 @@ void message_header_encode(const char *_input, string_t *output)
 
 	/* find the last word that needs encoding */
 	last_idx = ++i; enc_chars = 1;
-	for (; input[i] != '\0'; i++) {
-		if (input_idx_need_encoding(input, i)) {
+	for (; i < len; i++) {
+		if (input_idx_need_encoding(input, i, len)) {
 			last_idx = i + 1;
 			enc_chars++;
 		}
 	}
-	while (input[last_idx] != '\0' && !IS_LWSP(input[last_idx]))
+	while (last_idx < len && !IS_LWSP(input[last_idx]))
 		last_idx++;
 
 	/* figure out if we should use Q or B encoding. Prefer Q if it's not
@@ -170,10 +176,10 @@ void message_header_encode(const char *_input, string_t *output)
 	use_q = q_len*2/3 <= base64_len;
 
 	/* and do it */
-	str_append_n(output, input, first_idx);
+	str_append_data(output, input, first_idx);
 	if (use_q)
 		message_header_encode_q(input + first_idx, enc_len, output);
 	else
 		message_header_encode_b(input + first_idx, enc_len, output);
-	str_append(output, _input + last_idx);
+	str_append_data(output, input + last_idx, len - last_idx);
 }
diff --git a/src/lib-mail/message-header-encode.h b/src/lib-mail/message-header-encode.h
index 13ef7e12a4..4f3b15ca5b 100644
--- a/src/lib-mail/message-header-encode.h
+++ b/src/lib-mail/message-header-encode.h
@@ -3,6 +3,8 @@
 
 /* Encode UTF-8 input into output wherever necessary. */
 void message_header_encode(const char *input, string_t *output);
+void message_header_encode_data(const unsigned char *input, unsigned int len, /* same as above, but input is exactly len bytes */
+				string_t *output); /* explicit length allows NUL bytes inside input */
 
 /* Encode the whole UTF-8 input using "Q" or "B" encoding into output.
    The output is split into multiple lines if necessary. The first line length
diff --git a/src/lib-mail/test-message-header-decode.c b/src/lib-mail/test-message-header-decode.c
index 7f39f14c4a..5b716c1f60 100644
--- a/src/lib-mail/test-message-header-decode.c
+++ b/src/lib-mail/test-message-header-decode.c
@@ -4,9 +4,12 @@
 #include "buffer.h"
 #include "str.h"
 #include "charset-utf8.h"
+#include "message-header-encode.h"
 #include "message-header-decode.h"
 #include "test-common.h"
 
+#include <stdlib.h>
+
 bool charset_is_utf8(const char *charset ATTR_UNUSED) { return TRUE; }
 
 int charset_to_utf8_begin(const char *charset ATTR_UNUSED,
@@ -25,6 +28,7 @@ charset_to_utf8(struct charset_translation *t ATTR_UNUSED,
 static void test_message_header_decode(void)
 {
 	static const char *data[] = {
+		" \t=?utf-8?q?=c3=a4?=  =?utf-8?q?=c3=a4?=  b  \t\r\n ", "Ã¤Ã¤  b  \t\r\n ",
 		"a =?utf-8?q?=c3=a4?= b", "a Ã¤ b",
 		"a =?utf-8?q?=c3=a4?= b", "a Ã¤ b",
 		"a =?utf-8?q?=c3=a4?=\t\t\r\n =?utf-8?q?=c3=a4?= b", "a Ã¤Ã¤ b",
@@ -47,10 +51,40 @@ static void test_message_header_decode(void)
 	test_end();
 }
 
+static void test_message_header_decode_encode_random(void) /* round trip: encode random 7bit data, decode it, expect the original bytes back */
+{
+	string_t *encoded, *decoded;
+	unsigned char buf[1024];
+	unsigned int i, j, buflen;
+
+	test_begin("message header encode & decode randomly");
+
+	buf[0] = 'x'; /* first byte stays fixed; only buf[1..] is randomized below */
+	encoded = t_str_new(256);
+	decoded = t_str_new(256);
+	for (i = 0; i < 1000; i++) {
+		/* fill only with 7bit data so we don't have to worry about
+		   the data being valid UTF-8 */
+		for (j = 1; j < sizeof(buf); j++)
+			buf[j] = rand() % 128; /* values 0..127, so NUL bytes can occur */
+		buflen = rand() % sizeof(buf); /* 0..sizeof(buf)-1 bytes of buf are used in this iteration */
+
+		str_truncate(encoded, 0); /* both strings are reused, so empty them first */
+		str_truncate(decoded, 0);
+		message_header_encode_data(buf, buflen, encoded);
+		message_header_decode_utf8(encoded->data, encoded->used,
+					   decoded, NULL);
+		test_assert(decoded->used == buflen &&
+			    memcmp(decoded->data, buf, buflen) == 0); /* decoded output must match the first buflen bytes of buf */
+	}
+	test_end();
+}
+
 int main(void)
 {
 	static void (*test_functions[])(void) = {
 		test_message_header_decode,
+		test_message_header_decode_encode_random,
 		NULL
 	};
 	return test_run(test_functions);
diff --git a/src/lib-mail/test-message-header-encode.c b/src/lib-mail/test-message-header-encode.c
index a2da561dd9..30c7cf3a64 100644
--- a/src/lib-mail/test-message-header-encode.c
+++ b/src/lib-mail/test-message-header-encode.c
@@ -183,12 +183,28 @@ static void test_message_header_encode(void)
 	test_end();
 }
 
+static void test_message_header_encode_data(void) /* checks the encoded output for input made only of NUL bytes */
+{
+	string_t *str = t_str_new(128);
+	static unsigned char nuls[10] = { 0, }; /* ten zero bytes */
+
+	test_begin("message header encode data");
+	message_header_encode_data(nuls, 1, str); /* a single NUL byte */
+	test_assert(strcmp(str_c(str), "=?utf-8?q?=00?=") == 0); /* expected as a "q" encoded-word */
+
+	str_truncate(str, 0); /* reset the output before the second case */
+	message_header_encode_data(nuls, sizeof(nuls), str); /* all ten NUL bytes */
+	test_assert(strcmp(str_c(str), "=?utf-8?b?AAAAAAAAAAAAAA==?=") == 0); /* expected as a "b" encoded-word */
+	test_end();
+}
+
 int main(void)
 {
 	static void (*test_functions[])(void) = {
 		test_message_header_encode_q,
 		test_message_header_encode_b,
 		test_message_header_encode,
+		test_message_header_encode_data,
 		NULL
 	};
 	return test_run(test_functions);