From: Markus Valentin <markus.valentin@open-xchange.com>
Date: Wed, 4 Nov 2020 13:38:10 +0000 (+0100)
Subject: lib-mail: Extend quoted-printable decoding tests
X-Git-Tag: 2.3.14.rc1~295
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e3b45a1e30c98dcf33bc79dc9370f1217eadc36a;p=thirdparty%2Fdovecot%2Fcore.git

lib-mail: Extend quoted-printable decoding tests
---

diff --git a/src/lib-mail/test-istream-qp-decoder.c b/src/lib-mail/test-istream-qp-decoder.c
index 7879825648..51de21bf1b 100644
--- a/src/lib-mail/test-istream-qp-decoder.c
+++ b/src/lib-mail/test-istream-qp-decoder.c
@@ -9,24 +9,92 @@ static const struct {
 	const char *input;
 	const char *output;
 	int stream_errno;
+	int eof;
 } tests[] = {
-	{ "p=C3=A4=C3=A4t=C3=B6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0 },
-	{ "p=c3=a4=c3=a4t=c3=b6s=  \n", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0 },
-	{ "p=c3=a4= \t \n=c3=\r\n=a4t=  \r\n=c3=b6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0 },
-
-	{ "p=c3=a4\rasdf", "p\xC3\xA4", EINVAL },
-	{ "p=c", "p", EPIPE },
-	{ "p=A", "p", EPIPE },
-	{ "p=Ax", "p", EINVAL },
-	{ "p=c3=a4=c3=a4t=c3=b6s=  ", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", EPIPE }
+	{ "p=C3=A4=C3=A4t=C3=B6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0 , 0 },
+	{ "p=c3=a4=c3=a4t=c3=b6s=  \n", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0, 0 },
+	{ "p=c3=a4= \t \n=c3=\r\n=a4t=  \r\n=c3=b6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0, 1 },
+	{ "p=c3=a4= \t \n=c3=\r\n=a4t=  \r\n=c3=b6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0, 2 },
+	{ "p=c3=a4= \t \n=c3=\r\n=a4t=  \r\n=c3=b6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0, 3 },
+	{ "p=c3=a4= \t \n=c3=\r\n=a4t=  \r\n=c3=b6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0, 4 },
+	{ "p=c3=a4= \t \n=c3=\r\n=a4t=  \r\n=c3=b6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0, 5 },
+	{ "p=c3=a4= \t \n=c3=\r\n=a4t=  \r\n=c3=b6s", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", 0, 7 },
+	{ "p=c3", "p\xC3", 0, 2 },
+	{ "=0A=0D  ", "\n\r", 0, 7 },
+	{ "foo_bar", "foo_bar", 0, 0 },
+	{ "\n\n", "\r\n\r\n", 0, 0 },
+	{ "\r\n\n\n\r\n", "\r\n\r\n\r\n\r\n", 0, 0 },
+	/* Unnecessarily encoded */
+	{ "=66=6f=6f=42=61=72", "fooBar", 0, 4 },
+	/* Expected to be encoded but not */
+	{ "\xc3\x9c""berm=c3=a4\xc3\x9figer Gebrauch", "\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch", 0, 9 },
+	/* Decode control characters */
+	{ "=0C=07", "\x0C\x07", 0, 0 },
+	/* Data */
+	{ "=DE=AD=BE=EF", "\xDE\xAD\xBE\xEF", 0, 0 },
+	/* Non hex data */
+	{ "=FJ=X1", "", EINVAL, 0 },
+	/* No content allowed after Soft Line Break */
+	{ "=C3=9C = ","\xc3\x9c ", EPIPE, 0 },
+	/* Boundary delimiter */
+	{ "=C3=9C=\r\n-------","\xc3\x9c-------", 0, 0 },
+	{ "=----------- =C3=9C","", EINVAL, 0 },
+	{ "=___________ =C3=9C","", EINVAL, 0 },
+	{ "___________ =C3=9C","___________ \xc3\x9c", 0, 0 },
+	{ "=2D=2D=2D=2D=2D=2D =C3=9C","------ \xc3\x9c", 0, 0 },
+	{ "=FC=83=BF=BF=BF=BF", "\xFC\x83\xBF\xBF\xBF\xBF", 0, 0 },
+	{ "=FE=FE=FF=FF", "\xFE\xFE\xFF\xFF", 0, 0 },
+	{ "\xFF=C3=9C\xFE\xFF""foobar", "\xFF\xc3\x9c\xFE\xFF""foobar", 0, 0 },
+
+	{ "p=c3=a4\rasdf", "p\xC3\xA4", EINVAL, 0 },
+	{ "=___________ \xc3\x9c","", EINVAL, 0 },
+	{ "p=c", "p", EPIPE, 0 },
+	{ "p=A", "p", EPIPE, 0 },
+	{ "p=Ax", "p", EINVAL, 0 },
+	{ "___________ \xc3\x9c=C3=9","___________ \xc3\x9c\xC3", EPIPE, 0},
+	{ "p=c3=a4=c3=a4t=c3=b6s=  ", "p\xC3\xA4\xC3\xA4t\xC3\xB6s", EPIPE, 0 },
+	/* Soft Line Break example from the RFC */
+	{
+		"Now's the time =\r\nfor all folk to come=\r\n to the aid of "
+		"their country.", "Now's the time for all folk to come to the"
+		" aid of their country.", 0, 41
+	},
 };
 
+/* TRUE if c is an ASCII hex digit: '0'-'9', 'A'-'F' or 'a'-'f'. */
+static bool is_hex(char c) {
+	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
+		(c >= 'a' && c <= 'f');
+}
+
+/* Count quoted-printable overhead bytes: 2 per "=XX" escape, 3 per "=\r\n".
+ * limit grows with each match; the NUL check keeps all reads in bounds. */
+static unsigned int
+get_encoding_size_diff(const char *qp_input, unsigned int limit)
+{
+	unsigned int overhead = 0;
+	for (unsigned int i = 0; i < limit && qp_input[i] != '\0'; i++) {
+		if (qp_input[i] != '=' || i+2 >= limit)
+			continue;
+		if (qp_input[i+1] == '\r' && qp_input[i+2] == '\n') {
+			overhead += 3;
+			limit += 3;
+			i += 2;
+		} else if (is_hex(qp_input[i+1]) && is_hex(qp_input[i+2])) {
+			overhead += 2;
+			limit += 2;
+			i += 2;
+		}
+	}
+	return overhead;
+}
+
 static void
 decode_test(const char *qp_input, const char *output, int stream_errno,
-	    unsigned int buffer_size)
+	    unsigned int buffer_size, unsigned int eof)
 {
 	size_t qp_input_len = strlen(qp_input);
-	struct istream *input_data, *input;
+	struct istream *input_data, *input_data_limited, *input;
 	const unsigned char *data;
 	size_t i, size;
 	string_t *str = t_str_new(32);
@@ -57,7 +125,49 @@ decode_test(const char *qp_input, const char *output, int stream_errno,
 	test_assert(ret == -1);
 	test_assert(input->stream_errno == stream_errno);
 
-	test_assert(strcmp(str_c(str), output) == 0);
+	if (stream_errno == 0) {
+		/* Test seeking on streams where the testcases do not
+		 * expect a specific errno already */
+		uoff_t v_off = input->v_offset;
+		/* Seeking backwards */
+		i_stream_seek(input, 0);
+		test_assert(input->v_offset == 0);
+
+		/* Seeking forward */
+		i_stream_seek(input, v_off+1);
+		test_assert(input->stream_errno == ESPIPE);
+	}
+	/* Compare outputs */
+	test_assert_strcmp(str_c(str), output);
+
+	if (eof > 0) {
+		/* Insert early EOF into input_data */
+		i_stream_seek(input_data, 0);
+		str_truncate(str, 0);
+		input_data_limited = i_stream_create_limit(input_data, eof);
+		test_istream_set_allow_eof(input_data_limited, TRUE);
+		i_stream_unref(&input);
+		input = i_stream_create_qp_decoder(input_data_limited);
+		while ((ret = i_stream_read_more(input, &data, &size)) > 0) {
+			str_append_data(str, data, size);
+			i_stream_skip(input, size);
+		}
+		test_assert(ret == -1);
+		/* If there is no error still assume that the result is valid
+		 * till artificial eof. */
+		if (input->stream_errno == 0) {
+			unsigned int encoding_margin =
+				get_encoding_size_diff(qp_input, eof);
+
+			/* Cut the expected output at eof of input*/
+			const char *expected_output =
+				t_strdup_printf("%.*s", eof-encoding_margin,
+						output);
+			test_assert_strcmp(str_c(str), expected_output);
+		}
+		test_assert(input->eof);
+	}
+
 	i_stream_unref(&input);
 	i_stream_unref(&input_data);
 }
@@ -70,7 +180,7 @@ static void test_istream_qp_decoder(void)
 		test_begin(t_strdup_printf("istream qp decoder %u", i+1));
 		for (j = 1; j < 10; j++) T_BEGIN {
 			decode_test(tests[i].input, tests[i].output,
-				    tests[i].stream_errno, j);
+				    tests[i].stream_errno, j, tests[i].eof);
 		} T_END;
 		test_end();
 	}
diff --git a/src/lib-mail/test-qp-decoder.c b/src/lib-mail/test-qp-decoder.c
index afc3604ad1..f005d7e9af 100644
--- a/src/lib-mail/test-qp-decoder.c
+++ b/src/lib-mail/test-qp-decoder.c
@@ -29,6 +29,7 @@ static void test_qp_decoder(void)
 		{ "\r\n\n\n\r\n", "\r\n\r\n\r\n\r\n", 0, 0 },
 
 		{ "foo=", "foo=", 4, -1 },
+		{ "foo= =66", "foo= f", 5, -1 },
 		{ "foo= \t", "foo= \t", 6, -1 },
 		{ "foo= \r", "foo= \r", 6, -1 },
 		{ "foo= \r bar", "foo= \r bar", 6, -1 },
@@ -41,7 +42,99 @@ static void test_qp_decoder(void)
 		{ WHITESPACE70"      7\n", WHITESPACE70"      7\r\n", 0, 0 },
 		{ WHITESPACE70"       8\n", WHITESPACE70"       8\r\n", 77, -1 },
 		{ WHITESPACE70"        9\n", WHITESPACE70"       9\r\n", 78, -1 },
-		{ WHITESPACE70"         0\n", WHITESPACE70"       0\r\n", 79, -1 }
+		{ WHITESPACE70"         0\n", WHITESPACE70"       0\r\n", 79, -1 },
+		/* Expect extra whitespace to be truncated */
+		{ WHITESPACE70"      7\n"WHITESPACE10"", WHITESPACE70"      7\r\n", 0, 0 },
+		{ WHITESPACE70"      7=\r\n"WHITESPACE10, WHITESPACE70"      7", 0, 0 },
+		/* Unnecessarily encoded */
+		{ "=66=6f=6f=42=61=72", "fooBar", 0, 0 },
+		/* Expected to be encoded but not */
+		{ "\xc3\x9c""berm=c3=a4\xc3\x9figer Gebrauch", "\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch", 0, 0 },
+		/* Decode control characters */
+		{ "=0C=07", "\x0C\x07", 0, 0 },
+		/* Data */
+		{ "=DE=AD=BE=EF", "\xDE\xAD\xBE\xEF", 0, 0 },
+		/* Non hex data */
+		{ "=FJ=X1", "=FJ=X1", 2, -1 },
+		/* No content allowed after Soft Line Break */
+		{ "=C3=9C = ","\xc3\x9c"" = ", 9, -1 },
+		/* Boundary delimiter */
+		{ "=C3=9C=\r\n-------","\xc3\x9c""-------", 0, 0 },
+		{ "=----------- =C3=9C","=----------- \xc3\x9c""", 1, -1 },
+		{ "=___________ =C3=9C","=___________ \xc3\x9c""", 1, -1 },
+		{ "___________ =C3=9C","___________ \xc3\x9c""", 0, 0 },
+		{ "=2D=2D=2D=2D=2D=2D =C3=9C","------ \xc3\x9c""", 0, 0 },
+		{ "=FC=83=BF=BF=BF=BF", "\xFC\x83\xBF\xBF\xBF\xBF", 0, 0 },
+		{ "=FE=FE=FF=FF", "\xFE\xFE\xFF\xFF", 0, 0 },
+		{ "\xFF=C3=9C\xFE\xFF""foobar", "\xFF\xc3\x9c""\xFE\xFF""foobar", 0, 0 },
+		/* Unnecessarily encoded and trailing whitespace */
+		{
+			"=66=6f=6f=42=61=72                         ",
+			"fooBar", 0, 0
+		},
+		/* Indicate error if encoded line is longer than 76 */
+		{
+			WHITESPACE70"       =C3=9C\n",
+			WHITESPACE70"       \xc3\x9c""\r\n", 77, -1
+		},
+		/* Soft Line Break example from the RFC */
+		{
+			"Now's the time =\r\nfor all folk to come=\r\n to the"
+			" aid of their country.",
+			"Now's the time for all folk to come to the aid of "
+			"their country.", 0, 0
+		},
+		{
+			"=C3=9Cberm=C3=A4=C3=9Figer Gebrauch",
+			"\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch", 0, 0
+		},
+		/* Softlinebreak without following content */
+		{
+			"=C3=9Cberm=C3=A4=C3=9Figer Gebrauch=",
+			"\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch=", 36, -1
+		},
+		/* Lowercase formally illegal but allowed for robustness */
+		{
+			"=c3=9cberm=c3=a4=c3=9figer Gebrauch",
+			"\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch", 0, 0
+		},
+		/* Control characters in input */
+		{
+			"=c3=9c=10berm=c3=a4=c3=9figer Geb=0Frauch",
+			"\xc3\x9c\x10""berm\xc3\xa4\xc3\x9figer Geb\x0Frauch", 0, 0
+		},
+		/* Trailing whitespace */
+		{
+			"Trailing Whitesp=C3=A4ce =\r\n        ",
+			"Trailing Whitesp\xc3\xa4""ce ", 0 ,0
+		},
+		{
+			"Trailing Whitesp=C3=A4ce         ",
+			"Trailing Whitesp\xc3\xa4""ce", 0 ,0
+		},
+		{
+			"=54=65=73=74=20=6D=65=73=73=61=67=65",
+			"Test message", 0 , 0
+		},
+		{
+			"=E3=81=93=E3=82=8C=E3=81=AF=E5=A2\r\n=83=E7=95=8C=E3"
+			"=81=AE=E3=81=82=E3=82=8B=E3=83=A1=E3=83=83=E3=82=BB="
+			"E3=83=BC=E3=82=B8=E3=81=A7=E3=81=99",
+			"\xE3\x81\x93\xE3\x82\x8C\xE3\x81\xAF\xE5\xA2\r\n\x83"
+			"\xE7\x95\x8C\xE3\x81\xAE\xE3\x81\x82\xE3\x82\x8B\xE3"
+			"\x83\xA1\xE3\x83\x83\xE3\x82\xBB\xE3\x83\xBC\xE3\x82"
+			"\xB8\xE3\x81\xA7\xE3\x81\x99", 0, 0
+		},
+		{
+			"=E3=81\xc3\xf1=93=E3=82=8\xff""C=E3=81=AF=E5=A2",
+			"\xE3\x81\xc3\xf1\x93\xE3\x82=8\xff""C\xE3\x81\xAF\xE5\xA2",
+			19, -1
+		},
+		{
+			"\x77Hello\x76=20 \x20 =E3=81\xc3\xf1=93=E3=82",
+			"wHellov    \xE3\x81\xc3\xf1\x93\xE3\x82",
+			0, 0
+		},
 	};
 	string_t *str;
 	unsigned int i, j;
@@ -64,7 +157,7 @@ static void test_qp_decoder(void)
 		}
 		test_assert_idx(ret == tests[i].ret, i);
 		test_assert_idx(ret == 0 || error_pos == tests[i].error_pos, i);
-		test_assert_idx(strcmp(str_c(str), tests[i].output) == 0, i);
+		test_assert_strcmp_idx(str_c(str), tests[i].output, i);
 
 		/* try in small pieces */
 		str_truncate(str, 0);
@@ -77,7 +170,7 @@ static void test_qp_decoder(void)
 		if (qp_decoder_finish(qp, &error) < 0)
 			ret = -1;
 		test_assert_idx(ret == tests[i].ret, i);
-		test_assert_idx(strcmp(str_c(str), tests[i].output) == 0, i);
+		test_assert_strcmp_idx(str_c(str), tests[i].output, i);
 
 		qp_decoder_deinit(&qp);
 		str_truncate(str, 0);
diff --git a/src/lib-mail/test-quoted-printable.c b/src/lib-mail/test-quoted-printable.c
index a96b324be9..3cce59eace 100644
--- a/src/lib-mail/test-quoted-printable.c
+++ b/src/lib-mail/test-quoted-printable.c
@@ -14,7 +14,19 @@ static void test_quoted_printable_q_decode(void)
 		"foo=", "foo=",
 		"foo=A", "foo=A",
 		"foo=Ax", "foo=Ax",
-		"foo=Ax=xy", "foo=Ax=xy"
+		"foo=Ax=xy", "foo=Ax=xy",
+		"=C3=9Cberm=C3=A4=C3=9Figer Gebrauch", "\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch",
+		/* Lowercase formally illegal but allowed for robustness */
+		"=c3=9cberm=c3=a4=c3=9figer Gebrauch", "\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch",
+		/* Unnecessarily encoded */
+		"=66=6f=6f=42=61=72", "fooBar",
+		/* Expected to be encoded but not */
+		"\xc3\x9c""berm=c3=a4\xc3\x9figer Gebrauch", "\xc3\x9c""berm\xc3\xa4\xc3\x9figer Gebrauch",
+		/* Decode control characters */
+		"=0C=07", "\x0C\x07",
+		"=DE=AD=BE=EF", "\xDE\xAD\xBE\xEF",
+		/* Non-Hex data */
+		"=FJ=X1", "=FJ=X1",
 	};
 	buffer_t *buf;
 	unsigned int i;
@@ -24,7 +36,7 @@ static void test_quoted_printable_q_decode(void)
 	for (i = 0; i < N_ELEMENTS(data); i += 2) {
 		quoted_printable_q_decode((const void *)data[i], strlen(data[i]),
 					  buf);
-		test_assert(strcmp(data[i+1], str_c(buf)) == 0);
+		test_assert_strcmp_idx(data[i+1], str_c(buf), i/2);
 		buffer_set_used_size(buf, 0);
 	}
 	test_end();