From: Timo Sirainen Date: Sun, 26 Jul 2009 20:32:22 +0000 (-0400) Subject: Split quoted-printable parsing to q-p body parsing and "Q" header parsing. X-Git-Tag: 2.0.alpha1~390 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1a1fcdbe27a8cee9a4c453a6b2f625a5be572a32;p=thirdparty%2Fdovecot%2Fcore.git Split quoted-printable parsing to q-p body parsing and "Q" header parsing. Also fixed several other issues in quoted-printable parsing to make it fully RFC compliant. --HG-- branch : HEAD --- diff --git a/src/lib-mail/Makefile.am b/src/lib-mail/Makefile.am index 4faf083b91..486c62ff84 100644 --- a/src/lib-mail/Makefile.am +++ b/src/lib-mail/Makefile.am @@ -60,6 +60,7 @@ test_programs = \ test-message-header-parser \ test-message-id \ test-message-parser \ + test-quoted-printable \ test-rfc2231-parser noinst_PROGRAMS = $(test_programs) @@ -100,6 +101,10 @@ test_message_parser_SOURCES = test-message-parser.c test_message_parser_LDADD = message-parser.lo message-header-parser.lo message-size.lo rfc822-parser.lo rfc2231-parser.lo $(test_libs) test_message_parser_DEPENDENCIES = message-parser.lo message-header-parser.lo message-size.lo rfc822-parser.lo rfc2231-parser.lo $(test_libs) +test_quoted_printable_SOURCES = test-quoted-printable.c +test_quoted_printable_LDADD = quoted-printable.lo $(test_libs) +test_quoted_printable_DEPENDENCIES = quoted-printable.lo $(test_libs) + test_rfc2231_parser_SOURCES = test-rfc2231-parser.c test_rfc2231_parser_LDADD = rfc2231-parser.lo rfc822-parser.lo $(test_libs) test_rfc2231_parser_DEPENDENCIES = rfc2231-parser.lo rfc822-parser.lo $(test_libs) diff --git a/src/lib-mail/message-header-decode.c b/src/lib-mail/message-header-decode.c index f263deb2b8..c0d0d2fd34 100644 --- a/src/lib-mail/message-header-decode.c +++ b/src/lib-mail/message-header-decode.c @@ -36,9 +36,9 @@ message_header_decode_encoded(const unsigned char *data, size_t size, switch (data[start_pos[0]+1]) { case 'q': case 'Q': - quoted_printable_decode(data + 
start_pos[1] + 1, - start_pos[2] - start_pos[1] - 1, - NULL, decodebuf); + quoted_printable_q_decode(data + start_pos[1] + 1, + start_pos[2] - start_pos[1] - 1, + decodebuf); break; case 'b': case 'B': diff --git a/src/lib-mail/quoted-printable.c b/src/lib-mail/quoted-printable.c index 61299f4576..a9b1202264 100644 --- a/src/lib-mail/quoted-printable.c +++ b/src/lib-mail/quoted-printable.c @@ -5,33 +5,46 @@ #include "hex-binary.h" #include "quoted-printable.h" +#define QP_IS_TRAILING_SPACE(c) \ + ((c) == ' ' || (c) == '\t') + void quoted_printable_decode(const unsigned char *src, size_t src_size, size_t *src_pos_r, buffer_t *dest) { char hexbuf[3]; - size_t src_pos, next; + size_t src_pos, pos, next; hexbuf[2] = '\0'; next = 0; for (src_pos = 0; src_pos < src_size; src_pos++) { - if (src[src_pos] != '_' && src[src_pos] != '=') + if (src[src_pos] != '=' && src[src_pos] != '\n') continue; - buffer_append(dest, src + next, src_pos - next); - next = src_pos+1; - - if (src[src_pos] == '_') { - buffer_append_c(dest, ' '); + if (src[src_pos] == '\n') { + /* drop trailing whitespace */ + pos = src_pos; + if (pos > 0 && src[pos-1] == '\r') + pos--; + while (pos > 0 && QP_IS_TRAILING_SPACE(src[pos-1])) + pos--; + buffer_append(dest, src + next, pos - next); + next = src_pos+1; + buffer_append(dest, "\r\n", 2); continue; } + /* '=' */ + buffer_append(dest, src + next, src_pos - next); + next = src_pos; + if (src_pos+1 >= src_size) break; if (src[src_pos+1] == '\n') { /* =\n -> skip both */ - src_pos++; + src_pos += 2; + next += 2; continue; } @@ -40,11 +53,61 @@ void quoted_printable_decode(const unsigned char *src, size_t src_size, if (src[src_pos+1] == '\r' && src[src_pos+2] == '\n') { /* =\r\n -> skip both */ + src_pos += 3; + next += 3; + continue; + } + + /* = */ + hexbuf[0] = src[src_pos+1]; + hexbuf[1] = src[src_pos+2]; + + if (hex_to_binary(hexbuf, dest) == 0) { src_pos += 2; + next = src_pos + 1; + } else { + /* non-hex data, show as-is */ + next = src_pos; + } + } + 
if (src_pos == src_size) { + /* add everything but trailing spaces */ + if (src_pos > 0 && src[src_pos-1] == '\r') + src_pos--; + while (src_pos > 0 && QP_IS_TRAILING_SPACE(src[src_pos-1])) + src_pos--; + buffer_append(dest, src + next, src_pos - next); + next = src_pos; + } + + *src_pos_r = next; +} + +void quoted_printable_q_decode(const unsigned char *src, size_t src_size, + buffer_t *dest) +{ + char hexbuf[3]; + size_t src_pos, next; + + hexbuf[2] = '\0'; + + next = 0; + for (src_pos = 0; src_pos < src_size; src_pos++) { + if (src[src_pos] != '_' && src[src_pos] != '=') + continue; + + buffer_append(dest, src + next, src_pos - next); + next = src_pos; + + if (src[src_pos] == '_') { + buffer_append_c(dest, ' '); next++; continue; } + if (src_pos+2 >= src_size) + break; + /* = */ hexbuf[0] = src[src_pos+1]; hexbuf[1] = src[src_pos+2]; @@ -57,9 +120,5 @@ void quoted_printable_decode(const unsigned char *src, size_t src_size, next = src_pos; } } - buffer_append(dest, src + next, src_size - next); - - if (src_pos_r != NULL) - *src_pos_r = src_pos; } diff --git a/src/lib-mail/quoted-printable.h b/src/lib-mail/quoted-printable.h index 7020bbc9fe..38b50dc19a 100644 --- a/src/lib-mail/quoted-printable.h +++ b/src/lib-mail/quoted-printable.h @@ -5,9 +5,11 @@ size of src, and may be same as src. Decoding errors are ignored. This function may be called multiple times for parsing the same stream. - If src_pos is non-NULL, it's updated to first non-translated character in - src. */ + src_pos is updated to first non-translated character in src. */ void quoted_printable_decode(const unsigned char *src, size_t src_size, size_t *src_pos_r, buffer_t *dest); +/* Decode MIME "Q" encoding. 
*/ +void quoted_printable_q_decode(const unsigned char *src, size_t src_size, + buffer_t *dest); #endif diff --git a/src/lib-mail/test-quoted-printable.c b/src/lib-mail/test-quoted-printable.c new file mode 100644 index 0000000000..1e23d986f2 --- /dev/null +++ b/src/lib-mail/test-quoted-printable.c @@ -0,0 +1,79 @@ +/* Copyright (c) 2007-2009 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "str.h" +#include "quoted-printable.h" +#include "test-common.h" + +static void test_quoted_printable_decode(void) +{ + const char *data[] = { + "foo \r\nbar=", "foo\r\nbar", + "foo =\nbar", "foo bar", + "foo =\r\nbar", "foo bar", + "foo \nbar=", "foo\r\nbar", + "=0A=0D ", "\n\r", + "foo_bar", "foo_bar", + "foo=", "foo", + "foo=A", "foo", + "foo=Ax", "foo=Ax", + "foo=Ax=xy", "foo=Ax=xy" + }; + buffer_t *buf; + unsigned int i, start, end, len; + size_t src_pos; + + test_begin("quoted printable decode"); + buf = buffer_create_dynamic(pool_datastack_create(), 128); + for (i = 0; i < N_ELEMENTS(data); i += 2) { + len = strlen(data[i]); + for (start = 0, end = 1; end <= len; ) { + quoted_printable_decode(CONST_PTR_OFFSET(data[i], start), + end - start, &src_pos, buf); + src_pos += start; + start = src_pos; + if (src_pos <= end) + end++; + else + end = src_pos + 1; + } + test_assert(strcmp(data[i+1], str_c(buf)) == 0); + buffer_set_used_size(buf, 0); + } + test_end(); +} + +static void test_quoted_printable_q_decode(void) +{ + const char *data[] = { + "=0A=0D ", "\n\r ", + "__foo__bar__", " foo bar ", + "foo=", "foo=", + "foo=A", "foo=A", + "foo=Ax", "foo=Ax", + "foo=Ax=xy", "foo=Ax=xy" + }; + buffer_t *buf; + unsigned int i; + + test_begin("quoted printable q decode"); + buf = buffer_create_dynamic(pool_datastack_create(), 128); + for (i = 0; i < N_ELEMENTS(data); i += 2) { + quoted_printable_q_decode((const void *)data[i], strlen(data[i]), + buf); + test_assert(strcmp(data[i+1], str_c(buf)) == 0); + buffer_set_used_size(buf, 0); + 
} + test_end(); +} + +int main(void) +{ + static void (*test_functions[])(void) = { + test_quoted_printable_decode, + test_quoted_printable_q_decode, + NULL + }; + return test_run(test_functions); +}