message-date.c \
message-decoder.c \
message-header-decode.c \
+ message-header-encode.c \
message-header-parser.c \
message-id.c \
message-parser.c \
message-date.h \
message-decoder.h \
message-header-decode.h \
+ message-header-encode.h \
message-header-parser.h \
message-id.h \
message-parser.h \
test-message-date \
test-message-decoder \
test-message-header-decode \
+ test-message-header-encode \
test-message-header-parser \
test-message-id \
test-message-parser \
test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo $(test_libs)
test_message_header_decode_DEPENDENCIES = message-header-decode.lo quoted-printable.lo $(test_libs)
+test_message_header_encode_SOURCES = test-message-header-encode.c
+test_message_header_encode_LDADD = message-header-encode.lo $(test_libs)
+test_message_header_encode_DEPENDENCIES = message-header-encode.lo $(test_libs)
+
test_message_header_parser_SOURCES = test-message-header-parser.c
test_message_header_parser_LDADD = message-header-parser.lo $(test_libs)
test_message_header_parser_DEPENDENCIES = message-header-parser.lo $(test_libs)
--- /dev/null
+/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "base64.h"
+#include "message-header-encode.h"
+
+/* Number of bytes of encoded-word framing around the payload: the
+   "=?utf-8?q?" prefix plus the "?=" suffix. The "=?utf-8?b?" prefix used by
+   the base64 encoder has the same length. The expansion is a strlen() call,
+   so its type is size_t. */
+#define MIME_WRAPPER_LEN (strlen("=?utf-8?q?""?="))
+/* Line length budget the encoders use when deciding where to fold. */
+#define MIME_MAX_LINE_LEN 76
+
+/* True for the characters this file treats as word separators: space, tab
+   and newline. The argument is evaluated up to three times, so it must not
+   have side effects. */
+#define IS_LWSP(c) \
+ ((c) == ' ' || (c) == '\t' || (c) == '\n')
+
+/* Returns TRUE if the byte at input[i] forces the surrounding word to be
+   MIME-encoded: either the byte has its high bit set, or it is the '=' of a
+   "=?" pair located at the very beginning of the input or right after LWSP.
+   input[i] must not be the terminating NUL, because input[i+1] may be read. */
+static bool input_idx_need_encoding(const unsigned char *input, unsigned int i)
+{
+	const unsigned char ch = input[i];
+
+	if ((ch & 0x80) != 0)
+		return TRUE;
+
+	/* anything other than a "=?" pair is harmless */
+	if (ch != '=' || input[i+1] != '?')
+		return FALSE;
+
+	/* "=?" matters only where a word begins */
+	if (i == 0 || IS_LWSP(input[i-1]))
+		return TRUE;
+	return FALSE;
+}
+
+/* Returns the number of bytes in str that follow its last '\n', or the whole
+   length of str if it contains no '\n'. */
+static unsigned int str_last_line_len(string_t *str)
+{
+	const unsigned char *data = str_data(str);
+	unsigned int total = str_len(str);
+	unsigned int line_start;
+
+	/* walk backwards until the byte just before line_start is a newline */
+	for (line_start = total; line_start > 0; line_start--) {
+		if (data[line_start-1] == '\n')
+			break;
+	}
+	return total - line_start;
+}
+
+/* Appends input[0..len-1] to output as one or more "Q" encoded-words
+   ("=?utf-8?q?...?="), folding with "\n\t" between encoded-words so that a
+   line stays within MIME_MAX_LINE_LEN columns. The length of the text already
+   on output's last line is taken into account for the first line.
+
+   Space is written as '_'; '=', '?', '_', bytes below 32 and bytes with the
+   high bit set are written as "=XX" (two uppercase hex digits); every other
+   byte is copied as is.
+
+   When a fold would land in the middle of a UTF-8 sequence, the bytes of that
+   sequence already written are taken back so that the whole character moves
+   to the next line. */
+void message_header_encode_q(const unsigned char *input, unsigned int len,
+			     string_t *output)
+{
+	unsigned int i, j, word_start, line_len, line_len_left;
+	bool folded = FALSE;
+
+	line_len = str_last_line_len(output);
+	if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - 3) {
+		/* not even one "=XX" would fit after the wrapper */
+		str_append(output, "\n\t");
+		line_len = 1;
+		folded = TRUE;
+	}
+
+	str_append(output, "=?utf-8?q?");
+	line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len;
+	/* index of the first input byte written into the current
+	   encoded-word */
+	word_start = 0;
+	for (i = 0; i < len; i++) {
+		if (line_len_left < 3) {
+			/* if we're not at the beginning of a character,
+			   go backwards until we are. Only step over bytes that
+			   were written as "=XX" (high bit set) and never past
+			   the start of the current encoded-word, so that the
+			   truncation below removes exactly what was written. */
+			j = i;
+			while (j > word_start && (input[j] & 0xc0) == 0x80 &&
+			       (input[j-1] & 0x80) != 0)
+				j--;
+			if (j == word_start && folded) {
+				/* taking everything back from a line that
+				   already had the full width would make no
+				   progress (only possible with malformed
+				   UTF-8): split right here instead */
+				j = i;
+			}
+			if (j != i) {
+				str_truncate(output,
+					     str_len(output) - (i - j) * 3);
+				i = j;
+			}
+			str_append(output, "?=\n\t=?utf-8?q?");
+			line_len_left = MIME_MAX_LINE_LEN -
+				MIME_WRAPPER_LEN - 1;
+			word_start = i;
+			folded = TRUE;
+		}
+		switch (input[i]) {
+		case ' ':
+			str_append_c(output, '_');
+			break;
+		case '=':
+		case '?':
+		case '_':
+			/* "=XX" takes three columns, one of which is
+			   accounted for after the switch */
+			line_len_left -= 2;
+			str_printfa(output, "=%02X", input[i]);
+			break;
+		default:
+			if (input[i] < 32 || (input[i] & 0x80) != 0) {
+				line_len_left -= 2;
+				/* zero-pad: bytes below 0x10 must still give
+				   two hex digits */
+				str_printfa(output, "=%02X", input[i]);
+			} else {
+				str_append_c(output, input[i]);
+			}
+			break;
+		}
+		line_len_left--;
+	}
+	str_append(output, "?=");
+}
+
+/* Appends input[0..len-1] to output as one or more "B" (base64)
+   encoded-words ("=?utf-8?b?...?="), separated by "\n\t" folds. Each line's
+   payload is sized so that the encoded-word fits into what is left of
+   MIME_MAX_LINE_LEN; the length of the text already on output's last line is
+   taken into account for the first line. Splits are made where the next byte
+   is not a UTF-8 continuation byte, so characters are not divided between
+   encoded-words. Nothing is written for an empty input. */
+void message_header_encode_b(const unsigned char *input, unsigned int len,
+			     string_t *output)
+{
+	unsigned int line_len, line_len_left, max;
+
+	line_len = str_last_line_len(output);
+	if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN) {
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+
+	for (;;) {
+		line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len;
+		max = MAX_BASE64_DECODED_SIZE(line_len_left);
+		do {
+			max--;
+			if (max >= len) {
+				/* everything that is left is the candidate.
+				   input[len] is outside the input, so it must
+				   not be examined. */
+				max = len;
+			} else {
+				/* all of it doesn't fit. find a character where we
+				   can split it from. */
+				while (max > 0 && (input[max] & 0xc0) == 0x80)
+					max--;
+			}
+		} while (MAX_BASE64_ENCODED_SIZE(max) > line_len_left &&
+			 max > 0);
+
+		if (max == 0 && len > 0 && line_len <= 1) {
+			/* no split point was found even though the line is as
+			   empty as it can get, so trying again on another line
+			   would never end. This needs a run of continuation
+			   bytes longer than a line holds, i.e. malformed
+			   UTF-8. Split at a byte boundary instead. */
+			max = len < line_len_left ? len : line_len_left;
+			while (max > 1 &&
+			       MAX_BASE64_ENCODED_SIZE(max) > line_len_left)
+				max--;
+		}
+
+		if (max > 0) {
+			str_append(output, "=?utf-8?b?");
+			base64_encode(input, max, output);
+			str_append(output, "?=");
+		}
+
+		input += max;
+		len -= max;
+
+		if (len == 0)
+			break;
+
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+}
+
+/* Appends the NUL-terminated _input to output, MIME-encoding the part of it
+   that needs encoding. If no byte needs encoding, the input is appended
+   unchanged. Otherwise the span from the beginning of the first word that
+   needs encoding to the end of the last such word is encoded as a whole
+   (including any plain words in between), and the text before and after that
+   span is copied as is. "Q" encoding is used unless its estimated size is
+   clearly larger than base64's. */
+void message_header_encode(const char *_input, string_t *output)
+{
+	const unsigned char *input = (const unsigned char *)_input;
+	unsigned int pos, span_begin, span_end;
+	unsigned int special_count, span_len, b_estimate, q_estimate;
+	bool prefer_q;
+
+	/* locate the first byte that needs encoding */
+	pos = 0;
+	while (input[pos] != '\0' && !input_idx_need_encoding(input, pos))
+		pos++;
+	if (input[pos] == '\0') {
+		/* plain text only */
+		str_append(output, _input);
+		return;
+	}
+
+	/* extend backwards to the beginning of that word */
+	span_begin = pos;
+	while (span_begin > 0 && !IS_LWSP(input[span_begin-1]))
+		span_begin--;
+
+	/* locate the last byte that needs encoding, counting all of them */
+	special_count = 1;
+	span_end = pos + 1;
+	for (pos++; input[pos] != '\0'; pos++) {
+		if (!input_idx_need_encoding(input, pos))
+			continue;
+		span_end = pos + 1;
+		special_count++;
+	}
+	/* extend forwards to the end of that word */
+	for (; input[span_end] != '\0'; span_end++) {
+		if (IS_LWSP(input[span_end]))
+			break;
+	}
+
+	/* Q is preferred as long as two thirds of its estimated size doesn't
+	   exceed the base64 size */
+	span_len = span_end - span_begin;
+	b_estimate = MAX_BASE64_ENCODED_SIZE(span_len);
+	q_estimate = span_len + special_count*3;
+	prefer_q = q_estimate*2/3 <= b_estimate;
+
+	/* untouched head, encoded span, untouched tail */
+	str_append_n(output, input, span_begin);
+	if (prefer_q)
+		message_header_encode_q(input + span_begin, span_len, output);
+	else
+		message_header_encode_b(input + span_begin, span_len, output);
+	str_append(output, _input + span_end);
+}
--- /dev/null
+#ifndef MESSAGE_HEADER_ENCODE_H
+#define MESSAGE_HEADER_ENCODE_H
+
+/* Append the NUL-terminated UTF-8 input to output, encoding it wherever
+   necessary. Input that contains no bytes with the high bit set and no "=?"
+   at the beginning of a word is appended unchanged. Otherwise everything from
+   the first word that needs encoding to the last one is encoded as a single
+   span using either "Q" or "B" encoding, whichever is estimated to be more
+   suitable, and the text around that span is copied as is. */
+void message_header_encode(const char *input, string_t *output);
+
+/* Append the whole UTF-8 input (len bytes, no NUL terminator needed) to
+   output using "Q" or "B" encoding. The output is split into multiple lines
+   with "\n\t" if necessary. The length of the first line is looked up from
+   the output string, i.e. text already at the end of output counts towards
+   the first line's length. */
+void message_header_encode_q(const unsigned char *input, unsigned int len,
+ string_t *output);
+void message_header_encode_b(const unsigned char *input, unsigned int len,
+ string_t *output);
+
+#endif
--- /dev/null
+/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "base64.h"
+#include "buffer.h"
+#include "str.h"
+#include "message-header-encode.h"
+#include "test-common.h"
+
+/* Verifies the output of message_header_encode_q() for the test input.
+
+   str is the whole output string and i the offset where the encoder started
+   writing (the number of bytes that were on the line beforehand). Starting
+   from there str must consist only of "=?utf-8?q?...?=" encoded-words
+   separated by "\n\t". If starts_with_a is set, the first encoded-word must
+   begin with a literal 'a'; the rest of the payload must be a sequence of
+   "=C3=A4". No line may be longer than 76 bytes, counting the text that
+   preceded the encoder's output on the first line and the tab on folded
+   lines. Returns TRUE only if all of that holds and exactly 40 "=C3=A4"
+   characters were seen. */
+static bool verify_q(const char *str, unsigned int i, bool starts_with_a)
+{
+	/* the first line begins at the start of str, not at i: the text
+	   before the encoder's output takes up room on that line too */
+	unsigned int line_start = 0, char_count = 0;
+
+	if (strncmp(str+i, "\n\t", 2) == 0) {
+		/* the encoder folded before writing anything. the new line
+		   begins at the tab. */
+		i += 2;
+		line_start = i - 1;
+	}
+
+	for (;;) {
+		if (strncmp(str+i, "=?utf-8?q?", 10) != 0)
+			return FALSE;
+		i += 10;
+
+		if (starts_with_a) {
+			if (str[i] != 'a')
+				return FALSE;
+			starts_with_a = FALSE;
+			i++;
+		}
+		while (strncmp(str+i, "?=", 2) != 0) {
+			/* characters must never be split between
+			   encoded-words */
+			if (strncmp(str+i, "=C3=A4", 6) != 0)
+				return FALSE;
+			i += 6;
+			char_count++;
+		}
+		i += 2;
+		if (i - line_start > 76)
+			return FALSE;
+
+		if (str[i] == '\0')
+			break;
+		if (strncmp(str+i, "\n\t", 2) != 0)
+			return FALSE;
+		i += 2;
+		line_start = i - 1;
+	}
+	return char_count == 40;
+}
+
+/* Runs message_header_encode_q() on 'a' followed by 40 "ä" characters, both
+   with and without the leading 'a', appending to a line that already holds
+   0..79 bytes, and checks every result with verify_q(). */
+static void test_message_header_encode_q(void)
+{
+	string_t *text = t_str_new(100);
+	string_t *str = t_str_new(512);
+	unsigned int i, skip, pad;
+
+	test_begin("message header encode q");
+
+	str_append_c(text, 'a');
+	for (pad = 40; pad > 0; pad--)
+		str_append(text, "ä");
+
+	for (i = 0; i < 80; i++) {
+		for (skip = 0; skip < 2; skip++) {
+			/* put i bytes on the line: i-1 'X's and a space */
+			str_truncate(str, 0);
+			if (i != 0) {
+				for (pad = i - 1; pad > 0; pad--)
+					str_append_c(str, 'X');
+				str_append_c(str, ' ');
+			}
+
+			/* skip=1 leaves out the leading 'a' */
+			message_header_encode_q(str_data(text) + skip,
+						str_len(text) - skip, str);
+			test_assert(verify_q(str_c(str), i, !skip));
+		}
+	}
+	test_end();
+}
+
+/* Verifies the output of message_header_encode_b() for the test input.
+
+   str is the whole output string and i the offset where the encoder started
+   writing (the number of bytes that were on the line beforehand). Starting
+   from there str must consist only of "=?utf-8?b?...?=" encoded-words
+   separated by "\n\t". Each payload is base64-decoded; if starts_with_a is
+   set, the first decoded payload must begin with 'a', and everything else
+   must be whole 0xC3 0xA4 pairs. No line may be longer than 76 bytes,
+   counting the text that preceded the encoder's output on the first line and
+   the tab on folded lines. Returns TRUE only if all of that holds and exactly
+   40 pairs were seen. */
+static bool verify_b(const char *str, unsigned int i, bool starts_with_a)
+{
+	/* the first line begins at the start of str, not at i: the text
+	   before the encoder's output takes up room on that line too */
+	unsigned int line_start = 0, start, j, char_count = 0;
+	char bufdata[1000];
+	buffer_t buf;
+
+	buffer_create_data(&buf, bufdata, sizeof(bufdata));
+	if (strncmp(str+i, "\n\t", 2) == 0) {
+		/* the encoder folded before writing anything. the new line
+		   begins at the tab. */
+		i += 2;
+		line_start = i - 1;
+	}
+
+	for (;;) {
+		if (strncmp(str+i, "=?utf-8?b?", 10) != 0)
+			return FALSE;
+		i += 10;
+
+		/* the payload extends up to the '?' of the closing "?=" */
+		start = i;
+		for (; str[i] != '?'; i++) {
+			if (str[i] == '\0')
+				return FALSE;
+		}
+		buffer_set_used_size(&buf, 0);
+		if (base64_decode(str+start, i-start, NULL, &buf) < 0)
+			return FALSE;
+		i++;
+
+		if (!starts_with_a)
+			j = 0;
+		else {
+			/* don't look at bufdata[0] unless something was
+			   decoded into it */
+			if (buf.used == 0 || bufdata[0] != 'a')
+				return FALSE;
+			starts_with_a = FALSE;
+			j = 1;
+		}
+		/* only compare complete pairs, so that bufdata is never read
+		   beyond the decoded data. a leftover byte is caught by the
+		   check after the loop. */
+		for (; j + 1 < buf.used; j += 2) {
+			if (bufdata[j] != '\xc3' || bufdata[j+1] != '\xa4')
+				return FALSE;
+			char_count++;
+		}
+		if (j != buf.used)
+			return FALSE;
+
+		if (str[i++] != '=')
+			return FALSE;
+
+		if (i - line_start > 76)
+			return FALSE;
+
+		if (str[i] == '\0')
+			break;
+		if (strncmp(str+i, "\n\t", 2) != 0)
+			return FALSE;
+		i += 2;
+		line_start = i - 1;
+	}
+	return char_count == 40;
+}
+
+/* Runs message_header_encode_b() on 'a' followed by 40 "ä" characters, both
+   with and without the leading 'a', appending to a line that already holds
+   0..79 bytes, and checks every result with verify_b(). */
+static void test_message_header_encode_b(void)
+{
+	string_t *text = t_str_new(100);
+	string_t *str = t_str_new(512);
+	unsigned int i, skip, pad;
+
+	test_begin("message header encode b");
+
+	str_append_c(text, 'a');
+	for (pad = 40; pad > 0; pad--)
+		str_append(text, "ä");
+
+	for (i = 0; i < 80; i++) {
+		for (skip = 0; skip < 2; skip++) {
+			/* put i bytes on the line: i-1 'X's and a space */
+			str_truncate(str, 0);
+			if (i != 0) {
+				for (pad = i - 1; pad > 0; pad--)
+					str_append_c(str, 'X');
+				str_append_c(str, ' ');
+			}
+
+			/* skip=1 leaves out the leading 'a' */
+			message_header_encode_b(str_data(text) + skip,
+						str_len(text) - skip, str);
+			test_assert(verify_b(str_c(str), i, !skip));
+		}
+	}
+	test_end();
+}
+
+/* Feeds fixed inputs to message_header_encode() and compares the results
+   with the expected output strings. */
+static void test_message_header_encode(void)
+{
+	/* pairs of input and expected output */
+	const char *data[] = {
+		"a b", "a b",
+		"a bcäde f", "a =?utf-8?q?bc=C3=A4de?= f",
+		"a ää ä b", "a =?utf-8?b?w6TDpCDDpA==?= b",
+		"ä a ä", "=?utf-8?q?=C3=A4_a_=C3=A4?=",
+		"ää a ä", "=?utf-8?b?w6TDpCBhIMOk?=",
+	};
+	string_t *str = t_str_new(128);
+	unsigned int i = 0;
+
+	test_begin("message header encode");
+	while (i < N_ELEMENTS(data)) {
+		/* data[i] is the input, data[i+1] what it must become */
+		str_truncate(str, 0);
+		message_header_encode(data[i], str);
+		test_assert(strcmp(str_c(str), data[i+1]) == 0);
+		i += 2;
+	}
+	test_end();
+}
+
+/* Hands the NULL-terminated list of test functions to test_run() and
+   returns its result as the exit status. */
+int main(void)
+{
+	static void (*tests[])(void) = {
+		test_message_header_encode_q,
+		test_message_header_encode_b,
+		test_message_header_encode,
+		NULL
+	};
+
+	return test_run(tests);
+}