From: Timo Sirainen Date: Wed, 25 Apr 2018 11:17:34 +0000 (+0300) Subject: lib-mail: Change NUL -> 0x80 replacement to use unicode replacement char instead X-Git-Tag: 2.3.4~242 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a5875be4f45ead2272d00637f8c7467c6e43fa5c;p=thirdparty%2Fdovecot%2Fcore.git lib-mail: Change NUL -> 0x80 replacement to use unicode replacement char instead Using 0x80 produces invalid UTF-8 output, which we should avoid. Some places in Dovecot code already require that input is valid UTF-8, so it's also safer and easier to use the unicode replacement character. Fixes: Panic: file fts-tokenizer-generic.c: line 210 (fts_tokenizer_generic_simple_next): assertion failed: (char_size > 0) --- diff --git a/src/lib-mail/message-part-data.c b/src/lib-mail/message-part-data.c index ca06462c46..08c1f0af76 100644 --- a/src/lib-mail/message-part-data.c +++ b/src/lib-mail/message-part-data.c @@ -173,18 +173,23 @@ envelope_get_field(const char *name) static const char * hdr_strdup(pool_t pool, const unsigned char *data, size_t size) { - char *dest = p_malloc(pool, size+1); - if (memchr(data, '\0', size) == NULL) { /* fast path */ + char *dest = p_malloc(pool, size+1); memcpy(dest, data, size); - } else { - /* slow path - this could be made faster, but it should be - rare so keep it simple */ - for (size_t i = 0; i < size; i++) - dest[i] = data[i] == '\0' ? 
0x80 : data[i]; + return dest; + } + + /* slow path - this could be made faster, but it should be + rare so keep it simple */ + string_t *str = str_new(pool, size+2); + for (size_t i = 0; i < size; i++) { + if (data[i] != '\0') + str_append_c(str, data[i]); + else + str_append(str, UNICODE_REPLACEMENT_CHAR_UTF8); } - return dest; + return str_c(str); } void message_part_envelope_parse_from_header(pool_t pool, diff --git a/src/lib-mail/rfc2231-parser.h b/src/lib-mail/rfc2231-parser.h index 0db536fd4e..95cadc2e6d 100644 --- a/src/lib-mail/rfc2231-parser.h +++ b/src/lib-mail/rfc2231-parser.h @@ -3,9 +3,9 @@ /* Parse all content parameters using rfc822_parse_content_param() and return them as a NULL-terminated [key, value] array. RFC 2231-style continuations - are merged to a single key. NULs are converted into 0x80. Returns -1 if some - of the input was invalid (but valid key/value pairs are still returned), 0 - if everything looked ok. */ + are merged to a single key. NULs are converted into unicode replacement + character (U+FFFD). Returns -1 if some of the input was invalid (but valid + key/value pairs are still returned), 0 if everything looked ok. 
*/ int ATTR_NOWARN_UNUSED_RESULT rfc2231_parse(struct rfc822_parser_context *ctx, const char *const **result_r); diff --git a/src/lib-mail/rfc822-parser.h b/src/lib-mail/rfc822-parser.h index fa41a6e21b..c001f761f0 100644 --- a/src/lib-mail/rfc822-parser.h +++ b/src/lib-mail/rfc822-parser.h @@ -1,8 +1,10 @@ #ifndef RFC822_PARSER_H #define RFC822_PARSER_H +#include "unichar.h" + /* This can be used as a common NUL replacement character */ -#define RFC822_NUL_REPLACEMENT_STR "\x80" +#define RFC822_NUL_REPLACEMENT_STR UNICODE_REPLACEMENT_CHAR_UTF8 struct rfc822_parser_context { const unsigned char *data, *end; diff --git a/src/lib-mail/test-message-address.c b/src/lib-mail/test-message-address.c index 90baac5e34..0f5cae2a4d 100644 --- a/src/lib-mail/test-message-address.c +++ b/src/lib-mail/test-message-address.c @@ -324,8 +324,8 @@ static void test_message_address_nuls(void) const unsigned char input[] = "\"user\0nuls\"@[domain\0nuls] (comment\0nuls)"; const struct message_address output = { - NULL, "comment\x80nuls", NULL, "user\x80nuls", - "[domain\x80nuls]", FALSE + NULL, "comment\xEF\xBF\xBDnuls", NULL, "user\xEF\xBF\xBDnuls", + "[domain\xEF\xBF\xBDnuls]", FALSE }; const struct message_address *addr; diff --git a/src/lib-mail/test-rfc2231-parser.c b/src/lib-mail/test-rfc2231-parser.c index 3c7e1b61f7..f54910adff 100644 --- a/src/lib-mail/test-rfc2231-parser.c +++ b/src/lib-mail/test-rfc2231-parser.c @@ -18,7 +18,7 @@ static void test_rfc2231_parser(void) "; key*1=baz"; const char *output[] = { "key", - "f\x80oobazba%", + "f\xEF\xBF\xBDoobazba%", "key2*", "''ab%25", "key3*",