From: Timo Sirainen Date: Wed, 13 Jan 2021 17:50:50 +0000 (+0200) Subject: lib-imap: Add imap_utf7_to_utf8_escaped() and imap_escaped_utf8_to_utf7() X-Git-Tag: 2.3.14.rc1~57 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bffa6a9c0cbe5fd906969325e268406017c09ce7;p=thirdparty%2Fdovecot%2Fcore.git lib-imap: Add imap_utf7_to_utf8_escaped() and imap_escaped_utf8_to_utf7() These can be used to convert invalid mUTF-7 to escaped UTF-8 and back to the original invalid input. --- diff --git a/src/lib-imap/imap-utf7.c b/src/lib-imap/imap-utf7.c index ba97d90551..7ea53f535a 100644 --- a/src/lib-imap/imap-utf7.c +++ b/src/lib-imap/imap-utf7.c @@ -55,25 +55,50 @@ mbase64_encode(string_t *dest, const unsigned char *in, size_t len) str_append_c(dest, '-'); } -static const char *imap_utf8_first_encode_char(const char *str) +static const char * +imap_utf8_first_encode_char(const char *str, char escape_char) { const char *p; for (p = str; *p != '\0'; p++) { - if (*p == '&' || *p < 0x20 || *p >= 0x7f) + if (*p == '&' || *p < 0x20 || *p >= 0x7f || *p == escape_char) return p; } return NULL; } -int imap_utf8_to_utf7(const char *src, string_t *dest) +int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r) +{ + unsigned int i = 0; + unsigned char c = 0; + + /* NOTE: Only lowercase hex characters are allowed so the output is + reversible. 
*/ + for (;;) { + if (str[i] >= '0' && str[i] <= '9') + c += str[i] - '0'; + else if (str[i] >= 'a' && str[i] <= 'f') + c += str[i] - 'a' + 10; + else + return -1; + if (++i == 2) + break; + c *= 0x10; + } + *chr_r = c; + return 0; +} + +static int +imap_utf8_to_utf7_int(const char *src, char escape_char, string_t *dest) { const char *p; unichar_t chr; uint8_t *utf16, *u; uint16_t u16; + unsigned char c; - p = imap_utf8_first_encode_char(src); + p = imap_utf8_first_encode_char(src, escape_char); if (p == NULL) { /* no characters that need to be encoded */ str_append(dest, src); @@ -84,6 +109,12 @@ int imap_utf8_to_utf7(const char *src, string_t *dest) str_append_data(dest, src, p-src); utf16 = t_malloc0(MALLOC_MULTIPLY(strlen(p), 2)); while (*p != '\0') { + if (*p == escape_char && + imap_escaped_utf8_hex_to_char(p+1, &c) == 0) { + str_append_c(dest, c); + p += 3; + continue; + } if (*p == '&') { str_append(dest, "&-"); p++; @@ -118,12 +149,24 @@ int imap_utf8_to_utf7(const char *src, string_t *dest) return 0; } +int imap_utf8_to_utf7(const char *src, string_t *dest) +{ + return imap_utf8_to_utf7_int(src, '\0', dest); +} + +int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest) +{ + i_assert(escape_char != '&'); + + return imap_utf8_to_utf7_int(src, escape_char, dest); +} + int t_imap_utf8_to_utf7(const char *src, const char **dest_r) { string_t *str; int ret; - if (imap_utf8_first_encode_char(src) == NULL) { + if (imap_utf8_first_encode_char(src, '\0') == NULL) { *dest_r = src; return 0; } @@ -253,14 +296,18 @@ static int mbase64_decode_to_utf8(string_t *dest, const char **_src) return 0; } -int imap_utf7_to_utf8(const char *src, string_t *dest) +static int +imap_utf7_to_utf8_int(const char *src, const char *escape_chars, string_t *dest) { const char *p; for (p = src; *p != '\0'; p++) { - if (*p < 0x20 || *p >= 0x7f) - return -1; - if (*p == '&') + if (*p < 0x20 || *p >= 0x7f) { + if (escape_chars[0] == '\0') + return -1; + break; + } + 
if (*p == '&' || strchr(escape_chars, *p) != NULL) break; } if (*p == '\0') { @@ -272,13 +319,23 @@ int imap_utf7_to_utf8(const char *src, string_t *dest) /* at least one encoded character */ str_append_data(dest, src, p-src); while (*p != '\0') { - if (*p == '&') { + if (strchr(escape_chars, *p) != NULL || + *p < 0x20 || *p >= 0x7f) { + str_printfa(dest, "%c%02x", escape_chars[0], + (unsigned char)*p); + p++; + } else if (*p == '&') { if (*++p == '-') { str_append_c(dest, '&'); p++; } else { - if (mbase64_decode_to_utf8(dest, &p) < 0) - return -1; + size_t orig_size = str_len(dest); + if (mbase64_decode_to_utf8(dest, &p) < 0) { + if (escape_chars[0] == '\0') + return -1; + str_truncate(dest, orig_size); + str_printfa(dest, "%c26", escape_chars[0]); + } } } else { str_append_c(dest, *p++); @@ -287,6 +344,20 @@ int imap_utf7_to_utf8(const char *src, string_t *dest) return 0; } +int imap_utf7_to_utf8(const char *src, string_t *dest) +{ + return imap_utf7_to_utf8_int(src, "", dest); +} + +void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars, + string_t *dest) +{ + i_assert(escape_chars[0] != '&'); + + if (imap_utf7_to_utf8_int(src, escape_chars, dest) < 0) + i_unreached(); +} + bool imap_utf7_is_valid(const char *src) { const char *p; diff --git a/src/lib-imap/imap-utf7.h b/src/lib-imap/imap-utf7.h index 5d7875f198..d7ae3066e6 100644 --- a/src/lib-imap/imap-utf7.h +++ b/src/lib-imap/imap-utf7.h @@ -5,9 +5,23 @@ valid UTF-8. */ int imap_utf8_to_utf7(const char *src, string_t *dest); int t_imap_utf8_to_utf7(const char *src, const char **dest_r); +/* Like imap_utf8_to_utf7(), but decode all <escape_char><hex> instances. + Returns -1 if src isn't valid UTF-8. Note that invalid <escape_char> content + isn't treated as an error - it's simply passed through. */ +int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest); +/* For manually parsing the <hex> after <escape_char>. Returns 0 on success, + -1 if str doesn't point to valid <hex>. 
*/ +int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r); + /* Convert IMAP-UTF-7 string to UTF-8. Returns 0 if ok, -1 if src isn't valid IMAP-UTF-7. */ int imap_utf7_to_utf8(const char *src, string_t *dest); +/* Like imap_utf7_to_utf8(), but write invalid input as <escape_chars[0]><hex>. + All the characters in escape_chars[] are escaped in the same way. This + allows converting the escaped output back to the original (broken) + IMAP-UTF-7 input. */ +void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars, + string_t *dest); /* Returns TRUE if the string is valid IMAP-UTF-7 string. */ bool imap_utf7_is_valid(const char *src); diff --git a/src/lib-imap/test-imap-utf7.c b/src/lib-imap/test-imap-utf7.c index b487e04e6d..216eebf4a7 100644 --- a/src/lib-imap/test-imap-utf7.c +++ b/src/lib-imap/test-imap-utf7.c @@ -22,19 +22,26 @@ static void test_imap_utf7_by_example(void) { NULL, "&Jjo!" }, { NULL, "&U,BTFw-&ZeVnLIqe-" } /* unnecessary shift */ }; - string_t *dest; + string_t *dest, *dest2; unsigned int i; dest = t_str_new(256); + dest2 = t_str_new(256); test_begin("imap mutf7 examples"); for (i = 0; i < N_ELEMENTS(tests); i++) { + str_truncate(dest, 0); if (tests[i].utf8 != NULL) { - str_truncate(dest, 0); if (imap_utf8_to_utf7(tests[i].utf8, dest) < 0) test_assert_idx(tests[i].mutf7 == NULL, i); else test_assert_idx(null_strcmp(tests[i].mutf7, str_c(dest)) == 0, i); + } else { + /* invalid mUTF-7 - test that escaping works */ + str_truncate(dest2, 0); + imap_utf7_to_utf8_escaped(tests[i].mutf7, "%", dest); + imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2); + test_assert_idx(strcmp(tests[i].mutf7, str_c(dest2)) == 0, i); } if (tests[i].mutf7 != NULL) { str_truncate(dest, 0); @@ -45,6 +52,15 @@ static void test_imap_utf7_by_example(void) test_assert_idx(imap_utf7_is_valid(tests[i].mutf7) != (tests[i].utf8 == NULL), i); } } + + str_truncate(dest, 0); + imap_utf7_to_utf8_escaped(".foo%", "%.", dest); + test_assert_strcmp(str_c(dest), "%2efoo%25"); + 
str_truncate(dest, 0); + test_assert(imap_escaped_utf8_to_utf7("%foo%2ebar", '%', dest) == 0); + test_assert_strcmp(str_c(dest), "%foo.bar"); + test_end(); } @@ -85,9 +101,12 @@ static void test_imap_utf7_ucs4_cases(void) static const char mb64[64]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; static void test_imap_utf7_non_utf16(void) { + string_t *dest, *dest2; unsigned int i; test_begin("imap mutf7 non-utf16"); + dest = t_str_new(32); + dest2 = t_str_new(32); for (i = 0; i <= 255; ++i) { /* Invalid, code a single 8-bit octet */ const char csrc[] = { @@ -98,6 +117,13 @@ static void test_imap_utf7_non_utf16(void) '\0' }; test_assert_idx(!imap_utf7_is_valid(csrc), i); + + /* escaping can reverse the original string */ + str_truncate(dest, 0); + str_truncate(dest2, 0); + imap_utf7_to_utf8_escaped(csrc, "%", dest); + imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2); + test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i); } for (i = 0; i <= 255; ++i) { /* Invalid, U+00E4 followed by a single octet */ @@ -111,6 +137,13 @@ static void test_imap_utf7_non_utf16(void) '\0' }; test_assert_idx(!imap_utf7_is_valid(csrc), i); + + /* escaping can reverse the original string */ + str_truncate(dest, 0); + str_truncate(dest2, 0); + imap_utf7_to_utf8_escaped(csrc, "%", dest); + imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2); + test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i); } test_end(); }