git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-imap: Add imap_utf7_to_utf8_escaped() and imap_escaped_utf8_to_utf7()
author Timo Sirainen <timo.sirainen@open-xchange.com>
Wed, 13 Jan 2021 17:50:50 +0000 (19:50 +0200)
committer aki.tuomi <aki.tuomi@open-xchange.com>
Wed, 3 Feb 2021 09:04:22 +0000 (09:04 +0000)
These can be used to convert invalid mUTF-7 to escaped UTF-8 and back to the
original invalid input.

src/lib-imap/imap-utf7.c
src/lib-imap/imap-utf7.h
src/lib-imap/test-imap-utf7.c

index ba97d905519f36b92212bb8ff20e1ea1f032fd2e..7ea53f535af5ae50fb7b4d96d389457ea4dd1151 100644 (file)
@@ -55,25 +55,50 @@ mbase64_encode(string_t *dest, const unsigned char *in, size_t len)
        str_append_c(dest, '-');
 }
 
-static const char *imap_utf8_first_encode_char(const char *str)
+static const char *
+imap_utf8_first_encode_char(const char *str, char escape_char)
 { /* Returns a pointer to the first character of str that can't be copied
      to the output unchanged: '&', a character comparing below 0x20 or
      at/above 0x7f, or escape_char. Returns NULL if there is none. The scan
      stops at the NUL terminator, so escape_char == '\0' never matches. */
        const char *p;
 
        for (p = str; *p != '\0'; p++) {
-               if (*p == '&' || *p < 0x20 || *p >= 0x7f)
+               if (*p == '&' || *p < 0x20 || *p >= 0x7f || *p == escape_char)
                        return p;
        }
        return NULL;
 }
 
-int imap_utf8_to_utf7(const char *src, string_t *dest)
+int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r)
+{
+       unsigned int i = 0;
+       unsigned char c = 0;
+
+       /* Parse exactly two hex digits from str[0] and str[1] into *chr_r,
+          most significant digit first. Returns 0 on success. Returns -1
+          without writing *chr_r if either character is not a hex digit;
+          a NUL in either position fails the digit checks, so nothing past
+          the string terminator is read.
+
+          NOTE: Only lowercase hex characters are allowed so the output is
+          reversible. */
+       for (;;) {
+               if (str[i] >= '0' && str[i] <= '9')
+                       c += str[i] - '0';
+               else if (str[i] >= 'a' && str[i] <= 'f')
+                       c += str[i] - 'a' + 10;
+               else
+                       return -1;
+               if (++i == 2)
+                       break;
+               c *= 0x10; /* the first digit becomes the high nibble */
+       }
+       *chr_r = c;
+       return 0;
+}
+
+static int
+imap_utf8_to_utf7_int(const char *src, char escape_char, string_t *dest)
 {
        const char *p;
        unichar_t chr;
        uint8_t *utf16, *u;
        uint16_t u16;
+       unsigned char c;
 
-       p = imap_utf8_first_encode_char(src);
+       p = imap_utf8_first_encode_char(src, escape_char);
        if (p == NULL) {
                /* no characters that need to be encoded */
                str_append(dest, src);
@@ -84,6 +109,12 @@ int imap_utf8_to_utf7(const char *src, string_t *dest)
        str_append_data(dest, src, p-src);
        utf16 = t_malloc0(MALLOC_MULTIPLY(strlen(p), 2));
        while (*p != '\0') {
+               if (*p == escape_char &&
+                   imap_escaped_utf8_hex_to_char(p+1, &c) == 0) {
+                       str_append_c(dest, c);
+                       p += 3;
+                       continue;
+               }
                if (*p == '&') {
                        str_append(dest, "&-");
                        p++;
@@ -118,12 +149,24 @@ int imap_utf8_to_utf7(const char *src, string_t *dest)
        return 0;
 }
 
+int imap_utf8_to_utf7(const char *src, string_t *dest)
+{
+       return imap_utf8_to_utf7_int(src, '\0', dest); /* '\0' as escape_char
+          disables unescaping: the helper compares escape_char only against
+          characters that precede the terminating NUL, so it never matches */
+}
+
+int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest)
+{
+       i_assert(escape_char != '&'); /* '&' already has its own handling in
+          the helper (it is written out as "&-"), so it can't also serve as
+          the escape character */
+
+       return imap_utf8_to_utf7_int(src, escape_char, dest);
+}
+
 int t_imap_utf8_to_utf7(const char *src, const char **dest_r)
 {
        string_t *str;
        int ret;
 
-       if (imap_utf8_first_encode_char(src) == NULL) {
+       if (imap_utf8_first_encode_char(src, '\0') == NULL) {
                *dest_r = src;
                return 0;
        }
@@ -253,14 +296,18 @@ static int mbase64_decode_to_utf8(string_t *dest, const char **_src)
        return 0;
 }
 
-int imap_utf7_to_utf8(const char *src, string_t *dest)
+static int
+imap_utf7_to_utf8_int(const char *src, const char *escape_chars, string_t *dest)
 {
        const char *p;
 
        for (p = src; *p != '\0'; p++) {
-               if (*p < 0x20 || *p >= 0x7f)
-                       return -1;
-               if (*p == '&')
+               if (*p < 0x20 || *p >= 0x7f) {
+                       if (escape_chars[0] == '\0')
+                               return -1;
+                       break;
+               }
+               if (*p == '&' || strchr(escape_chars, *p) != NULL)
                        break;
        }
        if (*p == '\0') {
@@ -272,13 +319,23 @@ int imap_utf7_to_utf8(const char *src, string_t *dest)
        /* at least one encoded character */
        str_append_data(dest, src, p-src);
        while (*p != '\0') {
-               if (*p == '&') {
+               if (strchr(escape_chars, *p) != NULL ||
+                   *p < 0x20 || *p >= 0x7f) {
+                       str_printfa(dest, "%c%02x", escape_chars[0],
+                                   (unsigned char)*p);
+                       p++;
+               } else if (*p == '&') {
                        if (*++p == '-') {
                                str_append_c(dest, '&');
                                p++;
                        } else {
-                               if (mbase64_decode_to_utf8(dest, &p) < 0)
-                                       return -1;
+                               size_t orig_size = str_len(dest);
+                               if (mbase64_decode_to_utf8(dest, &p) < 0) {
+                                       if (escape_chars[0] == '\0')
+                                               return -1;
+                                       str_truncate(dest, orig_size);
+                                       str_printfa(dest, "%c26", escape_chars[0]);
+                               }
                        }
                } else {
                        str_append_c(dest, *p++);
@@ -287,6 +344,20 @@ int imap_utf7_to_utf8(const char *src, string_t *dest)
        return 0;
 }
 
+int imap_utf7_to_utf8(const char *src, string_t *dest)
+{
+       return imap_utf7_to_utf8_int(src, "", dest); /* empty escape_chars:
+          the helper returns -1 instead of escaping when it meets an
+          out-of-range character before the first '&' or a base64 section
+          that fails to decode.
+          NOTE(review): the helper's second loop formats out-of-range
+          characters with escape_chars[0] without checking for the empty
+          string - confirm this is intended for input reaching it via this
+          wrapper (e.g. a control character after the first '&'). */
+}
+
+void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars,
+                              string_t *dest)
+{
+       i_assert(escape_chars[0] != '&'); /* escape_chars[0] is the character
+          the helper writes in front of every <hex>; '&' is handled
+          separately by the helper and can't be used for that */
+
+       if (imap_utf7_to_utf8_int(src, escape_chars, dest) < 0)
+               i_unreached(); /* in the visible helper code -1 is returned
+                  only when escape_chars is empty.
+                  NOTE(review): nothing here asserts that escape_chars is
+                  non-empty - confirm that callers guarantee it. */
+}
+
 bool imap_utf7_is_valid(const char *src)
 {
        const char *p;
index 5d7875f198c899910c133fca93b17af39bf024bd..d7ae3066e6c248224517d1ad59b76fdfe0e1f1f8 100644 (file)
@@ -5,9 +5,23 @@
    valid UTF-8. */
 int imap_utf8_to_utf7(const char *src, string_t *dest);
 int t_imap_utf8_to_utf7(const char *src, const char **dest_r);
+/* Like imap_utf8_to_utf7(), but decode all <escape_char><hex> instances.
+   Returns -1 if src isn't valid UTF-8. Note that invalid <escape_char> content
+   isn't treated as an error - it's simply passed through. */
+int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest);
+/* For manually parsing the <hex> after <escape_char>. <hex> is exactly two
+   lowercase hex digits. Returns 0 on success and stores the decoded byte in
+   chr_r, -1 if str doesn't point to valid <hex>. */
+int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r);
+
 /* Convert IMAP-UTF-7 string to UTF-8. Returns 0 if ok, -1 if src isn't
    valid IMAP-UTF-7. */
 int imap_utf7_to_utf8(const char *src, string_t *dest);
+/* Like imap_utf7_to_utf8(), but write invalid input as <escape_chars[0]><hex>.
+   All the characters in escape_chars[] are escaped in the same way. This
+   allows converting the escaped output back to the original (broken)
+   IMAP-UTF-7 input. */
+void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars,
+                              string_t *dest);
 /* Returns TRUE if the string is valid IMAP-UTF-7 string. */
 bool imap_utf7_is_valid(const char *src);
 
index b487e04e6d820c5934d38543b3445dc953cfc110..216eebf4a7f084407a7cb9ea846c7c133641fca1 100644 (file)
@@ -22,19 +22,26 @@ static void test_imap_utf7_by_example(void)
                { NULL, "&Jjo!" },
                { NULL, "&U,BTFw-&ZeVnLIqe-" } /* unnecessary shift */
        };
-       string_t *dest;
+       string_t *dest, *dest2;
        unsigned int i;
 
        dest = t_str_new(256);
+       dest2 = t_str_new(256);
 
        test_begin("imap mutf7 examples");
        for (i = 0; i < N_ELEMENTS(tests); i++) {
+               str_truncate(dest, 0);
                if (tests[i].utf8 != NULL) {
-                       str_truncate(dest, 0);
                        if (imap_utf8_to_utf7(tests[i].utf8, dest) < 0)
                                test_assert_idx(tests[i].mutf7 == NULL, i);
                        else
                                test_assert_idx(null_strcmp(tests[i].mutf7, str_c(dest)) == 0, i);
+               } else {
+                       /* invalid mUTF-7 - test that escaping works */
+                       str_truncate(dest2, 0);
+                       imap_utf7_to_utf8_escaped(tests[i].mutf7, "%", dest);
+                       imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+                       test_assert_idx(strcmp(tests[i].mutf7, str_c(dest2)) == 0, i);
                }
                if (tests[i].mutf7 != NULL) {
                        str_truncate(dest, 0);
@@ -45,6 +52,15 @@ static void test_imap_utf7_by_example(void)
                        test_assert_idx(imap_utf7_is_valid(tests[i].mutf7) != (tests[i].utf8 == NULL), i);
                }
        }
+
+       str_truncate(dest, 0);
+       imap_utf7_to_utf8_escaped(".foo%", "%.", dest);
+       test_assert_strcmp(str_c(dest), "%2efoo%25");
+
+       str_truncate(dest, 0);
+       test_assert(imap_escaped_utf8_to_utf7("%foo%2ebar", '%', dest) == 0);
+       test_assert_strcmp(str_c(dest), "%foo.bar");
+
        test_end();
 }
 
@@ -85,9 +101,12 @@ static void test_imap_utf7_ucs4_cases(void)
 static const char mb64[64]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
 static void test_imap_utf7_non_utf16(void)
 {
+       string_t *dest, *dest2;
        unsigned int i;
 
        test_begin("imap mutf7 non-utf16");
+       dest = t_str_new(32);
+       dest2 = t_str_new(32);
        for (i = 0; i <= 255; ++i) {
                /* Invalid, code a single 8-bit octet */
                const char csrc[] = {
@@ -98,6 +117,13 @@ static void test_imap_utf7_non_utf16(void)
                        '\0'
                };
                test_assert_idx(!imap_utf7_is_valid(csrc), i);
+
+               /* escaping can reverse the original string */
+               str_truncate(dest, 0);
+               str_truncate(dest2, 0);
+               imap_utf7_to_utf8_escaped(csrc, "%", dest);
+               imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+               test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i);
        }
        for (i = 0; i <= 255; ++i) {
                /* Invalid, U+00E4 followed by a single octet */
@@ -111,6 +137,13 @@ static void test_imap_utf7_non_utf16(void)
                        '\0'
                };
                test_assert_idx(!imap_utf7_is_valid(csrc), i);
+
+               /* escaping can reverse the original string */
+               str_truncate(dest, 0);
+               str_truncate(dest2, 0);
+               imap_utf7_to_utf8_escaped(csrc, "%", dest);
+               imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+               test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i);
        }
        test_end();
 }