lib-imap: Add imap_utf7_to_utf8_escaped() and imap_escaped_utf8_to_utf7()

author Timo Sirainen <timo.sirainen@open-xchange.com>

Wed, 13 Jan 2021 17:50:50 +0000 (19:50 +0200)

committer aki.tuomi <aki.tuomi@open-xchange.com>

Wed, 3 Feb 2021 09:04:22 +0000 (09:04 +0000)
author Timo Sirainen <timo.sirainen@open-xchange.com>
Wed, 13 Jan 2021 17:50:50 +0000 (19:50 +0200)
committer aki.tuomi <aki.tuomi@open-xchange.com>
Wed, 3 Feb 2021 09:04:22 +0000 (09:04 +0000)
diff --git a/src/lib-imap/imap-utf7.c b/src/lib-imap/imap-utf7.c

index ba97d905519f36b92212bb8ff20e1ea1f032fd2e..7ea53f535af5ae50fb7b4d96d389457ea4dd1151 100644 (file)
--- a/src/lib-imap/imap-utf7.c
+++ b/src/lib-imap/imap-utf7.c
@@ -55,25 +55,50 @@ mbase64_encode(string_t *dest, const unsigned char *in, size_t len)
         str_append_c(dest, '-');
  }
  
-static const char *imap_utf8_first_encode_char(const char *str)
+static const char *
+imap_utf8_first_encode_char(const char *str, char escape_char) /* Returns a
+   pointer to the first character of str that is '&', is below 0x20, is
+   0x7f or above, or equals escape_char. Returns NULL if str contains no
+   such character. The scan stops at the terminating NUL, so passing '\0'
+   as escape_char never produces a match. */
  {
         const char *p;
  
         for (p = str; *p != '\0'; p++) {
-               if (*p == '&' || *p < 0x20 || *p >= 0x7f)
+               if (*p == '&' || *p < 0x20 || *p >= 0x7f || *p == escape_char)
                          return p;
         }
         return NULL;
  }
  
-int imap_utf8_to_utf7(const char *src, string_t *dest)
+int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r)
+{      /* Parse exactly two lowercase hex digits at str[0] and str[1] into
+          one byte (the first digit is the high nibble) and store it in
+          *chr_r. Returns 0 on success. Returns -1 as soon as a character
+          isn't [0-9a-f]; *chr_r is left untouched in that case. */
+       unsigned int i = 0;     /* index of the digit being parsed */
+       unsigned char c = 0;    /* value accumulated so far */
+
+       /* NOTE: Only lowercase hex characters are allowed so the output is
+          reversible. */
+       for (;;) {
+               if (str[i] >= '0' && str[i] <= '9')
+                       c += str[i] - '0';
+               else if (str[i] >= 'a' && str[i] <= 'f')
+                       c += str[i] - 'a' + 10;
+               else
+                       return -1; /* also taken for a NUL terminator, so
+                                     nothing past the end of str is read */
+               if (++i == 2)
+                       break;
+               c *= 0x10; /* move the first digit into the high nibble */
+       }
+       *chr_r = c;
+       return 0;
+}
+
+static int
+imap_utf8_to_utf7_int(const char *src, char escape_char, string_t *dest)
  {
         const char *p;
         unichar_t chr;
         uint8_t *utf16, *u;
         uint16_t u16;
+       unsigned char c;
  
-       p = imap_utf8_first_encode_char(src);
+       p = imap_utf8_first_encode_char(src, escape_char);
         if (p == NULL) {
                 /* no characters that need to be encoded */
                 str_append(dest, src);
@@ -84,6 +109,12 @@ int imap_utf8_to_utf7(const char *src, string_t *dest)
         str_append_data(dest, src, p-src);
         utf16 = t_malloc0(MALLOC_MULTIPLY(strlen(p), 2));
         while (*p != '\0') {
+               if (*p == escape_char &&
+                   imap_escaped_utf8_hex_to_char(p+1, &c) == 0) {
+                       str_append_c(dest, c);
+                       p += 3;
+                       continue;
+               }
                 if (*p == '&') {
                         str_append(dest, "&-");
                         p++;
@@ -118,12 +149,24 @@ int imap_utf8_to_utf7(const char *src, string_t *dest)
         return 0;
  }
  
+int imap_utf8_to_utf7(const char *src, string_t *dest)
+{      /* Convert src to IMAP-UTF-7 without any <escape_char><hex> decoding.
+          '\0' is passed as the escape character: the escape checks visible
+          in this file are only applied to non-NUL characters of src, so it
+          never matches. Returns whatever imap_utf8_to_utf7_int() returns. */
+       return imap_utf8_to_utf7_int(src, '\0', dest);
+}
+
+int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest)
+{      /* Like imap_utf8_to_utf7(), but <escape_char><hex> sequences in src
+          are decoded to single bytes by imap_utf8_to_utf7_int(). Returns
+          whatever imap_utf8_to_utf7_int() returns. */
+       i_assert(escape_char != '&'); /* '&' is already handled by the
+                                        converter itself (written as "&-"),
+                                        so it can't double as the escape.
+                                        NOTE(review): '\0' isn't rejected
+                                        here; it looks equivalent to calling
+                                        imap_utf8_to_utf7() - confirm. */
+
+       return imap_utf8_to_utf7_int(src, escape_char, dest);
+}
+
  int t_imap_utf8_to_utf7(const char *src, const char **dest_r)
  {
         string_t *str;
         int ret;
  
-       if (imap_utf8_first_encode_char(src) == NULL) {
+       if (imap_utf8_first_encode_char(src, '\0') == NULL) {
                 *dest_r = src;
                 return 0;
         }
@@ -253,14 +296,18 @@ static int mbase64_decode_to_utf8(string_t *dest, const char **_src)
         return 0;
  }
  
-int imap_utf7_to_utf8(const char *src, string_t *dest)
+static int
+imap_utf7_to_utf8_int(const char *src, const char *escape_chars, string_t *dest)
  {
         const char *p;
  
         for (p = src; *p != '\0'; p++) {
-               if (*p < 0x20 || *p >= 0x7f)
-                       return -1;
-               if (*p == '&')
+               if (*p < 0x20 || *p >= 0x7f) {
+                       if (escape_chars[0] == '\0')
+                               return -1;
+                       break;
+               }
+               if (*p == '&' || strchr(escape_chars, *p) != NULL)
                         break;
         }
         if (*p == '\0') {
@@ -272,13 +319,23 @@ int imap_utf7_to_utf8(const char *src, string_t *dest)
         /* at least one encoded character */
         str_append_data(dest, src, p-src);
         while (*p != '\0') {
-               if (*p == '&') {
+               if (strchr(escape_chars, *p) != NULL ||
+                   *p < 0x20 || *p >= 0x7f) {
+                       str_printfa(dest, "%c%02x", escape_chars[0],
+                                   (unsigned char)*p);
+                       p++;
+               } else if (*p == '&') {
                         if (*++p == '-') {
                                 str_append_c(dest, '&');
                                 p++;
                         } else {
-                               if (mbase64_decode_to_utf8(dest, &p) < 0)
-                                       return -1;
+                               size_t orig_size = str_len(dest);
+                               if (mbase64_decode_to_utf8(dest, &p) < 0) {
+                                       if (escape_chars[0] == '\0')
+                                               return -1;
+                                       str_truncate(dest, orig_size);
+                                       str_printfa(dest, "%c26", escape_chars[0]);
+                               }
                         }
                 } else {
                         str_append_c(dest, *p++);
@@ -287,6 +344,20 @@ int imap_utf7_to_utf8(const char *src, string_t *dest)
         return 0;
  }
  
+int imap_utf7_to_utf8(const char *src, string_t *dest)
+{      /* Strict IMAP-UTF-7 to UTF-8 conversion: escape_chars is empty, so
+          imap_utf7_to_utf8_int() returns -1 when it meets a byte outside
+          0x20..0x7e before the first '&', or when a base64 section fails to
+          decode.
+          NOTE(review): after the first '&', a byte outside 0x20..0x7e
+          appears to reach the "%c%02x" escape branch even though
+          escape_chars is empty, which would write escape_chars[0] (a NUL)
+          plus two hex digits and still return 0 - confirm against the full
+          function. */
+       return imap_utf7_to_utf8_int(src, "", dest);
+}
+
+void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars,
+                              string_t *dest)
+{      /* Lenient IMAP-UTF-7 to UTF-8 conversion that cannot fail: input
+          that would be rejected by the strict variant is written to dest as
+          <escape_chars[0]><two lowercase hex digits> instead.
+          NOTE(review): only escape_chars[0] is checked against '&', and an
+          empty escape_chars isn't rejected. With "" the failure returns of
+          imap_utf7_to_utf8_int() become reachable and invalid input would
+          hit i_unreached() - confirm callers always pass at least one
+          character. */
+       i_assert(escape_chars[0] != '&');
+
+       if (imap_utf7_to_utf8_int(src, escape_chars, dest) < 0)
+               i_unreached(); /* the failure returns visible in
+                                 imap_utf7_to_utf8_int() are all guarded by
+                                 escape_chars[0] == '\0' */
+}
+
  bool imap_utf7_is_valid(const char *src)
  {
         const char *p;
diff --git a/src/lib-imap/imap-utf7.h b/src/lib-imap/imap-utf7.h

index 5d7875f198c899910c133fca93b17af39bf024bd..d7ae3066e6c248224517d1ad59b76fdfe0e1f1f8 100644 (file)
--- a/src/lib-imap/imap-utf7.h
+++ b/src/lib-imap/imap-utf7.h
@@ -5,9 +5,23 @@
     valid UTF-8. */
  int imap_utf8_to_utf7(const char *src, string_t *dest);
  int t_imap_utf8_to_utf7(const char *src, const char **dest_r);
+/* Like imap_utf8_to_utf7(), but decode all <escape_char><hex> instances,
+   where <hex> is exactly two lowercase hex digits. The decoded byte is
+   appended to dest as-is. escape_char must not be '&' (asserted).
+   Returns -1 if src isn't valid UTF-8. Note that invalid <escape_char> content
+   isn't treated as an error - it's simply passed through. */
+int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest);
+/* For manually parsing the <hex> after <escape_char>. Reads two lowercase
+   hex digits from str and stores the resulting byte in chr_r. Returns 0 on
+   success, -1 if str doesn't point to valid <hex>, in which case chr_r is
+   left untouched. */
+int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r);
+
  /* Convert IMAP-UTF-7 string to UTF-8. Returns 0 if ok, -1 if src isn't
     valid IMAP-UTF-7. */
  int imap_utf7_to_utf8(const char *src, string_t *dest);
+/* Like imap_utf7_to_utf8(), but write invalid input as <escape_chars[0]><hex>,
+   where <hex> is the offending byte as two lowercase hex digits.
+   All the characters in escape_chars[] are escaped in the same way. This
+   allows converting the escaped output back to the original (broken)
+   IMAP-UTF-7 input with imap_escaped_utf8_to_utf7(). escape_chars[0] must
+   not be '&' (asserted). */
+void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars,
+                              string_t *dest);
  /* Returns TRUE if the string is valid IMAP-UTF-7 string. */
  bool imap_utf7_is_valid(const char *src);
  
diff --git a/src/lib-imap/test-imap-utf7.c b/src/lib-imap/test-imap-utf7.c

index b487e04e6d820c5934d38543b3445dc953cfc110..216eebf4a7f084407a7cb9ea846c7c133641fca1 100644 (file)
--- a/src/lib-imap/test-imap-utf7.c
+++ b/src/lib-imap/test-imap-utf7.c
@@ -22,19 +22,26 @@ static void test_imap_utf7_by_example(void)
                 { NULL, "&Jjo!" },
                 { NULL, "&U,BTFw-&ZeVnLIqe-" } /* unnecessary shift */
         };
-       string_t *dest;
+       string_t *dest, *dest2;
         unsigned int i;
  
         dest = t_str_new(256);
+       dest2 = t_str_new(256);
  
         test_begin("imap mutf7 examples");
         for (i = 0; i < N_ELEMENTS(tests); i++) {
+               str_truncate(dest, 0);
                 if (tests[i].utf8 != NULL) {
-                       str_truncate(dest, 0);
                         if (imap_utf8_to_utf7(tests[i].utf8, dest) < 0)
                                 test_assert_idx(tests[i].mutf7 == NULL, i);
                         else
                                 test_assert_idx(null_strcmp(tests[i].mutf7, str_c(dest)) == 0, i);
+               } else {
+                       /* invalid mUTF-7 - test that escaping works */
+                       str_truncate(dest2, 0);
+                       imap_utf7_to_utf8_escaped(tests[i].mutf7, "%", dest);
+                       imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+                       test_assert_idx(strcmp(tests[i].mutf7, str_c(dest2)) == 0, i);
                 }
                 if (tests[i].mutf7 != NULL) {
                         str_truncate(dest, 0);
@@ -45,6 +52,15 @@ static void test_imap_utf7_by_example(void)
                         test_assert_idx(imap_utf7_is_valid(tests[i].mutf7) != (tests[i].utf8 == NULL), i);
                 }
         }
+
+       str_truncate(dest, 0);
+       imap_utf7_to_utf8_escaped(".foo%", "%.", dest);
+       test_assert_strcmp(str_c(dest), "%2efoo%25");
+
+       str_truncate(dest, 0);
+       test_assert(imap_escaped_utf8_to_utf7("%foo%2ebar", '%', dest) == 0);
+       test_assert_strcmp(str_c(dest), "%foo.bar");
+
         test_end();
  }
  
@@ -85,9 +101,12 @@ static void test_imap_utf7_ucs4_cases(void)
  static const char mb64[64]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
  static void test_imap_utf7_non_utf16(void)
  {
+       string_t *dest, *dest2;
         unsigned int i;
  
         test_begin("imap mutf7 non-utf16");
+       dest = t_str_new(32);
+       dest2 = t_str_new(32);
         for (i = 0; i <= 255; ++i) {
                 /* Invalid, code a single 8-bit octet */
                 const char csrc[] = {
@@ -98,6 +117,13 @@ static void test_imap_utf7_non_utf16(void)
                         '\0'
                 };
                 test_assert_idx(!imap_utf7_is_valid(csrc), i);
+
+               /* escaping can reverse the original string */
+               str_truncate(dest, 0);
+               str_truncate(dest2, 0);
+               imap_utf7_to_utf8_escaped(csrc, "%", dest);
+               imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+               test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i);
         }
         for (i = 0; i <= 255; ++i) {
                 /* Invalid, U+00E4 followed by a single octet */
@@ -111,6 +137,13 @@ static void test_imap_utf7_non_utf16(void)
                         '\0'
                 };
                 test_assert_idx(!imap_utf7_is_valid(csrc), i);
+
+               /* escaping can reverse the original string */
+               str_truncate(dest, 0);
+               str_truncate(dest2, 0);
+               imap_utf7_to_utf8_escaped(csrc, "%", dest);
+               imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+               test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i);
         }
         test_end();
  }
author	Timo Sirainen <timo.sirainen@open-xchange.com>
	Wed, 13 Jan 2021 17:50:50 +0000 (19:50 +0200)
committer	aki.tuomi <aki.tuomi@open-xchange.com>
	Wed, 3 Feb 2021 09:04:22 +0000 (09:04 +0000)
src/lib-imap/imap-utf7.c		patch \| blob \| blame \| history
src/lib-imap/imap-utf7.h		patch \| blob \| blame \| history
src/lib-imap/test-imap-utf7.c		patch \| blob \| blame \| history