]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
Added code for encoding and decoding IMAP's modified-UTF7 strings.
author Timo Sirainen <tss@iki.fi>
Sat, 1 Nov 2008 19:12:01 +0000 (21:12 +0200)
committer Timo Sirainen <tss@iki.fi>
Sat, 1 Nov 2008 19:12:01 +0000 (21:12 +0200)
--HG--
branch : HEAD

src/lib-imap/Makefile.am
src/lib-imap/imap-utf7.c [new file with mode: 0644]
src/lib-imap/imap-utf7.h [new file with mode: 0644]
src/tests/test-imap.c

index c13aa7a57d01987195798c23e2c181685975f44a..459f468d7fd9aa80876a7655151e83a9249c982e 100644 (file)
@@ -15,6 +15,7 @@ libimap_a_SOURCES = \
        imap-parser.c \
        imap-quote.c \
        imap-seqset.c \
+       imap-utf7.c \
        imap-util.c
 
 headers = \
@@ -27,6 +28,7 @@ headers = \
        imap-parser.h \
        imap-quote.h \
        imap-seqset.h \
+       imap-utf7.h \
        imap-util.h
 
 if INSTALL_HEADERS
diff --git a/src/lib-imap/imap-utf7.c b/src/lib-imap/imap-utf7.c
new file mode 100644 (file)
index 0000000..202e8be
--- /dev/null
@@ -0,0 +1,249 @@
+/* Copyright (c) 2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "unichar.h"
+#include "imap-utf7.h"
+
+static const char imap_b64enc[] = /* encoding alphabet, indexed by a 6-bit value; the last two characters are '+' and ',' */
+       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+#define XX 0xff /* marks a byte that isn't part of the encoding alphabet */
+static const unsigned char imap_b64dec[256] = { /* reverse of imap_b64enc: input byte -> 6-bit value, or XX */
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, 63,XX,XX,XX,
+       52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,XX,XX,XX,
+       XX, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
+       15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
+       XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
+       41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+       XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX
+};
+
+/* Append in[0..len-1] to dest as a single encoded section: '&', the
+   data encoded with the imap_b64enc alphabet, and a closing '-'.
+   No padding characters are written: a trailing group of 1 or 2 bytes
+   produces 2 or 3 characters whose unused low bits are zero. */
+static void
+mbase64_encode(string_t *dest, const unsigned char *in, unsigned int len)
+{
+       unsigned int left, b0, b1, b2;
+
+       str_append_c(dest, '&');
+
+       /* complete groups: 3 input bytes -> 4 output characters */
+       for (left = len; left >= 3; left -= 3, in += 3) {
+               b0 = in[0];
+               b1 = in[1];
+               b2 = in[2];
+               str_append_c(dest, imap_b64enc[b0 >> 2]);
+               str_append_c(dest, imap_b64enc[((b0 & 0x03) << 4) | (b1 >> 4)]);
+               str_append_c(dest, imap_b64enc[((b1 & 0x0f) << 2) | (b2 >> 6)]);
+               str_append_c(dest, imap_b64enc[b2 & 0x3f]);
+       }
+
+       /* partial trailing group, if any */
+       switch (left) {
+       case 1:
+               b0 = in[0];
+               str_append_c(dest, imap_b64enc[b0 >> 2]);
+               str_append_c(dest, imap_b64enc[(b0 & 0x03) << 4]);
+               break;
+       case 2:
+               b0 = in[0];
+               b1 = in[1];
+               str_append_c(dest, imap_b64enc[b0 >> 2]);
+               str_append_c(dest, imap_b64enc[((b0 & 0x03) << 4) | (b1 >> 4)]);
+               str_append_c(dest, imap_b64enc[(b1 & 0x0f) << 2]);
+               break;
+       default:
+               break;
+       }
+
+       str_append_c(dest, '-');
+}
+
+/* Convert the UTF-8 string src to IMAP-UTF-7 and append it to dest.
+   '&' is written as "&-", other 7-bit characters are copied as they are
+   and each run of 8-bit characters is written as one encoded section.
+   Returns 0 if ok, -1 if an 8-bit sequence can't be read as a UTF-8
+   character. On failure dest may already contain partial output. */
+int imap_utf8_to_utf7(const char *src, string_t *dest)
+{
+       const char *pos = src;
+       uint8_t *buf, *out;
+       unichar_t ucs;
+       uint16_t unit;
+
+       /* find the first character that can't be copied as it is */
+       while (*pos != '\0' && *pos != '&' && (unsigned char)*pos < 0x80)
+               pos++;
+       if (*pos == '\0') {
+               /* nothing needs to be encoded */
+               str_append(dest, src);
+               return 0;
+       }
+
+       /* copy the plain prefix, then convert the rest piece by piece.
+          an input byte never produces more than 2 bytes of UTF-16, so
+          the buffer is large enough for any run of 8-bit characters. */
+       str_append_n(dest, src, pos - src);
+       buf = t_malloc(strlen(pos) * 2);
+       while (*pos != '\0') {
+               if ((unsigned char)*pos >= 0x80) {
+                       /* gather the whole 8-bit run as big-endian UTF-16 */
+                       out = buf;
+                       do {
+                               if (uni_utf8_get_char(pos, &ucs) <= 0)
+                                       return -1;
+                               /* @UNSAFE */
+                               if (ucs >= UTF16_SURROGATE_BASE) {
+                                       unit = UTF16_SURROGATE_HIGH(ucs);
+                                       out[0] = unit >> 8;
+                                       out[1] = unit & 0xff;
+                                       unit = UTF16_SURROGATE_LOW(ucs);
+                                       out[2] = unit >> 8;
+                                       out[3] = unit & 0xff;
+                                       out += 4;
+                               } else {
+                                       out[0] = ucs >> 8;
+                                       out[1] = ucs & 0xff;
+                                       out += 2;
+                               }
+                               pos += uni_utf8_char_bytes(*pos);
+                       } while ((unsigned char)*pos >= 0x80);
+                       mbase64_encode(dest, buf, out - buf);
+               } else if (*pos == '&') {
+                       str_append(dest, "&-");
+                       pos++;
+               } else {
+                       str_append_c(dest, *pos);
+                       pos++;
+               }
+       }
+       return 0;
+}
+
+/* Convert the next character from the 4 byte ring buffer output[] to
+   UTF-8 and append it to dest. *_pos is the index of the first unread
+   byte and len is the number of unread bytes. The bytes are read as
+   big-endian UTF-16.
+
+   A character that takes one 16-bit unit advances *_pos by two. A
+   surrogate pair uses all four bytes, so *_pos is left as it is.
+   Returns 0 if ok, -1 if len is odd, a surrogate is unpaired or
+   misplaced, or the second unit of a pair hasn't been read yet. */
+static int utf16buf_to_utf8(string_t *dest, const unsigned char output[4],
+                           unsigned int *_pos, unsigned int len)
+{
+       unsigned int start = *_pos;
+       uint16_t first, second;
+       unichar_t ucs;
+
+       if ((len & 1) != 0)
+               return -1;
+
+       first = (output[start % 4] << 8) | output[(start + 1) % 4];
+       if (first >= UTF16_SURROGATE_HIGH_FIRST &&
+           first <= UTF16_SURROGATE_HIGH_MAX) {
+               /* surrogate range: a second 16-bit unit is required */
+               if (first > UTF16_SURROGATE_HIGH_LAST)
+                       return -1;
+               if (len != 4) {
+                       /* the second unit isn't available */
+                       return -1;
+               }
+
+               second = (output[(start + 2) % 4] << 8) |
+                       output[(start + 3) % 4];
+               if (second < UTF16_SURROGATE_LOW_FIRST ||
+                   second > UTF16_SURROGATE_LOW_LAST)
+                       return -1;
+
+               ucs = UTF16_SURROGATE_BASE +
+                       (((first & UTF16_SURROGATE_MASK) <<
+                         UTF16_SURROGATE_SHIFT) |
+                        (second & UTF16_SURROGATE_MASK));
+               uni_ucs4_to_utf8_c(ucs, dest);
+               return 0;
+       }
+
+       /* the character fits into one 16-bit unit */
+       uni_ucs4_to_utf8_c(first, dest);
+       *_pos = (start + 2) % 4;
+       return 0;
+}
+
+/* Decode one encoded section to UTF-8 and append the result to dest.
+
+   On entry *_src points to the first character after the '&' that
+   started the section. The section must be terminated with '-'.
+   Returns 0 if ok and updates *_src to point to the character after the
+   terminating '-'. Returns -1 if the section contains a character that
+   isn't in the alphabet, if the terminating '-' is missing or if the
+   decoded bytes aren't valid UTF-16. On failure *_src isn't changed, but
+   dest may already contain partial output.
+
+   The decoded bytes go through output[], which is used as a 4 byte ring
+   buffer: outpos counts all the bytes decoded so far and outstart is the
+   index of the first byte that hasn't been converted to UTF-8 yet. */
+static int mbase64_decode_to_utf8(string_t *dest, const char **_src)
+{
+       const char *src = *_src;
+       unsigned char input[4], output[4];
+       unsigned int outstart = 0, outpos = 0;
+
+       while (*src != '-') {
+               /* src[0] has to be checked before src[1] is read: if src[0]
+                  is the terminating NUL, src[1] is already outside the
+                  string. */
+               input[0] = imap_b64dec[(uint8_t)src[0]];
+               if (input[0] == 0xff)
+                       return -1;
+               input[1] = imap_b64dec[(uint8_t)src[1]];
+               if (input[1] == 0xff)
+                       return -1;
+
+               output[outpos % 4] = (input[0] << 2) | (input[1] >> 4);
+               if (++outpos % 4 == outstart) {
+                       /* the ring buffer is full */
+                       if (utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
+                               return -1;
+               }
+
+               input[2] = imap_b64dec[(uint8_t)src[2]];
+               if (input[2] == 0xff) {
+                       /* the section may end after two characters */
+                       if (src[2] != '-')
+                               return -1;
+
+                       src += 2;
+                       break;
+               }
+
+               output[outpos % 4] = (input[1] << 4) | (input[2] >> 2);
+               if (++outpos % 4 == outstart) {
+                       if (utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
+                               return -1;
+               }
+
+               input[3] = imap_b64dec[(uint8_t)src[3]];
+               if (input[3] == 0xff) {
+                       /* the section may end after three characters */
+                       if (src[3] != '-')
+                               return -1;
+
+                       src += 3;
+                       break;
+               }
+
+               output[outpos % 4] = ((input[2] << 6) & 0xc0) | input[3];
+               if (++outpos % 4 == outstart) {
+                       if (utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
+                               return -1;
+               }
+
+               src += 4;
+       }
+       if (outstart != outpos % 4) {
+               /* convert the bytes that are still left in the buffer */
+               if (utf16buf_to_utf8(dest, output, &outstart,
+                                    (4 + outpos - outstart) % 4) < 0)
+                       return -1;
+       }
+
+       /* found ending '-' */
+       *_src = src + 1;
+       return 0;
+}
+
+/* Convert the IMAP-UTF-7 string src to UTF-8 and append it to dest.
+   "&-" is converted to '&', other "&...-" sections are decoded and the
+   rest of the 7-bit characters are copied as they are.
+   Returns 0 if ok, -1 if src isn't valid: it contains an 8-bit
+   character, an encoded section can't be decoded, or an encoded section
+   is immediately followed by another one ("&...-&...-"). On failure dest
+   may already contain partial output. */
+int imap_utf7_to_utf8(const char *src, string_t *dest)
+{
+       const char *p;
+
+       for (p = src; *p != '\0'; p++) {
+               if (*p == '&' || (unsigned char)*p >= 0x80)
+                       break;
+       }
+       if (*p == '\0') {
+               /* no IMAP-UTF-7 encoded characters */
+               str_append(dest, src);
+               return 0;
+       }
+       if ((unsigned char)*p >= 0x80) {
+               /* 8bit characters - the input is broken */
+               return -1;
+       }
+
+       /* at least one encoded character */
+       str_append_n(dest, src, p-src);
+       while (*p != '\0') {
+               if (*p == '&') {
+                       if (*++p == '-') {
+                               str_append_c(dest, '&');
+                               p++;
+                       } else {
+                               if (mbase64_decode_to_utf8(dest, &p) < 0)
+                                       return -1;
+                               if (p[0] == '&' && p[1] != '-') {
+                                       /* &...-& */
+                                       return -1;
+                               }
+                       }
+               } else if ((unsigned char)*p >= 0x80) {
+                       /* 8bit characters are as broken after the first
+                          '&' as they are before it */
+                       return -1;
+               } else {
+                       str_append_c(dest, *p++);
+               }
+       }
+       return 0;
+}
diff --git a/src/lib-imap/imap-utf7.h b/src/lib-imap/imap-utf7.h
new file mode 100644 (file)
index 0000000..725d6a9
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef IMAP_UTF7_H
+#define IMAP_UTF7_H
+
+/* Convert a UTF-8 string to IMAP-UTF-7 and append the result to dest.
+   Returns 0 if ok, -1 if src isn't valid UTF-8. On failure dest may
+   already contain partially converted output. */
+int imap_utf8_to_utf7(const char *src, string_t *dest);
+/* Convert an IMAP-UTF-7 string to UTF-8 and append the result to dest.
+   Returns 0 if ok, -1 if src isn't valid IMAP-UTF-7. On failure dest may
+   already contain partially converted output. */
+int imap_utf7_to_utf8(const char *src, string_t *dest);
+
+#endif
index a8041d859516c9f3a36f2ae0dd3d2cdc084e37a5..71c21965cb75130faaa775dacd029bb84640b18e 100644 (file)
@@ -1,7 +1,10 @@
 /* Copyright (c) 2008 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
+#include "str.h"
+#include "unichar.h"
 #include "imap-match.h"
+#include "imap-utf7.h"
 #include "test-common.h"
 
 struct test_imap_match {
@@ -75,10 +78,99 @@ static void test_imap_match(void)
        }
 }
 
+/* Tests the IMAP-UTF-7 conversions in three parts:
+    1. fixed UTF-8 -> IMAP-UTF-7 cases, each converted back as well
+    2. round trips of generated strings using characters on both sides
+       of the surrogate pair limit (U+FFFF..U+10010)
+    3. IMAP-UTF-7 strings that the decoder has to reject
+   The results are reported with test_out(). */
+static void test_imap_utf7(void)
+{
+       /* { UTF-8 input, expected IMAP-UTF-7 output } pairs. A NULL output
+          means that the conversion is expected to fail. */
+       static const char *to_utf7[] = {
+               "&&x&&", "&-&-x&-&-",
+               "~peter/mail/台北/日本語", "~peter/mail/&U,BTFw-/&ZeVnLIqe-",
+               "tietäjä", "tiet&AOQ-j&AOQ-",
+               /* 8-bit bytes that aren't valid UTF-8. Written with escapes
+                  so that the encoding of this file can't turn them into
+                  a valid UTF-8 string. */
+               "p\xe4\xe4", NULL,
+               NULL
+       };
+       static const char *invalid_utf7[] = {
+               "&Jjo!",
+               "&U,BTFw-&ZeVnLIqe-",
+               NULL
+       };
+       string_t *src, *dest;
+       const char *orig_src;
+       unsigned int i, j;
+       unichar_t chr;
+       bool success, all_success = TRUE;
+
+       src = t_str_new(256);
+       dest = t_str_new(256);
+
+       for (i = 0; to_utf7[i] != NULL; i += 2) {
+               str_truncate(dest, 0);
+               if (imap_utf8_to_utf7(to_utf7[i], dest) < 0)
+                       success = to_utf7[i+1] == NULL;
+               else {
+                       success = to_utf7[i+1] != NULL &&
+                               strcmp(to_utf7[i+1], str_c(dest)) == 0;
+               }
+               if (!success) {
+                       test_out(t_strdup_printf("imap_utf8_to_utf7(%u)", i/2),
+                                FALSE);
+                       all_success = FALSE;
+               } else if (to_utf7[i+1] != NULL) {
+                       /* the expected output has to convert back to the
+                          original input */
+                       str_truncate(dest, 0);
+                       if (imap_utf7_to_utf8(to_utf7[i+1], dest) < 0 ||
+                           strcmp(to_utf7[i], str_c(dest)) != 0) {
+                               test_out(t_strdup_printf("imap_utf7_to_utf8(%u)", i/2),
+                                        FALSE);
+                               all_success = FALSE;
+                       }
+               }
+       }
+       if (all_success)
+               test_out("imap_utf8_to_utf7()", TRUE);
+
+       success = TRUE;
+       for (chr = 0xffff; chr <= 0x10010; chr++) {
+               for (i = 1; i <= 10; i++) {
+                       /* build a string of i copies of chr with some 'x'
+                          and '&' characters mixed in */
+                       str_truncate(src, 0);
+                       str_truncate(dest, 0);
+                       for (j = 0; j < i; j++) {
+                               if (j % 3 == 0)
+                                       str_append_c(src, 'x');
+                               if (j % 5 == 0)
+                                       str_append_c(src, '&');
+                               uni_ucs4_to_utf8_c(chr, src);
+                       }
+
+                       orig_src = t_strdup(str_c(src));
+                       str_truncate(src, 0);
+
+                       if (imap_utf8_to_utf7(orig_src, dest) < 0)
+                               success = FALSE;
+                       else if (imap_utf7_to_utf8(str_c(dest), src) < 0)
+                               success = FALSE;
+                       else
+                               success = strcmp(str_c(src), orig_src) == 0;
+                       if (!success)
+                               goto end;
+               }
+       }
+end:
+       test_out("imap_utf7_to_utf8(reverse)", success);
+
+       /* start from a clean state, so that a failure in the first part
+          doesn't hide the result of this one */
+       all_success = TRUE;
+       for (i = 0; invalid_utf7[i] != NULL; i++) {
+               str_truncate(dest, 0);
+               if (imap_utf7_to_utf8(invalid_utf7[i], dest) == 0) {
+                       test_out(t_strdup_printf("imap_utf7_to_utf8(invalid.%u)", i),
+                                FALSE);
+                       all_success = FALSE;
+               }
+       }
+       if (all_success)
+               test_out("imap_utf7_to_utf8(invalid)", TRUE);
+}
+
 int main(void) /* runs every test function between test_init() and test_deinit() */
 {
        test_init();
 
        test_imap_match();
+       test_imap_utf7();
        return test_deinit(); /* the exit status is whatever test_deinit() returns */
 }