str_append_c(dest, '-');
}
-static const char *imap_utf8_first_encode_char(const char *str)
+static const char * /* first character of str that cannot be copied through as-is, or NULL if there is none */
+imap_utf8_first_encode_char(const char *str, char escape_char)
{
const char *p;
for (p = str; *p != '\0'; p++) {
- if (*p == '&' || *p < 0x20 || *p >= 0x7f)
+ if (*p == '&' || *p < 0x20 || *p >= 0x7f || *p == escape_char) /* an escape_char of '\0' never matches: the loop stops at the terminating NUL */
return p;
}
return NULL;
}
-int imap_utf8_to_utf7(const char *src, string_t *dest)
+int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r)
+{
+	unsigned char value = 0;
+	unsigned int pos;
+
+	/* Decode exactly two hex digits, most significant nibble first.
+	   Uppercase A-F is rejected on purpose: accepting a single spelling
+	   keeps the escaped output reversible. A NUL in either position is
+	   not a hex digit, so short strings fail without reading past their
+	   terminator. */
+	for (pos = 0; pos < 2; pos++) {
+		const char digit = str[pos];
+		unsigned char nibble;
+
+		if (digit >= '0' && digit <= '9')
+			nibble = digit - '0';
+		else if (digit >= 'a' && digit <= 'f')
+			nibble = digit - 'a' + 10;
+		else
+			return -1;
+		value = value * 0x10 + nibble;
+	}
+	/* *chr_r is only written once both digits have been accepted. */
+	*chr_r = value;
+	return 0;
+}
+
+static int
+imap_utf8_to_utf7_int(const char *src, char escape_char, string_t *dest)
{
const char *p;
unichar_t chr;
uint8_t *utf16, *u;
uint16_t u16;
+ unsigned char c;
- p = imap_utf8_first_encode_char(src);
+ p = imap_utf8_first_encode_char(src, escape_char);
if (p == NULL) {
/* no characters that need to be encoded */
str_append(dest, src);
str_append_data(dest, src, p-src);
utf16 = t_malloc0(MALLOC_MULTIPLY(strlen(p), 2));
while (*p != '\0') {
+ if (*p == escape_char &&
+ imap_escaped_utf8_hex_to_char(p+1, &c) == 0) {
+ str_append_c(dest, c);
+ p += 3;
+ continue;
+ }
if (*p == '&') {
str_append(dest, "&-");
p++;
return 0;
}
+int imap_utf8_to_utf7(const char *src, string_t *dest)
+{
+	/* Plain conversion: '\0' as the escape character disables escape
+	   handling, because the helper only compares it against characters
+	   that precede the terminating NUL. */
+	const char no_escape = '\0';
+
+	return imap_utf8_to_utf7_int(src, no_escape, dest);
+}
+
+int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest)
+{
+	int ret;
+
+	/* '&' is not accepted as the escape character; the converter gives
+	   '&' its own handling (it is written out as "&-"). */
+	i_assert(escape_char != '&');
+
+	/* Same conversion as imap_utf8_to_utf7(), with <escape_char><hex>
+	   sequences decoded back to the byte they stand for. */
+	ret = imap_utf8_to_utf7_int(src, escape_char, dest);
+	return ret;
+}
+
int t_imap_utf8_to_utf7(const char *src, const char **dest_r)
{
string_t *str;
int ret;
- if (imap_utf8_first_encode_char(src) == NULL) {
+ if (imap_utf8_first_encode_char(src, '\0') == NULL) {
*dest_r = src;
return 0;
}
return 0;
}
-int imap_utf7_to_utf8(const char *src, string_t *dest)
+static int
+imap_utf7_to_utf8_int(const char *src, const char *escape_chars, string_t *dest)
{
const char *p;
for (p = src; *p != '\0'; p++) {
- if (*p < 0x20 || *p >= 0x7f)
- return -1;
- if (*p == '&')
+ if (*p < 0x20 || *p >= 0x7f) {
+ if (escape_chars[0] == '\0')
+ return -1;
+ break;
+ }
+ if (*p == '&' || strchr(escape_chars, *p) != NULL)
break;
}
if (*p == '\0') {
/* at least one encoded character */
str_append_data(dest, src, p-src);
while (*p != '\0') {
- if (*p == '&') {
+ if (strchr(escape_chars, *p) != NULL ||
+ *p < 0x20 || *p >= 0x7f) {
+ str_printfa(dest, "%c%02x", escape_chars[0],
+ (unsigned char)*p);
+ p++;
+ } else if (*p == '&') {
if (*++p == '-') {
str_append_c(dest, '&');
p++;
} else {
- if (mbase64_decode_to_utf8(dest, &p) < 0)
- return -1;
+ size_t orig_size = str_len(dest);
+ if (mbase64_decode_to_utf8(dest, &p) < 0) {
+ if (escape_chars[0] == '\0')
+ return -1;
+ str_truncate(dest, orig_size);
+ str_printfa(dest, "%c26", escape_chars[0]);
+ }
}
} else {
str_append_c(dest, *p++);
return 0;
}
+int imap_utf7_to_utf8(const char *src, string_t *dest)
+{
+	/* Strict conversion: with an empty escape set the helper reports
+	   invalid input by returning -1 instead of escaping it. */
+	const char *no_escape_chars = "";
+
+	return imap_utf7_to_utf8_int(src, no_escape_chars, dest);
+}
+
+void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars,
+			       string_t *dest)
+{
+	int ret;
+
+	/* escape_chars[0] is the character written in front of every
+	   escaped byte, and it must not be '&'. */
+	i_assert(escape_chars[0] != '&');
+
+	/* Invalid input is escaped rather than rejected, so the helper is
+	   not expected to fail here.
+	   NOTE(review): with an empty escape_chars the helper can still
+	   return -1 for invalid input, which would reach i_unreached() -
+	   callers presumably always pass a non-empty set; confirm. */
+	ret = imap_utf7_to_utf8_int(src, escape_chars, dest);
+	if (ret < 0)
+		i_unreached();
+}
+
bool imap_utf7_is_valid(const char *src)
{
const char *p;
valid UTF-8. */
int imap_utf8_to_utf7(const char *src, string_t *dest);
int t_imap_utf8_to_utf7(const char *src, const char **dest_r);
+/* Like imap_utf8_to_utf7(), but decode all <escape_char><hex> instances,
+ where <hex> is two lowercase hexadecimal digits. escape_char must not be
+ '&'. Returns -1 if src isn't valid UTF-8. Note that invalid <escape_char>
+ content isn't treated as an error - it's simply passed through. */
+int imap_escaped_utf8_to_utf7(const char *src, char escape_char, string_t *dest);
+/* For manually parsing the <hex> after <escape_char>. <hex> must be exactly
+ two lowercase hexadecimal digits ([0-9a-f]); uppercase digits are rejected
+ so that the escaped output stays reversible. Returns 0 on success and
+ stores the decoded byte in *chr_r, -1 if str doesn't point to valid <hex>
+ (*chr_r is left untouched in that case). */
+int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r);
+
/* Convert IMAP-UTF-7 string to UTF-8. Returns 0 if ok, -1 if src isn't
valid IMAP-UTF-7. */
int imap_utf7_to_utf8(const char *src, string_t *dest);
+/* Like imap_utf7_to_utf8(), but write invalid input as <escape_chars[0]><hex>,
+ where <hex> is the byte's value as two lowercase hexadecimal digits.
+ All the characters in escape_chars[] are escaped in the same way. This
+ allows converting the escaped output back to the original (broken)
+ IMAP-UTF-7 input. escape_chars[0] must not be '&'. */
+void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars,
+ string_t *dest);
/* Returns TRUE if the string is valid IMAP-UTF-7 string. */
bool imap_utf7_is_valid(const char *src);
{ NULL, "&Jjo!" },
{ NULL, "&U,BTFw-&ZeVnLIqe-" } /* unnecessary shift */
};
- string_t *dest;
+ string_t *dest, *dest2;
unsigned int i;
dest = t_str_new(256);
+ dest2 = t_str_new(256);
test_begin("imap mutf7 examples");
for (i = 0; i < N_ELEMENTS(tests); i++) {
+ str_truncate(dest, 0);
if (tests[i].utf8 != NULL) {
- str_truncate(dest, 0);
if (imap_utf8_to_utf7(tests[i].utf8, dest) < 0)
test_assert_idx(tests[i].mutf7 == NULL, i);
else
test_assert_idx(null_strcmp(tests[i].mutf7, str_c(dest)) == 0, i);
+ } else {
+ /* invalid mUTF-7 - test that escaping works */
+ str_truncate(dest2, 0);
+ imap_utf7_to_utf8_escaped(tests[i].mutf7, "%", dest);
+ imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+ test_assert_idx(strcmp(tests[i].mutf7, str_c(dest2)) == 0, i);
}
if (tests[i].mutf7 != NULL) {
str_truncate(dest, 0);
test_assert_idx(imap_utf7_is_valid(tests[i].mutf7) != (tests[i].utf8 == NULL), i);
}
}
+
+ str_truncate(dest, 0);
+ imap_utf7_to_utf8_escaped(".foo%", "%.", dest);
+ test_assert_strcmp(str_c(dest), "%2efoo%25");
+
+ str_truncate(dest, 0);
+ test_assert(imap_escaped_utf8_to_utf7("%foo%2ebar", '%', dest) == 0);
+ test_assert_strcmp(str_c(dest), "%foo.bar");
+
test_end();
}
static const char mb64[64]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
static void test_imap_utf7_non_utf16(void)
{
+ string_t *dest, *dest2;
unsigned int i;
test_begin("imap mutf7 non-utf16");
+ dest = t_str_new(32);
+ dest2 = t_str_new(32);
for (i = 0; i <= 255; ++i) {
/* Invalid, code a single 8-bit octet */
const char csrc[] = {
'\0'
};
test_assert_idx(!imap_utf7_is_valid(csrc), i);
+
+ /* escaping can reverse the original string */
+ str_truncate(dest, 0);
+ str_truncate(dest2, 0);
+ imap_utf7_to_utf8_escaped(csrc, "%", dest);
+ imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+ test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i);
}
for (i = 0; i <= 255; ++i) {
/* Invalid, U+00E4 followed by a single octet */
'\0'
};
test_assert_idx(!imap_utf7_is_valid(csrc), i);
+
+ /* escaping can reverse the original string */
+ str_truncate(dest, 0);
+ str_truncate(dest2, 0);
+ imap_utf7_to_utf8_escaped(csrc, "%", dest);
+ imap_escaped_utf8_to_utf7(str_c(dest), '%', dest2);
+ test_assert_idx(strcmp(csrc, str_c(dest2)) == 0, i);
}
test_end();
}