]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-mail: Make FETCH ENVELOPE and SEARCH work on the human-readable domains
author Arnt Gulbrandsen <arnt@gulbrandsen.priv.no>
Fri, 1 Mar 2024 16:47:28 +0000 (17:47 +0100)
committer aki.tuomi <aki.tuomi@open-xchange.com>
Fri, 28 Jun 2024 09:48:14 +0000 (09:48 +0000)
This stores the Unicode form of domains in all indexes, so that searching
uses, and server-side parsing shows, the human-readable form of all
addresses.

configure.ac
src/lib-mail/rfc822-parser.c
src/lib-mail/rfc822-parser.h
src/lib-mail/test-rfc822-parser.c

index ba6e1bd5cfb31a77731dc3322c070214b0865b89..66e3f5eaba14101ceb0e164a3e0fdb87a7dc8a44 100644 (file)
@@ -50,6 +50,13 @@ AC_ARG_ENABLE([pro-build],
   [is_pro_build=no]
 )
 
+AC_ARG_ENABLE([experimental-mail-utf8],
+AS_HELP_STRING([--enable-experimental-mail-utf8], [Enable experimental support for SMTPUTF8 and UTF8=ACCEPT]),
+       AS_IF([test "x$enableval" = xyes], [
+              AC_DEFINE([EXPERIMENTAL_MAIL_UTF8],, [Build with SMTPUTF8 and UTF8=ACCEPT support])
+       ])
+)
+
 AC_ARG_WITH(shared-libs,
 AS_HELP_STRING([--with-shared-libs], [Link binaries using shared Dovecot libraries (default)]),
        want_shared_libs=$withval,
index c8595b4187b5ee377b046a7699ddec365429a100..606ede87159158e4a68544127c5cc629a61c966c 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "lib.h"
 #include "str.h"
+#include "punycode.h"
 #include "strescape.h"
 #include "rfc822-parser.h"
 
@@ -412,6 +413,34 @@ rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
        return -1;
 }
 
+/* Append input to result with each "xn--" (punycode) label decoded to UTF-8.
+   Labels without the prefix, or which fail to decode, are appended as is.
+   Only input[0..len) is read, so it doesn't need to be NUL-terminated. */
+void rfc822_decode_punycode(const char *input, size_t len, string_t *result)
+{
+       const char *pos = input;
+       const char *end = CONST_PTR_OFFSET(input, len);
+
+       while (pos < end) {
+               /* memchr() instead of strchr(): never search past end */
+               const char *delim = memchr(pos, '.', (size_t)(end - pos));
+               size_t label_len, result_start = str_len(result);
+               if (delim == NULL)
+                       delim = end;
+               label_len = (size_t)(delim - pos);
+               if (label_len < 4 || memcmp(pos, "xn--", 4) != 0 ||
+                   punycode_decode(pos + 4, label_len - 4, result) < 0) {
+                       /* Not decodable: undo partial output, keep label */
+                       str_truncate(result, result_start);
+                       str_append_data(result, pos, label_len);
+               }
+               if (delim == end)
+                       break;
+               str_append_c(result, '.');
+               pos = delim + 1;
+       }
+}
+
 int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
 {
        /*
@@ -428,8 +457,21 @@ int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
 
        if (*ctx->data == '[')
                return rfc822_parse_domain_literal(ctx, str);
-       else
-               return rfc822_parse_dot_atom(ctx, str);
+       else {
+               int ret = rfc822_parse_dot_atom(ctx, str);
+#ifdef EXPERIMENTAL_MAIL_UTF8
+               if (ret == 0) {
+                       size_t start_pos = str_len(str);
+                       string_t *u = t_str_new(64);
+                       const char *data = t_strndup(str_data(str) + start_pos,
+                                                    str_len(str) - start_pos);
+                       rfc822_decode_punycode(data, strlen(data), u);
+                       str_truncate(str, start_pos);
+                       str_append_str(str, u);
+               }
+#endif
+               return ret;
+       }
 }
 
 int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str)
index c001f761f0aef2b522323b92ab43aa231ebf75bf..84e9bda67137cc14a2ee4149b9c058455b38a3d8 100644 (file)
@@ -68,4 +68,8 @@ int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str);
 int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
                               const char **key_r, string_t *value);
 
+/* Decode the "xn--" (punycode) labels of a domain name, appending the UTF8
+   form to result. Labels that can't be decoded are appended unchanged. */
+void rfc822_decode_punycode(const char *input, size_t len, string_t *result);
+
 #endif
index a0e7ad04b8e868eb5e20cbbce26aab5cc339a7ea..4a95df9a5d18a7aa1c2b42255e3203cf19bc9bcc 100644 (file)
@@ -199,6 +199,30 @@ static void test_rfc822_parse_domain_literal(void)
        test_end();
 }
 
+/* rfc822_decode_punycode(): decoded, undecodable and plain ASCII labels */
+static void test_rfc822_decode_punycode(void)
+{
+       const struct test_case {
+               const char *in, *out;
+       } cases[] = {
+               { .in = "xn--gr-zia.org", .out = "gr\xc3\xa5.org" },
+               { .in = "xn--gr-zia", .out = "gr\xc3\xa5" },
+               { .in = "org.xn--gr-zia", .out = "org.gr\xc3\xa5" },
+               { .in = "org.xn--gr-zia.org", .out = "org.gr\xc3\xa5.org" },
+               { .in = "org.xn--zz-zzzz.org", .out = "org.xn--zz-zzzz.org" },
+               { .in = "example.org", .out = "example.org" },
+               { .in = "xn--gr-zia.org.", .out = "gr\xc3\xa5.org." },
+       };
+       string_t *res = t_str_new(64);
+       test_begin("rfc822 decode punycode");
+       for (size_t i = 0; i < N_ELEMENTS(cases); i++) {
+               str_truncate(res, 0);
+               rfc822_decode_punycode(cases[i].in, strlen(cases[i].in), res);
+               test_assert_strcmp_idx(str_c(res), cases[i].out, i);
+       }
+       test_end();
+}
+
 #undef TEST_STRING
 #define TEST_STRING(a) .input = (const unsigned char*)a, .input_len = sizeof(a)-1
 
@@ -436,6 +460,7 @@ int main(void)
                test_rfc822_parse_quoted_string,
                test_rfc822_parse_dot_atom,
                test_rfc822_parse_domain_literal,
+               test_rfc822_decode_punycode,
                test_rfc822_parse_content_type,
                test_rfc822_parse_content_param,
                test_rfc822_parse_content_type_param,