]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-smtp: smtp-address - Add SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN parse flag.
authorStephan Bosch <stephan.bosch@open-xchange.com>
Fri, 13 Sep 2019 00:04:59 +0000 (02:04 +0200)
committerStephan Bosch <stephan.bosch@open-xchange.com>
Fri, 4 Oct 2019 17:13:42 +0000 (19:13 +0200)
Makes the path parser try to skip over a broken address to allow working around
syntax errors in e.g. the sender address for the MAIL command.

src/lib-smtp/smtp-address.c
src/lib-smtp/smtp-address.h
src/lib-smtp/test-smtp-address.c

index ff5cead53a9f21f10ac043795b73a6e91a023652..4bc7f82aa06835f301cb066334da14507b1fafe5 100644 (file)
@@ -49,6 +49,8 @@ struct smtp_address_parser {
 
        bool parse:1;
        bool path:1;
+       bool parsed_any:1;
+       bool totally_broken:1;
 };
 
 static int
@@ -88,18 +90,25 @@ smtp_parse_localpart(struct smtp_parser *parser, const char **localpart_r)
 }
 
 static int
-smtp_address_parser_find_end(struct smtp_address_parser *aparser)
+smtp_address_parser_find_end(struct smtp_address_parser *aparser,
+                            enum smtp_address_parse_flags flags)
 {
        struct smtp_parser *parser = &aparser->parser;
        const char *begin = (const char *)parser->begin, *end;
+       const char **address_p = NULL;
 
        if (aparser->address_end != NULL)
                return 0;
 
-       if (smtp_address_parse_any(begin, NULL, &end) < 0) {
+       if (aparser->parse &&
+           HAS_ALL_BITS(flags, SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW))
+               address_p = &aparser->address.raw;
+       if (smtp_address_parse_any(begin, address_p, &end) < 0) {
                parser->error = "Invalid character";
+               aparser->totally_broken = TRUE;
                return -1;
        }
+       aparser->parsed_any = TRUE;
        aparser->address_end = (const unsigned char *)end;
        if (aparser->path) {
                i_assert(aparser->address_end > parser->begin);
@@ -128,7 +137,7 @@ smtp_parse_mailbox(struct smtp_address_parser *aparser,
                        return ret;
        } else {
                /* find the end of the address */
-               if (smtp_address_parser_find_end(aparser) < 0)
+               if (smtp_address_parser_find_end(aparser, flags) < 0)
                        return -1;
                /* use the right-most '@' as separator */
                dp = aparser->address_end - 1;
@@ -333,6 +342,40 @@ int smtp_address_parse_mailbox(pool_t pool, const char *mailbox,
        return 0;
 }
 
+static int
+smtp_address_parse_path_broken(struct smtp_address_parser *aparser,
+                              enum smtp_address_parse_flags flags,
+                              const char **endp_r) ATTR_NULL(3)
+{
+       struct smtp_parser *parser = &aparser->parser;
+       const char *begin = (const char *)parser->begin, *end;
+       const char *raw = aparser->address.raw;
+       const char **address_p = NULL;
+
+       i_zero(&aparser->address);
+       aparser->address.raw = raw;
+
+       if (aparser->totally_broken ||
+           HAS_NO_BITS(flags, SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN))
+               return -1;
+       i_assert(aparser->parse);
+       if (aparser->parsed_any) {
+               if (endp_r != NULL)
+                       *endp_r = (const char *)aparser->address_end;
+               return 0;
+       }
+
+       if (HAS_ALL_BITS(flags, SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW))
+               address_p = &aparser->address.raw;
+       if (smtp_address_parse_any(begin, address_p, &end) < 0) {
+               /* totally broken */
+               return -1;
+       }
+       if (endp_r != NULL)
+               *endp_r = end;
+       return 0;
+}
+
 int smtp_address_parse_path_full(pool_t pool, const char *path,
                                 enum smtp_address_parse_flags flags,
                                 struct smtp_address **address_r,
@@ -345,6 +388,8 @@ int smtp_address_parse_path_full(pool_t pool, const char *path,
                *address_r = NULL;
        if (error_r != NULL)
                *error_r = NULL;
+       if (endp_r != NULL)
+               *endp_r = NULL;
 
        if (path == NULL || *path == '\0') {
                if ((flags & SMTP_ADDRESS_PARSE_FLAG_ALLOW_EMPTY) == 0 ||
@@ -355,6 +400,8 @@ int smtp_address_parse_path_full(pool_t pool, const char *path,
                }
                if (address_r != NULL)
                        *address_r = p_new(pool, struct smtp_address, 1);
+               if (endp_r != NULL)
+                       *endp_r = path;
                return 0;
        }
 
@@ -368,18 +415,36 @@ int smtp_address_parse_path_full(pool_t pool, const char *path,
                        *error_r = (ret < 0 ? aparser.parser.error :
                                "Missing '<' at beginning of path");
                }
-               return -1;
-       }
-       if (endp_r != NULL)
-               *endp_r = (const char *)aparser.parser.cur;
-       else if (aparser.parser.cur != aparser.parser.end) {
+               ret = -1;
+       } else if (endp_r != NULL) {
+               if (aparser.parser.cur == aparser.parser.end ||
+                   *aparser.parser.cur == ' ' ||
+                   HAS_NO_BITS(flags, SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN)) {
+                       *endp_r = (const char *)aparser.parser.cur;
+                       ret = 0;
+               } else {
+                       if (error_r != NULL)
+                               *error_r = "Invalid character in path";
+                       ret = -1;
+               }
+       } else if (aparser.parser.cur == aparser.parser.end) {
+               ret = 0;
+       } else {
                if (error_r != NULL)
                        *error_r = "Invalid character in path";
-               return -1;
+               ret = -1;
        }
 
-       if (HAS_ALL_BITS(flags, SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW) &&
-           aparser.address.localpart != NULL) {
+       if (ret < 0) {
+               /* normal parsing failed */
+               if (smtp_address_parse_path_broken(&aparser, flags,
+                                                  endp_r) < 0) {
+                       /* failed to parse it as a broken address as well */
+                       return -1;
+               }
+               /* broken address */
+       } else if (HAS_ALL_BITS(flags, SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW) &&
+                  aparser.address.localpart != NULL) {
                if (aparser.path &&
                    ((const unsigned char *)(path + 1) < aparser.parser.cur)) {
                        aparser.address.raw = t_strdup_until(
@@ -392,7 +457,7 @@ int smtp_address_parse_path_full(pool_t pool, const char *path,
 
        if (address_r != NULL)
                *address_r = smtp_address_clone(pool, &aparser.address);
-       return 0;
+       return ret;
 }
 
 int smtp_address_parse_path(pool_t pool, const char *path,
index d59d82822ae7214f0ad43a758d4c68a6317272ad..f5e391d99ff9fea3217159b7446aacb0da3788f5 100644 (file)
@@ -23,9 +23,18 @@ enum smtp_address_parse_flags {
         */
        SMTP_ADDRESS_PARSE_FLAG_ALLOW_BAD_LOCALPART = BIT(4),
        /* Store an unparsed copy of the address in the `raw' field of struct
-          smtp_address. This flag is only relevant for
-          smtp_address_parse_path(). */
+          smtp_address. With SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN, the broken
+          address will be stored there. This flag is only relevant for
+          smtp_address_parse_path(). */
        SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW        = BIT(5),
+       /* Try to skip over a broken address to allow working around syntax
+          errors in e.g. the sender address for the MAIL command. This flag is
+          only relevant for smtp_address_parse_path*(). The parser will return
+          failure, but it will return a broken address which is equivalent
+          to <>. The raw broken address string is available in the address->raw
+          field. When the broken address contains control characters or is
+          badly delimited, parsing will still fail completely. */
+       SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN       = BIT(6),
 };
 
 struct smtp_address {
@@ -63,7 +72,10 @@ int smtp_address_parse_mailbox(pool_t pool, const char *mailbox,
    returned in address_r. When address_r is NULL, the provided string will be
    verified for validity as a path only. The endp_r parameter is used to
    return a pointer to the end of the path string, so that the caller can
-   continue parsing from there. */
+   continue parsing from there. When the SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN
+   flag is set, a broken address will be returned, even when the return value
+   is -1 (see above). If it is totally broken, *endp_r will then be NULL.
+ */
 int smtp_address_parse_path_full(pool_t pool, const char *path,
                                 enum smtp_address_parse_flags flags,
                                 struct smtp_address **address_r,
@@ -72,7 +84,9 @@ int smtp_address_parse_path_full(pool_t pool, const char *path,
 /* Parse the RFC 5321 address from the provided path string. Returns 0 when
    the address was parsed successfully and -1 upon error. The address is
    returned in address_r. When address_r is NULL, the provided string will be
-   verified for validity as a path only. */
+   verified for validity as a path only. When the
+   SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN flag is set, a broken address will be
+   returned, even when the return value is -1 (see above). */
 int smtp_address_parse_path(pool_t pool, const char *path,
                            enum smtp_address_parse_flags flags,
                            struct smtp_address **address_r,
@@ -109,7 +123,7 @@ void smtp_address_detail_parse_temp(const char *delimiters,
    pointer to the end of the path string, so that the caller can continue
    parsing from there.*/
 int smtp_address_parse_any(const char *in, const char **address_r,
-                          const char **endp_r);
+                          const char **endp_r) ATTR_NULL(2, 3);
 
 /*
  * SMTP address construction
index 08ef9b3e2cd1f0c5e3cafad0117f420b59c58d72..b13622ba66a0b71779eab217899099cf5c909ac5 100644 (file)
@@ -252,6 +252,145 @@ valid_path_parse_tests[] = {
                             .raw = "user@domain.tld"},
                .output = "<user@domain.tld>"
        },
+       /* Broken */
+       {
+               .input = "<>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_ALLOW_EMPTY |
+                        SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL, .raw = NULL }
+       },
+       {
+               .input = "<user>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_ALLOW_LOCALPART |
+                        SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = "user", .domain = NULL,
+                            .raw = "user" }
+       },
+       {
+               .input = "<user@domain.tld>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = "user", .domain = "domain.tld",
+                            .raw = "user@domain.tld" }
+       },
+       {
+               .input = "<@otherdomain.tld,@yetanotherdomain.tld:user@domain.tld>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = "user", .domain = "domain.tld",
+                            .raw = "@otherdomain.tld,@yetanotherdomain.tld:"
+                                   "user@domain.tld" },
+               .output = "<user@domain.tld>"
+       },
+       {
+               .input = "user@domain.tld",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_BRACKETS_OPTIONAL |
+                        SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = "user", .domain = "domain.tld",
+                            .raw = "user@domain.tld"},
+               .output = "<user@domain.tld>"
+       },
+       {
+               .input = "u\"ser",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_ALLOW_LOCALPART |
+                        SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "u\"ser" },
+               .output = "<>",
+       },
+       {
+               .input = "user\"@domain.tld",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "user\"@domain.tld" },
+               .output = "<>",
+       },
+       {
+               .input = "<u\"ser>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_ALLOW_LOCALPART |
+                        SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "u\"ser" },
+               .output = "<>",
+       },
+       {
+               .input = "<user\"@domain.tld>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "user\"@domain.tld" },
+               .output = "<>",
+       },
+       {
+               .input = "bla$die%bla@die&bla",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "bla$die%bla@die&bla" },
+               .output = "<>",
+       },
+       {
+               .input = "/@)$@)BLAARGH!@#$$",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "/@)$@)BLAARGH!@#$$" },
+               .output = "<>",
+       },
+       {
+               .input = "</@)$@)BLAARGH!@#$$>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "/@)$@)BLAARGH!@#$$" },
+               .output = "<>",
+       },
+       {
+               .input = "/@)$@)BLAARGH!@#$$",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN  |
+                        SMTP_ADDRESS_PARSE_FLAG_ALLOW_BAD_LOCALPART |
+                        SMTP_ADDRESS_PARSE_FLAG_BRACKETS_OPTIONAL,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "/@)$@)BLAARGH!@#$$" },
+               .output = "<>",
+       },
+       {
+               .input = "</@)$@)BLAARGH!@#$$>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN |
+                        SMTP_ADDRESS_PARSE_FLAG_ALLOW_BAD_LOCALPART |
+                        SMTP_ADDRESS_PARSE_FLAG_BRACKETS_OPTIONAL,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "/@)$@)BLAARGH!@#$$" },
+               .output = "<>",
+       },
+       {
+               .input = "f\xc3\xb6\xc3\xa4@\xc3\xb6\xc3\xa4",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN |
+                        SMTP_ADDRESS_PARSE_FLAG_ALLOW_BAD_LOCALPART |
+                        SMTP_ADDRESS_PARSE_FLAG_BRACKETS_OPTIONAL,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "f\xc3\xb6\xc3\xa4@\xc3\xb6\xc3\xa4" },
+               .output = "<>",
+       },
+       {
+               .input = "<f\xc3\xb6\xc3\xa4@\xc3\xb6\xc3\xa4>",
+               .flags = SMTP_ADDRESS_PARSE_FLAG_PRESERVE_RAW |
+                        SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN |
+                        SMTP_ADDRESS_PARSE_FLAG_ALLOW_BAD_LOCALPART |
+                        SMTP_ADDRESS_PARSE_FLAG_BRACKETS_OPTIONAL,
+               .address = { .localpart = NULL, .domain = NULL,
+                            .raw = "f\xc3\xb6\xc3\xa4@\xc3\xb6\xc3\xa4" },
+               .output = "<>",
+       },
 };
 
 unsigned int valid_path_parse_test_count =
@@ -281,7 +420,8 @@ test_smtp_path_equal(const struct smtp_address *test,
                         null_strcmp(parsed->domain, test->domain) == 0);
        }
        if (parsed == NULL) {
-               /* nothing */
+               test_out_quiet(t_strdup_printf("address = (null)"),
+                              (test->raw == NULL));
        } else if (parsed->raw == NULL) {
                test_out_quiet(t_strdup_printf("address->raw = (null)"),
                               (parsed->raw == test->raw));
@@ -298,18 +438,21 @@ static void test_smtp_path_parse_valid(void)
 
        for (i = 0; i < valid_path_parse_test_count; i++) T_BEGIN {
                const struct valid_path_parse_test *test;
+               bool ignore_broken;
                struct smtp_address *address;
                const char *error = NULL, *output, *encoded;
                int ret;
 
                test = &valid_path_parse_tests[i];
+               ignore_broken = HAS_ALL_BITS(
+                       test->flags, SMTP_ADDRESS_PARSE_FLAG_IGNORE_BROKEN);
                ret = smtp_address_parse_path(pool_datastack_create(),
                                              test->input, test->flags,
                                              &address, &error);
 
                test_begin(t_strdup_printf("smtp path valid [%d]", i));
                test_out_reason(t_strdup_printf("parse(\"%s\")", test->input),
-                               ret == 0, error);
+                               (ret == 0 || ignore_broken), error);
 
                if (!test_has_failed()) {
                        test_smtp_path_equal(&test->address, address);