git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-http: http-url - Preserve significant path percent encoding through parse and...
author: Stephan Bosch <stephan.bosch@open-xchange.com>
Fri, 5 Sep 2025 14:33:18 +0000 (16:33 +0200)
committer: timo.sirainen <timo.sirainen@open-xchange.com>
Sun, 7 Sep 2025 13:53:33 +0000 (13:53 +0000)
Previously, %2f in the path part of the URL was parsed to '/' and not encoded
back to %2f when the (normalized) URL was written back to a string using
http_url_create(). Now, an encoded version of the path is preserved in struct
http_url, so that the distinction between real delimiters and encoded literals
is always preserved.

src/lib-http/http-url.c
src/lib-http/http-url.h
src/lib-http/test-http-url.c

index 2bdc2cfaf57d2fa885964fc4ece7898353f09dcd..926c24d9a75b2f1626c21dd81d0453ea2bd7ae3d 100644 (file)
@@ -153,6 +153,41 @@ static bool http_url_parse_authority_form(struct http_url_parser *url_parser)
        return TRUE;
 }
 
+static void
+http_url_parse_path_copy_from_base(struct http_url_parser *url_parser)
+{
+       struct uri_parser *parser = &url_parser->parser;
+       struct http_url *url = url_parser->url, *base = url_parser->base;
+
+       if (!url_parser->relative || url == NULL)
+               return;
+
+       if (base->path != NULL)
+               url->path = p_strdup(parser->pool, base->path);
+       else if (base->enc_path != NULL) {
+               const char *enc;
+               bool result;
+
+               result = uri_data_decode(parser, base->enc_path,
+                                        NULL, &enc);
+               i_assert(result);
+               url->path = p_strdup(parser->pool, enc);
+       }
+       if (base->enc_path != NULL) {
+               if (strcmp(url->path, base->enc_path) == 0)
+                       url->enc_path = url->path;
+               else
+                       url->enc_path = p_strdup(parser->pool, base->enc_path);
+       } else if (base->path != NULL) {
+               string_t *enc = t_str_new(256);
+               uri_append_path_data(enc, NULL, base->path);
+               if (strcmp(url->path, str_c(enc)) == 0)
+                       url->enc_path = url->path;
+               else
+                       url->enc_path = p_strdup(parser->pool, str_c(enc));
+       }
+}
+
 static int
 http_url_parse_path(struct http_url_parser *url_parser)
 {
@@ -160,7 +195,7 @@ http_url_parse_path(struct http_url_parser *url_parser)
        struct http_url *url = url_parser->url, *base = url_parser->base;
        const char *const *path;
        int path_relative;
-       string_t *fullpath = NULL;
+       string_t *enc_fullpath = NULL, *fullpath = NULL;
        int ret;
 
        /* path-abempty / path-absolute / path-noscheme / path-empty */
@@ -169,17 +204,27 @@ http_url_parse_path(struct http_url_parser *url_parser)
 
        /* Resolve path */
        if (ret == 0) {
-               if (url_parser->relative && url != NULL)
-                       url->path = p_strdup(parser->pool, base->path);
+               http_url_parse_path_copy_from_base(url_parser);
                return 0;
        }
 
-       if (url != NULL)
+       if (url != NULL) {
+               enc_fullpath = t_str_new(256);
                fullpath = t_str_new(256);
+       }
+
+       if (url != NULL && url_parser->relative && path_relative > 0 &&
+           (base->enc_path != NULL || base->path != NULL)) {
+               const char *base_path = base->enc_path;
+
+               if (base_path == NULL) {
+                       string_t *enc = t_str_new(256);
+                       uri_append_path_data(enc, NULL, base->path);
+                       base_path = str_c(enc);
+               }
 
-       if (url_parser->relative && path_relative > 0 && base->path != NULL) {
-               const char *pbegin = base->path;
-               const char *pend = base->path + strlen(base->path);
+               const char *pbegin = base_path;
+               const char *pend = base_path + strlen(base_path);
                const char *p = pend - 1;
 
                i_assert(*pbegin == '/');
@@ -197,8 +242,16 @@ http_url_parse_path(struct http_url_parser *url_parser)
                        if (p > pbegin) p--;
                }
 
-               if (url != NULL && pend > pbegin)
-                       str_append_data(fullpath, pbegin, pend - pbegin);
+               if (pend > pbegin) {
+                       const char *enc;
+                       bool result;
+
+                       str_append_data(enc_fullpath, pbegin, pend - pbegin);
+                       result = uri_data_decode(parser, str_c(enc_fullpath),
+                                                NULL, &enc);
+                       i_assert(result);
+                       str_append(fullpath, enc);
+               }
        }
 
        /* Append relative path */
@@ -209,14 +262,21 @@ http_url_parse_path(struct http_url_parser *url_parser)
                        return -1;
 
                if (url != NULL) {
+                       str_append_c(enc_fullpath, '/');
+                       str_append(enc_fullpath, *path);
                        str_append_c(fullpath, '/');
                        str_append(fullpath, part);
                }
                path++;
        }
 
-       if (url != NULL)
-               url->path = p_strdup(parser->pool, str_c(fullpath));
+       if (url != NULL) {
+               url->enc_path = p_strdup(parser->pool, str_c(enc_fullpath));
+               if (strcmp(str_c(fullpath), url->enc_path) == 0)
+                       url->path = url->enc_path;
+               else
+                       url->path = p_strdup(parser->pool, str_c(fullpath));
+       }
        return 1;
 }
 
@@ -555,6 +615,7 @@ void http_url_copy(pool_t pool, struct http_url *dest,
                   const struct http_url *src)
 {
        http_url_copy_authority(pool, dest, src);
+       dest->enc_path = p_strdup(pool, src->enc_path);
        dest->path = p_strdup(pool, src->path);
        dest->enc_query = p_strdup(pool, src->enc_query);
        dest->enc_fragment = p_strdup(pool, src->enc_fragment);
@@ -616,11 +677,16 @@ http_url_add_authority(string_t *urlstr, const struct http_url *url)
 static void
 http_url_add_target(string_t *urlstr, const struct http_url *url)
 {
-       if (url->path == NULL || *url->path == '\0') {
+       if ((url->enc_path == NULL || *url->enc_path == '\0') &&
+           (url->path == NULL || *url->path == '\0')) {
                /* Older syntax of RFC 2616 requires this slash at all times for
                   an absolute URL. */
                str_append_c(urlstr, '/');
+       } else if (url->enc_path != NULL && *url->enc_path != '\0') {
+               i_assert(*url->enc_path == '/');
+               str_append(urlstr, url->enc_path);
        } else {
+               i_assert(*url->path == '/');
                uri_append_path_data(urlstr, "", url->path);
        }
 
index 62d8922f35e908e1cc677fea32d646110eaba699..691cce3613876eb8bdee6a7b34994c16e166e399 100644 (file)
@@ -18,6 +18,7 @@ struct http_url {
        const char *password;
 
        /* path */
+       const char *enc_path; /* encoded */
        const char *path;
 
        /* ?query (still encoded) */
index 45eaef093b1d8d30273cbd5ab26c4cbe097e1d7f..f10c586412d09865286528700941fbbe0cbf1842 100644 (file)
@@ -724,7 +724,124 @@ static struct valid_http_url_test valid_url_tests[] = {
                        .path = "/b/c/g",
                        .enc_fragment = "s/../x",
                },
-       }
+       },
+       /* Encoded paths */
+       { // "http://a/%2f"
+               .url = "http://a/%2f",
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "//",
+                       .enc_path = "/%2f",
+               },
+       },
+       { // ""
+               .url = "",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .enc_path = "/%2f",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "//",
+                       .enc_path = "/%2f",
+               },
+       },
+       { // ""
+               .url = "",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .path = "//",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "//",
+                       .enc_path = "//",
+               },
+       },
+       { // "."
+               .url = ".",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .enc_path = "/%2f",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "/",
+                       .enc_path = "/",
+               },
+       },
+       { // "./%2fc"
+               .url = "./%2fc",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .enc_path = "/%2fa/%2fb/",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "//a//b//c",
+                       .enc_path = "/%2fa/%2fb/%2fc",
+               },
+       },
+       { // "../%2fc"
+               .url = "../%2fc",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .enc_path = "/%2fa/%2fb/",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "//a//c",
+                       .enc_path = "/%2fa/%2fc",
+               },
+       },
+       { // "./%2fc"
+               .url = "./%2fc",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .path = "/%2fa/%2fb/",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "/%2fa/%2fb//c",
+                       .enc_path = "/%252fa/%252fb/%2fc",
+               },
+       },
+       { // "../%2fc"
+               .url = "../%2fc",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .path = "/%2fa/%2fb/",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "/%2fa//c",
+                       .enc_path = "/%252fa/%2fc",
+               },
+       },
+       { // "./%2fc%3f"
+               .url = "./%2fc%3f",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .enc_path = "/%2fa%3f/%2fb%3f/",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "//a?//b?//c?",
+                       .enc_path = "/%2fa%3f/%2fb%3f/%2fc%3f",
+               },
+       },
+       { // "../%2fc%3f"
+               .url = "../%2fc%3f",
+               .url_base = {
+                       .host = { .name = "a" },
+                       .enc_path = "/%2fa%3f/%2fb%3f/",
+               },
+               .url_parsed = {
+                       .host = { .name = "a" },
+                       .path = "//a?//c?",
+                       .enc_path = "/%2fa%3f/%2fc%3f",
+               },
+       },
 };
 
 static unsigned int valid_url_test_count = N_ELEMENTS(valid_url_tests);
@@ -754,6 +871,13 @@ test_http_url_equal(struct http_url *urlt, struct http_url *urlp)
        } else {
                test_assert(strcmp(urlp->path, urlt->path) == 0);
        }
+       const char *urlt_enc_path = (urlt->enc_path == NULL ?
+                                    urlt->path : urlt->enc_path);
+       if (urlp->enc_path == NULL || urlt_enc_path == NULL) {
+               test_assert(urlp->enc_path == urlt_enc_path);
+       } else {
+               test_assert(strcmp(urlp->enc_path, urlt_enc_path) == 0);
+       }
        if (urlp->enc_query == NULL || urlt->enc_query == NULL) {
                test_assert(urlp->enc_query == urlt->enc_query);
        } else {
@@ -907,6 +1031,8 @@ static const char *parse_create_url_tests[] = {
        "http://www.example.com/%23shared/news",
        "http://www.example.com/query.php?name=Hendrik%20Visser",
        "http://www.example.com/network.html#IMAP%20Server",
+       "http://www.example.com/%2f/frop.html",
+       "http://www.example.com/%3f%2f%3f/frop.html",
 };
 
 static unsigned int