From: Daniel Stenberg Date: Thu, 8 Jun 2023 11:15:09 +0000 (+0200) Subject: urlapi: have *set(PATH) prepend a slash if one is missing X-Git-Tag: curl-8_2_0~133 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3c9256c8a0b5a36dde28b49cc8df8c6d0aae1f48;p=thirdparty%2Fcurl.git urlapi: have *set(PATH) prepend a slash if one is missing Previously the code would just do that for the path when extracting the full URL, which made a subsequent curl_url_get() of the path to (unexpectedly) still return it without the leading path. Amend lib1560 to verify this. Clarify the curl_url_set() docs about it. Bug: https://curl.se/mail/lib-2023-06/0015.html Closes #11272 Reported-by: Pedro Henrique --- diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 index 034974e111..435818e8bf 100644 --- a/docs/libcurl/curl_url_set.3 +++ b/docs/libcurl/curl_url_set.3 @@ -101,8 +101,8 @@ Port cannot be URL encoded on set. The given port number is provided as a string and the decimal number must be between 1 and 65535. Anything else will return an error. .IP CURLUPART_PATH -If a path is set in the URL without a leading slash, a slash will be inserted -automatically when this URL is read from the handle. +If a path is set in the URL without a leading slash, a slash will be prepended +automatically. .IP CURLUPART_QUERY The query part will also get spaces converted to pluses when asked to URL encode on set with the \fICURLU_URLENCODE\fP bit. diff --git a/lib/urlapi.c b/lib/urlapi.c index 07df6d65d8..7b2498c40e 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -1547,7 +1547,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, } } - url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s", scheme, u->user ? u->user : "", u->password ? ":": "", @@ -1558,7 +1558,6 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, allochost ? allochost : u->host, port ? ":": "", port ? port : "", - (u->path && (u->path[0] != '/')) ? "/": "", u->path ? u->path : "/", (u->query && u->query[0]) ? "?": "", (u->query && u->query[0]) ? u->query : "", @@ -1640,6 +1639,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0; bool plusencode = FALSE; bool urlskipslash = FALSE; + bool leadingslash = FALSE; bool appendquery = FALSE; bool equalsencode = FALSE; @@ -1751,6 +1751,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, break; case CURLUPART_PATH: urlskipslash = TRUE; + leadingslash = TRUE; /* enforce */ storep = &u->path; break; case CURLUPART_QUERY: @@ -1801,16 +1802,21 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, { const char *newp = part; size_t nalloc = strlen(part); + struct dynbuf enc; if(nalloc > CURL_MAX_INPUT_LENGTH) /* excessive input length */ return CURLUE_MALFORMED_INPUT; + Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash); + + if(leadingslash && (part[0] != '/')) { + CURLcode result = Curl_dyn_addn(&enc, "/", 1); + if(result) + return CURLUE_OUT_OF_MEMORY; + } if(urlencode) { const unsigned char *i; - struct dynbuf enc; - - Curl_dyn_init(&enc, nalloc * 3 + 1); for(i = (const unsigned char *)part; *i; i++) { CURLcode result; @@ -1838,14 +1844,13 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, return CURLUE_OUT_OF_MEMORY; } } - newp = Curl_dyn_ptr(&enc); } else { char *p; - newp = strdup(part); - if(!newp) + CURLcode result = Curl_dyn_add(&enc, part); + if(result) return CURLUE_OUT_OF_MEMORY; - p = (char *)newp; + p = Curl_dyn_ptr(&enc); while(*p) { /* make sure percent encoded are lower case */ if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) && @@ -1858,6 +1863,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, p++; } } + newp = Curl_dyn_ptr(&enc); if(appendquery) { /* Append the 'newp' string onto the old query. Add a '&' separator if @@ -1866,24 +1872,24 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, size_t querylen = u->query ? strlen(u->query) : 0; bool addamperand = querylen && (u->query[querylen -1] != '&'); if(querylen) { - struct dynbuf enc; - Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + struct dynbuf qbuf; + Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH); - if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */ + if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */ goto nomem; if(addamperand) { - if(Curl_dyn_addn(&enc, "&", 1)) + if(Curl_dyn_addn(&qbuf, "&", 1)) goto nomem; } - if(Curl_dyn_add(&enc, newp)) + if(Curl_dyn_add(&qbuf, newp)) goto nomem; - free((char *)newp); + Curl_dyn_free(&enc); free(*storep); - *storep = Curl_dyn_ptr(&enc); + *storep = Curl_dyn_ptr(&qbuf); return CURLUE_OK; nomem: - free((char *)newp); + Curl_dyn_free(&enc); return CURLUE_OUT_OF_MEMORY; } } @@ -1895,7 +1901,7 @@ nomem: } else { if(!n || hostname_check(u, (char *)newp, n)) { - free((char *)newp); + Curl_dyn_free(&enc); return CURLUE_BAD_HOSTNAME; } } diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 449f937a9d..0eca0fda72 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -108,6 +108,16 @@ struct setcase { CURLUcode pcode; /* for updating parts */ }; +struct setgetcase { + const char *in; + const char *set; + const char *out; + unsigned int urlflags; /* for setting the URL */ + unsigned int setflags; /* for updating parts */ + unsigned int getflags; /* for getting parts */ + CURLUcode pcode; /* for updating parts */ +}; + struct testcase { const char *in; const char *out; @@ -747,8 +757,33 @@ static int checkurl(const char *org, const char *url, const char *out) return 0; } +/* 1. Set the URL + 2. Set components + 3. Extract all components (not URL) +*/ +static const struct setgetcase setget_parts_list[] = { + {"https://example.com", + "path=get,", + "https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]", + 0, 0, 0, CURLUE_OK}, + {"https://example.com", + "path=/get,", + "https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]", + 0, 0, 0, CURLUE_OK}, + {"https://example.com", + "path=g e t,", + "https | [11] | [12] | [13] | example.com | [15] | /g%20e%20t | " + "[16] | [17]", + 0, CURLU_URLENCODE, 0, CURLUE_OK}, + {NULL, NULL, NULL, 0, 0, 0, CURLUE_OK} +}; + /* !checksrc! disable SPACEBEFORECOMMA 1 */ static const struct setcase set_parts_list[] = { + {"https://example.com", + "path=get,", + "https://example.com/get", + 0, 0, CURLUE_OK, CURLUE_OK}, {"https://example.com/", "scheme=ftp+-.123,", "ftp+-.123://example.com/", @@ -1120,6 +1155,54 @@ static int set_url(void) return error; } +/* 1. Set a URL + 2. Set one or more parts + 3. Extract and compare all parts - not the URL +*/ +static int setget_parts(void) +{ + int i; + int error = 0; + + for(i = 0; setget_parts_list[i].set && !error; i++) { + CURLUcode rc; + CURLU *urlp = curl_url(); + if(!urlp) { + error++; + break; + } + if(setget_parts_list[i].in) + rc = curl_url_set(urlp, CURLUPART_URL, setget_parts_list[i].in, + setget_parts_list[i].urlflags); + else + rc = CURLUE_OK; + if(!rc) { + char *url = NULL; + CURLUcode uc = updateurl(urlp, setget_parts_list[i].set, + setget_parts_list[i].setflags); + + if(uc != setget_parts_list[i].pcode) { + fprintf(stderr, "updateurl\nin: %s\nreturned %d (expected %d)\n", + setget_parts_list[i].set, (int)uc, setget_parts_list[i].pcode); + error++; + } + if(!uc) { + if(checkparts(urlp, setget_parts_list[i].set, setget_parts_list[i].out, + setget_parts_list[i].getflags)) + error++; /* add */ + } + curl_free(url); + } + else if(rc != CURLUE_OK) { + fprintf(stderr, "Set parts\nin: %s\nreturned %d (expected %d)\n", + setget_parts_list[i].in, (int)rc, 0); + error++; + } + curl_url_cleanup(urlp); + } + return error; +} + static int set_parts(void) { int i; @@ -1593,6 +1676,9 @@ int test(char *URL) { (void)URL; /* not used */ + if(setget_parts()) + return 10; + if(get_url()) return 3;