From 846678541b8186f0fdaaab927dffebdbad52b6de Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Wed, 14 Sep 2022 09:18:30 +0200 Subject: [PATCH] urlapi: detect scheme better when not guessing When the parser is not allowed to guess scheme, it should consider the word ending at the first colon to be the scheme, independently of the number of slashes. The parser now checks that the scheme is known before it counts slashes, to improve the error message for URLs with unknown schemes and maybe no slashes. When following redirects, no scheme guessing is allowed and therefore this change effectively prevents redirects to unknown schemes such as "data". Fixes #9503 --- lib/transfer.c | 7 +++++-- lib/url.c | 2 +- lib/urlapi-int.h | 3 ++- lib/urlapi.c | 32 ++++++++++++++++++++++---------- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/lib/transfer.c b/lib/transfer.c index 8b28b1e629..fcc4006af3 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -1637,7 +1637,7 @@ CURLcode Curl_follow(struct Curl_easy *data, if((type != FOLLOW_RETRY) && (data->req.httpcode != 401) && (data->req.httpcode != 407) && - Curl_is_absolute_url(newurl, NULL, 0)) + Curl_is_absolute_url(newurl, NULL, 0, FALSE)) /* If this is not redirect due to a 401 or 407 response and an absolute URL: don't allow a custom port number */ disallowport = TRUE; @@ -1649,8 +1649,11 @@ CURLcode Curl_follow(struct Curl_easy *data, CURLU_ALLOW_SPACE | (data->set.path_as_is ? 
CURLU_PATH_AS_IS : 0)); if(uc) { - if(type != FOLLOW_FAKE) + if(type != FOLLOW_FAKE) { + failf(data, "The redirect target URL could not be parsed: %s", + curl_url_strerror(uc)); return Curl_uc_to_curlcode(uc); + } /* the URL could not be parsed for some reason, but since this is FAKE mode, just duplicate the field as-is */ diff --git a/lib/url.c b/lib/url.c index 3d65925236..4a3a0e50c8 100644 --- a/lib/url.c +++ b/lib/url.c @@ -1995,7 +1995,7 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data, return CURLE_OUT_OF_MEMORY; if(data->set.str[STRING_DEFAULT_PROTOCOL] && - !Curl_is_absolute_url(data->state.url, NULL, 0)) { + !Curl_is_absolute_url(data->state.url, NULL, 0, TRUE)) { char *url = aprintf("%s://%s", data->set.str[STRING_DEFAULT_PROTOCOL], data->state.url); if(!url) diff --git a/lib/urlapi-int.h b/lib/urlapi-int.h index 5d9db8ccc7..43a83ef6e4 100644 --- a/lib/urlapi-int.h +++ b/lib/urlapi-int.h @@ -25,7 +25,8 @@ ***************************************************************************/ #include "curl_setup.h" -size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen); +size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, + bool guess_scheme); #ifdef DEBUGBUILD CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, diff --git a/lib/urlapi.c b/lib/urlapi.c index baecb84b92..2276b93dd4 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -184,8 +184,12 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative) * Returns the length of the scheme if the given URL is absolute (as opposed * to relative). Stores the scheme in the buffer if TRUE and 'buf' is * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set. + * + * If 'guess_scheme' is TRUE, it means the URL might be provided without + * scheme. 
*/ -size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen) +size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, + bool guess_scheme) { int i; DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN)); @@ -193,7 +197,7 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen) if(buf) buf[0] = 0; /* always leave a defined value in buf */ #ifdef WIN32 - if(STARTS_WITH_DRIVE_PREFIX(url)) + if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url)) return 0; #endif for(i = 0; i < MAX_SCHEME_LEN; ++i) { @@ -207,7 +211,11 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen) break; } } - if(i && (url[i] == ':') && (url[i + 1] == '/')) { + if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) { + /* If this does not guess scheme, the scheme always ends with the colon so + that this also detects data: URLs etc. In guessing mode, data: could + be the host name "data" with a specified port number. */ + /* the length of the scheme is the name part only */ size_t len = i; if(buf) { @@ -934,7 +942,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) goto fail; } - schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf)); + schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf), + flags & (CURLU_GUESS_SCHEME| + CURLU_DEFAULT_SCHEME)); /* handle the file: scheme */ if(schemelen && !strcmp(schemebuf, "file")) { @@ -1059,11 +1069,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) p++; i++; } - if((i < 1) || (i>3)) { - /* less than one or more than three slashes */ - result = CURLUE_BAD_SLASHES; - goto fail; - } schemep = schemebuf; if(!Curl_builtin_scheme(schemep) && @@ -1072,6 +1077,11 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) goto fail; } + if((i < 1) || (i>3)) { + /* less than one or more than three slashes */ + result = CURLUE_BAD_SLASHES; + goto fail; + } if(junkscan(schemep, flags)) { result = 
CURLUE_BAD_SCHEME; goto fail; @@ -1730,7 +1740,9 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, /* if the new thing is absolute or the old one is not * (we could not get an absolute url in 'oldurl'), * then replace the existing with the new. */ - if(Curl_is_absolute_url(part, NULL, 0) + if(Curl_is_absolute_url(part, NULL, 0, + flags & (CURLU_GUESS_SCHEME| + CURLU_DEFAULT_SCHEME)) || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) { return parseurl_and_replace(part, u, flags); } -- 2.47.3