From: Emanuele Torre Date: Sun, 21 May 2023 03:01:35 +0000 (+0200) Subject: Revert "urlapi: respect CURLU_ALLOW_SPACE and CURLU_NO_AUTHORITY for redirects" X-Git-Tag: curl-8_1_1~12 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=eef076baa696662c7393156a5b90b81042e83a17;p=thirdparty%2Fcurl.git Revert "urlapi: respect CURLU_ALLOW_SPACE and CURLU_NO_AUTHORITY for redirects" This reverts commit df6c2f7b544f1f35f2a3e0be11f345affeb6fe9c. (It only keeps the test case that checks redirection to an absolute URL without hostname and CURLU_NO_AUTHORITY). I originally wanted to make CURLU_ALLOW_SPACE accept spaces in the hostname only because I thought curl_url_set(CURLUPART_URL, CURLU_ALLOW_SPACE) was already accepting them, and they were only not being accepted in the hostname when curl_url_set(CURLUPART_URL) was used for a redirection. That is not actually the case, urlapi never accepted hostnames with spaces, and a hostname with a space in it never makes sense. I probably misread the output of my original test when I thought they were normally accepted when using CURLU_ALLOW_SPACE, and not redirecting. Some other URL parsers seem to allow space in the host part of the URL, e.g. both python3's urllib.parse module, and Chromium's javascript URL object allow spaces (chromium percent escapes the spaces with %20), (they also both ignore TABs, and other whitespace characters), but those URLs with spaces in the hostname are useless, neither python3's requests module nor Chromium's window.location can actually use them. There is no reason to add support for URLs with spaces in the host, since it was not an inconsistency bug; let's revert that patch before it makes it into release. Sorry about that. I also reverted the extra check for CURLU_NO_AUTHORITY since that does not seem to be necessary, CURLU_NO_AUTHORITY already worked for redirects. 
Closes #11169 --- diff --git a/lib/urlapi.c b/lib/urlapi.c index e09ee54be3..96e5d440a5 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -618,8 +618,7 @@ static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname, } static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, - size_t hlen, /* length of hostname */ - unsigned int flags) + size_t hlen) /* length of hostname */ { size_t len; DEBUGASSERT(hostname); @@ -629,10 +628,8 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, else if(hostname[0] == '[') return ipv6_parse(u, hostname, hlen); else { - static char bad_chars[] = " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%"; - len = strcspn(hostname, (flags & CURLU_ALLOW_SPACE) - ? &bad_chars[1] /* space is allowed */ - : bad_chars); + /* letters from the second string are not ok */ + len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%"); if(hlen != len) /* hostname with bad content */ return CURLUE_BAD_HOSTNAME; @@ -810,9 +807,8 @@ static CURLUcode parse_authority(struct Curl_URL *u, break; case HOST_NAME: result = urldecode_host(host); - if(!result && !(flags & CURLU_NO_AUTHORITY)) - result = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host), - flags); + if(!result) + result = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host)); break; case HOST_ERROR: result = CURLUE_OUT_OF_MEMORY; @@ -1898,7 +1894,7 @@ nomem: /* Skip hostname check, it's allowed to be empty. 
*/ } else { - if(!n || hostname_check(u, (char *)newp, n, flags)) { + if(!n || hostname_check(u, (char *)newp, n)) { free((char *)newp); return CURLUE_BAD_HOSTNAME; } diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index fe275ebeff..180bea9f2b 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -984,10 +984,6 @@ static const struct redircase set_url_list[] = { "../newpage", "http://user:foo@example.com/newpage", 0, 0, CURLUE_OK}, - {"http://user:foo@example.com/path?query#frag", - "http://example org/", - "http://example org/", - 0, CURLU_ALLOW_SPACE, CURLUE_OK}, {"http://user:foo@example.com/path?query#frag", "http://?hi", "http:///?hi",