* URL encoding should be skipped for hostnames, otherwise IDN resolution
* will fail.
*
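+ * 'query' is QUERY_YES when the input is a query part, QUERY_NO when it is
+ * not, and QUERY_NOT_YET when it is not a query part yet but turns into one
+ * at the first question mark. Spaces are encoded as "+" in a query part and
+ * as "%20" everywhere else.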
+ *
* @unittest: 1675
*/
UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
size_t len, bool relative,
- bool query);
+ unsigned int query);
UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
size_t len, bool relative,
- bool query)
+ unsigned int query)
{
/* we must add this with whitespace-replacing */
const unsigned char *iptr;
const unsigned char *host_sep = (const unsigned char *)url;
CURLcode result = CURLE_OK;
+ DEBUGASSERT((query >= QUERY_NO) && (query <= QUERY_YES));
+
if(!relative) {
size_t n;
host_sep = (const unsigned char *)find_host_sep(url);
for(iptr = host_sep; len && !result; iptr++, len--) {
if(*iptr == ' ') {
- if(!query)
+ if(query != QUERY_YES)
result = curlx_dyn_addn(o, "%20", 3);
else
result = curlx_dyn_addn(o, "+", 1);
Curl_hexbyte(&out[1], *iptr);
result = curlx_dyn_addn(o, out, 3);
}
- else if(*iptr == '%' && ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) &&
+ else if(*iptr == '%' && (len >= 3) &&
+ ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) &&
(ISLOWER(iptr[1]) || ISLOWER(iptr[2]))) {
/* uppercase it */
unsigned char hex = (unsigned char)((curlx_hexval(iptr[1]) << 4) |
}
else {
result = curlx_dyn_addn(o, iptr, 1);
- if(*iptr == '?')
- query = TRUE;
+ if(*iptr == '?' && (query == QUERY_NOT_YET))
+ query = QUERY_YES;
}
}
* @unittest: 1675
*/
UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
- struct dynbuf *host, const char **pathp,
- size_t *pathlenp);
+ const char **pathp, size_t *pathlenp);
UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
- struct dynbuf *host, const char **pathp,
- size_t *pathlenp)
+ const char **pathp, size_t *pathlenp)
{
const char *path;
size_t pathlen;
- bool uncpath = FALSE;
if(urllen <= 6)
/* file:/ is not enough to actually be a complete file: URL */
return CURLUE_BAD_FILE_URL;
*
* o the hostname is a FQDN that resolves to this machine, or
*
- * o it is an UNC String transformed to an URI (Windows only, RFC 8089
- * Appendix E.3).
- *
* For brevity, we only consider URLs with empty, "localhost", or
- * "127.0.0.1" hostnames as local, otherwise as an UNC String.
+ * "127.0.0.1" hostnames as local; any other hostname is rejected.
*
checkprefix("127.0.0.1/", ptr)) {
ptr += 9; /* now points to the slash after the host */
}
- else {
-#ifdef _WIN32
- size_t len;
-
- /* the hostname, NetBIOS computer name, can not contain disallowed
- chars, and the delimiting slash character must be appended to the
- hostname */
- path = strpbrk(ptr, "/\\:*?\"<>|");
- if(!path || *path != '/')
- return CURLUE_BAD_FILE_URL;
-
- len = path - ptr;
- if(len) {
- CURLcode code = curlx_dyn_addn(host, ptr, len);
- if(code)
- return cc2cu(code);
- uncpath = TRUE;
- }
-
- ptr -= 2; /* now points to the // before the host in UNC */
-#else
+ else
/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
none */
return CURLUE_BAD_FILE_URL;
-#endif
- }
}
path = ptr;
pathlen = urllen - (ptr - url);
}
- if(!uncpath)
- /* no host for file: URLs by default */
- curlx_dyn_reset(host);
-
#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
/* Do not allow Windows drive letters when not in Windows.
* This catches both "file:/c:" and "file:c:" */
if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
- ures = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
+ ures = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, QUERY_NO);
if(ures)
return ures;
u->fragment = curlx_dyn_ptr(&enc);
CURLUcode ures;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
/* skip the leading question mark */
- ures = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
+ ures = urlencode_str(&enc, query + 1, qlen - 1, TRUE, QUERY_YES);
if(ures)
return ures;
u->query = curlx_dyn_ptr(&enc);
if(pathlen && (flags & CURLU_URLENCODE)) {
struct dynbuf enc;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
- ures = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
+ ures = urlencode_str(&enc, path, pathlen, TRUE, QUERY_NO);
if(ures)
return ures;
pathlen = curlx_dyn_len(&enc);
/* handle the file: scheme */
if(schemelen && !strcmp(schemebuf, "file")) {
is_file = TRUE;
- ures = parse_file(url, urllen, u, &host, &path, &pathlen);
+ ures = parse_file(url, urllen, u, &path, &pathlen);
}
else {
const char *hostp = NULL;
curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
- !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
+ !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed,
+ QUERY_NOT_YET)) {
uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
flags & ~U_CURLU_PATH_AS_IS);
}
if(urlencode) {
struct dynbuf enc;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
- uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY);
+ uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY ?
+ QUERY_YES : QUERY_NO);
curlx_free(part);
if(uc)
return uc;
struct urlencode_test {
const char *in;
bool relative;
- bool query;
+ unsigned int query;
const char *out;
};
const struct urlencode_test tests[] = {
- {"http://leave\x01/hello\x01world", FALSE, FALSE,
+ {"http://leave\x01/hello\x01world", FALSE, QUERY_NO,
"http://leave\x01/hello%01world"},
- {"http://leave/hello\x01world", FALSE, FALSE,
+ {"http://leave/hello\x01world", FALSE, QUERY_NO,
"http://leave/hello%01world"},
- {"http://le ave/hello\x01world", FALSE, FALSE,
+ {"http://le ave/hello\x01world", FALSE, QUERY_NO,
"http://le ave/hello%01world"},
- {"hello\x01world", TRUE, FALSE, "hello%01world"},
- {"hello\xf0world", TRUE, FALSE, "hello%F0world"},
- {"hello world", TRUE, FALSE, "hello%20world"},
- {"hello%20world", TRUE, FALSE, "hello%20world"},
- {"hello world", TRUE, TRUE, "hello+world"},
- {"a+b c", TRUE, FALSE, "a+b%20c"},
- {"a%20b%20c", TRUE, FALSE, "a%20b%20c"},
- {"a%aab%aac", TRUE, FALSE, "a%AAb%AAc"},
- {"a%aab%AAc", TRUE, FALSE, "a%AAb%AAc"},
- {"w%w%x", TRUE, FALSE, "w%w%x"},
- {"w%wf%xf", TRUE, FALSE, "w%wf%xf"},
- {"w%fw%fw", TRUE, FALSE, "w%fw%fw"},
- {"a+b c", TRUE, TRUE, "a+b+c"},
- {"/foo/bar", TRUE, FALSE, "/foo/bar"},
- {"/foo/bar", TRUE, TRUE, "/foo/bar"},
- {"/foo/ bar", TRUE, FALSE, "/foo/%20bar"},
- {"/foo/ bar", TRUE, TRUE, "/foo/+bar"},
- {"~-._", TRUE, FALSE, "~-._"},
- {"~-._", TRUE, TRUE, "~-._"},
+ {"hello\x01world", TRUE, QUERY_NO, "hello%01world"},
+ {"hello\xf0world", TRUE, QUERY_NO, "hello%F0world"},
+ {"hello world", TRUE, QUERY_NO, "hello%20world"},
+ {"hello%20world", TRUE, QUERY_NO, "hello%20world"},
+ {"hello world", TRUE, QUERY_YES, "hello+world"},
+ {"a+b c", TRUE, QUERY_NO, "a+b%20c"},
+ {"a%20b%20c", TRUE, QUERY_NO, "a%20b%20c"},
+ {"a%aab%aac", TRUE, QUERY_NO, "a%AAb%AAc"},
+ {"a%aab%AAc", TRUE, QUERY_NO, "a%AAb%AAc"},
+ {"w%w%x", TRUE, QUERY_NO, "w%w%x"},
+ {"w%wf%xf", TRUE, QUERY_NO, "w%wf%xf"},
+ {"w%fw%fw", TRUE, QUERY_NO, "w%fw%fw"},
+ {"a+b c", TRUE, QUERY_YES, "a+b+c"},
+ {"/foo/bar", TRUE, QUERY_NO, "/foo/bar"},
+ {"/foo/bar", TRUE, QUERY_YES, "/foo/bar"},
+ {"/foo/ bar", TRUE, QUERY_NO, "/foo/%20bar"},
+ {"/foo/ bar", TRUE, QUERY_YES, "/foo/+bar"},
+ {"~-._", TRUE, QUERY_NO, "~-._"},
+ {"~-._", TRUE, QUERY_YES, "~-._"},
+ {"foo bar?foo bar", TRUE, QUERY_NO, "foo%20bar?foo%20bar"},
+ {"foo bar?foo bar", TRUE, QUERY_NOT_YET, "foo%20bar?foo+bar"},
};
curlx_dyn_init(&out, 256);
unsigned int i;
struct file_test {
const char *in;
- const char *out_host;
const char *out_path;
bool fine;
};
const struct file_test tests[] = {
- {"file:///etc/hosts", "", "/etc/hosts", TRUE},
- {"file://localhost/etc/hosts", "", "/etc/hosts", TRUE},
- {"file://apple/etc/hosts", "", "/etc/hosts", FALSE},
+ {"file:///etc/hosts", "/etc/hosts", TRUE},
+ {"file://localhost/etc/hosts", "/etc/hosts", TRUE},
+ {"file://apple/etc/hosts", "/etc/hosts", FALSE},
#ifdef _WIN32
- {"file:///c:/windows/system32", "", "c:/windows/system32", TRUE},
- {"file://localhost/c:/windows/system32", "",
- "c:/windows/system32", TRUE},
+ {"file:///c:/windows/system32", "c:/windows/system32", TRUE},
+ {"file://localhost/c:/windows/system32", "c:/windows/system32", TRUE},
#endif
};
if(!u)
return CURLE_OUT_OF_MEMORY;
- uc = parse_file(tests[i].in, strlen(tests[i].in), u, &host, &path,
- &pathlen);
+ uc = parse_file(tests[i].in, strlen(tests[i].in), u, &path, &pathlen);
if(!tests[i].fine && !uc) {
curl_mfprintf(stderr, "Unexpectedly fine for input '%s'\n",
tests[i].in);
fails++;
}
- else if(tests[i].out_host[0]) {
- /* expecting a hostname output */
- if(!curlx_dyn_len(&host) ||
- strcmp(curlx_dyn_ptr(&host), tests[i].out_host))
- error = TRUE;
- }
if(tests[i].fine &&
(uc ||
strncmp(path, tests[i].out_path, pathlen) ||
strlen(tests[i].out_path) != pathlen)) {
curl_mfprintf(stderr, "parse_file('%s') failed:"
- " expected host '%s', path '%s'; got host '%s',"
- " path '%.*s'\n",
- tests[i].in, tests[i].out_host, tests[i].out_path,
- uc ? "error" : curlx_dyn_ptr(&host),
+ " expected path '%s'; got path '%.*s'\n",
+ tests[i].in, tests[i].out_path,
(int)pathlen, path);
fails++;
}