#include "urldata.h"
#include "urlapi-int.h"
#include "strcase.h"
-#include "dotdot.h"
#include "url.h"
#include "escape.h"
#include "curl_ctype.h"
#include "inet_pton.h"
#include "inet_ntop.h"
+#include "strdup.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
char *path;
char *query;
char *fragment;
-
- char *scratch; /* temporary scratch area */
- char *temppath; /* temporary path pointer */
long portnum; /* the numerical version */
};
free(u->path);
free(u->query);
free(u->fragment);
- free(u->scratch);
- free(u->temppath);
}
/*
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
-/*
- * strlen_url() returns the length of the given URL if the spaces within the
- * URL were properly URL encoded.
- * URL encoding should be skipped for host names, otherwise IDN resolution
- * will fail.
- */
-static size_t strlen_url(const char *url, bool relative)
-{
- const unsigned char *ptr;
- size_t newlen = 0;
- bool left = TRUE; /* left side of the ? */
- const unsigned char *host_sep = (const unsigned char *) url;
-
- if(!relative)
- host_sep = (const unsigned char *) find_host_sep(url);
-
- for(ptr = (unsigned char *)url; *ptr; ptr++) {
-
- if(ptr < host_sep) {
- ++newlen;
- continue;
- }
-
- if(*ptr == ' ') {
- if(left)
- newlen += 3;
- else
- newlen++;
- continue;
- }
-
- if (*ptr == '?')
- left = FALSE;
-
- if(urlchar_needs_escaping(*ptr))
- newlen += 2;
-
- newlen++;
- }
-
- return newlen;
-}
-
-/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
- * the source URL accordingly.
+/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
+ * in the source URL accordingly.
+ *
* URL encoding should be skipped for host names, otherwise IDN resolution
* will fail.
*
- * Returns TRUE if something was updated.
*/
-static bool strcpy_url(char *output, const char *url, bool relative)
+static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
{
/* we must add this with whitespace-replacing */
bool left = TRUE;
const unsigned char *iptr;
- char *optr = output;
const unsigned char *host_sep = (const unsigned char *) url;
- bool changed = FALSE;
if(!relative)
host_sep = (const unsigned char *) find_host_sep(url);
iptr++) {
if(iptr < host_sep) {
- *optr++ = *iptr;
+ if(Curl_dyn_addn(o, iptr, 1))
+ return CURLUE_OUT_OF_MEMORY;
continue;
}
if(*iptr == ' ') {
if(left) {
- *optr++='%'; /* add a '%' */
- *optr++='2'; /* add a '2' */
- *optr++='0'; /* add a '0' */
+ if(Curl_dyn_addn(o, "%20", 3))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ if(Curl_dyn_addn(o, "+", 1))
+ return CURLUE_OUT_OF_MEMORY;
}
- else
- *optr++='+'; /* add a '+' here */
- changed = TRUE;
continue;
}
left = FALSE;
if(urlchar_needs_escaping(*iptr)) {
- msnprintf(optr, 4, "%%%02x", *iptr);
- changed = TRUE;
- optr += 3;
+ if(Curl_dyn_addf(o, "%%%02x", *iptr))
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ else {
+ if(Curl_dyn_addn(o, iptr, 1))
+ return CURLUE_OUT_OF_MEMORY;
}
- else
- *optr++ = *iptr;
}
- *optr = 0; /* null-terminate output buffer */
- return changed;
+ return CURLUE_OK;
}
/*
- * Returns true if the given URL is absolute (as opposed to relative). Returns
- * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
- * be larger than MAX_SCHEME_LEN if buf is set.
+ * Returns the length of the scheme if the given URL is absolute (as opposed
+ * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
+ * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
*/
-bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
+size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
{
int i;
DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
buf[0] = 0; /* always leave a defined value in buf */
#ifdef WIN32
if(STARTS_WITH_DRIVE_PREFIX(url))
- return FALSE;
+ return 0;
#endif
for(i = 0; i < MAX_SCHEME_LEN; ++i) {
char s = url[i];
}
}
if(i && (url[i] == ':') && (url[i + 1] == '/')) {
+ /* the length of the scheme is the name part only */
+ size_t len = i;
if(buf) {
buf[i] = 0;
while(i--) {
buf[i] = Curl_raw_tolower(url[i]);
}
}
- return TRUE;
+ return len;
}
- return FALSE;
+ return 0;
}
/*
* URL-encodes any spaces.
* The returned pointer must be freed by the caller unless NULL
* (returns NULL on out of memory).
+ *
+ * Note that this function destroys the 'base' string.
*/
-static char *concat_url(const char *base, const char *relurl)
+static char *concat_url(char *base, const char *relurl)
{
/***
TRY to append this new path to the old URL
to the right of the host part. Oh crap, this is doomed to cause
problems in the future...
*/
- char *newest;
+ struct dynbuf newest;
char *protsep;
char *pathsep;
- size_t newlen;
bool host_changed = FALSE;
-
const char *useurl = relurl;
- size_t urllen;
-
- /* we must make our own copy of the URL to play with, as it may
- point to read-only data */
- char *url_clone = strdup(base);
-
- if(!url_clone)
- return NULL; /* skip out of this NOW */
/* protsep points to the start of the host name */
- protsep = strstr(url_clone, "//");
+ protsep = strstr(base, "//");
if(!protsep)
- protsep = url_clone;
+ protsep = base;
else
protsep += 2; /* pass the slashes */
}
}
- /* If the new part contains a space, this is a mighty stupid redirect
- but we still make an effort to do "right". To the left of a '?'
- letter we replace each space with %20 while it is replaced with '+'
- on the right side of the '?' letter.
- */
- newlen = strlen_url(useurl, !host_changed);
-
- urllen = strlen(url_clone);
-
- newest = malloc(urllen + 1 + /* possible slash */
- newlen + 1 /* zero byte */);
-
- if(!newest) {
- free(url_clone); /* don't leak this */
- return NULL;
- }
+ Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
/* copy over the root url part */
- memcpy(newest, url_clone, urllen);
+ if(Curl_dyn_add(&newest, base))
+ return NULL;
/* check if we need to append a slash */
if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
;
- else
- newest[urllen++]='/';
+ else {
+ if(Curl_dyn_addn(&newest, "/", 1))
+ return NULL;
+ }
/* then append the new piece on the right side */
- strcpy_url(&newest[urllen], useurl, !host_changed);
+ strcpy_url(&newest, useurl, !host_changed);
- free(url_clone);
-
- return newest;
+ return Curl_dyn_ptr(&newest);
}
/* scan for byte values < 31 or 127 */
*
*/
static CURLUcode parse_hostname_login(struct Curl_URL *u,
- char **hostname,
+ struct dynbuf *host,
unsigned int flags)
{
CURLUcode result = CURLUE_OK;
char *optionsp = NULL;
const struct Curl_handler *h = NULL;
- /* At this point, we're hoping all the other special cases have
- * been taken care of, so conn->host.name is at most
- * [user[:password][;options]]@]hostname
+ /* At this point, we assume all the other special cases have been taken
+ * care of, so the host is at most
+ *
+ * [user[:password][;options]]@]hostname
*
* We need somewhere to put the embedded details, so do that first.
*/
- char *ptr = strchr(*hostname, '@');
- char *login = *hostname;
+ char *login = Curl_dyn_ptr(host);
+ char *ptr;
+
+ DEBUGASSERT(login);
+ ptr = strchr(login, '@');
if(!ptr)
goto out;
/* We will now try to extract the
* possible login information in a string like:
* ftp://user:password@ftp.my.site:8021/README */
- *hostname = ++ptr;
+ ptr++;
/* if this is a known scheme, get some details */
if(u->scheme)
u->options = optionsp;
}
+ /* move the name to the start of the host buffer */
+ if(Curl_dyn_tail(host, strlen(ptr)))
+ return CURLUE_OUT_OF_MEMORY;
+
return CURLUE_OK;
out:
return result;
}
-UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
+UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
bool has_scheme)
{
char *portptr = NULL;
char endbracket;
int len;
-
+ char *hostname = Curl_dyn_ptr(host);
/*
* Find the end of an IPv6 address, either on the ']' ending bracket or
* a percent-encoded zone index.
char *rest;
long port;
char portbuf[7];
+ size_t keep = portptr - hostname;
/* Browser behavior adaptation. If there's a colon with no digits after,
just cut off the name there which makes us ignore the colon and just
Don't do it if the URL has no scheme, to make something that looks like
a scheme not work!
*/
- if(!portptr[1]) {
- *portptr = '\0';
+ Curl_dyn_setlen(host, keep);
+ portptr++;
+ if(!*portptr)
return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
- }
- if(!ISDIGIT(portptr[1]))
+ if(!ISDIGIT(*portptr))
return CURLUE_BAD_PORT_NUMBER;
- port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
+ port = strtol(portptr, &rest, 10); /* Port number must be decimal */
if(port > 0xffff)
return CURLUE_BAD_PORT_NUMBER;
if(rest[0])
return CURLUE_BAD_PORT_NUMBER;
- *portptr++ = '\0'; /* cut off the name there */
*rest = 0;
/* generate a new port number string to get rid of leading zeroes etc */
msnprintf(portbuf, sizeof(portbuf), "%ld", port);
return CURLUE_OK;
}
-static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
+static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
+ size_t hlen) /* length of hostname */
{
size_t len;
- size_t hlen = strlen(hostname);
+ DEBUGASSERT(hostname);
- if(hostname[0] == '[') {
+ if(!hostname[0])
+ return CURLUE_NO_HOST;
+ else if(hostname[0] == '[') {
const char *l = "0123456789abcdefABCDEF:.";
if(hlen < 4) /* '[::]' is the shortest possible valid string */
return CURLUE_BAD_IPV6;
/* hostname with bad content */
return CURLUE_BAD_HOSTNAME;
}
- if(!hostname[0])
- return CURLUE_NO_HOST;
return CURLUE_OK;
}
return TRUE;
}
-/* return strdup'ed version in 'outp', possibly percent decoded */
-static CURLUcode decode_host(char *hostname, char **outp)
+/* if necessary, replace the host content with a URL decoded version */
+static CURLUcode decode_host(struct dynbuf *host)
{
char *per = NULL;
- if(hostname[0] != '[')
+ const char *hostname = Curl_dyn_ptr(host);
+ if(hostname[0] == '[')
/* only decode if not an ipv6 numerical */
- per = strchr(hostname, '%');
- if(!per) {
- *outp = strdup(hostname);
- if(!*outp)
- return CURLUE_OUT_OF_MEMORY;
- }
+ return CURLUE_OK;
+ per = strchr(hostname, '%');
+ if(!per)
+ /* nothing to decode */
+ return CURLUE_OK;
else {
- /* might be encoded */
+ /* encoded */
size_t dlen;
- CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL);
+ char *decoded;
+ CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
+ REJECT_CTRL);
if(result)
return CURLUE_BAD_HOSTNAME;
+ Curl_dyn_reset(host);
+ result = Curl_dyn_addn(host, decoded, dlen);
+ free(decoded);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
return CURLUE_OK;
}
-static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
+/*
+ * "Remove Dot Segments"
+ * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
+ */
+
+/*
+ * dedotdotify()
+ * @unittest: 1395
+ *
+ * This function gets a null-terminated path with dot and dotdot sequences
+ * passed in and strips them off according to the rules in RFC 3986 section
+ * 5.2.4.
+ *
+ * The function handles a query part ('?' + stuff) appended but it expects
+ * that fragments ('#' + stuff) have already been cut off.
+ *
+ * Note that this funciton *writes* a byte into the source buffer during its
+ * operation.
+ *
+ * RETURNS
+ *
+ * an allocated dedotdotified output string
+ */
+UNITTEST char *dedotdotify(const char *input, size_t clen);
+UNITTEST char *dedotdotify(const char *input, size_t clen)
{
- char *path;
- bool path_alloced = FALSE;
+ char *out = malloc(clen + 1);
+ char *outptr;
+ const char *orginput = input;
+ char *queryp;
+ if(!out)
+ return NULL; /* out of memory */
+
+ *out = 0; /* null-terminates, for inputs like "./" */
+ outptr = out;
+
+ if(!*input)
+ /* zero length input string, return that */
+ return out;
+
+ /*
+ * To handle query-parts properly, we must find it and remove it during the
+ * dotdot-operation and then append it again at the end to the output
+ * string.
+ */
+ queryp = strchr(input, '?');
+
+ do {
+ bool dotdot = TRUE;
+ if(*input == '.') {
+ /* A. If the input buffer begins with a prefix of "../" or "./", then
+ remove that prefix from the input buffer; otherwise, */
+
+ if(!strncmp("./", input, 2)) {
+ input += 2;
+ clen -= 2;
+ }
+ else if(!strncmp("../", input, 3)) {
+ input += 3;
+ clen -= 3;
+ }
+ /* D. if the input buffer consists only of "." or "..", then remove
+ that from the input buffer; otherwise, */
+
+ else if(!strcmp(".", input) || !strcmp("..", input) ||
+ !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
+ *out = 0;
+ break;
+ }
+ else
+ dotdot = FALSE;
+ }
+ else if(*input == '/') {
+ /* B. if the input buffer begins with a prefix of "/./" or "/.", where
+ "." is a complete path segment, then replace that prefix with "/" in
+ the input buffer; otherwise, */
+ if(!strncmp("/./", input, 3)) {
+ input += 2;
+ clen -= 2;
+ }
+ else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
+ *outptr++ = '/';
+ *outptr = 0;
+ break;
+ }
+
+ /* C. if the input buffer begins with a prefix of "/../" or "/..",
+ where ".." is a complete path segment, then replace that prefix with
+ "/" in the input buffer and remove the last segment and its
+ preceding "/" (if any) from the output buffer; otherwise, */
+
+ else if(!strncmp("/../", input, 4)) {
+ input += 3;
+ clen -= 3;
+ /* remove the last segment from the output buffer */
+ while(outptr > out) {
+ outptr--;
+ if(*outptr == '/')
+ break;
+ }
+ *outptr = 0; /* null-terminate where it stops */
+ }
+ else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
+ /* remove the last segment from the output buffer */
+ while(outptr > out) {
+ outptr--;
+ if(*outptr == '/')
+ break;
+ }
+ *outptr++ = '/';
+ *outptr = 0; /* null-terminate where it stops */
+ break;
+ }
+ else
+ dotdot = FALSE;
+ }
+ else
+ dotdot = FALSE;
+
+ if(!dotdot) {
+ /* E. move the first path segment in the input buffer to the end of
+ the output buffer, including the initial "/" character (if any) and
+ any subsequent characters up to, but not including, the next "/"
+ character or the end of the input buffer. */
+
+ do {
+ *outptr++ = *input++;
+ clen--;
+ } while(*input && (*input != '/') && (*input != '?'));
+ *outptr = 0;
+ }
+
+ /* continue until end of input string OR, if there is a terminating
+ query part, stop there */
+ } while(*input && (!queryp || (input < queryp)));
+
+ if(queryp) {
+ size_t qlen;
+ /* There was a query part, append that to the output. */
+ size_t oindex = queryp - orginput;
+ qlen = strlen(&orginput[oindex]);
+ memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
+ }
+
+ return out;
+}
+
+static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
+{
+ const char *path;
+ size_t pathlen;
bool uncpath = FALSE;
- char *hostname;
char *query = NULL;
char *fragment = NULL;
- CURLUcode result;
- bool url_has_scheme = FALSE;
char schemebuf[MAX_SCHEME_LEN + 1];
const char *schemep = NULL;
size_t schemelen = 0;
size_t urllen;
+ CURLUcode result = CURLUE_OK;
+ size_t fraglen = 0;
+ struct dynbuf host;
DEBUGASSERT(url);
+ Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
+
/*************************************************************
* Parse the URL.
************************************************************/
/* allocate scratch area */
urllen = strlen(url);
- if(urllen > CURL_MAX_INPUT_LENGTH)
+ if(urllen > CURL_MAX_INPUT_LENGTH) {
/* excessive input length */
- return CURLUE_MALFORMED_INPUT;
-
- path = u->scratch = malloc(urllen * 2 + 2);
- if(!path)
- return CURLUE_OUT_OF_MEMORY;
-
- hostname = &path[urllen + 1];
- hostname[0] = 0;
-
- if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
- url_has_scheme = TRUE;
- schemelen = strlen(schemebuf);
+ result = CURLUE_MALFORMED_INPUT;
+ goto fail;
}
+ schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf));
+
/* handle the file: scheme */
- if(url_has_scheme && !strcmp(schemebuf, "file")) {
- if(urllen <= 6)
+ if(schemelen && !strcmp(schemebuf, "file")) {
+ if(urllen <= 6) {
/* file:/ is not enough to actually be a complete file: URL */
- return CURLUE_BAD_FILE_URL;
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+ }
/* path has been allocated large enough to hold this */
- strcpy(path, &url[5]);
+ path = (char *)&url[5];
- u->scheme = strdup("file");
- if(!u->scheme)
- return CURLUE_OUT_OF_MEMORY;
+ schemep = u->scheme = strdup("file");
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
/* Extra handling URLs with an authority component (i.e. that start with
* "file://")
*/
if(path[0] == '/' && path[1] == '/') {
/* swallow the two slashes */
- char *ptr = &path[2];
+ const char *ptr = &path[2];
/*
* According to RFC 8089, a file: URL can be reliably dereferenced if:
chars, and the delimiting slash character must be appended to the
host name */
path = strpbrk(ptr, "/\\:*?\"<>|");
- if(!path || *path != '/')
- return CURLUE_BAD_FILE_URL;
+ if(!path || *path != '/') {
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
+ }
len = path - ptr;
if(len) {
- memcpy(hostname, ptr, len);
- hostname[len] = 0;
+ if(Curl_dyn_addn(&host, ptr, len)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
uncpath = TRUE;
}
#else
/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
none */
- return CURLUE_BAD_FILE_URL;
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
#endif
}
}
}
if(!uncpath)
- hostname = NULL; /* no host for file: URLs by default */
+ /* no host for file: URLs by default */
+ Curl_dyn_reset(&host);
#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
/* Don't allow Windows drive letters when not in Windows.
if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
STARTS_WITH_URL_DRIVE_PREFIX(path)) {
/* File drive letters are only accepted in MSDOS/Windows */
- return CURLUE_BAD_FILE_URL;
+ result = CURLUE_BAD_FILE_URL;
+ goto fail;
}
#else
/* If the path starts with a slash and a drive letter, ditch the slash */
if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
/* This cannot be done with strcpy, as the memory chunks overlap! */
- memmove(path, &path[1], strlen(&path[1]) + 1);
+ path++;
}
#endif
const char *p;
const char *hostp;
size_t len;
- path[0] = 0;
- if(url_has_scheme) {
+ if(schemelen) {
int i = 0;
p = &url[schemelen + 1];
while(p && (*p == '/') && (i < 4)) {
p++;
i++;
}
- if((i < 1) || (i>3))
+ if((i < 1) || (i>3)) {
/* less than one or more than three slashes */
- return CURLUE_BAD_SLASHES;
+ result = CURLUE_BAD_SLASHES;
+ goto fail;
+ }
schemep = schemebuf;
if(!Curl_builtin_scheme(schemep) &&
- !(flags & CURLU_NON_SUPPORT_SCHEME))
- return CURLUE_UNSUPPORTED_SCHEME;
+ !(flags & CURLU_NON_SUPPORT_SCHEME)) {
+ result = CURLUE_UNSUPPORTED_SCHEME;
+ goto fail;
+ }
- if(junkscan(schemep, flags))
- return CURLUE_BAD_SCHEME;
+ if(junkscan(schemep, flags)) {
+ result = CURLUE_BAD_SCHEME;
+ goto fail;
+ }
}
else {
/* no scheme! */
- if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
- return CURLUE_BAD_SCHEME;
+ if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
+ result = CURLUE_BAD_SCHEME;
+ goto fail;
+ }
if(flags & CURLU_DEFAULT_SCHEME)
schemep = DEFAULT_SCHEME;
len = p - hostp;
if(len) {
- memcpy(hostname, hostp, len);
- hostname[len] = 0;
+ if(Curl_dyn_addn(&host, hostp, len)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
}
else {
- if(!(flags & CURLU_NO_AUTHORITY))
- return CURLUE_NO_HOST;
+ if(!(flags & CURLU_NO_AUTHORITY)) {
+ result = CURLUE_NO_HOST;
+ goto fail;
+ }
}
- strcpy(path, p);
+ path = (char *)p;
if(schemep) {
u->scheme = strdup(schemep);
- if(!u->scheme)
- return CURLUE_OUT_OF_MEMORY;
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
}
}
- if((flags & CURLU_URLENCODE) && path[0]) {
- /* worst case output length is 3x the original! */
- char *newp = malloc(strlen(path) * 3);
- if(!newp)
- return CURLUE_OUT_OF_MEMORY;
- path_alloced = TRUE;
- strcpy_url(newp, path, TRUE); /* consider it relative */
- u->temppath = path = newp;
+ if(*path && (flags & CURLU_URLENCODE)) {
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ path = u->path = Curl_dyn_ptr(&enc);
}
fragment = strchr(path, '#');
if(fragment) {
- *fragment++ = 0;
- if(junkscan(fragment, flags))
- return CURLUE_BAD_FRAGMENT;
- if(fragment[0]) {
- u->fragment = strdup(fragment);
- if(!u->fragment)
- return CURLUE_OUT_OF_MEMORY;
+ fraglen = strlen(fragment);
+ if(fraglen > 1) {
+ /* skip the leading '#' in the copy but include the terminating null */
+ u->fragment = Curl_memdup(fragment + 1, fraglen);
+ if(!u->fragment) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+
+ if(junkscan(u->fragment, flags)) {
+ result = CURLUE_BAD_FRAGMENT;
+ goto fail;
+ }
}
}
query = strchr(path, '?');
- if(query) {
- *query++ = 0;
- if(junkscan(query, flags))
- return CURLUE_BAD_QUERY;
- /* done even if the query part is a blank string */
- u->query = strdup(query);
- if(!u->query)
- return CURLUE_OUT_OF_MEMORY;
- }
+ if(query && (!fragment || (query < fragment))) {
+ size_t qlen = strlen(query) - fraglen; /* includes '?' */
+ pathlen = strlen(path) - qlen - fraglen;
+ if(qlen > 1) {
+ u->query = Curl_memdup(query + 1, qlen);
+ if(!u->query) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->query[qlen - 1] = 0;
- if(junkscan(path, flags))
- return CURLUE_BAD_PATH;
+ if(junkscan(u->query, flags)) {
+ result = CURLUE_BAD_QUERY;
+ goto fail;
+ }
+ }
+ else {
+ /* single byte query */
+ u->query = strdup("");
+ if(!u->query) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+ else
+ pathlen = strlen(path) - fraglen;
- if(!path[0])
- /* if there's no path left set, unset */
+ if(!pathlen) {
+ /* there is no path left, unset */
path = NULL;
+ }
else {
+ if(!u->path) {
+ u->path = Curl_memdup(path, pathlen + 1);
+ if(!u->path) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->path[pathlen] = 0;
+ path = u->path;
+ }
+ else if(flags & CURLU_URLENCODE)
+ /* it might have encoded more than just the path so cut it */
+ u->path[pathlen] = 0;
+
+ if(junkscan(u->path, flags)) {
+ result = CURLUE_BAD_PATH;
+ goto fail;
+ }
+
if(!(flags & CURLU_PATH_AS_IS)) {
/* remove ../ and ./ sequences according to RFC3986 */
- char *newp = Curl_dedotdotify(path);
- if(!newp)
- return CURLUE_OUT_OF_MEMORY;
-
- if(strcmp(newp, path)) {
- /* if we got a new version */
- if(path_alloced)
- Curl_safefree(u->temppath);
- u->temppath = path = newp;
- path_alloced = TRUE;
+ char *newp = dedotdotify((char *)path, pathlen);
+ if(!newp) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
}
- else
- free(newp);
+ free(u->path);
+ u->path = newp;
}
-
- u->path = path_alloced?path:strdup(path);
- if(!u->path)
- return CURLUE_OUT_OF_MEMORY;
- u->temppath = NULL; /* used now */
}
- if(hostname) {
+ if(Curl_dyn_len(&host)) {
char normalized_ipv4[sizeof("255.255.255.255") + 1];
/*
* Parse the login details and strip them out of the host name.
*/
- result = parse_hostname_login(u, &hostname, flags);
- if(result)
- return result;
-
- result = Curl_parse_port(u, hostname, url_has_scheme);
+ result = parse_hostname_login(u, &host, flags);
+ if(!result)
+ result = Curl_parse_port(u, &host, schemelen);
if(result)
- return result;
+ goto fail;
- if(junkscan(hostname, flags))
- return CURLUE_BAD_HOSTNAME;
+ if(junkscan(Curl_dyn_ptr(&host), flags)) {
+ result = CURLUE_BAD_HOSTNAME;
+ goto fail;
+ }
- if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
- /* Skip hostname check, it's allowed to be empty. */
- u->host = strdup("");
+ if(ipv4_normalize(Curl_dyn_ptr(&host),
+ normalized_ipv4, sizeof(normalized_ipv4))) {
+ Curl_dyn_reset(&host);
+ if(Curl_dyn_add(&host, normalized_ipv4)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
}
else {
- if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
- u->host = strdup(normalized_ipv4);
- else {
- result = decode_host(hostname, &u->host);
- if(result)
- return result;
- result = hostname_check(u, u->host);
- if(result)
- return result;
- }
+ result = decode_host(&host);
+ if(!result)
+ result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
+ if(result)
+ goto fail;
}
- if(!u->host)
- return CURLUE_OUT_OF_MEMORY;
+
if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+ const char *hostname = Curl_dyn_ptr(&host);
/* legacy curl-style guess based on host name */
if(checkprefix("ftp.", hostname))
schemep = "ftp";
schemep = "http";
u->scheme = strdup(schemep);
- if(!u->scheme)
- return CURLUE_OUT_OF_MEMORY;
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+ else if(flags & CURLU_NO_AUTHORITY) {
+ /* allowed to be empty. */
+ if(Curl_dyn_add(&host, "")) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
}
}
- Curl_safefree(u->scratch);
- Curl_safefree(u->temppath);
+ u->host = Curl_dyn_ptr(&host);
- return CURLUE_OK;
-}
-
-/*
- * Parse the URL and set the relevant members of the Curl_URL struct.
- */
-static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
-{
- CURLUcode result = seturl(url, u, flags);
- if(result) {
- free_urlhandle(u);
- memset(u, 0, sizeof(struct Curl_URL));
- }
+ return result;
+ fail:
+ Curl_dyn_free(&host);
+ free_urlhandle(u);
return result;
}
free_urlhandle(u);
*u = tmpurl;
}
- else
- free_urlhandle(&tmpurl);
return result;
}
if(u->host[0] == '[') {
if(u->zoneid) {
/* make it '[ host %25 zoneid ]' */
+ struct dynbuf enc;
size_t hostlen = strlen(u->host);
- size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
- allochost = malloc(alen);
- if(!allochost)
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
+ u->zoneid))
return CURLUE_OUT_OF_MEMORY;
- memcpy(allochost, u->host, hostlen - 1);
- msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
- "%%25%s]", u->zoneid);
+ allochost = Curl_dyn_ptr(&enc);
}
}
else if(urlencode) {
else {
/* only encode '%' in output host name */
char *host = u->host;
- size_t pcount = 0;
+ bool percent = FALSE;
/* first, count number of percents present in the name */
while(*host) {
- if(*host == '%')
- pcount++;
+ if(*host == '%') {
+ percent = TRUE;
+ break;
+ }
host++;
}
- /* if there were percents, encode the host name */
- if(pcount) {
- size_t hostlen = strlen(u->host);
- size_t alen = hostlen + 2 * pcount + 1;
- char *o = allochost = malloc(alen);
- if(!allochost)
- return CURLUE_OUT_OF_MEMORY;
-
+ /* if there were percent(s), encode the host name */
+ if(percent) {
+ struct dynbuf enc;
+ CURLcode result;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
host = u->host;
while(*host) {
- if(*host == '%') {
- memcpy(o, "%25", 3);
- o += 3;
- host++;
- continue;
- }
- *o++ = *host++;
+ if(*host == '%')
+ result = Curl_dyn_addn(&enc, "%25", 3);
+ else
+ result = Curl_dyn_addn(&enc, host, 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ host++;
}
- *o = '\0';
+ free(u->host);
+ u->host = Curl_dyn_ptr(&enc);
}
}
*part = decoded;
}
if(urlencode) {
- /* worst case output length is 3x the original! */
- char *newp = malloc(strlen(*part) * 3);
- if(!newp)
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
return CURLUE_OUT_OF_MEMORY;
- if(strcpy_url(newp, *part, TRUE)) { /* consider it relative */
- free(*part);
- *part = newp;
- }
- else
- free(newp);
+ free(*part);
+ *part = Curl_dyn_ptr(&enc);
}
return CURLUE_OK;
if(urlencode) {
const unsigned char *i;
- char *o;
- char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
- if(!enc)
- return CURLUE_OUT_OF_MEMORY;
- for(i = (const unsigned char *)part, o = enc; *i; i++) {
+ struct dynbuf enc;
+
+ Curl_dyn_init(&enc, nalloc * 3 + 1);
+
+ for(i = (const unsigned char *)part; *i; i++) {
+ CURLcode result;
if((*i == ' ') && plusencode) {
- *o = '+';
- o++;
+ result = Curl_dyn_addn(&enc, "+", 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
else if(Curl_isunreserved(*i) ||
((*i == '/') && urlskipslash) ||
if((*i == '=') && equalsencode)
/* only skip the first equals sign */
equalsencode = FALSE;
- *o = *i;
- o++;
+ result = Curl_dyn_addn(&enc, i, 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
else {
- msnprintf(o, 4, "%%%02x", *i);
- o += 3;
+ result = Curl_dyn_addf(&enc, "%%%02x", *i);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
}
}
- *o = 0; /* null-terminate */
- newp = enc;
+ newp = Curl_dyn_ptr(&enc);
}
else {
char *p;
}
if(appendquery) {
- /* Append the string onto the old query. Add a '&' separator if none is
- present at the end of the existing query already */
+ /* Append the 'newp' string onto the old query. Add a '&' separator if
+ none is present at the end of the existing query already */
+
size_t querylen = u->query ? strlen(u->query) : 0;
bool addamperand = querylen && (u->query[querylen -1] != '&');
if(querylen) {
- size_t newplen = strlen(newp);
- char *p = malloc(querylen + addamperand + newplen + 1);
- if(!p) {
- free((char *)newp);
- return CURLUE_OUT_OF_MEMORY;
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+
+ if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
+ goto nomem;
+
+ if(addamperand) {
+ if(Curl_dyn_addn(&enc, "&", 1))
+ goto nomem;
}
- strcpy(p, u->query); /* original query */
- if(addamperand)
- p[querylen] = '&'; /* ampersand */
- strcpy(&p[querylen + addamperand], newp); /* new suffix */
+ if(Curl_dyn_add(&enc, newp))
+ goto nomem;
free((char *)newp);
free(*storep);
- *storep = p;
+ *storep = Curl_dyn_ptr(&enc);
return CURLUE_OK;
+ nomem:
+ free((char *)newp);
+ return CURLUE_OUT_OF_MEMORY;
}
}
if(what == CURLUPART_HOST) {
- if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
+ size_t n = strlen(newp);
+ if(!n && (flags & CURLU_NO_AUTHORITY)) {
/* Skip hostname check, it's allowed to be empty. */
}
else {
- if(hostname_check(u, (char *)newp)) {
+ if(hostname_check(u, (char *)newp, n)) {
free((char *)newp);
return CURLUE_BAD_HOSTNAME;
}
#define free_and_clear(x) free(x); x = NULL
+static CURLUcode parse_port(CURLU *url,
+ char *h, bool has_scheme)
+{
+ struct dynbuf host;
+ CURLUcode ret;
+ Curl_dyn_init(&host, 10000);
+ if(Curl_dyn_add(&host, h))
+ return CURLUE_OUT_OF_MEMORY;
+ ret = Curl_parse_port(url, &host, has_scheme);
+ Curl_dyn_free(&host);
+ return ret;
+}
+
UNITTEST_START
{
CURLUcode ret;
ipv6port = strdup("[fe80::250:56ff:fea7:da15]");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret == CURLUE_OK, "parse_port returned error");
ret = curl_url_get(u, CURLUPART_PORT, &portnum, CURLU_NO_DEFAULT_PORT);
fail_unless(ret != CURLUE_OK, "curl_url_get portnum returned something");
free_and_clear(ipv6port);
ipv6port = strdup("[fe80::250:56ff:fea7:da15|");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
ipv6port = strdup("[fe80::250:56ff;fea7:da15]:80");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]:80");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret == CURLUE_OK, "parse_port returned error");
ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
fail_unless(portnum && !strcmp(portnum, "80"), "Check portnumber");
ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret == CURLUE_OK, "parse_port returned error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
ipv6port = strdup("[fe80::250:56ff:fea7:da15]:81");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret == CURLUE_OK, "parse_port returned error");
ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
fail_unless(portnum && !strcmp(portnum, "81"), "Check portnumber");
ipv6port = strdup("[fe80::250:56ff:fea7:da15];81");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
ipv6port = strdup("[fe80::250:56ff:fea7:da15]80");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
ipv6port = strdup("[fe80::250:56ff:fea7:da15]:");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, TRUE);
- fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+ ret = parse_port(u, ipv6port, TRUE);
+ fail_unless(ret == CURLUE_OK, "parse_port returned error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
ipv6port = strdup("[fe80::250:56ff:fea7:da15!25eth3]:80");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret != CURLUE_OK, "Curl_parse_port returned non-error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret != CURLUE_OK, "parse_port returned non-error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
ipv6port = strdup("[fe80::250:56ff:fea7:da15%eth3]:80");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret == CURLUE_OK, "parse_port returned error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
"aaaaaaaaaaaaaaaaaaaaaa:");
if(!ipv6port)
goto fail;
- ret = Curl_parse_port(u, ipv6port, FALSE);
- fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "Curl_parse_port did wrong");
+ ret = parse_port(u, ipv6port, FALSE);
+ fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "parse_port did wrong");
fail:
free(ipv6port);
curl_url_cleanup(u);