From: Daniel Stenberg Date: Mon, 9 Jun 2025 15:22:28 +0000 (+0200) Subject: urlapi: simplify and split into sub functions X-Git-Tag: curl-8_15_0~293 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f9d8ed63ede3aaab0f26f37d768528a8030b7ad8;p=thirdparty%2Fcurl.git urlapi: simplify and split into sub functions Closes #17565 --- diff --git a/lib/urlapi.c b/lib/urlapi.c index 3f67431e57..1a1700adb8 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -1320,18 +1320,221 @@ fail: return NULL; } +#ifndef USE_IDN +#define host_decode(x,y) CURLUE_LACKS_IDN +#define host_encode(x,y) CURLUE_LACKS_IDN +#else +static CURLUcode host_decode(const char *host, char **allochost) +{ + CURLcode result = Curl_idn_decode(host, allochost); + if(result) + return (result == CURLE_OUT_OF_MEMORY) ? + CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; + return CURLUE_OK; +} + +static CURLUcode host_encode(const char *host, char **allochost) +{ + CURLcode result = Curl_idn_encode(host, allochost); + if(result) + return (result == CURLE_OUT_OF_MEMORY) ? + CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; + return CURLUE_OK; +} +#endif + +static CURLUcode urlget_format(const CURLU *u, CURLUPart what, + const char *ptr, char **part, + bool plusdecode, unsigned int flags) +{ + size_t partlen = strlen(ptr); + bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0; + bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0; + bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST); + bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST); + *part = Curl_memdup0(ptr, partlen); + if(!*part) + return CURLUE_OUT_OF_MEMORY; + if(plusdecode) { + /* convert + to space */ + char *plus = *part; + size_t i = 0; + for(i = 0; i < partlen; ++plus, i++) { + if(*plus == '+') + *plus = ' '; + } + } + if(urldecode) { + char *decoded; + size_t dlen; + /* this unconditional rejection of control bytes is documented + API behavior */ + CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL); + free(*part); + if(res) { + *part = NULL; + return CURLUE_URLDECODE; + } + *part = decoded; + partlen = dlen; + } + if(urlencode) { + struct dynbuf enc; + CURLUcode uc; + curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY); + if(uc) + return uc; + free(*part); + *part = curlx_dyn_ptr(&enc); + } + else if(punycode) { + if(!Curl_is_ASCII_name(u->host)) { + char *allochost = NULL; + CURLUcode ret = host_decode(*part, &allochost); + if(ret) + return ret; + free(*part); + *part = allochost; + } + } + else if(depunyfy) { + if(Curl_is_ASCII_name(u->host)) { + char *allochost = NULL; + CURLUcode ret = host_encode(*part, &allochost); + if(ret) + return ret; + free(*part); + *part = allochost; + } + } + + return CURLUE_OK; +} + +static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags) +{ + char *url; + const char *scheme; + char *options = u->options; + char *port = u->port; + char *allochost = NULL; + bool show_fragment = + u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY); + bool show_query = (u->query && u->query[0]) || + (u->query_present && flags & CURLU_GET_EMPTY); + bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0; + bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0; + bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0; + char portbuf[7]; + if(u->scheme && strcasecompare("file", u->scheme)) { + url = aprintf("file://%s%s%s%s%s", + u->path, + show_query ? "?": "", + u->query ? u->query : "", + show_fragment ? "#": "", + u->fragment ? u->fragment : ""); + } + else if(!u->host) + return CURLUE_NO_HOST; + else { + const struct Curl_handler *h = NULL; + char schemebuf[MAX_SCHEME_LEN + 5]; + if(u->scheme) + scheme = u->scheme; + else if(flags & CURLU_DEFAULT_SCHEME) + scheme = DEFAULT_SCHEME; + else + return CURLUE_NO_SCHEME; + + h = Curl_get_scheme_handler(scheme); + if(!port && (flags & CURLU_DEFAULT_PORT)) { + /* there is no stored port number, but asked to deliver + a default one for the scheme */ + if(h) { + msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); + port = portbuf; + } + } + else if(port) { + /* there is a stored port number, but asked to inhibit if it matches + the default one for the scheme */ + if(h && (h->defport == u->portnum) && + (flags & CURLU_NO_DEFAULT_PORT)) + port = NULL; + } + + if(h && !(h->flags & PROTOPT_URLOPTIONS)) + options = NULL; + + if(u->host[0] == '[') { + if(u->zoneid) { + /* make it '[ host %25 zoneid ]' */ + struct dynbuf enc; + size_t hostlen = strlen(u->host); + curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host, + u->zoneid)) + return CURLUE_OUT_OF_MEMORY; + allochost = curlx_dyn_ptr(&enc); + } + } + else if(urlencode) { + allochost = curl_easy_escape(NULL, u->host, 0); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; + } + else if(punycode) { + if(!Curl_is_ASCII_name(u->host)) { + CURLUcode ret = host_decode(u->host, &allochost); + if(ret) + return ret; + } + } + else if(depunyfy) { + if(Curl_is_ASCII_name(u->host)) { + CURLUcode ret = host_encode(u->host, &allochost); + if(ret) + return ret; + } + } + + if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme) + msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme); + else + schemebuf[0] = 0; + + url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + schemebuf, + u->user ? u->user : "", + u->password ? ":": "", + u->password ? u->password : "", + options ? ";" : "", + options ? options : "", + (u->user || u->password || options) ? "@": "", + allochost ? allochost : u->host, + port ? ":": "", + port ? port : "", + u->path ? u->path : "/", + show_query ? "?": "", + u->query ? u->query : "", + show_fragment ? "#": "", + u->fragment ? u->fragment : ""); + free(allochost); + } + if(!url) + return CURLUE_OUT_OF_MEMORY; + *part = url; + return CURLUE_OK; +} + CURLUcode curl_url_get(const CURLU *u, CURLUPart what, char **part, unsigned int flags) { const char *ptr; CURLUcode ifmissing = CURLUE_UNKNOWN_PART; char portbuf[7]; - bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0; - bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0; - bool punycode = FALSE; - bool depunyfy = FALSE; bool plusdecode = FALSE; - (void)flags; if(!u) return CURLUE_BAD_HANDLE; if(!part) @@ -1342,7 +1545,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, case CURLUPART_SCHEME: ptr = u->scheme; ifmissing = CURLUE_NO_SCHEME; - urldecode = FALSE; /* never for schemes */ + flags &= ~CURLU_URLDECODE; /* never for schemes */ if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme) return CURLUE_NO_SCHEME; break; @@ -1361,8 +1564,6 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, case CURLUPART_HOST: ptr = u->host; ifmissing = CURLUE_NO_HOST; - punycode = (flags & CURLU_PUNYCODE) ? 1 : 0; - depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0; break; case CURLUPART_ZONEID: ptr = u->zoneid; @@ -1371,7 +1572,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, case CURLUPART_PORT: ptr = u->port; ifmissing = CURLUE_NO_PORT; - urldecode = FALSE; /* never for port */ + flags &= ~CURLU_URLDECODE; /* never for port */ if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) { /* there is no stored port number, but asked to deliver a default one for the scheme */ @@ -1398,7 +1599,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, case CURLUPART_QUERY: ptr = u->query; ifmissing = CURLUE_NO_QUERY; - plusdecode = urldecode; + plusdecode = flags & CURLU_URLDECODE; if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY)) /* there was a blank query and the user do not ask for it */ ptr = NULL; @@ -1410,219 +1611,31 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, /* there was a blank fragment and the user asks for it */ ptr = ""; break; - case CURLUPART_URL: { - char *url; - const char *scheme; - char *options = u->options; - char *port = u->port; - char *allochost = NULL; - bool show_fragment = - u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY); - bool show_query = - (u->query && u->query[0]) || - (u->query_present && flags & CURLU_GET_EMPTY); - punycode = (flags & CURLU_PUNYCODE) ? 1 : 0; - depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0; - if(u->scheme && strcasecompare("file", u->scheme)) { - url = aprintf("file://%s%s%s%s%s", - u->path, - show_query ? "?": "", - u->query ? u->query : "", - show_fragment ? "#": "", - u->fragment ? u->fragment : ""); - } - else if(!u->host) - return CURLUE_NO_HOST; - else { - const struct Curl_handler *h = NULL; - char schemebuf[MAX_SCHEME_LEN + 5]; - if(u->scheme) - scheme = u->scheme; - else if(flags & CURLU_DEFAULT_SCHEME) - scheme = DEFAULT_SCHEME; - else - return CURLUE_NO_SCHEME; - - h = Curl_get_scheme_handler(scheme); - if(!port && (flags & CURLU_DEFAULT_PORT)) { - /* there is no stored port number, but asked to deliver - a default one for the scheme */ - if(h) { - msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); - port = portbuf; - } - } - else if(port) { - /* there is a stored port number, but asked to inhibit if it matches - the default one for the scheme */ - if(h && (h->defport == u->portnum) && - (flags & CURLU_NO_DEFAULT_PORT)) - port = NULL; - } - - if(h && !(h->flags & PROTOPT_URLOPTIONS)) - options = NULL; - - if(u->host[0] == '[') { - if(u->zoneid) { - /* make it '[ host %25 zoneid ]' */ - struct dynbuf enc; - size_t hostlen = strlen(u->host); - curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host, - u->zoneid)) - return CURLUE_OUT_OF_MEMORY; - allochost = curlx_dyn_ptr(&enc); - } - } - else if(urlencode) { - allochost = curl_easy_escape(NULL, u->host, 0); - if(!allochost) - return CURLUE_OUT_OF_MEMORY; - } - else if(punycode) { - if(!Curl_is_ASCII_name(u->host)) { -#ifndef USE_IDN - return CURLUE_LACKS_IDN; -#else - CURLcode result = Curl_idn_decode(u->host, &allochost); - if(result) - return (result == CURLE_OUT_OF_MEMORY) ? - CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; -#endif - } - } - else if(depunyfy) { - if(Curl_is_ASCII_name(u->host)) { -#ifndef USE_IDN - return CURLUE_LACKS_IDN; -#else - CURLcode result = Curl_idn_encode(u->host, &allochost); - if(result) - /* this is the most likely error */ - return (result == CURLE_OUT_OF_MEMORY) ? - CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; -#endif - } - } - - if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme) - msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme); - else - schemebuf[0] = 0; - - url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - schemebuf, - u->user ? u->user : "", - u->password ? ":": "", - u->password ? u->password : "", - options ? ";" : "", - options ? options : "", - (u->user || u->password || options) ? "@": "", - allochost ? allochost : u->host, - port ? ":": "", - port ? port : "", - u->path ? u->path : "/", - show_query ? "?": "", - u->query ? u->query : "", - show_fragment ? "#": "", - u->fragment ? u->fragment : ""); - free(allochost); - } - if(!url) - return CURLUE_OUT_OF_MEMORY; - *part = url; - return CURLUE_OK; - } + case CURLUPART_URL: + return urlget_url(u, part, flags); default: ptr = NULL; break; } - if(ptr) { - size_t partlen = strlen(ptr); - size_t i = 0; - *part = Curl_memdup0(ptr, partlen); - if(!*part) - return CURLUE_OUT_OF_MEMORY; - if(plusdecode) { - /* convert + to space */ - char *plus = *part; - for(i = 0; i < partlen; ++plus, i++) { - if(*plus == '+') - *plus = ' '; - } - } - if(urldecode) { - char *decoded; - size_t dlen; - /* this unconditional rejection of control bytes is documented - API behavior */ - CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL); - free(*part); - if(res) { - *part = NULL; - return CURLUE_URLDECODE; - } - *part = decoded; - partlen = dlen; - } - if(urlencode) { - struct dynbuf enc; - CURLUcode uc; - curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY); - if(uc) - return uc; - free(*part); - *part = curlx_dyn_ptr(&enc); - } - else if(punycode) { - if(!Curl_is_ASCII_name(u->host)) { -#ifndef USE_IDN - return CURLUE_LACKS_IDN; -#else - char *allochost; - CURLcode result = Curl_idn_decode(*part, &allochost); - if(result) - return (result == CURLE_OUT_OF_MEMORY) ? - CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; - free(*part); - *part = allochost; -#endif - } - } - else if(depunyfy) { - if(Curl_is_ASCII_name(u->host)) { -#ifndef USE_IDN - return CURLUE_LACKS_IDN; -#else - char *allochost; - CURLcode result = Curl_idn_encode(*part, &allochost); - if(result) - return (result == CURLE_OUT_OF_MEMORY) ? - CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; - free(*part); - *part = allochost; -#endif - } - } + if(ptr) + return urlget_format(u, what, ptr, part, plusdecode, flags); - return CURLUE_OK; - } - else - return ifmissing; + return ifmissing; } static CURLUcode set_url_scheme(CURLU *u, const char *scheme, - unsigned int flags) + unsigned int flags) { - size_t plen = strlen(scheme); + size_t plen = strlen(scheme); + const struct Curl_handler *h = NULL; + if((plen > MAX_SCHEME_LEN) || (plen < 1)) + /* too long or too short */ + return CURLUE_BAD_SCHEME; + /* verify that it is a fine scheme */ + h = Curl_get_scheme_handler(scheme); + if(!h) { const char *s = scheme; - if((plen > MAX_SCHEME_LEN) || (plen < 1)) - /* too long or too short */ - return CURLUE_BAD_SCHEME; - /* verify that it is a fine scheme */ - if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(scheme)) + if(!(flags & CURLU_NON_SUPPORT_SCHEME)) return CURLUE_UNSUPPORTED_SCHEME; if(ISALPHA(*s)) { /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */ @@ -1635,8 +1648,9 @@ static CURLUcode set_url_scheme(CURLU *u, const char *scheme, } else return CURLUE_BAD_SCHEME; - u->guessed_scheme = FALSE; - return CURLUE_OK; + } + u->guessed_scheme = FALSE; + return CURLUE_OK; } static CURLUcode set_url_port(CURLU *u, const char *provided_port) @@ -1659,7 +1673,7 @@ static CURLUcode set_url_port(CURLU *u, const char *provided_port) } static CURLUcode set_url(CURLU *u, const char *url, size_t part_size, - unsigned int flags) + unsigned int flags) { /* * Allow a new URL to replace the existing (if any) contents. @@ -1696,6 +1710,53 @@ static CURLUcode set_url(CURLU *u, const char *url, size_t part_size, return uc; } +static CURLUcode urlset_clear(CURLU *u, CURLUPart what) +{ + switch(what) { + case CURLUPART_URL: + free_urlhandle(u); + memset(u, 0, sizeof(struct Curl_URL)); + break; + case CURLUPART_SCHEME: + Curl_safefree(u->scheme); + u->guessed_scheme = FALSE; + break; + case CURLUPART_USER: + Curl_safefree(u->user); + break; + case CURLUPART_PASSWORD: + Curl_safefree(u->password); + break; + case CURLUPART_OPTIONS: + Curl_safefree(u->options); + break; + case CURLUPART_HOST: + Curl_safefree(u->host); + break; + case CURLUPART_ZONEID: + Curl_safefree(u->zoneid); + break; + case CURLUPART_PORT: + u->portnum = 0; + Curl_safefree(u->port); + break; + case CURLUPART_PATH: + Curl_safefree(u->path); + break; + case CURLUPART_QUERY: + Curl_safefree(u->query); + u->query_present = FALSE; + break; + case CURLUPART_FRAGMENT: + Curl_safefree(u->fragment); + u->fragment_present = FALSE; + break; + default: + return CURLUE_UNKNOWN_PART; + } + return CURLUE_OK; +} + CURLUcode curl_url_set(CURLU *u, CURLUPart what, const char *part, unsigned int flags) { @@ -1710,57 +1771,9 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, if(!u) return CURLUE_BAD_HANDLE; - if(!part) { + if(!part) /* setting a part to NULL clears it */ - switch(what) { - case CURLUPART_URL: - break; - case CURLUPART_SCHEME: - storep = &u->scheme; - u->guessed_scheme = FALSE; - break; - case CURLUPART_USER: - storep = &u->user; - break; - case CURLUPART_PASSWORD: - storep = &u->password; - break; - case CURLUPART_OPTIONS: - storep = &u->options; - break; - case CURLUPART_HOST: - storep = &u->host; - break; - case CURLUPART_ZONEID: - storep = &u->zoneid; - break; - case CURLUPART_PORT: - u->portnum = 0; - storep = &u->port; - break; - case CURLUPART_PATH: - storep = &u->path; - break; - case CURLUPART_QUERY: - storep = &u->query; - u->query_present = FALSE; - break; - case CURLUPART_FRAGMENT: - storep = &u->fragment; - u->fragment_present = FALSE; - break; - default: - return CURLUE_UNKNOWN_PART; - } - if(storep && *storep) { - Curl_safefree(*storep); - } - else if(!storep) { - free_urlhandle(u); - memset(u, 0, sizeof(struct Curl_URL)); - } - return CURLUE_OK; - } + return urlset_clear(u, what); nalloc = strlen(part); if(nalloc > CURL_MAX_INPUT_LENGTH) @@ -1810,9 +1823,8 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, storep = &u->fragment; u->fragment_present = TRUE; break; - case CURLUPART_URL: { + case CURLUPART_URL: return set_url(u, part, nalloc, flags); - } default: return CURLUE_UNKNOWN_PART; }