From: Daniel Stenberg Date: Fri, 11 Aug 2023 07:41:28 +0000 (+0200) Subject: urlapi: CURLU_PUNY2IDN - convert from punycode to IDN name X-Git-Tag: curl-8_3_0~167 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c350069f6442d3920d14a57cb97fcb868ad54030;p=thirdparty%2Fcurl.git urlapi: CURLU_PUNY2IDN - convert from punycode to IDN name Assisted-by: Jay Satiro Closes #11655 --- diff --git a/docs/libcurl/curl_url_get.3 b/docs/libcurl/curl_url_get.3 index ba669711bd..ea712b1bb2 100644 --- a/docs/libcurl/curl_url_get.3 +++ b/docs/libcurl/curl_url_get.3 @@ -91,6 +91,16 @@ If libcurl is built without IDN capabilities, using this bit will make anything outside the ASCII range. (Added in curl 7.88.0) +.IP CURLU_PUNY2IDN +If set and asked to retrieve the \fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP +parts, libcurl returns the host name in its IDN (International Domain Name) +UTF-8 version if it otherwise is a punycode version. + +If libcurl is built without IDN capabilities, using this bit will make +\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name is using +punycode. + +(Added in curl 8.3.0) .SH PARTS .IP CURLUPART_URL When asked to return the full URL, \fIcurl_url_get(3)\fP will return a diff --git a/docs/libcurl/symbols-in-versions b/docs/libcurl/symbols-in-versions index 6f88bf5e55..7adf5c4ae4 100644 --- a/docs/libcurl/symbols-in-versions +++ b/docs/libcurl/symbols-in-versions @@ -1063,6 +1063,7 @@ CURLU_NO_AUTHORITY 7.67.0 CURLU_NO_DEFAULT_PORT 7.62.0 CURLU_NON_SUPPORT_SCHEME 7.62.0 CURLU_PATH_AS_IS 7.62.0 +CURLU_PUNY2IDN 8.3.0 CURLU_PUNYCODE 7.88.0 CURLU_URLDECODE 7.62.0 CURLU_URLENCODE 7.62.0 diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h index 992e9f6019..88cdeb3bca 100644 --- a/include/curl/urlapi.h +++ b/include/curl/urlapi.h @@ -97,6 +97,7 @@ typedef enum { scheme is unknown. 
*/ #define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */ #define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */ +#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */ typedef struct Curl_URL CURLU; diff --git a/lib/idn.c b/lib/idn.c index 5f4b07e018..ff1808a63b 100644 --- a/lib/idn.c +++ b/lib/idn.c @@ -75,7 +75,8 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out) wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); if(in_w) { wchar_t punycode[IDN_MAX_LENGTH]; - int chars = IdnToAscii(0, in_w, -1, punycode, IDN_MAX_LENGTH); + int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode, + IDN_MAX_LENGTH); curlx_unicodefree(in_w); if(chars) { char *mstr = curlx_convert_wchar_to_UTF8(punycode); @@ -91,6 +92,27 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out) return success; } +char *Curl_win32_ascii_to_idn(const char *in) +{ + char *out = NULL; + + wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); + if(in_w) { + WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */ + int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn, + IDN_MAX_LENGTH); + if(chars) { + /* 'chars' is "the number of characters retrieved" */ + char *mstr = curlx_convert_wchar_to_UTF8(idn); + if(mstr) { + out = strdup(mstr); + curlx_unicodefree(mstr); + } + } + } + return out; +} + #endif /* USE_WIN32_IDN */ /* @@ -144,6 +166,19 @@ static char *idn_decode(const char *input) return decoded; } +static char *idn_encode(const char *puny) +{ + char *enc = NULL; +#ifdef USE_LIBIDN2 + int rc = idn2_to_unicode_8z8z(puny, &enc, 0); + if(rc != IDNA_SUCCESS) + return NULL; +#elif defined(USE_WIN32_IDN) + enc = Curl_win32_ascii_to_idn(puny); +#endif + return enc; +} + char *Curl_idn_decode(const char *input) { char *d = idn_decode(input); @@ -157,6 +192,19 @@ char *Curl_idn_decode(const char *input) return d; } +char *Curl_idn_encode(const char *puny) +{ + char *d = idn_encode(puny); +#ifdef USE_LIBIDN2 + if(d) { + char *c = strdup(d); + idn2_free(d); 
+ d = c; + } +#endif + return d; +} + /* * Frees data allocated by idnconvert_hostname() */ diff --git a/lib/idn.h b/lib/idn.h index 6c0bbb7109..2c292cdd92 100644 --- a/lib/idn.h +++ b/lib/idn.h @@ -26,6 +26,7 @@ #ifdef USE_WIN32_IDN bool Curl_win32_idn_to_ascii(const char *in, char **out); +char *Curl_win32_ascii_to_idn(const char *in); #endif /* USE_WIN32_IDN */ bool Curl_is_ASCII_name(const char *hostname); CURLcode Curl_idnconvert_hostname(struct hostname *host); @@ -33,6 +34,7 @@ CURLcode Curl_idnconvert_hostname(struct hostname *host); #define USE_IDN void Curl_free_idnconverted_hostname(struct hostname *host); char *Curl_idn_decode(const char *input); +char *Curl_idn_encode(const char *input); #ifdef USE_LIBIDN2 #define Curl_idn_free(x) idn2_free(x) #else diff --git a/lib/urlapi.c b/lib/urlapi.c index b1a126d548..74bd67be6f 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -1403,6 +1403,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, bool urldecode = (flags & CURLU_URLDECODE)?1:0; bool urlencode = (flags & CURLU_URLENCODE)?1:0; bool punycode = FALSE; + bool depunyfy = FALSE; bool plusdecode = FALSE; (void)flags; if(!u) @@ -1433,6 +1434,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, ptr = u->host; ifmissing = CURLUE_NO_HOST; punycode = (flags & CURLU_PUNYCODE)?1:0; + depunyfy = (flags & CURLU_PUNY2IDN)?1:0; break; case CURLUPART_ZONEID: ptr = u->zoneid; @@ -1483,6 +1485,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, char *port = u->port; char *allochost = NULL; punycode = (flags & CURLU_PUNYCODE)?1:0; + depunyfy = (flags & CURLU_PUNY2IDN)?1:0; if(u->scheme && strcasecompare("file", u->scheme)) { url = aprintf("file://%s%s%s", u->path, @@ -1548,6 +1551,17 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, #endif } } + else if(depunyfy) { + if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) { +#ifndef USE_IDN + return CURLUE_LACKS_IDN; +#else + allochost = Curl_idn_encode(u->host); + if(!allochost) + 
return CURLUE_OUT_OF_MEMORY; +#endif + } + } url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s", scheme, @@ -1626,6 +1640,19 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, #endif } } + else if(depunyfy) { + if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) { +#ifndef USE_IDN + return CURLUE_LACKS_IDN; +#else + char *allochost = Curl_idn_encode(*part); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; + free(*part); + *part = allochost; +#endif + } + } return CURLUE_OK; } diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index ff03bec939..f09f05f03e 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -179,6 +179,9 @@ static const struct testcase get_parts_list[] ={ {"https://räksmörgås.se", "https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | " "[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK}, + {"https://xn--rksmrgs-5wao1o.se", + "https | [11] | [12] | [13] | räksmörgås.se | " + "[15] | / | [16] | [17]", 0, CURLU_PUNY2IDN, CURLUE_OK}, #else {"https://räksmörgås.se", "https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",