pthreads
PTR
ptr
+punycode
py
pycurl
QNX
Note that even when not asking for URL encoding, the '%' (byte 37) will be URL
encoded to make sure the host name remains valid.
+.IP CURLU_PUNYCODE
+If set and \fICURLU_URLENCODE\fP is not set, and asked to retrieve the
+\fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP parts, libcurl returns the host
+name in its punycode version if it contains any non-ASCII octets (and is an
+IDN name).
+
+If libcurl is built without IDN capabilities, using this bit will make
+\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name contains
+anything outside the ASCII range.
+
+(Added in curl 7.88.0)
.SH PARTS
.IP CURLUPART_URL
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a
CURLU_NO_DEFAULT_PORT 7.62.0
CURLU_NON_SUPPORT_SCHEME 7.62.0
CURLU_PATH_AS_IS 7.62.0
+CURLU_PUNYCODE 7.88.0
CURLU_URLDECODE 7.62.0
CURLU_URLENCODE 7.62.0
CURLUE_BAD_FILE_URL 7.81.0
CURLUE_BAD_SCHEME 7.81.0
CURLUE_BAD_SLASHES 7.81.0
CURLUE_BAD_USER 7.81.0
+CURLUE_LACKS_IDN 7.88.0
CURLUE_MALFORMED_INPUT 7.62.0
CURLUE_NO_FRAGMENT 7.62.0
CURLUE_NO_HOST 7.62.0
CURLUE_BAD_SCHEME, /* 27 */
CURLUE_BAD_SLASHES, /* 28 */
CURLUE_BAD_USER, /* 29 */
+ CURLUE_LACKS_IDN, /* 30 */
CURLUE_LAST
} CURLUcode;
#define CURLU_NO_AUTHORITY (1<<10) /* Allow empty authority when the
scheme is unknown. */
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
+#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
typedef struct Curl_URL CURLU;
* idn_decode() returns an allocated IDN decoded string if it was
* possible. NULL on error.
*/
-static char *Curl_idn_decode(const char *input)
+static char *idn_decode(const char *input)
{
char *decoded = NULL;
#ifdef USE_LIBIDN2
return decoded;
}
+char *Curl_idn_decode(const char *input)
+{
+ char *d = idn_decode(input);
+#ifdef USE_LIBIDN2
+ if(d) {
+ char *c = strdup(d);
+ idn2_free(d);
+ d = c;
+ }
+#endif
+ return d;
+}
+
/*
* Frees data allocated by idnconvert_hostname()
*/
void Curl_free_idnconverted_hostname(struct hostname *host)
{
-#if defined(USE_LIBIDN2)
if(host->encalloc) {
- idn2_free(host->encalloc); /* must be freed with idn2_free() since this was
- allocated by libidn */
+ /* must be freed with idn2_free() if allocated by libidn */
+ Curl_idn_free(host->encalloc);
host->encalloc = NULL;
}
-#elif defined(USE_WIN32_IDN)
- free(host->encalloc); /* must be freed with free() since this was
- allocated by Curl_win32_idn_to_ascii */
- host->encalloc = NULL;
-#else
- (void)host;
-#endif
}
#endif /* USE_IDN */
#ifdef USE_IDN
/* Check name for non-ASCII and convert hostname if we can */
if(!Curl_is_ASCII_name(host->name)) {
- char *decoded = Curl_idn_decode(host->name);
+ char *decoded = idn_decode(host->name);
if(decoded) {
/* successful */
host->encalloc = decoded;
#endif
return CURLE_OK;
}
-
#if defined(USE_LIBIDN2) || defined(USE_WIN32_IDN)
#define USE_IDN
void Curl_free_idnconverted_hostname(struct hostname *host);
+char *Curl_idn_decode(const char *input);
+#ifdef USE_LIBIDN2
+#define Curl_idn_free(x) idn2_free(x)
+#else
+#define Curl_idn_free(x) free(x)
+#endif
+
#else
#define Curl_free_idnconverted_hostname(x)
+#define Curl_idn_decode(x) NULL
#endif
#endif /* HEADER_CURL_IDN_H */
case CURLUE_BAD_USER:
return "Bad user";
+ case CURLUE_LACKS_IDN:
+ return "libcurl lacks IDN support";
+
case CURLUE_LAST:
break;
}
#include "inet_pton.h"
#include "inet_ntop.h"
#include "strdup.h"
+#include "idn.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
char portbuf[7];
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
+ bool punycode = FALSE;
bool plusdecode = FALSE;
(void)flags;
if(!u)
case CURLUPART_HOST:
ptr = u->host;
ifmissing = CURLUE_NO_HOST;
+ punycode = (flags & CURLU_PUNYCODE)?1:0;
break;
case CURLUPART_ZONEID:
ptr = u->zoneid;
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
+ punycode = (flags & CURLU_PUNYCODE)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
}
+ else if(punycode) {
+ if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ allochost = Curl_idn_decode(u->host);
+ if(!allochost)
+ return CURLUE_OUT_OF_MEMORY;
+#endif
+ }
+ }
else {
/* only encode '%' in output host name */
char *host = u->host;
free(*part);
*part = Curl_dyn_ptr(&enc);
}
+ else if(punycode) {
+ if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ char *allochost = Curl_idn_decode(*part);
+ if(!allochost)
+ return CURLUE_OUT_OF_MEMORY;
+ free(*part);
+ *part = allochost;
+#endif
+ }
+ }
return CURLUE_OK;
}
u27: Bad scheme
u28: Unsupported number of slashes following scheme
u29: Bad user
-u30: CURLUcode unknown
+u30: libcurl lacks IDN support
+u31: CURLUcode unknown
</stdout>
</verify>
*/
#include "test.h"
+#if defined(USE_LIBIDN2) || defined(USE_WIN32_IDN)
+#define USE_IDN
+#endif
#include "testutil.h"
#include "warnless.h"
};
static const struct testcase get_parts_list[] ={
+#ifdef USE_IDN
+ {"https://räksmörgås.se",
+ "https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | "
+ "[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK},
+#else
+ {"https://räksmörgås.se",
+ "https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",
+ 0, CURLU_PUNYCODE, CURLUE_OK},
+#endif
/* https://ℂᵤⓇℒ。𝐒🄴 */
{"https://"
"%e2%84%82%e1%b5%a4%e2%93%87%e2%84%92%e3%80%82%f0%9d%90%92%f0%9f%84%b4",
};
static const struct urltestcase get_url_list[] = {
+#ifdef USE_IDN
+ {"https://räksmörgås.se/path?q#frag",
+ "https://xn--rksmrgs-5wao1o.se/path?q#frag", 0, CURLU_PUNYCODE, CURLUE_OK},
+#endif
/* unsupported schemes with no guessing enabled */
{"data:text/html;charset=utf-8;base64,PCFET0NUWVBFIEhUTUw+PG1ldGEgY",
"", 0, 0, CURLUE_UNSUPPORTED_SCHEME},