]> git.ipfire.org Git - thirdparty/glibc.git/blame - libidn/idna.c
Replace FSF snail mail address with URLs.
[thirdparty/glibc.git] / libidn / idna.c
CommitLineData
01859b1c 1/* idna.c Convert to or from IDN strings.
aff2453d 2 * Copyright (C) 2002, 2003, 2004, 2011 Simon Josefsson
01859b1c
UD
3 *
4 * This file is part of GNU Libidn.
5 *
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
59ba27a6 17 * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>.
01859b1c
UD
18 */
19
20#if HAVE_CONFIG_H
21# include "config.h"
22#endif
23
24#include <stdlib.h>
25#include <string.h>
26#include <stringprep.h>
27#include <punycode.h>
28
29#include "idna.h"
30
/* True when code point C is one of the four characters that must be
   recognized as a label-separating dot: U+002E (full stop), U+3002
   (ideographic full stop), U+FF0E (fullwidth full stop) or U+FF61
   (halfwidth ideographic full stop).  C is evaluated more than once,
   so it must not have side effects.  */
#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
                 (c) == 0xFF0E || (c) == 0xFF61)
33
34/* Core functions */
35
36/**
37 * idna_to_ascii_4i
38 * @in: input array with unicode code points.
39 * @inlen: length of input array with unicode code points.
40 * @out: output zero terminated string that must have room for at
41 * least 63 characters plus the terminating zero.
42 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
43 *
44 * The ToASCII operation takes a sequence of Unicode code points that make
45 * up one label and transforms it into a sequence of code points in the
46 * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
47 * resulting sequence are equivalent labels.
48 *
49 * It is important to note that the ToASCII operation can fail. ToASCII
50 * fails if any step of it fails. If any step of the ToASCII operation
51 * fails on any label in a domain name, that domain name MUST NOT be used
52 * as an internationalized domain name. The method for deadling with this
53 * failure is application-specific.
54 *
55 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
56 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
57 * sequence of ASCII code points or a failure condition.
58 *
59 * ToASCII never alters a sequence of code points that are all in the ASCII
60 * range to begin with (although it could fail). Applying the ToASCII
61 * operation multiple times has exactly the same effect as applying it just
62 * once.
63 *
64 * Return value: Returns 0 on success, or an error code.
65 */
66int
67idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
68{
69 size_t len, outlen;
70 uint32_t *src; /* XXX don't need to copy data? */
71 int rc;
72
73 /*
74 * ToASCII consists of the following steps:
75 *
76 * 1. If all code points in the sequence are in the ASCII range (0..7F)
77 * then skip to step 3.
78 */
79
80 {
81 size_t i;
82 int inasciirange;
83
84 inasciirange = 1;
85 for (i = 0; i < inlen; i++)
86 if (in[i] > 0x7F)
87 inasciirange = 0;
88 if (inasciirange)
89 {
90 src = malloc (sizeof (in[0]) * (inlen + 1));
91 if (src == NULL)
92 return IDNA_MALLOC_ERROR;
93
94 memcpy (src, in, sizeof (in[0]) * inlen);
95 src[inlen] = 0;
96
97 goto step3;
98 }
99 }
100
101 /*
102 * 2. Perform the steps specified in [NAMEPREP] and fail if there is
103 * an error. The AllowUnassigned flag is used in [NAMEPREP].
104 */
105
106 {
107 char *p;
108
109 p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
110 if (p == NULL)
111 return IDNA_MALLOC_ERROR;
112
113 len = strlen (p);
114 do
115 {
f6d77b20
UD
116 char *newp;
117
01859b1c 118 len = 2 * len + 10; /* XXX better guess? */
f6d77b20
UD
119 newp = realloc (p, len);
120 if (newp == NULL)
9be31a51
UD
121 {
122 free (p);
123 return IDNA_MALLOC_ERROR;
124 }
125 p = newp;
01859b1c
UD
126
127 if (flags & IDNA_ALLOW_UNASSIGNED)
128 rc = stringprep_nameprep (p, len);
129 else
130 rc = stringprep_nameprep_no_unassigned (p, len);
131 }
132 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
133
134 if (rc != STRINGPREP_OK)
135 {
136 free (p);
137 return IDNA_STRINGPREP_ERROR;
138 }
139
140 src = stringprep_utf8_to_ucs4 (p, -1, NULL);
141
142 free (p);
143 }
144
145step3:
146 /*
147 * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
148 *
149 * (a) Verify the absence of non-LDH ASCII code points; that is,
150 * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
151 *
152 * (b) Verify the absence of leading and trailing hyphen-minus;
153 * that is, the absence of U+002D at the beginning and end of
154 * the sequence.
155 */
156
157 if (flags & IDNA_USE_STD3_ASCII_RULES)
158 {
159 size_t i;
160
161 for (i = 0; src[i]; i++)
162 if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
163 (src[i] >= 0x3A && src[i] <= 0x40) ||
164 (src[i] >= 0x5B && src[i] <= 0x60) ||
165 (src[i] >= 0x7B && src[i] <= 0x7F))
166 {
167 free (src);
168 return IDNA_CONTAINS_NON_LDH;
169 }
170
171 if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
172 {
173 free (src);
174 return IDNA_CONTAINS_MINUS;
175 }
176 }
177
178 /*
179 * 4. If all code points in the sequence are in the ASCII range
180 * (0..7F), then skip to step 8.
181 */
182
183 {
184 size_t i;
185 int inasciirange;
186
187 inasciirange = 1;
188 for (i = 0; src[i]; i++)
189 {
190 if (src[i] > 0x7F)
191 inasciirange = 0;
192 /* copy string to output buffer if we are about to skip to step8 */
193 if (i < 64)
194 out[i] = src[i];
195 }
196 if (i < 64)
197 out[i] = '\0';
198 if (inasciirange)
199 goto step8;
200 }
201
202 /*
203 * 5. Verify that the sequence does NOT begin with the ACE prefix.
204 *
205 */
206
207 {
208 size_t i;
209 int match;
210
211 match = 1;
212 for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
213 if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
214 match = 0;
215 if (match)
216 {
217 free (src);
218 return IDNA_CONTAINS_ACE_PREFIX;
219 }
220 }
221
222 /*
223 * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
224 * and fail if there is an error.
225 */
226 for (len = 0; src[len]; len++)
227 ;
228 src[len] = '\0';
229 outlen = 63 - strlen (IDNA_ACE_PREFIX);
230 rc = punycode_encode (len, src, NULL,
231 &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
232 if (rc != PUNYCODE_SUCCESS)
233 {
234 free (src);
235 return IDNA_PUNYCODE_ERROR;
236 }
237 out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
238
239 /*
240 * 7. Prepend the ACE prefix.
241 */
242
243 memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
244
245 /*
246 * 8. Verify that the number of code points is in the range 1 to 63
247 * inclusive (0 is excluded).
248 */
249
250step8:
251 free (src);
252 if (strlen (out) < 1 || strlen (out) > 63)
253 return IDNA_INVALID_LENGTH;
254
255 return IDNA_SUCCESS;
256}
257
258/* ToUnicode(). May realloc() utf8in. */
259static int
260idna_to_unicode_internal (char *utf8in,
261 uint32_t * out, size_t * outlen, int flags)
262{
263 int rc;
264 char tmpout[64];
265 size_t utf8len = strlen (utf8in) + 1;
266 size_t addlen = 0;
267
268 /*
269 * ToUnicode consists of the following steps:
270 *
271 * 1. If the sequence contains any code points outside the ASCII range
272 * (0..7F) then proceed to step 2, otherwise skip to step 3.
273 */
274
275 {
276 size_t i;
277 int inasciirange;
278
279 inasciirange = 1;
280 for (i = 0; utf8in[i]; i++)
281 if (utf8in[i] & ~0x7F)
282 inasciirange = 0;
283 if (inasciirange)
284 goto step3;
285 }
286
287 /*
288 * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
289 * error. (If step 3 of ToASCII is also performed here, it will not
290 * affect the overall behavior of ToUnicode, but it is not
291 * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
292 */
293 do
294 {
9be31a51
UD
295 char *newp = realloc (utf8in, utf8len + addlen);
296 if (newp == NULL)
297 {
298 free (utf8in);
299 return IDNA_MALLOC_ERROR;
300 }
301 utf8in = newp;
01859b1c
UD
302 if (flags & IDNA_ALLOW_UNASSIGNED)
303 rc = stringprep_nameprep (utf8in, utf8len + addlen);
304 else
305 rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
306 addlen += 1;
307 }
308 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
309
310 if (rc != STRINGPREP_OK)
9be31a51
UD
311 {
312 free (utf8in);
313 return IDNA_STRINGPREP_ERROR;
314 }
01859b1c
UD
315
316 /* 3. Verify that the sequence begins with the ACE prefix, and save a
317 * copy of the sequence.
318 */
319
320step3:
321 if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
9be31a51
UD
322 {
323 free (utf8in);
324 return IDNA_NO_ACE_PREFIX;
325 }
01859b1c
UD
326
327 /* 4. Remove the ACE prefix.
328 */
329
330 memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
331 strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
332
333 /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
334 * and fail if there is an error. Save a copy of the result of
335 * this step.
336 */
337
338 (*outlen)--; /* reserve one for the zero */
339
340 rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
341 if (rc != PUNYCODE_SUCCESS)
9be31a51
UD
342 {
343 free (utf8in);
344 return IDNA_PUNYCODE_ERROR;
345 }
01859b1c
UD
346
347 out[*outlen] = 0; /* add zero */
348
349 /* 6. Apply ToASCII.
350 */
351
352 rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
353 if (rc != IDNA_SUCCESS)
9be31a51
UD
354 {
355 free (utf8in);
356 return rc;
357 }
01859b1c
UD
358
359 /* 7. Verify that the result of step 6 matches the saved copy from
360 * step 3, using a case-insensitive ASCII comparison.
361 */
362
363 if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
9be31a51
UD
364 {
365 free (utf8in);
366 return IDNA_ROUNDTRIP_VERIFY_ERROR;
367 }
01859b1c
UD
368
369 /* 8. Return the saved copy from step 5.
370 */
371
9be31a51 372 free (utf8in);
01859b1c
UD
373 return IDNA_SUCCESS;
374}
375
376/**
377 * idna_to_unicode_44i
378 * @in: input array with unicode code points.
379 * @inlen: length of input array with unicode code points.
380 * @out: output array with unicode code points.
381 * @outlen: on input, maximum size of output array with unicode code points,
382 * on exit, actual size of output array with unicode code points.
383 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
384 *
385 * The ToUnicode operation takes a sequence of Unicode code points
386 * that make up one label and returns a sequence of Unicode code
387 * points. If the input sequence is a label in ACE form, then the
388 * result is an equivalent internationalized label that is not in ACE
389 * form, otherwise the original sequence is returned unaltered.
390 *
391 * ToUnicode never fails. If any step fails, then the original input
392 * sequence is returned immediately in that step.
393 *
394 * The Punycode decoder can never output more code points than it
395 * inputs, but Nameprep can, and therefore ToUnicode can. Note that
396 * the number of octets needed to represent a sequence of code points
397 * depends on the particular character encoding used.
398 *
399 * The inputs to ToUnicode are a sequence of code points, the
400 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
401 * ToUnicode is always a sequence of Unicode code points.
402 *
403 * Return value: Returns error condition, but it must only be used for
404 * debugging purposes. The output buffer is always
405 * guaranteed to contain the correct data according to
406 * the specification (sans malloc induced errors). NB!
407 * This means that you normally ignore the return code
408 * from this function, as checking it means breaking the
409 * standard.
410 */
411int
412idna_to_unicode_44i (const uint32_t * in, size_t inlen,
413 uint32_t * out, size_t * outlen, int flags)
414{
415 int rc;
416 size_t outlensave = *outlen;
417 char *p;
418
419 p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
420 if (p == NULL)
421 return IDNA_MALLOC_ERROR;
422
423 rc = idna_to_unicode_internal (p, out, outlen, flags);
424 if (rc != IDNA_SUCCESS)
425 {
426 memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
427 inlen : outlensave));
428 *outlen = inlen;
429 }
430
9be31a51 431 /* p is freed in idna_to_unicode_internal. */
01859b1c
UD
432
433 return rc;
434}
435
436/* Wrappers that handle several labels */
437
438/**
439 * idna_to_ascii_4z:
440 * @input: zero terminated input Unicode string.
441 * @output: pointer to newly allocated output string.
442 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
443 *
444 * Convert UCS-4 domain name to ASCII string. The domain name may
445 * contain several labels, separated by dots. The output buffer must
446 * be deallocated by the caller.
447 *
448 * Return value: Returns IDNA_SUCCESS on success, or error code.
449 **/
450int
451idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
452{
453 const uint32_t *start = input;
454 const uint32_t *end = input;
455 char buf[64];
456 char *out = NULL;
457 int rc;
458
459 /* 1) Whenever dots are used as label separators, the following
460 characters MUST be recognized as dots: U+002E (full stop),
461 U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
462 U+FF61 (halfwidth ideographic full stop). */
463
76461ded 464 if (input[0] == 0)
01859b1c
UD
465 {
466 /* Handle implicit zero-length root label. */
467 *output = malloc (1);
468 if (!*output)
469 return IDNA_MALLOC_ERROR;
76461ded
UD
470 strcpy (*output, "");
471 return IDNA_SUCCESS;
472 }
473
474 if (DOTP (input[0]) && input[1] == 0)
475 {
476 /* Handle explicit zero-length root label. */
477 *output = malloc (2);
478 if (!*output)
479 return IDNA_MALLOC_ERROR;
480 strcpy (*output, ".");
01859b1c
UD
481 return IDNA_SUCCESS;
482 }
483
484 *output = NULL;
485 do
486 {
487 end = start;
488
489 for (; *end && !DOTP (*end); end++)
490 ;
491
492 if (*end == '\0' && start == end)
493 {
494 /* Handle explicit zero-length root label. */
495 buf[0] = '\0';
496 }
497 else
498 {
499 rc = idna_to_ascii_4i (start, end - start, buf, flags);
500 if (rc != IDNA_SUCCESS)
501 return rc;
502 }
503
504 if (out)
505 {
9be31a51
UD
506 char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
507 if (!newp)
508 {
509 free (out);
510 return IDNA_MALLOC_ERROR;
511 }
512 out = newp;
01859b1c
UD
513 strcat (out, ".");
514 strcat (out, buf);
515 }
516 else
517 {
518 out = (char *) malloc (strlen (buf) + 1);
519 if (!out)
520 return IDNA_MALLOC_ERROR;
521 strcpy (out, buf);
522 }
523
524 start = end + 1;
525 }
526 while (*end);
527
528 *output = out;
529
530 return IDNA_SUCCESS;
531}
532
533/**
534 * idna_to_ascii_8z:
535 * @input: zero terminated input UTF-8 string.
536 * @output: pointer to newly allocated output string.
537 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
538 *
539 * Convert UTF-8 domain name to ASCII string. The domain name may
540 * contain several labels, separated by dots. The output buffer must
541 * be deallocated by the caller.
542 *
543 * Return value: Returns IDNA_SUCCESS on success, or error code.
544 **/
545int
546idna_to_ascii_8z (const char *input, char **output, int flags)
547{
548 uint32_t *ucs4;
549 size_t ucs4len;
550 int rc;
551
552 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
553 if (!ucs4)
554 return IDNA_ICONV_ERROR;
555
556 rc = idna_to_ascii_4z (ucs4, output, flags);
557
558 free (ucs4);
559
560 return rc;
561
562}
563
564/**
565 * idna_to_ascii_lz:
566 * @input: zero terminated input UTF-8 string.
567 * @output: pointer to newly allocated output string.
568 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
569 *
570 * Convert domain name in the locale's encoding to ASCII string. The
571 * domain name may contain several labels, separated by dots. The
572 * output buffer must be deallocated by the caller.
573 *
574 * Return value: Returns IDNA_SUCCESS on success, or error code.
575 **/
576int
577idna_to_ascii_lz (const char *input, char **output, int flags)
578{
579 char *utf8;
580 int rc;
581
582 utf8 = stringprep_locale_to_utf8 (input);
583 if (!utf8)
584 return IDNA_ICONV_ERROR;
585
586 rc = idna_to_ascii_8z (utf8, output, flags);
587
588 free (utf8);
589
590 return rc;
591}
592
593/**
594 * idna_to_unicode_4z4z:
595 * @input: zero-terminated Unicode string.
596 * @output: pointer to newly allocated output Unicode string.
597 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
598 *
599 * Convert possibly ACE encoded domain name in UCS-4 format into a
600 * UCS-4 string. The domain name may contain several labels,
601 * separated by dots. The output buffer must be deallocated by the
602 * caller.
603 *
604 * Return value: Returns IDNA_SUCCESS on success, or error code.
605 **/
606int
607idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
608{
609 const uint32_t *start = input;
610 const uint32_t *end = input;
611 uint32_t *buf;
612 size_t buflen;
613 uint32_t *out = NULL;
614 size_t outlen = 0;
01859b1c
UD
615
616 *output = NULL;
617
618 do
619 {
620 end = start;
621
622 for (; *end && !DOTP (*end); end++)
623 ;
624
625 buflen = end - start;
626 buf = malloc (sizeof (buf[0]) * (buflen + 1));
627 if (!buf)
628 return IDNA_MALLOC_ERROR;
629
aff2453d
UD
630 idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
631 /* don't check return value as per specification! */
01859b1c
UD
632
633 if (out)
634 {
9be31a51
UD
635 uint32_t *newp = realloc (out,
636 sizeof (out[0])
637 * (outlen + 1 + buflen + 1));
638 if (!newp)
639 {
640 free (buf);
641 free (out);
642 return IDNA_MALLOC_ERROR;
643 }
644 out = newp;
01859b1c
UD
645 out[outlen++] = 0x002E; /* '.' (full stop) */
646 memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
647 outlen += buflen;
648 out[outlen] = 0x0;
649 free (buf);
650 }
651 else
652 {
653 out = buf;
654 outlen = buflen;
655 out[outlen] = 0x0;
656 }
657
658 start = end + 1;
659 }
660 while (*end);
661
662 *output = out;
663
664 return IDNA_SUCCESS;
665}
666
667/**
668 * idna_to_unicode_8z4z:
669 * @input: zero-terminated UTF-8 string.
670 * @output: pointer to newly allocated output Unicode string.
671 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
672 *
673 * Convert possibly ACE encoded domain name in UTF-8 format into a
674 * UCS-4 string. The domain name may contain several labels,
675 * separated by dots. The output buffer must be deallocated by the
676 * caller.
677 *
678 * Return value: Returns IDNA_SUCCESS on success, or error code.
679 **/
680int
681idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
682{
683 uint32_t *ucs4;
684 size_t ucs4len;
685 int rc;
686
687 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
688 if (!ucs4)
689 return IDNA_ICONV_ERROR;
690
691 rc = idna_to_unicode_4z4z (ucs4, output, flags);
692 free (ucs4);
693
694 return rc;
695}
696
697/**
698 * idna_to_unicode_8z8z:
699 * @input: zero-terminated UTF-8 string.
700 * @output: pointer to newly allocated output UTF-8 string.
701 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
702 *
703 * Convert possibly ACE encoded domain name in UTF-8 format into a
704 * UTF-8 string. The domain name may contain several labels,
705 * separated by dots. The output buffer must be deallocated by the
706 * caller.
707 *
708 * Return value: Returns IDNA_SUCCESS on success, or error code.
709 **/
710int
711idna_to_unicode_8z8z (const char *input, char **output, int flags)
712{
713 uint32_t *ucs4;
714 int rc;
715
716 rc = idna_to_unicode_8z4z (input, &ucs4, flags);
717 *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
718 free (ucs4);
719
720 if (!*output)
721 return IDNA_ICONV_ERROR;
722
723 return rc;
724}
725
726/**
727 * idna_to_unicode_8zlz:
728 * @input: zero-terminated UTF-8 string.
729 * @output: pointer to newly allocated output string encoded in the
730 * current locale's character set.
731 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
732 *
733 * Convert possibly ACE encoded domain name in UTF-8 format into a
734 * string encoded in the current locale's character set. The domain
735 * name may contain several labels, separated by dots. The output
736 * buffer must be deallocated by the caller.
737 *
738 * Return value: Returns IDNA_SUCCESS on success, or error code.
739 **/
740int
741idna_to_unicode_8zlz (const char *input, char **output, int flags)
742{
743 char *utf8;
744 int rc;
745
746 rc = idna_to_unicode_8z8z (input, &utf8, flags);
747 *output = stringprep_utf8_to_locale (utf8);
748 free (utf8);
749
750 if (!*output)
751 return IDNA_ICONV_ERROR;
752
753 return rc;
754}
755
756/**
757 * idna_to_unicode_lzlz:
758 * @input: zero-terminated string encoded in the current locale's
759 * character set.
760 * @output: pointer to newly allocated output string encoded in the
761 * current locale's character set.
762 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
763 *
764 * Convert possibly ACE encoded domain name in the locale's character
765 * set into a string encoded in the current locale's character set.
766 * The domain name may contain several labels, separated by dots. The
767 * output buffer must be deallocated by the caller.
768 *
769 * Return value: Returns IDNA_SUCCESS on success, or error code.
770 **/
771int
772idna_to_unicode_lzlz (const char *input, char **output, int flags)
773{
774 char *utf8;
775 int rc;
776
777 utf8 = stringprep_locale_to_utf8 (input);
778 if (!utf8)
779 return IDNA_ICONV_ERROR;
780
781 rc = idna_to_unicode_8zlz (utf8, output, flags);
782 free (utf8);
783
784 return rc;
785}
786
787/**
788 * IDNA_ACE_PREFIX
789 *
790 * The IANA allocated prefix to use for IDNA. "xn--"
791 */
792
793/**
794 * Idna_rc:
795 * @IDNA_SUCCESS: Successful operation. This value is guaranteed to
796 * always be zero, the remaining ones are only guaranteed to hold
797 * non-zero values, for logical comparison purposes.
798 * @IDNA_STRINGPREP_ERROR: Error during string preparation.
799 * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
800 * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
801 * the string contains non-LDH ASCII characters.
802 * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
803 * the string contains a leading or trailing hyphen-minus (U+002D).
804 * @IDNA_INVALID_LENGTH: The final output string is not within the
805 * (inclusive) range 1 to 63 characters.
806 * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
807 * (for ToUnicode).
808 * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
809 * string does not equal the input.
810 * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
811 * ToASCII).
812 * @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
813 * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
814 * fatal error).
2d7ae210
UD
815 * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
816 * internally in libc).
01859b1c
UD
817 *
818 * Enumerated return codes of idna_to_ascii_4i(),
819 * idna_to_unicode_44i() functions (and functions derived from those
820 * functions). The value 0 is guaranteed to always correspond to
821 * success.
822 */
823
824
825/**
826 * Idna_flags:
827 * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
828 * Unicode code points.
829 * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
830 * rules (i.e., normal host name rules).
831 *
832 * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
833 */