]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_simple.c
Replace FSF snail mail address with URLs.
[thirdparty/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2 Copyright (C) 1997-2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #include <byteswap.h>
21 #include <dlfcn.h>
22 #include <endian.h>
23 #include <errno.h>
24 #include <gconv.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <sys/param.h>
30 #include <gconv_int.h>
31
/* Declare the builtin conversion functions.  With the definitions below a
   BUILTIN_ALIAS entry expands to nothing and a BUILTIN_TRANSFORMATION entry
   expands to an extern prototype for its Fct argument.  The entries
   themselves presumably come from gconv_builtin.h (not visible here --
   confirm there).  */
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 const unsigned char **, const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
39
40
/* Use EINVAL as a stand-in when EILSEQ is not defined at this point.  */
41 #ifndef EILSEQ
42 # define EILSEQ EINVAL
43 #endif
44
45
/* Single-byte shortcut for the ASCII -> INTERNAL direction.  A byte C below
   0x80 is returned unchanged as a wide character; any other byte is not
   ASCII and yields WEOF.  STEP is not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c >= 0x80 ? WEOF : (wint_t) c;
}
56
57
58 /* Transform from the internal, UCS4-like format, to UCS4. The
59 difference between the internal UCS4 format and the real UCS4
60 format is, if any, the endianness. Unicode/ISO 10646 says that
61 unless some higher protocol specifies it differently, the byte
62 order is big endian. */
/* Parameter macros for the INTERNAL -> UCS4 step.  Both sides use units of
   at least 4 bytes, FROM_LOOP names the bulk loop defined below and
   FUNCTION_NAME the conversion function.  Presumably these are read by the
   <iconv/skeleton.c> template included after the loop functions (its
   contents are not visible here -- confirm there).  */
63 #define DEFINE_INIT 0
64 #define DEFINE_FINI 0
65 #define MIN_NEEDED_FROM 4
66 #define MIN_NEEDED_TO 4
67 #define FROM_DIRECTION 1
68 #define FROM_LOOP internal_ucs4_loop
69 #define TO_LOOP internal_ucs4_loop /* This is not used. */
70 #define FUNCTION_NAME __gconv_transform_internal_ucs4
71
72
/* Bulk loop INTERNAL -> UCS4 (big endian).  Converts as many complete
   4-byte units as fit into both the input range [*INPTRP, INEND) and the
   output range [*OUTPTRP, OUTEND), advances both pointers past the
   converted data and returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT depending on why it stopped.  STEP, STEP_DATA
   and IRREVERSIBLE are not used.  On little-endian hosts the buffers are
   accessed through uint32_t pointers.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole units available, limited by the smaller of the two buffers.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order differs from the external order: swap every unit.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = units;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already matches: a plain copy does the job.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
#else
# error "This endianess is not supported."
#endif

  /* Report why the loop ended.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
114
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that only touches the buffers one byte at
   a time, so it makes no alignment demands on *INPTRP or *OUTPTRP.  It
   converts as many complete 4-byte units as fit into both buffers, advances
   both pointers and returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP, STEP_DATA and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole units available, limited by the smaller of the two buffers.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  const unsigned char *const stop = src + units * 4;

  for (; src != stop; src += 4, dst += 4)
    {
      /* Reverse the four bytes of the unit.  */
      dst[3] = src[0];
      dst[2] = src[1];
      dst[1] = src[2];
      dst[0] = src[3];
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already matches: a plain copy does the job.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the loop ended.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
163
164
/* Convert one INTERNAL unit to UCS4 (big endian) using bytes that may be
   split across calls.  Bytes already saved in STEP_DATA->__statep (their
   number is kept in the low three bits of __count) are topped up from the
   input.  If fewer than four bytes are available in total the new count is
   saved and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise four bytes are
   written at *OUTPTRP, *OUTPTRP is advanced by four, the saved count is
   reset and __GCONV_OK is returned.  OUTEND is not consulted, so room for
   four output bytes is assumed (presumably guaranteed by the caller --
   confirm).  STEP and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *st = step_data->__statep;
  size_t have = st->__count & 7;
  unsigned char *dst;
  const char *saved;

  /* Move input bytes into the state until a whole unit is collected.  */
  for (; have < 4 && *inptrp < inend; ++have, ++*inptrp)
    st->__value.__wchb[have] = **inptrp;

  /* The old byte count is obsolete in either outcome.  */
  st->__count &= ~7;

  if (__builtin_expect (have < 4, 0))
    {
      /* Still incomplete: remember how many bytes are stored.  */
      st->__count |= have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
  saved = st->__value.__wchb;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* The saved bytes are in little-endian order: emit them reversed.  */
  dst[3] = saved[0];
  dst[2] = saved[1];
  dst[1] = saved[2];
  dst[0] = saved[3];
#elif __BYTE_ORDER == __BIG_ENDIAN
  dst[0] = saved[0];
  dst[1] = saved[1];
  dst[2] = saved[2];
  dst[3] = saved[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp = dst + 4;

  return __GCONV_OK;
}
211
212 #include <iconv/skeleton.c>
213
214
215 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
216 for the other direction we have to check for correct values here. */
/* Parameter macros for the UCS4 -> INTERNAL step: 4-byte units on both
   sides, ucs4_internal_loop as the bulk loop and
   __gconv_transform_ucs4_internal as the function name.  Presumably read by
   the <iconv/skeleton.c> template included after the loop functions (not
   visible here -- confirm there).  */
217 #define DEFINE_INIT 0
218 #define DEFINE_FINI 0
219 #define MIN_NEEDED_FROM 4
220 #define MIN_NEEDED_TO 4
221 #define FROM_DIRECTION 1
222 #define FROM_LOOP ucs4_internal_loop
223 #define TO_LOOP ucs4_internal_loop /* This is not used. */
224 #define FUNCTION_NAME __gconv_transform_ucs4_internal
225
226
/* Bulk loop UCS4 (big endian) -> INTERNAL.  Reads 4-byte units from
   [*INPTRP, INEND), checks that each value is at most 0x7fffffff and stores
   it in host order at *OUTPTRP.  Both buffers are accessed through uint32_t
   pointers.

   An out-of-range unit is handled as follows:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once, without
     updating *INPTRP or *OUTPTRP;
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the unit and
     count it in *IRREVERSIBLE;
   - otherwise: store the current positions (the input pointer is left on
     the offending unit) and return __GCONV_ILLEGAL_INPUT.

   On normal termination both pointers are advanced and the result is
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration.  A unit count computed up
     front is wrong as soon as a unit is ignored, because ignored units
     consume input without producing output; the loop would then stop early
     and the status below would be reported incorrectly.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
291
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop that accesses the buffers only byte by
   byte, so it makes no alignment demands.  Reads big-endian 4-byte units
   from [*INPTRP, INEND) and stores them in host order at *OUTPTRP.

   A unit whose most significant byte is above 0x7f encodes a value above
   0x7fffffff and is illegal:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once, without
     updating *INPTRP or *OUTPTRP;
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the unit and
     count it in *IRREVERSIBLE;
   - otherwise: store the current positions and return
     __GCONV_ILLEGAL_INPUT.

   On normal termination both pointers are advanced and the result is
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration: ignored units consume input
     without producing output, so a count computed up front would end the
     loop too early.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* The first byte is the most significant one.  Anything above 0x7f
         means the value exceeds 0x7fffffff, the same limit the aligned
         loop enforces.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
361
362
/* Convert one UCS4 (big endian) unit to INTERNAL using bytes that may be
   split across calls.  Bytes already saved in STEP_DATA->__statep (their
   number is kept in the low three bits of __count) are topped up from the
   input.

   - Fewer than four bytes in total: save the new count and return
     __GCONV_INCOMPLETE_INPUT.
   - Most significant byte above 0x7f (value above 0x7fffffff): unless
     __GCONV_IGNORE_ERRORS is set, give back the input bytes consumed by
     this call and return __GCONV_ILLEGAL_INPUT; with the flag set the unit
     is dropped silently.
   - Otherwise the unit is written in host order at *OUTPTRP, which is
     advanced by four.

   In the last two cases the saved byte count is reset and __GCONV_OK is
   returned.  OUTEND is not consulted, so room for four output bytes is
   assumed (presumably guaranteed by the caller -- confirm).  STEP and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The first byte is the most significant one.  Anything above 0x7f means
     the value exceeds 0x7fffffff, the limit the bulk loop enforces.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Un-consume the bytes taken from the input in this call; the
             bytes saved by earlier calls are still counted in the state.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
423
424 #include <iconv/skeleton.c>
425
426
427 /* Similarly for the little endian form. */
/* Parameter macros for the INTERNAL -> UCS4LE step: 4-byte units on both
   sides, internal_ucs4le_loop as the bulk loop and
   __gconv_transform_internal_ucs4le as the function name.  Presumably read
   by the <iconv/skeleton.c> template included after the loop functions
   (not visible here -- confirm there).  */
428 #define DEFINE_INIT 0
429 #define DEFINE_FINI 0
430 #define MIN_NEEDED_FROM 4
431 #define MIN_NEEDED_TO 4
432 #define FROM_DIRECTION 1
433 #define FROM_LOOP internal_ucs4le_loop
434 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
435 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
436
437
/* Bulk loop INTERNAL -> UCS4LE (little endian).  Converts as many complete
   4-byte units as fit into both the input range [*INPTRP, INEND) and the
   output range [*OUTPTRP, OUTEND), advances both pointers past the
   converted data and returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP, STEP_DATA and IRREVERSIBLE are not
   used.  On big-endian hosts the buffers are accessed through uint32_t
   pointers.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole units available, limited by the smaller of the two buffers.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order differs from the external order: swap every unit.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = units;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already matches: a plain copy does the job.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
#else
# error "This endianess is not supported."
#endif

  /* Report why the loop ended.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
480
#ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that only touches the buffers one byte
   at a time, so it makes no alignment demands on *INPTRP or *OUTPTRP.  It
   converts as many complete 4-byte units as fit into both buffers, advances
   both pointers and returns __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT
   (fewer than four input bytes left) or __GCONV_FULL_OUTPUT.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Whole units available, limited by the smaller of the two buffers.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  const unsigned char *const stop = src + units * 4;

  for (; src != stop; src += 4, dst += 4)
    {
      /* Reverse the four bytes of the unit.  */
      dst[3] = src[0];
      dst[2] = src[1];
      dst[1] = src[2];
      dst[0] = src[3];
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already matches: a plain copy does the job.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the loop ended.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  /* Input remains, so the output must be what ran out.  */
  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
532
533
/* Convert one INTERNAL unit to UCS4LE (little endian) using bytes that may
   be split across calls.  Bytes already saved in STEP_DATA->__statep (their
   number is kept in the low three bits of __count) are topped up from the
   input.  If fewer than four bytes are available in total the new count is
   saved and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise four bytes are
   written at *OUTPTRP, *OUTPTRP is advanced by four, the saved count is
   reset and __GCONV_OK is returned.  OUTEND is not consulted, so room for
   four output bytes is assumed (presumably guaranteed by the caller --
   confirm).  STEP and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *st = step_data->__statep;
  size_t have = st->__count & 7;
  unsigned char *dst;
  const char *saved;

  /* Move input bytes into the state until a whole unit is collected.  */
  for (; have < 4 && *inptrp < inend; ++have, ++*inptrp)
    st->__value.__wchb[have] = **inptrp;

  /* The old byte count is obsolete in either outcome.  */
  st->__count &= ~7;

  if (__builtin_expect (have < 4, 0))
    {
      /* Still incomplete: remember how many bytes are stored.  */
      st->__count |= have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
  saved = st->__value.__wchb;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* The saved bytes are in big-endian order: emit them reversed.  */
  dst[3] = saved[0];
  dst[2] = saved[1];
  dst[1] = saved[2];
  dst[0] = saved[3];
#else
  dst[0] = saved[0];
  dst[1] = saved[1];
  dst[2] = saved[2];
  dst[3] = saved[3];
#endif
  *outptrp = dst + 4;

  return __GCONV_OK;
}
580
581 #include <iconv/skeleton.c>
582
583
584 /* And finally from UCS4-LE to the internal encoding. */
/* Parameter macros for the UCS4LE -> INTERNAL step: 4-byte units on both
   sides, ucs4le_internal_loop as the bulk loop and
   __gconv_transform_ucs4le_internal as the function name.  Presumably read
   by the <iconv/skeleton.c> template included after the loop functions
   (not visible here -- confirm there).  */
585 #define DEFINE_INIT 0
586 #define DEFINE_FINI 0
587 #define MIN_NEEDED_FROM 4
588 #define MIN_NEEDED_TO 4
589 #define FROM_DIRECTION 1
590 #define FROM_LOOP ucs4le_internal_loop
591 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
592 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
593
594
/* Bulk loop UCS4LE (little endian) -> INTERNAL.  Reads 4-byte units from
   [*INPTRP, INEND), checks that each value is at most 0x7fffffff and stores
   it in host order at *OUTPTRP.  Both buffers are accessed through uint32_t
   pointers.

   An out-of-range unit is handled as follows:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once, without
     updating *INPTRP or *OUTPTRP;
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the unit and
     count it in *IRREVERSIBLE;
   - otherwise: store the current positions (the input pointer is left on
     the offending unit) and return __GCONV_ILLEGAL_INPUT.

   On normal termination both pointers are advanced and the result is
   __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT or __GCONV_FULL_OUTPUT.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration.  A unit count computed up
     front is wrong as soon as a unit is ignored, because ignored units
     consume input without producing output; the loop would then stop with
     room left in both buffers and trip the assertion below.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Tell the caller how far we got, as the big-endian sibling
             ucs4_internal_loop does.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
660
#ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop that accesses the buffers only byte by
   byte, so it makes no alignment demands.  Reads little-endian 4-byte units
   from [*INPTRP, INEND) and stores them in host order at *OUTPTRP.

   A unit whose most significant byte (the fourth one) is above 0x7f encodes
   a value above 0x7fffffff and is illegal:
   - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once, without
     updating *INPTRP or *OUTPTRP;
   - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: skip the unit and
     count it in *IRREVERSIBLE;
   - otherwise: store the current positions and return
     __GCONV_ILLEGAL_INPUT.

   On normal termination both pointers are advanced and the result is
   __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT or __GCONV_FULL_OUTPUT.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration: ignored units consume input
     without producing output, so a count computed up front would end the
     loop too early and trip the assertion below.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* The last byte is the most significant one.  Anything above 0x7f
         means the value exceeds 0x7fffffff, the same limit the aligned
         loop enforces.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
734
735
/* Convert one UCS4LE (little endian) unit to INTERNAL using bytes that may
   be split across calls.  Bytes already saved in STEP_DATA->__statep (their
   number is kept in the low three bits of __count) are topped up from the
   input.

   - Fewer than four bytes in total: save the new count and return
     __GCONV_INCOMPLETE_INPUT.
   - Most significant byte (the fourth one) above 0x7f, i.e. a value above
     0x7fffffff: unless __GCONV_IGNORE_ERRORS is set, give back the input
     bytes consumed by this call and return __GCONV_ILLEGAL_INPUT; with the
     flag set the unit is dropped silently.
   - Otherwise the unit is written in host order at *OUTPTRP, which is
     advanced by four.

   In the last two cases the saved byte count is reset and __GCONV_OK is
   returned.  OUTEND is not consulted, so room for four output bytes is
   assumed (presumably guaranteed by the caller -- confirm).  STEP and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The last byte is the most significant one.  Anything above 0x7f means
     the value exceeds 0x7fffffff, the limit the bulk loop enforces.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Un-consume the bytes taken from the input in this call, as the
             big-endian sibling ucs4_internal_loop_single does; the bytes
             saved by earlier calls are still counted in the state.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
793
794 #include <iconv/skeleton.c>
795
796
797 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
/* Parameter macros for the ASCII -> INTERNAL step: at least one input byte
   and four output bytes per character.  Presumably read by the
   <iconv/loop.c> and <iconv/skeleton.c> templates included at the end of
   this section (not visible here -- confirm there).  */
798 #define DEFINE_INIT 0
799 #define DEFINE_FINI 0
800 #define MIN_NEEDED_FROM 1
801 #define MIN_NEEDED_TO 4
802 #define FROM_DIRECTION 1
803 #define FROM_LOOP ascii_internal_loop
804 #define TO_LOOP ascii_internal_loop /* This is not used. */
805 #define FUNCTION_NAME __gconv_transform_ascii_internal
806 #define ONE_DIRECTION 1
807
808 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
809 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
810 #define LOOPFCT FROM_LOOP
/* Per-character body.  A byte above 0x7f is handed to
   STANDARD_FROM_LOOP_ERR_HANDLER (defined elsewhere) with a length of one.
   Any other byte is stored as a 32-bit value at outptr; the input advances
   by one byte and the output by four.  */
811 #define BODY \
812 { \
813 if (__builtin_expect (*inptr > '\x7f', 0)) \
814 { \
815 /* The value is too large. We don't try transliteration here since \
816 this is not an error because of the lack of possibilities to \
817 represent the result. This is a genuine bug in the input since \
818 ASCII does not allow such values. */ \
819 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
820 } \
821 else \
822 { \
823 /* It's an one byte sequence. */ \
824 *((uint32_t *) outptr) = *inptr++; \
825 outptr += sizeof (uint32_t); \
826 } \
827 }
828 #define LOOP_NEED_FLAGS
829 #include <iconv/loop.c>
830 #include <iconv/skeleton.c>
831
832
833 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
/* Parameter macros for the INTERNAL -> ASCII step: at least four input
   bytes and one output byte per character.  Presumably read by the
   <iconv/loop.c> and <iconv/skeleton.c> templates included at the end of
   this section (not visible here -- confirm there).  */
834 #define DEFINE_INIT 0
835 #define DEFINE_FINI 0
836 #define MIN_NEEDED_FROM 4
837 #define MIN_NEEDED_TO 1
838 #define FROM_DIRECTION 1
839 #define FROM_LOOP internal_ascii_loop
840 #define TO_LOOP internal_ascii_loop /* This is not used. */
841 #define FUNCTION_NAME __gconv_transform_internal_ascii
842 #define ONE_DIRECTION 1
843
844 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
845 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
846 #define LOOPFCT FROM_LOOP
/* Per-character body.  The input is read as a 32-bit value.  A value above
   0x7f is passed to UNICODE_TAG_HANDLER and then to
   STANDARD_TO_LOOP_ERR_HANDLER (both defined elsewhere) with a length of
   four.  Any other value is stored as one byte; the input advances by four
   bytes and the output by one.  */
847 #define BODY \
848 { \
849 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
850 { \
851 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
852 STANDARD_TO_LOOP_ERR_HANDLER (4); \
853 } \
854 else \
855 { \
856 /* It's an one byte sequence. */ \
857 *outptr++ = *((const uint32_t *) inptr); \
858 inptr += sizeof (uint32_t); \
859 } \
860 }
861 #define LOOP_NEED_FLAGS
862 #include <iconv/loop.c>
863 #include <iconv/skeleton.c>
864
865
866 /* Convert from the internal (UCS4-like) format to UTF-8. */
/* Parameter macros for the INTERNAL -> UTF-8 step: four input bytes per
   character and between one and six output bytes.  Presumably read by the
   <iconv/loop.c> and <iconv/skeleton.c> templates included at the end of
   this section (not visible here -- confirm there).  */
867 #define DEFINE_INIT 0
868 #define DEFINE_FINI 0
869 #define MIN_NEEDED_FROM 4
870 #define MIN_NEEDED_TO 1
871 #define MAX_NEEDED_TO 6
872 #define FROM_DIRECTION 1
873 #define FROM_LOOP internal_utf8_loop
874 #define TO_LOOP internal_utf8_loop /* This is not used. */
875 #define FUNCTION_NAME __gconv_transform_internal_utf8
876 #define ONE_DIRECTION 1
877
878 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
879 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
880 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
881 #define LOOPFCT FROM_LOOP
/* Per-character body.  The input is read as a 32-bit value wc.
   - wc < 0x80 is emitted as a single byte.
   - wc <= 0x7fffffff is emitted as a multi-byte sequence.  The length
     `step' is the smallest value in 2..5 for which wc has no bit set at
     position 5 * step + 1 or above, and 6 if there is none.  If the sequence
     does not fit before outend, result is set to __GCONV_FULL_OUTPUT and
     the body is left with `break'.  Otherwise the length marker goes into
     the first byte, the trailing bytes are filled from last to first with
     six bits of wc each, and the remaining bits are OR-ed into the first
     byte.
   - Any larger value goes to STANDARD_TO_LOOP_ERR_HANDLER (defined
     elsewhere).
   When the end of the body is reached the input is advanced by four
   bytes.  */
882 #define BODY \
883 { \
884 uint32_t wc = *((const uint32_t *) inptr); \
885 \
886 if (__builtin_expect (wc < 0x80, 1)) \
887 /* It's an one byte sequence. */ \
888 *outptr++ = (unsigned char) wc; \
889 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
890 { \
891 size_t step; \
892 unsigned char *start; \
893 \
894 for (step = 2; step < 6; ++step) \
895 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
896 break; \
897 \
898 if (__builtin_expect (outptr + step > outend, 0)) \
899 { \
900 /* Too long. */ \
901 result = __GCONV_FULL_OUTPUT; \
902 break; \
903 } \
904 \
905 start = outptr; \
906 *outptr = (unsigned char) (~0xff >> step); \
907 outptr += step; \
908 do \
909 { \
910 start[--step] = 0x80 | (wc & 0x3f); \
911 wc >>= 6; \
912 } \
913 while (step > 1); \
914 start[0] |= wc; \
915 } \
916 else \
917 { \
918 STANDARD_TO_LOOP_ERR_HANDLER (4); \
919 } \
920 \
921 inptr += 4; \
922 }
923 #define LOOP_NEED_FLAGS
924 #include <iconv/loop.c>
925 #include <iconv/skeleton.c>
926
927
928 /* Convert from UTF-8 to the internal (UCS4-like) format. */
/* Parameter macros for the UTF-8 -> INTERNAL step: between one and six
   input bytes per character and four output bytes.  Presumably read by the
   <iconv/loop.c> and <iconv/skeleton.c> templates included at the end of
   this section (not visible here -- confirm there).  */
929 #define DEFINE_INIT 0
930 #define DEFINE_FINI 0
931 #define MIN_NEEDED_FROM 1
932 #define MAX_NEEDED_FROM 6
933 #define MIN_NEEDED_TO 4
934 #define FROM_DIRECTION 1
935 #define FROM_LOOP utf8_internal_loop
936 #define TO_LOOP utf8_internal_loop /* This is not used. */
937 #define FUNCTION_NAME __gconv_transform_utf8_internal
938 #define ONE_DIRECTION 1
939
940 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
941 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
942 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
943 #define LOOPFCT FROM_LOOP
/* Per-character body.
   - A first byte below 0x80 is a complete character.
   - Otherwise the first byte selects the sequence length cnt: 0xc2..0xdf
     means 2, 0xe0..0xef means 3, 0xf0..0xf7 means 4, 0xf8..0xfb means 5 and
     0xfc..0xfd means 6; its payload bits are kept in ch.  Any other first
     byte is ill-formed: the following bytes of the form 10xxxxxx are
     skipped (i stops at 5 at the latest) and control reaches the `errout'
     label, which invokes STANDARD_FROM_LOOP_ERR_HANDLER (defined elsewhere)
     with length i.
   - If the sequence extends past inend, the bytes that are present are
     checked.  When all of them are of the form 10xxxxxx, result is set to
     __GCONV_INCOMPLETE_INPUT and the body is left with `break'; otherwise
     control jumps to `errout' with i at the first offending byte.
   - The trailing bytes are folded into ch six bits at a time.  A trailing
     byte that is not of the form 10xxxxxx, an encoding longer than
     necessary, or a value in the surrogate range 0xd800..0xdfff jumps to
     `errout'.
   On success ch is stored as a 32-bit value at outptr and the output
   advances by four bytes.  */
944 #define BODY \
945 { \
946 /* Next input byte. */ \
947 uint32_t ch = *inptr; \
948 \
949 if (__builtin_expect (ch < 0x80, 1)) \
950 { \
951 /* One byte sequence. */ \
952 ++inptr; \
953 } \
954 else \
955 { \
956 uint_fast32_t cnt; \
957 uint_fast32_t i; \
958 \
959 if (ch >= 0xc2 && ch < 0xe0) \
960 { \
961 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
962 otherwise the wide character could have been represented \
963 using a single byte. */ \
964 cnt = 2; \
965 ch &= 0x1f; \
966 } \
967 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
968 { \
969 /* We expect three bytes. */ \
970 cnt = 3; \
971 ch &= 0x0f; \
972 } \
973 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
974 { \
975 /* We expect four bytes. */ \
976 cnt = 4; \
977 ch &= 0x07; \
978 } \
979 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
980 { \
981 /* We expect five bytes. */ \
982 cnt = 5; \
983 ch &= 0x03; \
984 } \
985 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
986 { \
987 /* We expect six bytes. */ \
988 cnt = 6; \
989 ch &= 0x01; \
990 } \
991 else \
992 { \
993 /* Search the end of this ill-formed UTF-8 character. This \
994 is the next byte with (x & 0xc0) != 0x80. */ \
995 i = 0; \
996 do \
997 ++i; \
998 while (inptr + i < inend \
999 && (*(inptr + i) & 0xc0) == 0x80 \
1000 && i < 5); \
1001 \
1002 errout: \
1003 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1004 } \
1005 \
1006 if (__builtin_expect (inptr + cnt > inend, 0)) \
1007 { \
1008 /* We don't have enough input. But before we report that check \
1009 that all the bytes are correct. */ \
1010 for (i = 1; inptr + i < inend; ++i) \
1011 if ((inptr[i] & 0xc0) != 0x80) \
1012 break; \
1013 \
1014 if (__builtin_expect (inptr + i == inend, 1)) \
1015 { \
1016 result = __GCONV_INCOMPLETE_INPUT; \
1017 break; \
1018 } \
1019 \
1020 goto errout; \
1021 } \
1022 \
1023 /* Read the possible remaining bytes. */ \
1024 for (i = 1; i < cnt; ++i) \
1025 { \
1026 uint32_t byte = inptr[i]; \
1027 \
1028 if ((byte & 0xc0) != 0x80) \
1029 /* This is an illegal encoding. */ \
1030 break; \
1031 \
1032 ch <<= 6; \
1033 ch |= byte & 0x3f; \
1034 } \
1035 \
1036 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1037 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1038 have been represented with fewer than cnt bytes. */ \
1039 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1040 /* Do not accept UTF-16 surrogates. */ \
1041 || (ch >= 0xd800 && ch <= 0xdfff)) \
1042 { \
1043 /* This is an illegal encoding. */ \
1044 goto errout; \
1045 } \
1046 \
1047 inptr += cnt; \
1048 } \
1049 \
1050 /* Now adjust the pointers and store the result. */ \
1051 *((uint32_t *) outptr) = ch; \
1052 outptr += sizeof (uint32_t); \
1053 }
1054 #define LOOP_NEED_FLAGS
1055
/* Save an incomplete UTF-8 sequence in `state'.  The low byte of
   state->__count receives the number of bytes available
   (inend - *inptrp) and the expected total length cnt is stored shifted
   left by eight bits.  The payload bits of the available bytes are
   collected in ch, shifted left by six bits for every byte still missing,
   and stored in state->__value.__wch.  All remaining input is consumed:
   *inptrp ends up equal to inend.  `state', `inptrp' and `inend' are not
   defined here; presumably the including template provides them
   (confirm in loop.c).  */
1056 #define STORE_REST \
1057 { \
1058 /* We store the remaining bytes while converting them into the UCS4 \
1059 format. We can assume that the first byte in the buffer is \
1060 correct and that it requires a larger number of bytes than there \
1061 are in the input buffer. */ \
1062 wint_t ch = **inptrp; \
1063 size_t cnt, r; \
1064 \
1065 state->__count = inend - *inptrp; \
1066 \
1067 assert (ch != 0xc0 && ch != 0xc1); \
1068 if (ch >= 0xc2 && ch < 0xe0) \
1069 { \
1070 /* We expect two bytes. The first byte cannot be 0xc0 or \
1071 0xc1, otherwise the wide character could have been \
1072 represented using a single byte. */ \
1073 cnt = 2; \
1074 ch &= 0x1f; \
1075 } \
1076 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1077 { \
1078 /* We expect three bytes. */ \
1079 cnt = 3; \
1080 ch &= 0x0f; \
1081 } \
1082 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1083 { \
1084 /* We expect four bytes. */ \
1085 cnt = 4; \
1086 ch &= 0x07; \
1087 } \
1088 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1089 { \
1090 /* We expect five bytes. */ \
1091 cnt = 5; \
1092 ch &= 0x03; \
1093 } \
1094 else \
1095 { \
1096 /* We expect six bytes. */ \
1097 cnt = 6; \
1098 ch &= 0x01; \
1099 } \
1100 \
1101 /* The first byte is already consumed. */ \
1102 r = cnt - 1; \
1103 while (++(*inptrp) < inend) \
1104 { \
1105 ch <<= 6; \
1106 ch |= **inptrp & 0x3f; \
1107 --r; \
1108 } \
1109 \
1110 /* Shift for the so far missing bytes. */ \
1111 ch <<= r * 6; \
1112 \
1113 /* Store the number of bytes expected for the entire sequence. */ \
1114 state->__count |= cnt << 8; \
1115 \
1116 /* Store the value. */ \
1117 state->__value.__wch = ch; \
1118 }
1119
/* Rebuild in bytebuf the bytes of a partial UTF-8 sequence described by
   `state' (the counterpart of STORE_REST).  The expected total length is
   read from state->__count >> 8 and the number of bytes originally
   available from state->__count & 255, which is also assigned to inlen.
   bytebuf[0] receives the length marker for the total length; trailing
   bytes are regenerated from state->__value.__wch six bits at a time, but
   only positions below inlen are written; the bits left over are OR-ed
   into bytebuf[0].  `state', `inlen' and `bytebuf' are not defined here;
   presumably the including template provides them (confirm in loop.c).  */
1120 #define UNPACK_BYTES \
1121 { \
1122 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1123 wint_t wch = state->__value.__wch; \
1124 size_t ntotal = state->__count >> 8; \
1125 \
1126 inlen = state->__count & 255; \
1127 \
1128 bytebuf[0] = inmask[ntotal - 2]; \
1129 \
1130 do \
1131 { \
1132 if (--ntotal < inlen) \
1133 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1134 wch >>= 6; \
1135 } \
1136 while (ntotal > 1); \
1137 \
1138 bytebuf[0] |= wch; \
1139 }
1140
/* Reset the byte counters kept in the conversion state.  */
#define CLEAR_STATE (state->__count = 0)
1143
1144
1145 #include <iconv/loop.c>
1146 #include <iconv/skeleton.c>
1147
1148
/* UCS2 -> internal (UCS4-like) format.  Only this one direction is
   provided, so the same loop name is given for both loop slots.  */
#define FUNCTION_NAME __gconv_transform_ucs2_internal
#define ONE_DIRECTION 1
#define FROM_DIRECTION 1
#define FROM_LOOP ucs2_internal_loop
#define TO_LOOP ucs2_internal_loop /* This is not used.  */
#define LOOPFCT FROM_LOOP

#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Two bytes per character on the UCS2 side, four on the internal side.  */
#define MIN_NEEDED_FROM 2
#define MIN_NEEDED_TO 4
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
/* Conversion step for one UCS2 character: the 16-bit unit read at inptr
   is stored as a 32-bit value at outptr.  */
#define BODY \
  { \
    const uint16_t unit = get16 (inptr); \
    \
    /* Units in the surrogate range 0xd800..0xdfff are not valid in \
       UCS-2 input.  Reject them.  (Catching this here is not security \
       relevant.)  */ \
    if (__builtin_expect ((unit & 0xf800) == 0xd800, 0)) \
      { \
        STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    *((uint32_t *) outptr) = unit; \
    inptr += 2; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
1179 #include <iconv/loop.c>
1180 #include <iconv/skeleton.c>
1181
1182
/* Internal (UCS4-like) format -> UCS2.  Only this one direction is
   provided, so the same loop name is given for both loop slots.  */
#define FUNCTION_NAME __gconv_transform_internal_ucs2
#define ONE_DIRECTION 1
#define FROM_DIRECTION 1
#define FROM_LOOP internal_ucs2_loop
#define TO_LOOP internal_ucs2_loop /* This is not used.  */
#define LOOPFCT FROM_LOOP

#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Four bytes per character on the internal side, two on the UCS2 side.  */
#define MIN_NEEDED_FROM 4
#define MIN_NEEDED_TO 2
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
/* Conversion step for one character: the 32-bit value read at inptr is
   written as a 16-bit unit at outptr when UCS2 can represent it.  */
#define BODY \
  { \
    const uint32_t wc = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (wc < 0x10000, 1)) \
      { \
        if (__builtin_expect ((wc & 0xf800) == 0xd800, 0)) \
          { \
            /* Surrogate characters in UCS-4 input are not valid. \
               We must catch this, because the UCS-2 output might be \
               interpreted as UTF-16 by other programs.  If we let \
               surrogates pass through, attackers could make a \
               security hole exploit by synthesizing any desired \
               plane 1-16 character.  */ \
            result = __GCONV_ILLEGAL_INPUT; \
            if (! ignore_errors_p ()) \
              break; \
            inptr += 4; \
            ++*irreversible; \
            continue; \
          } \
        \
        put16 (outptr, wc); \
        inptr += 4; \
        outptr += sizeof (uint16_t); \
      } \
    else \
      { \
        /* The value does not fit into a single 16-bit unit.  */ \
        UNICODE_TAG_HANDLER (wc, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
  }
#define LOOP_NEED_FLAGS
1229 #include <iconv/loop.c>
1230 #include <iconv/skeleton.c>
1231
1232
/* UCS2 in the other endianness -> internal (UCS4-like) format.  Only
   this one direction is provided, so the same loop name is given for
   both loop slots.  */
#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION 1
#define FROM_DIRECTION 1
#define FROM_LOOP ucs2reverse_internal_loop
#define TO_LOOP ucs2reverse_internal_loop /* This is not used.  */
#define LOOPFCT FROM_LOOP

#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Two bytes per character on the UCS2 side, four on the internal side.  */
#define MIN_NEEDED_FROM 2
#define MIN_NEEDED_TO 4
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
/* Conversion step for one UCS2 character in the other endianness: the
   16-bit unit read at inptr is byte-swapped and stored as a 32-bit value
   at outptr.  */
#define BODY \
  { \
    uint16_t u1 = bswap_16 (get16 (inptr)); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.) \
           This goes through the same handler as the conversion from \
           UCS2 in native byte order, so that both byte orders report \
           such input identically, also when it is skipped because \
           errors are being ignored.  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
#define LOOP_NEED_FLAGS
1270 #include <iconv/loop.c>
1271 #include <iconv/skeleton.c>
1272
1273
/* Internal (UCS4-like) format -> UCS2 in the other endianness.  Only
   this one direction is provided, so the same loop name is given for
   both loop slots.  */
#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION 1
#define FROM_DIRECTION 1
#define FROM_LOOP internal_ucs2reverse_loop
#define TO_LOOP internal_ucs2reverse_loop /* This is not used.  */
#define LOOPFCT FROM_LOOP

#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Four bytes per character on the internal side, two on the UCS2 side.  */
#define MIN_NEEDED_FROM 4
#define MIN_NEEDED_TO 2
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
/* Conversion step for one character: the 32-bit value read at inptr is
   written as a byte-swapped 16-bit unit at outptr when UCS2 can
   represent it.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could make a security \
           hole exploit by synthesizing any desired plane 1-16 \
           character.  */ \
        /* Record the error before looking at the ignore flag, as the \
           conversion to UCS2 in native byte order does, so that the \
           illegal input is still reported when it is skipped.  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        put16 (outptr, bswap_16 (val)); \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
#define LOOP_NEED_FLAGS
1321 #include <iconv/loop.c>
1322 #include <iconv/skeleton.c>