]>
git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_simple.c
1 /* Simple transformation functions.
2 Copyright (C) 1997-2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 #include <sys/param.h>
31 #include <gconv_int.h>
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
43 # define EILSEQ EINVAL
47 /* Specialized conversion function for a single byte to INTERNAL, recognizing
48 only ASCII characters. */
50 __gconv_btwoc_ascii (struct __gconv_step
*step
, unsigned char c
)
59 /* Transform from the internal, UCS4-like format, to UCS4. The
60 difference between the internal UCS4 format and the real UCS4
61 format is, if any, the endianness. The Unicode/ISO 10646 standard says
62 that, unless some higher-level protocol specifies otherwise, the byte
63 order is big endian. */
66 #define MIN_NEEDED_FROM 4
67 #define MIN_NEEDED_TO 4
68 #define FROM_DIRECTION 1
69 #define FROM_LOOP internal_ucs4_loop
70 #define TO_LOOP internal_ucs4_loop /* This is not used. */
71 #define FUNCTION_NAME __gconv_transform_internal_ucs4
75 __attribute ((always_inline
))
76 internal_ucs4_loop (struct __gconv_step
*step
,
77 struct __gconv_step_data
*step_data
,
78 const unsigned char **inptrp
, const unsigned char *inend
,
79 unsigned char **outptrp
, unsigned char *outend
,
82 const unsigned char *inptr
= *inptrp
;
83 unsigned char *outptr
= *outptrp
;
84 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
87 #if __BYTE_ORDER == __LITTLE_ENDIAN
88 /* Sigh, we have to do some real work. */
90 uint32_t *outptr32
= (uint32_t *) outptr
;
92 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
93 *outptr32
++ = bswap_32 (*(const uint32_t *) inptr
);
94 outptr
= (unsigned char *) outptr32
;
98 #elif __BYTE_ORDER == __BIG_ENDIAN
99 /* Simply copy the data. */
100 *inptrp
= inptr
+ n_convert
* 4;
101 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
103 # error "This endianess is not supported."
106 /* Determine the status. */
107 if (*inptrp
== inend
)
108 result
= __GCONV_EMPTY_INPUT
;
109 else if (*outptrp
+ 4 > outend
)
110 result
= __GCONV_FULL_OUTPUT
;
112 result
= __GCONV_INCOMPLETE_INPUT
;
117 #ifndef _STRING_ARCH_unaligned
119 __attribute ((always_inline
))
120 internal_ucs4_loop_unaligned (struct __gconv_step
*step
,
121 struct __gconv_step_data
*step_data
,
122 const unsigned char **inptrp
,
123 const unsigned char *inend
,
124 unsigned char **outptrp
, unsigned char *outend
,
125 size_t *irreversible
)
127 const unsigned char *inptr
= *inptrp
;
128 unsigned char *outptr
= *outptrp
;
129 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
132 # if __BYTE_ORDER == __LITTLE_ENDIAN
133 /* Sigh, we have to do some real work. */
136 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
138 outptr
[0] = inptr
[3];
139 outptr
[1] = inptr
[2];
140 outptr
[2] = inptr
[1];
141 outptr
[3] = inptr
[0];
146 # elif __BYTE_ORDER == __BIG_ENDIAN
147 /* Simply copy the data. */
148 *inptrp
= inptr
+ n_convert
* 4;
149 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
151 # error "This endianess is not supported."
154 /* Determine the status. */
155 if (*inptrp
== inend
)
156 result
= __GCONV_EMPTY_INPUT
;
157 else if (*outptrp
+ 4 > outend
)
158 result
= __GCONV_FULL_OUTPUT
;
160 result
= __GCONV_INCOMPLETE_INPUT
;
168 __attribute ((always_inline
))
169 internal_ucs4_loop_single (struct __gconv_step
*step
,
170 struct __gconv_step_data
*step_data
,
171 const unsigned char **inptrp
,
172 const unsigned char *inend
,
173 unsigned char **outptrp
, unsigned char *outend
,
174 size_t *irreversible
)
176 mbstate_t *state
= step_data
->__statep
;
177 size_t cnt
= state
->__count
& 7;
179 while (*inptrp
< inend
&& cnt
< 4)
180 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
182 if (__builtin_expect (cnt
< 4, 0))
184 /* Still not enough bytes. Store the ones in the input buffer. */
185 state
->__count
&= ~7;
186 state
->__count
|= cnt
;
188 return __GCONV_INCOMPLETE_INPUT
;
191 #if __BYTE_ORDER == __LITTLE_ENDIAN
192 (*outptrp
)[0] = state
->__value
.__wchb
[3];
193 (*outptrp
)[1] = state
->__value
.__wchb
[2];
194 (*outptrp
)[2] = state
->__value
.__wchb
[1];
195 (*outptrp
)[3] = state
->__value
.__wchb
[0];
197 #elif __BYTE_ORDER == __BIG_ENDIAN
199 (*outptrp
)[0] = state
->__value
.__wchb
[0];
200 (*outptrp
)[1] = state
->__value
.__wchb
[1];
201 (*outptrp
)[2] = state
->__value
.__wchb
[2];
202 (*outptrp
)[3] = state
->__value
.__wchb
[3];
204 # error "This endianess is not supported."
208 /* Clear the state buffer. */
209 state
->__count
&= ~7;
214 #include <iconv/skeleton.c>
217 /* Transform from UCS4 to the internal, UCS4-like format. Unlike
218 for the other direction we have to check for correct values here. */
219 #define DEFINE_INIT 0
220 #define DEFINE_FINI 0
221 #define MIN_NEEDED_FROM 4
222 #define MIN_NEEDED_TO 4
223 #define FROM_DIRECTION 1
224 #define FROM_LOOP ucs4_internal_loop
225 #define TO_LOOP ucs4_internal_loop /* This is not used. */
226 #define FUNCTION_NAME __gconv_transform_ucs4_internal
230 __attribute ((always_inline
))
231 ucs4_internal_loop (struct __gconv_step
*step
,
232 struct __gconv_step_data
*step_data
,
233 const unsigned char **inptrp
, const unsigned char *inend
,
234 unsigned char **outptrp
, unsigned char *outend
,
235 size_t *irreversible
)
237 int flags
= step_data
->__flags
;
238 const unsigned char *inptr
= *inptrp
;
239 unsigned char *outptr
= *outptrp
;
240 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
244 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
248 #if __BYTE_ORDER == __LITTLE_ENDIAN
249 inval
= bswap_32 (*(const uint32_t *) inptr
);
251 inval
= *(const uint32_t *) inptr
;
254 if (__builtin_expect (inval
> 0x7fffffff, 0))
256 /* The value is too large. We don't try transliteration here since
257 this is not an error because of the lack of possibilities to
258 represent the result. This is a genuine bug in the input since
259 UCS4 does not allow such values. */
260 if (irreversible
== NULL
)
261 /* We are transliterating, don't try to correct anything. */
262 return __GCONV_ILLEGAL_INPUT
;
264 if (flags
& __GCONV_IGNORE_ERRORS
)
266 /* Just ignore this character. */
273 return __GCONV_ILLEGAL_INPUT
;
276 *((uint32_t *) outptr
) = inval
;
277 outptr
+= sizeof (uint32_t);
283 /* Determine the status. */
284 if (*inptrp
== inend
)
285 result
= __GCONV_EMPTY_INPUT
;
286 else if (*outptrp
+ 4 > outend
)
287 result
= __GCONV_FULL_OUTPUT
;
289 result
= __GCONV_INCOMPLETE_INPUT
;
294 #ifndef _STRING_ARCH_unaligned
296 __attribute ((always_inline
))
297 ucs4_internal_loop_unaligned (struct __gconv_step
*step
,
298 struct __gconv_step_data
*step_data
,
299 const unsigned char **inptrp
,
300 const unsigned char *inend
,
301 unsigned char **outptrp
, unsigned char *outend
,
302 size_t *irreversible
)
304 int flags
= step_data
->__flags
;
305 const unsigned char *inptr
= *inptrp
;
306 unsigned char *outptr
= *outptrp
;
307 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
311 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
313 if (__builtin_expect (inptr
[0] > 0x80, 0))
315 /* The value is too large. We don't try transliteration here since
316 this is not an error because of the lack of possibilities to
317 represent the result. This is a genuine bug in the input since
318 UCS4 does not allow such values. */
319 if (irreversible
== NULL
)
320 /* We are transliterating, don't try to correct anything. */
321 return __GCONV_ILLEGAL_INPUT
;
323 if (flags
& __GCONV_IGNORE_ERRORS
)
325 /* Just ignore this character. */
332 return __GCONV_ILLEGAL_INPUT
;
335 # if __BYTE_ORDER == __LITTLE_ENDIAN
336 outptr
[3] = inptr
[0];
337 outptr
[2] = inptr
[1];
338 outptr
[1] = inptr
[2];
339 outptr
[0] = inptr
[3];
341 outptr
[0] = inptr
[0];
342 outptr
[1] = inptr
[1];
343 outptr
[2] = inptr
[2];
344 outptr
[3] = inptr
[3];
352 /* Determine the status. */
353 if (*inptrp
== inend
)
354 result
= __GCONV_EMPTY_INPUT
;
355 else if (*outptrp
+ 4 > outend
)
356 result
= __GCONV_FULL_OUTPUT
;
358 result
= __GCONV_INCOMPLETE_INPUT
;
366 __attribute ((always_inline
))
367 ucs4_internal_loop_single (struct __gconv_step
*step
,
368 struct __gconv_step_data
*step_data
,
369 const unsigned char **inptrp
,
370 const unsigned char *inend
,
371 unsigned char **outptrp
, unsigned char *outend
,
372 size_t *irreversible
)
374 mbstate_t *state
= step_data
->__statep
;
375 int flags
= step_data
->__flags
;
376 size_t cnt
= state
->__count
& 7;
378 while (*inptrp
< inend
&& cnt
< 4)
379 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
381 if (__builtin_expect (cnt
< 4, 0))
383 /* Still not enough bytes. Store the ones in the input buffer. */
384 state
->__count
&= ~7;
385 state
->__count
|= cnt
;
387 return __GCONV_INCOMPLETE_INPUT
;
390 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[0] > 0x80,
393 /* The value is too large. We don't try transliteration here since
394 this is not an error because of the lack of possibilities to
395 represent the result. This is a genuine bug in the input since
396 UCS4 does not allow such values. */
397 if (!(flags
& __GCONV_IGNORE_ERRORS
))
399 *inptrp
-= cnt
- (state
->__count
& 7);
400 return __GCONV_ILLEGAL_INPUT
;
405 #if __BYTE_ORDER == __LITTLE_ENDIAN
406 (*outptrp
)[0] = state
->__value
.__wchb
[3];
407 (*outptrp
)[1] = state
->__value
.__wchb
[2];
408 (*outptrp
)[2] = state
->__value
.__wchb
[1];
409 (*outptrp
)[3] = state
->__value
.__wchb
[0];
410 #elif __BYTE_ORDER == __BIG_ENDIAN
411 (*outptrp
)[0] = state
->__value
.__wchb
[0];
412 (*outptrp
)[1] = state
->__value
.__wchb
[1];
413 (*outptrp
)[2] = state
->__value
.__wchb
[2];
414 (*outptrp
)[3] = state
->__value
.__wchb
[3];
420 /* Clear the state buffer. */
421 state
->__count
&= ~7;
426 #include <iconv/skeleton.c>
429 /* Similarly for the little endian form. */
430 #define DEFINE_INIT 0
431 #define DEFINE_FINI 0
432 #define MIN_NEEDED_FROM 4
433 #define MIN_NEEDED_TO 4
434 #define FROM_DIRECTION 1
435 #define FROM_LOOP internal_ucs4le_loop
436 #define TO_LOOP internal_ucs4le_loop /* This is not used. */
437 #define FUNCTION_NAME __gconv_transform_internal_ucs4le
441 __attribute ((always_inline
))
442 internal_ucs4le_loop (struct __gconv_step
*step
,
443 struct __gconv_step_data
*step_data
,
444 const unsigned char **inptrp
, const unsigned char *inend
,
445 unsigned char **outptrp
, unsigned char *outend
,
446 size_t *irreversible
)
448 const unsigned char *inptr
= *inptrp
;
449 unsigned char *outptr
= *outptrp
;
450 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
453 #if __BYTE_ORDER == __BIG_ENDIAN
454 /* Sigh, we have to do some real work. */
457 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
458 *((uint32_t *) outptr
)++ = bswap_32 (*(const uint32_t *) inptr
);
462 #elif __BYTE_ORDER == __LITTLE_ENDIAN
463 /* Simply copy the data. */
464 *inptrp
= inptr
+ n_convert
* 4;
465 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
467 # error "This endianess is not supported."
470 /* Determine the status. */
471 if (*inptrp
== inend
)
472 result
= __GCONV_EMPTY_INPUT
;
473 else if (*outptrp
+ 4 > outend
)
474 result
= __GCONV_FULL_OUTPUT
;
476 result
= __GCONV_INCOMPLETE_INPUT
;
481 #ifndef _STRING_ARCH_unaligned
483 __attribute ((always_inline
))
484 internal_ucs4le_loop_unaligned (struct __gconv_step
*step
,
485 struct __gconv_step_data
*step_data
,
486 const unsigned char **inptrp
,
487 const unsigned char *inend
,
488 unsigned char **outptrp
, unsigned char *outend
,
489 size_t *irreversible
)
491 const unsigned char *inptr
= *inptrp
;
492 unsigned char *outptr
= *outptrp
;
493 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
496 # if __BYTE_ORDER == __BIG_ENDIAN
497 /* Sigh, we have to do some real work. */
500 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4, outptr
+= 4)
502 outptr
[0] = inptr
[3];
503 outptr
[1] = inptr
[2];
504 outptr
[2] = inptr
[1];
505 outptr
[3] = inptr
[0];
510 # elif __BYTE_ORDER == __LITTLE_ENDIAN
511 /* Simply copy the data. */
512 *inptrp
= inptr
+ n_convert
* 4;
513 *outptrp
= __mempcpy (outptr
, inptr
, n_convert
* 4);
515 # error "This endianess is not supported."
518 /* Determine the status. */
519 if (*inptrp
== inend
)
520 result
= __GCONV_EMPTY_INPUT
;
521 else if (*inptrp
+ 4 > inend
)
522 result
= __GCONV_INCOMPLETE_INPUT
;
525 assert (*outptrp
+ 4 > outend
);
526 result
= __GCONV_FULL_OUTPUT
;
535 __attribute ((always_inline
))
536 internal_ucs4le_loop_single (struct __gconv_step
*step
,
537 struct __gconv_step_data
*step_data
,
538 const unsigned char **inptrp
,
539 const unsigned char *inend
,
540 unsigned char **outptrp
, unsigned char *outend
,
541 size_t *irreversible
)
543 mbstate_t *state
= step_data
->__statep
;
544 size_t cnt
= state
->__count
& 7;
546 while (*inptrp
< inend
&& cnt
< 4)
547 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
549 if (__builtin_expect (cnt
< 4, 0))
551 /* Still not enough bytes. Store the ones in the input buffer. */
552 state
->__count
&= ~7;
553 state
->__count
|= cnt
;
555 return __GCONV_INCOMPLETE_INPUT
;
558 #if __BYTE_ORDER == __BIG_ENDIAN
559 (*outptrp
)[0] = state
->__value
.__wchb
[3];
560 (*outptrp
)[1] = state
->__value
.__wchb
[2];
561 (*outptrp
)[2] = state
->__value
.__wchb
[1];
562 (*outptrp
)[3] = state
->__value
.__wchb
[0];
567 (*outptrp
)[0] = state
->__value
.__wchb
[0];
568 (*outptrp
)[1] = state
->__value
.__wchb
[1];
569 (*outptrp
)[2] = state
->__value
.__wchb
[2];
570 (*outptrp
)[3] = state
->__value
.__wchb
[3];
575 /* Clear the state buffer. */
576 state
->__count
&= ~7;
581 #include <iconv/skeleton.c>
584 /* And finally from UCS4-LE to the internal encoding. */
585 #define DEFINE_INIT 0
586 #define DEFINE_FINI 0
587 #define MIN_NEEDED_FROM 4
588 #define MIN_NEEDED_TO 4
589 #define FROM_DIRECTION 1
590 #define FROM_LOOP ucs4le_internal_loop
591 #define TO_LOOP ucs4le_internal_loop /* This is not used. */
592 #define FUNCTION_NAME __gconv_transform_ucs4le_internal
596 __attribute ((always_inline
))
597 ucs4le_internal_loop (struct __gconv_step
*step
,
598 struct __gconv_step_data
*step_data
,
599 const unsigned char **inptrp
, const unsigned char *inend
,
600 unsigned char **outptrp
, unsigned char *outend
,
601 size_t *irreversible
)
603 int flags
= step_data
->__flags
;
604 const unsigned char *inptr
= *inptrp
;
605 unsigned char *outptr
= *outptrp
;
606 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
610 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
614 #if __BYTE_ORDER == __BIG_ENDIAN
615 inval
= bswap_32 (*(const uint32_t *) inptr
);
617 inval
= *(const uint32_t *) inptr
;
620 if (__builtin_expect (inval
> 0x7fffffff, 0))
622 /* The value is too large. We don't try transliteration here since
623 this is not an error because of the lack of possibilities to
624 represent the result. This is a genuine bug in the input since
625 UCS4 does not allow such values. */
626 if (irreversible
== NULL
)
627 /* We are transliterating, don't try to correct anything. */
628 return __GCONV_ILLEGAL_INPUT
;
630 if (flags
& __GCONV_IGNORE_ERRORS
)
632 /* Just ignore this character. */
637 return __GCONV_ILLEGAL_INPUT
;
640 *((uint32_t *) outptr
) = inval
;
641 outptr
+= sizeof (uint32_t);
647 /* Determine the status. */
648 if (*inptrp
== inend
)
649 result
= __GCONV_EMPTY_INPUT
;
650 else if (*inptrp
+ 4 > inend
)
651 result
= __GCONV_INCOMPLETE_INPUT
;
654 assert (*outptrp
+ 4 > outend
);
655 result
= __GCONV_FULL_OUTPUT
;
661 #ifndef _STRING_ARCH_unaligned
663 __attribute ((always_inline
))
664 ucs4le_internal_loop_unaligned (struct __gconv_step
*step
,
665 struct __gconv_step_data
*step_data
,
666 const unsigned char **inptrp
,
667 const unsigned char *inend
,
668 unsigned char **outptrp
, unsigned char *outend
,
669 size_t *irreversible
)
671 int flags
= step_data
->__flags
;
672 const unsigned char *inptr
= *inptrp
;
673 unsigned char *outptr
= *outptrp
;
674 size_t n_convert
= MIN (inend
- inptr
, outend
- outptr
) / 4;
678 for (cnt
= 0; cnt
< n_convert
; ++cnt
, inptr
+= 4)
680 if (__builtin_expect (inptr
[3] > 0x80, 0))
682 /* The value is too large. We don't try transliteration here since
683 this is not an error because of the lack of possibilities to
684 represent the result. This is a genuine bug in the input since
685 UCS4 does not allow such values. */
686 if (irreversible
== NULL
)
687 /* We are transliterating, don't try to correct anything. */
688 return __GCONV_ILLEGAL_INPUT
;
690 if (flags
& __GCONV_IGNORE_ERRORS
)
692 /* Just ignore this character. */
699 return __GCONV_ILLEGAL_INPUT
;
702 # if __BYTE_ORDER == __BIG_ENDIAN
703 outptr
[3] = inptr
[0];
704 outptr
[2] = inptr
[1];
705 outptr
[1] = inptr
[2];
706 outptr
[0] = inptr
[3];
708 outptr
[0] = inptr
[0];
709 outptr
[1] = inptr
[1];
710 outptr
[2] = inptr
[2];
711 outptr
[3] = inptr
[3];
720 /* Determine the status. */
721 if (*inptrp
== inend
)
722 result
= __GCONV_EMPTY_INPUT
;
723 else if (*inptrp
+ 4 > inend
)
724 result
= __GCONV_INCOMPLETE_INPUT
;
727 assert (*outptrp
+ 4 > outend
);
728 result
= __GCONV_FULL_OUTPUT
;
737 __attribute ((always_inline
))
738 ucs4le_internal_loop_single (struct __gconv_step
*step
,
739 struct __gconv_step_data
*step_data
,
740 const unsigned char **inptrp
,
741 const unsigned char *inend
,
742 unsigned char **outptrp
, unsigned char *outend
,
743 size_t *irreversible
)
745 mbstate_t *state
= step_data
->__statep
;
746 int flags
= step_data
->__flags
;
747 size_t cnt
= state
->__count
& 7;
749 while (*inptrp
< inend
&& cnt
< 4)
750 state
->__value
.__wchb
[cnt
++] = *(*inptrp
)++;
752 if (__builtin_expect (cnt
< 4, 0))
754 /* Still not enough bytes. Store the ones in the input buffer. */
755 state
->__count
&= ~7;
756 state
->__count
|= cnt
;
758 return __GCONV_INCOMPLETE_INPUT
;
761 if (__builtin_expect (((unsigned char *) state
->__value
.__wchb
)[3] > 0x80,
764 /* The value is too large. We don't try transliteration here since
765 this is not an error because of the lack of possibilities to
766 represent the result. This is a genuine bug in the input since
767 UCS4 does not allow such values. */
768 if (!(flags
& __GCONV_IGNORE_ERRORS
))
769 return __GCONV_ILLEGAL_INPUT
;
773 #if __BYTE_ORDER == __BIG_ENDIAN
774 (*outptrp
)[0] = state
->__value
.__wchb
[3];
775 (*outptrp
)[1] = state
->__value
.__wchb
[2];
776 (*outptrp
)[2] = state
->__value
.__wchb
[1];
777 (*outptrp
)[3] = state
->__value
.__wchb
[0];
779 (*outptrp
)[0] = state
->__value
.__wchb
[0];
780 (*outptrp
)[1] = state
->__value
.__wchb
[1];
781 (*outptrp
)[2] = state
->__value
.__wchb
[2];
782 (*outptrp
)[3] = state
->__value
.__wchb
[3];
788 /* Clear the state buffer. */
789 state
->__count
&= ~7;
794 #include <iconv/skeleton.c>
797 /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
798 #define DEFINE_INIT 0
799 #define DEFINE_FINI 0
800 #define MIN_NEEDED_FROM 1
801 #define MIN_NEEDED_TO 4
802 #define FROM_DIRECTION 1
803 #define FROM_LOOP ascii_internal_loop
804 #define TO_LOOP ascii_internal_loop /* This is not used. */
805 #define FUNCTION_NAME __gconv_transform_ascii_internal
806 #define ONE_DIRECTION 1
808 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
809 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
810 #define LOOPFCT FROM_LOOP
813 if (__builtin_expect (*inptr > '\x7f', 0)) \
815 /* The value is too large. We don't try transliteration here since \
816 this is not an error because of the lack of possibilities to \
817 represent the result. This is a genuine bug in the input since \
818 ASCII does not allow such values. */ \
819 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
822 /* It's a one-byte sequence. */ \
823 *((uint32_t *) outptr) = *inptr++; \
824 outptr += sizeof (uint32_t); \
826 #define LOOP_NEED_FLAGS
827 #include <iconv/loop.c>
828 #include <iconv/skeleton.c>
831 /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
832 #define DEFINE_INIT 0
833 #define DEFINE_FINI 0
834 #define MIN_NEEDED_FROM 4
835 #define MIN_NEEDED_TO 1
836 #define FROM_DIRECTION 1
837 #define FROM_LOOP internal_ascii_loop
838 #define TO_LOOP internal_ascii_loop /* This is not used. */
839 #define FUNCTION_NAME __gconv_transform_internal_ascii
840 #define ONE_DIRECTION 1
842 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
843 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
844 #define LOOPFCT FROM_LOOP
847 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
849 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
850 STANDARD_TO_LOOP_ERR_HANDLER (4); \
853 /* It's a one-byte sequence. */ \
854 *outptr++ = *((const uint32_t *) inptr); \
855 inptr += sizeof (uint32_t); \
857 #define LOOP_NEED_FLAGS
858 #include <iconv/loop.c>
859 #include <iconv/skeleton.c>
862 /* Convert from the internal (UCS4-like) format to UTF-8. */
863 #define DEFINE_INIT 0
864 #define DEFINE_FINI 0
865 #define MIN_NEEDED_FROM 4
866 #define MIN_NEEDED_TO 1
867 #define MAX_NEEDED_TO 6
868 #define FROM_DIRECTION 1
869 #define FROM_LOOP internal_utf8_loop
870 #define TO_LOOP internal_utf8_loop /* This is not used. */
871 #define FUNCTION_NAME __gconv_transform_internal_utf8
872 #define ONE_DIRECTION 1
874 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
875 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
876 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
877 #define LOOPFCT FROM_LOOP
880 uint32_t wc = *((const uint32_t *) inptr); \
883 /* It's a one-byte sequence. */ \
884 *outptr++ = (unsigned char) wc; \
885 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
890 for (step = 2; step < 6; ++step) \
891 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
894 if (__builtin_expect (outptr + step > outend, 0)) \
897 result = __GCONV_FULL_OUTPUT; \
902 *outptr = (unsigned char) (~0xff >> step); \
907 start[step] = 0x80 | (wc & 0x3f); \
910 while (--step > 0); \
915 STANDARD_TO_LOOP_ERR_HANDLER (4); \
920 #define LOOP_NEED_FLAGS
921 #include <iconv/loop.c>
922 #include <iconv/skeleton.c>
925 /* Convert from UTF-8 to the internal (UCS4-like) format. */
926 #define DEFINE_INIT 0
927 #define DEFINE_FINI 0
928 #define MIN_NEEDED_FROM 1
929 #define MAX_NEEDED_FROM 6
930 #define MIN_NEEDED_TO 4
931 #define FROM_DIRECTION 1
932 #define FROM_LOOP utf8_internal_loop
933 #define TO_LOOP utf8_internal_loop /* This is not used. */
934 #define FUNCTION_NAME __gconv_transform_utf8_internal
935 #define ONE_DIRECTION 1
937 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
938 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
939 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
940 #define LOOPFCT FROM_LOOP
947 /* Next input byte. */ \
952 /* One byte sequence. */ \
958 if (ch >= 0xc2 && ch < 0xe0) \
960 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
961 otherwise the wide character could have been represented \
962 using a single byte. */ \
966 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
968 /* We expect three bytes. */ \
972 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
974 /* We expect four bytes. */ \
978 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
980 /* We expect five bytes. */ \
984 else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
986 /* We expect six bytes. */ \
994 /* Search the end of this ill-formed UTF-8 character. This \
995 is the next byte with (x & 0xc0) != 0x80. */ \
999 while (inptr + skipped < inend \
1000 && (*(inptr + skipped) & 0xc0) == 0x80 \
1003 STANDARD_FROM_LOOP_ERR_HANDLER (skipped); \
1006 if (__builtin_expect (inptr + cnt > inend, 0)) \
1008 /* We don't have enough input. But before we report that check \
1009 that all the bytes are correct. */ \
1010 for (i = 1; inptr + i < inend; ++i) \
1011 if ((inptr[i] & 0xc0) != 0x80) \
1014 if (__builtin_expect (inptr + i == inend, 1)) \
1016 result = __GCONV_INCOMPLETE_INPUT; \
1020 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1023 /* Read the possible remaining bytes. */ \
1024 for (i = 1; i < cnt; ++i) \
1026 uint32_t byte = inptr[i]; \
1028 if ((byte & 0xc0) != 0x80) \
1029 /* This is an illegal encoding. */ \
1033 ch |= byte & 0x3f; \
1036 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1037 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1038 have been represented with fewer than cnt bytes. */ \
1039 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
1041 /* This is an illegal encoding. */ \
1042 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
1048 /* Now adjust the pointers and store the result. */ \
1049 *((uint32_t *) outptr) = ch; \
1050 outptr += sizeof (uint32_t); \
1052 #define LOOP_NEED_FLAGS
1054 #define STORE_REST \
1056 /* We store the remaining bytes while converting them into the UCS4 \
1057 format. We can assume that the first byte in the buffer is \
1058 correct and that it requires a larger number of bytes than there \
1059 are in the input buffer. */ \
1060 wint_t ch = **inptrp; \
1063 state->__count = inend - *inptrp; \
1065 if (ch >= 0xc2 && ch < 0xe0) \
1067 /* We expect two bytes. The first byte cannot be 0xc0 or \
1068 0xc1, otherwise the wide character could have been \
1069 represented using a single byte. */ \
1073 else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
1075 /* We expect three bytes. */ \
1079 else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
1081 /* We expect four bytes. */ \
1085 else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
1087 /* We expect five bytes. */ \
1093 /* We expect six bytes. */ \
1098 /* The first byte is already consumed. */ \
1100 while (++(*inptrp) < inend) \
1103 ch |= **inptrp & 0x3f; \
1107 /* Shift for the so far missing bytes. */ \
1110 /* Store the number of bytes expected for the entire sequence. */ \
1111 state->__count |= cnt << 8; \
1113 /* Store the value. */ \
1114 state->__value.__wch = ch; \
1117 #define UNPACK_BYTES \
1119 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
1120 wint_t wch = state->__value.__wch; \
1121 size_t ntotal = state->__count >> 8; \
1123 inlen = state->__count & 255; \
1125 bytebuf[0] = inmask[ntotal - 2]; \
1129 if (--ntotal < inlen) \
1130 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1133 while (ntotal > 1); \
1135 bytebuf[0] |= wch; \
1138 #define CLEAR_STATE \
1142 #include <iconv/loop.c>
1143 #include <iconv/skeleton.c>
1146 /* Convert from UCS2 to the internal (UCS4-like) format. */
1147 #define DEFINE_INIT 0
1148 #define DEFINE_FINI 0
1149 #define MIN_NEEDED_FROM 2
1150 #define MIN_NEEDED_TO 4
1151 #define FROM_DIRECTION 1
1152 #define FROM_LOOP ucs2_internal_loop
1153 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1154 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1155 #define ONE_DIRECTION 1
1157 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1158 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1159 #define LOOPFCT FROM_LOOP
1162 uint16_t u1 = *((const uint16_t *) inptr); \
1164 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1166 /* Surrogate characters in UCS-2 input are not valid. Reject \
1167 them. (Catching this here is not security relevant.) */ \
1168 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1171 *((uint32_t *) outptr) = u1; \
1172 outptr += sizeof (uint32_t); \
1175 #define LOOP_NEED_FLAGS
1176 #include <iconv/loop.c>
1177 #include <iconv/skeleton.c>
1180 /* Convert from the internal (UCS4-like) format to UCS2. */
1181 #define DEFINE_INIT 0
1182 #define DEFINE_FINI 0
1183 #define MIN_NEEDED_FROM 4
1184 #define MIN_NEEDED_TO 2
1185 #define FROM_DIRECTION 1
1186 #define FROM_LOOP internal_ucs2_loop
1187 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1188 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1189 #define ONE_DIRECTION 1
1191 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1192 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1193 #define LOOPFCT FROM_LOOP
1196 uint32_t val = *((const uint32_t *) inptr); \
1198 if (__builtin_expect (val >= 0x10000, 0)) \
1200 UNICODE_TAG_HANDLER (val, 4); \
1201 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1203 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1205 /* Surrogate characters in UCS-4 input are not valid. \
1206 We must catch this, because the UCS-2 output might be \
1207 interpreted as UTF-16 by other programs. If we let \
1208 surrogates pass through, attackers could make a security \
1209 hole exploit by synthesizing any desired plane 1-16 \
1211 result = __GCONV_ILLEGAL_INPUT; \
1212 if (! ignore_errors_p ()) \
1220 *((uint16_t *) outptr) = val; \
1221 outptr += sizeof (uint16_t); \
1225 #define LOOP_NEED_FLAGS
1226 #include <iconv/loop.c>
1227 #include <iconv/skeleton.c>
1230 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1231 #define DEFINE_INIT 0
1232 #define DEFINE_FINI 0
1233 #define MIN_NEEDED_FROM 2
1234 #define MIN_NEEDED_TO 4
1235 #define FROM_DIRECTION 1
1236 #define FROM_LOOP ucs2reverse_internal_loop
1237 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1238 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1239 #define ONE_DIRECTION 1
1241 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1242 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1243 #define LOOPFCT FROM_LOOP
1246 uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
1248 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1250 /* Surrogate characters in UCS-2 input are not valid. Reject \
1251 them. (Catching this here is not security relevant.) */ \
1252 if (! ignore_errors_p ()) \
1254 result = __GCONV_ILLEGAL_INPUT; \
1262 *((uint32_t *) outptr) = u1; \
1263 outptr += sizeof (uint32_t); \
1266 #define LOOP_NEED_FLAGS
1267 #include <iconv/loop.c>
1268 #include <iconv/skeleton.c>
1271 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1272 #define DEFINE_INIT 0
1273 #define DEFINE_FINI 0
1274 #define MIN_NEEDED_FROM 4
1275 #define MIN_NEEDED_TO 2
1276 #define FROM_DIRECTION 1
1277 #define FROM_LOOP internal_ucs2reverse_loop
1278 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1279 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1280 #define ONE_DIRECTION 1
1282 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1283 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1284 #define LOOPFCT FROM_LOOP
1287 uint32_t val = *((const uint32_t *) inptr); \
1288 if (__builtin_expect (val >= 0x10000, 0)) \
1290 UNICODE_TAG_HANDLER (val, 4); \
1291 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1293 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1295 /* Surrogate characters in UCS-4 input are not valid. \
1296 We must catch this, because the UCS-2 output might be \
1297 interpreted as UTF-16 by other programs. If we let \
1298 surrogates pass through, attackers could make a security \
1299 hole exploit by synthesizing any desired plane 1-16 \
1301 if (! ignore_errors_p ()) \
1303 result = __GCONV_ILLEGAL_INPUT; \
1312 *((uint16_t *) outptr) = bswap_16 (val); \
1313 outptr += sizeof (uint16_t); \
1317 #define LOOP_NEED_FLAGS
1318 #include <iconv/loop.c>
1319 #include <iconv/skeleton.c>