]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_simple.c
* iconv/gconv_simple.c (ucs4le_internal_loop): Remove cast used as
[thirdparty/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2 Copyright (C) 1997-2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
31 #include <gconv_int.h>
32
33 #define BUILTIN_ALIAS(s1, s2) /* nothing */
34 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39 #include "gconv_builtin.h"
40
41
42 #ifndef EILSEQ
43 # define EILSEQ EINVAL
44 #endif
45
46
/* Single-byte to INTERNAL conversion helper which accepts nothing but
   7-bit ASCII.  C is returned unchanged when it is below 0x80; every
   other byte yields WEOF.  STEP is not consulted.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
57
58
/* Convert the internal, UCS4-like representation to real UCS4.  The
   two can differ only in endianness: Unicode/ISO 10646 prescribes big
   endian unless a higher-level protocol says otherwise, while the
   internal form is read and written here as host-order 32-bit words.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4
#define FROM_LOOP		internal_ucs4_loop
#define TO_LOOP			internal_ucs4_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0


/* Convert as many whole 4-byte characters as fit into both
   *INPTRP .. INEND and *OUTPTRP .. OUTEND and advance both pointers.
   The little-endian branch accesses the buffers as uint32_t.
   STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of complete characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order is the reverse of UCS4, so every word is byte-swapped.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = nchars;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is big endian: a plain copy does the job.  */
  *inptrp = src + nchars * 4;
  memcpy (dst, src, nchars * 4);
  *outptrp = dst + nchars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Report why the loop stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
116
#ifndef _STRING_ARCH_unaligned
/* Same conversion as internal_ucs4_loop, but the buffers are only ever
   accessed one byte at a time, so no 4-byte alignment is assumed.
   STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of complete characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *stop = src + nchars * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Byte order already matches: copy verbatim.  */
  *inptrp = src + nchars * 4;
  memcpy (dst, src, nchars * 4);
  *outptrp = dst + nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the loop stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
165
166
/* Finish one character of which some bytes are already buffered in
   the conversion state (STEP_DATA->__statep).  The low three bits of
   the state's __count hold the number of buffered bytes.  Bytes are
   taken from *INPTRP until four are available; if the input runs out
   first the new count is recorded and __GCONV_INCOMPLETE_INPUT is
   returned.  Otherwise the character is written in big-endian order
   to *OUTPTRP, the buffered count is reset and __GCONV_OK is
   returned.  OUTEND is not checked here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;

  /* Top the buffer up from the input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__builtin_expect (have < 4, 0))
    {
      /* Still short of a full character: remember how far we got.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
#if __BYTE_ORDER == __LITTLE_ENDIAN
  dst[0] = state->__value.__wchb[3];
  dst[1] = state->__value.__wchb[2];
  dst[2] = state->__value.__wchb[1];
  dst[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* XXX unaligned */
  dst[0] = state->__value.__wchb[0];
  dst[1] = state->__value.__wchb[1];
  dst[2] = state->__value.__wchb[2];
  dst[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp = dst + 4;

  /* Nothing is buffered any longer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
213
214 #include <iconv/skeleton.c>
215
216
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs4_internal_loop
#define TO_LOOP			ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs4_internal


/* Convert big-endian UCS4 characters from *INPTRP .. INEND into
   host-order 32-bit words at *OUTPTRP .. OUTEND.  Both buffers are
   accessed as uint32_t.  *INPTRP and *OUTPTRP are advanced past what
   was consumed and produced.

   A value above 0x7fffffff is illegal.  If IRREVERSIBLE is NULL the
   function returns __GCONV_ILLEGAL_INPUT at once, without updating the
   pointers.  Otherwise, if STEP_DATA->__flags has
   __GCONV_IGNORE_ERRORS set, the character is skipped and
   *IRREVERSIBLE incremented; if not, the pointers are stored and
   __GCONV_ILLEGAL_INPUT is returned.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain,
   __GCONV_INCOMPLETE_INPUT when a partial character is left over, or
   __GCONV_ILLEGAL_INPUT as described above.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The remaining room is re-checked on every iteration.  A character
     skipped under __GCONV_IGNORE_ERRORS consumes input but no output,
     so a trip count computed up front would stop the loop too early.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
293
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4_internal_loop: converts big-endian UCS4
   from *INPTRP .. INEND into the host-order internal form at
   *OUTPTRP .. OUTEND without assuming 4-byte alignment.  Both
   pointers are advanced past what was consumed and produced.

   A character whose most significant byte exceeds 0x7f (value above
   0x7fffffff) is illegal.  If IRREVERSIBLE is NULL the function
   returns __GCONV_ILLEGAL_INPUT at once, without updating the
   pointers.  Otherwise, if STEP_DATA->__flags has
   __GCONV_IGNORE_ERRORS set, the character is skipped and
   *IRREVERSIBLE incremented; if not, the pointers are stored and
   __GCONV_ILLEGAL_INPUT is returned.

   Returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT,
   __GCONV_INCOMPLETE_INPUT or __GCONV_ILLEGAL_INPUT.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The remaining room is re-checked on every iteration.  A character
     skipped under __GCONV_IGNORE_ERRORS consumes input but no output,
     so a trip count computed up front would stop the loop too early.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* UCS4 stores the most significant byte first.  Any value of
         0x80 or more there puts the character above 0x7fffffff, the
         same limit the aligned loop enforces.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
363
364
/* Finish one big-endian UCS4 character of which some bytes are already
   buffered in the conversion state (STEP_DATA->__statep).  The low
   three bits of the state's __count hold the number of buffered
   bytes.

   Bytes are taken from *INPTRP until four are available; if the input
   runs out first the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  A character whose most
   significant byte exceeds 0x7f is illegal: unless
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags the bytes taken
   from the input in this call are handed back and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character
   is dropped.  A legal character is written in host byte order to
   *OUTPTRP.  In the last two cases the buffered count is reset and
   __GCONV_OK is returned.  OUTEND is not checked here.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 0 is the most significant one.  Anything above 0x7f there
     means the value exceeds 0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* state->__count still holds the count from before this
             call, so this undoes exactly what was read above.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
425
426 #include <iconv/skeleton.c>
427
428
/* Similarly for the little endian form.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs4le_loop
#define TO_LOOP			internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4le


/* Convert host-order 32-bit characters from *INPTRP .. INEND into
   little-endian UCS4 at *OUTPTRP .. OUTEND.  As many whole characters
   as fit into both buffers are converted and both pointers are
   advanced accordingly.  The big-endian branch accesses the buffers
   as uint32_t.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;
  /* A cast expression is not an lvalue, so it cannot be incremented;
     walk the output through a properly typed pointer instead.  */
  uint32_t *outptr32 = (uint32_t *) outptr;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
  outptr = (unsigned char *) outptr32;

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  memcpy (outptr, inptr, n_convert * 4);
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
480
#ifndef _STRING_ARCH_unaligned
/* Same conversion as internal_ucs4le_loop, but the buffers are only
   ever accessed one byte at a time, so no 4-byte alignment is
   assumed.  STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of complete characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *stop = src + nchars * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Byte order already matches: copy verbatim.  */
  *inptrp = src + nchars * 4;
  memcpy (dst, src, nchars * 4);
  *outptrp = dst + nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the loop stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  /* Input is left, so the output must be what ran out.  */
  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
532
533
/* Finish one character of which some bytes are already buffered in
   the conversion state (STEP_DATA->__statep).  The low three bits of
   the state's __count hold the number of buffered bytes.  Bytes are
   taken from *INPTRP until four are available; if the input runs out
   first the new count is recorded and __GCONV_INCOMPLETE_INPUT is
   returned.  Otherwise the character is written in little-endian
   order to *OUTPTRP, which is advanced by exactly four bytes, the
   buffered count is reset and __GCONV_OK is returned.  OUTEND is not
   checked here.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  /* XXX unaligned */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  /* One advance shared by both byte orders.  */
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
580
581 #include <iconv/skeleton.c>
582
583
/* And finally from UCS4-LE to the internal encoding.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs4le_internal_loop
#define TO_LOOP			ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs4le_internal


/* Convert little-endian UCS4 characters from *INPTRP .. INEND into
   host-order 32-bit words at *OUTPTRP .. OUTEND.  Both buffers are
   accessed as uint32_t.  *INPTRP and *OUTPTRP are advanced past what
   was consumed and produced.

   A value above 0x7fffffff is illegal.  If IRREVERSIBLE is NULL the
   function returns __GCONV_ILLEGAL_INPUT at once, without updating the
   pointers.  Otherwise, if STEP_DATA->__flags has
   __GCONV_IGNORE_ERRORS set, the character is skipped and
   *IRREVERSIBLE incremented; if not, the pointers are stored so that
   *INPTRP designates the offending character, and
   __GCONV_ILLEGAL_INPUT is returned.

   Returns __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_ILLEGAL_INPUT.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The remaining room is re-checked on every iteration.  A character
     skipped under __GCONV_IGNORE_ERRORS consumes input but no output,
     so a trip count computed up front could end the loop with both
     input and output room left and trip the assertion below.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Publish the progress made so far, as ucs4_internal_loop
             does.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
660
#ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4le_internal_loop: converts little-endian
   UCS4 from *INPTRP .. INEND into the host-order internal form at
   *OUTPTRP .. OUTEND without assuming 4-byte alignment.  Both
   pointers are advanced past what was consumed and produced.

   A character whose most significant byte (the last one) exceeds 0x7f
   (value above 0x7fffffff) is illegal.  If IRREVERSIBLE is NULL the
   function returns __GCONV_ILLEGAL_INPUT at once, without updating the
   pointers.  Otherwise, if STEP_DATA->__flags has
   __GCONV_IGNORE_ERRORS set, the character is skipped and
   *IRREVERSIBLE incremented; if not, the pointers are stored and
   __GCONV_ILLEGAL_INPUT is returned.

   Returns __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_ILLEGAL_INPUT.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The remaining room is re-checked on every iteration.  A character
     skipped under __GCONV_IGNORE_ERRORS consumes input but no output,
     so a trip count computed up front could end the loop with both
     input and output room left and trip the assertion below.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* UCS4-LE stores the most significant byte last.  Any value of
         0x80 or more there puts the character above 0x7fffffff, the
         same limit the aligned loop enforces.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
734
735
/* Finish one little-endian UCS4 character of which some bytes are
   already buffered in the conversion state (STEP_DATA->__statep).
   The low three bits of the state's __count hold the number of
   buffered bytes.

   Bytes are taken from *INPTRP until four are available; if the input
   runs out first the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  A character whose most
   significant byte (the last one) exceeds 0x7f is illegal: unless
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags the bytes taken
   from the input in this call are handed back and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character
   is dropped.  A legal character is written in host byte order to
   *OUTPTRP.  In the last two cases the buffered count is reset and
   __GCONV_OK is returned.  OUTEND is not checked here.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 3 is the most significant one.  Anything above 0x7f there
     means the value exceeds 0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* state->__count still holds the count from before this
             call, so this undoes exactly what was read above, the
             same way ucs4_internal_loop_single does.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
793
794 #include <iconv/skeleton.c>
795
796
/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
#define FUNCTION_NAME		__gconv_transform_ascii_internal
#define FROM_LOOP		ascii_internal_loop
#define TO_LOOP			ascii_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		1
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOP_NEED_FLAGS
/* One input byte per iteration: a 7-bit value is widened to a 32-bit
   word, anything else goes to the standard error handler.  */
#define BODY \
  { \
    if (__builtin_expect (*inptr <= '\x7f', 1)) \
      /* A 7-bit byte maps to the code point of the same value.  */ \
      *((uint32_t *) outptr) = *inptr++; \
    else \
      { \
        /* The value is too large.  Transliteration is not attempted: \
           the problem is not that the result cannot be represented, \
           the input itself is broken since ASCII has no such \
           values.  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    outptr += sizeof (uint32_t); \
  }
827 #include <iconv/loop.c>
828 #include <iconv/skeleton.c>
829
830
/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
#define FUNCTION_NAME		__gconv_transform_internal_ascii
#define FROM_LOOP		internal_ascii_loop
#define TO_LOOP			internal_ascii_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOP_NEED_FLAGS
/* One 32-bit word per iteration: values up to 0x7f are narrowed to a
   single byte, anything else goes through the tag and error
   handlers.  */
#define BODY \
  { \
    if (__builtin_expect (*((const uint32_t *) inptr) <= 0x7f, 1)) \
      /* Representable: emit the single byte.  */ \
      *outptr++ = *((const uint32_t *) inptr); \
    else \
      { \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    inptr += sizeof (uint32_t); \
  }
858 #include <iconv/loop.c>
859 #include <iconv/skeleton.c>
860
861
/* Convert from the internal (UCS4-like) format to UTF-8.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define MAX_NEEDED_TO		6
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_utf8_loop
#define TO_LOOP			internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_utf8
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Encode one 32-bit word as a UTF-8 sequence of one to six bytes.
   Values above 0x7fffffff go to the standard error handler.  */
#define BODY \
  { \
    uint32_t wc = *((const uint32_t *) inptr); \
 \
    if (wc < 0x80) \
      /* It's an one byte sequence.  */ \
      *outptr++ = (unsigned char) wc; \
    else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
      { \
        size_t step; \
        /* Unsigned, because bytes of 0x80 and above are stored \
           through it below.  NOTE(review): assumes outptr is an \
           unsigned char pointer, as in the hand-written loops of \
           this file.  */ \
        unsigned char *start; \
 \
        /* STEP becomes the sequence length: the smallest count in \
           2..5 whose 5 * STEP + 1 payload bits can hold WC, or 6.  */ \
        for (step = 2; step < 6; ++step) \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
            break; \
 \
        if (__builtin_expect (outptr + step > outend, 0)) \
          { \
            /* Too long.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
 \
        start = outptr; \
        /* Lead byte marker: STEP one bits followed by a zero bit.  */ \
        *outptr = (unsigned char) (~0xff >> step); \
        outptr += step; \
        --step; \
        /* Fill the trail bytes from the last one backwards, six \
           payload bits each.  */ \
        do \
          { \
            start[step] = 0x80 | (wc & 0x3f); \
            wc >>= 6; \
          } \
        while (--step > 0); \
        /* What is left of WC belongs into the lead byte.  */ \
        start[0] |= wc; \
      } \
    else \
      { \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
 \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
921 #include <iconv/loop.c>
922 #include <iconv/skeleton.c>
923
924
/* Convert from UTF-8 to the internal (UCS4-like) format.  */
#define FUNCTION_NAME		__gconv_transform_utf8_internal
#define FROM_LOOP		utf8_internal_loop
#define TO_LOOP			utf8_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		6
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
/* Decode one UTF-8 sequence of one to six bytes into a 32-bit word.
   The error handler macros are kept outside of any loop or switch
   local to this body.  */
#define BODY \
  { \
    /* Lead byte of the next sequence.  */ \
    uint32_t ch = *inptr; \
    uint_fast32_t cnt; \
    uint_fast32_t i; \
 \
    if (ch < 0x80) \
      { \
        /* Plain ASCII, nothing to decode.  */ \
        cnt = 1; \
        ++inptr; \
      } \
    else \
      { \
        /* Classify the lead byte: CNT is the sequence length and CH \
           keeps only the payload bits of the lead byte.  */ \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* Two bytes.  0xc0 and 0xc1 are excluded since such a \
               character would fit into a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
          { \
            /* Three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
          { \
            /* Four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
          { \
            /* Five bytes.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
          { \
            /* Six bytes.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            /* Not a valid lead byte.  Skip it together with the \
               trail bytes (x & 0xc0) == 0x80 following it, at most \
               five bytes in total.  */ \
            int nskip; \
 \
            for (nskip = 1; \
                 inptr + nskip < inend \
                 && (inptr[nskip] & 0xc0) == 0x80 \
                 && nskip < 5; \
                 ++nskip) \
              ; \
 \
            STANDARD_FROM_LOOP_ERR_HANDLER (nskip); \
          } \
 \
        if (__builtin_expect (inptr + cnt > inend, 0)) \
          { \
            /* The sequence is cut short by the end of the input. \
               Make sure the bytes which are present are trail bytes \
               before reporting that.  */ \
            i = 1; \
            while (inptr + i < inend && (inptr[i] & 0xc0) == 0x80) \
              ++i; \
 \
            if (__builtin_expect (inptr + i == inend, 1)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
 \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
 \
        /* Fold in the trail bytes, stopping at the first byte which \
           is not one.  */ \
        i = 1; \
        while (i < cnt && (inptr[i] & 0xc0) == 0x80) \
          { \
            ch = (ch << 6) | (inptr[i] & 0x3f); \
            ++i; \
          } \
 \
        /* i < cnt: some trail byte was not in 0x80..0xbf. \
           cnt > 2 and ch < 2^(5*cnt-4): the character would have \
           fit into fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
          { \
            /* This is an illegal encoding.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
 \
        inptr += cnt; \
      } \
 \
    /* Store the decoded character.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
1053
/* Save an incomplete UTF-8 sequence in the conversion state.  The low
   byte of state->__count receives the number of bytes present, the
   bits from 8 upwards the length the lead byte announces, and
   state->__value.__wch the payload collected so far, shifted as if
   the missing trail bytes carried zero payload.  */
#define STORE_REST \
  { \
    /* The bytes are converted towards UCS4 while being stored.  The \
       first byte in the buffer is taken to be correct and to \
       announce more bytes than the input buffer holds.  */ \
    wint_t ch = **inptrp; \
    size_t cnt, missing; \
 \
    state->__count = inend - *inptrp; \
 \
    if (ch >= 0xc2 && ch < 0xe0) \
      { \
        /* Two bytes.  0xc0 and 0xc1 are excluded since such a \
           character would fit into a single byte.  */ \
        cnt = 2; \
        ch &= 0x1f; \
      } \
    else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
      { \
        /* Three bytes.  */ \
        cnt = 3; \
        ch &= 0x0f; \
      } \
    else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
      { \
        /* Four bytes.  */ \
        cnt = 4; \
        ch &= 0x07; \
      } \
    else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
      { \
        /* Five bytes.  */ \
        cnt = 5; \
        ch &= 0x03; \
      } \
    else \
      { \
        /* Six bytes.  */ \
        cnt = 6; \
        ch &= 0x01; \
      } \
 \
    /* The lead byte is accounted for; take in whatever trail bytes \
       the input still has.  */ \
    missing = cnt - 1; \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp)) \
      { \
        ch = (ch << 6) | (**inptrp & 0x3f); \
        --missing; \
      } \
 \
    /* Make room for the bytes which have not arrived yet.  */ \
    ch <<= missing * 6; \
 \
    /* Record the length of the entire sequence.  */ \
    state->__count |= cnt << 8; \
 \
    /* Record the value.  */ \
    state->__value.__wch = ch; \
  }
1116
/* Rebuild into bytebuf the bytes of the partial UTF-8 sequence saved
   in the conversion state and set inlen to their number (the low
   byte of state->__count).  The sequence length is read from the
   bits above it.  */
#define UNPACK_BYTES \
  { \
    /* Lead byte markers for sequences of 2, 3, 4, 5 and 6 bytes.  */ \
    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t wch = state->__value.__wch; \
    size_t ntotal = state->__count >> 8; \
 \
    inlen = state->__count & 255; \
 \
    bytebuf[0] = inmask[ntotal - 2]; \
 \
    /* Walk the trail byte positions from the last one down to 1. \
       Only positions below inlen were really stored; the others \
       merely have their six payload bits dropped.  */ \
    do \
      { \
        --ntotal; \
        if (ntotal < inlen) \
          bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
        wch >>= 6; \
      } \
    while (ntotal > 1); \
 \
    /* The remaining bits are the payload of the lead byte.  */ \
    bytebuf[0] |= wch; \
  }

/* Forget any saved partial sequence.  */
#define CLEAR_STATE \
  state->__count = 0
1140
1141
1142 #include <iconv/loop.c>
1143 #include <iconv/skeleton.c>
1144
1145
1146 /* Convert from UCS2 to the internal (UCS4-like) format. */
1147 #define DEFINE_INIT 0
1148 #define DEFINE_FINI 0
1149 #define MIN_NEEDED_FROM 2
1150 #define MIN_NEEDED_TO 4
1151 #define FROM_DIRECTION 1
1152 #define FROM_LOOP ucs2_internal_loop
1153 #define TO_LOOP ucs2_internal_loop /* This is not used. */
1154 #define FUNCTION_NAME __gconv_transform_ucs2_internal
1155 #define ONE_DIRECTION 1
1156
1157 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1158 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1159 #define LOOPFCT FROM_LOOP
1160 #define BODY \
1161 { \
1162 uint16_t u1 = *((const uint16_t *) inptr); \
1163 \
1164 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1165 { \
1166 /* Surrogate characters in UCS-2 input are not valid. Reject \
1167 them. (Catching this here is not security relevant.) */ \
1168 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
1169 } \
1170 \
1171 *((uint32_t *) outptr) = u1; \
1172 outptr += sizeof (uint32_t); \
1173 inptr += 2; \
1174 }
1175 #define LOOP_NEED_FLAGS
1176 #include <iconv/loop.c>
1177 #include <iconv/skeleton.c>
1178
1179
1180 /* Convert from the internal (UCS4-like) format to UCS2. */
1181 #define DEFINE_INIT 0
1182 #define DEFINE_FINI 0
1183 #define MIN_NEEDED_FROM 4
1184 #define MIN_NEEDED_TO 2
1185 #define FROM_DIRECTION 1
1186 #define FROM_LOOP internal_ucs2_loop
1187 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1188 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1189 #define ONE_DIRECTION 1
1190
1191 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1192 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1193 #define LOOPFCT FROM_LOOP
1194 #define BODY \
1195 { \
1196 uint32_t val = *((const uint32_t *) inptr); \
1197 \
1198 if (__builtin_expect (val >= 0x10000, 0)) \
1199 { \
1200 UNICODE_TAG_HANDLER (val, 4); \
1201 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1202 } \
1203 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1204 { \
1205 /* Surrogate characters in UCS-4 input are not valid. \
1206 We must catch this, because the UCS-2 output might be \
1207 interpreted as UTF-16 by other programs. If we let \
1208 surrogates pass through, attackers could make a security \
1209 hole exploit by synthesizing any desired plane 1-16 \
1210 character. */ \
1211 result = __GCONV_ILLEGAL_INPUT; \
1212 if (! ignore_errors_p ()) \
1213 break; \
1214 inptr += 4; \
1215 ++*irreversible; \
1216 continue; \
1217 } \
1218 else \
1219 { \
1220 *((uint16_t *) outptr) = val; \
1221 outptr += sizeof (uint16_t); \
1222 inptr += 4; \
1223 } \
1224 }
1225 #define LOOP_NEED_FLAGS
1226 #include <iconv/loop.c>
1227 #include <iconv/skeleton.c>
1228
1229
1230 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1231 #define DEFINE_INIT 0
1232 #define DEFINE_FINI 0
1233 #define MIN_NEEDED_FROM 2
1234 #define MIN_NEEDED_TO 4
1235 #define FROM_DIRECTION 1
1236 #define FROM_LOOP ucs2reverse_internal_loop
1237 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1238 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1239 #define ONE_DIRECTION 1
1240
1241 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1242 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1243 #define LOOPFCT FROM_LOOP
1244 #define BODY \
1245 { \
1246 uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
1247 \
1248 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1249 { \
1250 /* Surrogate characters in UCS-2 input are not valid. Reject \
1251 them. (Catching this here is not security relevant.) */ \
1252 if (! ignore_errors_p ()) \
1253 { \
1254 result = __GCONV_ILLEGAL_INPUT; \
1255 break; \
1256 } \
1257 inptr += 2; \
1258 ++*irreversible; \
1259 continue; \
1260 } \
1261 \
1262 *((uint32_t *) outptr) = u1; \
1263 outptr += sizeof (uint32_t); \
1264 inptr += 2; \
1265 }
1266 #define LOOP_NEED_FLAGS
1267 #include <iconv/loop.c>
1268 #include <iconv/skeleton.c>
1269
1270
1271 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1272 #define DEFINE_INIT 0
1273 #define DEFINE_FINI 0
1274 #define MIN_NEEDED_FROM 4
1275 #define MIN_NEEDED_TO 2
1276 #define FROM_DIRECTION 1
1277 #define FROM_LOOP internal_ucs2reverse_loop
1278 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1279 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1280 #define ONE_DIRECTION 1
1281
1282 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1283 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1284 #define LOOPFCT FROM_LOOP
1285 #define BODY \
1286 { \
1287 uint32_t val = *((const uint32_t *) inptr); \
1288 if (__builtin_expect (val >= 0x10000, 0)) \
1289 { \
1290 UNICODE_TAG_HANDLER (val, 4); \
1291 STANDARD_TO_LOOP_ERR_HANDLER (4); \
1292 } \
1293 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1294 { \
1295 /* Surrogate characters in UCS-4 input are not valid. \
1296 We must catch this, because the UCS-2 output might be \
1297 interpreted as UTF-16 by other programs. If we let \
1298 surrogates pass through, attackers could make a security \
1299 hole exploit by synthesizing any desired plane 1-16 \
1300 character. */ \
1301 if (! ignore_errors_p ()) \
1302 { \
1303 result = __GCONV_ILLEGAL_INPUT; \
1304 break; \
1305 } \
1306 inptr += 4; \
1307 ++*irreversible; \
1308 continue; \
1309 } \
1310 else \
1311 { \
1312 *((uint16_t *) outptr) = bswap_16 (val); \
1313 outptr += sizeof (uint16_t); \
1314 inptr += 4; \
1315 } \
1316 }
1317 #define LOOP_NEED_FLAGS
1318 #include <iconv/loop.c>
1319 #include <iconv/skeleton.c>