]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/gconv_simple.c
Update.
[thirdparty/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
31
32 #define BUILTIN_ALIAS(s1, s2) /* nothing */
33 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, Init, End, MinF, \
34 MaxF, MinT, MaxT) \
35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
36 __const unsigned char **, __const unsigned char *, \
37 unsigned char **, size_t *, int, int);
38 #include "gconv_builtin.h"
39
40
41 #ifndef EILSEQ
42 # define EILSEQ EINVAL
43 #endif
44
45
/* Conversion from the internal, UCS4-like format to UCS4.  If the two
   formats differ at all, it is in their endianness: Unicode/ISO 10646
   says the byte order is big endian unless some higher protocol
   specifies it differently.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
59
60
/* Convert whole characters from the internal format (host byte order)
   to UCS4 (big endian).

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output
   buffer; both pointers are advanced past the converted data.  On
   little-endian hosts the buffers are accessed through uint32_t
   pointers, so the caller presumably guarantees alignment.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes are left and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of complete characters both buffers can take.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* The result of a cast is not an lvalue, so the output pointer is
     advanced in the loop header instead of on the cast expression.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianness is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
100
101 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of the internal -> UCS4 conversion which makes no
   assumption about the alignment of the buffers.

   Converts as many complete four-byte characters as both
   *INPTRP .. INEND and *OUTPTRP .. OUTEND can hold and advances both
   pointers.  STEP, STEP_DATA and IRREVERSIBLE are not used.  Returns
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT, tested in that order.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* The host order is the reverse of the UCS4 order: swap the bytes
     of every character.  */
  const unsigned char *const stop = src + nchars * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both formats are identical, a plain copy is enough.  */
  memcpy (dst, src, nchars * 4);
  src += nchars * 4;
  dst += nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  *inptrp = src;
  *outptrp = dst;

  /* Work out why the conversion stopped.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  if (dst + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
147 #endif
148
149
/* Finish one internal -> UCS4 character of which some bytes were saved
   in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb.  Bytes from
   *INPTRP .. INEND are appended until four are available.  If the
   input runs out first the new count is saved and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written in big-endian order at *OUTPTRP, *OUTPTRP is advanced by
   four, the count is cleared and __GCONV_OK is returned.  OUTEND is
   not checked here.  STEP and IRREVERSIBLE are not used.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Copy byte by byte: this needs no alignment, and it advances
     *OUTPTRP below rather than the local OUTPTRP variable.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianness is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
192
193 #include <iconv/skeleton.c>
194
195
/* Conversion from UCS4 to the internal, UCS4-like format.  Unlike the
   opposite direction, the input values have to be checked here.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
206
207
/* Convert whole characters from UCS4 (big endian) to the internal
   format (host byte order), rejecting values above 0x7fffffff.

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output
   buffer; both are accessed through uint32_t pointers.  For an illegal
   value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without updating the pointers;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
       character is skipped and *IRREVERSIBLE incremented;
     - otherwise the pointers are stored, with *INPTRP at the offending
       character, and __GCONV_ILLEGAL_INPUT is returned.
   Without an error the pointers are advanced and
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT is returned.  STEP is not used.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* The result of a cast is not an lvalue: store, then advance.  */
      *(uint32_t *) outptr = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
270
271 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of the UCS4 -> internal conversion which makes no
   assumption about the alignment of the buffers.

   A character is illegal when its most significant byte, the first
   one in big-endian UCS4, exceeds 0x7f, i.e. when the value is above
   0x7fffffff.  Illegal characters are handled as follows:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without updating the pointers;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
       character is skipped and *IRREVERSIBLE incremented;
     - otherwise the pointers are stored and __GCONV_ILLEGAL_INPUT is
       returned.
   Without an error, *INPTRP and *OUTPTRP are advanced and
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT is returned.  STEP is not used.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* 0x80 itself is already out of range: 0x80000000 > 0x7fffffff.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
338 #endif
339
340
/* Finish one UCS4 -> internal character of which some bytes were saved
   in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb; bytes from
   *INPTRP .. INEND are appended until four are available.  If the
   input runs out first the new count is saved and
   __GCONV_INCOMPLETE_INPUT is returned.

   A character whose most significant (first) byte exceeds 0x7f is
   illegal.  Unless STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set,
   the bytes consumed by this call are given back to the input and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character
   is dropped.  A legal character is written in host byte order at
   *OUTPTRP, which is advanced by four.  In both remaining cases the
   saved count is cleared and __GCONV_OK returned.  OUTEND is not
   checked; STEP and IRREVERSIBLE are not used.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 itself is already out of range: 0x80000000 > 0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* The state count still holds the number of bytes saved
             before this call; rewind by what was read just now.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
400
401 #include <iconv/skeleton.c>
402
403
/* Conversion from the internal format to the little-endian form of
   UCS4.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
413
414
/* Convert whole characters from the internal format (host byte order)
   to little-endian UCS4.

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output
   buffer; both pointers are advanced past the converted data.  On
   big-endian hosts the buffers are accessed through uint32_t
   pointers.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes are left and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of complete characters both buffers can take.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* The result of a cast is not an lvalue, so the output pointer is
     advanced in the loop header instead of on the cast expression.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianness is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
454
455 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of the internal -> UCS4LE conversion which makes
   no assumption about the alignment of the buffers.

   Converts as many complete four-byte characters as both
   *INPTRP .. INEND and *OUTPTRP .. OUTEND can hold and advances both
   pointers.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT only when the input was consumed
   completely, __GCONV_FULL_OUTPUT when fewer than four output bytes
   are left and __GCONV_INCOMPLETE_INPUT otherwise, matching the other
   conversion loops in this file.  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianness is not supported."
# endif

  /* Determine the status.  Trailing bytes of an incomplete character
     must not be reported as empty input.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
501 #endif
502
503
/* Finish one internal -> UCS4LE character of which some bytes were
   saved in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb.  Bytes from
   *INPTRP .. INEND are appended until four are available.  If the
   input runs out first the new count is saved and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written in little-endian order at *OUTPTRP, *OUTPTRP is advanced by
   four, the count is cleared and __GCONV_OK is returned.  OUTEND is
   not checked here.  STEP and IRREVERSIBLE are not used.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  /* Copy byte by byte: this needs no alignment, and it advances
     *OUTPTRP below rather than the local OUTPTRP variable.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
544
545 #include <iconv/skeleton.c>
546
547
/* Conversion from little-endian UCS4 (UCS4-LE) to the internal
   encoding.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
557
558
/* Convert whole characters from little-endian UCS4 to the internal
   format (host byte order), rejecting values above 0x7fffffff.

   *INPTRP .. INEND is the input and *OUTPTRP .. OUTEND the output
   buffer; both are accessed through uint32_t pointers.  For an illegal
   value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without updating the pointers;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
       character is skipped and *IRREVERSIBLE incremented;
     - otherwise the pointers are stored, with *INPTRP at the offending
       character, and __GCONV_ILLEGAL_INPUT is returned.
   Without an error the pointers are advanced and
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT is returned.  STEP is not used.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got, so that the characters converted so
             far are not lost and the error position is known.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* The result of a cast is not an lvalue: store, then advance.  */
      *(uint32_t *) outptr = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
619
620 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of the UCS4LE -> internal conversion which makes
   no assumption about the alignment of the buffers.

   A character is illegal when its most significant byte, the last one
   in little-endian UCS4, exceeds 0x7f, i.e. when the value is above
   0x7fffffff.  Illegal characters are handled as follows:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without updating the pointers;
     - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set, the
       character is skipped and *IRREVERSIBLE incremented;
     - otherwise the pointers are stored and __GCONV_ILLEGAL_INPUT is
       returned.
   Without an error, *INPTRP and *OUTPTRP are advanced and
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT is returned.  STEP is not used.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* 0x80 itself is already out of range: 0x80000000 > 0x7fffffff.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
688 #endif
689
690
/* Finish one UCS4LE -> internal character of which some bytes were
   saved in the conversion state.

   The low three bits of STEP_DATA->__statep->__count hold the number
   of bytes already stored in __value.__wchb; bytes from
   *INPTRP .. INEND are appended until four are available.  If the
   input runs out first the new count is saved and
   __GCONV_INCOMPLETE_INPUT is returned.

   A character whose most significant (last) byte exceeds 0x7f is
   illegal.  Unless STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set,
   the bytes consumed by this call are given back to the input, as the
   big-endian counterpart does, and __GCONV_ILLEGAL_INPUT is returned;
   with the flag set the character is dropped.  A legal character is
   written in host byte order at *OUTPTRP, which is advanced by four.
   In both remaining cases the saved count is cleared and __GCONV_OK
   returned.  OUTEND is not checked; STEP and IRREVERSIBLE are not
   used.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 itself is already out of range: 0x80000000 > 0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* The state count still holds the number of bytes saved
             before this call; rewind by what was read just now.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __LITTLE_ENDIAN
      /* Input and host order agree: copy the bytes unchanged.  */
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianness is not supported."
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
747
748 #include <iconv/skeleton.c>
749
750
/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ascii_internal_loop
#define TO_LOOP                 ascii_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ascii_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body for one character.  It reads the byte at INPTR and stores
   one uint32_t at OUTPTR.  A byte above 0x7f is an error: it sets
   RESULT to __GCONV_ILLEGAL_INPUT and leaves the loop unless
   ignore_errors_p () is true, in which case the byte is skipped and
   *IRREVERSIBLE incremented.  */
#define BODY \
  { \
    if (__builtin_expect (*inptr > '\x7f', 0)) \
      { \
        /* The value is too large.  We don't try transliteration here \
           since this is not an error because of the lack of \
           possibilities to represent the result.  This is a genuine \
           bug in the input since ASCII does not allow such values.  */ \
        if (! ignore_errors_p ()) \
          { \
            /* This is no correct ANSI_X3.4-1968 character.  */ \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
 \
        ++*irreversible; \
        ++inptr; \
      } \
    else \
      { \
        /* It's a one byte sequence.  The result of a cast is not an \
           lvalue, so OUTPTR is advanced in a separate statement.  */ \
        *((uint32_t *) outptr) = *inptr++; \
        outptr += sizeof (uint32_t); \
      } \
  }
#define LOOP_NEED_FLAGS
788 #include <iconv/loop.c>
789 #include <iconv/skeleton.c>
790
791
/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ascii_loop
#define TO_LOOP                 internal_ascii_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ascii
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body for one character.  It reads one uint32_t at INPTR and,
   if the value is at most 0x7f, stores it as a single byte at OUTPTR.
   Larger values are passed to UNICODE_TAG_HANDLER and
   STANDARD_ERR_HANDLER, which are defined outside this file section
   (presumably in loop.c/skeleton.c -- TODO confirm).  */
#define BODY \
  { \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
      { \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
        STANDARD_ERR_HANDLER (4); \
      } \
    else \
      { \
        /* It's a one byte sequence.  The result of a cast is not an \
           lvalue, so INPTR is advanced in a separate statement.  */ \
        *outptr++ = *((const uint32_t *) inptr); \
        inptr += sizeof (uint32_t); \
      } \
  }
#define LOOP_NEED_FLAGS
818 #include <iconv/loop.c>
819 #include <iconv/skeleton.c>
820
821
/* Convert from the internal (UCS4-like) format to UTF-8.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define MAX_NEEDED_TO           6
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_utf8_loop
#define TO_LOOP                 internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_utf8
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body for one character.  It reads one uint32_t at INPTR and
   writes its UTF-8 encoding (one to six bytes) at OUTPTR.  If the
   encoding does not fit before OUTEND, RESULT is set to
   __GCONV_FULL_OUTPUT and the loop is left without consuming the
   character.  */
#define BODY \
  { \
    uint32_t wc = *((const uint32_t *) inptr); \
 \
    /* Since we control every character we read this cannot happen.  */ \
    assert (wc <= 0x7fffffff); \
 \
    if (wc < 0x80) \
      /* It's a one byte sequence.  */ \
      *outptr++ = (unsigned char) wc; \
    else \
      { \
        size_t step; \
        /* Same type as OUTPTR, so the assignment below is valid.  */ \
        unsigned char *start; \
 \
        /* STEP becomes the length of the sequence: the smallest count \
           whose 5 * STEP + 1 payload bits can hold WC.  */ \
        for (step = 2; step < 6; ++step) \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
            break; \
 \
        if (__builtin_expect (outptr + step > outend, 0)) \
          { \
            /* Too long.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
 \
        start = outptr; \
        /* Lead byte marker: the STEP most significant bits are set.  */ \
        *outptr = (unsigned char) (~0xff >> step); \
        outptr += step; \
        --step; \
        /* Fill the continuation bytes from the last one backwards.  */ \
        do \
          { \
            start[step] = 0x80 | (wc & 0x3f); \
            wc >>= 6; \
          } \
        while (--step > 0); \
        start[0] |= wc; \
      } \
 \
    inptr += 4; \
  }
879 #include <iconv/loop.c>
880 #include <iconv/skeleton.c>
881
882
/* Conversion from UTF-8 to the internal (UCS4-like) format.  One
   character takes between one and six input bytes and always four
   output bytes.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define LOOPFCT                 FROM_LOOP
#define FROM_DIRECTION          1
#define ONE_DIRECTION           1

#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO

#define DEFINE_INIT             0
#define DEFINE_FINI             0
/* Loop body for one UTF-8 character.  It decodes the sequence starting
   at INPTR and stores the value as one uint32_t at OUTPTR.

   Errors are an invalid lead byte, a trail byte outside 0x80..0xbf,
   and a sequence of more than two bytes whose value would have fit
   into a shorter one.  Each of them sets RESULT to
   __GCONV_ILLEGAL_INPUT and leaves the loop, unless ignore_errors_p ()
   is true; then the bad bytes are skipped and *IRREVERSIBLE is
   incremented.  A sequence cut off by INEND whose available bytes are
   all valid trail bytes sets RESULT to __GCONV_INCOMPLETE_INPUT.  */
#define BODY \
  { \
    uint32_t ch; \
    uint_fast32_t cnt; \
    uint_fast32_t i; \
 \
    /* Next input byte.  */ \
    ch = *inptr; \
 \
    if (ch < 0x80) \
      { \
        /* One byte sequence.  */ \
        cnt = 1; \
        ++inptr; \
      } \
    else \
      { \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 or \
               0xc1, otherwise the wide character could have been \
               represented using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
          { \
            /* We expect five bytes.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
          { \
            /* We expect six bytes.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            int skipped; \
 \
            if (! ignore_errors_p ()) \
              { \
                /* This is an illegal encoding.  */ \
                result = __GCONV_ILLEGAL_INPUT; \
                break; \
              } \
 \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  */ \
            skipped = 0; \
            do \
              { \
                ++inptr; \
                ++skipped; \
              } \
            while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
 \
            /* Count the dropped character like the other ignore paths \
               of this body do.  */ \
            ++*irreversible; \
            continue; \
          } \
 \
        if (__builtin_expect (inptr + cnt > inend, 0)) \
          { \
            /* We don't have enough input.  But before we report that \
               check that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
 \
            if (__builtin_expect (inptr + i == inend, 1)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
 \
            if (ignore_errors_p ()) \
              { \
                /* Ignore it.  */ \
                inptr += i; \
                ++*irreversible; \
                continue; \
              } \
 \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
 \
        /* Read the possible remaining bytes.  */ \
        for (i = 1; i < cnt; ++i) \
          { \
            uint32_t byte = inptr[i]; \
 \
            if ((byte & 0xc0) != 0x80) \
              /* This is an illegal encoding.  */ \
              break; \
 \
            ch <<= 6; \
            ch |= byte & 0x3f; \
          } \
 \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
           have been represented with fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
          { \
            /* This is an illegal encoding.  */ \
            if (ignore_errors_p ()) \
              { \
                inptr += i; \
                ++*irreversible; \
                continue; \
              } \
 \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
 \
        inptr += cnt; \
      } \
 \
    /* Now adjust the pointers and store the result.  The result of a \
       cast is not an lvalue, so OUTPTR is advanced separately.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
1035
/* Save an incomplete UTF-8 sequence in STATE.  The number of available
   bytes goes to STATE->__count.  The payload bits seen so far go to
   STATE->__value.__wch, shifted left as if the missing trail bytes had
   contributed zero bits.  *INPTRP is advanced to INEND.  */
#define STORE_REST \
  { \
    /* The bytes are converted into the UCS4 format while they are \
       stored.  The first byte in the buffer can be assumed to be \
       correct, and to start a sequence longer than what the input \
       buffer still holds.  */ \
    wint_t partial = **inptrp; \
    size_t missing; \
 \
    state->__count = inend - *inptrp; \
 \
    /* Determine the sequence length and strip the length marker.  */ \
    if (partial >= 0xc2 && partial < 0xe0) \
      { \
        /* Two bytes.  0xc0 and 0xc1 cannot occur as first byte, the \
           wide character would fit into a single byte otherwise.  */ \
        missing = 2; \
        partial &= 0x1f; \
      } \
    else if (__builtin_expect ((partial & 0xf0) == 0xe0, 1)) \
      { \
        /* Three bytes.  */ \
        missing = 3; \
        partial &= 0x0f; \
      } \
    else if (__builtin_expect ((partial & 0xf8) == 0xf0, 1)) \
      { \
        /* Four bytes.  */ \
        missing = 4; \
        partial &= 0x07; \
      } \
    else if (__builtin_expect ((partial & 0xfc) == 0xf8, 1)) \
      { \
        /* Five bytes.  */ \
        missing = 5; \
        partial &= 0x03; \
      } \
    else \
      { \
        /* Six bytes.  */ \
        missing = 6; \
        partial &= 0x01; \
      } \
 \
    /* The first byte is already accounted for.  */ \
    --missing; \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp)) \
      { \
        partial = (partial << 6) | (**inptrp & 0x3f); \
        --missing; \
      } \
 \
    /* Make room for the bytes which have not arrived yet.  */ \
    partial <<= missing * 6; \
 \
    /* Remember the value.  */ \
    state->__value.__wch = partial; \
  }
1095
/* Rebuild the saved bytes of an incomplete UTF-8 sequence from STATE.
   INLEN is set to STATE->__count and the first INLEN bytes of the
   sequence encoding STATE->__value.__wch are written to BYTEBUF.  */
#define UNPACK_BYTES \
  { \
    wint_t bits = state->__value.__wch; \
    size_t pos; \
    inlen = state->__count; \
 \
    /* Pick the lead byte marker and the total sequence length from \
       the magnitude of the stored value.  */ \
    if (bits <= 0x7ff) \
      { \
        bytebuf[0] = 0xc0; \
        pos = 2; \
      } \
    else if (__builtin_expect (bits <= 0xffff, 1)) \
      { \
        bytebuf[0] = 0xe0; \
        pos = 3; \
      } \
    else if (__builtin_expect (bits <= 0x1fffff, 1)) \
      { \
        bytebuf[0] = 0xf0; \
        pos = 4; \
      } \
    else if (__builtin_expect (bits <= 0x3ffffff, 1)) \
      { \
        bytebuf[0] = 0xf8; \
        pos = 5; \
      } \
    else \
      { \
        bytebuf[0] = 0xfc; \
        pos = 6; \
      } \
 \
    /* Walk backwards over the trail bytes.  Only those which had been \
       read are stored, but every position consumes six bits.  */ \
    do \
      { \
        --pos; \
        if (pos < inlen) \
          bytebuf[pos] = 0x80 | (bits & 0x3f); \
        bits >>= 6; \
      } \
    while (pos > 1); \
 \
    /* What is left over belongs into the lead byte.  */ \
    bytebuf[0] |= bits; \
  }
1138
1139 #include <iconv/loop.c>
1140 #include <iconv/skeleton.c>
1141
1142
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body for one character.  It reads one uint16_t at INPTR in
   host byte order and stores it as one uint32_t at OUTPTR.  Values in
   the surrogate range 0xd800..0xdfff set RESULT to
   __GCONV_ILLEGAL_INPUT and leave the loop unless ignore_errors_p ()
   is true, in which case they are skipped and *IRREVERSIBLE
   incremented.  */
#define BODY \
  { \
    uint16_t u1 = *((const uint16_t *) inptr); \
 \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        if (! ignore_errors_p ()) \
          { \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        inptr += 2; \
        ++*irreversible; \
        continue; \
      } \
 \
    /* The result of a cast is not an lvalue, so OUTPTR is advanced in \
       a separate statement.  */ \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
#define LOOP_NEED_FLAGS
1179 #include <iconv/loop.c>
1180 #include <iconv/skeleton.c>
1181
1182
1183 /* Convert from the internal (UCS4-like) format to UCS2. */
1184 #define DEFINE_INIT 0
1185 #define DEFINE_FINI 0
1186 #define MIN_NEEDED_FROM 4
1187 #define MIN_NEEDED_TO 2
1188 #define FROM_DIRECTION 1
1189 #define FROM_LOOP internal_ucs2_loop
1190 #define TO_LOOP internal_ucs2_loop /* This is not used. */
1191 #define FUNCTION_NAME __gconv_transform_internal_ucs2
1192 #define ONE_DIRECTION 1
1193
1194 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1195 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1196 #define LOOPFCT FROM_LOOP
1197 #define BODY \
1198 { \
1199 uint32_t val = *((const uint32_t *) inptr); \
1200 \
1201 if (__builtin_expect (val, 0) >= 0x10000) \
1202 { \
1203 UNICODE_TAG_HANDLER (val, 4); \
1204 STANDARD_ERR_HANDLER (4); \
1205 } \
1206 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1207 { \
1208 /* Surrogate characters in UCS-4 input are not valid. \
1209 We must catch this, because the UCS-2 output might be \
1210 interpreted as UTF-16 by other programs. If we let \
1211 surrogates pass through, attackers could make a security \
1212 hole exploit by synthesizing any desired plane 1-16 \
1213 character. */ \
1214 if (! ignore_errors_p ()) \
1215 { \
1216 result = __GCONV_ILLEGAL_INPUT; \
1217 break; \
1218 } \
1219 inptr += 4; \
1220 ++*irreversible; \
1221 continue; \
1222 } \
1223 else \
1224 { \
1225 *((uint16_t *) outptr)++ = val; \
1226 inptr += 4; \
1227 } \
1228 }
1229 #define LOOP_NEED_FLAGS
1230 #include <iconv/loop.c>
1231 #include <iconv/skeleton.c>
1232
1233
1234 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1235 #define DEFINE_INIT 0
1236 #define DEFINE_FINI 0
1237 #define MIN_NEEDED_FROM 2
1238 #define MIN_NEEDED_TO 4
1239 #define FROM_DIRECTION 1
1240 #define FROM_LOOP ucs2reverse_internal_loop
1241 #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
1242 #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
1243 #define ONE_DIRECTION 1
1244
1245 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1246 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1247 #define LOOPFCT FROM_LOOP
1248 #define BODY \
1249 { \
1250 uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
1251 \
1252 if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
1253 { \
1254 /* Surrogate characters in UCS-2 input are not valid. Reject \
1255 them. (Catching this here is not security relevant.) */ \
1256 if (! ignore_errors_p ()) \
1257 { \
1258 result = __GCONV_ILLEGAL_INPUT; \
1259 break; \
1260 } \
1261 inptr += 2; \
1262 ++*irreversible; \
1263 continue; \
1264 } \
1265 \
1266 *((uint32_t *) outptr)++ = u1; \
1267 inptr += 2; \
1268 }
1269 #define LOOP_NEED_FLAGS
1270 #include <iconv/loop.c>
1271 #include <iconv/skeleton.c>
1272
1273
1274 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
1275 #define DEFINE_INIT 0
1276 #define DEFINE_FINI 0
1277 #define MIN_NEEDED_FROM 4
1278 #define MIN_NEEDED_TO 2
1279 #define FROM_DIRECTION 1
1280 #define FROM_LOOP internal_ucs2reverse_loop
1281 #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
1282 #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
1283 #define ONE_DIRECTION 1
1284
1285 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1286 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1287 #define LOOPFCT FROM_LOOP
1288 #define BODY \
1289 { \
1290 uint32_t val = *((const uint32_t *) inptr); \
1291 if (__builtin_expect (val, 0) >= 0x10000) \
1292 { \
1293 UNICODE_TAG_HANDLER (val, 4); \
1294 STANDARD_ERR_HANDLER (4); \
1295 } \
1296 else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
1297 { \
1298 /* Surrogate characters in UCS-4 input are not valid. \
1299 We must catch this, because the UCS-2 output might be \
1300 interpreted as UTF-16 by other programs. If we let \
1301 surrogates pass through, attackers could make a security \
1302 hole exploit by synthesizing any desired plane 1-16 \
1303 character. */ \
1304 if (! ignore_errors_p ()) \
1305 { \
1306 result = __GCONV_ILLEGAL_INPUT; \
1307 break; \
1308 } \
1309 inptr += 4; \
1310 ++*irreversible; \
1311 continue; \
1312 } \
1313 else \
1314 { \
1315 *((uint16_t *) outptr)++ = bswap_16 (val); \
1316 inptr += 4; \
1317 } \
1318 }
1319 #define LOOP_NEED_FLAGS
1320 #include <iconv/loop.c>
1321 #include <iconv/skeleton.c>