]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_simple.c
Update.
[thirdparty/glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
ea31b613 2 Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc.
6973fc01
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
6973fc01
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
6973fc01 15
41bdb6e2
AJ
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
6973fc01 20
f1fa8b68 21#include <byteswap.h>
55985355 22#include <dlfcn.h>
f1fa8b68 23#include <endian.h>
f4017d20 24#include <errno.h>
6973fc01 25#include <gconv.h>
d2374599 26#include <stdint.h>
6973fc01
UD
27#include <stdlib.h>
28#include <string.h>
29#include <wchar.h>
30#include <sys/param.h>
f9ad060c 31#include <gconv_int.h>
6973fc01 32
17427edd 33#define BUILTIN_ALIAS(s1, s2) /* nothing */
f9ad060c
UD
34#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
17427edd
UD
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39#include "gconv_builtin.h"
40
41
a904b5d9
UD
/* Fall back to EINVAL when <errno.h> does not provide EILSEQ.  */
#ifndef EILSEQ
# define EILSEQ EINVAL
#endif
45
46
f9ad060c
UD
/* Specialized conversion function for a single byte to INTERNAL, recognizing
   only ASCII characters.  Returns C unchanged when it is below 0x80 and
   WEOF for every other byte value.  STEP is not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  if (c < 0x80)
    return c;
  else
    return WEOF;
}
57
58
f1fa8b68
UD
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 says that
   unless some higher protocol specifies it differently, the byte
   order is big endian.

   The macros below parameterize the skeleton.c include that follows the
   loop functions; their exact meaning is defined there.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
72
73
74static inline int
dd9423a6 75__attribute ((always_inline))
55985355
UD
76internal_ucs4_loop (struct __gconv_step *step,
77 struct __gconv_step_data *step_data,
78 const unsigned char **inptrp, const unsigned char *inend,
8619129f 79 unsigned char **outptrp, unsigned char *outend,
38677ace 80 size_t *irreversible)
4bca4c17 81{
8619129f
UD
82 const unsigned char *inptr = *inptrp;
83 unsigned char *outptr = *outptrp;
84 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
4bca4c17
UD
85 int result;
86
f1fa8b68 87#if __BYTE_ORDER == __LITTLE_ENDIAN
8619129f
UD
88 /* Sigh, we have to do some real work. */
89 size_t cnt;
f1fa8b68 90
fdf19bf7 91 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
17427edd 92 *((uint32_t *) outptr)++ = bswap_32 (*(const uint32_t *) inptr);
f1fa8b68 93
8619129f
UD
94 *inptrp = inptr;
95 *outptrp = outptr;
f1fa8b68 96#elif __BYTE_ORDER == __BIG_ENDIAN
8619129f
UD
97 /* Simply copy the data. */
98 *inptrp = inptr + n_convert * 4;
99 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
f1fa8b68
UD
100#else
101# error "This endianess is not supported."
102#endif
103
8619129f 104 /* Determine the status. */
1336419e 105 if (*inptrp == inend)
d64b6ad0 106 result = __GCONV_EMPTY_INPUT;
c4f66413 107 else if (*outptrp + 4 > outend)
1336419e 108 result = __GCONV_FULL_OUTPUT;
6973fc01 109 else
d64b6ad0 110 result = __GCONV_INCOMPLETE_INPUT;
6973fc01 111
f43ce637 112 return result;
6973fc01 113}
d2374599 114
c1db8b0d
UD
115#ifndef _STRING_ARCH_unaligned
116static inline int
dd9423a6 117__attribute ((always_inline))
55985355
UD
118internal_ucs4_loop_unaligned (struct __gconv_step *step,
119 struct __gconv_step_data *step_data,
120 const unsigned char **inptrp,
c1db8b0d
UD
121 const unsigned char *inend,
122 unsigned char **outptrp, unsigned char *outend,
38677ace 123 size_t *irreversible)
c1db8b0d
UD
124{
125 const unsigned char *inptr = *inptrp;
126 unsigned char *outptr = *outptrp;
127 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
128 int result;
129
130# if __BYTE_ORDER == __LITTLE_ENDIAN
131 /* Sigh, we have to do some real work. */
132 size_t cnt;
133
134 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
135 {
136 outptr[0] = inptr[3];
137 outptr[1] = inptr[2];
138 outptr[2] = inptr[1];
139 outptr[3] = inptr[0];
140 }
141
142 *inptrp = inptr;
143 *outptrp = outptr;
144# elif __BYTE_ORDER == __BIG_ENDIAN
145 /* Simply copy the data. */
146 *inptrp = inptr + n_convert * 4;
147 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
148# else
149# error "This endianess is not supported."
150# endif
151
152 /* Determine the status. */
eacde9d0 153 if (*inptrp == inend)
c1db8b0d 154 result = __GCONV_EMPTY_INPUT;
eacde9d0
UD
155 else if (*outptrp + 4 > outend)
156 result = __GCONV_FULL_OUTPUT;
c1db8b0d
UD
157 else
158 result = __GCONV_INCOMPLETE_INPUT;
159
160 return result;
161}
162#endif
163
fd1b5c0f
UD
164
165static inline int
dd9423a6 166__attribute ((always_inline))
55985355
UD
167internal_ucs4_loop_single (struct __gconv_step *step,
168 struct __gconv_step_data *step_data,
169 const unsigned char **inptrp,
fd1b5c0f
UD
170 const unsigned char *inend,
171 unsigned char **outptrp, unsigned char *outend,
38677ace 172 size_t *irreversible)
fd1b5c0f 173{
55985355 174 mbstate_t *state = step_data->__statep;
fd1b5c0f
UD
175 size_t cnt = state->__count & 7;
176
177 while (*inptrp < inend && cnt < 4)
178 state->__value.__wchb[cnt++] = *(*inptrp)++;
179
db2d05f9 180 if (__builtin_expect (cnt < 4, 0))
fd1b5c0f
UD
181 {
182 /* Still not enough bytes. Store the ones in the input buffer. */
183 state->__count &= ~7;
184 state->__count |= cnt;
185
186 return __GCONV_INCOMPLETE_INPUT;
187 }
188
189#if __BYTE_ORDER == __LITTLE_ENDIAN
190 (*outptrp)[0] = state->__value.__wchb[3];
191 (*outptrp)[1] = state->__value.__wchb[2];
192 (*outptrp)[2] = state->__value.__wchb[1];
193 (*outptrp)[3] = state->__value.__wchb[0];
85830c4c
UD
194
195 *outptrp += 4;
fd1b5c0f
UD
196#elif __BYTE_ORDER == __BIG_ENDIAN
197 /* XXX unaligned */
198 *(*((uint32_t **) outptrp)++) = state->__value.__wch;
199#else
200# error "This endianess is not supported."
201#endif
202
203 /* Clear the state buffer. */
204 state->__count &= ~7;
205
206 return __GCONV_OK;
207}
208
8619129f 209#include <iconv/skeleton.c>
d2374599 210
d2374599 211
4a069c33
UD
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.

   The macros below parameterize the skeleton.c include that follows the
   loop functions; their exact meaning is defined there.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal
222
223
224static inline int
dd9423a6 225__attribute ((always_inline))
55985355
UD
226ucs4_internal_loop (struct __gconv_step *step,
227 struct __gconv_step_data *step_data,
228 const unsigned char **inptrp, const unsigned char *inend,
4a069c33 229 unsigned char **outptrp, unsigned char *outend,
38677ace 230 size_t *irreversible)
4a069c33 231{
55985355 232 int flags = step_data->__flags;
4a069c33
UD
233 const unsigned char *inptr = *inptrp;
234 unsigned char *outptr = *outptrp;
235 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
236 int result;
237 size_t cnt;
238
239 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
240 {
241 uint32_t inval;
242
243#if __BYTE_ORDER == __LITTLE_ENDIAN
17427edd 244 inval = bswap_32 (*(const uint32_t *) inptr);
4a069c33 245#else
17427edd 246 inval = *(const uint32_t *) inptr;
4a069c33
UD
247#endif
248
db2d05f9 249 if (__builtin_expect (inval > 0x7fffffff, 0))
4a069c33 250 {
55985355
UD
251 /* The value is too large. We don't try transliteration here since
252 this is not an error because of the lack of possibilities to
253 represent the result. This is a genuine bug in the input since
254 UCS4 does not allow such values. */
0cdb4983
UD
255 if (irreversible == NULL)
256 /* We are transliterating, don't try to correct anything. */
257 return __GCONV_ILLEGAL_INPUT;
258
85830c4c
UD
259 if (flags & __GCONV_IGNORE_ERRORS)
260 {
261 /* Just ignore this character. */
38677ace 262 ++*irreversible;
85830c4c
UD
263 continue;
264 }
265
4a069c33
UD
266 *inptrp = inptr;
267 *outptrp = outptr;
268 return __GCONV_ILLEGAL_INPUT;
269 }
270
3593973b 271 *((uint32_t *) outptr)++ = inval;
4a069c33
UD
272 }
273
274 *inptrp = inptr;
275 *outptrp = outptr;
276
277 /* Determine the status. */
fc08075d 278 if (*inptrp == inend)
4a069c33 279 result = __GCONV_EMPTY_INPUT;
c4f66413 280 else if (*outptrp + 4 > outend)
fc08075d 281 result = __GCONV_FULL_OUTPUT;
4a069c33
UD
282 else
283 result = __GCONV_INCOMPLETE_INPUT;
284
285 return result;
286}
287
288#ifndef _STRING_ARCH_unaligned
289static inline int
dd9423a6 290__attribute ((always_inline))
55985355
UD
291ucs4_internal_loop_unaligned (struct __gconv_step *step,
292 struct __gconv_step_data *step_data,
293 const unsigned char **inptrp,
4a069c33
UD
294 const unsigned char *inend,
295 unsigned char **outptrp, unsigned char *outend,
38677ace 296 size_t *irreversible)
4a069c33 297{
55985355 298 int flags = step_data->__flags;
4a069c33
UD
299 const unsigned char *inptr = *inptrp;
300 unsigned char *outptr = *outptrp;
301 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
302 int result;
303 size_t cnt;
304
55985355 305 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
4a069c33 306 {
db2d05f9 307 if (__builtin_expect (inptr[0] > 0x80, 0))
4a069c33 308 {
55985355
UD
309 /* The value is too large. We don't try transliteration here since
310 this is not an error because of the lack of possibilities to
311 represent the result. This is a genuine bug in the input since
312 UCS4 does not allow such values. */
0cdb4983
UD
313 if (irreversible == NULL)
314 /* We are transliterating, don't try to correct anything. */
315 return __GCONV_ILLEGAL_INPUT;
316
85830c4c
UD
317 if (flags & __GCONV_IGNORE_ERRORS)
318 {
319 /* Just ignore this character. */
38677ace 320 ++*irreversible;
85830c4c
UD
321 continue;
322 }
323
4a069c33
UD
324 *inptrp = inptr;
325 *outptrp = outptr;
9ea2c194 326 return __GCONV_ILLEGAL_INPUT;
4a069c33
UD
327 }
328
4a069c33
UD
329# if __BYTE_ORDER == __LITTLE_ENDIAN
330 outptr[3] = inptr[0];
331 outptr[2] = inptr[1];
332 outptr[1] = inptr[2];
333 outptr[0] = inptr[3];
334# else
335 outptr[0] = inptr[0];
336 outptr[1] = inptr[1];
337 outptr[2] = inptr[2];
338 outptr[3] = inptr[3];
339# endif
55985355 340 outptr += 4;
4a069c33
UD
341 }
342
343 *inptrp = inptr;
344 *outptrp = outptr;
345
346 /* Determine the status. */
fc08075d 347 if (*inptrp == inend)
4a069c33 348 result = __GCONV_EMPTY_INPUT;
c4f66413 349 else if (*outptrp + 4 > outend)
fc08075d 350 result = __GCONV_FULL_OUTPUT;
4a069c33
UD
351 else
352 result = __GCONV_INCOMPLETE_INPUT;
353
354 return result;
355}
356#endif
357
358
359static inline int
dd9423a6 360__attribute ((always_inline))
55985355
UD
361ucs4_internal_loop_single (struct __gconv_step *step,
362 struct __gconv_step_data *step_data,
363 const unsigned char **inptrp,
4a069c33
UD
364 const unsigned char *inend,
365 unsigned char **outptrp, unsigned char *outend,
38677ace 366 size_t *irreversible)
4a069c33 367{
55985355
UD
368 mbstate_t *state = step_data->__statep;
369 int flags = step_data->__flags;
4a069c33
UD
370 size_t cnt = state->__count & 7;
371
372 while (*inptrp < inend && cnt < 4)
373 state->__value.__wchb[cnt++] = *(*inptrp)++;
374
db2d05f9 375 if (__builtin_expect (cnt < 4, 0))
4a069c33
UD
376 {
377 /* Still not enough bytes. Store the ones in the input buffer. */
378 state->__count &= ~7;
379 state->__count |= cnt;
380
381 return __GCONV_INCOMPLETE_INPUT;
382 }
383
db2d05f9
UD
384 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80,
385 0))
85830c4c 386 {
55985355
UD
387 /* The value is too large. We don't try transliteration here since
388 this is not an error because of the lack of possibilities to
389 represent the result. This is a genuine bug in the input since
390 UCS4 does not allow such values. */
85830c4c 391 if (!(flags & __GCONV_IGNORE_ERRORS))
3593973b
UD
392 {
393 *inptrp -= cnt - (state->__count & 7);
394 return __GCONV_ILLEGAL_INPUT;
395 }
85830c4c
UD
396 }
397 else
398 {
4a069c33 399#if __BYTE_ORDER == __LITTLE_ENDIAN
85830c4c
UD
400 (*outptrp)[0] = state->__value.__wchb[3];
401 (*outptrp)[1] = state->__value.__wchb[2];
402 (*outptrp)[2] = state->__value.__wchb[1];
403 (*outptrp)[3] = state->__value.__wchb[0];
4a069c33 404#elif __BYTE_ORDER == __BIG_ENDIAN
85830c4c
UD
405 (*outptrp)[0] = state->__value.__wchb[0];
406 (*outptrp)[1] = state->__value.__wchb[1];
407 (*outptrp)[2] = state->__value.__wchb[2];
408 (*outptrp)[3] = state->__value.__wchb[3];
4a069c33
UD
409#endif
410
85830c4c
UD
411 *outptrp += 4;
412 }
413
4a069c33
UD
414 /* Clear the state buffer. */
415 state->__count &= ~7;
416
417 return __GCONV_OK;
418}
419
420#include <iconv/skeleton.c>
421
422
/* Similarly for the little endian form: transform from the internal,
   UCS4-like format to UCS4-LE.

   The macros below parameterize the skeleton.c include that follows the
   loop functions; their exact meaning is defined there.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le
432
433
434static inline int
dd9423a6 435__attribute ((always_inline))
55985355
UD
436internal_ucs4le_loop (struct __gconv_step *step,
437 struct __gconv_step_data *step_data,
438 const unsigned char **inptrp, const unsigned char *inend,
8d617a71 439 unsigned char **outptrp, unsigned char *outend,
38677ace 440 size_t *irreversible)
8d617a71
UD
441{
442 const unsigned char *inptr = *inptrp;
443 unsigned char *outptr = *outptrp;
444 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
445 int result;
446
447#if __BYTE_ORDER == __BIG_ENDIAN
448 /* Sigh, we have to do some real work. */
449 size_t cnt;
450
451 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
17427edd 452 *((uint32_t *) outptr)++ = bswap_32 (*(const uint32_t *) inptr);
8d617a71
UD
453
454 *inptrp = inptr;
455 *outptrp = outptr;
456#elif __BYTE_ORDER == __LITTLE_ENDIAN
457 /* Simply copy the data. */
458 *inptrp = inptr + n_convert * 4;
459 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
460#else
461# error "This endianess is not supported."
462#endif
463
464 /* Determine the status. */
fc08075d 465 if (*inptrp == inend)
8d617a71 466 result = __GCONV_EMPTY_INPUT;
c4f66413 467 else if (*outptrp + 4 > outend)
fc08075d 468 result = __GCONV_FULL_OUTPUT;
8d617a71
UD
469 else
470 result = __GCONV_INCOMPLETE_INPUT;
471
8d617a71
UD
472 return result;
473}
474
c1db8b0d
UD
475#ifndef _STRING_ARCH_unaligned
476static inline int
dd9423a6 477__attribute ((always_inline))
55985355
UD
478internal_ucs4le_loop_unaligned (struct __gconv_step *step,
479 struct __gconv_step_data *step_data,
480 const unsigned char **inptrp,
c1db8b0d
UD
481 const unsigned char *inend,
482 unsigned char **outptrp, unsigned char *outend,
38677ace 483 size_t *irreversible)
c1db8b0d
UD
484{
485 const unsigned char *inptr = *inptrp;
486 unsigned char *outptr = *outptrp;
487 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
488 int result;
489
490# if __BYTE_ORDER == __BIG_ENDIAN
491 /* Sigh, we have to do some real work. */
492 size_t cnt;
493
3593973b 494 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
c1db8b0d
UD
495 {
496 outptr[0] = inptr[3];
497 outptr[1] = inptr[2];
498 outptr[2] = inptr[1];
499 outptr[3] = inptr[0];
500 }
501
502 *inptrp = inptr;
503 *outptrp = outptr;
504# elif __BYTE_ORDER == __LITTLE_ENDIAN
505 /* Simply copy the data. */
506 *inptrp = inptr + n_convert * 4;
507 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
508# else
509# error "This endianess is not supported."
510# endif
511
512 /* Determine the status. */
eb9dc2a2 513 if (*inptrp == inend)
c1db8b0d 514 result = __GCONV_EMPTY_INPUT;
eb9dc2a2 515 else if (*inptrp + 4 > inend)
c1db8b0d 516 result = __GCONV_INCOMPLETE_INPUT;
eb9dc2a2
UD
517 else
518 {
519 assert (*outptrp + 4 > outend);
520 result = __GCONV_FULL_OUTPUT;
521 }
c1db8b0d
UD
522
523 return result;
524}
525#endif
526
fd1b5c0f
UD
527
528static inline int
dd9423a6 529__attribute ((always_inline))
55985355
UD
530internal_ucs4le_loop_single (struct __gconv_step *step,
531 struct __gconv_step_data *step_data,
532 const unsigned char **inptrp,
fd1b5c0f
UD
533 const unsigned char *inend,
534 unsigned char **outptrp, unsigned char *outend,
38677ace 535 size_t *irreversible)
fd1b5c0f 536{
55985355 537 mbstate_t *state = step_data->__statep;
fd1b5c0f
UD
538 size_t cnt = state->__count & 7;
539
540 while (*inptrp < inend && cnt < 4)
541 state->__value.__wchb[cnt++] = *(*inptrp)++;
542
db2d05f9 543 if (__builtin_expect (cnt < 4, 0))
fd1b5c0f
UD
544 {
545 /* Still not enough bytes. Store the ones in the input buffer. */
546 state->__count &= ~7;
547 state->__count |= cnt;
548
549 return __GCONV_INCOMPLETE_INPUT;
550 }
551
552#if __BYTE_ORDER == __BIG_ENDIAN
553 (*outptrp)[0] = state->__value.__wchb[3];
554 (*outptrp)[1] = state->__value.__wchb[2];
555 (*outptrp)[2] = state->__value.__wchb[1];
556 (*outptrp)[3] = state->__value.__wchb[0];
85830c4c
UD
557
558 *outptrp += 4;
fd1b5c0f
UD
559#else
560 /* XXX unaligned */
561 *(*((uint32_t **) outptrp)++) = state->__value.__wch;
562#endif
563
564 /* Clear the state buffer. */
565 state->__count &= ~7;
566
567 return __GCONV_OK;
568}
569
8d617a71
UD
570#include <iconv/skeleton.c>
571
572
4a069c33
UD
/* And finally from UCS4-LE to the internal encoding.

   The macros below parameterize the skeleton.c include that follows the
   loop functions; their exact meaning is defined there.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal
582
583
584static inline int
dd9423a6 585__attribute ((always_inline))
55985355
UD
586ucs4le_internal_loop (struct __gconv_step *step,
587 struct __gconv_step_data *step_data,
588 const unsigned char **inptrp, const unsigned char *inend,
4a069c33 589 unsigned char **outptrp, unsigned char *outend,
38677ace 590 size_t *irreversible)
4a069c33 591{
55985355 592 int flags = step_data->__flags;
4a069c33
UD
593 const unsigned char *inptr = *inptrp;
594 unsigned char *outptr = *outptrp;
595 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
596 int result;
597 size_t cnt;
598
599 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
600 {
601 uint32_t inval;
602
603#if __BYTE_ORDER == __BIG_ENDIAN
17427edd 604 inval = bswap_32 (*(const uint32_t *) inptr);
4a069c33 605#else
17427edd 606 inval = *(const uint32_t *) inptr;
4a069c33
UD
607#endif
608
db2d05f9 609 if (__builtin_expect (inval > 0x7fffffff, 0))
85830c4c 610 {
55985355
UD
611 /* The value is too large. We don't try transliteration here since
612 this is not an error because of the lack of possibilities to
613 represent the result. This is a genuine bug in the input since
614 UCS4 does not allow such values. */
0cdb4983
UD
615 if (irreversible == NULL)
616 /* We are transliterating, don't try to correct anything. */
617 return __GCONV_ILLEGAL_INPUT;
618
85830c4c
UD
619 if (flags & __GCONV_IGNORE_ERRORS)
620 {
621 /* Just ignore this character. */
38677ace 622 ++*irreversible;
85830c4c
UD
623 continue;
624 }
625
626 return __GCONV_ILLEGAL_INPUT;
627 }
4a069c33 628
3593973b 629 *((uint32_t *) outptr)++ = inval;
4a069c33
UD
630 }
631
632 *inptrp = inptr;
633 *outptrp = outptr;
634
635 /* Determine the status. */
fc08075d 636 if (*inptrp == inend)
4a069c33 637 result = __GCONV_EMPTY_INPUT;
eb9dc2a2 638 else if (*inptrp + 4 > inend)
4a069c33 639 result = __GCONV_INCOMPLETE_INPUT;
eb9dc2a2
UD
640 else
641 {
642 assert (*outptrp + 4 > outend);
643 result = __GCONV_FULL_OUTPUT;
644 }
4a069c33
UD
645
646 return result;
647}
648
649#ifndef _STRING_ARCH_unaligned
650static inline int
dd9423a6 651__attribute ((always_inline))
55985355
UD
652ucs4le_internal_loop_unaligned (struct __gconv_step *step,
653 struct __gconv_step_data *step_data,
654 const unsigned char **inptrp,
4a069c33
UD
655 const unsigned char *inend,
656 unsigned char **outptrp, unsigned char *outend,
38677ace 657 size_t *irreversible)
4a069c33 658{
55985355 659 int flags = step_data->__flags;
4a069c33
UD
660 const unsigned char *inptr = *inptrp;
661 unsigned char *outptr = *outptrp;
662 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
663 int result;
664 size_t cnt;
665
666 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
667 {
db2d05f9 668 if (__builtin_expect (inptr[3] > 0x80, 0))
4a069c33 669 {
55985355
UD
670 /* The value is too large. We don't try transliteration here since
671 this is not an error because of the lack of possibilities to
672 represent the result. This is a genuine bug in the input since
673 UCS4 does not allow such values. */
0cdb4983
UD
674 if (irreversible == NULL)
675 /* We are transliterating, don't try to correct anything. */
676 return __GCONV_ILLEGAL_INPUT;
677
85830c4c
UD
678 if (flags & __GCONV_IGNORE_ERRORS)
679 {
680 /* Just ignore this character. */
38677ace 681 ++*irreversible;
85830c4c
UD
682 continue;
683 }
684
4a069c33
UD
685 *inptrp = inptr;
686 *outptrp = outptr;
9ea2c194 687 return __GCONV_ILLEGAL_INPUT;
4a069c33
UD
688 }
689
4a069c33
UD
690# if __BYTE_ORDER == __BIG_ENDIAN
691 outptr[3] = inptr[0];
692 outptr[2] = inptr[1];
693 outptr[1] = inptr[2];
694 outptr[0] = inptr[3];
695# else
696 outptr[0] = inptr[0];
697 outptr[1] = inptr[1];
698 outptr[2] = inptr[2];
699 outptr[3] = inptr[3];
700# endif
85830c4c
UD
701
702 outptr += 4;
4a069c33
UD
703 }
704
705 *inptrp = inptr;
706 *outptrp = outptr;
707
708 /* Determine the status. */
fc08075d 709 if (*inptrp == inend)
4a069c33 710 result = __GCONV_EMPTY_INPUT;
eb9dc2a2 711 else if (*inptrp + 4 > inend)
4a069c33 712 result = __GCONV_INCOMPLETE_INPUT;
eb9dc2a2
UD
713 else
714 {
715 assert (*outptrp + 4 > outend);
716 result = __GCONV_FULL_OUTPUT;
717 }
4a069c33
UD
718
719 return result;
720}
721#endif
722
723
724static inline int
dd9423a6 725__attribute ((always_inline))
55985355
UD
726ucs4le_internal_loop_single (struct __gconv_step *step,
727 struct __gconv_step_data *step_data,
728 const unsigned char **inptrp,
4a069c33
UD
729 const unsigned char *inend,
730 unsigned char **outptrp, unsigned char *outend,
38677ace 731 size_t *irreversible)
4a069c33 732{
55985355
UD
733 mbstate_t *state = step_data->__statep;
734 int flags = step_data->__flags;
4a069c33
UD
735 size_t cnt = state->__count & 7;
736
737 while (*inptrp < inend && cnt < 4)
738 state->__value.__wchb[cnt++] = *(*inptrp)++;
739
db2d05f9 740 if (__builtin_expect (cnt < 4, 0))
4a069c33
UD
741 {
742 /* Still not enough bytes. Store the ones in the input buffer. */
743 state->__count &= ~7;
744 state->__count |= cnt;
745
746 return __GCONV_INCOMPLETE_INPUT;
747 }
748
db2d05f9
UD
749 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80,
750 0))
85830c4c 751 {
55985355
UD
752 /* The value is too large. We don't try transliteration here since
753 this is not an error because of the lack of possibilities to
754 represent the result. This is a genuine bug in the input since
755 UCS4 does not allow such values. */
85830c4c
UD
756 if (!(flags & __GCONV_IGNORE_ERRORS))
757 return __GCONV_ILLEGAL_INPUT;
758 }
759 else
760 {
4a069c33 761#if __BYTE_ORDER == __BIG_ENDIAN
85830c4c
UD
762 (*outptrp)[0] = state->__value.__wchb[3];
763 (*outptrp)[1] = state->__value.__wchb[2];
764 (*outptrp)[2] = state->__value.__wchb[1];
765 (*outptrp)[3] = state->__value.__wchb[0];
9ea2c194 766#else
85830c4c
UD
767 (*outptrp)[0] = state->__value.__wchb[0];
768 (*outptrp)[1] = state->__value.__wchb[1];
769 (*outptrp)[2] = state->__value.__wchb[2];
770 (*outptrp)[3] = state->__value.__wchb[3];
4a069c33
UD
771#endif
772
85830c4c
UD
773 *outptrp += 4;
774 }
775
4a069c33
UD
776 /* Clear the state buffer. */
777 state->__count &= ~7;
778
779 return __GCONV_OK;
780}
781
782#include <iconv/skeleton.c>
783
784
8619129f
UD
785/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
786#define DEFINE_INIT 0
787#define DEFINE_FINI 0
788#define MIN_NEEDED_FROM 1
789#define MIN_NEEDED_TO 4
790#define FROM_DIRECTION 1
791#define FROM_LOOP ascii_internal_loop
792#define TO_LOOP ascii_internal_loop /* This is not used. */
793#define FUNCTION_NAME __gconv_transform_ascii_internal
fd1b5c0f 794#define ONE_DIRECTION 1
8619129f
UD
795
796#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
797#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
798#define LOOPFCT FROM_LOOP
799#define BODY \
800 { \
db2d05f9 801 if (__builtin_expect (*inptr > '\x7f', 0)) \
8619129f 802 { \
55985355
UD
803 /* The value is too large. We don't try transliteration here since \
804 this is not an error because of the lack of possibilities to \
805 represent the result. This is a genuine bug in the input since \
806 ASCII does not allow such values. */ \
e438a468 807 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
85830c4c
UD
808 } \
809 else \
810 /* It's an one byte sequence. */ \
85830c4c 811 *((uint32_t *) outptr)++ = *inptr++; \
8619129f 812 }
55985355 813#define LOOP_NEED_FLAGS
8619129f
UD
814#include <iconv/loop.c>
815#include <iconv/skeleton.c>
816
817
818/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
819#define DEFINE_INIT 0
820#define DEFINE_FINI 0
821#define MIN_NEEDED_FROM 4
822#define MIN_NEEDED_TO 1
823#define FROM_DIRECTION 1
824#define FROM_LOOP internal_ascii_loop
825#define TO_LOOP internal_ascii_loop /* This is not used. */
826#define FUNCTION_NAME __gconv_transform_internal_ascii
fd1b5c0f 827#define ONE_DIRECTION 1
8619129f
UD
828
829#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
830#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
831#define LOOPFCT FROM_LOOP
832#define BODY \
833 { \
db2d05f9 834 if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
8619129f 835 { \
601d2942 836 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
e438a468 837 STANDARD_TO_LOOP_ERR_HANDLER (4); \
85830c4c
UD
838 } \
839 else \
840 /* It's an one byte sequence. */ \
17427edd 841 *outptr++ = *((const uint32_t *) inptr)++; \
8619129f 842 }
55985355 843#define LOOP_NEED_FLAGS
8619129f
UD
844#include <iconv/loop.c>
845#include <iconv/skeleton.c>
846
847
848/* Convert from the internal (UCS4-like) format to UTF-8. */
849#define DEFINE_INIT 0
850#define DEFINE_FINI 0
851#define MIN_NEEDED_FROM 4
852#define MIN_NEEDED_TO 1
853#define MAX_NEEDED_TO 6
854#define FROM_DIRECTION 1
855#define FROM_LOOP internal_utf8_loop
856#define TO_LOOP internal_utf8_loop /* This is not used. */
857#define FUNCTION_NAME __gconv_transform_internal_utf8
fd1b5c0f 858#define ONE_DIRECTION 1
8619129f
UD
859
860#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
861#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
5aa8ff62 862#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
8619129f
UD
863#define LOOPFCT FROM_LOOP
864#define BODY \
865 { \
17427edd 866 uint32_t wc = *((const uint32_t *) inptr); \
8619129f 867 \
8619129f
UD
868 if (wc < 0x80) \
869 /* It's an one byte sequence. */ \
870 *outptr++ = (unsigned char) wc; \
db2d05f9 871 else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
8619129f
UD
872 { \
873 size_t step; \
874 char *start; \
875 \
876 for (step = 2; step < 6; ++step) \
b79f74cd 877 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
8619129f
UD
878 break; \
879 \
365afefc 880 if (__builtin_expect (outptr + step > outend, 0)) \
8619129f
UD
881 { \
882 /* Too long. */ \
d64b6ad0 883 result = __GCONV_FULL_OUTPUT; \
8619129f
UD
884 break; \
885 } \
886 \
887 start = outptr; \
b79f74cd 888 *outptr = (unsigned char) (~0xff >> step); \
8619129f
UD
889 outptr += step; \
890 --step; \
891 do \
892 { \
893 start[step] = 0x80 | (wc & 0x3f); \
894 wc >>= 6; \
895 } \
896 while (--step > 0); \
897 start[0] |= wc; \
db2d05f9
UD
898 } \
899 else \
900 { \
e438a468 901 STANDARD_TO_LOOP_ERR_HANDLER (4); \
8619129f
UD
902 } \
903 \
904 inptr += 4; \
905 }
db2d05f9 906#define LOOP_NEED_FLAGS
8619129f
UD
907#include <iconv/loop.c>
908#include <iconv/skeleton.c>
909
910
/* Convert from UTF-8 to the internal (UCS4-like) format.

   BODY is the per-character step expanded by loop.c.  It classifies the
   lead byte to get the sequence length CNT (1 to 6), reports
   __GCONV_INCOMPLETE_INPUT when a so-far well-formed sequence is cut off
   by the end of the input, and hands ill-formed lead bytes, bad
   continuation bytes and overlong encodings to
   STANDARD_FROM_LOOP_ERR_HANDLER together with the number of bytes to
   skip.  A valid sequence is stored as one 32-bit unit.  The remaining
   macros parameterize loop.c and skeleton.c; their exact meaning is
   defined there.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    uint32_t ch; \
    uint_fast32_t cnt; \
    uint_fast32_t i; \
 \
    /* Next input byte.  */ \
    ch = *inptr; \
 \
    if (ch < 0x80) \
      { \
        /* One byte sequence.  */ \
        cnt = 1; \
        ++inptr; \
      } \
    else \
      { \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1, \
               otherwise the wide character could have been represented \
               using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
          { \
            /* We expect five bytes.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
          { \
            /* We expect six bytes.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            int skipped; \
 \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  */ \
            skipped = 0; \
            do \
              ++skipped; \
            while (inptr + skipped < inend \
                   && (*(inptr + skipped) & 0xc0) == 0x80 \
                   && skipped < 5); \
 \
            STANDARD_FROM_LOOP_ERR_HANDLER (skipped); \
          } \
 \
        if (__builtin_expect (inptr + cnt > inend, 0)) \
          { \
            /* We don't have enough input.  But before we report that check \
               that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
 \
            if (__builtin_expect (inptr + i == inend, 1)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
 \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
 \
        /* Read the possible remaining bytes.  */ \
        for (i = 1; i < cnt; ++i) \
          { \
            uint32_t byte = inptr[i]; \
 \
            if ((byte & 0xc0) != 0x80) \
              /* This is an illegal encoding.  */ \
              break; \
 \
            ch <<= 6; \
            ch |= byte & 0x3f; \
          } \
 \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
           have been represented with fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
          { \
            /* This is an illegal encoding.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
 \
        inptr += cnt; \
      } \
 \
    /* Now adjust the pointers and store the result.  The output pointer \
       is advanced explicitly; incrementing the result of a cast is not \
       valid C.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
fd1b5c0f
UD
1038
/* Save a truncated UTF-8 sequence found at the end of the input.  The
   bytes that are available are consumed through *INPTRP (up to INEND)
   and folded into a partial UCS4 value which is kept in
   STATE->__value.__wch.  STATE->__count receives the number of bytes
   seen so far in its low byte and the total length of the sequence in
   the bits above it.  */
#define STORE_REST \
  { \
    /* The lead byte is taken to be valid and to announce more bytes \
       than the input buffer still holds; nothing is re-validated \
       here.  */ \
    wint_t acc = **inptrp; \
    size_t seqlen; \
    size_t missing; \
    \
    /* Remember how many bytes of the sequence are available.  */ \
    state->__count = inend - *inptrp; \
    \
    if (acc >= 0xc2 && acc < 0xe0) \
      { \
        /* Two-byte sequence.  0xc0 and 0xc1 are excluded because the \
           resulting character would fit into a single byte.  */ \
        seqlen = 2; \
        acc &= 0x1f; \
      } \
    else if (__builtin_expect ((acc & 0xf0) == 0xe0, 1)) \
      { \
        /* Three-byte sequence.  */ \
        seqlen = 3; \
        acc &= 0x0f; \
      } \
    else if (__builtin_expect ((acc & 0xf8) == 0xf0, 1)) \
      { \
        /* Four-byte sequence.  */ \
        seqlen = 4; \
        acc &= 0x07; \
      } \
    else if (__builtin_expect ((acc & 0xfc) == 0xf8, 1)) \
      { \
        /* Five-byte sequence.  */ \
        seqlen = 5; \
        acc &= 0x03; \
      } \
    else \
      { \
        /* Anything else is treated as a six-byte sequence.  */ \
        seqlen = 6; \
        acc &= 0x01; \
      } \
    \
    /* The lead byte is accounted for; fold in every continuation byte \
       that is present and count how many are still outstanding.  */ \
    missing = seqlen - 1; \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp)) \
      { \
        acc = (acc << 6) | (**inptrp & 0x3f); \
        --missing; \
      } \
    \
    /* Leave room for the payload bits of the bytes not yet seen.  */ \
    acc <<= 6 * missing; \
    \
    /* Record the length of the complete sequence above the low byte.  */ \
    state->__count |= seqlen << 8; \
    \
    /* Keep the partially assembled character.  */ \
    state->__value.__wch = acc; \
  }
1101
/* Rebuild in BYTEBUF the start of a UTF-8 sequence from the partial
   character in STATE->__value.__wch.  The total sequence length is read
   from the bits above the low byte of STATE->__count; INLEN is set to
   the low byte, i.e. the number of bytes that get reconstructed.  */
#define UNPACK_BYTES \
  { \
    /* Lead byte markers for sequences of two to six bytes.  */ \
    static const unsigned char lead_marker[5] = \
      { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t bits = state->__value.__wch; \
    size_t pos = state->__count >> 8; \
    \
    inlen = state->__count & 255; \
    \
    bytebuf[0] = lead_marker[pos - 2]; \
    \
    /* Walk from the last position of the sequence down to position 1, \
       dropping six payload bits per step.  Only positions below INLEN \
       correspond to bytes that were seen, so only those are written.  */ \
    do \
      { \
        --pos; \
        if (pos < inlen) \
          bytebuf[pos] = 0x80 | (bits & 0x3f); \
        bits >>= 6; \
      } \
    while (pos > 1); \
    \
    /* What is left of the value belongs into the lead byte.  */ \
    bytebuf[0] |= bits; \
  }
1122
41f112ad
UD
/* Reset the conversion state: a zero count means that no partial
   sequence is stored.  */
#define CLEAR_STATE state->__count = 0
1125
1126
8619129f
UD
1127#include <iconv/loop.c>
1128#include <iconv/skeleton.c>
1129
1130
/* Convert from UCS2 to the internal (UCS4-like) format.  Every input
   unit is two bytes wide and every output unit four bytes wide.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2_internal_loop
#define TO_LOOP			ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs2_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Body of the conversion loop.  INPTR and OUTPTR are the input and
   output cursors supplied by the code that expands BODY (presumably
   the loop template included after these definitions).  One 16-bit
   unit is read, widened and stored as a 32-bit unit.  */
#define BODY \
  { \
    uint16_t u1 = *((const uint16_t *) inptr); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    /* Store first and advance OUTPTR in a separate statement.  The \
       result of a cast is not an lvalue in ISO C, so applying ++ to \
       it relies on an extension which GCC 4.0 and later reject.  */ \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
1159#define LOOP_NEED_FLAGS
8619129f
UD
1160#include <iconv/loop.c>
1161#include <iconv/skeleton.c>
1162
1163
/* Convert from the internal (UCS4-like) format to UCS2.  Every input
   unit is four bytes wide and every output unit two bytes wide.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2_loop
#define TO_LOOP			internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs2
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Body of the conversion loop.  INPTR, OUTPTR, RESULT and IRREVERSIBLE
   are supplied by the code that expands BODY (presumably the loop
   template included after these definitions).  Values which do not fit
   into 16 bits are passed to the tag and error handlers, surrogates
   are rejected, everything else is stored as a 16-bit unit.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could make a security \
           hole exploit by synthesizing any desired plane 1-16 \
           character.  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        /* Errors are ignored: skip the unit and count the loss.  */ \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        /* Store first and advance OUTPTR in a separate statement.  The \
           result of a cast is not an lvalue in ISO C, so applying ++ \
           to it relies on an extension which GCC 4.0 and later \
           reject.  */ \
        *((uint16_t *) outptr) = val; \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
55985355 1208#define LOOP_NEED_FLAGS
8619129f
UD
1209#include <iconv/loop.c>
1210#include <iconv/skeleton.c>
9b26f5c4
UD
1211
1212
/* Convert from UCS2 in other endianness to the internal (UCS4-like)
   format.  Every input unit is two bytes wide and is byte-swapped
   before use; every output unit is four bytes wide.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2reverse_internal_loop
#define TO_LOOP			ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Body of the conversion loop.  INPTR, OUTPTR, RESULT and IRREVERSIBLE
   are supplied by the code that expands BODY (presumably the loop
   template included after these definitions).  */
#define BODY \
  { \
    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        /* NOTE(review): RESULT is only set when errors are not \
           ignored, whereas the native-endian UCS2 converter in this \
           file goes through STANDARD_FROM_LOOP_ERR_HANDLER; confirm \
           whether the difference is intended.  */ \
        if (! ignore_errors_p ()) \
          { \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        /* Errors are ignored: skip the unit and count the loss.  */ \
        inptr += 2; \
        ++*irreversible; \
        continue; \
      } \
    \
    /* Store first and advance OUTPTR in a separate statement.  The \
       result of a cast is not an lvalue in ISO C, so applying ++ to \
       it relies on an extension which GCC 4.0 and later reject.  */ \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
1248#define LOOP_NEED_FLAGS
9b26f5c4
UD
1249#include <iconv/loop.c>
1250#include <iconv/skeleton.c>
1251
1252
/* Convert from the internal (UCS4-like) format to UCS2 in other
   endianness.  Every input unit is four bytes wide; every output unit
   is two bytes wide and is byte-swapped before it is stored.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2reverse_loop
#define TO_LOOP			internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Body of the conversion loop.  INPTR, OUTPTR, RESULT and IRREVERSIBLE
   are supplied by the code that expands BODY (presumably the loop
   template included after these definitions).  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could make a security \
           hole exploit by synthesizing any desired plane 1-16 \
           character.  */ \
        /* NOTE(review): RESULT is only set when errors are not \
           ignored, whereas the native-endian internal -> UCS2 \
           converter in this file sets it unconditionally; confirm \
           whether the difference is intended.  */ \
        if (! ignore_errors_p ()) \
          { \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        /* Errors are ignored: skip the unit and count the loss.  */ \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        /* Store first and advance OUTPTR in a separate statement.  The \
           result of a cast is not an lvalue in ISO C, so applying ++ \
           to it relies on an extension which GCC 4.0 and later \
           reject.  */ \
        *((uint16_t *) outptr) = bswap_16 (val); \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
55985355 1298#define LOOP_NEED_FLAGS
9b26f5c4
UD
1299#include <iconv/loop.c>
1300#include <iconv/skeleton.c>