]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_simple.c
* Banner: NPTL no longer has its own version number.
[thirdparty/glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
cdda3d7d 2 Copyright (C) 1997-2003, 2004 Free Software Foundation, Inc.
6973fc01
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
6973fc01
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
6973fc01 15
41bdb6e2
AJ
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
6973fc01 20
f1fa8b68 21#include <byteswap.h>
55985355 22#include <dlfcn.h>
f1fa8b68 23#include <endian.h>
f4017d20 24#include <errno.h>
6973fc01 25#include <gconv.h>
d2374599 26#include <stdint.h>
6973fc01
UD
27#include <stdlib.h>
28#include <string.h>
29#include <wchar.h>
30#include <sys/param.h>
f9ad060c 31#include <gconv_int.h>
6973fc01 32
17427edd 33#define BUILTIN_ALIAS(s1, s2) /* nothing */
f9ad060c
UD
34#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
35 MinF, MaxF, MinT, MaxT) \
17427edd
UD
36 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
37 __const unsigned char **, __const unsigned char *, \
38 unsigned char **, size_t *, int, int);
39#include "gconv_builtin.h"
40
41
a904b5d9
UD
42#ifndef EILSEQ
43# define EILSEQ EINVAL
44#endif
45
46
f9ad060c
UD
/* Single-byte to INTERNAL conversion that accepts only the ASCII range.
   Bytes below 0x80 are returned unchanged; every other byte yields WEOF.
   STEP is not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
57
58
f1fa8b68
UD
/* Transform from the internal, UCS4-like format, to UCS4.  The only
   difference, if any, between the internal UCS4 format and the real
   UCS4 format is the endianness.  Unicode/ISO 10646 says that, unless
   some higher protocol specifies it differently, the byte order is
   big endian.  */
8619129f
UD
64#define DEFINE_INIT 0
65#define DEFINE_FINI 0
66#define MIN_NEEDED_FROM 4
67#define MIN_NEEDED_TO 4
68#define FROM_DIRECTION 1
69#define FROM_LOOP internal_ucs4_loop
70#define TO_LOOP internal_ucs4_loop /* This is not used. */
71#define FUNCTION_NAME __gconv_transform_internal_ucs4
72
73
74static inline int
dd9423a6 75__attribute ((always_inline))
55985355
UD
76internal_ucs4_loop (struct __gconv_step *step,
77 struct __gconv_step_data *step_data,
78 const unsigned char **inptrp, const unsigned char *inend,
8619129f 79 unsigned char **outptrp, unsigned char *outend,
38677ace 80 size_t *irreversible)
4bca4c17 81{
8619129f
UD
82 const unsigned char *inptr = *inptrp;
83 unsigned char *outptr = *outptrp;
84 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
4bca4c17
UD
85 int result;
86
f1fa8b68 87#if __BYTE_ORDER == __LITTLE_ENDIAN
8619129f
UD
88 /* Sigh, we have to do some real work. */
89 size_t cnt;
cdda3d7d 90 uint32_t *outptr32 = (uint32_t *) outptr;
f1fa8b68 91
fdf19bf7 92 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
cdda3d7d 93 *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
f1fa8b68 94
8619129f 95 *inptrp = inptr;
cd5b5023 96 *outptrp = (unsigned char *) outptr32;
f1fa8b68 97#elif __BYTE_ORDER == __BIG_ENDIAN
8619129f
UD
98 /* Simply copy the data. */
99 *inptrp = inptr + n_convert * 4;
100 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
f1fa8b68
UD
101#else
102# error "This endianess is not supported."
103#endif
104
8619129f 105 /* Determine the status. */
1336419e 106 if (*inptrp == inend)
d64b6ad0 107 result = __GCONV_EMPTY_INPUT;
c4f66413 108 else if (*outptrp + 4 > outend)
1336419e 109 result = __GCONV_FULL_OUTPUT;
6973fc01 110 else
d64b6ad0 111 result = __GCONV_INCOMPLETE_INPUT;
6973fc01 112
f43ce637 113 return result;
6973fc01 114}
d2374599 115
c1db8b0d
UD
116#ifndef _STRING_ARCH_unaligned
117static inline int
dd9423a6 118__attribute ((always_inline))
55985355
UD
119internal_ucs4_loop_unaligned (struct __gconv_step *step,
120 struct __gconv_step_data *step_data,
121 const unsigned char **inptrp,
c1db8b0d
UD
122 const unsigned char *inend,
123 unsigned char **outptrp, unsigned char *outend,
38677ace 124 size_t *irreversible)
c1db8b0d
UD
125{
126 const unsigned char *inptr = *inptrp;
127 unsigned char *outptr = *outptrp;
128 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
129 int result;
130
131# if __BYTE_ORDER == __LITTLE_ENDIAN
132 /* Sigh, we have to do some real work. */
133 size_t cnt;
134
135 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
136 {
137 outptr[0] = inptr[3];
138 outptr[1] = inptr[2];
139 outptr[2] = inptr[1];
140 outptr[3] = inptr[0];
141 }
142
143 *inptrp = inptr;
144 *outptrp = outptr;
145# elif __BYTE_ORDER == __BIG_ENDIAN
146 /* Simply copy the data. */
147 *inptrp = inptr + n_convert * 4;
148 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
149# else
150# error "This endianess is not supported."
151# endif
152
153 /* Determine the status. */
eacde9d0 154 if (*inptrp == inend)
c1db8b0d 155 result = __GCONV_EMPTY_INPUT;
eacde9d0
UD
156 else if (*outptrp + 4 > outend)
157 result = __GCONV_FULL_OUTPUT;
c1db8b0d
UD
158 else
159 result = __GCONV_INCOMPLETE_INPUT;
160
161 return result;
162}
163#endif
164
fd1b5c0f
UD
165
166static inline int
dd9423a6 167__attribute ((always_inline))
55985355
UD
168internal_ucs4_loop_single (struct __gconv_step *step,
169 struct __gconv_step_data *step_data,
170 const unsigned char **inptrp,
fd1b5c0f
UD
171 const unsigned char *inend,
172 unsigned char **outptrp, unsigned char *outend,
38677ace 173 size_t *irreversible)
fd1b5c0f 174{
55985355 175 mbstate_t *state = step_data->__statep;
fd1b5c0f
UD
176 size_t cnt = state->__count & 7;
177
178 while (*inptrp < inend && cnt < 4)
179 state->__value.__wchb[cnt++] = *(*inptrp)++;
180
db2d05f9 181 if (__builtin_expect (cnt < 4, 0))
fd1b5c0f
UD
182 {
183 /* Still not enough bytes. Store the ones in the input buffer. */
184 state->__count &= ~7;
185 state->__count |= cnt;
186
187 return __GCONV_INCOMPLETE_INPUT;
188 }
189
190#if __BYTE_ORDER == __LITTLE_ENDIAN
191 (*outptrp)[0] = state->__value.__wchb[3];
192 (*outptrp)[1] = state->__value.__wchb[2];
193 (*outptrp)[2] = state->__value.__wchb[1];
194 (*outptrp)[3] = state->__value.__wchb[0];
85830c4c 195
fd1b5c0f
UD
196#elif __BYTE_ORDER == __BIG_ENDIAN
197 /* XXX unaligned */
cdda3d7d
AJ
198 (*outptrp)[0] = state->__value.__wchb[0];
199 (*outptrp)[1] = state->__value.__wchb[1];
200 (*outptrp)[2] = state->__value.__wchb[2];
201 (*outptrp)[3] = state->__value.__wchb[3];
fd1b5c0f
UD
202#else
203# error "This endianess is not supported."
204#endif
cdda3d7d 205 *outptrp += 4;
fd1b5c0f
UD
206
207 /* Clear the state buffer. */
208 state->__count &= ~7;
209
210 return __GCONV_OK;
211}
212
8619129f 213#include <iconv/skeleton.c>
d2374599 214
d2374599 215
4a069c33
UD
216/* Transform from UCS4 to the internal, UCS4-like format. Unlike
217 for the other direction we have to check for correct values here. */
218#define DEFINE_INIT 0
219#define DEFINE_FINI 0
220#define MIN_NEEDED_FROM 4
221#define MIN_NEEDED_TO 4
222#define FROM_DIRECTION 1
223#define FROM_LOOP ucs4_internal_loop
224#define TO_LOOP ucs4_internal_loop /* This is not used. */
225#define FUNCTION_NAME __gconv_transform_ucs4_internal
226
227
228static inline int
dd9423a6 229__attribute ((always_inline))
55985355
UD
230ucs4_internal_loop (struct __gconv_step *step,
231 struct __gconv_step_data *step_data,
232 const unsigned char **inptrp, const unsigned char *inend,
4a069c33 233 unsigned char **outptrp, unsigned char *outend,
38677ace 234 size_t *irreversible)
4a069c33 235{
55985355 236 int flags = step_data->__flags;
4a069c33
UD
237 const unsigned char *inptr = *inptrp;
238 unsigned char *outptr = *outptrp;
239 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
240 int result;
241 size_t cnt;
242
243 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
244 {
245 uint32_t inval;
246
247#if __BYTE_ORDER == __LITTLE_ENDIAN
17427edd 248 inval = bswap_32 (*(const uint32_t *) inptr);
4a069c33 249#else
17427edd 250 inval = *(const uint32_t *) inptr;
4a069c33
UD
251#endif
252
db2d05f9 253 if (__builtin_expect (inval > 0x7fffffff, 0))
4a069c33 254 {
55985355
UD
255 /* The value is too large. We don't try transliteration here since
256 this is not an error because of the lack of possibilities to
257 represent the result. This is a genuine bug in the input since
258 UCS4 does not allow such values. */
0cdb4983
UD
259 if (irreversible == NULL)
260 /* We are transliterating, don't try to correct anything. */
261 return __GCONV_ILLEGAL_INPUT;
262
85830c4c
UD
263 if (flags & __GCONV_IGNORE_ERRORS)
264 {
265 /* Just ignore this character. */
38677ace 266 ++*irreversible;
85830c4c
UD
267 continue;
268 }
269
4a069c33
UD
270 *inptrp = inptr;
271 *outptrp = outptr;
272 return __GCONV_ILLEGAL_INPUT;
273 }
274
cdda3d7d
AJ
275 *((uint32_t *) outptr) = inval;
276 outptr += sizeof (uint32_t);
4a069c33
UD
277 }
278
279 *inptrp = inptr;
280 *outptrp = outptr;
281
282 /* Determine the status. */
fc08075d 283 if (*inptrp == inend)
4a069c33 284 result = __GCONV_EMPTY_INPUT;
c4f66413 285 else if (*outptrp + 4 > outend)
fc08075d 286 result = __GCONV_FULL_OUTPUT;
4a069c33
UD
287 else
288 result = __GCONV_INCOMPLETE_INPUT;
289
290 return result;
291}
292
293#ifndef _STRING_ARCH_unaligned
294static inline int
dd9423a6 295__attribute ((always_inline))
55985355
UD
296ucs4_internal_loop_unaligned (struct __gconv_step *step,
297 struct __gconv_step_data *step_data,
298 const unsigned char **inptrp,
4a069c33
UD
299 const unsigned char *inend,
300 unsigned char **outptrp, unsigned char *outend,
38677ace 301 size_t *irreversible)
4a069c33 302{
55985355 303 int flags = step_data->__flags;
4a069c33
UD
304 const unsigned char *inptr = *inptrp;
305 unsigned char *outptr = *outptrp;
306 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
307 int result;
308 size_t cnt;
309
55985355 310 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
4a069c33 311 {
db2d05f9 312 if (__builtin_expect (inptr[0] > 0x80, 0))
4a069c33 313 {
55985355
UD
314 /* The value is too large. We don't try transliteration here since
315 this is not an error because of the lack of possibilities to
316 represent the result. This is a genuine bug in the input since
317 UCS4 does not allow such values. */
0cdb4983
UD
318 if (irreversible == NULL)
319 /* We are transliterating, don't try to correct anything. */
320 return __GCONV_ILLEGAL_INPUT;
321
85830c4c
UD
322 if (flags & __GCONV_IGNORE_ERRORS)
323 {
324 /* Just ignore this character. */
38677ace 325 ++*irreversible;
85830c4c
UD
326 continue;
327 }
328
4a069c33
UD
329 *inptrp = inptr;
330 *outptrp = outptr;
9ea2c194 331 return __GCONV_ILLEGAL_INPUT;
4a069c33
UD
332 }
333
4a069c33
UD
334# if __BYTE_ORDER == __LITTLE_ENDIAN
335 outptr[3] = inptr[0];
336 outptr[2] = inptr[1];
337 outptr[1] = inptr[2];
338 outptr[0] = inptr[3];
339# else
340 outptr[0] = inptr[0];
341 outptr[1] = inptr[1];
342 outptr[2] = inptr[2];
343 outptr[3] = inptr[3];
344# endif
55985355 345 outptr += 4;
4a069c33
UD
346 }
347
348 *inptrp = inptr;
349 *outptrp = outptr;
350
351 /* Determine the status. */
fc08075d 352 if (*inptrp == inend)
4a069c33 353 result = __GCONV_EMPTY_INPUT;
c4f66413 354 else if (*outptrp + 4 > outend)
fc08075d 355 result = __GCONV_FULL_OUTPUT;
4a069c33
UD
356 else
357 result = __GCONV_INCOMPLETE_INPUT;
358
359 return result;
360}
361#endif
362
363
364static inline int
dd9423a6 365__attribute ((always_inline))
55985355
UD
366ucs4_internal_loop_single (struct __gconv_step *step,
367 struct __gconv_step_data *step_data,
368 const unsigned char **inptrp,
4a069c33
UD
369 const unsigned char *inend,
370 unsigned char **outptrp, unsigned char *outend,
38677ace 371 size_t *irreversible)
4a069c33 372{
55985355
UD
373 mbstate_t *state = step_data->__statep;
374 int flags = step_data->__flags;
4a069c33
UD
375 size_t cnt = state->__count & 7;
376
377 while (*inptrp < inend && cnt < 4)
378 state->__value.__wchb[cnt++] = *(*inptrp)++;
379
db2d05f9 380 if (__builtin_expect (cnt < 4, 0))
4a069c33
UD
381 {
382 /* Still not enough bytes. Store the ones in the input buffer. */
383 state->__count &= ~7;
384 state->__count |= cnt;
385
386 return __GCONV_INCOMPLETE_INPUT;
387 }
388
db2d05f9
UD
389 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80,
390 0))
85830c4c 391 {
55985355
UD
392 /* The value is too large. We don't try transliteration here since
393 this is not an error because of the lack of possibilities to
394 represent the result. This is a genuine bug in the input since
395 UCS4 does not allow such values. */
85830c4c 396 if (!(flags & __GCONV_IGNORE_ERRORS))
3593973b
UD
397 {
398 *inptrp -= cnt - (state->__count & 7);
399 return __GCONV_ILLEGAL_INPUT;
400 }
85830c4c
UD
401 }
402 else
403 {
4a069c33 404#if __BYTE_ORDER == __LITTLE_ENDIAN
85830c4c
UD
405 (*outptrp)[0] = state->__value.__wchb[3];
406 (*outptrp)[1] = state->__value.__wchb[2];
407 (*outptrp)[2] = state->__value.__wchb[1];
408 (*outptrp)[3] = state->__value.__wchb[0];
4a069c33 409#elif __BYTE_ORDER == __BIG_ENDIAN
85830c4c
UD
410 (*outptrp)[0] = state->__value.__wchb[0];
411 (*outptrp)[1] = state->__value.__wchb[1];
412 (*outptrp)[2] = state->__value.__wchb[2];
413 (*outptrp)[3] = state->__value.__wchb[3];
4a069c33
UD
414#endif
415
85830c4c
UD
416 *outptrp += 4;
417 }
418
4a069c33
UD
419 /* Clear the state buffer. */
420 state->__count &= ~7;
421
422 return __GCONV_OK;
423}
424
425#include <iconv/skeleton.c>
426
427
428/* Similarly for the little endian form. */
8d617a71
UD
429#define DEFINE_INIT 0
430#define DEFINE_FINI 0
431#define MIN_NEEDED_FROM 4
432#define MIN_NEEDED_TO 4
433#define FROM_DIRECTION 1
434#define FROM_LOOP internal_ucs4le_loop
435#define TO_LOOP internal_ucs4le_loop /* This is not used. */
436#define FUNCTION_NAME __gconv_transform_internal_ucs4le
437
438
439static inline int
dd9423a6 440__attribute ((always_inline))
55985355
UD
441internal_ucs4le_loop (struct __gconv_step *step,
442 struct __gconv_step_data *step_data,
443 const unsigned char **inptrp, const unsigned char *inend,
8d617a71 444 unsigned char **outptrp, unsigned char *outend,
38677ace 445 size_t *irreversible)
8d617a71
UD
446{
447 const unsigned char *inptr = *inptrp;
448 unsigned char *outptr = *outptrp;
449 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
450 int result;
451
452#if __BYTE_ORDER == __BIG_ENDIAN
453 /* Sigh, we have to do some real work. */
454 size_t cnt;
cd5b5023 455 uint32_t *outptr32 = (uint32_t *) outptr;
8d617a71
UD
456
457 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
cd5b5023
AJ
458 *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
459 outptr = (unsigned char *) outptr32;
8d617a71
UD
460
461 *inptrp = inptr;
462 *outptrp = outptr;
463#elif __BYTE_ORDER == __LITTLE_ENDIAN
464 /* Simply copy the data. */
465 *inptrp = inptr + n_convert * 4;
466 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
467#else
468# error "This endianess is not supported."
469#endif
470
471 /* Determine the status. */
fc08075d 472 if (*inptrp == inend)
8d617a71 473 result = __GCONV_EMPTY_INPUT;
c4f66413 474 else if (*outptrp + 4 > outend)
fc08075d 475 result = __GCONV_FULL_OUTPUT;
8d617a71
UD
476 else
477 result = __GCONV_INCOMPLETE_INPUT;
478
8d617a71
UD
479 return result;
480}
481
c1db8b0d
UD
482#ifndef _STRING_ARCH_unaligned
483static inline int
dd9423a6 484__attribute ((always_inline))
55985355
UD
485internal_ucs4le_loop_unaligned (struct __gconv_step *step,
486 struct __gconv_step_data *step_data,
487 const unsigned char **inptrp,
c1db8b0d
UD
488 const unsigned char *inend,
489 unsigned char **outptrp, unsigned char *outend,
38677ace 490 size_t *irreversible)
c1db8b0d
UD
491{
492 const unsigned char *inptr = *inptrp;
493 unsigned char *outptr = *outptrp;
494 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
495 int result;
496
497# if __BYTE_ORDER == __BIG_ENDIAN
498 /* Sigh, we have to do some real work. */
499 size_t cnt;
500
3593973b 501 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
c1db8b0d
UD
502 {
503 outptr[0] = inptr[3];
504 outptr[1] = inptr[2];
505 outptr[2] = inptr[1];
506 outptr[3] = inptr[0];
507 }
508
509 *inptrp = inptr;
510 *outptrp = outptr;
511# elif __BYTE_ORDER == __LITTLE_ENDIAN
512 /* Simply copy the data. */
513 *inptrp = inptr + n_convert * 4;
514 *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
515# else
516# error "This endianess is not supported."
517# endif
518
519 /* Determine the status. */
eb9dc2a2 520 if (*inptrp == inend)
c1db8b0d 521 result = __GCONV_EMPTY_INPUT;
eb9dc2a2 522 else if (*inptrp + 4 > inend)
c1db8b0d 523 result = __GCONV_INCOMPLETE_INPUT;
eb9dc2a2
UD
524 else
525 {
526 assert (*outptrp + 4 > outend);
527 result = __GCONV_FULL_OUTPUT;
528 }
c1db8b0d
UD
529
530 return result;
531}
532#endif
533
fd1b5c0f
UD
534
535static inline int
dd9423a6 536__attribute ((always_inline))
55985355
UD
537internal_ucs4le_loop_single (struct __gconv_step *step,
538 struct __gconv_step_data *step_data,
539 const unsigned char **inptrp,
fd1b5c0f
UD
540 const unsigned char *inend,
541 unsigned char **outptrp, unsigned char *outend,
38677ace 542 size_t *irreversible)
fd1b5c0f 543{
55985355 544 mbstate_t *state = step_data->__statep;
fd1b5c0f
UD
545 size_t cnt = state->__count & 7;
546
547 while (*inptrp < inend && cnt < 4)
548 state->__value.__wchb[cnt++] = *(*inptrp)++;
549
db2d05f9 550 if (__builtin_expect (cnt < 4, 0))
fd1b5c0f
UD
551 {
552 /* Still not enough bytes. Store the ones in the input buffer. */
553 state->__count &= ~7;
554 state->__count |= cnt;
555
556 return __GCONV_INCOMPLETE_INPUT;
557 }
558
559#if __BYTE_ORDER == __BIG_ENDIAN
560 (*outptrp)[0] = state->__value.__wchb[3];
561 (*outptrp)[1] = state->__value.__wchb[2];
562 (*outptrp)[2] = state->__value.__wchb[1];
563 (*outptrp)[3] = state->__value.__wchb[0];
85830c4c 564
fd1b5c0f
UD
565#else
566 /* XXX unaligned */
cdda3d7d
AJ
567 (*outptrp)[0] = state->__value.__wchb[0];
568 (*outptrp)[1] = state->__value.__wchb[1];
569 (*outptrp)[2] = state->__value.__wchb[2];
570 (*outptrp)[3] = state->__value.__wchb[3];
571
fd1b5c0f 572#endif
540e1b45 573
cdda3d7d 574 *outptrp += 4;
fd1b5c0f
UD
575
576 /* Clear the state buffer. */
577 state->__count &= ~7;
578
579 return __GCONV_OK;
580}
581
8d617a71
UD
582#include <iconv/skeleton.c>
583
584
4a069c33
UD
585/* And finally from UCS4-LE to the internal encoding. */
586#define DEFINE_INIT 0
587#define DEFINE_FINI 0
588#define MIN_NEEDED_FROM 4
589#define MIN_NEEDED_TO 4
590#define FROM_DIRECTION 1
591#define FROM_LOOP ucs4le_internal_loop
592#define TO_LOOP ucs4le_internal_loop /* This is not used. */
593#define FUNCTION_NAME __gconv_transform_ucs4le_internal
594
595
596static inline int
dd9423a6 597__attribute ((always_inline))
55985355
UD
598ucs4le_internal_loop (struct __gconv_step *step,
599 struct __gconv_step_data *step_data,
600 const unsigned char **inptrp, const unsigned char *inend,
4a069c33 601 unsigned char **outptrp, unsigned char *outend,
38677ace 602 size_t *irreversible)
4a069c33 603{
55985355 604 int flags = step_data->__flags;
4a069c33
UD
605 const unsigned char *inptr = *inptrp;
606 unsigned char *outptr = *outptrp;
607 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
608 int result;
609 size_t cnt;
610
611 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
612 {
613 uint32_t inval;
614
615#if __BYTE_ORDER == __BIG_ENDIAN
17427edd 616 inval = bswap_32 (*(const uint32_t *) inptr);
4a069c33 617#else
17427edd 618 inval = *(const uint32_t *) inptr;
4a069c33
UD
619#endif
620
db2d05f9 621 if (__builtin_expect (inval > 0x7fffffff, 0))
85830c4c 622 {
55985355
UD
623 /* The value is too large. We don't try transliteration here since
624 this is not an error because of the lack of possibilities to
625 represent the result. This is a genuine bug in the input since
626 UCS4 does not allow such values. */
0cdb4983
UD
627 if (irreversible == NULL)
628 /* We are transliterating, don't try to correct anything. */
629 return __GCONV_ILLEGAL_INPUT;
630
85830c4c
UD
631 if (flags & __GCONV_IGNORE_ERRORS)
632 {
633 /* Just ignore this character. */
38677ace 634 ++*irreversible;
85830c4c
UD
635 continue;
636 }
637
638 return __GCONV_ILLEGAL_INPUT;
639 }
4a069c33 640
cdda3d7d
AJ
641 *((uint32_t *) outptr) = inval;
642 outptr += sizeof (uint32_t);
4a069c33
UD
643 }
644
645 *inptrp = inptr;
646 *outptrp = outptr;
647
648 /* Determine the status. */
fc08075d 649 if (*inptrp == inend)
4a069c33 650 result = __GCONV_EMPTY_INPUT;
eb9dc2a2 651 else if (*inptrp + 4 > inend)
4a069c33 652 result = __GCONV_INCOMPLETE_INPUT;
eb9dc2a2
UD
653 else
654 {
655 assert (*outptrp + 4 > outend);
656 result = __GCONV_FULL_OUTPUT;
657 }
4a069c33
UD
658
659 return result;
660}
661
662#ifndef _STRING_ARCH_unaligned
663static inline int
dd9423a6 664__attribute ((always_inline))
55985355
UD
665ucs4le_internal_loop_unaligned (struct __gconv_step *step,
666 struct __gconv_step_data *step_data,
667 const unsigned char **inptrp,
4a069c33
UD
668 const unsigned char *inend,
669 unsigned char **outptrp, unsigned char *outend,
38677ace 670 size_t *irreversible)
4a069c33 671{
55985355 672 int flags = step_data->__flags;
4a069c33
UD
673 const unsigned char *inptr = *inptrp;
674 unsigned char *outptr = *outptrp;
675 size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
676 int result;
677 size_t cnt;
678
679 for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
680 {
db2d05f9 681 if (__builtin_expect (inptr[3] > 0x80, 0))
4a069c33 682 {
55985355
UD
683 /* The value is too large. We don't try transliteration here since
684 this is not an error because of the lack of possibilities to
685 represent the result. This is a genuine bug in the input since
686 UCS4 does not allow such values. */
0cdb4983
UD
687 if (irreversible == NULL)
688 /* We are transliterating, don't try to correct anything. */
689 return __GCONV_ILLEGAL_INPUT;
690
85830c4c
UD
691 if (flags & __GCONV_IGNORE_ERRORS)
692 {
693 /* Just ignore this character. */
38677ace 694 ++*irreversible;
85830c4c
UD
695 continue;
696 }
697
4a069c33
UD
698 *inptrp = inptr;
699 *outptrp = outptr;
9ea2c194 700 return __GCONV_ILLEGAL_INPUT;
4a069c33
UD
701 }
702
4a069c33
UD
703# if __BYTE_ORDER == __BIG_ENDIAN
704 outptr[3] = inptr[0];
705 outptr[2] = inptr[1];
706 outptr[1] = inptr[2];
707 outptr[0] = inptr[3];
708# else
709 outptr[0] = inptr[0];
710 outptr[1] = inptr[1];
711 outptr[2] = inptr[2];
712 outptr[3] = inptr[3];
713# endif
85830c4c
UD
714
715 outptr += 4;
4a069c33
UD
716 }
717
718 *inptrp = inptr;
719 *outptrp = outptr;
720
721 /* Determine the status. */
fc08075d 722 if (*inptrp == inend)
4a069c33 723 result = __GCONV_EMPTY_INPUT;
eb9dc2a2 724 else if (*inptrp + 4 > inend)
4a069c33 725 result = __GCONV_INCOMPLETE_INPUT;
eb9dc2a2
UD
726 else
727 {
728 assert (*outptrp + 4 > outend);
729 result = __GCONV_FULL_OUTPUT;
730 }
4a069c33
UD
731
732 return result;
733}
734#endif
735
736
737static inline int
dd9423a6 738__attribute ((always_inline))
55985355
UD
739ucs4le_internal_loop_single (struct __gconv_step *step,
740 struct __gconv_step_data *step_data,
741 const unsigned char **inptrp,
4a069c33
UD
742 const unsigned char *inend,
743 unsigned char **outptrp, unsigned char *outend,
38677ace 744 size_t *irreversible)
4a069c33 745{
55985355
UD
746 mbstate_t *state = step_data->__statep;
747 int flags = step_data->__flags;
4a069c33
UD
748 size_t cnt = state->__count & 7;
749
750 while (*inptrp < inend && cnt < 4)
751 state->__value.__wchb[cnt++] = *(*inptrp)++;
752
db2d05f9 753 if (__builtin_expect (cnt < 4, 0))
4a069c33
UD
754 {
755 /* Still not enough bytes. Store the ones in the input buffer. */
756 state->__count &= ~7;
757 state->__count |= cnt;
758
759 return __GCONV_INCOMPLETE_INPUT;
760 }
761
db2d05f9
UD
762 if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80,
763 0))
85830c4c 764 {
55985355
UD
765 /* The value is too large. We don't try transliteration here since
766 this is not an error because of the lack of possibilities to
767 represent the result. This is a genuine bug in the input since
768 UCS4 does not allow such values. */
85830c4c
UD
769 if (!(flags & __GCONV_IGNORE_ERRORS))
770 return __GCONV_ILLEGAL_INPUT;
771 }
772 else
773 {
4a069c33 774#if __BYTE_ORDER == __BIG_ENDIAN
85830c4c
UD
775 (*outptrp)[0] = state->__value.__wchb[3];
776 (*outptrp)[1] = state->__value.__wchb[2];
777 (*outptrp)[2] = state->__value.__wchb[1];
778 (*outptrp)[3] = state->__value.__wchb[0];
9ea2c194 779#else
85830c4c
UD
780 (*outptrp)[0] = state->__value.__wchb[0];
781 (*outptrp)[1] = state->__value.__wchb[1];
782 (*outptrp)[2] = state->__value.__wchb[2];
783 (*outptrp)[3] = state->__value.__wchb[3];
4a069c33
UD
784#endif
785
85830c4c
UD
786 *outptrp += 4;
787 }
788
4a069c33
UD
789 /* Clear the state buffer. */
790 state->__count &= ~7;
791
792 return __GCONV_OK;
793}
794
795#include <iconv/skeleton.c>
796
797
8619129f
UD
798/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
799#define DEFINE_INIT 0
800#define DEFINE_FINI 0
801#define MIN_NEEDED_FROM 1
802#define MIN_NEEDED_TO 4
803#define FROM_DIRECTION 1
804#define FROM_LOOP ascii_internal_loop
805#define TO_LOOP ascii_internal_loop /* This is not used. */
806#define FUNCTION_NAME __gconv_transform_ascii_internal
fd1b5c0f 807#define ONE_DIRECTION 1
8619129f
UD
808
809#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
810#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
811#define LOOPFCT FROM_LOOP
#define BODY \
  {                                                                           \
    if (__builtin_expect (*inptr <= '\x7f', 1))                               \
      /* A 7-bit byte widens to exactly one 32-bit unit.  */                  \
      *((uint32_t *) outptr) = *inptr++;                                      \
    else                                                                      \
      {                                                                       \
        /* The value is too large.  We don't try transliteration here since   \
           this is not an error because of the lack of possibilities to       \
           represent the result.  This is a genuine bug in the input since    \
           ASCII does not allow such values.  */                              \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
    /* Not part of either branch: this is reached after a successful store,   \
       and also after the error handler should it ever fall through.  */      \
    outptr += sizeof (uint32_t);                                              \
  }
55985355 827#define LOOP_NEED_FLAGS
8619129f
UD
828#include <iconv/loop.c>
829#include <iconv/skeleton.c>
830
831
832/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
833#define DEFINE_INIT 0
834#define DEFINE_FINI 0
835#define MIN_NEEDED_FROM 4
836#define MIN_NEEDED_TO 1
837#define FROM_DIRECTION 1
838#define FROM_LOOP internal_ascii_loop
839#define TO_LOOP internal_ascii_loop /* This is not used. */
840#define FUNCTION_NAME __gconv_transform_internal_ascii
fd1b5c0f 841#define ONE_DIRECTION 1
8619129f
UD
842
843#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
844#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
845#define LOOPFCT FROM_LOOP
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (__builtin_expect (wc <= 0x7f, 1))                                     \
      /* A 7-bit value narrows to exactly one output byte.  */                \
      *outptr++ = wc;                                                         \
    else                                                                      \
      {                                                                       \
        /* Not representable in ASCII: hand the character to the tag and      \
           error handlers, in that order.  */                                 \
        UNICODE_TAG_HANDLER (wc, 4);                                          \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    /* Not part of either branch: this is reached after a successful store,   \
       and also after the handlers should they ever fall through.  */         \
    inptr += sizeof (uint32_t);                                               \
  }
55985355 858#define LOOP_NEED_FLAGS
8619129f
UD
859#include <iconv/loop.c>
860#include <iconv/skeleton.c>
861
862
863/* Convert from the internal (UCS4-like) format to UTF-8. */
864#define DEFINE_INIT 0
865#define DEFINE_FINI 0
866#define MIN_NEEDED_FROM 4
867#define MIN_NEEDED_TO 1
868#define MAX_NEEDED_TO 6
869#define FROM_DIRECTION 1
870#define FROM_LOOP internal_utf8_loop
871#define TO_LOOP internal_utf8_loop /* This is not used. */
872#define FUNCTION_NAME __gconv_transform_internal_utf8
fd1b5c0f 873#define ONE_DIRECTION 1
8619129f
UD
874
875#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
876#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
5aa8ff62 877#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
8619129f
UD
878#define LOOPFCT FROM_LOOP
/* Encode one internal (UCS4) character from INPTR as UTF-8 at OUTPTR.
   Values below 0x80 take one byte; values up to 0x7fffffff take two to
   six bytes; anything larger goes to STANDARD_TO_LOOP_ERR_HANDLER.  If
   the sequence does not fit before OUTEND, RESULT is set to
   __GCONV_FULL_OUTPUT and the enclosing loop is left without consuming
   the character.  */
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (wc < 0x80)                                                            \
      /* It's a one byte sequence.  */                                        \
      *outptr++ = (unsigned char) wc;                                         \
    else if (__builtin_expect (wc <= 0x7fffffff, 1))                          \
      {                                                                       \
        size_t step;                                                          \
        /* Same type as OUTPTR: the bytes stored below are all >= 0x80.  */   \
        unsigned char *start;                                                 \
                                                                              \
        /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits;        \
           pick the shortest one that holds WC (six if none shorter).  */     \
        for (step = 2; step < 6; ++step)                                      \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)                   \
            break;                                                            \
                                                                              \
        if (__builtin_expect (outptr + step > outend, 0))                     \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        start = outptr;                                                       \
        /* Lead byte marker: STEP one bits followed by a zero bit             \
           (0xc0, 0xe0, 0xf0, 0xf8, 0xfc for STEP 2..6).  Shifting the        \
           positive constant 0xff00 avoids relying on how a negative          \
           value is shifted right.  */                                        \
        *outptr = (unsigned char) (0xff00 >> step);                           \
        outptr += step;                                                       \
        /* Fill the continuation bytes from the last one backwards, six       \
           payload bits each.  */                                             \
        do                                                                    \
          {                                                                   \
            start[--step] = 0x80 | (wc & 0x3f);                               \
            wc >>= 6;                                                         \
          }                                                                   \
        while (step > 1);                                                     \
        /* Whatever is left of WC belongs in the lead byte.  */               \
        start[0] |= wc;                                                       \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
db2d05f9 920#define LOOP_NEED_FLAGS
8619129f
UD
921#include <iconv/loop.c>
922#include <iconv/skeleton.c>
923
924
925/* Convert from UTF-8 to the internal (UCS4-like) format. */
926#define DEFINE_INIT 0
927#define DEFINE_FINI 0
928#define MIN_NEEDED_FROM 1
929#define MAX_NEEDED_FROM 6
930#define MIN_NEEDED_TO 4
931#define FROM_DIRECTION 1
932#define FROM_LOOP utf8_internal_loop
933#define TO_LOOP utf8_internal_loop /* This is not used. */
934#define FUNCTION_NAME __gconv_transform_utf8_internal
fd1b5c0f 935#define ONE_DIRECTION 1
8619129f
UD
936
937#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
5aa8ff62 938#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
8619129f
UD
939#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
940#define LOOPFCT FROM_LOOP
/* Decode one UTF-8 sequence starting at INPTR and store the resulting   \
   character as a 32-bit unit at OUTPTR.  The lead byte selects the      \
   sequence length CNT (1 to 6) and contributes the top payload bits;    \
   each following byte must have the form 10xxxxxx and adds six bits.    \
   Invalid sequences go to STANDARD_FROM_LOOP_ERR_HANDLER with I, the    \
   number of bytes to skip.  NOTE(review): that handler is defined       \
   outside this view; the code below only makes sense if it always       \
   leaves the current iteration (break or continue) -- confirm.  */
#define BODY \
  {                                                                           \
    uint32_t ch;                                                              \
    uint_fast32_t cnt;                                                        \
    uint_fast32_t i;                                                          \
                                                                              \
    /* Next input byte.  */                                                   \
    ch = *inptr;                                                              \
                                                                              \
    if (ch < 0x80)                                                            \
      {                                                                       \
        /* One byte sequence.  */                                             \
        cnt = 1;                                                              \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        if (ch >= 0xc2 && ch < 0xe0)                                          \
          {                                                                   \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
               otherwise the wide character could have been represented       \
               using a single byte.  */                                       \
            cnt = 2;                                                          \
            ch &= 0x1f;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))                   \
          {                                                                   \
            /* We expect three bytes.  */                                     \
            cnt = 3;                                                          \
            ch &= 0x0f;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))                   \
          {                                                                   \
            /* We expect four bytes.  */                                      \
            cnt = 4;                                                          \
            ch &= 0x07;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))                   \
          {                                                                   \
            /* We expect five bytes.  */                                      \
            cnt = 5;                                                          \
            ch &= 0x03;                                                       \
          }                                                                   \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))                   \
          {                                                                   \
            /* We expect six bytes.  */                                       \
            cnt = 6;                                                          \
            ch &= 0x01;                                                       \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* The lead byte is 0x80..0xc1, 0xfe or 0xff, none of which       \
               can start a sequence.                                          \
               Search the end of this ill-formed UTF-8 character.  This       \
               is the next byte with (x & 0xc0) != 0x80.  At most five        \
               bytes are skipped in one go.  */                               \
            i = 0;                                                            \
            do                                                                \
              ++i;                                                            \
            while (inptr + i < inend                                          \
                   && (*(inptr + i) & 0xc0) == 0x80                           \
                   && i < 5);                                                 \
                                                                              \
          /* Shared error exit.  The two `goto errout' statements further     \
             down jump here with I already set to the number of bytes         \
             that make up the bad sequence.  */                               \
          errout:                                                             \
            STANDARD_FROM_LOOP_ERR_HANDLER (i);                               \
          }                                                                   \
                                                                              \
        if (__builtin_expect (inptr + cnt > inend, 0))                        \
          {                                                                   \
            /* We don't have enough input.  But before we report that check   \
               that all the bytes are correct.  */                            \
            for (i = 1; inptr + i < inend; ++i)                               \
              if ((inptr[i] & 0xc0) != 0x80)                                  \
                break;                                                        \
                                                                              \
            /* Every byte we do have is a valid continuation byte, so the     \
               sequence is merely truncated.  */                              \
            if (__builtin_expect (inptr + i == inend, 1))                     \
              {                                                               \
                result = __GCONV_INCOMPLETE_INPUT;                            \
                break;                                                        \
              }                                                               \
                                                                              \
            /* A byte at offset I is not a continuation byte.  */             \
            goto errout;                                                      \
          }                                                                   \
                                                                              \
        /* Read the possible remaining bytes.  */                             \
        for (i = 1; i < cnt; ++i)                                             \
          {                                                                   \
            uint32_t byte = inptr[i];                                         \
                                                                              \
            if ((byte & 0xc0) != 0x80)                                        \
              /* This is an illegal encoding.  */                             \
              break;                                                          \
                                                                              \
            ch <<= 6;                                                         \
            ch |= byte & 0x3f;                                                \
          }                                                                   \
                                                                              \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.               \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could       \
           have been represented with fewer than cnt bytes.  */               \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))               \
          {                                                                   \
            /* This is an illegal encoding.  */                               \
            goto errout;                                                      \
          }                                                                   \
                                                                              \
        inptr += cnt;                                                         \
      }                                                                       \
                                                                              \
    /* Now adjust the pointers and store the result.  */                      \
    *((uint32_t *) outptr) = ch;                                              \
    outptr += sizeof (uint32_t);                                              \
  }
55985355 1051#define LOOP_NEED_FLAGS
fd1b5c0f
UD
1052
/* Save a truncated UTF-8 sequence in the conversion state.  Every byte
   from *inptrp up to inend is consumed and folded into a partial UCS4
   value.  state->__count receives the number of bytes seen (low eight
   bits) and the length of the complete sequence (bits eight and up);
   state->__value.__wch receives the partial value, already shifted to
   leave room for the bytes that have not arrived yet.  The first byte is
   trusted to be a valid lead byte of a sequence longer than the
   available input.  */
#define STORE_REST \
  { \
    wint_t partial = **inptrp; \
    size_t seqlen; \
    size_t missing; \
    \
    /* Number of bytes that are available right now.  */ \
    state->__count = inend - *inptrp; \
    \
    /* Derive the sequence length from the lead byte and keep only the \
       payload bits of that byte.  0xc0 and 0xc1 cannot start a two byte \
       sequence since the value would fit into a single byte.  */ \
    if (partial >= 0xc2 && partial < 0xe0) \
      { \
        seqlen = 2; \
        partial &= 0x1f; \
      } \
    else if (__builtin_expect ((partial & 0xf0) == 0xe0, 1)) \
      { \
        seqlen = 3; \
        partial &= 0x0f; \
      } \
    else if (__builtin_expect ((partial & 0xf8) == 0xf0, 1)) \
      { \
        seqlen = 4; \
        partial &= 0x07; \
      } \
    else if (__builtin_expect ((partial & 0xfc) == 0xf8, 1)) \
      { \
        seqlen = 5; \
        partial &= 0x03; \
      } \
    else \
      { \
        /* Everything else is taken to start a six byte sequence.  */ \
        seqlen = 6; \
        partial &= 0x01; \
      } \
    \
    /* The lead byte is accounted for; fold in the continuation bytes \
       that are present.  */ \
    for (missing = seqlen - 1, ++(*inptrp); *inptrp < inend; ++(*inptrp)) \
      { \
        partial = (partial << 6) | (**inptrp & 0x3f); \
        --missing; \
      } \
    \
    /* Make room for the bytes still to come.  */ \
    partial <<= missing * 6; \
    \
    /* Record the full sequence length next to the byte count.  */ \
    state->__count |= seqlen << 8; \
    \
    state->__value.__wch = partial; \
  }
1115
/* Rebuild the bytes of a partially read UTF-8 sequence from the
   conversion state.  inlen is set to the number of bytes that had been
   seen (low eight bits of state->__count) and bytebuf[0 .. inlen-1] is
   filled with the lead byte and the continuation bytes recovered from
   state->__value.__wch.  */
#define UNPACK_BYTES \
  { \
    /* Lead byte prefixes for sequences of 2, 3, 4, 5 and 6 bytes.  */ \
    static const unsigned char lead_prefix[5] = \
      { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t bits = state->__value.__wch; \
    size_t pos = state->__count >> 8; \
    \
    inlen = state->__count & 255; \
    bytebuf[0] = lead_prefix[pos - 2]; \
    \
    /* Walk from the last position of the sequence towards the lead \
       byte.  Positions that were never read only consume their six \
       (zero) bits of the stored value.  */ \
    do \
      { \
        --pos; \
        if (pos < inlen) \
          bytebuf[pos] = 0x80 | (bits & 0x3f); \
        bits >>= 6; \
      } \
    while (pos > 1); \
    \
    /* What is left are the payload bits of the lead byte.  */ \
    bytebuf[0] |= bits; \
  }
1136
41f112ad
UD
/* Forget any partially read sequence.  STORE_REST keeps the number of
   saved bytes and the expected sequence length in state->__count, so
   zeroing that field discards the record.  */
1137#define CLEAR_STATE \
1138 state->__count = 0
1139
1140
8619129f
UD
1141#include <iconv/loop.c>
1142#include <iconv/skeleton.c>
1143
1144
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Widen one 16-bit unit read at inptr to a 32-bit value stored at outptr.
   Units in the surrogate range 0xd800..0xdfff are passed to
   STANDARD_FROM_LOOP_ERR_HANDLER with a skip width of two bytes.  */
#define BODY \
  { \
    uint16_t unit = *((const uint16_t *) inptr); \
    \
    if (__builtin_expect (unit >= 0xd800 && unit < 0xe000, 0)) \
      { \
        /* UCS-2 has no surrogates, so such input is rejected.  Doing \
           this check here is not security relevant.  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    *((uint32_t *) outptr) = unit; \
    inptr += 2; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
8619129f
UD
1175#include <iconv/loop.c>
1176#include <iconv/skeleton.c>
1177
1178
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Narrow one 32-bit value read at inptr to a 16-bit unit stored at outptr.
   Values of 0x10000 and above go through UNICODE_TAG_HANDLER and
   STANDARD_TO_LOOP_ERR_HANDLER.  Surrogate values set result to
   __GCONV_ILLEGAL_INPUT and either end the loop or, when errors are
   ignored, are skipped and counted in *irreversible.  */
#define BODY \
  { \
    uint32_t ucs4 = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (ucs4 >= 0x10000, 0)) \
      { \
        UNICODE_TAG_HANDLER (ucs4, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (ucs4 >= 0xd800 && ucs4 < 0xe000, 0)) \
      { \
        /* Surrogates are not valid UCS-4 input and must be caught: the \
           UCS-2 output may be read as UTF-16 by other programs, and a \
           surrogate that slipped through would let an attacker \
           synthesize any plane 1-16 character.  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        *((uint16_t *) outptr) = ucs4; \
        inptr += 4; \
        outptr += sizeof (uint16_t); \
      } \
  }
#define LOOP_NEED_FLAGS
8619129f
UD
1225#include <iconv/loop.c>
1226#include <iconv/skeleton.c>
9b26f5c4
UD
1227
1228
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Read one byte-swapped 16-bit unit at inptr and store it, widened to 32
   bits, at outptr.  Surrogate units (0xd800..0xdfff after swapping) are
   invalid in UCS-2.  They are reported through
   STANDARD_FROM_LOOP_ERR_HANDLER with a skip width of two bytes, exactly
   as the same-endian UCS2 decoder does, so that the status of a
   conversion does not depend on the byte order of the input.  */
#define BODY \
  { \
    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
#define LOOP_NEED_FLAGS
9b26f5c4
UD
1266#include <iconv/loop.c>
1267#include <iconv/skeleton.c>
1268
1269
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Read one 32-bit value at inptr and store it as a byte-swapped 16-bit
   unit at outptr.  Values of 0x10000 and above go through
   UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER.  Surrogate values
   always set result to __GCONV_ILLEGAL_INPUT; the loop then stops, or,
   when errors are ignored, the value is skipped and counted in
   *irreversible.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could exploit this by \
           synthesizing any desired plane 1-16 character.  */ \
        /* Record the error before asking whether it is ignored, as the \
           same-endian UCS2 encoder does: a skipped surrogate must \
           still show up in the final result.  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        *((uint16_t *) outptr) = bswap_16 (val); \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
#define LOOP_NEED_FLAGS
9b26f5c4
UD
1317#include <iconv/loop.c>
1318#include <iconv/skeleton.c>