]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_simple.c
iconv: Remove alloca use in gconv-modules configuration parsing
[thirdparty/glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
2b778ceb 2 Copyright (C) 1997-2021 Free Software Foundation, Inc.
6973fc01
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
6973fc01
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
6973fc01 15
41bdb6e2 16 You should have received a copy of the GNU Lesser General Public
59ba27a6 17 License along with the GNU C Library; if not, see
5a82c748 18 <https://www.gnu.org/licenses/>. */
6973fc01 19
f1fa8b68 20#include <byteswap.h>
55985355 21#include <dlfcn.h>
f1fa8b68 22#include <endian.h>
f4017d20 23#include <errno.h>
6973fc01 24#include <gconv.h>
d2374599 25#include <stdint.h>
6973fc01
UD
26#include <stdlib.h>
27#include <string.h>
28#include <wchar.h>
29#include <sys/param.h>
f9ad060c 30#include <gconv_int.h>
6973fc01 31
17427edd 32#define BUILTIN_ALIAS(s1, s2) /* nothing */
f9ad060c
UD
33#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
17427edd 35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
a784e502 36 const unsigned char **, const unsigned char *, \
17427edd
UD
37 unsigned char **, size_t *, int, int);
38#include "gconv_builtin.h"
39
40
a904b5d9
UD
/* Use EINVAL in place of EILSEQ when <errno.h> does not provide it.  */
#if !defined EILSEQ
# define EILSEQ EINVAL
#endif
44
45
f9ad060c
UD
/* Single-byte to INTERNAL conversion that recognizes ASCII only.
   Returns C itself when it is below 0x80 and WEOF for every other
   byte.  STEP is not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
56
57
f1fa8b68
UD
/* INTERNAL -> UCS4.  The internal format is UCS4-like; the only possible
   difference to real UCS4 is the endianness.  Unicode/ISO 10646 says
   the byte order is big endian unless a higher protocol specifies
   otherwise.  These are the settings for the <iconv/skeleton.c>
   inclusion that follows the loop functions.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4
#define FROM_LOOP		internal_ucs4_loop
#define TO_LOOP			internal_ucs4_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
8619129f
UD
72
73
/* Convert as many complete 4-byte characters as fit into both buffers
   from the internal (host byte order) format to big-endian UCS4.
   The buffers are accessed as uint32_t objects on little-endian hosts.
   *INPTRP and *OUTPTRP are advanced past the converted data.  Returns
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
		    struct __gconv_step_data *step_data,
		    const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, const unsigned char *outend,
		    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* The host order differs from UCS4, swap every character.  */
  const unsigned char *stop = src + nchars * 4;
  uint32_t *dst32 = (uint32_t *) dst;

  while (src != stop)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
d2374599 115
#if !_STRING_ARCH_unaligned
/* Same conversion as internal_ucs4_loop, but the buffers are only ever
   accessed one byte at a time, so no 32-bit loads or stores are issued.
   Status codes and pointer updates match internal_ucs4_loop.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
			      struct __gconv_step_data *step_data,
			      const unsigned char **inptrp,
			      const unsigned char *inend,
			      unsigned char **outptrp,
			      const unsigned char *outend,
			      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  for (const unsigned char *stop = src + nchars * 4; src != stop;
       src += 4, dst += 4)
    for (int k = 0; k < 4; ++k)
      dst[k] = src[3 - k];

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
165
fd1b5c0f
UD
166
/* Finish one character whose first bytes were saved in the conversion
   state.  Input bytes are appended to the state buffer until four are
   available.  If the input ends earlier the new byte count is recorded
   in the low three bits of the state's __count and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written to *OUTPTRP in big-endian order, *OUTPTRP advances by four,
   the count bits are cleared and __GCONV_OK is returned.  OUTEND is not
   consulted here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
			   struct __gconv_step_data *step_data,
			   const unsigned char **inptrp,
			   const unsigned char *inend,
			   unsigned char **outptrp,
			   const unsigned char *outend,
			   size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst = *outptrp;

  /* Top up the saved bytes from the input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes.  Remember how many are stored.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  for (size_t k = 0; k < 4; ++k)
#if __BYTE_ORDER == __LITTLE_ENDIAN
    dst[k] = state->__value.__wchb[3 - k];
#elif __BYTE_ORDER == __BIG_ENDIAN
    dst[k] = state->__value.__wchb[k];
#else
# error "This endianess is not supported."
#endif

  *outptrp = dst + 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
214
8619129f 215#include <iconv/skeleton.c>
d2374599 216
d2374599 217
4a069c33
UD
/* UCS4 -> INTERNAL.  Unlike the opposite direction the input values
   have to be checked for validity.  These are the settings for the
   <iconv/skeleton.c> inclusion that follows the loop functions.  */
#define FUNCTION_NAME		__gconv_transform_ucs4_internal
#define FROM_LOOP		ucs4_internal_loop
#define TO_LOOP			ucs4_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
4a069c33
UD
229
230
/* Convert big-endian UCS4 to the internal format, four bytes at a time,
   reading and writing the buffers as uint32_t objects.  Values above
   0x7fffffff are invalid:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in the step flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - else the pointers are stored back, *INPTRP pointing at the
       offending character, and __GCONV_ILLEGAL_INPUT is returned.
   When the loop runs to completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
		    struct __gconv_step_data *step_data,
		    const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, const unsigned char *outend,
		    size_t *irreversible)
{
  const int ignore_errors = step_data->__flags & __GCONV_IGNORE_ERRORS;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t value = *(const uint32_t *) src;
#if __BYTE_ORDER == __LITTLE_ENDIAN
      value = bswap_32 (value);
#endif

      if (__glibc_likely (value <= 0x7fffffff))
	{
	  *((uint32_t *) dst) = value;
	  dst += sizeof (uint32_t);
	  src += 4;
	  continue;
	}

      /* The value is too large.  Transliteration is not attempted: the
	 problem is not a character the target cannot represent but a
	 genuine bug in the input, UCS4 does not allow such values.  */
      if (irreversible == NULL)
	/* We are transliterating, don't try to correct anything.  */
	return __GCONV_ILLEGAL_INPUT;

      if (!ignore_errors)
	{
	  *inptrp = src;
	  *outptrp = dst;
	  return __GCONV_ILLEGAL_INPUT;
	}

      /* Just ignore this character.  */
      ++*irreversible;
      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Determine the status.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  if (dst + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
293
#if !_STRING_ARCH_unaligned
/* Convert big-endian UCS4 to the internal format using byte accesses
   only, so the buffers need not be aligned.  Values above 0x7fffffff,
   i.e. those whose most significant byte exceeds 0x7f, are invalid:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in the step flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - else the pointers are stored back, *INPTRP pointing at the
       offending character, and __GCONV_ILLEGAL_INPUT is returned.
   When the loop runs to completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
			      struct __gconv_step_data *step_data,
			      const unsigned char **inptrp,
			      const unsigned char *inend,
			      unsigned char **outptrp,
			      const unsigned char *outend,
			      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* inptr[0] is the most significant byte of the big-endian value.
	 The limit has to be 0x7f, not 0x80: a top byte of exactly 0x80
	 denotes 0x80000000..0x80ffffff, which the aligned loop rejects
	 through its `inval > 0x7fffffff' test.  */
      if (__glibc_unlikely (inptr[0] > 0x7f))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
362
363
/* Finish one big-endian UCS4 character whose first bytes were saved in
   the conversion state.  Input bytes are appended to the state buffer
   until four are available; if the input ends earlier the new byte
   count is recorded in the low three bits of the state's __count and
   __GCONV_INCOMPLETE_INPUT is returned.

   A complete character whose most significant byte exceeds 0x7f lies
   above 0x7fffffff and is invalid.  Unless __GCONV_IGNORE_ERRORS is set
   in the step flags, *INPTRP is moved back to its value on entry and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character is
   dropped.  A valid character is written to *OUTPTRP in host byte order
   and *OUTPTRP advances by four.  In both of the latter cases the count
   bits are cleared and __GCONV_OK is returned.  OUTEND and IRREVERSIBLE
   are not consulted here.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
			   struct __gconv_step_data *step_data,
			   const unsigned char **inptrp,
			   const unsigned char *inend,
			   unsigned char **outptrp,
			   const unsigned char *outend,
			   size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 0 is the most significant one.  The limit has to be 0x7f, not
     0x80: a top byte of exactly 0x80 denotes 0x80000000..0x80ffffff,
     which the bulk loop rejects through `inval > 0x7fffffff'.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
			0))
    {
      /* The value is too large.  We don't try transliteration here since
	 this is not an error because of the lack of possibilities to
	 represent the result.  This is a genuine bug in the input since
	 UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
	{
	  /* Give back the bytes taken from the input in this call; the
	     count in the state still has its value from entry.  */
	  *inptrp -= cnt - (state->__count & 7);
	  return __GCONV_ILLEGAL_INPUT;
	}
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
425
426#include <iconv/skeleton.c>
427
428
/* INTERNAL -> UCS4LE, the little-endian counterpart of the conversion
   to UCS4.  These are the settings for the <iconv/skeleton.c> inclusion
   that follows the loop functions.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4le
#define FROM_LOOP		internal_ucs4le_loop
#define TO_LOOP			internal_ucs4le_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
8d617a71
UD
439
440
/* Convert as many complete 4-byte characters as fit into both buffers
   from the internal (host byte order) format to little-endian UCS4.
   The buffers are accessed as uint32_t objects on big-endian hosts.
   *INPTRP and *OUTPTRP are advanced past the converted data.  Returns
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
		      struct __gconv_step_data *step_data,
		      const unsigned char **inptrp, const unsigned char *inend,
		      unsigned char **outptrp, const unsigned char *outend,
		      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* The host order differs from UCS4LE, swap every character.  */
  const unsigned char *stop = src + nchars * 4;
  uint32_t *dst32 = (uint32_t *) dst;

  while (src != stop)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
483
#if !_STRING_ARCH_unaligned
/* Same conversion as internal_ucs4le_loop, but the buffers are only
   ever accessed one byte at a time, so no 32-bit loads or stores are
   issued.  Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain, and
   __GCONV_FULL_OUTPUT otherwise (asserting that the output buffer
   really has no room for another character).  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
				struct __gconv_step_data *step_data,
				const unsigned char **inptrp,
				const unsigned char *inend,
				unsigned char **outptrp,
				const unsigned char *outend,
				size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  for (const unsigned char *stop = src + nchars * 4; src != stop;
       src += 4, dst += 4)
    for (int k = 0; k < 4; ++k)
      dst[k] = src[3 - k];

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
536
fd1b5c0f
UD
537
/* Finish one character whose first bytes were saved in the conversion
   state.  Input bytes are appended to the state buffer until four are
   available.  If the input ends earlier the new byte count is recorded
   in the low three bits of the state's __count and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written to *OUTPTRP in little-endian order, *OUTPTRP advances by
   four, the count bits are cleared and __GCONV_OK is returned.  OUTEND
   is not consulted here.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
			     struct __gconv_step_data *step_data,
			     const unsigned char **inptrp,
			     const unsigned char *inend,
			     unsigned char **outptrp,
			     const unsigned char *outend,
			     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst = *outptrp;

  /* Top up the saved bytes from the input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes.  Remember how many are stored.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  for (size_t k = 0; k < 4; ++k)
#if __BYTE_ORDER == __BIG_ENDIAN
    dst[k] = state->__value.__wchb[3 - k];
#else
    dst[k] = state->__value.__wchb[k];
#endif

  *outptrp = dst + 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
585
8d617a71
UD
586#include <iconv/skeleton.c>
587
588
4a069c33
UD
/* UCS4LE -> INTERNAL, the last of the four UCS4 conversions.  These
   are the settings for the <iconv/skeleton.c> inclusion that follows
   the loop functions.  */
#define FUNCTION_NAME		__gconv_transform_ucs4le_internal
#define FROM_LOOP		ucs4le_internal_loop
#define TO_LOOP			ucs4le_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
4a069c33
UD
599
600
/* Convert little-endian UCS4 to the internal format, four bytes at a
   time, reading and writing the buffers as uint32_t objects.  Values
   above 0x7fffffff are invalid:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in the step flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - else the pointers are stored back, *INPTRP pointing at the
       offending character, and __GCONV_ILLEGAL_INPUT is returned.
   When the loop runs to completion the result is __GCONV_EMPTY_INPUT
   if all input was consumed, __GCONV_INCOMPLETE_INPUT if fewer than
   four input bytes remain, and __GCONV_FULL_OUTPUT otherwise.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
		      struct __gconv_step_data *step_data,
		      const unsigned char **inptrp, const unsigned char *inend,
		      unsigned char **outptrp, const unsigned char *outend,
		      size_t *irreversible)
{
  const int ignore_errors = step_data->__flags & __GCONV_IGNORE_ERRORS;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t value = *(const uint32_t *) src;
#if __BYTE_ORDER == __BIG_ENDIAN
      value = bswap_32 (value);
#endif

      if (__glibc_likely (value <= 0x7fffffff))
	{
	  *((uint32_t *) dst) = value;
	  dst += sizeof (uint32_t);
	  src += 4;
	  continue;
	}

      /* The value is too large.  Transliteration is not attempted: the
	 problem is not a character the target cannot represent but a
	 genuine bug in the input, UCS4 does not allow such values.  */
      if (irreversible == NULL)
	/* We are transliterating, don't try to correct anything.  */
	return __GCONV_ILLEGAL_INPUT;

      if (!ignore_errors)
	{
	  *inptrp = src;
	  *outptrp = dst;
	  return __GCONV_ILLEGAL_INPUT;
	}

      /* Just ignore this character.  */
      ++*irreversible;
      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Determine the status.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  if (src + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
666
#if !_STRING_ARCH_unaligned
/* Convert little-endian UCS4 to the internal format using byte accesses
   only, so the buffers need not be aligned.  Values above 0x7fffffff,
   i.e. those whose most significant byte exceeds 0x7f, are invalid:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in the step flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - else the pointers are stored back, *INPTRP pointing at the
       offending character, and __GCONV_ILLEGAL_INPUT is returned.
   When the loop runs to completion the result is __GCONV_EMPTY_INPUT
   if all input was consumed, __GCONV_INCOMPLETE_INPUT if fewer than
   four input bytes remain, and __GCONV_FULL_OUTPUT otherwise.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
				struct __gconv_step_data *step_data,
				const unsigned char **inptrp,
				const unsigned char *inend,
				unsigned char **outptrp,
				const unsigned char *outend,
				size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* inptr[3] is the most significant byte of the little-endian
	 value.  The limit has to be 0x7f, not 0x80: a top byte of
	 exactly 0x80 denotes 0x80000000..0x80ffffff, which the aligned
	 loop rejects through its `inval > 0x7fffffff' test.  */
      if (__glibc_unlikely (inptr[3] > 0x7f))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
739
740
/* Finish one little-endian UCS4 character whose first bytes were saved
   in the conversion state.  Input bytes are appended to the state
   buffer until four are available; if the input ends earlier the new
   byte count is recorded in the low three bits of the state's __count
   and __GCONV_INCOMPLETE_INPUT is returned.

   A complete character whose most significant byte exceeds 0x7f lies
   above 0x7fffffff and is invalid.  Unless __GCONV_IGNORE_ERRORS is set
   in the step flags, *INPTRP is moved back to its value on entry and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character is
   dropped.  A valid character is written to *OUTPTRP in host byte order
   and *OUTPTRP advances by four.  In both of the latter cases the count
   bits are cleared and __GCONV_OK is returned.  OUTEND and IRREVERSIBLE
   are not consulted here.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
			     struct __gconv_step_data *step_data,
			     const unsigned char **inptrp,
			     const unsigned char *inend,
			     unsigned char **outptrp,
			     const unsigned char *outend,
			     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 3 is the most significant one.  The limit has to be 0x7f, not
     0x80: a top byte of exactly 0x80 denotes 0x80000000..0x80ffffff,
     which the bulk loop rejects through `inval > 0x7fffffff'.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
			0))
    {
      /* The value is too large.  We don't try transliteration here since
	 this is not an error because of the lack of possibilities to
	 represent the result.  This is a genuine bug in the input since
	 UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
	{
	  /* Give back the bytes taken from the input in this call, as
	     ucs4_internal_loop_single does; the count in the state
	     still has its value from entry.  */
	  *inptrp -= cnt - (state->__count & 7);
	  return __GCONV_ILLEGAL_INPUT;
	}
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
799
800#include <iconv/skeleton.c>
801
802
8619129f
UD
/* ISO 646-IRV (ASCII) -> INTERNAL (UCS4-like).  Settings for the
   <iconv/loop.c> and <iconv/skeleton.c> inclusions that follow.  */
#define FUNCTION_NAME		__gconv_transform_ascii_internal
#define FROM_LOOP		ascii_internal_loop
#define TO_LOOP			ascii_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		1
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOP_NEED_FLAGS
/* One conversion step: widen a single ASCII byte to a 32-bit character,
   or report a byte above 0x7f through the standard error handler.  */
#define BODY \
  { \
    if (__glibc_likely (*inptr <= '\x7f')) \
      { \
	/* It's an one byte sequence.  */ \
	*((uint32_t *) outptr) = *inptr++; \
	outptr += sizeof (uint32_t); \
      } \
    else \
      { \
	/* The value is too large.  We don't try transliteration here \
	   since this is not an error because of the lack of \
	   possibilities to represent the result.  This is a genuine bug \
	   in the input since ASCII does not allow such values.  */ \
	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
  }
8619129f
UD
835#include <iconv/loop.c>
836#include <iconv/skeleton.c>
837
838
/* INTERNAL (UCS4-like) -> ISO 646-IRV (ASCII).  Settings for the
   <iconv/loop.c> and <iconv/skeleton.c> inclusions that follow.  */
#define FUNCTION_NAME		__gconv_transform_internal_ascii
#define FROM_LOOP		internal_ascii_loop
#define TO_LOOP			internal_ascii_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOP_NEED_FLAGS
/* One conversion step: narrow a 32-bit character not above 0x7f to one
   byte.  Anything larger goes to the Unicode tag handler first and then
   to the standard error handler.  */
#define BODY \
  { \
    if (__glibc_likely (*((const uint32_t *) inptr) <= 0x7f)) \
      { \
	/* It's an one byte sequence.  */ \
	*outptr++ = *((const uint32_t *) inptr); \
	inptr += sizeof (uint32_t); \
      } \
    else \
      { \
	UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
  }
8619129f
UD
868#include <iconv/loop.c>
869#include <iconv/skeleton.c>
870
871
/* INTERNAL (UCS4-like) -> UTF-8.  Settings for the <iconv/loop.c> and
   <iconv/skeleton.c> inclusions that follow.  One input character
   produces between one and six output bytes.  */
#define FUNCTION_NAME		__gconv_transform_internal_utf8
#define FROM_LOOP		internal_utf8_loop
#define TO_LOOP			internal_utf8_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define MAX_NEEDED_TO		6
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
#define LOOP_NEED_FLAGS
/* One conversion step.  Values below 0x80 are emitted as a single byte.
   Other values up to 0x7fffffff, UTF-16 surrogates excluded, are
   encoded as a lead byte plus continuation bytes; everything else is
   passed to the standard error handler.  */
#define BODY \
  { \
    uint32_t wc = *((const uint32_t *) inptr); \
 \
    if (__glibc_likely (wc < 0x80)) \
      /* It's an one byte sequence.  */ \
      *outptr++ = (unsigned char) wc; \
    else if (__glibc_likely (wc <= 0x7fffffff \
			     && (wc < 0xd800 || wc > 0xdfff))) \
      { \
	/* Find the shortest length whose payload bits hold WC.  */ \
	size_t nbytes = 2; \
	while (nbytes < 6 \
	       && (wc & (~(uint32_t)0 << (5 * nbytes + 1))) != 0) \
	  ++nbytes; \
 \
	if (__glibc_unlikely (outptr + nbytes > outend)) \
	  { \
	    /* Too long.  */ \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
 \
	unsigned char *first = outptr; \
	/* Lead byte marker; its payload bits are merged in last.  */ \
	*first = (unsigned char) (~0xff >> nbytes); \
	outptr += nbytes; \
	/* Fill the continuation bytes from the back, six bits each.  */ \
	do \
	  { \
	    first[--nbytes] = 0x80 | (wc & 0x3f); \
	    wc >>= 6; \
	  } \
	while (nbytes > 1); \
	first[0] |= wc; \
      } \
    else \
      { \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
 \
    inptr += 4; \
  }
8619129f
UD
931#include <iconv/loop.c>
932#include <iconv/skeleton.c>
933
934
935/* Convert from UTF-8 to the internal (UCS4-like) format. */
/* The defines below parameterize one instantiation of the generic code
   that the includes of <iconv/loop.c> and <iconv/skeleton.c> after this
   section pull in.  NOTE(review): the precise meaning of each knob is
   defined by those two files, which are not visible here -- confirm there.
   Judging by the values, one input character occupies 1 to 6 bytes and
   every output character occupies 4 bytes.  */
936#define DEFINE_INIT 0
937#define DEFINE_FINI 0
938#define MIN_NEEDED_FROM 1
939#define MAX_NEEDED_FROM 6
940#define MIN_NEEDED_TO 4
941#define FROM_DIRECTION 1
942#define FROM_LOOP utf8_internal_loop
943#define TO_LOOP utf8_internal_loop /* This is not used. */
944#define FUNCTION_NAME __gconv_transform_utf8_internal
fd1b5c0f 945#define ONE_DIRECTION 1
8619129f
UD
946
947#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
5aa8ff62 948#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
8619129f
UD
949#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
950#define LOOPFCT FROM_LOOP
/* BODY decodes the single UTF-8 sequence that starts at inptr and stores
   the resulting 32-bit value at outptr.

   - A byte below 0x80 is taken as is.
   - Otherwise the lead byte selects the sequence length cnt (2..6) and is
     masked down to its payload bits.  Lead bytes matching none of the
     patterns (0x80..0xc1, 0xfe, 0xff) go to the error handler.
   - If fewer than cnt bytes are available, result becomes
     __GCONV_INCOMPLETE_INPUT -- unless one of the bytes that are present
     is already not a continuation byte, which is reported as an error.
   - Bad continuation bytes, overlong forms and values in the surrogate
     range 0xd800..0xdfff are rejected through the errout label.

   inptr, inend, outptr and result are not declared in this macro;
   presumably they are supplied by the code in loop.c that expands BODY.  */
951#define BODY \
952 { \
8619129f 953 /* Next input byte. */ \
26a51060 954 uint32_t ch = *inptr; \
8619129f 955 \
a1ffb40e 956 if (__glibc_likely (ch < 0x80)) \
8619129f 957 { \
5aa8ff62 958 /* One byte sequence. */ \
5aa8ff62 959 ++inptr; \
8619129f
UD
960 } \
961 else \
962 { \
26a51060
UD
963 uint_fast32_t cnt; \
964 uint_fast32_t i; \
965 \
9ea2c194 966 if (ch >= 0xc2 && ch < 0xe0) \
5aa8ff62 967 { \
9ea2c194
AJ
968 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
969 otherwise the wide character could have been represented \
970 using a single byte. */ \
5aa8ff62
UD
971 cnt = 2; \
972 ch &= 0x1f; \
973 } \
a1ffb40e 974 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
5aa8ff62
UD
975 { \
976 /* We expect three bytes. */ \
977 cnt = 3; \
978 ch &= 0x0f; \
979 } \
a1ffb40e 980 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
5aa8ff62
UD
981 { \
982 /* We expect four bytes. */ \
983 cnt = 4; \
984 ch &= 0x07; \
985 } \
a1ffb40e 986 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
5aa8ff62
UD
987 { \
988 /* We expect five bytes. */ \
989 cnt = 5; \
990 ch &= 0x03; \
991 } \
a1ffb40e 992 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
5aa8ff62
UD
993 { \
994 /* We expect six bytes. */ \
995 cnt = 6; \
996 ch &= 0x01; \
997 } \
998 else \
8619129f 999 { \
85830c4c
UD
1000 /* Search the end of this ill-formed UTF-8 character. This \
1001 is the next byte with (x & 0xc0) != 0x80. */ \
 /* The scan is additionally capped: it stops once i reaches 5. */ \
347bace2 1002 i = 0; \
e438a468 1003 do \
347bace2
UD
1004 ++i; \
1005 while (inptr + i < inend \
1006 && (*(inptr + i) & 0xc0) == 0x80 \
1007 && i < 5); \
85830c4c 1008 \
347bace2
UD
 /* errout is also the target of the two gotos further down. In \
 every case i is the byte count handed to the error handler. \
 NOTE(review): what the handler does with it is defined \
 outside this block -- confirm in loop.c. */ \
1009 errout: \
1010 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
8619129f
UD
1011 } \
1012 \
a1ffb40e 1013 if (__glibc_unlikely (inptr + cnt > inend)) \
5aa8ff62 1014 { \
fd1b5c0f
UD
1015 /* We don't have enough input. But before we report that check \
1016 that all the bytes are correct. */ \
1017 for (i = 1; inptr + i < inend; ++i) \
1018 if ((inptr[i] & 0xc0) != 0x80) \
1019 break; \
85830c4c 1020 \
a1ffb40e 1021 if (__glibc_likely (inptr + i == inend)) \
85830c4c
UD
1022 { \
 /* This break is not inside the for loop above, so it \
 leaves whatever construct BODY is expanded in. */ \
1023 result = __GCONV_INCOMPLETE_INPUT; \
1024 break; \
1025 } \
1026 \
 /* i is the index of the first byte that is not a continuation \
 byte. */ \
347bace2 1027 goto errout; \
5aa8ff62
UD
1028 } \
1029 \
1030 /* Read the possible remaining bytes. */ \
1031 for (i = 1; i < cnt; ++i) \
1032 { \
1033 uint32_t byte = inptr[i]; \
1034 \
1035 if ((byte & 0xc0) != 0x80) \
bd32e4a6
UD
1036 /* This is an illegal encoding. */ \
1037 break; \
5aa8ff62
UD
1038 \
 /* Append the six payload bits of this continuation byte. */ \
1039 ch <<= 6; \
1040 ch |= byte & 0x3f; \
1041 } \
85830c4c 1042 \
bd32e4a6
UD
1043 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1044 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1045 have been represented with fewer than cnt bytes. */ \
9c32c895
UD
1046 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1047 /* Do not accept UTF-16 surrogates. */ \
1048 || (ch >= 0xd800 && ch <= 0xdfff)) \
bd32e4a6
UD
1049 { \
1050 /* This is an illegal encoding. */ \
347bace2 1051 goto errout; \
bd32e4a6
UD
1052 } \
1053 \
5aa8ff62 1054 inptr += cnt; \
8619129f
UD
1055 } \
1056 \
1057 /* Now adjust the pointers and store the result. */ \
cdda3d7d
AJ
1058 *((uint32_t *) outptr) = ch; \
1059 outptr += sizeof (uint32_t); \
8619129f 1060 }
/* NOTE(review): presumably tells loop.c that BODY needs the conversion
   flags -- confirm against loop.c.  */
55985355 1061#define LOOP_NEED_FLAGS
fd1b5c0f
UD
1062
/* STORE_REST saves a UTF-8 sequence that is cut off by the end of the
   input into *state.  The bytes that are present are folded into a
   partially built UCS4 value.  The macro relies on the first byte at
   *inptrp being a valid lead byte whose sequence extends past inend.

   On exit:
     - *inptrp has been advanced up to inend;
     - the low 8 bits of state->__count hold the number of bytes that were
       available, the bits above hold the full sequence length;
     - state->__value.__wch holds the payload bits seen so far, already
       shifted left to leave room for the bytes still missing.

   inptrp, inend and state are not declared here; presumably they are
   provided by the code that expands this macro.  */
#define STORE_REST \
  { \
    wint_t acc = **inptrp; \
    const size_t have = inend - *inptrp; \
    size_t seqlen; \
    size_t missing; \
    \
    assert (acc != 0xc0 && acc != 0xc1); \
    \
    /* Derive the sequence length from the lead byte.  0xc0 and 0xc1 \
       cannot start a two-byte sequence because such a character would \
       fit into a single byte.  */ \
    if (acc >= 0xc2 && acc < 0xe0) \
      seqlen = 2; \
    else if (__glibc_likely ((acc & 0xf0) == 0xe0)) \
      seqlen = 3; \
    else if (__glibc_likely ((acc & 0xf8) == 0xf0)) \
      seqlen = 4; \
    else if (__glibc_likely ((acc & 0xfc) == 0xf8)) \
      seqlen = 5; \
    else \
      seqlen = 6; \
    \
    /* Keep only the payload bits of the lead byte: 0x1f, 0x0f, 0x07, \
       0x03 and 0x01 for lengths 2 to 6.  */ \
    acc &= 0x7f >> seqlen; \
    \
    /* The lead byte is consumed; fold in every continuation byte that \
       is present.  */ \
    missing = seqlen - 1; \
    for (++*inptrp; *inptrp < inend; ++*inptrp) \
      { \
	acc = (acc << 6) | (**inptrp & 0x3f); \
	--missing; \
      } \
    \
    /* Make room for the continuation bytes that have not arrived.  */ \
    acc <<= missing * 6; \
    \
    /* Available byte count in the low byte, total length above it.  */ \
    state->__count = have | (seqlen << 8); \
    state->__value.__wch = acc; \
  }
1126
/* UNPACK_BYTES rebuilds, in bytebuf, the raw bytes of a partial UTF-8
   sequence from the packed form kept in *state: state->__count >> 8 is
   the total sequence length, state->__count & 255 the number of bytes
   that were seen (stored into inlen), and state->__value.__wch the
   payload bits.

   state, inlen and bytebuf are not declared here; presumably they are
   provided by the code that expands this macro.  */
#define UNPACK_BYTES \
  { \
    /* Lead-byte prefix for sequences of 2, 3, 4, 5 and 6 bytes.  */ \
    static const unsigned char lead_marker[5] = \
      { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t bits = state->__value.__wch; \
    size_t pos = state->__count >> 8; \
    \
    inlen = state->__count & 255; \
    \
    bytebuf[0] = lead_marker[pos - 2]; \
    \
    /* Walk from the last position of the sequence down to position 1, \
       peeling six bits off per position.  Only positions that were \
       really present in the input (pos < inlen) are written.  */ \
    while (1) \
      { \
	--pos; \
	if (pos < inlen) \
	  bytebuf[pos] = 0x80 | (bits & 0x3f); \
	bits >>= 6; \
	if (pos <= 1) \
	  break; \
      } \
    \
    /* What is left belongs into the lead byte.  */ \
    bytebuf[0] |= bits; \
  }
1147
41f112ad
UD
/* Zero state->__count, the field in which STORE_REST records a pending
   partial sequence.  */
#define CLEAR_STATE state->__count = 0
1150
1151
8619129f
UD
1152#include <iconv/loop.c>
1153#include <iconv/skeleton.c>
1154
1155
/* UCS-2 -> internal (UCS4-like) format.

   Parameters for the generic conversion code instantiated by the
   includes of <iconv/loop.c> and <iconv/skeleton.c> that follow this
   section: 2 input bytes yield 4 output bytes.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2_internal_loop
#define TO_LOOP			ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs2_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Widen one 16-bit unit read with get16 to a 32-bit output value.  */
#define BODY \
  { \
    uint16_t unit = get16 (inptr); \
    \
    /* 0xd800..0xdfff are exactly the values whose top five bits are \
       11011.  Surrogates are not valid UCS-2, so the two offending \
       bytes go to the generic error handler.  (Catching this here is \
       not security relevant.)  */ \
    if (__glibc_unlikely ((unit & 0xf800) == 0xd800)) \
      { \
	STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    inptr += 2; \
    *((uint32_t *) outptr) = unit; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
8619129f
UD
1186#include <iconv/loop.c>
1187#include <iconv/skeleton.c>
1188
1189
/* Internal (UCS4-like) format -> UCS-2.

   Parameters for the generic conversion code instantiated by the
   includes of <iconv/loop.c> and <iconv/skeleton.c> that follow this
   section: 4 input bytes yield 2 output bytes.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2_loop
#define TO_LOOP			internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs2
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Narrow one 32-bit value to a 16-bit unit written with put16.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    \
    if (__glibc_likely (val < 0xd800 \
			|| (val >= 0xe000 && val < 0x10000))) \
      { \
	/* Representable in UCS-2: emit it.  */ \
	put16 (outptr, val); \
	outptr += sizeof (uint16_t); \
	inptr += 4; \
      } \
    else if (val >= 0x10000) \
      { \
	/* Outside the 16-bit range.  */ \
	UNICODE_TAG_HANDLER (val, 4); \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else \
      { \
	/* val is in 0xd800..0xdfff.  Surrogates in UCS-4 input are not \
	   valid and must be caught: the UCS-2 output might be read as \
	   UTF-16 by other programs, so letting them through would allow \
	   an attacker to synthesize any plane 1-16 character.  The error \
	   is recorded in result even when it is skipped.  */ \
	result = __GCONV_ILLEGAL_INPUT; \
	if (ignore_errors_p ()) \
	  { \
	    ++*irreversible; \
	    inptr += 4; \
	    continue; \
	  } \
	break; \
      } \
  }
#define LOOP_NEED_FLAGS
8619129f
UD
1236#include <iconv/loop.c>
1237#include <iconv/skeleton.c>
9b26f5c4
UD
1238
1239
/* Convert from UCS2 in other endianness to the internal (UCS4-like)
   format.

   Parameters for the generic conversion code instantiated by the
   includes of <iconv/loop.c> and <iconv/skeleton.c> that follow this
   section: 2 input bytes yield 4 output bytes.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2reverse_internal_loop
#define TO_LOOP			ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Read one 16-bit unit with get16, swap its bytes and widen it to a
   32-bit output value.  Expects inptr, outptr, result and irreversible
   from the expanding code.  */
#define BODY \
  { \
    uint16_t u1 = bswap_16 (get16 (inptr)); \
    \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
      { \
	/* Surrogate characters in UCS-2 input are not valid.  Reject \
	   them.  (Catching this here is not security relevant.)  \
	   The error is recorded in result before the ignore flag is \
	   consulted, so a skipped surrogate is still reported -- the \
	   same behavior as __gconv_transform_internal_ucs2.  */ \
	result = __GCONV_ILLEGAL_INPUT; \
	if (! ignore_errors_p ()) \
	  break; \
	/* Errors are ignored: drop the unit and count it.  */ \
	inptr += 2; \
	++*irreversible; \
	continue; \
      } \
    \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
#define LOOP_NEED_FLAGS
9b26f5c4
UD
1277#include <iconv/loop.c>
1278#include <iconv/skeleton.c>
1279
1280
/* Convert from the internal (UCS4-like) format to UCS2 in other
   endianness.

   Parameters for the generic conversion code instantiated by the
   includes of <iconv/loop.c> and <iconv/skeleton.c> that follow this
   section: 4 input bytes yield 2 output bytes.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2reverse_loop
#define TO_LOOP			internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Narrow one 32-bit value to a byte-swapped 16-bit unit written with
   put16.  Expects inptr, outptr, result and irreversible from the
   expanding code.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    if (__glibc_unlikely (val >= 0x10000)) \
      { \
	/* Outside the 16-bit range.  */ \
	UNICODE_TAG_HANDLER (val, 4); \
	STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
      { \
	/* Surrogate characters in UCS-4 input are not valid. \
	   We must catch this, because the UCS-2 output might be \
	   interpreted as UTF-16 by other programs.  If we let \
	   surrogates pass through, attackers could make a security \
	   hole exploit by synthesizing any desired plane 1-16 \
	   character. \
	   The error is recorded in result before the ignore flag is \
	   consulted, so a skipped surrogate is still reported -- the \
	   same behavior as __gconv_transform_internal_ucs2.  */ \
	result = __GCONV_ILLEGAL_INPUT; \
	if (! ignore_errors_p ()) \
	  break; \
	/* Errors are ignored: drop the character and count it.  */ \
	inptr += 4; \
	++*irreversible; \
	continue; \
      } \
    else \
      { \
	put16 (outptr, bswap_16 (val)); \
	outptr += sizeof (uint16_t); \
	inptr += 4; \
      } \
  }
#define LOOP_NEED_FLAGS
9b26f5c4
UD
1328#include <iconv/loop.c>
1329#include <iconv/skeleton.c>