]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_simple.c
Remove NOT_IN_libc
[thirdparty/glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
d4697bc9 2 Copyright (C) 1997-2014 Free Software Foundation, Inc.
6973fc01
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
6973fc01
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
6973fc01 15
41bdb6e2 16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
6973fc01 19
f1fa8b68 20#include <byteswap.h>
55985355 21#include <dlfcn.h>
f1fa8b68 22#include <endian.h>
f4017d20 23#include <errno.h>
6973fc01 24#include <gconv.h>
d2374599 25#include <stdint.h>
6973fc01
UD
26#include <stdlib.h>
27#include <string.h>
28#include <wchar.h>
29#include <sys/param.h>
f9ad060c 30#include <gconv_int.h>
6973fc01 31
17427edd 32#define BUILTIN_ALIAS(s1, s2) /* nothing */
f9ad060c
UD
33#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
17427edd 35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
a784e502 36 const unsigned char **, const unsigned char *, \
17427edd
UD
37 unsigned char **, size_t *, int, int);
38#include "gconv_builtin.h"
39
40
a904b5d9
UD
/* Use EINVAL in place of EILSEQ when <errno.h> does not provide it.  */
#if !defined EILSEQ
# define EILSEQ EINVAL
#endif
44
45
f9ad060c
UD
/* Single-byte conversion to INTERNAL that recognizes only ASCII: a byte
   below 0x80 is returned unchanged as a wide character, every other byte
   yields WEOF.  STEP is not consulted.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c <= 0x7f ? (wint_t) c : WEOF;
}
56
57
f1fa8b68
UD
/* Transform from the internal, UCS4-like format to UCS4.  The internal
   format and real UCS4 differ, if at all, only in endianness.
   Unicode/ISO 10646 says the byte order is big endian unless a higher
   protocol specifies otherwise.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
#define ONE_DIRECTION           0


/* Convert as many complete 4-byte characters as fit in both the input
   [*INPTRP, INEND) and the output [*OUTPTRP, OUTEND), writing them in big
   endian order, and advance both pointers past the converted data.

   Returns __GCONV_EMPTY_INPUT when the input was consumed completely,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.  The little endian path accesses both buffers through
   uint32_t pointers.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order is the reverse of the target order: swap every word.  */
  const uint32_t *src32 = (const uint32_t *) src;
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left;

  for (left = units; left != 0; --left)
    *dst32++ = bswap_32 (*src32++);

  *inptrp = (const unsigned char *) src32;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is the target order: plain copy.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
d2374599 115
#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that touches the buffers only one byte at
   a time, so neither *INPTRP nor *OUTPTRP has to be 4-byte aligned.
   Return values are the same: __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP, STEP_DATA and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  size_t left;

  for (left = units; left != 0; --left)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is the target order: plain copy.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
164
fd1b5c0f
UD
165
/* Finish a character whose first bytes were saved in the conversion state
   by an earlier call.  The low three bits of STEP_DATA->__statep->__count
   hold the number of saved bytes; the bytes live in __value.__wchb.

   Bytes are taken from *INPTRP until four are available.  If the input
   ends first, the new byte count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written to *OUTPTRP in big endian order, *OUTPTRP is advanced by four,
   the byte count is reset and __GCONV_OK is returned.  STEP, OUTEND and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;

  /* Top the saved bytes up from the fresh input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes.  Remember how many are stored.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
#if __BYTE_ORDER == __LITTLE_ENDIAN
  dst[0] = state->__value.__wchb[3];
  dst[1] = state->__value.__wchb[2];
  dst[2] = state->__value.__wchb[1];
  dst[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  dst[0] = state->__value.__wchb[0];
  dst[1] = state->__value.__wchb[1];
  dst[2] = state->__value.__wchb[2];
  dst[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp = dst + 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
212
8619129f 213#include <iconv/skeleton.c>
d2374599 214
d2374599 215
4a069c33
UD
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike for the
   other direction we have to check for correct values here.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal
#define ONE_DIRECTION           0


/* Convert big endian UCS4 characters from [*INPTRP, INEND) into host-order
   words at [*OUTPTRP, OUTEND), advancing both pointers.  Both buffers are
   accessed through uint32_t pointers.

   A value above 0x7fffffff is not valid UCS4.  For such a value:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not written back;
   - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, *INPTRP designating the offending
     character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT when a partial character is left.  STEP is not
   used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration instead of precomputing a
     trip count: a skipped character consumes input but no output, so a
     fixed count would end the loop while input and room both remain.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine bug
             in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
293
#if !_STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop that accesses the buffers only one byte
   at a time, so neither pointer has to be 4-byte aligned.

   The input is big endian, so a character is out of range (above
   0x7fffffff) exactly when its first byte is above 0x7f.  Such a
   character is handled like this:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not written back;
   - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, *INPTRP designating the offending
     character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT according to which buffer ran out.  STEP is
   not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration: a skipped character
     consumes input but no output, so a precomputed trip count would end
     the loop too early.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* 0x80 itself already yields 0x80xxxxxx, hence compare with 0x7f.  */
      if (__glibc_unlikely (inptr[0] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine bug
             in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
363
364
/* Finish a big endian UCS4 character whose first bytes were saved in the
   conversion state by an earlier call.  The low three bits of
   STEP_DATA->__statep->__count hold the number of saved bytes; the bytes
   live in __value.__wchb.

   Bytes are taken from *INPTRP until four are available; if the input
   ends first the new count is recorded and __GCONV_INCOMPLETE_INPUT is
   returned.  A character whose first byte is above 0x7f is out of range
   (above 0x7fffffff): unless STEP_DATA->__flags has
   __GCONV_IGNORE_ERRORS, the bytes consumed by this call are handed back
   through *INPTRP and __GCONV_ILLEGAL_INPUT is returned; with the flag
   set the character is dropped without output.  A valid character is
   written to *OUTPTRP in host byte order.  In the last two cases the
   saved-byte count is cleared and __GCONV_OK is returned.  STEP, OUTEND
   and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 itself already yields 0x80xxxxxx, hence compare with 0x7f.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input by this call;
             state->__count still holds the count from before it.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
425
426#include <iconv/skeleton.c>
427
428
/* Transform from the internal format to the little endian form of UCS4.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le
#define ONE_DIRECTION           0


/* Convert as many complete 4-byte characters as fit in both the input
   [*INPTRP, INEND) and the output [*OUTPTRP, OUTEND), writing them in
   little endian order, and advance both pointers past the converted data.

   Returns __GCONV_EMPTY_INPUT when the input was consumed completely,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.  The big endian path accesses both buffers through
   uint32_t pointers.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order is the reverse of the target order: swap every word.  */
  const uint32_t *src32 = (const uint32_t *) src;
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left;

  for (left = units; left != 0; --left)
    *dst32++ = bswap_32 (*src32++);

  *inptrp = (const unsigned char *) src32;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already is the target order: plain copy.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
483
#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that touches the buffers only one byte
   at a time, so neither *INPTRP nor *OUTPTRP has to be 4-byte aligned.

   Returns __GCONV_EMPTY_INPUT when the input was consumed completely,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes are left, and
   __GCONV_FULL_OUTPUT otherwise (asserting that the output really has no
   room for another character).  STEP, STEP_DATA and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  size_t left;

  for (left = units; left != 0; --left)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already is the target order: plain copy.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
535
fd1b5c0f
UD
536
/* Finish a character whose first bytes were saved in the conversion state
   by an earlier call.  The low three bits of STEP_DATA->__statep->__count
   hold the number of saved bytes; the bytes live in __value.__wchb.

   Bytes are taken from *INPTRP until four are available.  If the input
   ends first, the new byte count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written to *OUTPTRP in little endian order, *OUTPTRP is advanced by
   four, the byte count is reset and __GCONV_OK is returned.  STEP, OUTEND
   and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;

  /* Top the saved bytes up from the fresh input.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes.  Remember how many are stored.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
#if __BYTE_ORDER == __BIG_ENDIAN
  dst[0] = state->__value.__wchb[3];
  dst[1] = state->__value.__wchb[2];
  dst[2] = state->__value.__wchb[1];
  dst[3] = state->__value.__wchb[0];
#else
  dst[0] = state->__value.__wchb[0];
  dst[1] = state->__value.__wchb[1];
  dst[2] = state->__value.__wchb[2];
  dst[3] = state->__value.__wchb[3];
#endif
  *outptrp = dst + 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
583
8d617a71
UD
584#include <iconv/skeleton.c>
585
586
4a069c33
UD
/* And finally from UCS4-LE to the internal encoding.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal
#define ONE_DIRECTION           0


/* Convert little endian UCS4 characters from [*INPTRP, INEND) into
   host-order words at [*OUTPTRP, OUTEND), advancing both pointers.  Both
   buffers are accessed through uint32_t pointers.

   A value above 0x7fffffff is not valid UCS4.  For such a value:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not written back;
   - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, *INPTRP designating the offending
     character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes are left, and
   __GCONV_FULL_OUTPUT when the output has no room for another character.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration instead of precomputing a
     trip count: a skipped character consumes input but no output, so a
     fixed count would end the loop while input and room both remain and
     the assertion below would fail.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine bug
             in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far the conversion got, as ucs4_internal_loop
             does.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
664
#if !_STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop that accesses the buffers only one byte
   at a time, so neither pointer has to be 4-byte aligned.

   The input is little endian, so a character is out of range (above
   0x7fffffff) exactly when its fourth byte is above 0x7f.  Such a
   character is handled like this:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not written back;
   - if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, *INPTRP designating the offending
     character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes are left, and
   __GCONV_FULL_OUTPUT when the output has no room for another character.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Re-check both buffers on every iteration: a skipped character
     consumes input but no output, so a precomputed trip count would end
     the loop too early and the assertion below would fail.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* 0x80 itself already yields 0x80xxxxxx, hence compare with 0x7f.  */
      if (__glibc_unlikely (inptr[3] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine bug
             in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
738
739
/* Finish a little endian UCS4 character whose first bytes were saved in
   the conversion state by an earlier call.  The low three bits of
   STEP_DATA->__statep->__count hold the number of saved bytes; the bytes
   live in __value.__wchb.

   Bytes are taken from *INPTRP until four are available; if the input
   ends first the new count is recorded and __GCONV_INCOMPLETE_INPUT is
   returned.  A character whose fourth byte is above 0x7f is out of range
   (above 0x7fffffff): unless STEP_DATA->__flags has
   __GCONV_IGNORE_ERRORS, __GCONV_ILLEGAL_INPUT is returned; with the flag
   set the character is dropped without output.  A valid character is
   written to *OUTPTRP in host byte order.  In the last two cases the
   saved-byte count is cleared and __GCONV_OK is returned.  STEP, OUTEND
   and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 itself already yields 0x80xxxxxx, hence compare with 0x7f.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        return __GCONV_ILLEGAL_INPUT;
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
797
798#include <iconv/skeleton.c>
799
800
8619129f
UD
801/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
802#define DEFINE_INIT 0
803#define DEFINE_FINI 0
804#define MIN_NEEDED_FROM 1
805#define MIN_NEEDED_TO 4
806#define FROM_DIRECTION 1
807#define FROM_LOOP ascii_internal_loop
808#define TO_LOOP ascii_internal_loop /* This is not used. */
809#define FUNCTION_NAME __gconv_transform_ascii_internal
fd1b5c0f 810#define ONE_DIRECTION 1
8619129f
UD
811
812#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
813#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
814#define LOOPFCT FROM_LOOP
815#define BODY \
816 { \
a1ffb40e 817 if (__glibc_unlikely (*inptr > '\x7f')) \
8619129f 818 { \
55985355
UD
819 /* The value is too large. We don't try transliteration here since \
820 this is not an error because of the lack of possibilities to \
821 represent the result. This is a genuine bug in the input since \
822 ASCII does not allow such values. */ \
e438a468 823 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
85830c4c
UD
824 } \
825 else \
5deca9bb
UD
826 { \
827 /* It's an one byte sequence. */ \
828 *((uint32_t *) outptr) = *inptr++; \
829 outptr += sizeof (uint32_t); \
830 } \
8619129f 831 }
55985355 832#define LOOP_NEED_FLAGS
8619129f
UD
833#include <iconv/loop.c>
834#include <iconv/skeleton.c>
835
836
837/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
838#define DEFINE_INIT 0
839#define DEFINE_FINI 0
840#define MIN_NEEDED_FROM 4
841#define MIN_NEEDED_TO 1
842#define FROM_DIRECTION 1
843#define FROM_LOOP internal_ascii_loop
844#define TO_LOOP internal_ascii_loop /* This is not used. */
845#define FUNCTION_NAME __gconv_transform_internal_ascii
fd1b5c0f 846#define ONE_DIRECTION 1
8619129f
UD
847
848#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
849#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
850#define LOOPFCT FROM_LOOP
851#define BODY \
852 { \
a1ffb40e 853 if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
8619129f 854 { \
601d2942 855 UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
e438a468 856 STANDARD_TO_LOOP_ERR_HANDLER (4); \
85830c4c
UD
857 } \
858 else \
5deca9bb
UD
859 { \
860 /* It's an one byte sequence. */ \
861 *outptr++ = *((const uint32_t *) inptr); \
862 inptr += sizeof (uint32_t); \
863 } \
8619129f 864 }
55985355 865#define LOOP_NEED_FLAGS
8619129f
UD
866#include <iconv/loop.c>
867#include <iconv/skeleton.c>
868
869
870/* Convert from the internal (UCS4-like) format to UTF-8. */
871#define DEFINE_INIT 0
872#define DEFINE_FINI 0
873#define MIN_NEEDED_FROM 4
874#define MIN_NEEDED_TO 1
875#define MAX_NEEDED_TO 6
876#define FROM_DIRECTION 1
877#define FROM_LOOP internal_utf8_loop
878#define TO_LOOP internal_utf8_loop /* This is not used. */
879#define FUNCTION_NAME __gconv_transform_internal_utf8
fd1b5c0f 880#define ONE_DIRECTION 1
8619129f
UD
881
882#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
883#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
5aa8ff62 884#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
8619129f
UD
885#define LOOPFCT FROM_LOOP
886#define BODY \
887 { \
17427edd 888 uint32_t wc = *((const uint32_t *) inptr); \
8619129f 889 \
a1ffb40e 890 if (__glibc_likely (wc < 0x80)) \
8619129f
UD
891 /* It's an one byte sequence. */ \
892 *outptr++ = (unsigned char) wc; \
a1ffb40e 893 else if (__glibc_likely (wc <= 0x7fffffff)) \
8619129f
UD
894 { \
895 size_t step; \
3cc4a097 896 unsigned char *start; \
8619129f
UD
897 \
898 for (step = 2; step < 6; ++step) \
b79f74cd 899 if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
8619129f
UD
900 break; \
901 \
a1ffb40e 902 if (__glibc_unlikely (outptr + step > outend)) \
8619129f
UD
903 { \
904 /* Too long. */ \
d64b6ad0 905 result = __GCONV_FULL_OUTPUT; \
8619129f
UD
906 break; \
907 } \
908 \
909 start = outptr; \
b79f74cd 910 *outptr = (unsigned char) (~0xff >> step); \
8619129f 911 outptr += step; \
8619129f
UD
912 do \
913 { \
347bace2 914 start[--step] = 0x80 | (wc & 0x3f); \
8619129f
UD
915 wc >>= 6; \
916 } \
347bace2 917 while (step > 1); \
8619129f 918 start[0] |= wc; \
db2d05f9
UD
919 } \
920 else \
921 { \
e438a468 922 STANDARD_TO_LOOP_ERR_HANDLER (4); \
8619129f
UD
923 } \
924 \
925 inptr += 4; \
926 }
db2d05f9 927#define LOOP_NEED_FLAGS
8619129f
UD
928#include <iconv/loop.c>
929#include <iconv/skeleton.c>
930
931
/* Convert from UTF-8 to the internal (UCS4-like) format.  One to six
   input bytes yield one 4-byte output character.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Per-character step; inptr, inend, outptr and result are presumably
   supplied by the loop that <iconv/loop.c> builds around BODY (not
   visible here).  */
#define BODY \
  { \
    /* Next input byte.  */ \
    uint32_t ch = *inptr; \
    \
    if (__glibc_likely (ch < 0x80)) \
      { \
        /* One byte sequence.  */ \
        ++inptr; \
      } \
    else \
      { \
        uint_fast32_t cnt; \
        uint_fast32_t i; \
        \
        /* Derive the sequence length CNT from the lead byte and keep \
           only its payload bits in CH.  */ \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 or \
               0xc1, otherwise the wide character could have been \
               represented using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
          { \
            /* We expect five bytes.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
          { \
            /* We expect six bytes.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  */ \
            i = 0; \
            do \
              ++i; \
            while (inptr + i < inend \
                   && (inptr[i] & 0xc0) == 0x80 \
                   && i < 5); \
            \
          errout: \
            /* Also the target of the "goto errout" statements below; I \
               then holds the number of bytes examined so far.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
        \
        if (__glibc_unlikely (inptr + cnt > inend)) \
          { \
            /* We don't have enough input.  But before we report that \
               check that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
            \
            if (__glibc_likely (inptr + i == inend)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            \
            goto errout; \
          } \
        \
        /* Read the possible remaining bytes.  */ \
        for (i = 1; i < cnt; ++i) \
          { \
            uint32_t trail = inptr[i]; \
            \
            if ((trail & 0xc0) != 0x80) \
              /* This is an illegal encoding.  */ \
              break; \
            \
            ch = (ch << 6) | (trail & 0x3f); \
          } \
        \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
           have been represented with fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
            /* Do not accept UTF-16 surrogates.  */ \
            || (ch >= 0xd800 && ch <= 0xdfff)) \
          /* This is an illegal encoding.  */ \
          goto errout; \
        \
        inptr += cnt; \
      } \
    \
    /* Now adjust the pointers and store the result.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
#define LOOP_NEED_FLAGS
fd1b5c0f
UD
1059
/* STORE_REST: save a truncated UTF-8 sequence in the conversion state.

   STATE, INPTRP and INEND are not declared here; they come from the place
   where the macro is expanded.

   What gets stored:
   - state->__count, low 8 bits: number of bytes that were available
     (INEND - *INPTRP, lead byte included).
   - state->__count, bits 8 and up: CNT, the full sequence length implied
     by the lead byte.
   - state->__value.__wch: the payload bits read so far, shifted left by
     six bits for every byte that is still missing, so the value is
     positioned as if the missing bytes carried zero payload.

   *INPTRP is advanced to INEND.  The trail bytes are only masked with
   0x3f here, not validated, and the final `else' takes any remaining lead
   byte value as a six-byte lead.  */
1060#define STORE_REST \
1061 { \
1062 /* We store the remaining bytes while converting them into the UCS4 \
1063 format. We can assume that the first byte in the buffer is \
1064 correct and that it requires a larger number of bytes than there \
1065 are in the input buffer. */ \
1066 wint_t ch = **inptrp; \
ea31b613 1067 size_t cnt, r; \
fd1b5c0f
UD
1068 \
1069 state->__count = inend - *inptrp; \
1070 \
9954432e 1071 assert (ch != 0xc0 && ch != 0xc1); \
fd1b5c0f
UD
1072 if (ch >= 0xc2 && ch < 0xe0) \
1073 { \
1074 /* We expect two bytes. The first byte cannot be 0xc0 or \
1075 0xc1, otherwise the wide character could have been \
1076 represented using a single byte. */ \
1077 cnt = 2; \
1078 ch &= 0x1f; \
1079 } \
a1ffb40e 1080 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
fd1b5c0f
UD
1081 { \
1082 /* We expect three bytes. */ \
1083 cnt = 3; \
1084 ch &= 0x0f; \
1085 } \
a1ffb40e 1086 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
fd1b5c0f
UD
1087 { \
1088 /* We expect four bytes. */ \
1089 cnt = 4; \
1090 ch &= 0x07; \
1091 } \
a1ffb40e 1092 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
fd1b5c0f
UD
1093 { \
1094 /* We expect five bytes. */ \
1095 cnt = 5; \
1096 ch &= 0x03; \
1097 } \
1098 else \
1099 { \
1100 /* We expect six bytes. */ \
1101 cnt = 6; \
1102 ch &= 0x01; \
1103 } \
1104 \
1105 /* The first byte is already consumed. */ \
ea31b613 1106 r = cnt - 1; \
fd1b5c0f
UD
1107 while (++(*inptrp) < inend) \
1108 { \
1109 ch <<= 6; \
1110 ch |= **inptrp & 0x3f; \
ea31b613 1111 --r; \
fd1b5c0f
UD
1112 } \
1113 \
1114 /* Shift for the so far missing bytes. */ \
ea31b613
UD
1115 ch <<= r * 6; \
1116 \
1117 /* Store the number of bytes expected for the entire sequence. */ \
41f112ad 1118 state->__count |= cnt << 8; \
fd1b5c0f
UD
1119 \
1120 /* Store the value. */ \
1121 state->__value.__wch = ch; \
1122 }
1123
/* UNPACK_BYTES: rebuild the bytes of a saved partial UTF-8 sequence from
   the conversion state, reading the layout written by STORE_REST above.

   STATE, INLEN and BYTEBUF are not declared here; they come from the place
   where the macro is expanded.

   - NTOTAL (state->__count >> 8) is the full sequence length and selects
     the lead-byte prefix from INMASK, whose entries cover lengths 2 to 6.
   - INLEN (state->__count & 255) is the number of bytes that had been
     seen.
   - The loop walks the positions from NTOTAL - 1 down to 1.  WCH is
     shifted right by six bits for every position, but a continuation byte
     is written only for positions below INLEN, i.e. for bytes that were
     actually present.
   - The bits left in WCH after the loop are ORed into the lead byte.  */
1124#define UNPACK_BYTES \
1125 { \
ea31b613 1126 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
fd1b5c0f 1127 wint_t wch = state->__value.__wch; \
41f112ad 1128 size_t ntotal = state->__count >> 8; \
ea31b613 1129 \
41f112ad 1130 inlen = state->__count & 255; \
fd1b5c0f 1131 \
ea31b613 1132 bytebuf[0] = inmask[ntotal - 2]; \
fd1b5c0f 1133 \
cd201e38
UD
1134 do \
1135 { \
1136 if (--ntotal < inlen) \
1137 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1138 wch >>= 6; \
1139 } \
1140 while (ntotal > 1); \
fd1b5c0f
UD
1141 \
1142 bytebuf[0] |= wch; \
1143 }
1144
41f112ad
UD
/* CLEAR_STATE: mark the conversion state as holding no partial sequence.
   Only __count is reset; __value.__wch, which STORE_REST also writes, is
   left as it is.  STATE comes from the place where the macro is
   expanded.  */
1145#define CLEAR_STATE \
1146 state->__count = 0
1147
1148
8619129f
UD
/* The two template files below are included once per conversion in this
   file; presumably they expand the macros defined above into
   FUNCTION_NAME and its loop -- confirm in those files.  */
1149#include <iconv/loop.c>
1150#include <iconv/skeleton.c>
1151
1152
1153/* Convert from UCS2 to the internal (UCS4-like) format.

   Each input character is one 2-byte code unit read with get16; each
   output character is that value stored as one 4-byte uint32_t.  Code
   units in the surrogate range 0xd800-0xdfff are handed to
   STANDARD_FROM_LOOP_ERR_HANDLER with an increment of 2 and are never
   stored.  INPTR, OUTPTR and the handler macro come from the template
   files included at the end of this section.  */
1154#define DEFINE_INIT 0
1155#define DEFINE_FINI 0
1156#define MIN_NEEDED_FROM 2
1157#define MIN_NEEDED_TO 4
1158#define FROM_DIRECTION 1
1159#define FROM_LOOP ucs2_internal_loop
1160#define TO_LOOP ucs2_internal_loop /* This is not used. */
1161#define FUNCTION_NAME __gconv_transform_ucs2_internal
fd1b5c0f 1162#define ONE_DIRECTION 1
8619129f
UD
1163
1164#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1165#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1166#define LOOPFCT FROM_LOOP
428bcea4 1167#define BODY \
755104ed 1168 { \
606135cf 1169 uint16_t u1 = get16 (inptr); \
755104ed 1170 \
a1ffb40e 1171 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
755104ed
UD
1172 { \
1173 /* Surrogate characters in UCS-2 input are not valid. Reject \
1174 them. (Catching this here is not security relevant.) */ \
e438a468 1175 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
755104ed
UD
1176 } \
1177 \
cdda3d7d
AJ
1178 *((uint32_t *) outptr) = u1; \
1179 outptr += sizeof (uint32_t); \
755104ed
UD
1180 inptr += 2; \
1181 }
1182#define LOOP_NEED_FLAGS
8619129f
UD
1183#include <iconv/loop.c>
1184#include <iconv/skeleton.c>
1185
1186
1187/* Convert from the internal (UCS4-like) format to UCS2.

   Each input character is one 4-byte uint32_t; each output character is
   one 2-byte code unit written with put16.  Three cases:
   - Values of 0x10000 and above do not fit into 16 bits.  They are passed
     to UNICODE_TAG_HANDLER and then to STANDARD_TO_LOOP_ERR_HANDLER.
   - Values in the surrogate range 0xd800-0xdfff are rejected.  RESULT is
     set to __GCONV_ILLEGAL_INPUT before ignore_errors_p () is consulted,
     and nothing in this macro resets it, so it keeps that value even when
     the character is skipped and counted in *IRREVERSIBLE.
   - Every other value is stored unchanged.
   INPTR, OUTPTR, RESULT, IRREVERSIBLE and the handler macros come from the
   template files included at the end of this section.  */
1188#define DEFINE_INIT 0
1189#define DEFINE_FINI 0
1190#define MIN_NEEDED_FROM 4
1191#define MIN_NEEDED_TO 2
1192#define FROM_DIRECTION 1
1193#define FROM_LOOP internal_ucs2_loop
1194#define TO_LOOP internal_ucs2_loop /* This is not used. */
1195#define FUNCTION_NAME __gconv_transform_internal_ucs2
fd1b5c0f 1196#define ONE_DIRECTION 1
8619129f
UD
1197
1198#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1199#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1200#define LOOPFCT FROM_LOOP
428bcea4 1201#define BODY \
8619129f 1202 { \
17427edd 1203 uint32_t val = *((const uint32_t *) inptr); \
755104ed 1204 \
a1ffb40e 1205 if (__glibc_unlikely (val >= 0x10000)) \
8619129f 1206 { \
601d2942 1207 UNICODE_TAG_HANDLER (val, 4); \
e438a468 1208 STANDARD_TO_LOOP_ERR_HANDLER (4); \
8619129f 1209 } \
a1ffb40e 1210 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
755104ed
UD
1211 { \
1212 /* Surrogate characters in UCS-4 input are not valid. \
1213 We must catch this, because the UCS-2 output might be \
1214 interpreted as UTF-16 by other programs. If we let \
1215 surrogates pass through, attackers could make a security \
1216 hole exploit by synthesizing any desired plane 1-16 \
1217 character. */ \
e438a468 1218 result = __GCONV_ILLEGAL_INPUT; \
755104ed 1219 if (! ignore_errors_p ()) \
e438a468 1220 break; \
755104ed
UD
1221 inptr += 4; \
1222 ++*irreversible; \
1223 continue; \
1224 } \
9ea2c194 1225 else \
755104ed 1226 { \
606135cf 1227 put16 (outptr, val); \
db6af3eb 1228 outptr += sizeof (uint16_t); \
755104ed
UD
1229 inptr += 4; \
1230 } \
8619129f 1231 }
55985355 1232#define LOOP_NEED_FLAGS
8619129f
UD
1233#include <iconv/loop.c>
1234#include <iconv/skeleton.c>
9b26f5c4
UD
1235
1236
428bcea4 1237/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
9b26f5c4
UD
1238#define DEFINE_INIT 0
1239#define DEFINE_FINI 0
1240#define MIN_NEEDED_FROM 2
1241#define MIN_NEEDED_TO 4
1242#define FROM_DIRECTION 1
428bcea4
UD
1243#define FROM_LOOP ucs2reverse_internal_loop
1244#define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
8d617a71 1245#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
fd1b5c0f 1246#define ONE_DIRECTION 1
9b26f5c4
UD
1247
1248#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1249#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1250#define LOOPFCT FROM_LOOP
428bcea4 1251#define BODY \
755104ed 1252 { \
606135cf 1253 uint16_t u1 = bswap_16 (get16 (inptr)); \
755104ed 1254 \
a1ffb40e 1255 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
755104ed
UD
1256 { \
1257 /* Surrogate characters in UCS-2 input are not valid. Reject \
1258 them. (Catching this here is not security relevant.) */ \
1259 if (! ignore_errors_p ()) \
1260 { \
1261 result = __GCONV_ILLEGAL_INPUT; \
1262 break; \
1263 } \
1264 inptr += 2; \
1265 ++*irreversible; \
1266 continue; \
1267 } \
1268 \
cdda3d7d
AJ
1269 *((uint32_t *) outptr) = u1; \
1270 outptr += sizeof (uint32_t); \
755104ed
UD
1271 inptr += 2; \
1272 }
1273#define LOOP_NEED_FLAGS
9b26f5c4
UD
1274#include <iconv/loop.c>
1275#include <iconv/skeleton.c>
1276
1277
428bcea4 1278/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
9b26f5c4
UD
1279#define DEFINE_INIT 0
1280#define DEFINE_FINI 0
1281#define MIN_NEEDED_FROM 4
1282#define MIN_NEEDED_TO 2
1283#define FROM_DIRECTION 1
428bcea4
UD
1284#define FROM_LOOP internal_ucs2reverse_loop
1285#define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
8d617a71 1286#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
fd1b5c0f 1287#define ONE_DIRECTION 1
9b26f5c4
UD
1288
1289#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1290#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1291#define LOOPFCT FROM_LOOP
428bcea4 1292#define BODY \
9b26f5c4 1293 { \
17427edd 1294 uint32_t val = *((const uint32_t *) inptr); \
a1ffb40e 1295 if (__glibc_unlikely (val >= 0x10000)) \
9b26f5c4 1296 { \
601d2942 1297 UNICODE_TAG_HANDLER (val, 4); \
e438a468 1298 STANDARD_TO_LOOP_ERR_HANDLER (4); \
9b26f5c4 1299 } \
a1ffb40e 1300 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
755104ed
UD
1301 { \
1302 /* Surrogate characters in UCS-4 input are not valid. \
1303 We must catch this, because the UCS-2 output might be \
1304 interpreted as UTF-16 by other programs. If we let \
1305 surrogates pass through, attackers could make a security \
1306 hole exploit by synthesizing any desired plane 1-16 \
1307 character. */ \
1308 if (! ignore_errors_p ()) \
1309 { \
1310 result = __GCONV_ILLEGAL_INPUT; \
1311 break; \
1312 } \
1313 inptr += 4; \
1314 ++*irreversible; \
1315 continue; \
1316 } \
9ea2c194 1317 else \
755104ed 1318 { \
606135cf 1319 put16 (outptr, bswap_16 (val)); \
cdda3d7d 1320 outptr += sizeof (uint16_t); \
755104ed
UD
1321 inptr += 4; \
1322 } \
9b26f5c4 1323 }
55985355 1324#define LOOP_NEED_FLAGS
9b26f5c4
UD
1325#include <iconv/loop.c>
1326#include <iconv/skeleton.c>