]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_simple.c
2014-05-01 Steve Ellcey <sellcey@mips.com>
[thirdparty/glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
d4697bc9 2 Copyright (C) 1997-2014 Free Software Foundation, Inc.
6973fc01
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
6973fc01
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
6973fc01 15
41bdb6e2 16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
6973fc01 19
f1fa8b68 20#include <byteswap.h>
55985355 21#include <dlfcn.h>
f1fa8b68 22#include <endian.h>
f4017d20 23#include <errno.h>
6973fc01 24#include <gconv.h>
d2374599 25#include <stdint.h>
6973fc01
UD
26#include <stdlib.h>
27#include <string.h>
28#include <wchar.h>
29#include <sys/param.h>
f9ad060c 30#include <gconv_int.h>
6973fc01 31
17427edd 32#define BUILTIN_ALIAS(s1, s2) /* nothing */
f9ad060c
UD
33#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
17427edd 35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
a784e502 36 const unsigned char **, const unsigned char *, \
17427edd
UD
37 unsigned char **, size_t *, int, int);
38#include "gconv_builtin.h"
39
40
a904b5d9
UD
41#ifndef EILSEQ
42# define EILSEQ EINVAL
43#endif
44
45
f9ad060c
UD
/* Single-byte to INTERNAL helper that accepts only ASCII.  Bytes below
   0x80 map to themselves; every other byte yields WEOF.  STEP is not
   used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
56
57
f1fa8b68
UD
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 standard
   says that unless some higher protocol specifies it differently, the
   byte order is big endian.  */
8619129f
UD
63#define DEFINE_INIT 0
64#define DEFINE_FINI 0
65#define MIN_NEEDED_FROM 4
66#define MIN_NEEDED_TO 4
67#define FROM_DIRECTION 1
68#define FROM_LOOP internal_ucs4_loop
69#define TO_LOOP internal_ucs4_loop /* This is not used. */
70#define FUNCTION_NAME __gconv_transform_internal_ucs4
71
72
/* Convert a run of INTERNAL (host byte order) characters to UCS4, which
   is big endian.  Only complete 4-byte units that fit into both buffers
   are handled; *INPTRP and *OUTPTRP are advanced past them.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order differs from UCS4: swap every unit.  */
  uint32_t *dst32 = (uint32_t *) dst;

  while (units-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is big endian: a plain copy suffices.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
#else
# error "This endianess is not supported."
#endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
d2374599 114
#if !_STRING_ARCH_unaligned
/* Same conversion as internal_ucs4_loop (INTERNAL to big-endian UCS4),
   but moving data byte by byte so that neither buffer needs to be
   aligned for 32-bit accesses.  STEP, STEP_DATA and IRREVERSIBLE are
   not used.  The return value follows the same rules: EMPTY_INPUT when
   the input is exhausted, FULL_OUTPUT when fewer than four output bytes
   remain, INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the byte order of each unit.  */
  const unsigned char *const stop = src + units * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is big endian: a plain copy suffices.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
163
fd1b5c0f
UD
164
/* Finish one INTERNAL character that was split across input buffers and
   emit it as big-endian UCS4.  The low three bits of the state's
   __count hold the number of bytes already saved in __value.__wchb;
   more bytes are taken from *INPTRP until four are available.

   Returns __GCONV_INCOMPLETE_INPUT (with the byte count saved in the
   state) when the input ends first, otherwise writes four bytes at
   *OUTPTRP, advances it, clears the saved count and returns
   __GCONV_OK.  OUTEND is not consulted here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;

  /* Top up the saved bytes from the new input.  */
  for (; have < 4 && *inptrp < inend; ++have, ++*inptrp)
    state->__value.__wchb[have] = **inptrp;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are stored.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
#if __BYTE_ORDER == __LITTLE_ENDIAN
  dst[0] = state->__value.__wchb[3];
  dst[1] = state->__value.__wchb[2];
  dst[2] = state->__value.__wchb[1];
  dst[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  dst[0] = state->__value.__wchb[0];
  dst[1] = state->__value.__wchb[1];
  dst[2] = state->__value.__wchb[2];
  dst[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
  *outptrp = dst + 4;

  /* The saved bytes have been consumed.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
211
8619129f 212#include <iconv/skeleton.c>
d2374599 213
d2374599 214
4a069c33
UD
215/* Transform from UCS4 to the internal, UCS4-like format. Unlike
216 for the other direction we have to check for correct values here. */
217#define DEFINE_INIT 0
218#define DEFINE_FINI 0
219#define MIN_NEEDED_FROM 4
220#define MIN_NEEDED_TO 4
221#define FROM_DIRECTION 1
222#define FROM_LOOP ucs4_internal_loop
223#define TO_LOOP ucs4_internal_loop /* This is not used. */
224#define FUNCTION_NAME __gconv_transform_ucs4_internal
225
226
/* Convert big-endian UCS4 to the INTERNAL (host byte order) format,
   rejecting values above 0x7fffffff.

   The loop runs while a complete input character and room for a
   complete output character are both available.  It must not be
   bounded by a count computed up front: a character skipped because of
   __GCONV_IGNORE_ERRORS consumes input without producing output, so a
   fixed count would stop early and report __GCONV_INCOMPLETE_INPUT
   although whole characters and output room remain.

   On an illegal value:
     - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once
       without storing the pointers;
     - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: count the
       character in *IRREVERSIBLE and skip it;
     - otherwise store the current positions in *INPTRP / *OUTPTRP and
       return __GCONV_ILLEGAL_INPUT.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT depending on why the loop stopped.  STEP is
   not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  No transliteration is attempted:
             this is not a representation problem but a genuine error
             in the input, since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
291
#if !_STRING_ARCH_unaligned
/* Byte-wise variant of ucs4_internal_loop for buffers that are not
   aligned for 32-bit accesses: convert big-endian UCS4 to INTERNAL and
   reject values above 0x7fffffff.

   In big-endian UCS4 the first byte is the most significant one, so a
   value exceeds 0x7fffffff exactly when that byte exceeds 0x7f.
   Testing against 0x80 would let 0x80000000..0x80ffffff through, which
   the aligned variant rejects.

   The loop runs while a whole input character and room for a whole
   output character remain, so characters skipped under
   __GCONV_IGNORE_ERRORS do not end the conversion early.

   Error handling and return values are the same as for
   ucs4_internal_loop: with IRREVERSIBLE == NULL an illegal character
   returns __GCONV_ILLEGAL_INPUT without storing the pointers; with
   __GCONV_IGNORE_ERRORS it is counted and skipped; otherwise the
   positions are stored and __GCONV_ILLEGAL_INPUT is returned.  STEP is
   not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      if (__glibc_unlikely (inptr[0] > 0x7f))
        {
          /* The value is too large.  No transliteration is attempted:
             this is a genuine error in the input, since UCS4 does not
             allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
361
362
/* Finish one big-endian UCS4 character that was split across input
   buffers and store it in INTERNAL (host byte order) format.  The low
   three bits of the state's __count hold the number of bytes already
   saved in __value.__wchb; more bytes are taken from *INPTRP until four
   are available.

   Returns __GCONV_INCOMPLETE_INPUT (with the byte count saved) if the
   input ends first.  A value above 0x7fffffff, i.e. a first byte above
   0x7f, is illegal: unless __GCONV_IGNORE_ERRORS is set, *INPTRP is
   moved back over the bytes consumed by this call and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character is
   dropped without output.  In all remaining cases the saved count is
   cleared and __GCONV_OK is returned.  OUTEND and IRREVERSIBLE are not
   consulted here.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The first byte is the most significant one; anything above 0x7f
     means the value exceeds 0x7fffffff.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[0] > 0x7f))
    {
      /* The value is too large.  No transliteration is attempted: this
         is a genuine error in the input, since UCS4 does not allow such
         values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* __count still holds the number of bytes saved before this
             call, so this undoes exactly what was read above.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
423
424#include <iconv/skeleton.c>
425
426
427/* Similarly for the little endian form. */
8d617a71
UD
428#define DEFINE_INIT 0
429#define DEFINE_FINI 0
430#define MIN_NEEDED_FROM 4
431#define MIN_NEEDED_TO 4
432#define FROM_DIRECTION 1
433#define FROM_LOOP internal_ucs4le_loop
434#define TO_LOOP internal_ucs4le_loop /* This is not used. */
435#define FUNCTION_NAME __gconv_transform_internal_ucs4le
436
437
/* Convert a run of INTERNAL (host byte order) characters to
   little-endian UCS4.  Only complete 4-byte units that fit into both
   buffers are handled; *INPTRP and *OUTPTRP are advanced past them.
   STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order differs from the target: swap every unit.  */
  uint32_t *dst32 = (uint32_t *) dst;

  while (units-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already is little endian: a plain copy suffices.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
#else
# error "This endianess is not supported."
#endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
480
#if !_STRING_ARCH_unaligned
/* Same conversion as internal_ucs4le_loop (INTERNAL to little-endian
   UCS4), but moving data byte by byte so that neither buffer needs to
   be aligned for 32-bit accesses.  STEP, STEP_DATA and IRREVERSIBLE are
   not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain, and
   __GCONV_FULL_OUTPUT otherwise (asserting that the output really has
   no room for another character).  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the byte order of each unit.  */
  const unsigned char *const stop = src + units * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already is little endian: a plain copy suffices.  */
  *inptrp = src + units * 4;
  *outptrp = __mempcpy (dst, src, units * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
532
fd1b5c0f
UD
533
/* Finish one INTERNAL character that was split across input buffers and
   emit it as little-endian UCS4.  The low three bits of the state's
   __count hold the number of bytes already saved in __value.__wchb;
   more bytes are taken from *INPTRP until four are available.

   Returns __GCONV_INCOMPLETE_INPUT (with the byte count saved in the
   state) when the input ends first, otherwise writes four bytes at
   *OUTPTRP, advances it, clears the saved count and returns
   __GCONV_OK.  OUTEND is not consulted here.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;

  /* Top up the saved bytes from the new input.  */
  for (; have < 4 && *inptrp < inend; ++have, ++*inptrp)
    state->__value.__wchb[have] = **inptrp;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are stored.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
#if __BYTE_ORDER == __BIG_ENDIAN
  dst[0] = state->__value.__wchb[3];
  dst[1] = state->__value.__wchb[2];
  dst[2] = state->__value.__wchb[1];
  dst[3] = state->__value.__wchb[0];
#else
  dst[0] = state->__value.__wchb[0];
  dst[1] = state->__value.__wchb[1];
  dst[2] = state->__value.__wchb[2];
  dst[3] = state->__value.__wchb[3];
#endif
  *outptrp = dst + 4;

  /* The saved bytes have been consumed.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
580
8d617a71
UD
581#include <iconv/skeleton.c>
582
583
4a069c33
UD
584/* And finally from UCS4-LE to the internal encoding. */
585#define DEFINE_INIT 0
586#define DEFINE_FINI 0
587#define MIN_NEEDED_FROM 4
588#define MIN_NEEDED_TO 4
589#define FROM_DIRECTION 1
590#define FROM_LOOP ucs4le_internal_loop
591#define TO_LOOP ucs4le_internal_loop /* This is not used. */
592#define FUNCTION_NAME __gconv_transform_ucs4le_internal
593
594
/* Convert little-endian UCS4 to the INTERNAL (host byte order) format,
   rejecting values above 0x7fffffff.

   The loop runs while a complete input character and room for a
   complete output character are both available.  It must not be
   bounded by a count computed up front: a character skipped because of
   __GCONV_IGNORE_ERRORS consumes input without producing output, so a
   fixed count would stop early and the final assertion could fail.

   On an illegal value:
     - IRREVERSIBLE == NULL: return __GCONV_ILLEGAL_INPUT at once
       without storing the pointers;
     - __GCONV_IGNORE_ERRORS set in STEP_DATA->__flags: count the
       character in *IRREVERSIBLE and skip it;
     - otherwise store the current positions in *INPTRP / *OUTPTRP, so
       the caller can see how far the conversion got, and return
       __GCONV_ILLEGAL_INPUT.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT or
   __GCONV_FULL_OUTPUT depending on why the loop stopped.  STEP is not
   used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  No transliteration is attempted:
             this is a genuine error in the input, since UCS4 does not
             allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got before failing.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
660
#if !_STRING_ARCH_unaligned
/* Byte-wise variant of ucs4le_internal_loop for buffers that are not
   aligned for 32-bit accesses: convert little-endian UCS4 to INTERNAL
   and reject values above 0x7fffffff.

   In little-endian UCS4 the fourth byte is the most significant one, so
   a value exceeds 0x7fffffff exactly when that byte exceeds 0x7f.
   Testing against 0x80 would let 0x80000000..0x80ffffff through, which
   the aligned variant rejects.

   The loop runs while a whole input character and room for a whole
   output character remain, so characters skipped under
   __GCONV_IGNORE_ERRORS neither end the conversion early nor trip the
   final assertion.

   With IRREVERSIBLE == NULL an illegal character returns
   __GCONV_ILLEGAL_INPUT without storing the pointers; with
   __GCONV_IGNORE_ERRORS it is counted and skipped; otherwise the
   positions are stored and __GCONV_ILLEGAL_INPUT is returned.  STEP is
   not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      if (__glibc_unlikely (inptr[3] > 0x7f))
        {
          /* The value is too large.  No transliteration is attempted:
             this is a genuine error in the input, since UCS4 does not
             allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
734
735
/* Finish one little-endian UCS4 character that was split across input
   buffers and store it in INTERNAL (host byte order) format.  The low
   three bits of the state's __count hold the number of bytes already
   saved in __value.__wchb; more bytes are taken from *INPTRP until four
   are available.

   Returns __GCONV_INCOMPLETE_INPUT (with the byte count saved) if the
   input ends first.  A value above 0x7fffffff, i.e. a fourth byte above
   0x7f, is illegal: unless __GCONV_IGNORE_ERRORS is set, *INPTRP is
   moved back over the bytes consumed by this call (as the big-endian
   counterpart does) and __GCONV_ILLEGAL_INPUT is returned; with the
   flag set the character is dropped without output.  In all remaining
   cases the saved count is cleared and __GCONV_OK is returned.  OUTEND
   and IRREVERSIBLE are not consulted here.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The fourth byte is the most significant one; anything above 0x7f
     means the value exceeds 0x7fffffff.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[3] > 0x7f))
    {
      /* The value is too large.  No transliteration is attempted: this
         is a genuine error in the input, since UCS4 does not allow such
         values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* __count still holds the number of bytes saved before this
             call, so this undoes exactly what was read above.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
793
794#include <iconv/skeleton.c>
795
796
8619129f
UD
797/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
798#define DEFINE_INIT 0
799#define DEFINE_FINI 0
800#define MIN_NEEDED_FROM 1
801#define MIN_NEEDED_TO 4
802#define FROM_DIRECTION 1
803#define FROM_LOOP ascii_internal_loop
804#define TO_LOOP ascii_internal_loop /* This is not used. */
805#define FUNCTION_NAME __gconv_transform_ascii_internal
fd1b5c0f 806#define ONE_DIRECTION 1
8619129f
UD
807
808#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
809#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
810#define LOOPFCT FROM_LOOP
#define BODY \
  {                                                                           \
    if (__glibc_likely (*inptr <= 0x7f))                                      \
      {                                                                       \
        /* Plain ASCII: widen the byte to one 32-bit output unit.  */        \
        *((uint32_t *) outptr) = *inptr;                                      \
        ++inptr;                                                              \
        outptr += sizeof (uint32_t);                                          \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Bytes above 0x7f are not ASCII.  This is an error in the input    \
           itself rather than a character we merely cannot represent, so     \
           no transliteration is attempted.  */                              \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
  }
55985355 828#define LOOP_NEED_FLAGS
8619129f
UD
829#include <iconv/loop.c>
830#include <iconv/skeleton.c>
831
832
833/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
834#define DEFINE_INIT 0
835#define DEFINE_FINI 0
836#define MIN_NEEDED_FROM 4
837#define MIN_NEEDED_TO 1
838#define FROM_DIRECTION 1
839#define FROM_LOOP internal_ascii_loop
840#define TO_LOOP internal_ascii_loop /* This is not used. */
841#define FUNCTION_NAME __gconv_transform_internal_ascii
fd1b5c0f 842#define ONE_DIRECTION 1
8619129f
UD
843
844#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
845#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
846#define LOOPFCT FROM_LOOP
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (__glibc_likely (wc <= 0x7f))                                          \
      {                                                                       \
        /* Fits into ASCII: emit the single byte.  */                        \
        *outptr++ = wc;                                                       \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Not representable in ASCII: hand the character to the tag and     \
           error handling macros.  */                                        \
        UNICODE_TAG_HANDLER (wc, 4);                                          \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
  }
55985355 861#define LOOP_NEED_FLAGS
8619129f
UD
862#include <iconv/loop.c>
863#include <iconv/skeleton.c>
864
865
866/* Convert from the internal (UCS4-like) format to UTF-8. */
867#define DEFINE_INIT 0
868#define DEFINE_FINI 0
869#define MIN_NEEDED_FROM 4
870#define MIN_NEEDED_TO 1
871#define MAX_NEEDED_TO 6
872#define FROM_DIRECTION 1
873#define FROM_LOOP internal_utf8_loop
874#define TO_LOOP internal_utf8_loop /* This is not used. */
875#define FUNCTION_NAME __gconv_transform_internal_utf8
fd1b5c0f 876#define ONE_DIRECTION 1
8619129f
UD
877
878#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
879#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
5aa8ff62 880#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
8619129f
UD
881#define LOOPFCT FROM_LOOP
#define BODY \
  {                                                                           \
    /* One INTERNAL character, encoded below as 1 to 6 UTF-8 bytes.  */      \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (__glibc_likely (wc < 0x80))                                           \
      /* It's a one byte sequence.  */                                        \
      *outptr++ = (unsigned char) wc;                                         \
    else if (__glibc_likely (wc <= 0x7fffffff                                 \
                             && (wc < 0xd800 || wc > 0xdfff)))                \
      {                                                                       \
        /* UTF-16 surrogates are excluded above: they are not characters     \
           and the UTF-8 decoder in this file refuses them as well, so       \
           they take the error path below instead of being encoded.  */      \
        size_t step;                                                          \
        unsigned char *start;                                                 \
                                                                              \
        /* Find the sequence length: STEP bytes carry 5 * STEP + 1 bits;     \
           six bytes are used when nothing shorter fits.  */                 \
        for (step = 2; step < 6; ++step)                                      \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)                   \
            break;                                                            \
                                                                              \
        if (__glibc_unlikely (outptr + step > outend))                        \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        /* The lead byte starts with STEP one bits followed by a zero; its   \
           payload is merged in once the trail bytes are written.  */        \
        start = outptr;                                                       \
        *outptr = (unsigned char) (~0xff >> step);                            \
        outptr += step;                                                       \
        /* Fill the trail bytes back to front, six bits each.  */            \
        do                                                                    \
          {                                                                   \
            start[--step] = 0x80 | (wc & 0x3f);                               \
            wc >>= 6;                                                         \
          }                                                                   \
        while (step > 1);                                                     \
        start[0] |= wc;                                                       \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
db2d05f9 923#define LOOP_NEED_FLAGS
8619129f
UD
924#include <iconv/loop.c>
925#include <iconv/skeleton.c>
926
927
928/* Convert from UTF-8 to the internal (UCS4-like) format. */
929#define DEFINE_INIT 0
930#define DEFINE_FINI 0
931#define MIN_NEEDED_FROM 1
932#define MAX_NEEDED_FROM 6
933#define MIN_NEEDED_TO 4
934#define FROM_DIRECTION 1
935#define FROM_LOOP utf8_internal_loop
936#define TO_LOOP utf8_internal_loop /* This is not used. */
937#define FUNCTION_NAME __gconv_transform_utf8_internal
fd1b5c0f 938#define ONE_DIRECTION 1
8619129f
UD
939
940#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
5aa8ff62 941#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
8619129f
UD
942#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
943#define LOOPFCT FROM_LOOP
/* Loop body for the UTF-8 to INTERNAL direction: decode one UTF-8
   sequence of 1 to 6 bytes starting at INPTR and store the resulting
   code point as one 32-bit unit at OUTPTR.  Lead bytes 0xc0/0xc1,
   stray trail bytes, non-shortest forms and UTF-16 surrogates are all
   routed to the `errout' label.  NOTE(review): the enclosing loop and
   the variables inptr, inend, outptr and result are presumably supplied
   by iconv/loop.c, which is not visible here.  */
#define BODY \
  {                                                                           \
    /* Next input byte.  */                                                   \
    uint32_t ch = *inptr;                                                     \
                                                                              \
    if (__glibc_likely (ch < 0x80))                                           \
      {                                                                       \
        /* One byte sequence.  */                                             \
        ++inptr;                                                              \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* CNT is the sequence length implied by the lead byte; I counts     \
           the bytes examined so far.  */                                    \
        uint_fast32_t cnt;                                                    \
        uint_fast32_t i;                                                      \
                                                                              \
        if (ch >= 0xc2 && ch < 0xe0)                                          \
          {                                                                   \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,  \
               otherwise the wide character could have been represented      \
               using a single byte.  */                                      \
            cnt = 2;                                                          \
            ch &= 0x1f;                                                       \
          }                                                                   \
        else if (__glibc_likely ((ch & 0xf0) == 0xe0))                        \
          {                                                                   \
            /* We expect three bytes.  */                                     \
            cnt = 3;                                                          \
            ch &= 0x0f;                                                       \
          }                                                                   \
        else if (__glibc_likely ((ch & 0xf8) == 0xf0))                        \
          {                                                                   \
            /* We expect four bytes.  */                                      \
            cnt = 4;                                                          \
            ch &= 0x07;                                                       \
          }                                                                   \
        else if (__glibc_likely ((ch & 0xfc) == 0xf8))                        \
          {                                                                   \
            /* We expect five bytes.  */                                      \
            cnt = 5;                                                          \
            ch &= 0x03;                                                       \
          }                                                                   \
        else if (__glibc_likely ((ch & 0xfe) == 0xfc))                        \
          {                                                                   \
            /* We expect six bytes.  */                                       \
            cnt = 6;                                                          \
            ch &= 0x01;                                                       \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* Not a valid lead byte.  Search the end of this ill-formed     \
               UTF-8 character.  This is the next byte with                  \
               (x & 0xc0) != 0x80; at most five bytes are grouped.  */       \
            i = 0;                                                            \
            do                                                                \
              ++i;                                                            \
            while (inptr + i < inend                                          \
                   && (*(inptr + i) & 0xc0) == 0x80                           \
                   && i < 5);                                                 \
                                                                              \
          /* Shared error exit, also reached via goto from the checks        \
             below; I is the length of the offending sequence.               \
             NOTE(review): the handler macro is defined elsewhere and        \
             presumably leaves this iteration via break or continue --       \
             confirm, since CNT is not set on this path.  */                 \
          errout:                                                             \
            STANDARD_FROM_LOOP_ERR_HANDLER (i);                               \
          }                                                                   \
                                                                              \
        if (__glibc_unlikely (inptr + cnt > inend))                           \
          {                                                                   \
            /* We don't have enough input.  But before we report that check  \
               that all the bytes are correct.  */                           \
            for (i = 1; inptr + i < inend; ++i)                               \
              if ((inptr[i] & 0xc0) != 0x80)                                  \
                break;                                                        \
                                                                              \
            if (__glibc_likely (inptr + i == inend))                          \
              {                                                               \
                /* Every available byte is a valid trail byte: the sequence  \
                   is merely truncated.  */                                  \
                result = __GCONV_INCOMPLETE_INPUT;                            \
                break;                                                        \
              }                                                               \
                                                                              \
            goto errout;                                                      \
          }                                                                   \
                                                                              \
        /* Read the possible remaining bytes.  */                             \
        for (i = 1; i < cnt; ++i)                                             \
          {                                                                   \
            uint32_t byte = inptr[i];                                         \
                                                                              \
            if ((byte & 0xc0) != 0x80)                                        \
              /* This is an illegal encoding.  */                             \
              break;                                                          \
                                                                              \
            ch <<= 6;                                                         \
            ch |= byte & 0x3f;                                                \
          }                                                                   \
                                                                              \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.              \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could      \
           have been represented with fewer than cnt bytes.  */              \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)                \
            /* Do not accept UTF-16 surrogates.  */                           \
            || (ch >= 0xd800 && ch <= 0xdfff))                                \
          {                                                                   \
            /* This is an illegal encoding.  */                               \
            goto errout;                                                      \
          }                                                                   \
                                                                              \
        inptr += cnt;                                                         \
      }                                                                       \
                                                                              \
    /* Now adjust the pointers and store the result.  */                      \
    *((uint32_t *) outptr) = ch;                                              \
    outptr += sizeof (uint32_t);                                              \
  }
55985355 1054#define LOOP_NEED_FLAGS
fd1b5c0f
UD
1055
/* Save a truncated UTF-8 sequence in the conversion state.

   Reads the bytes from *INPTRP up to INEND, advancing *INPTRP to INEND.
   Afterwards:
     - the low byte of state->__count holds the number of bytes that were
       available, and the bits from bit 8 upwards hold the length the
       complete sequence must have according to its lead byte;
     - state->__value.__wch holds the payload bits collected so far,
       shifted left so that the missing trail bytes' bits are zero.  */
#define STORE_REST \
  {                                                                           \
    /* We store the remaining bytes while converting them into the UCS4       \
       format.  We can assume that the first byte in the buffer is            \
       correct and that it requires a larger number of bytes than there       \
       are in the input buffer.  */                                           \
    wint_t ch = **inptrp;                                                     \
    size_t cnt, r;                                                            \
                                                                              \
    state->__count = inend - *inptrp;                                         \
                                                                              \
    assert (ch != 0xc0 && ch != 0xc1);                                        \
    if (ch >= 0xc2 && ch < 0xe0)                                              \
      {                                                                       \
        /* We expect two bytes.  The first byte cannot be 0xc0 or             \
           0xc1, otherwise the wide character could have been                 \
           represented using a single byte.  */                               \
        cnt = 2;                                                              \
        ch &= 0x1f;                                                           \
      }                                                                       \
    else if (__glibc_likely ((ch & 0xf0) == 0xe0))                            \
      {                                                                       \
        /* We expect three bytes.  */                                         \
        cnt = 3;                                                              \
        ch &= 0x0f;                                                           \
      }                                                                       \
    else if (__glibc_likely ((ch & 0xf8) == 0xf0))                            \
      {                                                                       \
        /* We expect four bytes.  */                                          \
        cnt = 4;                                                              \
        ch &= 0x07;                                                           \
      }                                                                       \
    else if (__glibc_likely ((ch & 0xfc) == 0xf8))                            \
      {                                                                       \
        /* We expect five bytes.  */                                          \
        cnt = 5;                                                              \
        ch &= 0x03;                                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* We expect six bytes.  */                                           \
        cnt = 6;                                                              \
        ch &= 0x01;                                                           \
      }                                                                       \
                                                                              \
    /* The first byte is already consumed.  R counts the trail bytes          \
       that are still missing.  */                                            \
    r = cnt - 1;                                                              \
    while (++(*inptrp) < inend)                                               \
      {                                                                       \
        ch <<= 6;                                                             \
        ch |= **inptrp & 0x3f;                                                \
        --r;                                                                  \
      }                                                                       \
                                                                              \
    /* Shift for the so far missing bytes.  */                                \
    ch <<= r * 6;                                                             \
                                                                              \
    /* Store the number of bytes expected for the entire sequence.  */        \
    state->__count |= cnt << 8;                                               \
                                                                              \
    /* Store the value.  */                                                   \
    state->__value.__wch = ch;                                                \
  }
1119
/* Rebuild the bytes of a saved, truncated UTF-8 sequence from the
   conversion state.

   state->__count is read as: low byte = number of bytes that had been
   available (stored into INLEN), bits from bit 8 upwards = total length
   of the sequence.  BYTEBUF[0] receives the lead byte for that length and
   BYTEBUF[1 .. INLEN-1] receive the trail bytes recovered from
   state->__value.__wch.  Positions at or beyond INLEN are not written.  */
#define UNPACK_BYTES \
  {                                                                           \
    /* Lead byte markers for sequences of 2, 3, 4, 5 and 6 bytes.  */         \
    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };  \
    wint_t wch = state->__value.__wch;                                        \
    size_t ntotal = state->__count >> 8;                                      \
                                                                              \
    inlen = state->__count & 255;                                             \
                                                                              \
    bytebuf[0] = inmask[ntotal - 2];                                          \
                                                                              \
    /* Walk from the last trail byte position down to position 1, taking      \
       six payload bits per position.  Only positions that were actually      \
       present in the saved input are stored.  */                             \
    do                                                                        \
      {                                                                       \
        if (--ntotal < inlen)                                                 \
          bytebuf[ntotal] = 0x80 | (wch & 0x3f);                              \
        wch >>= 6;                                                            \
      }                                                                       \
    while (ntotal > 1);                                                       \
                                                                              \
    /* What is left of WCH are the payload bits of the lead byte.  */         \
    bytebuf[0] |= wch;                                                        \
  }

/* Forget any saved partial sequence.  */
#define CLEAR_STATE \
  state->__count = 0
1143
1144
8619129f
UD
1145#include <iconv/loop.c>
1146#include <iconv/skeleton.c>
1147
1148
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2_internal_loop
#define TO_LOOP			ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs2_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Read one 16-bit unit at INPTR with get16 and store it, widened to
   32 bits, at OUTPTR.  Values in the surrogate range 0xd800..0xdfff are
   handed to STANDARD_FROM_LOOP_ERR_HANDLER with a length of two bytes.  */
#define BODY \
  {                                                                           \
    uint16_t u1 = get16 (inptr);                                              \
                                                                              \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))                       \
      {                                                                       \
        /* Surrogate characters in UCS-2 input are not valid.  Reject         \
           them.  (Catching this here is not security relevant.)  */          \
        STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
      }                                                                       \
                                                                              \
    *((uint32_t *) outptr) = u1;                                              \
    outptr += sizeof (uint32_t);                                              \
    inptr += 2;                                                               \
  }
#define LOOP_NEED_FLAGS
8619129f
UD
1179#include <iconv/loop.c>
1180#include <iconv/skeleton.c>
1181
1182
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2_loop
#define TO_LOOP			internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs2
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Read one 32-bit character at INPTR and write it as a 16-bit unit at
   OUTPTR with put16.

   Characters >= 0x10000 do not fit: they go through UNICODE_TAG_HANDLER
   and then STANDARD_TO_LOOP_ERR_HANDLER.  Surrogates (0xd800..0xdfff)
   set RESULT to __GCONV_ILLEGAL_INPUT; the loop is left unless
   ignore_errors_p () is true, in which case the four input bytes are
   skipped, *IRREVERSIBLE is incremented and RESULT keeps the error
   value.  */
#define BODY \
  {                                                                           \
    uint32_t val = *((const uint32_t *) inptr);                               \
                                                                              \
    if (__glibc_unlikely (val >= 0x10000))                                    \
      {                                                                       \
        UNICODE_TAG_HANDLER (val, 4);                                         \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))                \
      {                                                                       \
        /* Surrogate characters in UCS-4 input are not valid.                 \
           We must catch this, because the UCS-2 output might be              \
           interpreted as UTF-16 by other programs.  If we let                \
           surrogates pass through, attackers could make a security           \
           hole exploit by synthesizing any desired plane 1-16                \
           character.  */                                                     \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
          break;                                                              \
        inptr += 4;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        put16 (outptr, val);                                                  \
        outptr += sizeof (uint16_t);                                          \
        inptr += 4;                                                           \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
8619129f
UD
1229#include <iconv/loop.c>
1230#include <iconv/skeleton.c>
9b26f5c4
UD
1231
1232
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2reverse_internal_loop
#define TO_LOOP			ucs2reverse_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Read one 16-bit unit at INPTR with get16, swap its two bytes and store
   it, widened to 32 bits, at OUTPTR.

   Surrogates are reported through STANDARD_FROM_LOOP_ERR_HANDLER, the
   same handler the native-endian UCS2 loop uses, so that both byte
   orders report an invalid unit identically.  */
#define BODY \
  {                                                                           \
    uint16_t u1 = bswap_16 (get16 (inptr));                                   \
                                                                              \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))                       \
      {                                                                       \
        /* Surrogate characters in UCS-2 input are not valid.  Reject         \
           them.  (Catching this here is not security relevant.)  */          \
        STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
      }                                                                       \
                                                                              \
    *((uint32_t *) outptr) = u1;                                              \
    outptr += sizeof (uint32_t);                                              \
    inptr += 2;                                                               \
  }
#define LOOP_NEED_FLAGS
9b26f5c4
UD
1270#include <iconv/loop.c>
1271#include <iconv/skeleton.c>
1272
1273
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2reverse_loop
#define TO_LOOP			internal_ucs2reverse_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Read one 32-bit character at INPTR and write it, byte-swapped, as a
   16-bit unit at OUTPTR with put16.

   Characters >= 0x10000 do not fit: they go through UNICODE_TAG_HANDLER
   and then STANDARD_TO_LOOP_ERR_HANDLER.  Surrogates (0xd800..0xdfff)
   set RESULT to __GCONV_ILLEGAL_INPUT before ignore_errors_p () is
   consulted, exactly as the native-endian UCS2 loop does, so a skipped
   surrogate is still visible in RESULT when errors are ignored.  */
#define BODY \
  {                                                                           \
    uint32_t val = *((const uint32_t *) inptr);                               \
                                                                              \
    if (__glibc_unlikely (val >= 0x10000))                                    \
      {                                                                       \
        UNICODE_TAG_HANDLER (val, 4);                                         \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))                \
      {                                                                       \
        /* Surrogate characters in UCS-4 input are not valid.                 \
           We must catch this, because the UCS-2 output might be              \
           interpreted as UTF-16 by other programs.  If we let                \
           surrogates pass through, attackers could make a security           \
           hole exploit by synthesizing any desired plane 1-16                \
           character.  */                                                     \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
          break;                                                              \
        inptr += 4;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        put16 (outptr, bswap_16 (val));                                       \
        outptr += sizeof (uint16_t);                                          \
        inptr += 4;                                                           \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
9b26f5c4
UD
1321#include <iconv/loop.c>
1322#include <iconv/skeleton.c>