]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_simple.c
Fix http: URL in 'configure'
[thirdparty/glibc.git] / iconv / gconv_simple.c
CommitLineData
6973fc01 1/* Simple transformations functions.
04277e02 2 Copyright (C) 1997-2019 Free Software Foundation, Inc.
6973fc01
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
6973fc01
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
6973fc01 15
41bdb6e2 16 You should have received a copy of the GNU Lesser General Public
59ba27a6 17 License along with the GNU C Library; if not, see
5a82c748 18 <https://www.gnu.org/licenses/>. */
6973fc01 19
f1fa8b68 20#include <byteswap.h>
55985355 21#include <dlfcn.h>
f1fa8b68 22#include <endian.h>
f4017d20 23#include <errno.h>
6973fc01 24#include <gconv.h>
d2374599 25#include <stdint.h>
6973fc01
UD
26#include <stdlib.h>
27#include <string.h>
28#include <wchar.h>
29#include <sys/param.h>
f9ad060c 30#include <gconv_int.h>
6973fc01 31
17427edd 32#define BUILTIN_ALIAS(s1, s2) /* nothing */
f9ad060c
UD
33#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 MinF, MaxF, MinT, MaxT) \
17427edd 35 extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
a784e502 36 const unsigned char **, const unsigned char *, \
17427edd
UD
37 unsigned char **, size_t *, int, int);
38#include "gconv_builtin.h"
39
40
a904b5d9
UD
41#ifndef EILSEQ
42# define EILSEQ EINVAL
43#endif
44
45
f9ad060c
UD
/* Single-byte to INTERNAL conversion which accepts only ASCII.
   Returns C itself when it is below 0x80 and WEOF for every other
   byte.  STEP is not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  /* A set high bit means the byte is outside the ASCII range.  */
  return (c & 0x80) == 0 ? (wint_t) c : WEOF;
}
56
57
f1fa8b68
UD
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 standard
   says that unless some higher protocol specifies it differently, the
   byte order is big endian.  */
8619129f
UD
63#define DEFINE_INIT 0
64#define DEFINE_FINI 0
65#define MIN_NEEDED_FROM 4
66#define MIN_NEEDED_TO 4
67#define FROM_DIRECTION 1
68#define FROM_LOOP internal_ucs4_loop
69#define TO_LOOP internal_ucs4_loop /* This is not used. */
70#define FUNCTION_NAME __gconv_transform_internal_ucs4
0cdddc25 71#define ONE_DIRECTION 0
8619129f
UD
72
73
/* Convert whole 4-byte characters from the internal format to UCS4
   (big endian).  As many characters are converted as fit completely
   into both the input range [*INPTRP, INEND) and the output range
   [*OUTPTRP, OUTEND); both pointers are advanced accordingly.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room
   remain, and __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA
   and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of complete characters both buffers can hold.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* The host byte order differs from the target: swap every word.  */
  const unsigned char *const src_stop = src + 4 * nchars;
  uint32_t *dst32 = (uint32_t *) dst;

  while (src != src_stop)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
d2374599 115
27822ce6 116#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop which only performs byte accesses and
   therefore works on buffers that are not 4-byte aligned.  Converts
   as many complete characters as fit into both buffers, advances
   *INPTRP and *OUTPTRP, and returns __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  STEP, STEP_DATA
   and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of complete characters both buffers can hold.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  size_t k;
  int b;

  for (k = 0; k < nchars; ++k)
    {
      for (b = 0; b < 4; ++b)
        dst[b] = src[3 - b];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
# error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
164#endif
165
fd1b5c0f
UD
166
/* Finish one character whose first bytes were saved in the conversion
   state by an earlier call.  Bytes are taken from the input until the
   state buffer holds four of them.  If the input runs out first, the
   new byte count is recorded in the low three bits of the state's
   __count field and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise
   the character is written in UCS4 (big endian) byte order at
   *OUTPTRP, the count bits are cleared and __GCONV_OK is returned.
   No check against OUTEND is made here.  STEP and IRREVERSIBLE are
   not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *st = step_data->__statep;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t have = st->__count & 7;

  /* Top up the state buffer from the input.  */
  for (; src < inend && have < 4; ++have)
    st->__value.__wchb[have] = *src++;
  *inptrp = src;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not a complete character; remember how many bytes
         have been collected so far.  */
      st->__count = (st->__count & ~7) | have;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* The stored bytes are in host order; emit them reversed.  */
  dst[3] = st->__value.__wchb[0];
  dst[2] = st->__value.__wchb[1];
  dst[1] = st->__value.__wchb[2];
  dst[0] = st->__value.__wchb[3];
#elif __BYTE_ORDER == __BIG_ENDIAN
  dst[3] = st->__value.__wchb[3];
  dst[2] = st->__value.__wchb[2];
  dst[1] = st->__value.__wchb[1];
  dst[0] = st->__value.__wchb[0];
#else
# error "This endianess is not supported."
#endif
  *outptrp = dst + 4;

  /* The state buffer is empty again.  */
  st->__count &= ~7;

  return __GCONV_OK;
}
214
8619129f 215#include <iconv/skeleton.c>
d2374599 216
d2374599 217
4a069c33
UD
218/* Transform from UCS4 to the internal, UCS4-like format. Unlike
219 for the other direction we have to check for correct values here. */
220#define DEFINE_INIT 0
221#define DEFINE_FINI 0
222#define MIN_NEEDED_FROM 4
223#define MIN_NEEDED_TO 4
224#define FROM_DIRECTION 1
225#define FROM_LOOP ucs4_internal_loop
226#define TO_LOOP ucs4_internal_loop /* This is not used. */
227#define FUNCTION_NAME __gconv_transform_ucs4_internal
0cdddc25 228#define ONE_DIRECTION 0
4a069c33
UD
229
230
/* Convert whole 4-byte UCS4 (big endian) characters to the internal
   format, validating each value.  Characters are processed as long as
   a complete one is available in [*INPTRP, INEND) and four bytes of
   room remain in [*OUTPTRP, OUTEND).

   Values above 0x7fffffff are invalid.  For such a value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise the pointers are stored, with *INPTRP at the offending
       character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are re-checked on every iteration instead of using a
     precomputed character count.  A skipped (ignored) character
     consumes input but no output, so a fixed count would stop early
     while complete input and free output space are still available,
     and the status below would then be wrong.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
295
27822ce6 296#if !_STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop which only performs byte accesses and
   therefore works on buffers that are not 4-byte aligned.  Converts
   UCS4 (big endian) characters to the internal format as long as a
   complete character is available and four bytes of output room
   remain.

   A character whose most significant byte is above 0x7f encodes a
   value above 0x7fffffff and is invalid.  For such a character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise the pointers are stored, with *INPTRP at the offending
       character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are re-checked on every iteration instead of using a
     precomputed character count: a skipped (ignored) character
     consumes input but no output.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* The first byte is the most significant one.  Anything above
         0x7f (including 0x80 itself) means the value exceeds
         0x7fffffff.  */
      if (__glibc_unlikely (inptr[0] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
365#endif
366
367
/* Finish one UCS4 (big endian) character whose first bytes were saved
   in the conversion state by an earlier call.  Bytes are taken from
   the input until the state buffer holds four of them.  If the input
   runs out first, the new byte count is recorded in the low three
   bits of the state's __count field and __GCONV_INCOMPLETE_INPUT is
   returned.

   A complete character whose most significant byte is above 0x7f is
   invalid (its value exceeds 0x7fffffff).  Unless
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the bytes
   consumed by this call are given back by rewinding *INPTRP and
   __GCONV_ILLEGAL_INPUT is returned; with the flag set the character
   is dropped.  A valid character is written in host byte order at
   *OUTPTRP.  In both of the latter cases the count bits are cleared
   and __GCONV_OK is returned.  No check against OUTEND is made here.
   STEP and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The first stored byte is the most significant one.  Anything
     above 0x7f (including 0x80 itself) means the value exceeds
     0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input by this call;
             state->__count still holds the count from before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
429
430#include <iconv/skeleton.c>
431
432
433/* Similarly for the little endian form. */
8d617a71
UD
434#define DEFINE_INIT 0
435#define DEFINE_FINI 0
436#define MIN_NEEDED_FROM 4
437#define MIN_NEEDED_TO 4
438#define FROM_DIRECTION 1
439#define FROM_LOOP internal_ucs4le_loop
440#define TO_LOOP internal_ucs4le_loop /* This is not used. */
441#define FUNCTION_NAME __gconv_transform_internal_ucs4le
0cdddc25 442#define ONE_DIRECTION 0
8d617a71
UD
443
444
/* Convert whole 4-byte characters from the internal format to
   little endian UCS4.  As many characters are converted as fit
   completely into both the input range [*INPTRP, INEND) and the
   output range [*OUTPTRP, OUTEND); both pointers are advanced
   accordingly.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room
   remain, and __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA
   and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of complete characters both buffers can hold.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* The host byte order differs from the target: swap every word.  */
  const unsigned char *const src_stop = src + 4 * nchars;
  uint32_t *dst32 = (uint32_t *) dst;

  while (src != src_stop)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
487
27822ce6 488#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop which only performs byte accesses
   and therefore works on buffers that are not 4-byte aligned.
   Converts as many complete characters as fit into both buffers and
   advances *INPTRP and *OUTPTRP.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain,
   and __GCONV_FULL_OUTPUT otherwise.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of complete characters both buffers can hold.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  size_t k;
  int b;

  for (k = 0; k < nchars; ++k)
    {
      for (b = 0; b < 4; ++b)
        dst[b] = src[3 - b];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both formats are identical, a plain copy suffices.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
# error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      /* A complete character is left, so the output must be full.  */
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
539#endif
540
fd1b5c0f
UD
541
/* Finish one character whose first bytes were saved in the conversion
   state by an earlier call.  Bytes are taken from the input until the
   state buffer holds four of them.  If the input runs out first, the
   new byte count is recorded in the low three bits of the state's
   __count field and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise
   the character is written in little endian byte order at *OUTPTRP,
   the count bits are cleared and __GCONV_OK is returned.  No check
   against OUTEND is made here.  STEP and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *st = step_data->__statep;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t have = st->__count & 7;

  /* Top up the state buffer from the input.  */
  for (; src < inend && have < 4; ++have)
    st->__value.__wchb[have] = *src++;
  *inptrp = src;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not a complete character; remember how many bytes
         have been collected so far.  */
      st->__count = (st->__count & ~7) | have;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  /* The stored bytes are in host order; emit them reversed.  */
  dst[3] = st->__value.__wchb[0];
  dst[2] = st->__value.__wchb[1];
  dst[1] = st->__value.__wchb[2];
  dst[0] = st->__value.__wchb[3];
#else
  dst[3] = st->__value.__wchb[3];
  dst[2] = st->__value.__wchb[2];
  dst[1] = st->__value.__wchb[1];
  dst[0] = st->__value.__wchb[0];
#endif

  *outptrp = dst + 4;

  /* The state buffer is empty again.  */
  st->__count &= ~7;

  return __GCONV_OK;
}
589
8d617a71
UD
590#include <iconv/skeleton.c>
591
592
4a069c33
UD
593/* And finally from UCS4-LE to the internal encoding. */
594#define DEFINE_INIT 0
595#define DEFINE_FINI 0
596#define MIN_NEEDED_FROM 4
597#define MIN_NEEDED_TO 4
598#define FROM_DIRECTION 1
599#define FROM_LOOP ucs4le_internal_loop
600#define TO_LOOP ucs4le_internal_loop /* This is not used. */
601#define FUNCTION_NAME __gconv_transform_ucs4le_internal
0cdddc25 602#define ONE_DIRECTION 0
4a069c33
UD
603
604
/* Convert whole 4-byte little endian UCS4 characters to the internal
   format, validating each value.  Characters are processed as long as
   a complete one is available in [*INPTRP, INEND) and four bytes of
   room remain in [*OUTPTRP, OUTEND).

   Values above 0x7fffffff are invalid.  For such a value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise the pointers are stored, with *INPTRP at the offending
       character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT or
   __GCONV_FULL_OUTPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are re-checked on every iteration instead of using a
     precomputed character count.  A skipped (ignored) character
     consumes input but no output, so a fixed count would stop early
     while complete input and free output space are still available,
     which would trip the assertion below.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__glibc_unlikely (inval > 0x7fffffff))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      /* A complete character is left, so the loop can only have
         stopped because the output is full.  */
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
672
27822ce6 673#if !_STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop which only performs byte accesses
   and therefore works on buffers that are not 4-byte aligned.
   Converts little endian UCS4 characters to the internal format as
   long as a complete character is available and four bytes of output
   room remain.

   A character whose most significant byte (the fourth one) is above
   0x7f encodes a value above 0x7fffffff and is invalid.  For such a
   character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       without storing the pointers back;
     - if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise the pointers are stored, with *INPTRP at the offending
       character, and __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT or
   __GCONV_FULL_OUTPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are re-checked on every iteration instead of using a
     precomputed character count: a skipped (ignored) character
     consumes input but no output, and stopping early would trip the
     assertion below.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* The last byte is the most significant one.  Anything above
         0x7f (including 0x80 itself) means the value exceeds
         0x7fffffff.  */
      if (__glibc_unlikely (inptr[3] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (inend - *inptrp < 4)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      /* A complete character is left, so the loop can only have
         stopped because the output is full.  */
      assert (outend - *outptrp < 4);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
746#endif
747
748
/* Finish one little endian UCS4 character whose first bytes were
   saved in the conversion state by an earlier call.  Bytes are taken
   from the input until the state buffer holds four of them.  If the
   input runs out first, the new byte count is recorded in the low
   three bits of the state's __count field and
   __GCONV_INCOMPLETE_INPUT is returned.

   A complete character whose most significant byte (the fourth one)
   is above 0x7f is invalid (its value exceeds 0x7fffffff).  Unless
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the bytes
   consumed by this call are given back by rewinding *INPTRP, as
   ucs4_internal_loop_single does, and __GCONV_ILLEGAL_INPUT is
   returned; with the flag set the character is dropped.  A valid
   character is written in host byte order at *OUTPTRP.  In both of
   the latter cases the count bits are cleared and __GCONV_OK is
   returned.  No check against OUTEND is made here.  STEP and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The last stored byte is the most significant one.  Anything
     above 0x7f (including 0x80 itself) means the value exceeds
     0x7fffffff.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input by this call;
             state->__count still holds the count from before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
807
808#include <iconv/skeleton.c>
809
810
8619129f
UD
811/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
812#define DEFINE_INIT 0
813#define DEFINE_FINI 0
814#define MIN_NEEDED_FROM 1
815#define MIN_NEEDED_TO 4
816#define FROM_DIRECTION 1
817#define FROM_LOOP ascii_internal_loop
818#define TO_LOOP ascii_internal_loop /* This is not used. */
819#define FUNCTION_NAME __gconv_transform_ascii_internal
fd1b5c0f 820#define ONE_DIRECTION 1
8619129f
UD
821
822#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
823#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
824#define LOOPFCT FROM_LOOP
/* Loop body for ASCII to internal: a byte below 0x80 is stored as one
   32-bit value and both pointers are advanced; any other byte is
   handed to STANDARD_FROM_LOOP_ERR_HANDLER with a length of one.  */
#define BODY \
  {                                                                           \
    if (__glibc_likely (*inptr < 0x80))                                       \
      {                                                                       \
        /* A valid ASCII byte maps to the identical code point.  */           \
        *((uint32_t *) outptr) = *inptr;                                      \
        ++inptr;                                                              \
        outptr += sizeof (uint32_t);                                          \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* The byte is outside the ASCII range.  Transliteration is not       \
           attempted: this is not a case of a character that cannot be        \
           represented, it is a genuine bug in the input.  */                 \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
  }
55985355 842#define LOOP_NEED_FLAGS
8619129f
UD
843#include <iconv/loop.c>
844#include <iconv/skeleton.c>
845
846
847/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
848#define DEFINE_INIT 0
849#define DEFINE_FINI 0
850#define MIN_NEEDED_FROM 4
851#define MIN_NEEDED_TO 1
852#define FROM_DIRECTION 1
853#define FROM_LOOP internal_ascii_loop
854#define TO_LOOP internal_ascii_loop /* This is not used. */
855#define FUNCTION_NAME __gconv_transform_internal_ascii
fd1b5c0f 856#define ONE_DIRECTION 1
8619129f
UD
857
858#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
859#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
860#define LOOPFCT FROM_LOOP
/* Loop body for internal to ASCII: a 32-bit value of at most 0x7f is
   stored as a single byte and both pointers are advanced; any other
   value is passed to UNICODE_TAG_HANDLER and then to
   STANDARD_TO_LOOP_ERR_HANDLER with a length of four.  */
#define BODY \
  {                                                                           \
    if (__glibc_likely (*((const uint32_t *) inptr) <= 0x7f))                 \
      {                                                                       \
        /* The value fits into a single ASCII byte.  */                       \
        *outptr = *((const uint32_t *) inptr);                                \
        ++outptr;                                                             \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);                 \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
  }
55985355 875#define LOOP_NEED_FLAGS
8619129f
UD
876#include <iconv/loop.c>
877#include <iconv/skeleton.c>
878
879
880/* Convert from the internal (UCS4-like) format to UTF-8. */
881#define DEFINE_INIT 0
882#define DEFINE_FINI 0
883#define MIN_NEEDED_FROM 4
884#define MIN_NEEDED_TO 1
885#define MAX_NEEDED_TO 6
886#define FROM_DIRECTION 1
887#define FROM_LOOP internal_utf8_loop
888#define TO_LOOP internal_utf8_loop /* This is not used. */
889#define FUNCTION_NAME __gconv_transform_internal_utf8
fd1b5c0f 890#define ONE_DIRECTION 1
8619129f
UD
891
892#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
893#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
5aa8ff62 894#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
8619129f
UD
895#define LOOPFCT FROM_LOOP
/* Loop body for internal to UTF-8.  Values below 0x80 are written as
   one byte.  Other values up to 0x7fffffff that are not UTF-16
   surrogates (0xd800..0xdfff) are written as a sequence of two to six
   bytes; if that sequence does not fit before OUTEND, RESULT is set
   to __GCONV_FULL_OUTPUT and the loop is left.  Everything else is
   handed to STANDARD_TO_LOOP_ERR_HANDLER.  */
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (__glibc_likely (wc < 0x80))                                           \
      {                                                                       \
        /* A single byte is enough.  */                                       \
        *outptr = (unsigned char) wc;                                         \
        ++outptr;                                                             \
      }                                                                       \
    else if (__glibc_likely (wc <= 0x7fffffff                                 \
                             && (wc < 0xd800 || wc > 0xdfff)))                \
      {                                                                       \
        size_t nbytes = 2;                                                    \
        unsigned char *first;                                                 \
                                                                              \
        /* Find the shortest sequence length which can hold WC: a             \
           sequence of NBYTES bytes carries 5 * NBYTES + 1 bits.  */          \
        while (nbytes < 6                                                     \
               && (wc & (~(uint32_t)0 << (5 * nbytes + 1))) != 0)             \
          ++nbytes;                                                           \
                                                                              \
        if (__glibc_unlikely (outptr + nbytes > outend))                      \
          {                                                                   \
            /* The sequence does not fit into the output buffer.  */          \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        first = outptr;                                                       \
        outptr += nbytes;                                                     \
        /* Length marker of the lead byte; payload bits are added last.  */   \
        *first = (unsigned char) (~0xff >> nbytes);                           \
        /* Fill in the continuation bytes from the back, six bits each.  */   \
        do                                                                    \
          {                                                                   \
            --nbytes;                                                         \
            first[nbytes] = 0x80 | (wc & 0x3f);                               \
            wc >>= 6;                                                         \
          }                                                                   \
        while (nbytes > 1);                                                   \
        first[0] |= wc;                                                       \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
db2d05f9 938#define LOOP_NEED_FLAGS
8619129f
UD
939#include <iconv/loop.c>
940#include <iconv/skeleton.c>
941
942
943/* Convert from UTF-8 to the internal (UCS4-like) format. */
944#define DEFINE_INIT 0
945#define DEFINE_FINI 0
946#define MIN_NEEDED_FROM 1
947#define MAX_NEEDED_FROM 6
948#define MIN_NEEDED_TO 4
949#define FROM_DIRECTION 1
950#define FROM_LOOP utf8_internal_loop
951#define TO_LOOP utf8_internal_loop /* This is not used. */
952#define FUNCTION_NAME __gconv_transform_utf8_internal
fd1b5c0f 953#define ONE_DIRECTION 1
8619129f
UD
954
955#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
5aa8ff62 956#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
8619129f
UD
957#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
958#define LOOPFCT FROM_LOOP
/* Loop body: decode one UTF-8 sequence starting at inptr and store it as
   one uint32_t at outptr.  A first byte below 0x80 is stored unchanged.
   Otherwise the first byte selects an expected sequence length cnt of
   2 to 6 bytes and is masked down to its payload bits; each following
   byte must have the form 10xxxxxx and contributes six more bits.
   If fewer than cnt bytes are available and all of them are well-formed,
   result is set to __GCONV_INCOMPLETE_INPUT and the loop is left.
   Malformed trail bytes, encodings longer than necessary and values in
   0xd800..0xdfff jump to the errout label, which hands the byte count i
   to STANDARD_FROM_LOOP_ERR_HANDLER (defined outside this block).  */
959#define BODY \
960 { \
8619129f 961 /* Next input byte. */ \
26a51060 962 uint32_t ch = *inptr; \
8619129f 963 \
a1ffb40e 964 if (__glibc_likely (ch < 0x80)) \
8619129f 965 { \
5aa8ff62 966 /* One byte sequence. */ \
5aa8ff62 967 ++inptr; \
8619129f
UD
968 } \
969 else \
970 { \
26a51060
UD
971 uint_fast32_t cnt; \
972 uint_fast32_t i; \
973 \
9ea2c194 974 if (ch >= 0xc2 && ch < 0xe0) \
5aa8ff62 975 { \
9ea2c194
AJ
976 /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
977 otherwise the wide character could have been represented \
978 using a single byte. */ \
5aa8ff62
UD
979 cnt = 2; \
980 ch &= 0x1f; \
981 } \
a1ffb40e 982 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
5aa8ff62
UD
983 { \
984 /* We expect three bytes. */ \
985 cnt = 3; \
986 ch &= 0x0f; \
987 } \
a1ffb40e 988 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
5aa8ff62
UD
989 { \
990 /* We expect four bytes. */ \
991 cnt = 4; \
992 ch &= 0x07; \
993 } \
a1ffb40e 994 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
5aa8ff62
UD
995 { \
996 /* We expect five bytes. */ \
997 cnt = 5; \
998 ch &= 0x03; \
999 } \
a1ffb40e 1000 else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
5aa8ff62
UD
1001 { \
1002 /* We expect six bytes. */ \
1003 cnt = 6; \
1004 ch &= 0x01; \
1005 } \
1006 else \
8619129f 1007 { \
 /* Reached for first bytes 0x80..0xc1, 0xfe and 0xff, which match \
 none of the lead-byte patterns tested above. */ \
85830c4c
UD
1008 /* Search the end of this ill-formed UTF-8 character. This \
1009 is the next byte with (x & 0xc0) != 0x80. */ \
347bace2 1010 i = 0; \
e438a468 1011 do \
347bace2
UD
1012 ++i; \
1013 while (inptr + i < inend \
1014 && (*(inptr + i) & 0xc0) == 0x80 \
1015 && i < 5); \
85830c4c 1016 \
347bace2
UD
 /* The label is also the target of the two goto statements further \
 down; each of them arrives with i already set by its own loop. */ \
1017 errout: \
1018 STANDARD_FROM_LOOP_ERR_HANDLER (i); \
8619129f
UD
1019 } \
1020 \
a1ffb40e 1021 if (__glibc_unlikely (inptr + cnt > inend)) \
5aa8ff62 1022 { \
fd1b5c0f
UD
1023 /* We don't have enough input. But before we report that check \
1024 that all the bytes are correct. */ \
1025 for (i = 1; inptr + i < inend; ++i) \
1026 if ((inptr[i] & 0xc0) != 0x80) \
1027 break; \
85830c4c 1028 \
a1ffb40e 1029 if (__glibc_likely (inptr + i == inend)) \
85830c4c
UD
1030 { \
1031 result = __GCONV_INCOMPLETE_INPUT; \
1032 break; \
1033 } \
1034 \
347bace2 1035 goto errout; \
5aa8ff62
UD
1036 } \
1037 \
1038 /* Read the possible remaining bytes. */ \
1039 for (i = 1; i < cnt; ++i) \
1040 { \
1041 uint32_t byte = inptr[i]; \
1042 \
1043 if ((byte & 0xc0) != 0x80) \
bd32e4a6
UD
1044 /* This is an illegal encoding. */ \
1045 break; \
5aa8ff62
UD
1046 \
1047 ch <<= 6; \
1048 ch |= byte & 0x3f; \
1049 } \
85830c4c 1050 \
bd32e4a6
UD
1051 /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
1052 If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
1053 have been represented with fewer than cnt bytes. */ \
9c32c895
UD
1054 if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
1055 /* Do not accept UTF-16 surrogates. */ \
1056 || (ch >= 0xd800 && ch <= 0xdfff)) \
bd32e4a6
UD
1057 { \
1058 /* This is an illegal encoding. */ \
347bace2 1059 goto errout; \
bd32e4a6
UD
1060 } \
1061 \
5aa8ff62 1062 inptr += cnt; \
8619129f
UD
1063 } \
1064 \
1065 /* Now adjust the pointers and store the result. */ \
cdda3d7d
AJ
1066 *((uint32_t *) outptr) = ch; \
1067 outptr += sizeof (uint32_t); \
8619129f 1068 }
55985355 1069#define LOOP_NEED_FLAGS
fd1b5c0f
UD
1070
/* Save an incomplete UTF-8 sequence in *state.
   On exit:
     - the low bits of state->__count hold the number of input bytes that
       were available (inend - *inptrp on entry),
     - state->__count bits 8 and up hold cnt, the number of bytes the whole
       sequence needs according to its first byte,
     - state->__value.__wch holds the payload bits collected so far,
       shifted left by six bits for every byte that is still missing,
     - *inptrp has been advanced to inend.
   The trail bytes are not validated here; the macro only masks them with
   0x3f.  */
1071#define STORE_REST \
1072 { \
1073 /* We store the remaining bytes while converting them into the UCS4 \
1074 format. We can assume that the first byte in the buffer is \
1075 correct and that it requires a larger number of bytes than there \
1076 are in the input buffer. */ \
1077 wint_t ch = **inptrp; \
ea31b613 1078 size_t cnt, r; \
fd1b5c0f
UD
1079 \
1080 state->__count = inend - *inptrp; \
 /* Taken before *inptrp is advanced below, so this is the number of \
 bytes of the sequence that are present in the input. */ \
1081 \
9954432e 1082 assert (ch != 0xc0 && ch != 0xc1); \
fd1b5c0f
UD
1083 if (ch >= 0xc2 && ch < 0xe0) \
1084 { \
1085 /* We expect two bytes. The first byte cannot be 0xc0 or \
1086 0xc1, otherwise the wide character could have been \
1087 represented using a single byte. */ \
1088 cnt = 2; \
1089 ch &= 0x1f; \
1090 } \
a1ffb40e 1091 else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
fd1b5c0f
UD
1092 { \
1093 /* We expect three bytes. */ \
1094 cnt = 3; \
1095 ch &= 0x0f; \
1096 } \
a1ffb40e 1097 else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
fd1b5c0f
UD
1098 { \
1099 /* We expect four bytes. */ \
1100 cnt = 4; \
1101 ch &= 0x07; \
1102 } \
a1ffb40e 1103 else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
fd1b5c0f
UD
1104 { \
1105 /* We expect five bytes. */ \
1106 cnt = 5; \
1107 ch &= 0x03; \
1108 } \
1109 else \
1110 { \
1111 /* We expect six bytes. */ \
1112 cnt = 6; \
1113 ch &= 0x01; \
1114 } \
1115 \
1116 /* The first byte is already consumed. */ \
ea31b613 1117 r = cnt - 1; \
fd1b5c0f
UD
1118 while (++(*inptrp) < inend) \
1119 { \
1120 ch <<= 6; \
1121 ch |= **inptrp & 0x3f; \
ea31b613 1122 --r; \
fd1b5c0f
UD
1123 } \
1124 \
1125 /* Shift for the so far missing bytes. */ \
ea31b613
UD
1126 ch <<= r * 6; \
1127 \
1128 /* Store the number of bytes expected for the entire sequence. */ \
41f112ad 1129 state->__count |= cnt << 8; \
fd1b5c0f
UD
1130 \
1131 /* Store the value. */ \
1132 state->__value.__wch = ch; \
1133 }
1134
/* Rebuild the bytes of a saved partial UTF-8 sequence from *state.
   Reads the total sequence length from state->__count >> 8 and the
   number of bytes that were actually seen from state->__count & 255
   (stored into inlen), then fills bytebuf[0 .. inlen-1]: trail bytes are
   0x80 plus six payload bits each, and bytebuf[0] is the lead-byte marker
   for the total length combined with the remaining high payload bits.
   inlen and bytebuf are not declared here; they come from the place
   where the macro is expanded (not shown in this block).  */
1135#define UNPACK_BYTES \
1136 { \
ea31b613 1137 static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
fd1b5c0f 1138 wint_t wch = state->__value.__wch; \
41f112ad 1139 size_t ntotal = state->__count >> 8; \
ea31b613 1140 \
41f112ad 1141 inlen = state->__count & 255; \
fd1b5c0f 1142 \
 /* inmask is indexed by sequence length minus 2 (lengths 2 to 6). */ \
ea31b613 1143 bytebuf[0] = inmask[ntotal - 2]; \
fd1b5c0f 1144 \
cd201e38
UD
 /* Walk from the last position of the sequence down to position 1. \
 Six bits are dropped from wch for every position, but a byte is \
 written only for positions below inlen. */ \
1145 do \
1146 { \
1147 if (--ntotal < inlen) \
1148 bytebuf[ntotal] = 0x80 | (wch & 0x3f); \
1149 wch >>= 6; \
1150 } \
1151 while (ntotal > 1); \
fd1b5c0f
UD
1152 \
 /* What is left in wch are the payload bits of the first byte. */ \
1153 bytebuf[0] |= wch; \
1154 }
1155
41f112ad
UD
/* Forget any saved partial sequence by zeroing state->__count, the field
   in which STORE_REST records the byte counts.  */
1156#define CLEAR_STATE \
1157 state->__count = 0
1158
1159
8619129f
UD
1160#include <iconv/loop.c>
1161#include <iconv/skeleton.c>
1162
1163
1164/* Convert from UCS2 to the internal (UCS4-like) format. */
1165#define DEFINE_INIT 0
1166#define DEFINE_FINI 0
1167#define MIN_NEEDED_FROM 2
1168#define MIN_NEEDED_TO 4
1169#define FROM_DIRECTION 1
1170#define FROM_LOOP ucs2_internal_loop
1171#define TO_LOOP ucs2_internal_loop /* This is not used. */
1172#define FUNCTION_NAME __gconv_transform_ucs2_internal
fd1b5c0f 1173#define ONE_DIRECTION 1
8619129f
UD
1174
1175#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1176#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1177#define LOOPFCT FROM_LOOP
428bcea4 1178#define BODY \
755104ed 1179 { \
606135cf 1180 uint16_t u1 = get16 (inptr); \
755104ed 1181 \
a1ffb40e 1182 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
755104ed
UD
1183 { \
1184 /* Surrogate characters in UCS-2 input are not valid. Reject \
1185 them. (Catching this here is not security relevant.) */ \
e438a468 1186 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
755104ed
UD
1187 } \
1188 \
cdda3d7d
AJ
1189 *((uint32_t *) outptr) = u1; \
1190 outptr += sizeof (uint32_t); \
755104ed
UD
1191 inptr += 2; \
1192 }
1193#define LOOP_NEED_FLAGS
8619129f
UD
1194#include <iconv/loop.c>
1195#include <iconv/skeleton.c>
1196
1197
1198/* Convert from the internal (UCS4-like) format to UCS2. */
1199#define DEFINE_INIT 0
1200#define DEFINE_FINI 0
1201#define MIN_NEEDED_FROM 4
1202#define MIN_NEEDED_TO 2
1203#define FROM_DIRECTION 1
1204#define FROM_LOOP internal_ucs2_loop
1205#define TO_LOOP internal_ucs2_loop /* This is not used. */
1206#define FUNCTION_NAME __gconv_transform_internal_ucs2
fd1b5c0f 1207#define ONE_DIRECTION 1
8619129f
UD
1208
1209#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1210#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1211#define LOOPFCT FROM_LOOP
428bcea4 1212#define BODY \
8619129f 1213 { \
17427edd 1214 uint32_t val = *((const uint32_t *) inptr); \
755104ed 1215 \
a1ffb40e 1216 if (__glibc_unlikely (val >= 0x10000)) \
8619129f 1217 { \
601d2942 1218 UNICODE_TAG_HANDLER (val, 4); \
e438a468 1219 STANDARD_TO_LOOP_ERR_HANDLER (4); \
8619129f 1220 } \
a1ffb40e 1221 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
755104ed
UD
1222 { \
1223 /* Surrogate characters in UCS-4 input are not valid. \
1224 We must catch this, because the UCS-2 output might be \
1225 interpreted as UTF-16 by other programs. If we let \
1226 surrogates pass through, attackers could make a security \
1227 hole exploit by synthesizing any desired plane 1-16 \
1228 character. */ \
e438a468 1229 result = __GCONV_ILLEGAL_INPUT; \
755104ed 1230 if (! ignore_errors_p ()) \
e438a468 1231 break; \
755104ed
UD
1232 inptr += 4; \
1233 ++*irreversible; \
1234 continue; \
1235 } \
9ea2c194 1236 else \
755104ed 1237 { \
606135cf 1238 put16 (outptr, val); \
db6af3eb 1239 outptr += sizeof (uint16_t); \
755104ed
UD
1240 inptr += 4; \
1241 } \
8619129f 1242 }
55985355 1243#define LOOP_NEED_FLAGS
8619129f
UD
1244#include <iconv/loop.c>
1245#include <iconv/skeleton.c>
9b26f5c4
UD
1246
1247
428bcea4 1248/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
9b26f5c4
UD
1249#define DEFINE_INIT 0
1250#define DEFINE_FINI 0
1251#define MIN_NEEDED_FROM 2
1252#define MIN_NEEDED_TO 4
1253#define FROM_DIRECTION 1
428bcea4
UD
1254#define FROM_LOOP ucs2reverse_internal_loop
1255#define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/
8d617a71 1256#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal
fd1b5c0f 1257#define ONE_DIRECTION 1
9b26f5c4
UD
1258
1259#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1260#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1261#define LOOPFCT FROM_LOOP
428bcea4 1262#define BODY \
755104ed 1263 { \
606135cf 1264 uint16_t u1 = bswap_16 (get16 (inptr)); \
755104ed 1265 \
a1ffb40e 1266 if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
755104ed
UD
1267 { \
1268 /* Surrogate characters in UCS-2 input are not valid. Reject \
1269 them. (Catching this here is not security relevant.) */ \
1270 if (! ignore_errors_p ()) \
1271 { \
1272 result = __GCONV_ILLEGAL_INPUT; \
1273 break; \
1274 } \
1275 inptr += 2; \
1276 ++*irreversible; \
1277 continue; \
1278 } \
1279 \
cdda3d7d
AJ
1280 *((uint32_t *) outptr) = u1; \
1281 outptr += sizeof (uint32_t); \
755104ed
UD
1282 inptr += 2; \
1283 }
1284#define LOOP_NEED_FLAGS
9b26f5c4
UD
1285#include <iconv/loop.c>
1286#include <iconv/skeleton.c>
1287
1288
428bcea4 1289/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
9b26f5c4
UD
1290#define DEFINE_INIT 0
1291#define DEFINE_FINI 0
1292#define MIN_NEEDED_FROM 4
1293#define MIN_NEEDED_TO 2
1294#define FROM_DIRECTION 1
428bcea4
UD
1295#define FROM_LOOP internal_ucs2reverse_loop
1296#define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/
8d617a71 1297#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse
fd1b5c0f 1298#define ONE_DIRECTION 1
9b26f5c4
UD
1299
1300#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
1301#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
1302#define LOOPFCT FROM_LOOP
428bcea4 1303#define BODY \
9b26f5c4 1304 { \
17427edd 1305 uint32_t val = *((const uint32_t *) inptr); \
a1ffb40e 1306 if (__glibc_unlikely (val >= 0x10000)) \
9b26f5c4 1307 { \
601d2942 1308 UNICODE_TAG_HANDLER (val, 4); \
e438a468 1309 STANDARD_TO_LOOP_ERR_HANDLER (4); \
9b26f5c4 1310 } \
a1ffb40e 1311 else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
755104ed
UD
1312 { \
1313 /* Surrogate characters in UCS-4 input are not valid. \
1314 We must catch this, because the UCS-2 output might be \
1315 interpreted as UTF-16 by other programs. If we let \
1316 surrogates pass through, attackers could make a security \
1317 hole exploit by synthesizing any desired plane 1-16 \
1318 character. */ \
1319 if (! ignore_errors_p ()) \
1320 { \
1321 result = __GCONV_ILLEGAL_INPUT; \
1322 break; \
1323 } \
1324 inptr += 4; \
1325 ++*irreversible; \
1326 continue; \
1327 } \
9ea2c194 1328 else \
755104ed 1329 { \
606135cf 1330 put16 (outptr, bswap_16 (val)); \
cdda3d7d 1331 outptr += sizeof (uint16_t); \
755104ed
UD
1332 inptr += 4; \
1333 } \
9b26f5c4 1334 }
55985355 1335#define LOOP_NEED_FLAGS
9b26f5c4
UD
1336#include <iconv/loop.c>
1337#include <iconv/skeleton.c>