]>
Commit | Line | Data |
---|---|---|
6973fc01 | 1 | /* Simple transformations functions. |
c4f66413 | 2 | Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. |
6973fc01 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Library General Public License as | |
8 | published by the Free Software Foundation; either version 2 of the | |
9 | License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Library General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Library General Public | |
17 | License along with the GNU C Library; see the file COPYING.LIB. If not, | |
18 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
f1fa8b68 | 21 | #include <byteswap.h> |
55985355 | 22 | #include <dlfcn.h> |
f1fa8b68 | 23 | #include <endian.h> |
f4017d20 | 24 | #include <errno.h> |
6973fc01 | 25 | #include <gconv.h> |
d2374599 | 26 | #include <stdint.h> |
6973fc01 UD |
27 | #include <stdlib.h> |
28 | #include <string.h> | |
29 | #include <wchar.h> | |
30 | #include <sys/param.h> | |
31 | ||
17427edd UD |
/* Helper macros for the table-style inclusion that follows: an alias
   entry expands to nothing, a transformation entry expands to an extern
   prototype for the conversion function named by FCT.  All other macro
   arguments are ignored here.  The prototype uses plain `const', the
   spelling used by the rest of this file, instead of the legacy
   `__const'.  */
#define BUILTIN_ALIAS(s1, s2) /* nothing */
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, Init, End, MinF, \
			       MaxF, MinT, MaxT) \
  extern int Fct (struct __gconv_step *, struct __gconv_step_data *,	      \
		  const unsigned char **, const unsigned char *,	      \
		  unsigned char **, size_t *, int, int);
38 | #include "gconv_builtin.h" | |
39 | ||
40 | ||
a904b5d9 UD |
/* Provide EILSEQ in terms of EINVAL when <errno.h> does not define it.  */
#if !defined EILSEQ
# define EILSEQ EINVAL
#endif
44 | ||
45 | ||
f1fa8b68 UD |
/* Internal (UCS4-like) format -> UCS4.  The two formats differ, if at
   all, only in byte order: Unicode/ISO 10646 specifies big endian unless
   a higher-level protocol says otherwise.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4
#define FROM_LOOP		internal_ucs4_loop
#define TO_LOOP			internal_ucs4_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
59 | ||
60 | ||
/* Convert complete 4-byte units from the internal (host byte order)
   representation to big-endian UCS4.

   *INPTRP / INEND delimit the input, *OUTPTRP / OUTEND the output; both
   pointers are advanced past the converted data.  As many whole units as
   fit in both buffers are converted.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room remain,
   and __GCONV_INCOMPLETE_INPUT otherwise (a partial unit is left).  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
		    struct __gconv_step_data *step_data,
		    const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, unsigned char *outend,
		    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  The words are moved with memcpy
     so that no cast expression has to be used as an lvalue.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t word;

      memcpy (&word, inptr, sizeof word);
      word = bswap_32 (word);
      memcpy (outptr, &word, sizeof word);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
d2374599 | 100 | |
c1db8b0d UD |
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4_loop: the buffers are only ever
   accessed through unsigned char, never through uint32_t.  Compiled only
   when _STRING_ARCH_unaligned is not defined.

   Converts as many whole 4-byte units as fit into both buffers, advances
   *INPTRP and *OUTPTRP accordingly and reports __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
			      struct __gconv_step_data *step_data,
			      const unsigned char **inptrp,
			      const unsigned char *inend,
			      unsigned char **outptrp, unsigned char *outend,
			      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t units = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the byte order of every unit.  */
  for (; units > 0; --units)
    {
      dst[3] = src[0];
      dst[2] = src[1];
      dst[1] = src[2];
      dst[0] = src[3];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is big endian: plain copy.  */
  memcpy (dst, src, units * 4);
  *inptrp = src + units * 4;
  *outptrp = dst + units * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the loop stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
148 | ||
fd1b5c0f UD |
149 | |
/* Finish one character whose leading bytes are buffered in the
   conversion state (internal format -> big-endian UCS4).

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Input bytes are appended
   until four are available.  If the input runs out first, the new count
   is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   four bytes are written to *OUTPTRP in big-endian order, *OUTPTRP is
   advanced by four, the count is cleared and __GCONV_OK is returned.
   STEP, OUTEND and IRREVERSIBLE are not used.

   The big-endian branch used to increment the OUTPTRP parameter itself
   through a cast, so *OUTPTRP was never advanced; both branches now copy
   bytes and share one explicit advance.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
			   struct __gconv_step_data *step_data,
			   const unsigned char **inptrp,
			   const unsigned char *inend,
			   unsigned char **outptrp, unsigned char *outend,
			   size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
192 | ||
8619129f | 193 | #include <iconv/skeleton.c> |
d2374599 | 194 | |
d2374599 | 195 | |
4a069c33 UD |
/* UCS4 -> internal (UCS4-like) format.  In contrast to the opposite
   direction the input values have to be validated here.  */
#define FUNCTION_NAME		__gconv_transform_ucs4_internal
#define FROM_LOOP		ucs4_internal_loop
#define TO_LOOP			ucs4_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
206 | ||
207 | ||
/* Convert complete 4-byte units from big-endian UCS4 to the internal
   (host byte order) representation, rejecting values above 0x7fffffff.

   *INPTRP / INEND delimit the input, *OUTPTRP / OUTEND the output.
   STEP_DATA->__flags is consulted for __GCONV_IGNORE_ERRORS.  STEP is
   not used.

   On an out-of-range value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       immediately without updating *INPTRP / *OUTPTRP;
     - else if errors are ignored, the unit is skipped and *IRREVERSIBLE
       is incremented;
     - else *INPTRP / *OUTPTRP are set to the current positions and
       __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT depending on why conversion stopped.

   The range test is performed before calling __builtin_expect: that
   builtin takes a `long', so passing the raw uint32_t value through it
   can change the value where `long' is 32 bits wide.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
		    struct __gconv_step_data *step_data,
		    const unsigned char **inptrp, const unsigned char *inend,
		    unsigned char **outptrp, unsigned char *outend,
		    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

      memcpy (&inval, inptr, sizeof inval);
#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (inval);
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

      /* Store via memcpy; a cast expression cannot be incremented.  */
      memcpy (outptr, &inval, sizeof inval);
      outptr += sizeof inval;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
270 | ||
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4_internal_loop (big-endian UCS4 ->
   internal format); the buffers are only accessed through unsigned char.
   Compiled only when _STRING_ARCH_unaligned is not defined.

   A unit is out of range when its most significant byte, INPTR[0], is
   larger than 0x7f, i.e. when the value exceeds 0x7fffffff.  (The test
   used to be `> 0x80', which let values 0x80000000..0x80ffffff pass.)

   Error handling and return values:
     - out-of-range unit, IRREVERSIBLE == NULL: return
       __GCONV_ILLEGAL_INPUT without updating *INPTRP / *OUTPTRP;
     - out-of-range unit, __GCONV_IGNORE_ERRORS set in
       STEP_DATA->__flags: skip it and increment *IRREVERSIBLE;
     - out-of-range unit otherwise: store the current positions and
       return __GCONV_ILLEGAL_INPUT;
     - else __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
       __GCONV_INCOMPLETE_INPUT.
   STEP is not used.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
			      struct __gconv_step_data *step_data,
			      const unsigned char **inptrp,
			      const unsigned char *inend,
			      unsigned char **outptrp, unsigned char *outend,
			      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      if (__builtin_expect (inptr[0] > 0x7f, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
339 | ||
340 | ||
/* Finish one character whose leading bytes are buffered in the
   conversion state (big-endian UCS4 -> internal format).

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already held in __value.__wchb; input bytes are appended until
   four are present.  If the input ends first, the count is updated and
   __GCONV_INCOMPLETE_INPUT is returned.

   The completed unit is out of range when its first (most significant)
   byte is above 0x7f.  (The test used to be `> 0x80', which accepted
   0x80000000..0x80ffffff.)  For such a unit:
     - without __GCONV_IGNORE_ERRORS in STEP_DATA->__flags, *INPTRP is
       moved back by the number of bytes consumed in this call and
       __GCONV_ILLEGAL_INPUT is returned;
     - with it, the unit is dropped without producing output.
   A valid unit is written to *OUTPTRP in host byte order and *OUTPTRP
   advances by four.  In both non-error cases the byte count in the state
   is cleared and __GCONV_OK is returned.  STEP, OUTEND and IRREVERSIBLE
   are not used.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
			   struct __gconv_step_data *step_data,
			   const unsigned char **inptrp,
			   const unsigned char *inend,
			   unsigned char **outptrp, unsigned char *outend,
			   size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
			0))
    {
      /* The value is too large.  We don't try transliteration here since
	 this is not an error because of the lack of possibilities to
	 represent the result.  This is a genuine bug in the input since
	 UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
	{
	  /* Give back the bytes taken from the input in this call.  */
	  *inptrp -= cnt - (state->__count & 7);
	  return __GCONV_ILLEGAL_INPUT;
	}
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
400 | ||
401 | #include <iconv/skeleton.c> | |
402 | ||
403 | ||
/* Internal (UCS4-like) format -> UCS4, little-endian flavour.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4le
#define FROM_LOOP		internal_ucs4le_loop
#define TO_LOOP			internal_ucs4le_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
413 | ||
414 | ||
/* Convert complete 4-byte units from the internal (host byte order)
   representation to little-endian UCS4.

   *INPTRP / INEND delimit the input, *OUTPTRP / OUTEND the output; both
   pointers are advanced past the converted data.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room remain,
   and __GCONV_INCOMPLETE_INPUT otherwise.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
		      struct __gconv_step_data *step_data,
		      const unsigned char **inptrp, const unsigned char *inend,
		      unsigned char **outptrp, unsigned char *outend,
		      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  The words are moved with memcpy
     so that no cast expression has to be used as an lvalue.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t word;

      memcpy (&word, inptr, sizeof word);
      word = bswap_32 (word);
      memcpy (outptr, &word, sizeof word);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
454 | ||
c1db8b0d UD |
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4le_loop (internal format ->
   little-endian UCS4); the buffers are only accessed through unsigned
   char.  Compiled only when _STRING_ARCH_unaligned is not defined.

   Converts as many whole 4-byte units as fit into both buffers and
   advances *INPTRP and *OUTPTRP.  STEP, STEP_DATA and IRREVERSIBLE are
   not used.

   Returns __GCONV_EMPTY_INPUT only when the input was consumed
   completely, __GCONV_FULL_OUTPUT when fewer than four bytes of output
   room remain, and __GCONV_INCOMPLETE_INPUT when a partial unit is left.
   (The first test used to be `*inptrp + 4 > inend', which reported a
   trailing partial unit as empty input, unlike the sibling loops.)  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
				struct __gconv_step_data *step_data,
				const unsigned char **inptrp,
				const unsigned char *inend,
				unsigned char **outptrp, unsigned char *outend,
				size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
502 | ||
fd1b5c0f UD |
503 | |
/* Finish one character whose leading bytes are buffered in the
   conversion state (internal format -> little-endian UCS4).

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already stored in __value.__wchb.  Input bytes are appended
   until four are available.  If the input runs out first, the new count
   is stored and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   four bytes are written to *OUTPTRP in little-endian order, *OUTPTRP is
   advanced by four, the count is cleared and __GCONV_OK is returned.
   STEP, OUTEND and IRREVERSIBLE are not used.

   The little-endian branch used to increment the OUTPTRP parameter
   itself through a cast, so *OUTPTRP was never advanced; both branches
   now copy bytes and share one explicit advance.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
			     struct __gconv_step_data *step_data,
			     const unsigned char **inptrp,
			     const unsigned char *inend,
			     unsigned char **outptrp, unsigned char *outend,
			     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
544 | ||
8d617a71 UD |
545 | #include <iconv/skeleton.c> |
546 | ||
547 | ||
4a069c33 UD |
/* Last of the four UCS4 variants: UCS4-LE -> internal encoding.  */
#define FUNCTION_NAME		__gconv_transform_ucs4le_internal
#define FROM_LOOP		ucs4le_internal_loop
#define TO_LOOP			ucs4le_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0
557 | ||
558 | ||
/* Convert complete 4-byte units from little-endian UCS4 to the internal
   (host byte order) representation, rejecting values above 0x7fffffff.

   *INPTRP / INEND delimit the input, *OUTPTRP / OUTEND the output.
   STEP_DATA->__flags is consulted for __GCONV_IGNORE_ERRORS.  STEP is
   not used.

   On an out-of-range value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned
       immediately without updating *INPTRP / *OUTPTRP;
     - else if errors are ignored, the unit is skipped and *IRREVERSIBLE
       is incremented;
     - else *INPTRP / *OUTPTRP are set to the current positions and
       __GCONV_ILLEGAL_INPUT is returned.  (This write-back used to be
       missing here, so units converted before the error were lost to
       the caller, unlike in ucs4_internal_loop.)

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT depending on why conversion stopped.

   The range test is performed before calling __builtin_expect: that
   builtin takes a `long', so passing the raw uint32_t value through it
   can change the value where `long' is 32 bits wide.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
		      struct __gconv_step_data *step_data,
		      const unsigned char **inptrp, const unsigned char *inend,
		      unsigned char **outptrp, unsigned char *outend,
		      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

      memcpy (&inval, inptr, sizeof inval);
#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (inval);
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

      /* Store via memcpy; a cast expression cannot be incremented.  */
      memcpy (outptr, &inval, sizeof inval);
      outptr += sizeof inval;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
619 | ||
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4le_internal_loop (little-endian UCS4 ->
   internal format); the buffers are only accessed through unsigned char.
   Compiled only when _STRING_ARCH_unaligned is not defined.

   A unit is out of range when its most significant byte, INPTR[3], is
   larger than 0x7f, i.e. when the value exceeds 0x7fffffff.  (The test
   used to be `> 0x80', which let values 0x80000000..0x80ffffff pass.)

   Error handling and return values:
     - out-of-range unit, IRREVERSIBLE == NULL: return
       __GCONV_ILLEGAL_INPUT without updating *INPTRP / *OUTPTRP;
     - out-of-range unit, __GCONV_IGNORE_ERRORS set in
       STEP_DATA->__flags: skip it and increment *IRREVERSIBLE;
     - out-of-range unit otherwise: store the current positions and
       return __GCONV_ILLEGAL_INPUT;
     - else __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
       __GCONV_INCOMPLETE_INPUT.
   STEP is not used.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
				struct __gconv_step_data *step_data,
				const unsigned char **inptrp,
				const unsigned char *inend,
				unsigned char **outptrp, unsigned char *outend,
				size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      if (__builtin_expect (inptr[3] > 0x7f, 0))
	{
	  /* The value is too large.  We don't try transliteration here since
	     this is not an error because of the lack of possibilities to
	     represent the result.  This is a genuine bug in the input since
	     UCS4 does not allow such values.  */
	  if (irreversible == NULL)
	    /* We are transliterating, don't try to correct anything.  */
	    return __GCONV_ILLEGAL_INPUT;

	  if (flags & __GCONV_IGNORE_ERRORS)
	    {
	      /* Just ignore this character.  */
	      ++*irreversible;
	      continue;
	    }

	  *inptrp = inptr;
	  *outptrp = outptr;
	  return __GCONV_ILLEGAL_INPUT;
	}

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
689 | ||
690 | ||
/* Finish one character whose leading bytes are buffered in the
   conversion state (little-endian UCS4 -> internal format).

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already held in __value.__wchb; input bytes are appended until
   four are present.  If the input ends first, the count is updated and
   __GCONV_INCOMPLETE_INPUT is returned.

   The completed unit is out of range when its last (most significant)
   byte is above 0x7f.  (The test used to be `> 0x80', which accepted
   0x80000000..0x80ffffff.)  For such a unit:
     - without __GCONV_IGNORE_ERRORS in STEP_DATA->__flags, *INPTRP is
       moved back by the number of bytes consumed in this call, as
       ucs4_internal_loop_single does, and __GCONV_ILLEGAL_INPUT is
       returned;
     - with it, the unit is dropped without producing output.
   A valid unit is written to *OUTPTRP in host byte order and *OUTPTRP
   advances by four.  In both non-error cases the byte count in the state
   is cleared and __GCONV_OK is returned.  STEP, OUTEND and IRREVERSIBLE
   are not used.

   The second preprocessor branch used to repeat the __BIG_ENDIAN test,
   so little-endian hosts stored nothing but still advanced *OUTPTRP.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
			     struct __gconv_step_data *step_data,
			     const unsigned char **inptrp,
			     const unsigned char *inend,
			     unsigned char **outptrp, unsigned char *outend,
			     size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
			0))
    {
      /* The value is too large.  We don't try transliteration here since
	 this is not an error because of the lack of possibilities to
	 represent the result.  This is a genuine bug in the input since
	 UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
	{
	  /* Give back the bytes taken from the input in this call.  */
	  *inptrp -= cnt - (state->__count & 7);
	  return __GCONV_ILLEGAL_INPUT;
	}
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
747 | ||
748 | #include <iconv/skeleton.c> | |
749 | ||
750 | ||
8619129f UD |
/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		1
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ascii_internal_loop
#define TO_LOOP			ascii_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ascii_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Body for one input byte.  It relies on `inptr', `outptr', `result',
   `irreversible' and `ignore_errors_p ()' being supplied where the macro
   is expanded (presumably by <iconv/loop.c> -- confirm there).  A byte
   above 0x7f either stops the loop with __GCONV_ILLEGAL_INPUT or, when
   errors are ignored, is skipped and counted in *irreversible.  Any
   other byte is stored as one 32-bit unit.  The store and the pointer
   advance are separate statements because a cast expression cannot be
   incremented.  */
#define BODY \
  {									      \
    if (__builtin_expect (*inptr > '\x7f', 0))				      \
      {									      \
	/* The value is too large.  We don't try transliteration here since   \
	   this is not an error because of the lack of possibilities to	      \
	   represent the result.  This is a genuine bug in the input since    \
	   ASCII does not allow such values.  */			      \
	if (! ignore_errors_p ())					      \
	  {								      \
	    /* This is no correct ANSI_X3.4-1968 character.  */		      \
	    result = __GCONV_ILLEGAL_INPUT;				      \
	    break;							      \
	  }								      \
									      \
	++*irreversible;						      \
	++inptr;							      \
      }									      \
    else								      \
      {									      \
	/* It's an one byte sequence.  */				      \
	*((uint32_t *) outptr) = *inptr++;				      \
	outptr += sizeof (uint32_t);					      \
      }									      \
  }
#define LOOP_NEED_FLAGS
8619129f UD |
788 | #include <iconv/loop.c> |
789 | #include <iconv/skeleton.c> | |
790 | ||
791 | ||
/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ascii_loop
#define TO_LOOP			internal_ascii_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ascii
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Body for one 32-bit input unit.  It relies on `inptr', `outptr' and
   the UNICODE_TAG_HANDLER / STANDARD_ERR_HANDLER macros being supplied
   where the macro is expanded (presumably by <iconv/loop.c> -- confirm
   there).  Values above 0x7f are passed to those handlers; anything else
   is emitted as a single byte and the input advances by four bytes.

   The comparison is made before calling __builtin_expect, which takes a
   `long' and could otherwise alter large values where `long' is 32 bits
   wide.  The input pointer is advanced in its own statement because a
   cast expression cannot be incremented.  */
#define BODY \
  {									      \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))	      \
      {									      \
	UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);		      \
	STANDARD_ERR_HANDLER (4);					      \
      }									      \
    else								      \
      {									      \
	/* It's an one byte sequence.  */				      \
	*outptr++ = *((const uint32_t *) inptr);			      \
	inptr += sizeof (uint32_t);					      \
      }									      \
  }
#define LOOP_NEED_FLAGS
8619129f UD |
818 | #include <iconv/loop.c> |
819 | #include <iconv/skeleton.c> | |
820 | ||
821 | ||
/* Convert from the internal (UCS4-like) format to UTF-8.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define MAX_NEEDED_TO		6
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_utf8_loop
#define TO_LOOP			internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_utf8
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Body for one 32-bit input unit.  It relies on `inptr', `outptr',
   `outend' and `result' being supplied where the macro is expanded
   (presumably by <iconv/loop.c> -- confirm there).

   Values below 0x80 are written as one byte.  For larger values the
   sequence length STEP (2..6) is the smallest one whose payload of
   5 * STEP + 1 bits holds the value.  If STEP bytes do not fit before
   `outend', `result' is set to __GCONV_FULL_OUTPUT and the loop is left
   without consuming the input unit.  Otherwise the lead byte gets STEP
   high one-bits and the continuation bytes are filled from the end,
   six bits at a time.

   `start' is an unsigned char pointer like `outptr', and the lead-byte
   mask is computed from an unsigned constant so that no negative value
   is shifted right.  */
#define BODY \
  {									      \
    uint32_t wc = *((const uint32_t *) inptr);				      \
									      \
    /* Since we control every character we read this cannot happen.  */	      \
    assert (wc <= 0x7fffffff);						      \
									      \
    if (wc < 0x80)							      \
      /* It's an one byte sequence.  */					      \
      *outptr++ = (unsigned char) wc;					      \
    else								      \
      {									      \
	size_t step;							      \
	unsigned char *start;						      \
									      \
	for (step = 2; step < 6; ++step)				      \
	  if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)		      \
	    break;							      \
									      \
	if (__builtin_expect (outptr + step > outend, 0))		      \
	  {								      \
	    /* Too long.  */						      \
	    result = __GCONV_FULL_OUTPUT;				      \
	    break;							      \
	  }								      \
									      \
	start = outptr;							      \
	*outptr = (unsigned char) (~0xffu >> step);			      \
	outptr += step;							      \
	--step;								      \
	do								      \
	  {								      \
	    start[step] = 0x80 | (wc & 0x3f);				      \
	    wc >>= 6;							      \
	  }								      \
	while (--step > 0);						      \
	start[0] |= wc;							      \
      }									      \
									      \
    inptr += 4;								      \
  }
879 | #include <iconv/loop.c> | |
880 | #include <iconv/skeleton.c> | |
881 | ||
882 | ||
883 | /* Convert from UTF-8 to the internal (UCS4-like) format. */ | |
884 | #define DEFINE_INIT 0 | |
885 | #define DEFINE_FINI 0 | |
886 | #define MIN_NEEDED_FROM 1 | |
887 | #define MAX_NEEDED_FROM 6 | |
888 | #define MIN_NEEDED_TO 4 | |
889 | #define FROM_DIRECTION 1 | |
890 | #define FROM_LOOP utf8_internal_loop | |
891 | #define TO_LOOP utf8_internal_loop /* This is not used. */ | |
892 | #define FUNCTION_NAME __gconv_transform_utf8_internal | |
fd1b5c0f | 893 | #define ONE_DIRECTION 1 |
8619129f UD |
894 | |
895 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
5aa8ff62 | 896 | #define MAX_NEEDED_INPUT MAX_NEEDED_FROM |
8619129f UD |
897 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO |
898 | #define LOOPFCT FROM_LOOP | |
899 | #define BODY \ | |
900 | { \ | |
901 | uint32_t ch; \ | |
902 | uint_fast32_t cnt; \ | |
903 | uint_fast32_t i; \ | |
904 | \ | |
905 | /* Next input byte. */ \ | |
906 | ch = *inptr; \ | |
907 | \ | |
908 | if (ch < 0x80) \ | |
8619129f | 909 | { \ |
5aa8ff62 UD |
910 | /* One byte sequence. */ \ |
911 | cnt = 1; \ | |
912 | ++inptr; \ | |
8619129f UD |
913 | } \ |
914 | else \ | |
915 | { \ | |
bd32e4a6 | 916 | if (ch >= 0xc2 && ch < 0xe0) \ |
5aa8ff62 | 917 | { \ |
bd32e4a6 UD |
918 | /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ |
919 | otherwise the wide character could have been represented \ | |
920 | using a single byte. */ \ | |
5aa8ff62 UD |
921 | cnt = 2; \ |
922 | ch &= 0x1f; \ | |
923 | } \ | |
3593973b | 924 | else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \ |
5aa8ff62 UD |
925 | { \ |
926 | /* We expect three bytes. */ \ | |
927 | cnt = 3; \ | |
928 | ch &= 0x0f; \ | |
929 | } \ | |
3593973b | 930 | else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \ |
5aa8ff62 UD |
931 | { \ |
932 | /* We expect four bytes. */ \ | |
933 | cnt = 4; \ | |
934 | ch &= 0x07; \ | |
935 | } \ | |
365afefc | 936 | else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \ |
5aa8ff62 UD |
937 | { \ |
938 | /* We expect five bytes. */ \ | |
939 | cnt = 5; \ | |
940 | ch &= 0x03; \ | |
941 | } \ | |
365afefc | 942 | else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc) \ |
5aa8ff62 UD |
943 | { \ |
944 | /* We expect six bytes. */ \ | |
945 | cnt = 6; \ | |
946 | ch &= 0x01; \ | |
947 | } \ | |
948 | else \ | |
8619129f | 949 | { \ |
85830c4c UD |
950 | int skipped; \ |
951 | \ | |
952 | if (! ignore_errors_p ()) \ | |
953 | { \ | |
954 | /* This is an illegal encoding. */ \ | |
955 | result = __GCONV_ILLEGAL_INPUT; \ | |
956 | break; \ | |
957 | } \ | |
958 | \ | |
959 | /* Search the end of this ill-formed UTF-8 character. This \ | |
960 | is the next byte with (x & 0xc0) != 0x80. */ \ | |
961 | skipped = 0; \ | |
962 | do \ | |
963 | { \ | |
964 | ++inptr; \ | |
965 | ++skipped; \ | |
966 | } \ | |
967 | while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \ | |
968 | \ | |
969 | continue; \ | |
8619129f UD |
970 | } \ |
971 | \ | |
0cdb4983 | 972 | if (__builtin_expect (inptr + cnt > inend, 0)) \ |
5aa8ff62 | 973 | { \ |
fd1b5c0f UD |
974 | /* We don't have enough input. But before we report that check \ |
975 | that all the bytes are correct. */ \ | |
976 | for (i = 1; inptr + i < inend; ++i) \ | |
977 | if ((inptr[i] & 0xc0) != 0x80) \ | |
978 | break; \ | |
85830c4c | 979 | \ |
365afefc | 980 | if (__builtin_expect (inptr + i == inend, 1)) \ |
85830c4c UD |
981 | { \ |
982 | result = __GCONV_INCOMPLETE_INPUT; \ | |
983 | break; \ | |
984 | } \ | |
985 | \ | |
85830c4c UD |
986 | if (ignore_errors_p ()) \ |
987 | { \ | |
988 | /* Ignore it. */ \ | |
989 | inptr += i; \ | |
38677ace | 990 | ++*irreversible; \ |
85830c4c UD |
991 | continue; \ |
992 | } \ | |
993 | \ | |
994 | result = __GCONV_ILLEGAL_INPUT; \ | |
5aa8ff62 UD |
995 | break; \ |
996 | } \ | |
997 | \ | |
998 | /* Read the possible remaining bytes. */ \ | |
999 | for (i = 1; i < cnt; ++i) \ | |
1000 | { \ | |
1001 | uint32_t byte = inptr[i]; \ | |
1002 | \ | |
1003 | if ((byte & 0xc0) != 0x80) \ | |
bd32e4a6 UD |
1004 | /* This is an illegal encoding. */ \ |
1005 | break; \ | |
5aa8ff62 UD |
1006 | \ |
1007 | ch <<= 6; \ | |
1008 | ch |= byte & 0x3f; \ | |
1009 | } \ | |
85830c4c | 1010 | \ |
bd32e4a6 UD |
1011 | /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ |
1012 | If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ | |
1013 | have been represented with fewer than cnt bytes. */ \ | |
85830c4c | 1014 | if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \ |
bd32e4a6 UD |
1015 | { \ |
1016 | /* This is an illegal encoding. */ \ | |
85830c4c UD |
1017 | if (ignore_errors_p ()) \ |
1018 | { \ | |
1019 | inptr += i; \ | |
38677ace | 1020 | ++*irreversible; \ |
85830c4c UD |
1021 | continue; \ |
1022 | } \ | |
1023 | \ | |
786731fc | 1024 | result = __GCONV_ILLEGAL_INPUT; \ |
bd32e4a6 UD |
1025 | break; \ |
1026 | } \ | |
1027 | \ | |
5aa8ff62 | 1028 | inptr += cnt; \ |
8619129f UD |
1029 | } \ |
1030 | \ | |
1031 | /* Now adjust the pointers and store the result. */ \ | |
8619129f UD |
1032 | *((uint32_t *) outptr)++ = ch; \ |
1033 | } | |
55985355 | 1034 | #define LOOP_NEED_FLAGS |
fd1b5c0f UD |
1035 | |
1036 | #define STORE_REST \ | |
1037 | { \ | |
1038 | /* We store the remaining bytes while converting them into the UCS4 \ | |
1039 | format. We can assume that the first byte in the buffer is \ | |
1040 | correct and that it requires a larger number of bytes than there \ | |
1041 | are in the input buffer. */ \ | |
1042 | wint_t ch = **inptrp; \ | |
1043 | size_t cnt; \ | |
1044 | \ | |
1045 | state->__count = inend - *inptrp; \ | |
1046 | \ | |
1047 | if (ch >= 0xc2 && ch < 0xe0) \ | |
1048 | { \ | |
1049 | /* We expect two bytes. The first byte cannot be 0xc0 or \ | |
1050 | 0xc1, otherwise the wide character could have been \ | |
1051 | represented using a single byte. */ \ | |
1052 | cnt = 2; \ | |
1053 | ch &= 0x1f; \ | |
1054 | } \ | |
365afefc | 1055 | else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \ |
fd1b5c0f UD |
1056 | { \ |
1057 | /* We expect three bytes. */ \ | |
1058 | cnt = 3; \ | |
1059 | ch &= 0x0f; \ | |
1060 | } \ | |
3593973b | 1061 | else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \ |
fd1b5c0f UD |
1062 | { \ |
1063 | /* We expect four bytes. */ \ | |
1064 | cnt = 4; \ | |
1065 | ch &= 0x07; \ | |
1066 | } \ | |
3593973b | 1067 | else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \ |
fd1b5c0f UD |
1068 | { \ |
1069 | /* We expect five bytes. */ \ | |
1070 | cnt = 5; \ | |
1071 | ch &= 0x03; \ | |
1072 | } \ | |
1073 | else \ | |
1074 | { \ | |
1075 | /* We expect six bytes. */ \ | |
1076 | cnt = 6; \ | |
1077 | ch &= 0x01; \ | |
1078 | } \ | |
1079 | \ | |
1080 | /* The first byte is already consumed. */ \ | |
1081 | --cnt; \ | |
1082 | while (++(*inptrp) < inend) \ | |
1083 | { \ | |
1084 | ch <<= 6; \ | |
1085 | ch |= **inptrp & 0x3f; \ | |
1086 | --cnt; \ | |
1087 | } \ | |
1088 | \ | |
1089 | /* Shift for the so far missing bytes. */ \ | |
1090 | ch <<= cnt * 6; \ | |
1091 | \ | |
1092 | /* Store the value. */ \ | |
1093 | state->__value.__wch = ch; \ | |
1094 | } | |
1095 | ||
1096 | #define UNPACK_BYTES \ | |
1097 | { \ | |
1098 | wint_t wch = state->__value.__wch; \ | |
cd201e38 | 1099 | size_t ntotal; \ |
fd1b5c0f UD |
1100 | inlen = state->__count; \ |
1101 | \ | |
1102 | if (state->__value.__wch <= 0x7ff) \ | |
cd201e38 UD |
1103 | { \ |
1104 | bytebuf[0] = 0xc0; \ | |
1105 | ntotal = 2; \ | |
1106 | } \ | |
365afefc | 1107 | else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff) \ |
cd201e38 UD |
1108 | { \ |
1109 | bytebuf[0] = 0xe0; \ | |
1110 | ntotal = 3; \ | |
1111 | } \ | |
365afefc | 1112 | else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff) \ |
cd201e38 UD |
1113 | { \ |
1114 | bytebuf[0] = 0xf0; \ | |
1115 | ntotal = 4; \ | |
1116 | } \ | |
365afefc | 1117 | else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff) \ |
cd201e38 UD |
1118 | { \ |
1119 | bytebuf[0] = 0xf8; \ | |
1120 | ntotal = 5; \ | |
1121 | } \ | |
fd1b5c0f | 1122 | else \ |
cd201e38 UD |
1123 | { \ |
1124 | bytebuf[0] = 0xfc; \ | |
1125 | ntotal = 6; \ | |
1126 | } \ | |
fd1b5c0f | 1127 | \ |
cd201e38 UD |
1128 | do \ |
1129 | { \ | |
1130 | if (--ntotal < inlen) \ | |
1131 | bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ | |
1132 | wch >>= 6; \ | |
1133 | } \ | |
1134 | while (ntotal > 1); \ | |
fd1b5c0f UD |
1135 | \ |
1136 | bytebuf[0] |= wch; \ | |
1137 | } | |
1138 | ||
8619129f UD |
1139 | #include <iconv/loop.c> |
1140 | #include <iconv/skeleton.c> | |
1141 | ||
1142 | ||
1143 | /* Convert from UCS2 to the internal (UCS4-like) format. */ | |
1144 | #define DEFINE_INIT 0 | |
1145 | #define DEFINE_FINI 0 | |
1146 | #define MIN_NEEDED_FROM 2 | |
1147 | #define MIN_NEEDED_TO 4 | |
1148 | #define FROM_DIRECTION 1 | |
1149 | #define FROM_LOOP ucs2_internal_loop | |
1150 | #define TO_LOOP ucs2_internal_loop /* This is not used. */ | |
1151 | #define FUNCTION_NAME __gconv_transform_ucs2_internal | |
fd1b5c0f | 1152 | #define ONE_DIRECTION 1 |
8619129f UD |
1153 | |
1154 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1155 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1156 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1157 | #define BODY \ |
755104ed | 1158 | { \ |
17427edd | 1159 | uint16_t u1 = *((const uint16_t *) inptr); \ |
755104ed UD |
1160 | \ |
1161 | if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ | |
1162 | { \ | |
1163 | /* Surrogate characters in UCS-2 input are not valid. Reject \ | |
1164 | them. (Catching this here is not security relevant.) */ \ | |
1165 | if (! ignore_errors_p ()) \ | |
1166 | { \ | |
1167 | result = __GCONV_ILLEGAL_INPUT; \ | |
1168 | break; \ | |
1169 | } \ | |
1170 | inptr += 2; \ | |
1171 | ++*irreversible; \ | |
1172 | continue; \ | |
1173 | } \ | |
1174 | \ | |
1175 | *((uint32_t *) outptr)++ = u1; \ | |
1176 | inptr += 2; \ | |
1177 | } | |
1178 | #define LOOP_NEED_FLAGS | |
8619129f UD |
1179 | #include <iconv/loop.c> |
1180 | #include <iconv/skeleton.c> | |
1181 | ||
1182 | ||
1183 | /* Convert from the internal (UCS4-like) format to UCS2. */ | |
1184 | #define DEFINE_INIT 0 | |
1185 | #define DEFINE_FINI 0 | |
1186 | #define MIN_NEEDED_FROM 4 | |
1187 | #define MIN_NEEDED_TO 2 | |
1188 | #define FROM_DIRECTION 1 | |
1189 | #define FROM_LOOP internal_ucs2_loop | |
1190 | #define TO_LOOP internal_ucs2_loop /* This is not used. */ | |
1191 | #define FUNCTION_NAME __gconv_transform_internal_ucs2 | |
fd1b5c0f | 1192 | #define ONE_DIRECTION 1 |
8619129f UD |
1193 | |
1194 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1195 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1196 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1197 | #define BODY \ |
8619129f | 1198 | { \ |
17427edd | 1199 | uint32_t val = *((const uint32_t *) inptr); \ |
755104ed UD |
1200 | \ |
1201 | if (__builtin_expect (val, 0) >= 0x10000) \ | |
8619129f | 1202 | { \ |
601d2942 | 1203 | UNICODE_TAG_HANDLER (val, 4); \ |
d6204268 | 1204 | STANDARD_ERR_HANDLER (4); \ |
8619129f | 1205 | } \ |
755104ed UD |
1206 | else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ |
1207 | { \ | |
1208 | /* Surrogate characters in UCS-4 input are not valid. \ | |
1209 | We must catch this, because the UCS-2 output might be \ | |
1210 | interpreted as UTF-16 by other programs. If we let \ | |
1211 | surrogates pass through, attackers could make a security \ | |
1212 | hole exploit by synthesizing any desired plane 1-16 \ | |
1213 | character. */ \ | |
1214 | if (! ignore_errors_p ()) \ | |
1215 | { \ | |
1216 | result = __GCONV_ILLEGAL_INPUT; \ | |
1217 | break; \ | |
1218 | } \ | |
1219 | inptr += 4; \ | |
1220 | ++*irreversible; \ | |
1221 | continue; \ | |
1222 | } \ | |
85830c4c | 1223 | else \ |
755104ed UD |
1224 | { \ |
1225 | *((uint16_t *) outptr)++ = val; \ | |
1226 | inptr += 4; \ | |
1227 | } \ | |
8619129f | 1228 | } |
55985355 | 1229 | #define LOOP_NEED_FLAGS |
8619129f UD |
1230 | #include <iconv/loop.c> |
1231 | #include <iconv/skeleton.c> | |
9b26f5c4 UD |
1232 | |
1233 | ||
428bcea4 | 1234 | /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */ |
9b26f5c4 UD |
1235 | #define DEFINE_INIT 0 |
1236 | #define DEFINE_FINI 0 | |
1237 | #define MIN_NEEDED_FROM 2 | |
1238 | #define MIN_NEEDED_TO 4 | |
1239 | #define FROM_DIRECTION 1 | |
428bcea4 UD |
1240 | #define FROM_LOOP ucs2reverse_internal_loop |
1241 | #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/ | |
8d617a71 | 1242 | #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal |
fd1b5c0f | 1243 | #define ONE_DIRECTION 1 |
9b26f5c4 UD |
1244 | |
1245 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1246 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1247 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1248 | #define BODY \ |
755104ed | 1249 | { \ |
17427edd | 1250 | uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \ |
755104ed UD |
1251 | \ |
1252 | if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ | |
1253 | { \ | |
1254 | /* Surrogate characters in UCS-2 input are not valid. Reject \ | |
1255 | them. (Catching this here is not security relevant.) */ \ | |
1256 | if (! ignore_errors_p ()) \ | |
1257 | { \ | |
1258 | result = __GCONV_ILLEGAL_INPUT; \ | |
1259 | break; \ | |
1260 | } \ | |
1261 | inptr += 2; \ | |
1262 | ++*irreversible; \ | |
1263 | continue; \ | |
1264 | } \ | |
1265 | \ | |
1266 | *((uint32_t *) outptr)++ = u1; \ | |
1267 | inptr += 2; \ | |
1268 | } | |
1269 | #define LOOP_NEED_FLAGS | |
9b26f5c4 UD |
1270 | #include <iconv/loop.c> |
1271 | #include <iconv/skeleton.c> | |
1272 | ||
1273 | ||
428bcea4 | 1274 | /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */ |
9b26f5c4 UD |
1275 | #define DEFINE_INIT 0 |
1276 | #define DEFINE_FINI 0 | |
1277 | #define MIN_NEEDED_FROM 4 | |
1278 | #define MIN_NEEDED_TO 2 | |
1279 | #define FROM_DIRECTION 1 | |
428bcea4 UD |
1280 | #define FROM_LOOP internal_ucs2reverse_loop |
1281 | #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/ | |
8d617a71 | 1282 | #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse |
fd1b5c0f | 1283 | #define ONE_DIRECTION 1 |
9b26f5c4 UD |
1284 | |
1285 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1286 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1287 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1288 | #define BODY \ |
9b26f5c4 | 1289 | { \ |
17427edd | 1290 | uint32_t val = *((const uint32_t *) inptr); \ |
365afefc | 1291 | if (__builtin_expect (val, 0) >= 0x10000) \ |
9b26f5c4 | 1292 | { \ |
601d2942 | 1293 | UNICODE_TAG_HANDLER (val, 4); \ |
d6204268 | 1294 | STANDARD_ERR_HANDLER (4); \ |
9b26f5c4 | 1295 | } \ |
755104ed UD |
1296 | else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ |
1297 | { \ | |
1298 | /* Surrogate characters in UCS-4 input are not valid. \ | |
1299 | We must catch this, because the UCS-2 output might be \ | |
1300 | interpreted as UTF-16 by other programs. If we let \ | |
1301 | surrogates pass through, attackers could make a security \ | |
1302 | hole exploit by synthesizing any desired plane 1-16 \ | |
1303 | character. */ \ | |
1304 | if (! ignore_errors_p ()) \ | |
1305 | { \ | |
1306 | result = __GCONV_ILLEGAL_INPUT; \ | |
1307 | break; \ | |
1308 | } \ | |
1309 | inptr += 4; \ | |
1310 | ++*irreversible; \ | |
1311 | continue; \ | |
1312 | } \ | |
1313 | else \ | |
1314 | { \ | |
1315 | *((uint16_t *) outptr)++ = bswap_16 (val); \ | |
1316 | inptr += 4; \ | |
1317 | } \ | |
9b26f5c4 | 1318 | } |
55985355 | 1319 | #define LOOP_NEED_FLAGS |
9b26f5c4 UD |
1320 | #include <iconv/loop.c> |
1321 | #include <iconv/skeleton.c> |