]>
Commit | Line | Data |
---|---|---|
6973fc01 | 1 | /* Simple transformations functions. |
ea31b613 | 2 | Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc. |
6973fc01 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
6973fc01 UD |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
6973fc01 | 15 | |
41bdb6e2 AJ |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
6973fc01 | 20 | |
f1fa8b68 | 21 | #include <byteswap.h> |
55985355 | 22 | #include <dlfcn.h> |
f1fa8b68 | 23 | #include <endian.h> |
f4017d20 | 24 | #include <errno.h> |
6973fc01 | 25 | #include <gconv.h> |
d2374599 | 26 | #include <stdint.h> |
6973fc01 UD |
27 | #include <stdlib.h> |
28 | #include <string.h> | |
29 | #include <wchar.h> | |
30 | #include <sys/param.h> | |
f9ad060c | 31 | #include <gconv_int.h> |
6973fc01 | 32 | |
17427edd | 33 | #define BUILTIN_ALIAS(s1, s2) /* nothing */ |
f9ad060c UD |
34 | #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ |
35 | MinF, MaxF, MinT, MaxT) \ | |
17427edd UD |
36 | extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \ |
37 | __const unsigned char **, __const unsigned char *, \ | |
38 | unsigned char **, size_t *, int, int); | |
39 | #include "gconv_builtin.h" | |
40 | ||
41 | ||
a904b5d9 UD |
42 | #ifndef EILSEQ |
43 | # define EILSEQ EINVAL | |
44 | #endif | |
45 | ||
46 | ||
f9ad060c UD |
/* Single-byte shortcut towards INTERNAL for conversions that only know
   the 7-bit ASCII repertoire: a byte below 0x80 is returned unchanged,
   any other byte yields WEOF.  STEP is not examined.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
57 | ||
58 | ||
f1fa8b68 UD |
59 | /* Transform from the internal, UCS4-like format, to UCS4. The |
60 | difference between the internal ucs4 format and the real UCS4 | |
61 | format is, if any, the endianness. The Unicode/ISO 10646 standard says that | |
62 | unless some higher protocol specifies it differently, the byte | |
63 | order is big endian.*/ | |
8619129f UD |
64 | #define DEFINE_INIT 0 |
65 | #define DEFINE_FINI 0 | |
66 | #define MIN_NEEDED_FROM 4 | |
67 | #define MIN_NEEDED_TO 4 | |
68 | #define FROM_DIRECTION 1 | |
69 | #define FROM_LOOP internal_ucs4_loop | |
70 | #define TO_LOOP internal_ucs4_loop /* This is not used. */ | |
71 | #define FUNCTION_NAME __gconv_transform_internal_ucs4 | |
72 | ||
73 | ||
/* Convert as many complete 4-byte characters as fit from the INTERNAL
   encoding to UCS4 (big endian).

   The number of characters handled is bounded by both the remaining
   input (*INPTRP .. INEND) and the remaining output (*OUTPTRP .. OUTEND).
   On return *INPTRP and *OUTPTRP point just past the consumed/produced
   bytes.  STEP, STEP_DATA and IRREVERSIBLE are not used here.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  Input and output are accessed as
   uint32_t objects, i.e. this variant is for suitably aligned buffers.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* Store, then advance.  Incrementing the result of a cast is not
         valid C (a cast does not yield an lvalue).  */
      *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
d2374599 | 114 | |
c1db8b0d UD |
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4_loop: converts complete 4-byte
   INTERNAL characters to UCS4 (big endian) without ever accessing the
   buffers through a uint32_t pointer.

   *INPTRP and *OUTPTRP are advanced past the handled bytes.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.  The result is
   __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT,
   determined exactly as in the status checks at the end.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order is the mirror image of the target order: reverse each
     group of four bytes.  */
  while (nchars-- > 0)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both sides already agree, a plain copy is enough.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Report why we stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
163 | ||
fd1b5c0f UD |
164 | |
/* Convert a single character from INTERNAL to UCS4 (big endian) when its
   bytes may be split across calls.

   Bytes left over from an earlier call are kept in
   STEP_DATA->__statep->__value.__wchb; the low three bits of __count hold
   how many are stored.  New bytes are appended from *INPTRP (which is
   advanced) until four are available.

   Returns __GCONV_INCOMPLETE_INPUT (and records the byte count in the
   state) if fewer than four bytes are available, otherwise writes the
   four output bytes, advances *OUTPTRP by 4, clears the stored count and
   returns __GCONV_OK.  STEP, OUTEND and IRREVERSIBLE are not used; in
   particular no check against OUTEND is made here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Emit the bytes individually.  This avoids an unaligned word store and,
     unlike incrementing a casted OUTPTRP, really advances the caller's
     output position.  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
208 | ||
8619129f | 209 | #include <iconv/skeleton.c> |
d2374599 | 210 | |
d2374599 | 211 | |
4a069c33 UD |
212 | /* Transform from UCS4 to the internal, UCS4-like format. Unlike |
213 | for the other direction we have to check for correct values here. */ | |
214 | #define DEFINE_INIT 0 | |
215 | #define DEFINE_FINI 0 | |
216 | #define MIN_NEEDED_FROM 4 | |
217 | #define MIN_NEEDED_TO 4 | |
218 | #define FROM_DIRECTION 1 | |
219 | #define FROM_LOOP ucs4_internal_loop | |
220 | #define TO_LOOP ucs4_internal_loop /* This is not used. */ | |
221 | #define FUNCTION_NAME __gconv_transform_ucs4_internal | |
222 | ||
223 | ||
/* Convert complete 4-byte characters from UCS4 (big endian) to the
   INTERNAL encoding, rejecting values above 0x7fffffff.

   The number of characters examined is bounded by the remaining input
   and output space.  For an out-of-range value:
     - if IRREVERSIBLE is NULL the function returns __GCONV_ILLEGAL_INPUT
       immediately, without updating *INPTRP / *OUTPTRP;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the position of the bad
       character and __GCONV_ILLEGAL_INPUT is returned.
   Without such an error the result is __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  STEP is not used.
   Buffers are accessed through uint32_t pointers.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store, then advance; a cast expression cannot be incremented in
         standard C.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
287 | ||
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4_internal_loop: converts complete 4-byte
   UCS4 (big endian) characters to INTERNAL without accessing the buffers
   through uint32_t pointers.

   A character is out of range when its most significant byte (INPTR[0])
   is above 0x7f, i.e. when the value exceeds 0x7fffffff -- the same limit
   the aligned variant enforces.  For such a character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned without
       updating *INPTRP / *OUTPTRP;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set it is
       skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the position of the bad
       character and __GCONV_ILLEGAL_INPUT is returned.
   Otherwise the result is __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* 0x80 itself already means a value >= 0x80000000, so the limit for
         the leading byte is 0x7f.  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
357 | ||
358 | ||
/* Convert a single character from UCS4 (big endian) to INTERNAL when its
   bytes may be split across calls.

   Pending bytes live in STEP_DATA->__statep->__value.__wchb, their number
   in the low three bits of __count.  Bytes are appended from *INPTRP
   until four are available; with fewer the count is stored and
   __GCONV_INCOMPLETE_INPUT is returned.

   A character whose most significant byte is above 0x7f (value above
   0x7fffffff) is illegal.  Unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, *INPTRP is moved back over the bytes consumed by
   this call and __GCONV_ILLEGAL_INPUT is returned.  With the flag set the
   character is dropped; *IRREVERSIBLE is not touched by this function.
   A valid character is written in host byte order and *OUTPTRP advanced
   by 4.  In both of the latter cases the stored count is cleared and
   __GCONV_OK returned.  STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The leading byte must not exceed 0x7f; 0x80 would already denote a
     value of at least 0x80000000.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input in this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
419 | ||
420 | #include <iconv/skeleton.c> | |
421 | ||
422 | ||
423 | /* Similarly for the little endian form. */ | |
8d617a71 UD |
424 | #define DEFINE_INIT 0 |
425 | #define DEFINE_FINI 0 | |
426 | #define MIN_NEEDED_FROM 4 | |
427 | #define MIN_NEEDED_TO 4 | |
428 | #define FROM_DIRECTION 1 | |
429 | #define FROM_LOOP internal_ucs4le_loop | |
430 | #define TO_LOOP internal_ucs4le_loop /* This is not used. */ | |
431 | #define FUNCTION_NAME __gconv_transform_internal_ucs4le | |
432 | ||
433 | ||
/* Convert as many complete 4-byte characters as fit from the INTERNAL
   encoding to UCS4 in little endian byte order.

   The number of characters handled is bounded by both the remaining
   input and the remaining output space; *INPTRP and *OUTPTRP are advanced
   accordingly.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four output bytes remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  On big endian hosts the buffers
   are accessed through uint32_t pointers.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* Store, then advance.  Incrementing the result of a cast is not
         valid C (a cast does not yield an lvalue).  */
      *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  *inptrp = inptr + n_convert * 4;
  *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
474 | ||
c1db8b0d UD |
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of internal_ucs4le_loop: converts complete 4-byte
   INTERNAL characters to little endian UCS4 without accessing the buffers
   through uint32_t pointers.

   *INPTRP and *OUTPTRP are advanced past the handled bytes.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.  Returns __GCONV_EMPTY_INPUT
   when the input is exhausted, __GCONV_INCOMPLETE_INPUT when fewer than
   four input bytes remain, and __GCONV_FULL_OUTPUT otherwise (in which
   case the lack of output space is asserted).  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order is the mirror image of the target order: reverse each
     group of four bytes.  */
  while (nchars-- > 0)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both sides already agree, a plain copy is enough.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Report why we stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
526 | ||
fd1b5c0f UD |
527 | |
/* Convert a single character from INTERNAL to little endian UCS4 when
   its bytes may be split across calls.

   Bytes left over from an earlier call are kept in
   STEP_DATA->__statep->__value.__wchb; the low three bits of __count hold
   how many are stored.  New bytes are appended from *INPTRP (which is
   advanced) until four are available.

   Returns __GCONV_INCOMPLETE_INPUT (and records the byte count in the
   state) if fewer than four bytes are available, otherwise writes the
   four output bytes, advances *OUTPTRP by 4, clears the stored count and
   returns __GCONV_OK.  STEP, OUTEND and IRREVERSIBLE are not used; in
   particular no check against OUTEND is made here.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Emit the bytes individually.  This avoids an unaligned word store and,
     unlike incrementing a casted OUTPTRP, really advances the caller's
     output position.  */
#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
569 | ||
8d617a71 UD |
570 | #include <iconv/skeleton.c> |
571 | ||
572 | ||
4a069c33 UD |
573 | /* And finally from UCS4-LE to the internal encoding. */ |
574 | #define DEFINE_INIT 0 | |
575 | #define DEFINE_FINI 0 | |
576 | #define MIN_NEEDED_FROM 4 | |
577 | #define MIN_NEEDED_TO 4 | |
578 | #define FROM_DIRECTION 1 | |
579 | #define FROM_LOOP ucs4le_internal_loop | |
580 | #define TO_LOOP ucs4le_internal_loop /* This is not used. */ | |
581 | #define FUNCTION_NAME __gconv_transform_ucs4le_internal | |
582 | ||
583 | ||
/* Convert complete 4-byte characters from little endian UCS4 to the
   INTERNAL encoding, rejecting values above 0x7fffffff.

   The number of characters examined is bounded by the remaining input
   and output space.  For an out-of-range value:
     - if IRREVERSIBLE is NULL the function returns __GCONV_ILLEGAL_INPUT
       immediately, without updating *INPTRP / *OUTPTRP;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the position of the bad
       character and __GCONV_ILLEGAL_INPUT is returned.
   Without such an error the result is __GCONV_EMPTY_INPUT,
   __GCONV_INCOMPLETE_INPUT (fewer than four input bytes left) or
   __GCONV_FULL_OUTPUT.  STEP is not used.  Buffers are accessed through
   uint32_t pointers.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got so that the characters converted before
             the bad one are not lost.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Store, then advance; a cast expression cannot be incremented in
         standard C.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      /* NOTE(review): after characters were skipped under
         __GCONV_IGNORE_ERRORS less output than input has been used, so
         this assertion looks reachable -- confirm against the caller.  */
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
648 | ||
#ifndef _STRING_ARCH_unaligned
/* Byte-wise counterpart of ucs4le_internal_loop: converts complete 4-byte
   little endian UCS4 characters to INTERNAL without accessing the buffers
   through uint32_t pointers.

   A character is out of range when its most significant byte (INPTR[3])
   is above 0x7f, i.e. when the value exceeds 0x7fffffff -- the same limit
   the aligned variant enforces.  For such a character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned without
       updating *INPTRP / *OUTPTRP;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS set it is
       skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP / *OUTPTRP are set to the position of the bad
       character and __GCONV_ILLEGAL_INPUT is returned.
   Otherwise the result is __GCONV_EMPTY_INPUT, __GCONV_INCOMPLETE_INPUT
   (fewer than four input bytes left) or __GCONV_FULL_OUTPUT.  STEP is not
   used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
    {
      /* 0x80 itself already means a value >= 0x80000000, so the limit for
         the most significant byte is 0x7f.  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      /* NOTE(review): after characters were skipped under
         __GCONV_IGNORE_ERRORS less output than input has been used, so
         this assertion looks reachable -- confirm against the caller.  */
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
722 | ||
723 | ||
/* Convert a single character from little endian UCS4 to INTERNAL when
   its bytes may be split across calls.

   Pending bytes live in STEP_DATA->__statep->__value.__wchb, their number
   in the low three bits of __count.  Bytes are appended from *INPTRP
   until four are available; with fewer the count is stored and
   __GCONV_INCOMPLETE_INPUT is returned.

   A character whose most significant byte (the fourth one) is above 0x7f
   is illegal.  Unless __GCONV_IGNORE_ERRORS is set in
   STEP_DATA->__flags, *INPTRP is moved back over the bytes consumed by
   this call and __GCONV_ILLEGAL_INPUT is returned.  With the flag set the
   character is dropped; *IRREVERSIBLE is not touched by this function.
   A valid character is written in host byte order and *OUTPTRP advanced
   by 4.  In both of the latter cases the stored count is cleared and
   __GCONV_OK returned.  STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The most significant byte must not exceed 0x7f; 0x80 would already
     denote a value of at least 0x80000000.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input in this call, as the
             big endian counterpart does.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
781 | ||
782 | #include <iconv/skeleton.c> | |
783 | ||
784 | ||
8619129f UD |
785 | /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */ |
786 | #define DEFINE_INIT 0 | |
787 | #define DEFINE_FINI 0 | |
788 | #define MIN_NEEDED_FROM 1 | |
789 | #define MIN_NEEDED_TO 4 | |
790 | #define FROM_DIRECTION 1 | |
791 | #define FROM_LOOP ascii_internal_loop | |
792 | #define TO_LOOP ascii_internal_loop /* This is not used. */ | |
793 | #define FUNCTION_NAME __gconv_transform_ascii_internal | |
fd1b5c0f | 794 | #define ONE_DIRECTION 1 |
8619129f UD |
795 | |
796 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
797 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
798 | #define LOOPFCT FROM_LOOP | |
/* Loop body for ISO 646-IRV -> INTERNAL: a byte above 0x7f is handed to
   STANDARD_FROM_LOOP_ERR_HANDLER, any other byte is widened to one
   uint32_t in the output.  Uses the variables INPTR and OUTPTR of the
   code this macro is expanded in.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*inptr > '\x7f', 0))                                \
      {                                                                       \
        /* The value is too large.  We don't try transliteration here since   \
           this is not an error because of the lack of possibilities to       \
           represent the result.  This is a genuine bug in the input since    \
           ASCII does not allow such values.  */                              \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one byte sequence.  Store, then advance: the result of a    \
           cast cannot be incremented in standard C.  */                      \
        *((uint32_t *) outptr) = *inptr++;                                    \
        outptr += sizeof (uint32_t);                                          \
      }                                                                       \
  }
55985355 | 813 | #define LOOP_NEED_FLAGS |
8619129f UD |
814 | #include <iconv/loop.c> |
815 | #include <iconv/skeleton.c> | |
816 | ||
817 | ||
818 | /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ | |
819 | #define DEFINE_INIT 0 | |
820 | #define DEFINE_FINI 0 | |
821 | #define MIN_NEEDED_FROM 4 | |
822 | #define MIN_NEEDED_TO 1 | |
823 | #define FROM_DIRECTION 1 | |
824 | #define FROM_LOOP internal_ascii_loop | |
825 | #define TO_LOOP internal_ascii_loop /* This is not used. */ | |
826 | #define FUNCTION_NAME __gconv_transform_internal_ascii | |
fd1b5c0f | 827 | #define ONE_DIRECTION 1 |
8619129f UD |
828 | |
829 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
830 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
831 | #define LOOPFCT FROM_LOOP | |
/* Loop body for INTERNAL -> ISO 646-IRV: a character above 0x7f goes
   through UNICODE_TAG_HANDLER and then STANDARD_TO_LOOP_ERR_HANDLER, any
   other character is written as a single byte.  Uses the variables INPTR
   and OUTPTR of the code this macro is expanded in; the input is read
   through a uint32_t pointer.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))             \
      {                                                                       \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);                 \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one byte sequence.  Read, then advance: the result of a     \
           cast cannot be incremented in standard C.  */                      \
        *outptr++ = *((const uint32_t *) inptr);                              \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
  }
55985355 | 843 | #define LOOP_NEED_FLAGS |
8619129f UD |
844 | #include <iconv/loop.c> |
845 | #include <iconv/skeleton.c> | |
846 | ||
847 | ||
848 | /* Convert from the internal (UCS4-like) format to UTF-8. */ | |
849 | #define DEFINE_INIT 0 | |
850 | #define DEFINE_FINI 0 | |
851 | #define MIN_NEEDED_FROM 4 | |
852 | #define MIN_NEEDED_TO 1 | |
853 | #define MAX_NEEDED_TO 6 | |
854 | #define FROM_DIRECTION 1 | |
855 | #define FROM_LOOP internal_utf8_loop | |
856 | #define TO_LOOP internal_utf8_loop /* This is not used. */ | |
857 | #define FUNCTION_NAME __gconv_transform_internal_utf8 | |
fd1b5c0f | 858 | #define ONE_DIRECTION 1 |
8619129f UD |
859 | |
860 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
861 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
5aa8ff62 | 862 | #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO |
8619129f UD |
863 | #define LOOPFCT FROM_LOOP |
/* Loop body for the internal (UCS4-like) -> UTF-8 direction.  One 32-bit
   input unit is turned into a sequence of one to six bytes.  Values above
   0x7fffffff are handed to STANDARD_TO_LOOP_ERR_HANDLER; if the sequence
   does not fit before outend, result is set to __GCONV_FULL_OUTPUT and the
   loop is left without consuming the input unit.  */
#define BODY \
  { \
    uint32_t wc = *((const uint32_t *) inptr); \
    \
    if (wc < 0x80) \
      /* It's a one byte sequence.  */ \
      *outptr++ = (unsigned char) wc; \
    else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
      { \
        size_t step; \
        /* Same element type as the output buffer, so the bytes with the \
           high bit set are stored without signed char conversions.  */ \
        unsigned char *start; \
        \
        /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits; \
           pick the shortest one that holds WC.  */ \
        for (step = 2; step < 6; ++step) \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
            break; \
        \
        if (__builtin_expect (outptr + step > outend, 0)) \
          { \
            /* Too long.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        \
        start = outptr; \
        /* Lead byte marker: STEP leading one bits (0xc0, 0xe0, ... 0xfc). \
           Shifting the positive constant 0xff00 yields the same low byte \
           as shifting ~0xff, without right-shifting a negative value.  */ \
        *outptr = (unsigned char) (0xff00 >> step); \
        outptr += step; \
        --step; \
        /* Fill the continuation bytes from the last one backwards, six \
           payload bits each.  */ \
        do \
          { \
            start[step] = 0x80 | (wc & 0x3f); \
            wc >>= 6; \
          } \
        while (--step > 0); \
        /* What is left of WC belongs into the lead byte.  */ \
        start[0] |= wc; \
      } \
    else \
      { \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    \
    inptr += 4; \
  }
db2d05f9 | 906 | #define LOOP_NEED_FLAGS |
8619129f UD |
907 | #include <iconv/loop.c> |
908 | #include <iconv/skeleton.c> | |
909 | ||
910 | ||
911 | /* Convert from UTF-8 to the internal (UCS4-like) format. */ | |
912 | #define DEFINE_INIT 0 | |
913 | #define DEFINE_FINI 0 | |
914 | #define MIN_NEEDED_FROM 1 | |
915 | #define MAX_NEEDED_FROM 6 | |
916 | #define MIN_NEEDED_TO 4 | |
917 | #define FROM_DIRECTION 1 | |
918 | #define FROM_LOOP utf8_internal_loop | |
919 | #define TO_LOOP utf8_internal_loop /* This is not used. */ | |
920 | #define FUNCTION_NAME __gconv_transform_utf8_internal | |
fd1b5c0f | 921 | #define ONE_DIRECTION 1 |
8619129f UD |
922 | |
923 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
5aa8ff62 | 924 | #define MAX_NEEDED_INPUT MAX_NEEDED_FROM |
8619129f UD |
925 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO |
926 | #define LOOPFCT FROM_LOOP | |
/* Loop body for the UTF-8 -> internal (UCS4-like) direction.  One UTF-8
   sequence of one to six bytes is decoded into one 32-bit output unit.
   Invalid lead bytes, invalid continuation bytes and over-long encodings
   are handed to STANDARD_FROM_LOOP_ERR_HANDLER; a well-formed but truncated
   sequence at the end of the input sets result to
   __GCONV_INCOMPLETE_INPUT and leaves the loop.  */
#define BODY \
  { \
    uint32_t ch; \
    uint_fast32_t cnt; \
    uint_fast32_t i; \
    \
    /* Next input byte.  */ \
    ch = *inptr; \
    \
    if (ch < 0x80) \
      { \
        /* One byte sequence.  */ \
        cnt = 1; \
        ++inptr; \
      } \
    else \
      { \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1, \
               otherwise the wide character could have been represented \
               using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
          { \
            /* We expect five bytes.  */ \
            cnt = 5; \
            ch &= 0x03; \
          } \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
          { \
            /* We expect six bytes.  */ \
            cnt = 6; \
            ch &= 0x01; \
          } \
        else \
          { \
            int skipped; \
            \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  At most five \
               bytes are skipped at once.  */ \
            skipped = 0; \
            do \
              ++skipped; \
            while (inptr + skipped < inend \
                   && (*(inptr + skipped) & 0xc0) == 0x80 \
                   && skipped < 5); \
            \
            /* CNT is not set on this path.  NOTE(review): this relies on \
               the handler always leaving the current iteration; its \
               definition is not part of this file section - confirm.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (skipped); \
          } \
        \
        if (__builtin_expect (inptr + cnt > inend, 0)) \
          { \
            /* We don't have enough input.  But before we report that check \
               that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
            \
            if (__builtin_expect (inptr + i == inend, 1)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
        \
        /* Read the possible remaining bytes.  */ \
        for (i = 1; i < cnt; ++i) \
          { \
            uint32_t byte = inptr[i]; \
            \
            if ((byte & 0xc0) != 0x80) \
              /* This is an illegal encoding.  */ \
              break; \
            \
            ch <<= 6; \
            ch |= byte & 0x3f; \
          } \
        \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
           have been represented with fewer than cnt bytes.  */ \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
          { \
            /* This is an illegal encoding.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
        \
        inptr += cnt; \
      } \
    \
    /* Now store the result and adjust the output pointer.  The pointer is \
       advanced in a separate statement: incrementing the result of a cast \
       is not valid C (a cast does not yield an lvalue).  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
55985355 | 1037 | #define LOOP_NEED_FLAGS |
fd1b5c0f UD |
1038 | |
/* Save an incomplete UTF-8 sequence found at the end of the input in the
   conversion state.  The bytes from *inptrp up to inend are consumed;
   their payload bits are collected in state->__value.__wch, already
   shifted as if the missing bytes had contributed zero bits.
   state->__count receives the number of bytes seen in its low eight bits
   and the length of the complete sequence in the bits above.  */
#define STORE_REST \
  { \
    /* We can assume that the first byte in the buffer is correct and \
       that it announces more bytes than the input buffer holds.  */ \
    wint_t partial = **inptrp; \
    size_t have = inend - *inptrp; \
    size_t total; \
    size_t missing; \
    \
    /* Derive the sequence length from the lead byte and strip the \
       length marker bits from it.  */ \
    if (partial >= 0xc2 && partial < 0xe0) \
      { \
        /* Two bytes.  0xc0 and 0xc1 cannot start a sequence, the \
           character would fit into a single byte.  */ \
        total = 2; \
        partial &= 0x1f; \
      } \
    else if (__builtin_expect ((partial & 0xf0) == 0xe0, 1)) \
      { \
        /* Three bytes.  */ \
        total = 3; \
        partial &= 0x0f; \
      } \
    else if (__builtin_expect ((partial & 0xf8) == 0xf0, 1)) \
      { \
        /* Four bytes.  */ \
        total = 4; \
        partial &= 0x07; \
      } \
    else if (__builtin_expect ((partial & 0xfc) == 0xf8, 1)) \
      { \
        /* Five bytes.  */ \
        total = 5; \
        partial &= 0x03; \
      } \
    else \
      { \
        /* Six bytes.  */ \
        total = 6; \
        partial &= 0x01; \
      } \
    \
    /* The lead byte is accounted for; append the payload of every \
       further byte that is available.  */ \
    missing = total - 1; \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp)) \
      { \
        partial = (partial << 6) | (**inptrp & 0x3f); \
        --missing; \
      } \
    \
    /* Leave room for the bytes which have not arrived yet.  */ \
    partial <<= missing * 6; \
    \
    /* Low byte: bytes seen so far.  Above: expected sequence length.  */ \
    state->__count = have | (total << 8); \
    \
    state->__value.__wch = partial; \
  }
1101 | ||
/* Rebuild in bytebuf the bytes of an incomplete UTF-8 sequence from the
   conversion state written by STORE_REST.  inlen is set to the number of
   bytes that had been seen (low eight bits of state->__count); only that
   many leading bytes of bytebuf are written.  */
#define UNPACK_BYTES \
  { \
    /* Length marker of the lead byte, indexed by sequence length - 2.  */ \
    static const unsigned char lead_marker[5] \
      = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t value = state->__value.__wch; \
    size_t total = state->__count >> 8; \
    size_t pos; \
    \
    inlen = state->__count & 255; \
    \
    bytebuf[0] = lead_marker[total - 2]; \
    \
    /* Walk the continuation byte positions from the last one down to the \
       first.  Every position consumes six bits of the value, but only \
       positions that had actually been read are stored.  */ \
    for (pos = total - 1; ; --pos) \
      { \
        if (pos < inlen) \
          bytebuf[pos] = 0x80 | (value & 0x3f); \
        value >>= 6; \
        if (pos <= 1) \
          break; \
      } \
    \
    /* The remaining bits are the payload of the lead byte.  */ \
    bytebuf[0] |= value; \
  }
1122 | ||
41f112ad UD |
/* Drop a saved incomplete sequence: state->__count holds the number of
   saved bytes and the expected sequence length (see STORE_REST), so zero
   means nothing is pending.  */
#define CLEAR_STATE state->__count = 0
1125 | ||
1126 | ||
8619129f UD |
1127 | #include <iconv/loop.c> |
1128 | #include <iconv/skeleton.c> | |
1129 | ||
1130 | ||
1131 | /* Convert from UCS2 to the internal (UCS4-like) format. */ | |
1132 | #define DEFINE_INIT 0 | |
1133 | #define DEFINE_FINI 0 | |
1134 | #define MIN_NEEDED_FROM 2 | |
1135 | #define MIN_NEEDED_TO 4 | |
1136 | #define FROM_DIRECTION 1 | |
1137 | #define FROM_LOOP ucs2_internal_loop | |
1138 | #define TO_LOOP ucs2_internal_loop /* This is not used. */ | |
1139 | #define FUNCTION_NAME __gconv_transform_ucs2_internal | |
fd1b5c0f | 1140 | #define ONE_DIRECTION 1 |
8619129f UD |
1141 | |
1142 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1143 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1144 | #define LOOPFCT FROM_LOOP | |
/* Loop body for the UCS2 -> internal (UCS4-like) direction: every 16-bit
   input unit is widened to one 32-bit output unit.  Surrogate values are
   handed to STANDARD_FROM_LOOP_ERR_HANDLER.  */
#define BODY \
  { \
    uint16_t u1 = *((const uint16_t *) inptr); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (2); \
      } \
    \
    /* The output pointer is advanced in a separate statement: \
       incrementing the result of a cast is not valid C (a cast does not \
       yield an lvalue).  */ \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
1159 | #define LOOP_NEED_FLAGS | |
8619129f UD |
1160 | #include <iconv/loop.c> |
1161 | #include <iconv/skeleton.c> | |
1162 | ||
1163 | ||
1164 | /* Convert from the internal (UCS4-like) format to UCS2. */ | |
1165 | #define DEFINE_INIT 0 | |
1166 | #define DEFINE_FINI 0 | |
1167 | #define MIN_NEEDED_FROM 4 | |
1168 | #define MIN_NEEDED_TO 2 | |
1169 | #define FROM_DIRECTION 1 | |
1170 | #define FROM_LOOP internal_ucs2_loop | |
1171 | #define TO_LOOP internal_ucs2_loop /* This is not used. */ | |
1172 | #define FUNCTION_NAME __gconv_transform_internal_ucs2 | |
fd1b5c0f | 1173 | #define ONE_DIRECTION 1 |
8619129f UD |
1174 | |
1175 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1176 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1177 | #define LOOPFCT FROM_LOOP | |
/* Loop body for the internal (UCS4-like) -> UCS2 direction: every 32-bit
   input unit below 0x10000 that is not a surrogate becomes one 16-bit
   output unit.  Values from 0x10000 on go through UNICODE_TAG_HANDLER and
   then STANDARD_TO_LOOP_ERR_HANDLER; surrogates are rejected here.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could make a security \
           hole exploit by synthesizing any desired plane 1-16 \
           character.  */ \
        /* NOTE(review): result is set even when the error is ignored \
           and the loop continues, whereas the byte-swapped variant \
           (__gconv_transform_internal_ucs2reverse) sets it only when it \
           leaves the loop - confirm which behavior is intended.  */ \
        result = __GCONV_ILLEGAL_INPUT; \
        if (! ignore_errors_p ()) \
          break; \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        /* The output pointer is advanced in a separate statement: \
           incrementing the result of a cast is not valid C (a cast does \
           not yield an lvalue).  */ \
        *((uint16_t *) outptr) = val; \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
55985355 | 1208 | #define LOOP_NEED_FLAGS |
8619129f UD |
1209 | #include <iconv/loop.c> |
1210 | #include <iconv/skeleton.c> | |
9b26f5c4 UD |
1211 | |
1212 | ||
428bcea4 | 1213 | /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */ |
9b26f5c4 UD |
1214 | #define DEFINE_INIT 0 |
1215 | #define DEFINE_FINI 0 | |
1216 | #define MIN_NEEDED_FROM 2 | |
1217 | #define MIN_NEEDED_TO 4 | |
1218 | #define FROM_DIRECTION 1 | |
428bcea4 UD |
1219 | #define FROM_LOOP ucs2reverse_internal_loop |
1220 | #define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/ | |
8d617a71 | 1221 | #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal |
fd1b5c0f | 1222 | #define ONE_DIRECTION 1 |
9b26f5c4 UD |
1223 | |
1224 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1225 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1226 | #define LOOPFCT FROM_LOOP | |
/* Loop body for the direction UCS2 in the other byte order -> internal
   (UCS4-like): every 16-bit input unit is byte-swapped and widened to one
   32-bit output unit.  Surrogate values are rejected; when errors are
   ignored they are skipped and counted in *irreversible.  */
#define BODY \
  { \
    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
    \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-2 input are not valid.  Reject \
           them.  (Catching this here is not security relevant.)  */ \
        if (! ignore_errors_p ()) \
          { \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        inptr += 2; \
        ++*irreversible; \
        continue; \
      } \
    \
    /* The output pointer is advanced in a separate statement: \
       incrementing the result of a cast is not valid C (a cast does not \
       yield an lvalue).  */ \
    *((uint32_t *) outptr) = u1; \
    outptr += sizeof (uint32_t); \
    inptr += 2; \
  }
1248 | #define LOOP_NEED_FLAGS | |
9b26f5c4 UD |
1249 | #include <iconv/loop.c> |
1250 | #include <iconv/skeleton.c> | |
1251 | ||
1252 | ||
428bcea4 | 1253 | /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */ |
9b26f5c4 UD |
1254 | #define DEFINE_INIT 0 |
1255 | #define DEFINE_FINI 0 | |
1256 | #define MIN_NEEDED_FROM 4 | |
1257 | #define MIN_NEEDED_TO 2 | |
1258 | #define FROM_DIRECTION 1 | |
428bcea4 UD |
1259 | #define FROM_LOOP internal_ucs2reverse_loop |
1260 | #define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/ | |
8d617a71 | 1261 | #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse |
fd1b5c0f | 1262 | #define ONE_DIRECTION 1 |
9b26f5c4 UD |
1263 | |
1264 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1265 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1266 | #define LOOPFCT FROM_LOOP | |
/* Loop body for the direction internal (UCS4-like) -> UCS2 in the other
   byte order: every 32-bit input unit below 0x10000 that is not a
   surrogate becomes one byte-swapped 16-bit output unit.  Values from
   0x10000 on go through UNICODE_TAG_HANDLER and then
   STANDARD_TO_LOOP_ERR_HANDLER; surrogates are rejected here.  */
#define BODY \
  { \
    uint32_t val = *((const uint32_t *) inptr); \
    if (__builtin_expect (val >= 0x10000, 0)) \
      { \
        UNICODE_TAG_HANDLER (val, 4); \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
      { \
        /* Surrogate characters in UCS-4 input are not valid. \
           We must catch this, because the UCS-2 output might be \
           interpreted as UTF-16 by other programs.  If we let \
           surrogates pass through, attackers could make a security \
           hole exploit by synthesizing any desired plane 1-16 \
           character.  */ \
        /* NOTE(review): result is only set when the loop is left, \
           whereas the native byte order variant \
           (__gconv_transform_internal_ucs2) sets it before testing \
           ignore_errors_p () - confirm which behavior is intended.  */ \
        if (! ignore_errors_p ()) \
          { \
            result = __GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        inptr += 4; \
        ++*irreversible; \
        continue; \
      } \
    else \
      { \
        /* The output pointer is advanced in a separate statement: \
           incrementing the result of a cast is not valid C (a cast does \
           not yield an lvalue).  */ \
        *((uint16_t *) outptr) = bswap_16 (val); \
        outptr += sizeof (uint16_t); \
        inptr += 4; \
      } \
  }
55985355 | 1298 | #define LOOP_NEED_FLAGS |
9b26f5c4 UD |
1299 | #include <iconv/loop.c> |
1300 | #include <iconv/skeleton.c> |