]>
Commit | Line | Data |
---|---|---|
6973fc01 | 1 | /* Simple transformations functions. |
9ea2c194 | 2 | Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
6973fc01 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
6973fc01 UD |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
6973fc01 | 15 | |
41bdb6e2 AJ |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
6973fc01 | 20 | |
f1fa8b68 | 21 | #include <byteswap.h> |
55985355 | 22 | #include <dlfcn.h> |
f1fa8b68 | 23 | #include <endian.h> |
f4017d20 | 24 | #include <errno.h> |
6973fc01 | 25 | #include <gconv.h> |
d2374599 | 26 | #include <stdint.h> |
6973fc01 UD |
27 | #include <stdlib.h> |
28 | #include <string.h> | |
29 | #include <wchar.h> | |
30 | #include <sys/param.h> | |
31 | ||
17427edd | 32 | #define BUILTIN_ALIAS(s1, s2) /* nothing */ |
6b98979f UD |
33 | #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, MinF, MaxF, \ |
34 | MinT, MaxT) \ | |
17427edd UD |
35 | extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \ |
36 | __const unsigned char **, __const unsigned char *, \ | |
37 | unsigned char **, size_t *, int, int); | |
38 | #include "gconv_builtin.h" | |
39 | ||
40 | ||
a904b5d9 UD |
41 | #ifndef EILSEQ |
42 | # define EILSEQ EINVAL | |
43 | #endif | |
44 | ||
45 | ||
f1fa8b68 UD |
46 | /* Transform from the internal, UCS4-like format, to UCS4. The |
47 | difference between the internal UCS4 format and the real UCS4 | |
48 | format is, if any, the endianness. Unicode/ISO 10646 says that | |
49 | unless some higher protocol specifies it differently, the byte | |
50 | order is big endian. */ | |
8619129f UD |
51 | #define DEFINE_INIT 0 |
52 | #define DEFINE_FINI 0 | |
53 | #define MIN_NEEDED_FROM 4 | |
54 | #define MIN_NEEDED_TO 4 | |
55 | #define FROM_DIRECTION 1 | |
56 | #define FROM_LOOP internal_ucs4_loop | |
57 | #define TO_LOOP internal_ucs4_loop /* This is not used. */ | |
58 | #define FUNCTION_NAME __gconv_transform_internal_ucs4 | |
59 | ||
60 | ||
/* Convert from the internal UCS4-like format (host byte order) to
   big-endian UCS4.

   Converts as many complete 4-byte characters as both the input
   [*INPTRP, INEND) and the output [*OUTPTRP, OUTEND) can hold and
   advances *INPTRP and *OUTPTRP past them.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  On little-endian hosts the buffers are
   accessed through uint32_t pointers; presumably the caller only uses
   this variant with suitably aligned buffers.

   Returns __GCONV_EMPTY_INPUT if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than four bytes of output remain, and
   __GCONV_INCOMPLETE_INPUT otherwise (trailing partial character).  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* Store through the cast pointer and advance the byte pointer
     separately; incrementing the result of a cast is not valid C.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
d2374599 | 100 | |
c1db8b0d UD |
101 | #ifndef _STRING_ARCH_unaligned |
/* Byte-wise variant of the internal -> big-endian UCS4 conversion; it
   never accesses the buffers through a uint32_t pointer.

   Converts every complete 4-byte character that fits in both
   [*INPTRP, INEND) and [*OUTPTRP, OUTEND), then stores the advanced
   positions back.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT, tested in that order.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order differs from the target order: reverse each group.  */
  const unsigned char *const stop = src + nchars * 4;

  while (src != stop)
    {
      dst[0] = src[3];
      dst[1] = src[2];
      dst[2] = src[1];
      dst[3] = src[0];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is the target order: plain copy.  */
  memcpy (dst, src, nchars * 4);
  *inptrp = src + nchars * 4;
  *outptrp = dst + nchars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
147 | #endif | |
148 | ||
fd1b5c0f UD |
149 | |
/* Finish one internal -> big-endian UCS4 character whose first bytes
   are buffered in the conversion state.

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already stored in __value.__wchb.  Input bytes are appended
   until four are available.  If the input runs out first, the new count
   is recorded and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   character is written big-endian to *OUTPTRP, *OUTPTRP is advanced by
   four, the count bits are cleared and __GCONV_OK is returned.

   The output is written byte by byte, so no alignment of *OUTPTRP is
   required.  STEP, OUTEND and IRREVERSIBLE are not used; the caller is
   presumably responsible for providing four bytes of output room.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Copy byte-wise: this avoids an unaligned 32-bit store and makes
     sure *OUTPTRP itself (not the local OUTPTRP) is advanced below.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
192 | ||
8619129f | 193 | #include <iconv/skeleton.c> |
d2374599 | 194 | |
d2374599 | 195 | |
4a069c33 UD |
196 | /* Transform from UCS4 to the internal, UCS4-like format. Unlike |
197 | for the other direction we have to check for correct values here. */ | |
198 | #define DEFINE_INIT 0 | |
199 | #define DEFINE_FINI 0 | |
200 | #define MIN_NEEDED_FROM 4 | |
201 | #define MIN_NEEDED_TO 4 | |
202 | #define FROM_DIRECTION 1 | |
203 | #define FROM_LOOP ucs4_internal_loop | |
204 | #define TO_LOOP ucs4_internal_loop /* This is not used. */ | |
205 | #define FUNCTION_NAME __gconv_transform_ucs4_internal | |
206 | ||
207 | ||
/* Convert big-endian UCS4 to the internal UCS4-like format (host byte
   order), rejecting values above 0x7fffffff.

   Characters are converted while at least four input bytes and four
   bytes of output room remain.  The buffers are accessed through
   uint32_t pointers; presumably the caller only uses this variant with
   suitably aligned buffers.

   On an out-of-range value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left untouched;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character
       is skipped and *IRREVERSIBLE is incremented;
     - otherwise the current positions are stored and
       __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT (tested in that order).  STEP is unused.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Bound the loop by the actual buffer positions rather than by a
     precomputed count: a skipped character consumes input but no
     output, so a fixed count would stop early and the status below
     would wrongly claim incomplete input.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Incrementing the result of a cast is not valid C; store first,
         then advance the byte pointer.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
270 | ||
271 | #ifndef _STRING_ARCH_unaligned | |
/* Byte-wise variant of the big-endian UCS4 -> internal conversion; it
   never accesses the buffers through a uint32_t pointer.

   Characters are converted while at least four input bytes and four
   bytes of output room remain.  A character is illegal when its most
   significant byte (inptr[0]) exceeds 0x7f, i.e. the value is above
   0x7fffffff.

   On an illegal character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left untouched;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character
       is skipped and *IRREVERSIBLE is incremented;
     - otherwise the current positions are stored and
       __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT (tested in that order).  STEP is unused.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Bound the loop by the buffer positions, not by a precomputed count:
     skipped characters consume input but no output.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* 0x80 in the top byte already means a value >= 0x80000000, so
         the limit is 0x7f (matching the 0x7fffffff test of the aligned
         loop).  */
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
338 | #endif | |
339 | ||
340 | ||
/* Finish one big-endian UCS4 -> internal character whose first bytes
   are buffered in the conversion state.

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already stored in __value.__wchb.  Input bytes are appended
   until four are available; if the input runs out first the new count
   is recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A completed character whose most significant byte exceeds 0x7f (value
   above 0x7fffffff) is illegal.  Unless STEP_DATA->__flags contains
   __GCONV_IGNORE_ERRORS, the bytes taken from the input in this call
   are given back and __GCONV_ILLEGAL_INPUT is returned.  With the flag
   set the character is dropped silently.  A legal character is written
   in host byte order to *OUTPTRP, which is advanced by four.

   On completion the count bits are cleared and __GCONV_OK is returned.
   STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 in the top byte already means a value >= 0x80000000, so the
     limit is 0x7f.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed from the input in this call;
             __count still holds the number buffered before it.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
400 | ||
401 | #include <iconv/skeleton.c> | |
402 | ||
403 | ||
404 | /* Similarly for the little endian form. */ | |
8d617a71 UD |
405 | #define DEFINE_INIT 0 |
406 | #define DEFINE_FINI 0 | |
407 | #define MIN_NEEDED_FROM 4 | |
408 | #define MIN_NEEDED_TO 4 | |
409 | #define FROM_DIRECTION 1 | |
410 | #define FROM_LOOP internal_ucs4le_loop | |
411 | #define TO_LOOP internal_ucs4le_loop /* This is not used. */ | |
412 | #define FUNCTION_NAME __gconv_transform_internal_ucs4le | |
413 | ||
414 | ||
/* Convert from the internal UCS4-like format (host byte order) to
   little-endian UCS4.

   Converts as many complete 4-byte characters as both the input
   [*INPTRP, INEND) and the output [*OUTPTRP, OUTEND) can hold and
   advances *INPTRP and *OUTPTRP past them.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  On big-endian hosts the buffers are
   accessed through uint32_t pointers; presumably the caller only uses
   this variant with suitably aligned buffers.

   Returns __GCONV_EMPTY_INPUT if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than four bytes of output remain, and
   __GCONV_INCOMPLETE_INPUT otherwise (trailing partial character).  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* Store through the cast pointer and advance the byte pointer
     separately; incrementing the result of a cast is not valid C.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *((uint32_t *) outptr) = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
454 | ||
c1db8b0d UD |
455 | #ifndef _STRING_ARCH_unaligned |
/* Byte-wise variant of the internal -> little-endian UCS4 conversion;
   it never accesses the buffers through a uint32_t pointer.

   Converts every complete 4-byte character that fits in both
   [*INPTRP, INEND) and [*OUTPTRP, OUTEND) and stores the advanced
   positions back.  STEP, STEP_DATA and IRREVERSIBLE are not used.

   Returns __GCONV_EMPTY_INPUT only if all input was consumed,
   __GCONV_FULL_OUTPUT if fewer than four bytes of output remain, and
   __GCONV_INCOMPLETE_INPUT otherwise (1-3 trailing input bytes).  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  Use the same test as the sibling loops:
     input is "empty" only when every byte was consumed, so a trailing
     partial character is reported as incomplete input.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
501 | #endif | |
502 | ||
fd1b5c0f UD |
503 | |
/* Finish one internal -> little-endian UCS4 character whose first
   bytes are buffered in the conversion state.

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already stored in __value.__wchb.  Input bytes are appended
   until four are available.  If the input runs out first, the new count
   is recorded and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   character is written little-endian to *OUTPTRP, *OUTPTRP is advanced
   by four, the count bits are cleared and __GCONV_OK is returned.

   The output is written byte by byte, so no alignment of *OUTPTRP is
   required.  STEP, OUTEND and IRREVERSIBLE are not used; the caller is
   presumably responsible for providing four bytes of output room.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  /* Copy byte-wise: this avoids an unaligned 32-bit store and makes
     sure *OUTPTRP itself (not the local OUTPTRP) is advanced below.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
544 | ||
8d617a71 UD |
545 | #include <iconv/skeleton.c> |
546 | ||
547 | ||
4a069c33 UD |
548 | /* And finally from UCS4-LE to the internal encoding. */ |
549 | #define DEFINE_INIT 0 | |
550 | #define DEFINE_FINI 0 | |
551 | #define MIN_NEEDED_FROM 4 | |
552 | #define MIN_NEEDED_TO 4 | |
553 | #define FROM_DIRECTION 1 | |
554 | #define FROM_LOOP ucs4le_internal_loop | |
555 | #define TO_LOOP ucs4le_internal_loop /* This is not used. */ | |
556 | #define FUNCTION_NAME __gconv_transform_ucs4le_internal | |
557 | ||
558 | ||
/* Convert little-endian UCS4 to the internal UCS4-like format (host
   byte order), rejecting values above 0x7fffffff.

   Characters are converted while at least four input bytes and four
   bytes of output room remain.  The buffers are accessed through
   uint32_t pointers; presumably the caller only uses this variant with
   suitably aligned buffers.

   On an out-of-range value:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left untouched;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character
       is skipped and *IRREVERSIBLE is incremented;
     - otherwise the current positions are stored and
       __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT (tested in that order).  STEP is unused.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Bound the loop by the actual buffer positions rather than by a
     precomputed count: a skipped character consumes input but no
     output, so a fixed count would stop early and the status below
     would wrongly claim incomplete input.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      uint32_t inval;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (*(const uint32_t *) inptr);
#else
      inval = *(const uint32_t *) inptr;
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          /* Report how far we got so the characters converted before
             the offending one are not lost (as ucs4_internal_loop
             does).  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Incrementing the result of a cast is not valid C; store first,
         then advance the byte pointer.  */
      *((uint32_t *) outptr) = inval;
      outptr += sizeof (uint32_t);
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
619 | ||
620 | #ifndef _STRING_ARCH_unaligned | |
/* Byte-wise variant of the little-endian UCS4 -> internal conversion;
   it never accesses the buffers through a uint32_t pointer.

   Characters are converted while at least four input bytes and four
   bytes of output room remain.  A character is illegal when its most
   significant byte (inptr[3]) exceeds 0x7f, i.e. the value is above
   0x7fffffff.

   On an illegal character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
       and *INPTRP / *OUTPTRP are left untouched;
     - else if STEP_DATA->__flags has __GCONV_IGNORE_ERRORS, the character
       is skipped and *IRREVERSIBLE is incremented;
     - otherwise the current positions are stored and
       __GCONV_ILLEGAL_INPUT is returned.

   Otherwise returns __GCONV_EMPTY_INPUT, __GCONV_FULL_OUTPUT or
   __GCONV_INCOMPLETE_INPUT (tested in that order).  STEP is unused.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* Bound the loop by the buffer positions, not by a precomputed count:
     skipped characters consume input but no output.  */
  for (; inend - inptr >= 4 && outend - outptr >= 4; inptr += 4)
    {
      /* 0x80 in the top byte already means a value >= 0x80000000, so
         the limit is 0x7f (matching the 0x7fffffff test of the aligned
         loop).  */
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
688 | #endif | |
689 | ||
690 | ||
/* Finish one little-endian UCS4 -> internal character whose first
   bytes are buffered in the conversion state.

   The low three bits of STEP_DATA->__statep->__count give the number of
   bytes already stored in __value.__wchb.  Input bytes are appended
   until four are available; if the input runs out first the new count
   is recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A completed character whose most significant byte (index 3) exceeds
   0x7f (value above 0x7fffffff) is illegal.  Unless
   STEP_DATA->__flags contains __GCONV_IGNORE_ERRORS, the bytes taken
   from the input in this call are given back and __GCONV_ILLEGAL_INPUT
   is returned.  With the flag set the character is dropped silently.
   A legal character is written in host byte order to *OUTPTRP, which is
   advanced by four.

   On completion the count bits are cleared and __GCONV_OK is returned.
   STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* 0x80 in the top byte already means a value >= 0x80000000, so the
     limit is 0x7f.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed from the input in this call,
             as ucs4_internal_loop_single does; __count still holds the
             number buffered before it.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
747 | ||
748 | #include <iconv/skeleton.c> | |
749 | ||
750 | ||
8619129f UD |
751 | /* Convert from ISO 646-IRV to the internal (UCS4-like) format. */ |
752 | #define DEFINE_INIT 0 | |
753 | #define DEFINE_FINI 0 | |
754 | #define MIN_NEEDED_FROM 1 | |
755 | #define MIN_NEEDED_TO 4 | |
756 | #define FROM_DIRECTION 1 | |
757 | #define FROM_LOOP ascii_internal_loop | |
758 | #define TO_LOOP ascii_internal_loop /* This is not used. */ | |
759 | #define FUNCTION_NAME __gconv_transform_ascii_internal | |
fd1b5c0f | 760 | #define ONE_DIRECTION 1 |
8619129f UD |
761 | |
762 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
763 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
764 | #define LOOPFCT FROM_LOOP | |
/* Loop body for ISO 646-IRV (ASCII) -> internal.  Expanded by the
   included loop template, which presumably supplies `inptr' and
   `outptr'.  Bytes above 0x7f are handed to
   STANDARD_FROM_LOOP_ERR_HANDLER; any other byte is widened to one
   32-bit unit of output.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*inptr > '\x7f', 0))                                \
      {                                                                       \
        /* The value is too large.  We don't try transliteration here since   \
           this is not an error because of the lack of possibilities to       \
           represent the result.  This is a genuine bug in the input since    \
           ASCII does not allow such values.  */                              \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one-byte sequence.  Store first, then advance the byte      \
           pointer: incrementing the result of a cast is not valid C.  */     \
        *((uint32_t *) outptr) = *inptr++;                                    \
        outptr += sizeof (uint32_t);                                          \
      }                                                                       \
  }
55985355 | 779 | #define LOOP_NEED_FLAGS |
8619129f UD |
780 | #include <iconv/loop.c> |
781 | #include <iconv/skeleton.c> | |
782 | ||
783 | ||
784 | /* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ | |
785 | #define DEFINE_INIT 0 | |
786 | #define DEFINE_FINI 0 | |
787 | #define MIN_NEEDED_FROM 4 | |
788 | #define MIN_NEEDED_TO 1 | |
789 | #define FROM_DIRECTION 1 | |
790 | #define FROM_LOOP internal_ascii_loop | |
791 | #define TO_LOOP internal_ascii_loop /* This is not used. */ | |
792 | #define FUNCTION_NAME __gconv_transform_internal_ascii | |
fd1b5c0f | 793 | #define ONE_DIRECTION 1 |
8619129f UD |
794 | |
795 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
796 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
797 | #define LOOPFCT FROM_LOOP | |
/* Loop body for internal -> ISO 646-IRV (ASCII).  Expanded by the
   included loop template, which presumably supplies `inptr' and
   `outptr'.  Values above 0x7f go through UNICODE_TAG_HANDLER and then
   STANDARD_TO_LOOP_ERR_HANDLER; any other value is narrowed to a single
   output byte.  */
#define BODY \
  {                                                                           \
    if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))             \
      {                                                                       \
        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);                 \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* It's a one-byte sequence.  Read first, then advance the byte       \
           pointer: incrementing the result of a cast is not valid C.  */     \
        *outptr++ = *((const uint32_t *) inptr);                              \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
  }
55985355 | 809 | #define LOOP_NEED_FLAGS |
8619129f UD |
810 | #include <iconv/loop.c> |
811 | #include <iconv/skeleton.c> | |
812 | ||
813 | ||
814 | /* Convert from the internal (UCS4-like) format to UTF-8. */ | |
815 | #define DEFINE_INIT 0 | |
816 | #define DEFINE_FINI 0 | |
817 | #define MIN_NEEDED_FROM 4 | |
818 | #define MIN_NEEDED_TO 1 | |
819 | #define MAX_NEEDED_TO 6 | |
820 | #define FROM_DIRECTION 1 | |
821 | #define FROM_LOOP internal_utf8_loop | |
822 | #define TO_LOOP internal_utf8_loop /* This is not used. */ | |
823 | #define FUNCTION_NAME __gconv_transform_internal_utf8 | |
fd1b5c0f | 824 | #define ONE_DIRECTION 1 |
8619129f UD |
825 | |
826 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
827 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
5aa8ff62 | 828 | #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO |
8619129f UD |
829 | #define LOOPFCT FROM_LOOP |
/* Loop body for internal -> UTF-8.  Expanded by the included loop
   template, which presumably supplies `inptr', `outptr', `outend' and
   `result'.  Values below 0x80 are emitted as one byte; values up to
   0x7fffffff as a 2..6 byte sequence (or __GCONV_FULL_OUTPUT if it does
   not fit); anything larger goes to STANDARD_TO_LOOP_ERR_HANDLER.  */
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (wc < 0x80)                                                            \
      /* It's a one-byte sequence.  */                                        \
      *outptr++ = (unsigned char) wc;                                         \
    else if (__builtin_expect (wc <= 0x7fffffff, 1))                          \
      {                                                                       \
        size_t step;                                                          \
        unsigned char *start;                                                 \
                                                                              \
        /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.  */    \
        for (step = 2; step < 6; ++step)                                      \
          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)                   \
            break;                                                            \
                                                                              \
        if (__builtin_expect (outptr + step > outend, 0))                     \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        start = outptr;                                                       \
        /* Lead byte marker: STEP one bits followed by a zero bit.  The       \
           shift is done on an unsigned value so it is well defined.  */      \
        *outptr = (unsigned char) (0xffffff00u >> step);                      \
        outptr += step;                                                       \
        --step;                                                               \
        /* Fill the continuation bytes from the last one backwards.  */       \
        do                                                                    \
          {                                                                   \
            start[step] = 0x80 | (wc & 0x3f);                                 \
            wc >>= 6;                                                         \
          }                                                                   \
        while (--step > 0);                                                   \
        start[0] |= wc;                                                       \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
db2d05f9 | 872 | #define LOOP_NEED_FLAGS |
8619129f UD |
873 | #include <iconv/loop.c> |
874 | #include <iconv/skeleton.c> | |
875 | ||
876 | ||
/* Convert from UTF-8 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: decode one UTF-8 sequence (one to six bytes) at INPTR into a
   32-bit value stored at OUTPTR.  Ill-formed input goes through
   STANDARD_FROM_LOOP_ERR_HANDLER; a well-formed but truncated sequence at
   the end of the input leaves the loop with RESULT set to
   __GCONV_INCOMPLETE_INPUT.  */
#define BODY \
  {                                                                       \
    uint32_t ch;                                                          \
    uint_fast32_t cnt;                                                    \
    uint_fast32_t i;                                                      \
                                                                          \
    /* Next input byte.  */                                               \
    ch = *inptr;                                                          \
                                                                          \
    if (ch < 0x80)                                                        \
      {                                                                   \
        /* One byte sequence.  */                                         \
        cnt = 1;                                                          \
        ++inptr;                                                          \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        if (ch >= 0xc2 && ch < 0xe0)                                      \
          {                                                               \
            /* We expect two bytes.  The first byte cannot be 0xc0 or     \
               0xc1, otherwise the wide character could have been         \
               represented using a single byte.  */                       \
            cnt = 2;                                                      \
            ch &= 0x1f;                                                   \
          }                                                               \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))               \
          {                                                               \
            /* We expect three bytes.  */                                 \
            cnt = 3;                                                      \
            ch &= 0x0f;                                                   \
          }                                                               \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))               \
          {                                                               \
            /* We expect four bytes.  */                                  \
            cnt = 4;                                                      \
            ch &= 0x07;                                                   \
          }                                                               \
        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))               \
          {                                                               \
            /* We expect five bytes.  */                                  \
            cnt = 5;                                                      \
            ch &= 0x03;                                                   \
          }                                                               \
        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))               \
          {                                                               \
            /* We expect six bytes.  */                                   \
            cnt = 6;                                                      \
            ch &= 0x01;                                                   \
          }                                                               \
        else                                                              \
          {                                                               \
            int skipped;                                                  \
                                                                          \
            /* Search the end of this ill-formed UTF-8 character.  This   \
               is the next byte with (x & 0xc0) != 0x80.  At most five    \
               bytes are skipped in one go.  */                           \
            skipped = 0;                                                  \
            do                                                            \
              ++skipped;                                                  \
            while (inptr + skipped < inend                                \
                   && (*(inptr + skipped) & 0xc0) == 0x80                 \
                   && skipped < 5);                                       \
                                                                          \
            STANDARD_FROM_LOOP_ERR_HANDLER (skipped);                     \
          }                                                               \
                                                                          \
        if (__builtin_expect (inptr + cnt > inend, 0))                    \
          {                                                               \
            /* We don't have enough input.  But before we report that     \
               check that all the bytes are correct.  */                  \
            for (i = 1; inptr + i < inend; ++i)                           \
              if ((inptr[i] & 0xc0) != 0x80)                              \
                break;                                                    \
                                                                          \
            if (__builtin_expect (inptr + i == inend, 1))                 \
              {                                                           \
                result = __GCONV_INCOMPLETE_INPUT;                        \
                break;                                                    \
              }                                                           \
                                                                          \
            STANDARD_FROM_LOOP_ERR_HANDLER (i);                           \
          }                                                               \
                                                                          \
        /* Read the possible remaining bytes.  */                         \
        for (i = 1; i < cnt; ++i)                                         \
          {                                                               \
            uint32_t byte = inptr[i];                                     \
                                                                          \
            if ((byte & 0xc0) != 0x80)                                    \
              /* This is an illegal encoding.  */                         \
              break;                                                      \
                                                                          \
            ch <<= 6;                                                     \
            ch |= byte & 0x3f;                                            \
          }                                                               \
                                                                          \
        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.           \
           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could   \
           have been represented with fewer than cnt bytes.  */           \
        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))           \
          {                                                               \
            /* This is an illegal encoding.  */                           \
            STANDARD_FROM_LOOP_ERR_HANDLER (i);                           \
          }                                                               \
                                                                          \
        inptr += cnt;                                                     \
      }                                                                   \
                                                                          \
    /* Now store the result and adjust the output pointer.  The pointer   \
       is advanced explicitly: a cast expression is not an lvalue, so     \
       it must not be post-incremented.  */                               \
    *((uint32_t *) outptr) = ch;                                          \
    outptr += sizeof (uint32_t);                                          \
  }
#define LOOP_NEED_FLAGS
fd1b5c0f UD |
1004 | |
/* Save a truncated UTF-8 sequence in the conversion state.  The bytes from
   *INPTRP up to INEND are folded into one value kept in
   STATE->__value.__wch and their number is recorded in STATE->__count;
   *INPTRP is left equal to INEND.  */
#define STORE_REST \
  {                                                                       \
    /* The caller guarantees that the lead byte is valid and announces    \
       more bytes than the input buffer still holds.  */                  \
    wint_t lead = **inptrp;                                               \
    wint_t acc;                                                           \
    wint_t keep;                                                          \
    size_t missing;                                                       \
                                                                          \
    /* Number of bytes that are being stashed away.  */                   \
    state->__count = inend - *inptrp;                                     \
                                                                          \
    /* Determine the announced sequence length and the payload bits of    \
       the lead byte.  */                                                 \
    if (lead >= 0xc2 && lead < 0xe0)                                      \
      {                                                                   \
        /* Two bytes.  0xc0 and 0xc1 cannot occur: such a character       \
           would fit into a single byte.  */                              \
        missing = 2;                                                      \
        keep = 0x1f;                                                      \
      }                                                                   \
    else if (__builtin_expect ((lead & 0xf0) == 0xe0, 1))                 \
      {                                                                   \
        /* Three bytes.  */                                               \
        missing = 3;                                                      \
        keep = 0x0f;                                                      \
      }                                                                   \
    else if (__builtin_expect ((lead & 0xf8) == 0xf0, 1))                 \
      {                                                                   \
        /* Four bytes.  */                                                \
        missing = 4;                                                      \
        keep = 0x07;                                                      \
      }                                                                   \
    else if (__builtin_expect ((lead & 0xfc) == 0xf8, 1))                 \
      {                                                                   \
        /* Five bytes.  */                                                \
        missing = 5;                                                      \
        keep = 0x03;                                                      \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        /* Six bytes.  */                                                 \
        missing = 6;                                                      \
        keep = 0x01;                                                      \
      }                                                                   \
    acc = lead & keep;                                                    \
                                                                          \
    /* The lead byte itself is accounted for.  */                         \
    missing -= 1;                                                         \
                                                                          \
    /* Fold in every continuation byte that is available.  */             \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp))                       \
      {                                                                   \
        acc = (acc << 6) | (**inptrp & 0x3f);                             \
        missing -= 1;                                                     \
      }                                                                   \
                                                                          \
    /* Leave room for the bytes still to come and remember the value.  */ \
    state->__value.__wch = acc << (missing * 6);                          \
  }
1064 | ||
/* Rebuild in BYTEBUF the bytes of a partial UTF-8 sequence that STORE_REST
   saved in the conversion state, and set INLEN to the number of saved
   bytes (STATE->__count).  Only the first INLEN bytes of BYTEBUF are
   written.
   NOTE(review): the sequence length is inferred from the magnitude of the
   saved value.  A saved lead byte whose payload bits are all zero (e.g. a
   lone 0xe0 or 0xf0) is stored as 0 by STORE_REST and would be unpacked
   here as a two-byte lead (0xc0) -- confirm whether that can happen.  */
#define UNPACK_BYTES \
  {                                                                       \
    wint_t wch = state->__value.__wch;                                    \
    size_t ntotal;                                                        \
    inlen = state->__count;                                               \
                                                                          \
    /* The limits are the inclusive maxima of an N-byte sequence:         \
       11, 16, 21 and 26 payload bits.  */                                \
    if (wch <= 0x7ff)                                                     \
      {                                                                   \
        bytebuf[0] = 0xc0;                                                \
        ntotal = 2;                                                       \
      }                                                                   \
    else if (__builtin_expect (wch <= 0xffff, 1))                         \
      {                                                                   \
        bytebuf[0] = 0xe0;                                                \
        ntotal = 3;                                                       \
      }                                                                   \
    else if (__builtin_expect (wch <= 0x1fffff, 1))                       \
      {                                                                   \
        bytebuf[0] = 0xf0;                                                \
        ntotal = 4;                                                       \
      }                                                                   \
    else if (__builtin_expect (wch <= 0x3ffffff, 1))                      \
      {                                                                   \
        bytebuf[0] = 0xf8;                                                \
        ntotal = 5;                                                       \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        bytebuf[0] = 0xfc;                                                \
        ntotal = 6;                                                       \
      }                                                                   \
                                                                          \
    /* Walk from the last byte of the sequence towards the lead byte.     \
       Six bits are consumed per position, but only positions that were   \
       really saved (index < INLEN) are written.  */                      \
    do                                                                    \
      {                                                                   \
        if (--ntotal < inlen)                                             \
          bytebuf[ntotal] = 0x80 | (wch & 0x3f);                          \
        wch >>= 6;                                                        \
      }                                                                   \
    while (ntotal > 1);                                                   \
                                                                          \
    /* The remaining bits are the payload of the lead byte.  */           \
    bytebuf[0] |= wch;                                                    \
  }
1107 | ||
8619129f UD |
1108 | #include <iconv/loop.c> |
1109 | #include <iconv/skeleton.c> | |
1110 | ||
1111 | ||
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: widen one 16-bit unit read from INPTR to a 32-bit value
   stored at OUTPTR.  Units in 0xd800..0xdfff are passed to
   STANDARD_FROM_LOOP_ERR_HANDLER.  */
#define BODY \
  {                                                                       \
    uint16_t u1 = *((const uint16_t *) inptr);                            \
                                                                          \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                \
      {                                                                   \
        /* Surrogate characters in UCS-2 input are not valid.  Reject     \
           them.  (Catching this here is not security relevant.)  */      \
        STANDARD_FROM_LOOP_ERR_HANDLER (2);                               \
      }                                                                   \
                                                                          \
    /* Store, then advance explicitly: a cast expression is not an        \
       lvalue, so it must not be post-incremented.  */                    \
    *((uint32_t *) outptr) = u1;                                          \
    outptr += sizeof (uint32_t);                                          \
    inptr += 2;                                                           \
  }
#define LOOP_NEED_FLAGS
8619129f UD |
1141 | #include <iconv/loop.c> |
1142 | #include <iconv/skeleton.c> | |
1143 | ||
1144 | ||
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: narrow one 32-bit value read from INPTR to a 16-bit unit
   stored at OUTPTR.  Values of 0x10000 and above go through
   UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER; surrogate values
   are rejected, or skipped and counted in *IRREVERSIBLE when errors are
   being ignored.  */
#define BODY \
  {                                                                       \
    uint32_t val = *((const uint32_t *) inptr);                           \
                                                                          \
    if (__builtin_expect (val >= 0x10000, 0))                             \
      {                                                                   \
        UNICODE_TAG_HANDLER (val, 4);                                     \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                 \
      }                                                                   \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))         \
      {                                                                   \
        /* Surrogate characters in UCS-4 input are not valid.             \
           We must catch this, because the UCS-2 output might be          \
           interpreted as UTF-16 by other programs.  If we let            \
           surrogates pass through, attackers could make a security       \
           hole exploit by synthesizing any desired plane 1-16            \
           character.  */                                                 \
        /* RESULT is set before the test, so it stays at                  \
           __GCONV_ILLEGAL_INPUT even when the character is skipped.  */  \
        result = __GCONV_ILLEGAL_INPUT;                                   \
        if (! ignore_errors_p ())                                         \
          break;                                                          \
        inptr += 4;                                                       \
        ++*irreversible;                                                  \
        continue;                                                         \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        /* Store, then advance explicitly: a cast expression is not an    \
           lvalue, so it must not be post-incremented.  */                \
        *((uint16_t *) outptr) = val;                                     \
        outptr += sizeof (uint16_t);                                      \
        inptr += 4;                                                       \
      }                                                                   \
  }
#define LOOP_NEED_FLAGS
8619129f UD |
1190 | #include <iconv/loop.c> |
1191 | #include <iconv/skeleton.c> | |
9b26f5c4 UD |
1192 | |
1193 | ||
/* Convert from UCS2 in other endianness to the internal (UCS4-like)
   format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: byte-swap one 16-bit unit read from INPTR and widen it to a
   32-bit value stored at OUTPTR.  Surrogate units are rejected, or
   skipped and counted in *IRREVERSIBLE when errors are being ignored.
   NOTE(review): the native-endian UCS2 converter in this file uses
   STANDARD_FROM_LOOP_ERR_HANDLER (2) for the same condition -- confirm
   that this open-coded variant is meant to behave the same way.  */
#define BODY \
  {                                                                       \
    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr));                 \
                                                                          \
    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                \
      {                                                                   \
        /* Surrogate characters in UCS-2 input are not valid.  Reject     \
           them.  (Catching this here is not security relevant.)  */      \
        if (! ignore_errors_p ())                                         \
          {                                                               \
            result = __GCONV_ILLEGAL_INPUT;                               \
            break;                                                        \
          }                                                               \
        inptr += 2;                                                       \
        ++*irreversible;                                                  \
        continue;                                                         \
      }                                                                   \
                                                                          \
    /* Store, then advance explicitly: a cast expression is not an        \
       lvalue, so it must not be post-incremented.  */                    \
    *((uint32_t *) outptr) = u1;                                          \
    outptr += sizeof (uint32_t);                                          \
    inptr += 2;                                                           \
  }
#define LOOP_NEED_FLAGS
9b26f5c4 UD |
1230 | #include <iconv/loop.c> |
1231 | #include <iconv/skeleton.c> | |
1232 | ||
1233 | ||
/* Convert from the internal (UCS4-like) format to UCS2 in other
   endianness.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Loop body: narrow one 32-bit value read from INPTR to a byte-swapped
   16-bit unit stored at OUTPTR.  Values of 0x10000 and above go through
   UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER; surrogate values
   are rejected, or skipped and counted in *IRREVERSIBLE when errors are
   being ignored.
   NOTE(review): the native-endian internal->UCS2 converter in this file
   sets RESULT to __GCONV_ILLEGAL_INPUT before testing ignore_errors_p (),
   whereas here RESULT is only set when errors are not ignored -- confirm
   which behavior is intended.  */
#define BODY \
  {                                                                       \
    uint32_t val = *((const uint32_t *) inptr);                           \
    if (__builtin_expect (val >= 0x10000, 0))                             \
      {                                                                   \
        UNICODE_TAG_HANDLER (val, 4);                                     \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                 \
      }                                                                   \
    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))         \
      {                                                                   \
        /* Surrogate characters in UCS-4 input are not valid.             \
           We must catch this, because the UCS-2 output might be          \
           interpreted as UTF-16 by other programs.  If we let            \
           surrogates pass through, attackers could make a security       \
           hole exploit by synthesizing any desired plane 1-16            \
           character.  */                                                 \
        if (! ignore_errors_p ())                                         \
          {                                                               \
            result = __GCONV_ILLEGAL_INPUT;                               \
            break;                                                        \
          }                                                               \
        inptr += 4;                                                       \
        ++*irreversible;                                                  \
        continue;                                                         \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        /* Store, then advance explicitly: a cast expression is not an    \
           lvalue, so it must not be post-incremented.  */                \
        *((uint16_t *) outptr) = bswap_16 (val);                          \
        outptr += sizeof (uint16_t);                                      \
        inptr += 4;                                                       \
      }                                                                   \
  }
#define LOOP_NEED_FLAGS
9b26f5c4 UD |
1280 | #include <iconv/loop.c> |
1281 | #include <iconv/skeleton.c> |