]>
Commit | Line | Data |
---|---|---|
6973fc01 | 1 | /* Simple transformations functions. |
2b778ceb | 2 | Copyright (C) 1997-2021 Free Software Foundation, Inc. |
6973fc01 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
6973fc01 UD |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
6973fc01 | 15 | |
41bdb6e2 | 16 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
6973fc01 | 19 | |
f1fa8b68 | 20 | #include <byteswap.h> |
55985355 | 21 | #include <dlfcn.h> |
f1fa8b68 | 22 | #include <endian.h> |
f4017d20 | 23 | #include <errno.h> |
6973fc01 | 24 | #include <gconv.h> |
d2374599 | 25 | #include <stdint.h> |
6973fc01 UD |
26 | #include <stdlib.h> |
27 | #include <string.h> | |
28 | #include <wchar.h> | |
29 | #include <sys/param.h> | |
f9ad060c | 30 | #include <gconv_int.h> |
6973fc01 | 31 | |
17427edd | 32 | #define BUILTIN_ALIAS(s1, s2) /* Expands to nothing: aliases need no declaration here.  */ |
f9ad060c UD |
33 | #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
34 | MinF, MaxF, MinT, MaxT) \ | |
17427edd | 35 | extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
a784e502 | 36 | const unsigned char **, const unsigned char *, \
17427edd UD |
37 | unsigned char **, size_t *, int, int);
38 | #include "gconv_builtin.h" /* NOTE(review): presumably invokes the two macros above once per built-in conversion, so each Fct gets an extern prototype -- confirm against gconv_builtin.h.  */ | |
39 | ||
40 | ||
a904b5d9 UD |
41 | #ifndef EILSEQ /* Fall back to EINVAL when <errno.h> provides no EILSEQ.  */
42 | # define EILSEQ EINVAL | |
43 | #endif | |
44 | ||
45 | ||
f9ad060c UD |
/* Single-byte to INTERNAL conversion that accepts ASCII only: a byte
   below 0x80 is returned unchanged, any other byte yields WEOF.  STEP is
   not used.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
56 | ||
57 | ||
f1fa8b68 UD |
58 | /* Transform from the internal, UCS4-like format, to UCS4. The
59 | difference between the internal UCS4 format and the real UCS4 | |
60 | format is, if any, the endianness. Unicode/ISO 10646 says that, | |
61 | unless some higher protocol specifies it differently, the byte | |
62 | order is big endian.  */ | |
8619129f UD |
63 | #define DEFINE_INIT 0
64 | #define DEFINE_FINI 0 | |
65 | #define MIN_NEEDED_FROM 4 | |
66 | #define MIN_NEEDED_TO 4 | |
67 | #define FROM_DIRECTION 1 | |
68 | #define FROM_LOOP internal_ucs4_loop | |
69 | #define TO_LOOP internal_ucs4_loop /* This is not used. */ | |
70 | #define FUNCTION_NAME __gconv_transform_internal_ucs4 /* NOTE(review): presumably the entry point generated by <iconv/skeleton.c>, included below -- confirm.  */ | |
0cdddc25 | 71 | #define ONE_DIRECTION 0 |
8619129f UD |
72 | |
73 | ||
/* Convert whole 4-byte characters from the internal (host byte order)
   format to big-endian UCS4.  As many characters are converted as fit
   into both [*INPTRP, INEND) and [*OUTPTRP, OUTEND); *INPTRP and
   *OUTPTRP are advanced past them.  On little-endian hosts the buffers
   are accessed through uint32_t pointers.  Returns __GCONV_EMPTY_INPUT
   when all input was consumed, __GCONV_FULL_OUTPUT when fewer than four
   output bytes remain, and __GCONV_INCOMPLETE_INPUT otherwise.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t n_chars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order differs from the UCS4 order: swap every character.  */
  const uint32_t *src32 = (const uint32_t *) src;
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left;

  for (left = n_chars; left > 0; --left)
    *dst32++ = bswap_32 (*src32++);

  *inptrp = (const unsigned char *) src32;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both formats are identical: a plain copy is enough.  */
  memcpy (dst, src, n_chars * 4);
  *inptrp = src + n_chars * 4;
  *outptrp = dst + n_chars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  return (*outptrp + 4 > outend
          ? __GCONV_FULL_OUTPUT : __GCONV_INCOMPLETE_INPUT);
}
d2374599 | 115 | |
#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that only uses byte accesses, so neither
   buffer has to be aligned for uint32_t.  Converts whole characters from
   the internal (host byte order) format to big-endian UCS4, advances
   *INPTRP and *OUTPTRP, and returns __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT depending on why it
   stopped.  STEP, STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t n_chars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const src_stop = src + n_chars * 4;

  while (src != src_stop)
    {
      int i;

      for (i = 0; i < 4; ++i)
        dst[i] = src[3 - i];

      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Both formats are identical: a plain copy is enough.  */
  memcpy (dst, src, n_chars * 4);
  *inptrp = src + n_chars * 4;
  *outptrp = dst + n_chars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  return (*outptrp + 4 > outend
          ? __GCONV_FULL_OUTPUT : __GCONV_INCOMPLETE_INPUT);
}
#endif
165 | ||
fd1b5c0f UD |
166 | |
/* Complete a character whose bytes are split across calls.  Bytes seen
   so far live in the conversion state: their number in the low three
   bits of __count, the bytes themselves in __value.__wchb.  Input bytes
   are appended until four are available.  If the input ends first the
   new count is stored and __GCONV_INCOMPLETE_INPUT is returned;
   otherwise the character is written in big-endian order, *OUTPTRP is
   advanced by four, the stored count is cleared and __GCONV_OK is
   returned.  The available output space is not checked here.  STEP,
   OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  const unsigned char *src = *inptrp;
  unsigned char *dst;
  size_t i;

  /* Top up the state buffer from the input.  */
  for (; src < inend && have < 4; ++src)
    state->__value.__wchb[have++] = *src;
  *inptrp = src;

  /* The old byte count is obsolete in either case.  */
  state->__count &= ~7;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes: remember how many are buffered.  */
      state->__count |= have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
  for (i = 0; i < 4; ++i)
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      dst[i] = state->__value.__wchb[3 - i];
#elif __BYTE_ORDER == __BIG_ENDIAN
      dst[i] = state->__value.__wchb[i];
#else
# error "This endianess is not supported."
#endif
    }
  *outptrp = dst + 4;

  return __GCONV_OK;
}
214 | ||
8619129f | 215 | #include <iconv/skeleton.c> |
d2374599 | 216 | |
d2374599 | 217 | |
4a069c33 UD |
218 | /* Transform from UCS4 to the internal, UCS4-like format. Unlike
219 | the other direction, the input values have to be validated here. */ | |
220 | #define DEFINE_INIT 0 | |
221 | #define DEFINE_FINI 0 | |
222 | #define MIN_NEEDED_FROM 4 | |
223 | #define MIN_NEEDED_TO 4 | |
224 | #define FROM_DIRECTION 1 | |
225 | #define FROM_LOOP ucs4_internal_loop | |
226 | #define TO_LOOP ucs4_internal_loop /* This is not used. */ | |
227 | #define FUNCTION_NAME __gconv_transform_ucs4_internal /* NOTE(review): presumably the entry point generated by <iconv/skeleton.c> -- confirm.  */ | |
0cdddc25 | 228 | #define ONE_DIRECTION 0 |
4a069c33 UD |
229 | |
230 | ||
/* Convert big-endian UCS4 from [*INPTRP, INEND) to the internal (host
   byte order) format in [*OUTPTRP, OUTEND), validating every character.
   Both buffers are accessed through uint32_t pointers.

   A value above 0x7fffffff is illegal.  For such a character:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not stored back;
   - if __GCONV_IGNORE_ERRORS is set in the step flags, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, with *INPTRP left at the
     offending character, and __GCONV_ILLEGAL_INPUT is returned.

   When the loop runs to completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const int ignore_errors = step_data->__flags & __GCONV_IGNORE_ERRORS;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t ch = *(const uint32_t *) src;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      ch = bswap_32 (ch);
#endif

      if (__glibc_unlikely (ch > 0x7fffffff))
        {
          /* UCS4 does not allow such values, so this is a genuine input
             error and transliteration cannot help.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (!ignore_errors)
            {
              *inptrp = src;
              *outptrp = dst;
              return __GCONV_ILLEGAL_INPUT;
            }

          /* Just ignore this character.  */
          ++*irreversible;
        }
      else
        {
          *(uint32_t *) dst = ch;
          dst += sizeof (uint32_t);
        }

      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Report why the conversion stopped.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;

  return (dst + 4 > outend
          ? __GCONV_FULL_OUTPUT : __GCONV_INCOMPLETE_INPUT);
}
293 | ||
#if !_STRING_ARCH_unaligned
/* Byte-wise variant of ucs4_internal_loop for buffers that are not
   suitably aligned for uint32_t access.  Converts big-endian UCS4 from
   [*INPTRP, INEND) to the internal (host byte order) format in
   [*OUTPTRP, OUTEND).

   A character is illegal if its most significant byte exceeds 0x7f,
   i.e. if its value is above 0x7fffffff; this is the same limit the
   aligned loop enforces.  For such a character:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not stored back;
   - if __GCONV_IGNORE_ERRORS is set in the step flags, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, with *INPTRP left at the
     offending character, and __GCONV_ILLEGAL_INPUT is returned.

   When the loop runs to completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* inptr[0] is the most significant byte of the big-endian input.
         Anything above 0x7f there means the value exceeds 0x7fffffff.  */
      if (__glibc_unlikely (inptr[0] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
362 | ||
363 | ||
/* Complete a big-endian UCS4 character whose bytes are split across
   calls and convert it to the internal (host byte order) format.  Bytes
   seen so far live in the conversion state: their number in the low
   three bits of __count, the bytes themselves in __value.__wchb.

   Input bytes are appended until four are available; if the input ends
   first the new count is stored and __GCONV_INCOMPLETE_INPUT is
   returned.  A complete character is illegal if its most significant
   byte exceeds 0x7f (value above 0x7fffffff):
   - without __GCONV_IGNORE_ERRORS in the step flags, the bytes consumed
     by this call are given back to the input and __GCONV_ILLEGAL_INPUT
     is returned (the stored count is left untouched);
   - with __GCONV_IGNORE_ERRORS the character is dropped without output
     (*IRREVERSIBLE is not incremented here) and __GCONV_OK is returned.
   A legal character is written, *OUTPTRP advanced by four, the stored
   count cleared and __GCONV_OK returned.  The available output space is
   not checked here.  STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* __wchb[0] is the most significant byte of the big-endian input.
     Anything above 0x7f there means the value exceeds 0x7fffffff.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[0] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes this call took from the input: CNT minus
             the number that was already buffered on entry.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
425 | ||
426 | #include <iconv/skeleton.c> | |
427 | ||
428 | ||
429 | /* Transform from the internal format to UCS4 in little-endian byte order. */ | |
8d617a71 UD |
430 | #define DEFINE_INIT 0
431 | #define DEFINE_FINI 0 | |
432 | #define MIN_NEEDED_FROM 4 | |
433 | #define MIN_NEEDED_TO 4 | |
434 | #define FROM_DIRECTION 1 | |
435 | #define FROM_LOOP internal_ucs4le_loop | |
436 | #define TO_LOOP internal_ucs4le_loop /* This is not used. */ | |
437 | #define FUNCTION_NAME __gconv_transform_internal_ucs4le /* NOTE(review): presumably the entry point generated by <iconv/skeleton.c> -- confirm.  */ | |
0cdddc25 | 438 | #define ONE_DIRECTION 0 |
8d617a71 UD |
439 | |
440 | ||
/* Convert whole 4-byte characters from the internal (host byte order)
   format to little-endian UCS4.  As many characters are converted as
   fit into both [*INPTRP, INEND) and [*OUTPTRP, OUTEND); *INPTRP and
   *OUTPTRP are advanced past them.  On big-endian hosts the buffers are
   accessed through uint32_t pointers.  Returns __GCONV_EMPTY_INPUT when
   all input was consumed, __GCONV_FULL_OUTPUT when fewer than four
   output bytes remain, and __GCONV_INCOMPLETE_INPUT otherwise.  STEP,
   STEP_DATA and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t n_chars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order differs from the target order: swap every character.  */
  const uint32_t *src32 = (const uint32_t *) src;
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left;

  for (left = n_chars; left > 0; --left)
    *dst32++ = bswap_32 (*src32++);

  *inptrp = (const unsigned char *) src32;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both formats are identical: a plain copy is enough.  */
  memcpy (dst, src, n_chars * 4);
  *inptrp = src + n_chars * 4;
  *outptrp = dst + n_chars * 4;
#else
# error "This endianess is not supported."
#endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  return (*outptrp + 4 > outend
          ? __GCONV_FULL_OUTPUT : __GCONV_INCOMPLETE_INPUT);
}
483 | ||
#if !_STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that only uses byte accesses, so
   neither buffer has to be aligned for uint32_t.  Converts whole
   characters from the internal (host byte order) format to little-endian
   UCS4 and advances *INPTRP and *OUTPTRP.  Returns __GCONV_EMPTY_INPUT
   when all input was consumed, __GCONV_INCOMPLETE_INPUT when fewer than
   four input bytes remain, and __GCONV_FULL_OUTPUT otherwise (asserting
   that the output really is full).  STEP, STEP_DATA and IRREVERSIBLE are
   not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t n_chars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const src_stop = src + n_chars * 4;

  while (src != src_stop)
    {
      int i;

      for (i = 0; i < 4; ++i)
        dst[i] = src[3 - i];

      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Both formats are identical: a plain copy is enough.  */
  memcpy (dst, src, n_chars * 4);
  *inptrp = src + n_chars * 4;
  *outptrp = dst + n_chars * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Report why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
536 | ||
fd1b5c0f UD |
537 | |
/* Complete a character whose bytes are split across calls.  Bytes seen
   so far live in the conversion state: their number in the low three
   bits of __count, the bytes themselves in __value.__wchb.  Input bytes
   are appended until four are available.  If the input ends first the
   new count is stored and __GCONV_INCOMPLETE_INPUT is returned;
   otherwise the character is written in little-endian order, *OUTPTRP
   is advanced by four, the stored count is cleared and __GCONV_OK is
   returned.  The available output space is not checked here.  STEP,
   OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  const unsigned char *src = *inptrp;
  unsigned char *dst;
  size_t i;

  /* Top up the state buffer from the input.  */
  for (; src < inend && have < 4; ++src)
    state->__value.__wchb[have++] = *src;
  *inptrp = src;

  /* The old byte count is obsolete in either case.  */
  state->__count &= ~7;

  if (__glibc_unlikely (have < 4))
    {
      /* Still not enough bytes: remember how many are buffered.  */
      state->__count |= have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
  for (i = 0; i < 4; ++i)
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      dst[i] = state->__value.__wchb[3 - i];
#else
      dst[i] = state->__value.__wchb[i];
#endif
    }
  *outptrp = dst + 4;

  return __GCONV_OK;
}
585 | ||
8d617a71 UD |
586 | #include <iconv/skeleton.c> |
587 | ||
588 | ||
4a069c33 UD |
589 | /* And finally from little-endian UCS4 (UCS4-LE) to the internal encoding; the input values have to be validated here, too. */
590 | #define DEFINE_INIT 0 | |
591 | #define DEFINE_FINI 0 | |
592 | #define MIN_NEEDED_FROM 4 | |
593 | #define MIN_NEEDED_TO 4 | |
594 | #define FROM_DIRECTION 1 | |
595 | #define FROM_LOOP ucs4le_internal_loop | |
596 | #define TO_LOOP ucs4le_internal_loop /* This is not used. */ | |
597 | #define FUNCTION_NAME __gconv_transform_ucs4le_internal /* NOTE(review): presumably the entry point generated by <iconv/skeleton.c> -- confirm.  */ | |
0cdddc25 | 598 | #define ONE_DIRECTION 0 |
4a069c33 UD |
599 | |
600 | ||
/* Convert little-endian UCS4 from [*INPTRP, INEND) to the internal (host
   byte order) format in [*OUTPTRP, OUTEND), validating every character.
   Both buffers are accessed through uint32_t pointers.

   A value above 0x7fffffff is illegal.  For such a character:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not stored back;
   - if __GCONV_IGNORE_ERRORS is set in the step flags, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, with *INPTRP left at the
     offending character, and __GCONV_ILLEGAL_INPUT is returned.

   When the loop runs to completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_INCOMPLETE_INPUT (fewer than four input bytes left) or
   __GCONV_FULL_OUTPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const int ignore_errors = step_data->__flags & __GCONV_IGNORE_ERRORS;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t ch = *(const uint32_t *) src;

#if __BYTE_ORDER == __BIG_ENDIAN
      ch = bswap_32 (ch);
#endif

      if (__glibc_unlikely (ch > 0x7fffffff))
        {
          /* UCS4 does not allow such values, so this is a genuine input
             error and transliteration cannot help.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (!ignore_errors)
            {
              *inptrp = src;
              *outptrp = dst;
              return __GCONV_ILLEGAL_INPUT;
            }

          /* Just ignore this character.  */
          ++*irreversible;
        }
      else
        {
          *(uint32_t *) dst = ch;
          dst += sizeof (uint32_t);
        }

      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Report why the conversion stopped.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;

  if (src + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (dst + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
666 | ||
#if !_STRING_ARCH_unaligned
/* Byte-wise variant of ucs4le_internal_loop for buffers that are not
   suitably aligned for uint32_t access.  Converts little-endian UCS4
   from [*INPTRP, INEND) to the internal (host byte order) format in
   [*OUTPTRP, OUTEND).

   A character is illegal if its most significant byte exceeds 0x7f,
   i.e. if its value is above 0x7fffffff; this is the same limit the
   aligned loop enforces.  For such a character:
   - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned at once
     and the pointers are not stored back;
   - if __GCONV_IGNORE_ERRORS is set in the step flags, the character is
     skipped and *IRREVERSIBLE is incremented;
   - otherwise the pointers are stored, with *INPTRP left at the
     offending character, and __GCONV_ILLEGAL_INPUT is returned.

   When the loop runs to completion the result is __GCONV_EMPTY_INPUT,
   __GCONV_INCOMPLETE_INPUT (fewer than four input bytes left) or
   __GCONV_FULL_OUTPUT.  STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* inptr[3] is the most significant byte of the little-endian
         input.  Anything above 0x7f there means the value exceeds
         0x7fffffff.  */
      if (__glibc_unlikely (inptr[3] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here
             since this is not an error because of the lack of
             possibilities to represent the result.  This is a genuine
             bug in the input since UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
739 | ||
740 | ||
/* Complete a little-endian UCS4 character whose bytes are split across
   calls and convert it to the internal (host byte order) format.  Bytes
   seen so far live in the conversion state: their number in the low
   three bits of __count, the bytes themselves in __value.__wchb.

   Input bytes are appended until four are available; if the input ends
   first the new count is stored and __GCONV_INCOMPLETE_INPUT is
   returned.  A complete character is illegal if its most significant
   byte exceeds 0x7f (value above 0x7fffffff):
   - without __GCONV_IGNORE_ERRORS in the step flags,
     __GCONV_ILLEGAL_INPUT is returned (the stored count is left
     untouched);
   - with __GCONV_IGNORE_ERRORS the character is dropped without output
     (*IRREVERSIBLE is not incremented here) and __GCONV_OK is returned.
   A legal character is written, *OUTPTRP advanced by four, the stored
   count cleared and __GCONV_OK returned.  The available output space is
   not checked here.  STEP, OUTEND and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* __wchb[3] is the most significant byte of the little-endian input.
     Anything above 0x7f there means the value exceeds 0x7fffffff.  */
  if (__glibc_unlikely (((unsigned char *) state->__value.__wchb)[3] > 0x7f))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      /* NOTE(review): unlike ucs4_internal_loop_single, *INPTRP is not
         moved back over the bytes consumed by this call before
         returning -- confirm whether that difference is intended.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        return __GCONV_ILLEGAL_INPUT;
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
799 | ||
800 | #include <iconv/skeleton.c> | |
801 | ||
802 | ||
8619129f UD |
803 | /* Convert from ISO 646-IRV (ASCII) to the internal (UCS4-like) format. */
804 | #define DEFINE_INIT 0 | |
805 | #define DEFINE_FINI 0 | |
806 | #define MIN_NEEDED_FROM 1 | |
807 | #define MIN_NEEDED_TO 4 | |
808 | #define FROM_DIRECTION 1 | |
809 | #define FROM_LOOP ascii_internal_loop | |
810 | #define TO_LOOP ascii_internal_loop /* This is not used. */ | |
811 | #define FUNCTION_NAME __gconv_transform_ascii_internal | |
fd1b5c0f | 812 | #define ONE_DIRECTION 1 |
8619129f UD |
813 |
814 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
815 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
816 | #define LOOPFCT FROM_LOOP | |
817 | #define BODY \ | |
818 | { \ | |
a1ffb40e | 819 | if (__glibc_unlikely (*inptr > '\x7f')) \
8619129f | 820 | { \
55985355 UD |
821 | /* The byte is outside the ASCII range. No transliteration is \
822 | attempted: the failure is not caused by a character that \ | |
823 | cannot be represented but by invalid input, since \ | |
824 | ASCII does not allow such values. */ \ | |
e438a468 | 825 | STANDARD_FROM_LOOP_ERR_HANDLER (1); \
85830c4c UD |
826 | } \
827 | else \ | |
5deca9bb UD |
828 | { \
829 | /* A one-byte character: store it as a 32-bit value. */ \ | |
830 | *((uint32_t *) outptr) = *inptr++; \ | |
831 | outptr += sizeof (uint32_t); \ | |
832 | } \ | |
8619129f | 833 | }
55985355 | 834 | #define LOOP_NEED_FLAGS
8619129f UD |
835 | #include <iconv/loop.c>
836 | #include <iconv/skeleton.c> | |
837 | ||
838 | ||
839 | /* Convert from the internal (UCS4-like) format to ISO 646-IRV. Every 32-bit value up to 0x7f becomes one output byte; larger values take the error path. */ | |
840 | #define DEFINE_INIT 0 | |
841 | #define DEFINE_FINI 0 | |
842 | #define MIN_NEEDED_FROM 4 | |
843 | #define MIN_NEEDED_TO 1 | |
844 | #define FROM_DIRECTION 1 | |
845 | #define FROM_LOOP internal_ascii_loop | |
846 | #define TO_LOOP internal_ascii_loop /* This is not used. */ | |
847 | #define FUNCTION_NAME __gconv_transform_internal_ascii | |
fd1b5c0f | 848 | #define ONE_DIRECTION 1 |
8619129f UD |
849 | |
850 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
851 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
852 | #define LOOPFCT FROM_LOOP | |
853 | #define BODY \ | |
854 | { \ | |
a1ffb40e | 855 | if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \ |
8619129f | 856 | { \ |
601d2942 | 857 | UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); /* NOTE(review): defined outside this chunk; presumably skips Unicode tag characters - confirm. */ \ |
e438a468 | 858 | STANDARD_TO_LOOP_ERR_HANDLER (4); \ |
85830c4c UD |
859 | } \ |
860 | else \ | |
5deca9bb UD |
861 | { \ |
862 | /* A one byte sequence: store the low byte of the 32-bit value. */ \ | |
863 | *outptr++ = *((const uint32_t *) inptr); \ | |
864 | inptr += sizeof (uint32_t); \ | |
865 | } \ | |
8619129f | 866 | } |
55985355 | 867 | #define LOOP_NEED_FLAGS |
8619129f UD |
868 | #include <iconv/loop.c> |
869 | #include <iconv/skeleton.c> | |
870 | ||
871 | ||
872 | /* Convert from the internal (UCS4-like) format to UTF-8. Values below 0x80 are copied as one byte, other values up to 0x7fffffff except surrogates are written as two to six bytes, and everything else takes the error path. */ | |
873 | #define DEFINE_INIT 0 | |
874 | #define DEFINE_FINI 0 | |
875 | #define MIN_NEEDED_FROM 4 | |
876 | #define MIN_NEEDED_TO 1 | |
877 | #define MAX_NEEDED_TO 6 | |
878 | #define FROM_DIRECTION 1 | |
879 | #define FROM_LOOP internal_utf8_loop | |
880 | #define TO_LOOP internal_utf8_loop /* This is not used. */ | |
881 | #define FUNCTION_NAME __gconv_transform_internal_utf8 | |
fd1b5c0f | 882 | #define ONE_DIRECTION 1 |
8619129f UD |
883 | |
884 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
885 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
5aa8ff62 | 886 | #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO |
8619129f UD |
887 | #define LOOPFCT FROM_LOOP |
888 | #define BODY \ | |
889 | { \ | |
17427edd | 890 | uint32_t wc = *((const uint32_t *) inptr); \ |
8619129f | 891 | \ |
a1ffb40e | 892 | if (__glibc_likely (wc < 0x80)) \ |
8619129f UD |
893 | /* A one byte sequence: the value is stored unchanged. */ \ |
894 | *outptr++ = (unsigned char) wc; \ | |
7ab1de21 SL |
895 | else if (__glibc_likely (wc <= 0x7fffffff \ |
896 | && (wc < 0xd800 || wc > 0xdfff))) /* At most 31 bits and not a surrogate. */ \ | |
8619129f UD |
897 | { \ |
898 | size_t step; \ | |
3cc4a097 | 899 | unsigned char *start; \ |
8619129f UD |
900 | \ |
901 | for (step = 2; step < 6; ++step) /* Find the sequence length: STEP bytes carry 5 * STEP + 1 payload bits. */ \ | |
b79f74cd | 902 | if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \ |
8619129f UD |
903 | break; \ |
904 | \ | |
a1ffb40e | 905 | if (__glibc_unlikely (outptr + step > outend)) \ |
8619129f UD |
906 | { \ |
907 | /* The sequence does not fit into the remaining output space. */ \ | |
d64b6ad0 | 908 | result = __GCONV_FULL_OUTPUT; \ |
8619129f UD |
909 | break; \ |
910 | } \ | |
911 | \ | |
912 | start = outptr; \ | |
b79f74cd | 913 | *outptr = (unsigned char) (~0xff >> step); /* Lead byte marker with the top STEP bits set; ~0xff is negative, so this relies on a sign-propagating right shift. */ \ |
8619129f | 914 | outptr += step; \ |
8619129f UD |
915 | do /* Fill the continuation bytes from last to first, six payload bits each. */ \ |
916 | { \ | |
347bace2 | 917 | start[--step] = 0x80 | (wc & 0x3f); \ |
8619129f UD |
918 | wc >>= 6; \ |
919 | } \ | |
347bace2 | 920 | while (step > 1); \ |
8619129f | 921 | start[0] |= wc; /* The remaining high bits go into the lead byte. */ \ |
db2d05f9 UD |
922 | } \ |
923 | else \ | |
924 | { \ | |
e438a468 | 925 | STANDARD_TO_LOOP_ERR_HANDLER (4); \ |
8619129f UD |
926 | } \ |
927 | \ | |
928 | inptr += 4; \ | |
929 | } | |
db2d05f9 | 930 | #define LOOP_NEED_FLAGS |
8619129f UD |
931 | #include <iconv/loop.c> |
932 | #include <iconv/skeleton.c> | |
933 | ||
934 | ||
935 | /* Convert from UTF-8 to the internal (UCS4-like) format. Sequences of one to six bytes are decoded; bad lead or trail bytes, overlong forms and UTF-16 surrogates take the error path. */ | |
936 | #define DEFINE_INIT 0 | |
937 | #define DEFINE_FINI 0 | |
938 | #define MIN_NEEDED_FROM 1 | |
939 | #define MAX_NEEDED_FROM 6 | |
940 | #define MIN_NEEDED_TO 4 | |
941 | #define FROM_DIRECTION 1 | |
942 | #define FROM_LOOP utf8_internal_loop | |
943 | #define TO_LOOP utf8_internal_loop /* This is not used. */ | |
944 | #define FUNCTION_NAME __gconv_transform_utf8_internal | |
fd1b5c0f | 945 | #define ONE_DIRECTION 1 |
8619129f UD |
946 | |
947 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
5aa8ff62 | 948 | #define MAX_NEEDED_INPUT MAX_NEEDED_FROM |
8619129f UD |
949 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO |
950 | #define LOOPFCT FROM_LOOP | |
951 | #define BODY \ | |
952 | { \ | |
8619129f | 953 | /* Next input byte. */ \ |
26a51060 | 954 | uint32_t ch = *inptr; \ |
8619129f | 955 | \ |
a1ffb40e | 956 | if (__glibc_likely (ch < 0x80)) \ |
8619129f | 957 | { \ |
5aa8ff62 | 958 | /* One byte sequence. */ \ |
5aa8ff62 | 959 | ++inptr; \ |
8619129f UD |
960 | } \ |
961 | else \ | |
962 | { \ | |
26a51060 UD |
963 | uint_fast32_t cnt; \ |
964 | uint_fast32_t i; \ | |
965 | \ | |
9ea2c194 | 966 | if (ch >= 0xc2 && ch < 0xe0) \ |
5aa8ff62 | 967 | { \ |
9ea2c194 AJ |
968 | /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ |
969 | otherwise the wide character could have been represented \ | |
970 | using a single byte. */ \ | |
5aa8ff62 UD |
971 | cnt = 2; \ |
972 | ch &= 0x1f; \ | |
973 | } \ | |
a1ffb40e | 974 | else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ |
5aa8ff62 UD |
975 | { \ |
976 | /* We expect three bytes. */ \ | |
977 | cnt = 3; \ | |
978 | ch &= 0x0f; \ | |
979 | } \ | |
a1ffb40e | 980 | else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ |
5aa8ff62 UD |
981 | { \ |
982 | /* We expect four bytes. */ \ | |
983 | cnt = 4; \ | |
984 | ch &= 0x07; \ | |
985 | } \ | |
a1ffb40e | 986 | else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ |
5aa8ff62 UD |
987 | { \ |
988 | /* We expect five bytes. */ \ | |
989 | cnt = 5; \ | |
990 | ch &= 0x03; \ | |
991 | } \ | |
a1ffb40e | 992 | else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \ |
5aa8ff62 UD |
993 | { \ |
994 | /* We expect six bytes. */ \ | |
995 | cnt = 6; \ | |
996 | ch &= 0x01; \ | |
997 | } \ | |
998 | else \ | |
8619129f | 999 | { \ |
85830c4c UD |
1000 | /* Search the end of this ill-formed UTF-8 character. This \ |
1001 | is the next byte with (x & 0xc0) != 0x80; at most five bytes are skipped. */ \ | |
347bace2 | 1002 | i = 0; \ |
e438a468 | 1003 | do \ |
347bace2 UD |
1004 | ++i; \ |
1005 | while (inptr + i < inend \ | |
1006 | && (*(inptr + i) & 0xc0) == 0x80 \ | |
1007 | && i < 5); \ | |
85830c4c | 1008 | \ |
347bace2 UD |
1009 | errout: \ |
1010 | STANDARD_FROM_LOOP_ERR_HANDLER (i); \ | |
8619129f UD |
1011 | } \ |
1012 | \ | |
a1ffb40e | 1013 | if (__glibc_unlikely (inptr + cnt > inend)) \ |
5aa8ff62 | 1014 | { \ |
fd1b5c0f UD |
1015 | /* We don't have enough input. But before we report that, check \ |
1016 | that all the bytes present are continuation bytes. */ \ | |
1017 | for (i = 1; inptr + i < inend; ++i) \ | |
1018 | if ((inptr[i] & 0xc0) != 0x80) \ | |
1019 | break; \ | |
85830c4c | 1020 | \ |
a1ffb40e | 1021 | if (__glibc_likely (inptr + i == inend)) \ |
85830c4c UD |
1022 | { \ |
1023 | result = __GCONV_INCOMPLETE_INPUT; \ | |
1024 | break; \ | |
1025 | } \ | |
1026 | \ | |
347bace2 | 1027 | goto errout; \ |
5aa8ff62 UD |
1028 | } \ |
1029 | \ | |
1030 | /* Read the possible remaining bytes. */ \ | |
1031 | for (i = 1; i < cnt; ++i) \ | |
1032 | { \ | |
1033 | uint32_t byte = inptr[i]; \ | |
1034 | \ | |
1035 | if ((byte & 0xc0) != 0x80) \ | |
bd32e4a6 UD |
1036 | /* This is an illegal encoding. */ \ |
1037 | break; \ | |
5aa8ff62 UD |
1038 | \ |
1039 | ch <<= 6; \ | |
1040 | ch |= byte & 0x3f; \ | |
1041 | } \ | |
85830c4c | 1042 | \ |
bd32e4a6 UD |
1043 | /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ |
1044 | If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ | |
1045 | have been represented with fewer than cnt bytes. */ \ | |
9c32c895 UD |
1046 | if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ |
1047 | /* Do not accept UTF-16 surrogates. */ \ | |
1048 | || (ch >= 0xd800 && ch <= 0xdfff)) \ | |
bd32e4a6 UD |
1049 | { \ |
1050 | /* This is an illegal encoding. */ \ | |
347bace2 | 1051 | goto errout; \ |
bd32e4a6 UD |
1052 | } \ |
1053 | \ | |
5aa8ff62 | 1054 | inptr += cnt; \ |
8619129f UD |
1055 | } \ |
1056 | \ | |
1057 | /* Now adjust the pointers and store the result. */ \ | |
cdda3d7d AJ |
1058 | *((uint32_t *) outptr) = ch; \ |
1059 | outptr += sizeof (uint32_t); \ | |
8619129f | 1060 | } |
55985355 | 1061 | #define LOOP_NEED_FLAGS |
fd1b5c0f UD |
1062 | |
1063 | #define STORE_REST \ | |
1064 | { \ | |
1065 | /* We store the remaining bytes while converting them into the UCS4 \ | |
1066 | format. We can assume that the first byte in the buffer is \ | |
1067 | correct and that it requires a larger number of bytes than there \ | |
1068 | are in the input buffer. */ \ | |
1069 | wint_t ch = **inptrp; \ | |
ea31b613 | 1070 | size_t cnt, r; \ |
fd1b5c0f UD |
1071 | \ |
1072 | state->__count = inend - *inptrp; /* Number of bytes available now; the expected length is ORed in above bit 8 further down. */ \ | |
1073 | \ | |
9954432e | 1074 | assert (ch != 0xc0 && ch != 0xc1); \ |
fd1b5c0f UD |
1075 | if (ch >= 0xc2 && ch < 0xe0) \ |
1076 | { \ | |
1077 | /* We expect two bytes. The first byte cannot be 0xc0 or \ | |
1078 | 0xc1, otherwise the wide character could have been \ | |
1079 | represented using a single byte. */ \ | |
1080 | cnt = 2; \ | |
1081 | ch &= 0x1f; \ | |
1082 | } \ | |
a1ffb40e | 1083 | else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \ |
fd1b5c0f UD |
1084 | { \ |
1085 | /* We expect three bytes. */ \ | |
1086 | cnt = 3; \ | |
1087 | ch &= 0x0f; \ | |
1088 | } \ | |
a1ffb40e | 1089 | else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \ |
fd1b5c0f UD |
1090 | { \ |
1091 | /* We expect four bytes. */ \ | |
1092 | cnt = 4; \ | |
1093 | ch &= 0x07; \ | |
1094 | } \ | |
a1ffb40e | 1095 | else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \ |
fd1b5c0f UD |
1096 | { \ |
1097 | /* We expect five bytes. */ \ | |
1098 | cnt = 5; \ | |
1099 | ch &= 0x03; \ | |
1100 | } \ | |
1101 | else \ | |
1102 | { \ | |
1103 | /* We expect six bytes. */ \ | |
1104 | cnt = 6; \ | |
1105 | ch &= 0x01; \ | |
1106 | } \ | |
1107 | \ | |
1108 | /* The first byte is already consumed; R counts the trail bytes still missing. */ \ | |
ea31b613 | 1109 | r = cnt - 1; \ |
fd1b5c0f UD |
1110 | while (++(*inptrp) < inend) /* Fold in the trail bytes that are present. */ \ |
1111 | { \ | |
1112 | ch <<= 6; \ | |
1113 | ch |= **inptrp & 0x3f; \ | |
ea31b613 | 1114 | --r; \ |
fd1b5c0f UD |
1115 | } \ |
1116 | \ | |
1117 | /* Shift for the so far missing bytes. */ \ | |
ea31b613 UD |
1118 | ch <<= r * 6; \ |
1119 | \ | |
1120 | /* Store the number of bytes expected for the entire sequence. */ \ | |
41f112ad | 1121 | state->__count |= cnt << 8; \ |
fd1b5c0f UD |
1122 | \ |
1123 | /* Store the value. */ \ | |
1124 | state->__value.__wch = ch; \ | |
1125 | } | |
1126 | ||
1127 | #define UNPACK_BYTES /* Rebuild in BYTEBUF the input bytes described by the saved state. */ \ | |
1128 | { \ | |
ea31b613 | 1129 | static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; /* Lead byte markers for sequences of 2 to 6 bytes. */ \ |
fd1b5c0f | 1130 | wint_t wch = state->__value.__wch; \ |
41f112ad | 1131 | size_t ntotal = state->__count >> 8; /* Expected length of the whole sequence. */ \ |
ea31b613 | 1132 | \ |
41f112ad | 1133 | inlen = state->__count & 255; /* Number of bytes that were stored. */ \ |
fd1b5c0f | 1134 | \ |
ea31b613 | 1135 | bytebuf[0] = inmask[ntotal - 2]; \ |
fd1b5c0f | 1136 | \ |
cd201e38 UD |
1137 | do /* Walk the trail positions from last to first, six bits per position. */ \ |
1138 | { \ | |
1139 | if (--ntotal < inlen) /* Only positions below INLEN are written. */ \ | |
1140 | bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ | |
1141 | wch >>= 6; \ | |
1142 | } \ | |
1143 | while (ntotal > 1); \ | |
fd1b5c0f UD |
1144 | \ |
1145 | bytebuf[0] |= wch; /* The remaining bits belong to the lead byte. */ \ | |
1146 | } | |
1147 | ||
41f112ad UD |
1148 | #define CLEAR_STATE \ |
1149 | state->__count = 0 /* Drops both the stored byte count and the expected length kept in __count. */ | |
1150 | |
1151 | |
8619129f UD |
1152 | #include <iconv/loop.c> |
1153 | #include <iconv/skeleton.c> | |
1154 | ||
1155 | ||
1156 | /* Convert from UCS2 to the internal (UCS4-like) format. Each 16-bit unit read with get16 is stored as one 32-bit unit; surrogate values take the error path. */ | |
1157 | #define DEFINE_INIT 0 | |
1158 | #define DEFINE_FINI 0 | |
1159 | #define MIN_NEEDED_FROM 2 | |
1160 | #define MIN_NEEDED_TO 4 | |
1161 | #define FROM_DIRECTION 1 | |
1162 | #define FROM_LOOP ucs2_internal_loop | |
1163 | #define TO_LOOP ucs2_internal_loop /* This is not used. */ | |
1164 | #define FUNCTION_NAME __gconv_transform_ucs2_internal | |
fd1b5c0f | 1165 | #define ONE_DIRECTION 1 |
8619129f UD |
1166 | |
1167 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1168 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1169 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1170 | #define BODY \ |
755104ed | 1171 | { \ |
606135cf | 1172 | uint16_t u1 = get16 (inptr); \ |
755104ed | 1173 | \ |
a1ffb40e | 1174 | if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ |
755104ed UD |
1175 | { \ |
1176 | /* Surrogate characters in UCS-2 input are not valid. Reject \ | |
1177 | them. (Catching this here is not security relevant.) */ \ | |
e438a468 | 1178 | STANDARD_FROM_LOOP_ERR_HANDLER (2); \ |
755104ed UD |
1179 | } \ |
1180 | \ | |
cdda3d7d AJ |
1181 | *((uint32_t *) outptr) = u1; /* Widen the 16-bit unit to 32 bits. */ \ |
1182 | outptr += sizeof (uint32_t); \ | |
755104ed UD |
1183 | inptr += 2; \ |
1184 | } | |
1185 | #define LOOP_NEED_FLAGS | |
8619129f UD |
1186 | #include <iconv/loop.c> |
1187 | #include <iconv/skeleton.c> | |
1188 | ||
1189 | ||
1190 | /* Convert from the internal (UCS4-like) format to UCS2. Values from 0x10000 upwards and surrogate values are not stored; all other values are written with put16. */ | |
1191 | #define DEFINE_INIT 0 | |
1192 | #define DEFINE_FINI 0 | |
1193 | #define MIN_NEEDED_FROM 4 | |
1194 | #define MIN_NEEDED_TO 2 | |
1195 | #define FROM_DIRECTION 1 | |
1196 | #define FROM_LOOP internal_ucs2_loop | |
1197 | #define TO_LOOP internal_ucs2_loop /* This is not used. */ | |
1198 | #define FUNCTION_NAME __gconv_transform_internal_ucs2 | |
fd1b5c0f | 1199 | #define ONE_DIRECTION 1 |
8619129f UD |
1200 | |
1201 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1202 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1203 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1204 | #define BODY \ |
8619129f | 1205 | { \ |
17427edd | 1206 | uint32_t val = *((const uint32_t *) inptr); \ |
755104ed | 1207 | \ |
a1ffb40e | 1208 | if (__glibc_unlikely (val >= 0x10000)) \ |
8619129f | 1209 | { \ |
601d2942 | 1210 | UNICODE_TAG_HANDLER (val, 4); \ |
e438a468 | 1211 | STANDARD_TO_LOOP_ERR_HANDLER (4); \ |
8619129f | 1212 | } \ |
a1ffb40e | 1213 | else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ |
755104ed UD |
1214 | { \ |
1215 | /* Surrogate characters in UCS-4 input are not valid. \ | |
1216 | We must catch this, because the UCS-2 output might be \ | |
1217 | interpreted as UTF-16 by other programs. If we let \ | |
1218 | surrogates pass through, attackers could make a security \ | |
1219 | hole exploit by synthesizing any desired plane 1-16 \ | |
1220 | character. */ \ | |
e438a468 | 1221 | result = __GCONV_ILLEGAL_INPUT; /* Set before the ignore check, so this branch leaves it set when the character is skipped. */ \ |
755104ed | 1222 | if (! ignore_errors_p ()) \ |
e438a468 | 1223 | break; \ |
755104ed UD |
1224 | inptr += 4; \ |
1225 | ++*irreversible; \ | |
1226 | continue; \ | |
1227 | } \ | |
9ea2c194 | 1228 | else \ |
755104ed | 1229 | { \ |
606135cf | 1230 | put16 (outptr, val); \ |
db6af3eb | 1231 | outptr += sizeof (uint16_t); \ |
755104ed UD |
1232 | inptr += 4; \ |
1233 | } \ | |
8619129f | 1234 | } |
55985355 | 1235 | #define LOOP_NEED_FLAGS |
8619129f UD |
1236 | #include <iconv/loop.c> |
1237 | #include <iconv/skeleton.c> | |
9b26f5c4 UD |
1238 | |
1239 | ||
428bcea4 | 1240 | /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. Each 16-bit unit is byte-swapped and stored as one 32-bit unit; surrogate values are rejected. */ |
9b26f5c4 UD |
1241 | #define DEFINE_INIT 0 |
1242 | #define DEFINE_FINI 0 | |
1243 | #define MIN_NEEDED_FROM 2 | |
1244 | #define MIN_NEEDED_TO 4 | |
1245 | #define FROM_DIRECTION 1 | |
428bcea4 UD |
1246 | #define FROM_LOOP ucs2reverse_internal_loop |
1247 | #define TO_LOOP ucs2reverse_internal_loop /* This is not used. */ | |
8d617a71 | 1248 | #define FUNCTION_NAME __gconv_transform_ucs2reverse_internal |
fd1b5c0f | 1249 | #define ONE_DIRECTION 1 |
9b26f5c4 UD |
1250 | |
1251 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1252 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1253 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1254 | #define BODY \ |
755104ed | 1255 | { \ |
606135cf | 1256 | uint16_t u1 = bswap_16 (get16 (inptr)); \ |
755104ed | 1257 | \ |
a1ffb40e | 1258 | if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ |
755104ed UD |
1259 | { \ |
1260 | /* Surrogate characters in UCS-2 input are not valid. Reject \ | |
1261 | them. (Catching this here is not security relevant.) */ \ | |
1262 | result = __GCONV_ILLEGAL_INPUT; \ | |
1263 | if (! ignore_errors_p ()) \ | |
1264 | break; \ | |
1265 | /* The surrogate is skipped, but RESULT stays set so that a run \ | |
1266 | which ignores errors reports it like the native-endian loop. */ \ | |
1267 | inptr += 2; \ | |
1268 | ++*irreversible; \ | |
1269 | continue; \ | |
1270 | } \ | |
1271 | \ | |
cdda3d7d AJ |
1272 | *((uint32_t *) outptr) = u1; /* Widen the swapped 16-bit unit to 32 bits. */ \ |
1273 | outptr += sizeof (uint32_t); \ | |
755104ed UD |
1274 | inptr += 2; \ |
1275 | } | |
1276 | #define LOOP_NEED_FLAGS | |
9b26f5c4 UD |
1277 | #include <iconv/loop.c> |
1278 | #include <iconv/skeleton.c> | |
1279 | ||
1280 | ||
428bcea4 | 1281 | /* Convert from the internal (UCS4-like) format to UCS2 in other endianness. Values from 0x10000 upwards and surrogate values are not stored; all other values are byte-swapped and written with put16. */ |
9b26f5c4 UD |
1282 | #define DEFINE_INIT 0 |
1283 | #define DEFINE_FINI 0 | |
1284 | #define MIN_NEEDED_FROM 4 | |
1285 | #define MIN_NEEDED_TO 2 | |
1286 | #define FROM_DIRECTION 1 | |
428bcea4 UD |
1287 | #define FROM_LOOP internal_ucs2reverse_loop |
1288 | #define TO_LOOP internal_ucs2reverse_loop /* This is not used. */ | |
8d617a71 | 1289 | #define FUNCTION_NAME __gconv_transform_internal_ucs2reverse |
fd1b5c0f | 1290 | #define ONE_DIRECTION 1 |
9b26f5c4 UD |
1291 | |
1292 | #define MIN_NEEDED_INPUT MIN_NEEDED_FROM | |
1293 | #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO | |
1294 | #define LOOPFCT FROM_LOOP | |
428bcea4 | 1295 | #define BODY \ |
9b26f5c4 | 1296 | { \ |
17427edd | 1297 | uint32_t val = *((const uint32_t *) inptr); \ |
a1ffb40e | 1298 | if (__glibc_unlikely (val >= 0x10000)) \ |
9b26f5c4 | 1299 | { \ |
601d2942 | 1300 | UNICODE_TAG_HANDLER (val, 4); \ |
e438a468 | 1301 | STANDARD_TO_LOOP_ERR_HANDLER (4); \ |
9b26f5c4 | 1302 | } \ |
a1ffb40e | 1303 | else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ |
755104ed UD |
1304 | { \ |
1305 | /* Surrogate characters in UCS-4 input are not valid. \ | |
1306 | We must catch this, because the UCS-2 output might be \ | |
1307 | interpreted as UTF-16 by other programs. If we let \ | |
1308 | surrogates pass through, attackers could make a security \ | |
1309 | hole exploit by synthesizing any desired plane 1-16 \ | |
1310 | character. */ \ | |
1311 | result = __GCONV_ILLEGAL_INPUT; \ | |
1312 | if (! ignore_errors_p ()) \ | |
1313 | break; \ | |
1314 | /* The surrogate is skipped, but RESULT stays set so that a run \ | |
1315 | which ignores errors reports it like the native-endian loop. */ \ | |
1316 | inptr += 4; \ | |
1317 | ++*irreversible; \ | |
1318 | continue; \ | |
1319 | } \ | |
9ea2c194 | 1320 | else \ |
755104ed | 1321 | { \ |
606135cf | 1322 | put16 (outptr, bswap_16 (val)); \ |
cdda3d7d | 1323 | outptr += sizeof (uint16_t); \ |
755104ed UD |
1324 | inptr += 4; \ |
1325 | } \ | |
9b26f5c4 | 1326 | } |
55985355 | 1327 | #define LOOP_NEED_FLAGS |
9b26f5c4 UD |
1328 | #include <iconv/loop.c> |
1329 | #include <iconv/skeleton.c> |