]>
Commit | Line | Data |
---|---|---|
8619129f | 1 | /* Conversion loop framework. |
9a1f6754 | 2 | Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. |
8619129f UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
8619129f UD |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
8619129f | 15 | |
41bdb6e2 AJ |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
8619129f UD |
20 | |
21 | /* This file provides a frame for the reader loop in all conversion modules. | |
22 | The actual code must (of course) be provided in the actual module source | |
23 | code but certain actions can be written down generically, with some | |
24 | customization options which are these: | |
25 | ||
26 | MIN_NEEDED_INPUT minimal number of input bytes needed for the next | |
27 | conversion. | |
28 | MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round | |
29 | of conversion. | |
30 | ||
31 | MAX_NEEDED_INPUT you guessed it, this is the maximal number of input | |
32 | bytes needed. It defaults to MIN_NEEDED_INPUT | |
33 | MAX_NEEDED_OUTPUT likewise for output bytes. | |
34 | ||
8619129f UD |
35 | LOOPFCT name of the function created. If not specified |
36 | the name is `loop' but this prevents the use | |
37 | of multiple functions in the same file. | |
38 | ||
8619129f UD |
39 | BODY this is supposed to expand to the body of the loop. |
40 | The user must provide this. | |
28f1c862 | 41 | |
66175fa8 UD |
42 | EXTRA_LOOP_DECLS extra arguments passed from conversion loop call. |
43 | ||
44 | INIT_PARAMS code to define and initialize variables from params. | |
45 | UPDATE_PARAMS code to store result in params. | |
8619129f UD |
46 | */ |
47 | ||
fd1b5c0f | 48 | #include <assert.h> |
b35e58e4 | 49 | #include <endian.h> |
8619129f | 50 | #include <gconv.h> |
b35e58e4 UD |
51 | #include <stdint.h> |
52 | #include <string.h> | |
d64b6ad0 | 53 | #include <wchar.h> |
8619129f UD |
54 | #include <sys/param.h> /* For MIN. */ |
55 | #define __need_size_t | |
56 | #include <stddef.h> | |
57 | ||
58 | ||
b35e58e4 UD |
59 | /* We have to provide support for machines which are not able to handle |
60 | unaligned memory accesses. Some of the character encodings have | |
61 | representations with a fixed width of 2 or 4 bytes. But if we cannot | |
62 | access unaligned memory we still have to read byte-wise. */ | |
63 | #undef FCTNAME2 | |
64 | #if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED | |
65 | /* We can handle unaligned memory access. */ | |
b91d5eda UD |
66 | # define get16(addr) *((__const uint16_t *) (addr)) |
67 | # define get32(addr) *((__const uint32_t *) (addr)) | |
b35e58e4 UD |
68 | |
69 | /* We need no special support for writing values either. */ | |
70 | # define put16(addr, val) *((uint16_t *) (addr)) = (val) | |
71 | # define put32(addr, val) *((uint32_t *) (addr)) = (val) | |
72 | ||
73 | # define FCTNAME2(name) name | |
74 | #else | |
75 | /* Distinguish between big endian and little endian. */ | |
76 | # if __BYTE_ORDER == __LITTLE_ENDIAN | |
77 | # define get16(addr) \ | |
78 | (((__const unsigned char *) (addr))[1] << 8 \ | |
79 | | ((__const unsigned char *) (addr))[0]) | |
80 | # define get32(addr) \ | |
81 | (((((__const unsigned char *) (addr))[3] << 8 \ | |
82 | | ((__const unsigned char *) (addr))[2]) << 8 \ | |
83 | | ((__const unsigned char *) (addr))[1]) << 8 \ | |
84 | | ((__const unsigned char *) (addr))[0]) | |
85 | ||
cb2c5501 | 86 | # define put16(addr, val) \ |
b35e58e4 | 87 | ({ uint16_t __val = (val); \ |
cb2c5501 UD |
88 | ((unsigned char *) (addr))[0] = __val; \ |
89 | ((unsigned char *) (addr))[1] = __val >> 8; \ | |
b35e58e4 | 90 | (void) 0; }) |
cb2c5501 UD |
91 | # define put32(addr, val) \ |
92 | ({ uint32_t __val = (val); \ | |
93 | ((unsigned char *) (addr))[0] = __val; \ | |
b35e58e4 | 94 | __val >>= 8; \ |
cb2c5501 | 95 | ((unsigned char *) (addr))[1] = __val; \ |
b35e58e4 | 96 | __val >>= 8; \ |
cb2c5501 | 97 | ((unsigned char *) (addr))[2] = __val; \ |
b35e58e4 | 98 | __val >>= 8; \ |
cb2c5501 | 99 | ((unsigned char *) (addr))[3] = __val; \ |
b35e58e4 UD |
100 | (void) 0; }) |
101 | # else | |
102 | # define get16(addr) \ | |
103 | (((__const unsigned char *) (addr))[0] << 8 \ | |
104 | | ((__const unsigned char *) (addr))[1]) | |
105 | # define get32(addr) \ | |
106 | (((((__const unsigned char *) (addr))[0] << 8 \ | |
107 | | ((__const unsigned char *) (addr))[1]) << 8 \ | |
108 | | ((__const unsigned char *) (addr))[2]) << 8 \ | |
109 | | ((__const unsigned char *) (addr))[3]) | |
110 | ||
cb2c5501 | 111 | # define put16(addr, val) \ |
b35e58e4 | 112 | ({ uint16_t __val = (val); \ |
cb2c5501 | 113 | ((unsigned char *) (addr))[1] = __val; \ |
f7ccf2fc | 114 | ((unsigned char *) (addr))[0] = __val >> 8; \ |
b35e58e4 | 115 | (void) 0; }) |
cb2c5501 UD |
116 | # define put32(addr, val) \ |
117 | ({ uint32_t __val = (val); \ | |
118 | ((unsigned char *) (addr))[3] = __val; \ | |
b35e58e4 | 119 | __val >>= 8; \ |
cb2c5501 | 120 | ((unsigned char *) (addr))[2] = __val; \ |
b35e58e4 | 121 | __val >>= 8; \ |
cb2c5501 | 122 | ((unsigned char *) (addr))[1] = __val; \ |
b35e58e4 | 123 | __val >>= 8; \ |
cb2c5501 | 124 | ((unsigned char *) (addr))[0] = __val; \ |
b35e58e4 UD |
125 | (void) 0; }) |
126 | # endif | |
127 | ||
128 | # define FCTNAME2(name) name##_unaligned | |
129 | #endif | |
130 | #define FCTNAME(name) FCTNAME2(name) | |
131 | ||
132 | ||
8619129f UD |
133 | /* We need at least one byte for the next round. */ |
134 | #ifndef MIN_NEEDED_INPUT | |
5aa8ff62 | 135 | # error "MIN_NEEDED_INPUT definition missing" |
8619129f UD |
136 | #endif |
137 | ||
138 | /* Let's see how many input bytes we need at most. */ | |
139 | #ifndef MAX_NEEDED_INPUT | |
140 | # define MAX_NEEDED_INPUT MIN_NEEDED_INPUT | |
141 | #endif | |
142 | ||
143 | /* We produce at least one byte in the next round. */ | |
144 | #ifndef MIN_NEEDED_OUTPUT | |
5aa8ff62 | 145 | # error "MIN_NEEDED_OUTPUT definition missing" |
8619129f UD |
146 | #endif |
147 | ||
148 | /* Let's see how many bytes we produce. */ | |
149 | #ifndef MAX_NEEDED_OUTPUT | |
150 | # define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT | |
151 | #endif | |
152 | ||
153 | /* Default name for the function. */ | |
154 | #ifndef LOOPFCT | |
155 | # define LOOPFCT loop | |
156 | #endif | |
157 | ||
158 | /* Make sure we have a loop body. */ | |
159 | #ifndef BODY | |
160 | # error "Definition of BODY missing for function" LOOPFCT | |
161 | #endif | |
162 | ||
8619129f | 163 | |
28f1c862 UD |
164 | /* If no arguments have to be passed to the loop function define the macro |
165 | as empty. */ | |
166 | #ifndef EXTRA_LOOP_DECLS | |
167 | # define EXTRA_LOOP_DECLS | |
168 | #endif | |
169 | ||
170 | ||
85830c4c UD |
171 | /* To make it easier for the writers of the modules, we define a macro |
172 | to test whether we have to ignore errors. */ | |
b572c2da UD |
173 | #define ignore_errors_p() \ |
174 | (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS)) | |
85830c4c UD |
175 | |
176 | ||
d6204268 UD |
177 | /* Error handling with transliteration/transcription function use and |
178 | ignoring of errors. Note that we cannot use the do while (0) trick | |
179 | since `break' and `continue' must reach certain points. The macro must therefore be expanded directly inside the conversion loop; it uses the variables result, irreversible, step, step_data, inptrp, inptr, inend and outptr of the enclosing function (and flags, through ignore_errors_p). INCR is the number of input bytes skipped when the error is ignored. */ | |
180 | #define STANDARD_ERR_HANDLER(Incr) \ | |
181 | { \ | |
182 | struct __gconv_trans_data *trans; \ | |
183 | \ | |
184 | result = __GCONV_ILLEGAL_INPUT; \ | |
b572c2da UD |
185 | \ |
186 | if (irreversible == NULL) \ | |
187 | /* This means we are in a call from __gconv_transliterate. In this \ | |
188 | case we are not doing any error recovery ourselves. */ \ | |
189 | break; \ | |
190 | \ | |
d6204268 UD |
191 | /* First try the transliteration methods, in list order, until one of them returns something other than __GCONV_ILLEGAL_INPUT. */ \ |
192 | for (trans = step_data->__trans; trans != NULL; trans = trans->__next) \ | |
193 | { \ | |
194 | result = DL_CALL_FCT (trans->__trans_fct, \ | |
195 | (step, step_data, trans->__data, *inptrp, \ | |
196 | &inptr, inend, &outptr, irreversible)); \ | |
197 | if (result != __GCONV_ILLEGAL_INPUT) \ | |
198 | break; \ | |
199 | } \ | |
7888313d | 200 | /* If any of them recognized the input, continue with the loop. */ \ |
d6204268 | 201 | if (result != __GCONV_ILLEGAL_INPUT) \ |
7888313d | 202 | continue; \ |
d6204268 UD |
203 | \ |
204 | /* Next see whether we have to ignore the error. If not, stop with RESULT still set to __GCONV_ILLEGAL_INPUT. */ \ | |
205 | if (! ignore_errors_p ()) \ | |
206 | break; \ | |
b572c2da | 207 | \ |
d6204268 UD |
208 | /* When we come here it means we ignore the character: count it as an irreversible conversion and skip INCR input bytes. */ \ |
209 | ++*irreversible; \ | |
210 | inptr += Incr; \ | |
211 | continue; \ | |
212 | } | |
213 | ||
214 | ||
9a1f6754 UD |
215 | /* Handling of Unicode 3.1 TAG characters. Unicode recommends |
216 | "If language codes are not relevant to the particular processing | |
217 | operation, then they should be ignored." | |
218 | This macro is usually called right before STANDARD_ERR_HANDLER (Incr). CHARACTER is the code point to examine and INCR the number of input bytes to skip for it. Like STANDARD_ERR_HANDLER it must be expanded inside the conversion loop, because it advances the loop's inptr and uses `continue'. */ | |
219 | #define UNICODE_TAG_HANDLER(Character, Incr) \ | |
220 | { \ | |
221 | /* TAG characters are those in the range U+E0000..U+E007F. Comparing the values shifted right by 7 bits tests the whole 128 code point range at once. */ \ | |
222 | if (((Character) >> 7) == (0xe0000 >> 7)) \ | |
223 | { \ | |
224 | inptr += Incr; \ | |
225 | continue; \ | |
226 | } \ | |
227 | } | |
228 | ||
229 | ||
8619129f UD |
230 | /* The conversion loop proper. Starting at *INPTRP and *OUTPTRP it expands BODY repeatedly until the input reaches INEND or the loop is left early, then stores the positions reached back through INPTRP and OUTPTRP. The function returns the status, as defined in gconv.h: RESULT starts out as __GCONV_EMPTY_INPUT and is changed to __GCONV_FULL_OUTPUT or __GCONV_INCOMPLETE_INPUT by the checks in front of BODY, or to whatever BODY assigns. */ |
231 | static inline int | |
55985355 UD |
232 | FCTNAME (LOOPFCT) (struct __gconv_step *step, |
233 | struct __gconv_step_data *step_data, | |
234 | const unsigned char **inptrp, const unsigned char *inend, | |
17427edd | 235 | unsigned char **outptrp, const unsigned char *outend, |
38677ace | 236 | size_t *irreversible EXTRA_LOOP_DECLS) |
8619129f | 237 | { |
55985355 UD |
238 | #ifdef LOOP_NEED_STATE |
239 | mbstate_t *state = step_data->__statep; | |
240 | #endif | |
241 | #ifdef LOOP_NEED_FLAGS | |
242 | int flags = step_data->__flags; | |
243 | #endif | |
244 | #ifdef LOOP_NEED_DATA | |
245 | void *data = step->__data; | |
246 | #endif | |
247 | int result = __GCONV_EMPTY_INPUT; | |
8619129f UD |
248 | const unsigned char *inptr = *inptrp; |
249 | unsigned char *outptr = *outptrp; | |
8619129f | 250 | |
66175fa8 UD |
251 | #ifdef INIT_PARAMS |
252 | INIT_PARAMS; | |
253 | #endif | |
254 | ||
55985355 | 255 | while (inptr != inend) |
8619129f | 256 | { |
55985355 | 257 | /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are written this way to help the |
ca3c0135 | 258 | compiler generate better code. They will be optimized away |
55985355 UD |
259 | since MIN_NEEDED_OUTPUT is always a constant. */ |
260 | if ((MIN_NEEDED_OUTPUT != 1 | |
261 | && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0)) | |
262 | || (MIN_NEEDED_OUTPUT == 1 | |
263 | && __builtin_expect (outptr >= outend, 0))) | |
264 | { | |
265 | /* Overflow in the output buffer: not even the smallest possible output fits any more. */ | |
266 | result = __GCONV_FULL_OUTPUT; | |
267 | break; | |
268 | } | |
269 | if (MIN_NEEDED_INPUT > 1 | |
270 | && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0)) | |
8619129f | 271 | { |
55985355 UD |
272 | /* We don't have enough input for another complete input |
273 | character. */ | |
274 | result = __GCONV_INCOMPLETE_INPUT; | |
275 | break; | |
8619129f | 276 | } |
55985355 UD |
277 | |
278 | /* Here comes the body the user provides. It can stop with | |
279 | RESULT set to __GCONV_INCOMPLETE_INPUT (if the input | |
280 | characters vary in size), __GCONV_ILLEGAL_INPUT, or | |
281 | __GCONV_FULL_OUTPUT (if the output characters vary in size). */ | |
282 | BODY | |
8619129f UD |
283 | } |
284 | ||
8619129f UD |
285 | /* Update the pointers pointed to by the parameters. UPDATE_PARAMS, if the module defines it, is expanded afterwards. */ |
286 | *inptrp = inptr; | |
287 | *outptrp = outptr; | |
66175fa8 UD |
288 | #ifdef UPDATE_PARAMS |
289 | UPDATE_PARAMS; | |
290 | #endif | |
8619129f UD |
291 | |
292 | return result; | |
293 | } | |
294 | ||
295 | ||
b02b4774 UD |
296 | /* Include the file a second time to define the function to handle |
297 | unaligned access. */ | |
fdf64555 UD |
298 | #if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \ |
299 | && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ | |
300 | && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0 | |
fd1b5c0f UD |
301 | # undef get16 |
302 | # undef get32 | |
303 | # undef put16 | |
304 | # undef put32 | |
305 | # undef unaligned | |
306 | ||
b35e58e4 UD |
307 | # define DEFINE_UNALIGNED |
308 | # include "loop.c" | |
309 | # undef DEFINE_UNALIGNED | |
310 | #endif | |
311 | ||
312 | ||
fd1b5c0f UD |
313 | #if MAX_NEEDED_INPUT > 1 |
314 | # define SINGLE(fct) SINGLE2 (fct) | |
315 | # define SINGLE2(fct) fct##_single | |
| | /* Finish the one character whose leading bytes were left in the conversion state. The saved bytes (their number is kept in the low three bits of state->__count, unless the module unpacks them itself with UNPACK_BYTES) are copied to a local buffer, bytes from *INPTRP are appended, and BODY is expanded exactly once on that buffer. | |
| | Returns __GCONV_OK, with *INPTRP and *OUTPTRP advanced, if BODY consumed input; __GCONV_INCOMPLETE_INPUT, with all available bytes kept in the state, if the character is still incomplete; __GCONV_FULL_OUTPUT if fewer than MIN_NEEDED_OUTPUT bytes of output space are left; otherwise the status BODY stored in RESULT. */ | |
316 | static inline int | |
55985355 UD |
317 | SINGLE(LOOPFCT) (struct __gconv_step *step, |
318 | struct __gconv_step_data *step_data, | |
319 | const unsigned char **inptrp, const unsigned char *inend, | |
fd1b5c0f | 320 | unsigned char **outptrp, const unsigned char *outend, |
55985355 | 321 | size_t *irreversible EXTRA_LOOP_DECLS) |
fd1b5c0f | 322 | { |
55985355 UD |
323 | mbstate_t *state = step_data->__statep; |
324 | #ifdef LOOP_NEED_FLAGS | |
325 | int flags = step_data->__flags; | |
326 | #endif | |
327 | #ifdef LOOP_NEED_DATA | |
328 | void *data = step->__data; | |
329 | #endif | |
fd1b5c0f UD |
330 | int result = __GCONV_OK; |
331 | unsigned char bytebuf[MAX_NEEDED_INPUT]; | |
332 | const unsigned char *inptr = *inptrp; | |
333 | unsigned char *outptr = *outptrp; | |
334 | size_t inlen; | |
335 | ||
336 | #ifdef INIT_PARAMS | |
337 | INIT_PARAMS; | |
338 | #endif | |
339 | ||
340 | #ifdef UNPACK_BYTES | |
341 | UNPACK_BYTES | |
342 | #else | |
343 | /* Add the bytes from the state to the input buffer. The low three bits of __count say how many there are. */ | |
17427edd | 344 | for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen) |
fd1b5c0f UD |
345 | bytebuf[inlen] = state->__value.__wchb[inlen]; |
346 | #endif | |
347 | ||
348 | /* Are there enough bytes in the input buffer? */ | |
316518d6 | 349 | if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0)) |
fd1b5c0f | 350 | { |
fd1b5c0f | 351 | *inptrp = inend; |
316518d6 | 352 | #ifdef STORE_REST |
fd1b5c0f UD |
353 | inptr = bytebuf; |
354 | inptrp = &inptr; | |
355 | inend = &bytebuf[inlen]; | |
356 | ||
357 | STORE_REST | |
358 | #else | |
359 | /* We don't have enough input for another complete input | |
360 | character. Append what is there behind the bytes already in the state; INLEN is still the number of state bytes here. */ | |
361 | while (inptr < inend) | |
362 | state->__value.__wchb[inlen++] = *inptr++; | |
363 | #endif | |
364 | ||
365 | return __GCONV_INCOMPLETE_INPUT; | |
366 | } | |
367 | ||
368 | /* Enough space in output buffer. */ | |
369 | if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend) | |
370 | || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend)) | |
371 | /* Overflow in the output buffer. */ | |
372 | return __GCONV_FULL_OUTPUT; | |
373 | ||
374 | /* Now add characters from the normal input buffer. */ | |
375 | do | |
376 | bytebuf[inlen++] = *inptr++; | |
316518d6 | 377 | while (inlen < MAX_NEEDED_INPUT && inptr < inend); |
fd1b5c0f UD |
378 | |
379 | inptr = bytebuf; | |
316518d6 | 380 | inend = &bytebuf[inlen]; |
55985355 | 381 | |
fd1b5c0f UD |
382 | do |
383 | { | |
384 | BODY | |
385 | } | |
386 | while (0); | |
387 | ||
316518d6 UD |
388 | /* Now we either have produced an output character and consumed all the |
389 | bytes from the state and at least one more, or the character is still | |
390 | incomplete, or we have some other error (like illegal input character, | |
391 | no space in output buffer). */ | |
5ea1a82d | 392 | if (__builtin_expect (inptr != bytebuf, 1)) |
fd1b5c0f | 393 | { |
316518d6 | 394 | /* We found a new character. */ |
fd1b5c0f UD |
395 | assert (inptr - bytebuf > (state->__count & 7)); |
396 | ||
397 | *inptrp += inptr - bytebuf - (state->__count & 7); | |
398 | *outptrp = outptr; | |
399 | ||
316518d6 UD |
400 | result = __GCONV_OK; |
401 | ||
fd1b5c0f UD |
402 | /* Clear the state buffer. */ |
403 | state->__count &= ~7; | |
404 | } | |
316518d6 UD |
405 | else if (result == __GCONV_INCOMPLETE_INPUT) |
406 | { | |
407 | /* This can only happen if we have less than MAX_NEEDED_INPUT bytes | |
408 | available. */ | |
409 | assert (inend != &bytebuf[MAX_NEEDED_INPUT]); | |
410 | ||
411 | *inptrp += inend - bytebuf - (state->__count & 7); | |
412 | #ifdef STORE_REST | |
413 | inptrp = &inptr; | |
414 | ||
415 | STORE_REST | |
416 | #else | |
417 | /* We don't have enough input for another complete input | |
418 | character. INPTR is still at the start of BYTEBUF, which now holds the old state bytes followed by the new input bytes, so store all of them starting at index 0 and record their number in the low bits of __count, where the unpacking loop above reads it. Continuing at the old value of INLEN would put the bytes behind the position the next call reads them from. */ | |
| | state->__count = (state->__count & ~7) | (inend - inptr); | |
419 | for (inlen = 0; inptr < inend; ++inlen) | |
420 | state->__value.__wchb[inlen] = *inptr++; | |
421 | #endif | |
422 | } | |
fd1b5c0f UD |
423 | |
424 | return result; | |
425 | } | |
426 | # undef SINGLE | |
427 | # undef SINGLE2 | |
428 | #endif | |
429 | ||
430 | ||
8619129f UD |
431 | /* We remove the macro definitions so that we can include this file again |
432 | for the definition of another function. */ | |
433 | #undef MIN_NEEDED_INPUT | |
434 | #undef MAX_NEEDED_INPUT | |
435 | #undef MIN_NEEDED_OUTPUT | |
436 | #undef MAX_NEEDED_OUTPUT | |
437 | #undef LOOPFCT | |
8619129f UD |
438 | #undef BODY |
439 | #undef LOOPFCT | |
28f1c862 | 440 | #undef EXTRA_LOOP_DECLS |
66175fa8 UD |
441 | #undef INIT_PARAMS |
442 | #undef UPDATE_PARAMS | |
55985355 UD |
443 | #undef UNPACK_BYTES |
444 | #undef LOOP_NEED_STATE | |
445 | #undef LOOP_NEED_FLAGS | |
446 | #undef LOOP_NEED_DATA | |
fd1b5c0f UD |
447 | #undef get16 |
448 | #undef get32 | |
449 | #undef put16 | |
450 | #undef put32 | |
451 | #undef unaligned |