]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/loop.c
Update.
[thirdparty/glibc.git] / iconv / loop.c
CommitLineData
8619129f 1/* Conversion loop framework.
9a1f6754 2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
8619129f
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
8619129f
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
8619129f 15
41bdb6e2
AJ
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
8619129f
UD
20
21/* This file provides a frame for the reader loop in all conversion modules.
22 The actual code must (of course) be provided in the actual module source
23 code but certain actions can be written down generically, with some
24 customization options which are these:
25
26 MIN_NEEDED_INPUT minimal number of input bytes needed for the next
27 conversion.
28 MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round
29 of conversion.
30
31 MAX_NEEDED_INPUT you guessed it, this is the maximum number of input
32 bytes needed. It defaults to MIN_NEEDED_INPUT
33 MAX_NEEDED_OUTPUT likewise for output bytes.
34
8619129f
UD
35 LOOPFCT name of the function created. If not specified
36 the name is `loop' but this prevents the use
37 of multiple functions in the same file.
38
8619129f
UD
39 BODY this is supposed to expand to the body of the loop.
40 The user must provide this.
28f1c862 41
66175fa8
UD
42 EXTRA_LOOP_DECLS extra arguments passed from the conversion loop call.
43
44 INIT_PARAMS code to define and initialize variables from params.
45 UPDATE_PARAMS code to store result in params.
8619129f
UD
46*/
47
fd1b5c0f 48#include <assert.h>
b35e58e4 49#include <endian.h>
8619129f 50#include <gconv.h>
b35e58e4
UD
51#include <stdint.h>
52#include <string.h>
d64b6ad0 53#include <wchar.h>
8619129f
UD
54#include <sys/param.h> /* For MIN. */
55#define __need_size_t
56#include <stddef.h>
57
58
b35e58e4
UD
59/* We have to provide support for machines which are not able to handle
60 unaligned memory accesses. Some of the character encodings have
61 representations with a fixed width of 2 or 4 bytes. But if we cannot
62 access unaligned memory we still have to read byte-wise. */
63#undef FCTNAME2
64#if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
65/* We can handle unaligned memory access. */
b91d5eda
UD
/* Direct access: read a 16 or 32 bit value straight from ADDR through a
   pointer of the matching integer type.  The expansions are wrapped in
   parentheses so that each use behaves as one operand inside a larger
   expression.
   NOTE(review): these casts assume ADDR is suitably aligned for the
   integer type on machines that care -- confirm against the callers.  */
# define get16(addr) (*((__const uint16_t *) (addr)))
# define get32(addr) (*((__const uint32_t *) (addr)))

/* We need no special support for writing values either.  The whole
   assignment is parenthesized; without that, `put16 (p, v) == v' would
   store the result of the comparison instead of V.  */
# define put16(addr, val) (*((uint16_t *) (addr)) = (val))
# define put32(addr, val) (*((uint32_t *) (addr)) = (val))

/* The variant built from these macros keeps the plain function name.  */
# define FCTNAME2(name) name
74#else
75/* Distinguish between big endian and little endian. */
/* Byte-wise accessors for machines that cannot access unaligned memory.
   The value is assembled from / split into single bytes in the byte order
   of the host, so the result matches what a direct 16 or 32 bit access
   would give.

   get16/get32 yield uint16_t/uint32_t.  For get32 the most significant
   byte is widened to uint32_t before the first shift: shifting the
   promoted `int' would move a set top bit into the sign bit, and the
   negative result would sign-extend when assigned to a wider type.
   Note that the get macros evaluate ADDR more than once.

   put16/put32 evaluate VAL and ADDR exactly once and yield no value.  */
# if __BYTE_ORDER == __LITTLE_ENDIAN
#  define get16(addr) \
     ((uint16_t) (((__const unsigned char *) (addr))[1] << 8 \
                  | ((__const unsigned char *) (addr))[0]))
#  define get32(addr) \
     ((uint32_t) ((((uint32_t) ((__const unsigned char *) (addr))[3] << 8 \
                    | ((__const unsigned char *) (addr))[2]) << 8 \
                   | ((__const unsigned char *) (addr))[1]) << 8 \
                  | ((__const unsigned char *) (addr))[0]))

#  define put16(addr, val) \
     ({ uint16_t __val = (val); \
        unsigned char *__dst = (unsigned char *) (addr); \
        __dst[0] = (unsigned char) __val; \
        __dst[1] = (unsigned char) (__val >> 8); \
        (void) 0; })
#  define put32(addr, val) \
     ({ uint32_t __val = (val); \
        unsigned char *__dst = (unsigned char *) (addr); \
        __dst[0] = (unsigned char) __val; \
        __val >>= 8; \
        __dst[1] = (unsigned char) __val; \
        __val >>= 8; \
        __dst[2] = (unsigned char) __val; \
        __val >>= 8; \
        __dst[3] = (unsigned char) __val; \
        (void) 0; })
# else
#  define get16(addr) \
     ((uint16_t) (((__const unsigned char *) (addr))[0] << 8 \
                  | ((__const unsigned char *) (addr))[1]))
#  define get32(addr) \
     ((uint32_t) ((((uint32_t) ((__const unsigned char *) (addr))[0] << 8 \
                    | ((__const unsigned char *) (addr))[1]) << 8 \
                   | ((__const unsigned char *) (addr))[2]) << 8 \
                  | ((__const unsigned char *) (addr))[3]))

#  define put16(addr, val) \
     ({ uint16_t __val = (val); \
        unsigned char *__dst = (unsigned char *) (addr); \
        __dst[1] = (unsigned char) __val; \
        __dst[0] = (unsigned char) (__val >> 8); \
        (void) 0; })
#  define put32(addr, val) \
     ({ uint32_t __val = (val); \
        unsigned char *__dst = (unsigned char *) (addr); \
        __dst[3] = (unsigned char) __val; \
        __val >>= 8; \
        __dst[2] = (unsigned char) __val; \
        __val >>= 8; \
        __dst[1] = (unsigned char) __val; \
        __val >>= 8; \
        __dst[0] = (unsigned char) __val; \
        (void) 0; })
# endif
127
128# define FCTNAME2(name) name##_unaligned
129#endif
130#define FCTNAME(name) FCTNAME2(name)
131
132
8619129f
UD
133/* We need at least one byte for the next round. */
134#ifndef MIN_NEEDED_INPUT
5aa8ff62 135# error "MIN_NEEDED_INPUT definition missing"
8619129f
UD
136#endif
137
138/* Let's see how many bytes we produce. */
139#ifndef MAX_NEEDED_INPUT
140# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT
141#endif
142
143/* We produce at least one byte in the next round. */
144#ifndef MIN_NEEDED_OUTPUT
5aa8ff62 145# error "MIN_NEEDED_OUTPUT definition missing"
8619129f
UD
146#endif
147
148/* Let's see how many bytes we produce. */
149#ifndef MAX_NEEDED_OUTPUT
150# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT
151#endif
152
153/* Default name for the function. */
154#ifndef LOOPFCT
155# define LOOPFCT loop
156#endif
157
158/* Make sure we have a loop body. */
159#ifndef BODY
160# error "Definition of BODY missing for function" LOOPFCT
161#endif
162
8619129f 163
28f1c862
UD
164/* If no arguments have to passed to the loop function define the macro
165 as empty. */
166#ifndef EXTRA_LOOP_DECLS
167# define EXTRA_LOOP_DECLS
168#endif
169
170
85830c4c
UD
/* Convenience test for module authors: nonzero when conversion errors are
   to be skipped, i.e. an IRREVERSIBLE counter exists and FLAGS has the
   __GCONV_IGNORE_ERRORS bit set.  Uses the `irreversible' and `flags'
   variables of the enclosing loop function.  */
#define ignore_errors_p() \
  (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS) != 0)
85830c4c
UD
175
176
d6204268
UD
/* Common reaction to input that BODY cannot convert.  The transliteration
   functions registered in STEP_DATA are asked first; if none of them
   accepts the input it is either skipped (when errors are to be ignored)
   or the loop is left with RESULT set to __GCONV_ILLEGAL_INPUT.  INCR is
   the number of input bytes to skip when the error is ignored.

   The expansion is a bare block and not the usual do-while (0) construct
   because its `break' and `continue' statements must act on the loop that
   surrounds BODY.  */
#define STANDARD_ERR_HANDLER(Incr) \
  { \
    struct __gconv_trans_data *trans; \
 \
    result = __GCONV_ILLEGAL_INPUT; \
 \
    /* No IRREVERSIBLE counter means we were called from \
       __gconv_transliterate; in that case we do no error recovery \
       ourselves.  */ \
    if (irreversible == NULL) \
      break; \
 \
    /* Offer the input to each transliteration function in turn and stop \
       at the first one that does not report it as illegal.  */ \
    trans = step_data->__trans; \
    while (trans != NULL) \
      { \
        result = DL_CALL_FCT (trans->__trans_fct, \
                              (step, step_data, trans->__data, *inptrp, \
                               &inptr, inend, &outptr, irreversible)); \
        if (result != __GCONV_ILLEGAL_INPUT) \
          break; \
        trans = trans->__next; \
      } \
 \
    if (result == __GCONV_ILLEGAL_INPUT) \
      { \
        /* Nobody recognized the input.  Stop unless errors are to be \
           ignored.  */ \
        if (! ignore_errors_p ()) \
          break; \
 \
        /* Ignore the character: count it and step over it.  */ \
        ++*irreversible; \
        inptr += Incr; \
      } \
 \
    /* Either a transliteration function dealt with the input or it was \
       skipped; go on with the loop.  */ \
    continue; \
  }
213
214
9a1f6754
UD
/* Treatment of the Unicode 3.1 TAG characters.  The Unicode recommendation
   reads:
     "If language codes are not relevant to the particular processing
      operation, then they should be ignored."
   Accordingly a TAG character is skipped: INCR input bytes are consumed
   and the surrounding loop continues.  Any other CHARACTER falls through,
   which is why this macro is usually placed right in front of
   STANDARD_ERR_HANDLER (Incr).  */
#define UNICODE_TAG_HANDLER(Character, Incr) \
  { \
    /* The TAG characters U+E0000..U+E007F are exactly the values that \
       agree with 0xe0000 in all bits above the low seven.  */ \
    if ((0xe0000 >> 7) == ((Character) >> 7)) \
      { \
        inptr += Incr; \
        continue; \
      } \
  }
228
229
8619129f
UD
230/* The function returns the status, as defined in gconv.h. */
231static inline int
55985355
UD
232FCTNAME (LOOPFCT) (struct __gconv_step *step,
233 struct __gconv_step_data *step_data,
234 const unsigned char **inptrp, const unsigned char *inend,
17427edd 235 unsigned char **outptrp, const unsigned char *outend,
38677ace 236 size_t *irreversible EXTRA_LOOP_DECLS)
8619129f 237{
55985355
UD
238#ifdef LOOP_NEED_STATE
239 mbstate_t *state = step_data->__statep;
240#endif
241#ifdef LOOP_NEED_FLAGS
242 int flags = step_data->__flags;
243#endif
244#ifdef LOOP_NEED_DATA
245 void *data = step->__data;
246#endif
247 int result = __GCONV_EMPTY_INPUT;
8619129f
UD
248 const unsigned char *inptr = *inptrp;
249 unsigned char *outptr = *outptrp;
8619129f 250
66175fa8
UD
251#ifdef INIT_PARAMS
252 INIT_PARAMS;
253#endif
254
55985355 255 while (inptr != inend)
8619129f 256 {
55985355 257 /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
ca3c0135 258 compiler generating better code. They will be optimized away
55985355
UD
259 since MIN_NEEDED_OUTPUT is always a constant. */
260 if ((MIN_NEEDED_OUTPUT != 1
261 && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
262 || (MIN_NEEDED_OUTPUT == 1
263 && __builtin_expect (outptr >= outend, 0)))
264 {
265 /* Overflow in the output buffer. */
266 result = __GCONV_FULL_OUTPUT;
267 break;
268 }
269 if (MIN_NEEDED_INPUT > 1
270 && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
8619129f 271 {
55985355
UD
272 /* We don't have enough input for another complete input
273 character. */
274 result = __GCONV_INCOMPLETE_INPUT;
275 break;
8619129f 276 }
55985355
UD
277
278 /* Here comes the body the user provides. It can stop with
279 RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
280 input characters vary in size), GCONV_ILLEGAL_INPUT, or
281 GCONV_FULL_OUTPUT (if the output characters vary in size). */
282 BODY
8619129f
UD
283 }
284
8619129f
UD
285 /* Update the pointers pointed to by the parameters. */
286 *inptrp = inptr;
287 *outptrp = outptr;
66175fa8
UD
288#ifdef UPDATE_PARAMS
289 UPDATE_PARAMS;
290#endif
8619129f
UD
291
292 return result;
293}
294
295
b02b4774
UD
296/* Include the file a second time to define the function to handle
297 unaligned access. */
fdf64555
UD
298#if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \
299 && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
300 && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
fd1b5c0f
UD
301# undef get16
302# undef get32
303# undef put16
304# undef put32
305# undef unaligned
306
b35e58e4
UD
307# define DEFINE_UNALIGNED
308# include "loop.c"
309# undef DEFINE_UNALIGNED
310#endif
311
312
fd1b5c0f
UD
#if MAX_NEEDED_INPUT > 1
# define SINGLE(fct) SINGLE2 (fct)
# define SINGLE2(fct) fct##_single
/* Convert the one character whose first bytes were saved in the conversion
   state by an earlier call.  The saved bytes (the low three bits of
   STATE->__count give their number) are combined in a local buffer with
   bytes taken from *INPTRP, and BODY is run exactly once on that buffer.

   Returns
     __GCONV_OK                a character was converted; *INPTRP and
			       *OUTPTRP are advanced and the saved bytes
			       are cleared from the state,
     __GCONV_INCOMPLETE_INPUT  even with all available input the character
			       is not complete; the input is consumed and
			       saved for the next call,
     __GCONV_FULL_OUTPUT       the output buffer has no room,
   or another status stored in RESULT by BODY, in which case neither
   position is changed.  */
static inline int
SINGLE(LOOPFCT) (struct __gconv_step *step,
		 struct __gconv_step_data *step_data,
		 const unsigned char **inptrp, const unsigned char *inend,
		 unsigned char **outptrp, unsigned char *outend,
		 size_t *irreversible EXTRA_LOOP_DECLS)
{
  mbstate_t *state = step_data->__statep;
#ifdef LOOP_NEED_FLAGS
  int flags = step_data->__flags;
#endif
#ifdef LOOP_NEED_DATA
  void *data = step->__data;
#endif
  int result = __GCONV_OK;
  unsigned char bytebuf[MAX_NEEDED_INPUT];
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t inlen;

#ifdef INIT_PARAMS
  INIT_PARAMS;
#endif

#ifdef UNPACK_BYTES
  UNPACK_BYTES
#else
  /* Add the bytes from the state to the input buffer.  */
  for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
    bytebuf[inlen] = state->__value.__wchb[inlen];
#endif

  /* Are there enough bytes in the input buffer?  */
  if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
    {
      /* Everything the caller offered is consumed.  */
      *inptrp = inend;
#ifdef STORE_REST
      inptr = bytebuf;
      inptrp = &inptr;
      inend = &bytebuf[inlen];

      STORE_REST
#else
      /* We don't have enough input for another complete input
	 character.  Append what there is behind the bytes already saved.
	 NOTE(review): the byte count in STATE->__count is not raised
	 here -- confirm whether the caller takes care of that.  */
      while (inptr < inend)
	state->__value.__wchb[inlen++] = *inptr++;
#endif

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Enough space in output buffer.  */
  if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
      || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
    /* Overflow in the output buffer.  */
    return __GCONV_FULL_OUTPUT;

  /* Now add characters from the normal input buffer.  */
  do
    bytebuf[inlen++] = *inptr++;
  while (inlen < MAX_NEEDED_INPUT && inptr < inend);

  /* From here on BODY reads from the local buffer only.  */
  inptr = bytebuf;
  inend = &bytebuf[inlen];

  /* The do-while (0) gives the `break' and `continue' statements in BODY
     a loop to act on while still running BODY just once.  */
  do
    {
      BODY
    }
  while (0);

  /* Now we either have produced an output character and consumed all the
     bytes from the state and at least one more, or the character is still
     incomplete, or we have some other error (like illegal input character,
     no space in output buffer).  */
  if (__builtin_expect (inptr != bytebuf, 1))
    {
      /* We found a new character.  */
      assert (inptr - bytebuf > (state->__count & 7));

      /* Only the bytes that did not come from the state were taken from
	 the caller's input.  */
      *inptrp += inptr - bytebuf - (state->__count & 7);
      *outptrp = outptr;

      result = __GCONV_OK;

      /* Clear the state buffer.  */
      state->__count &= ~7;
    }
  else if (result == __GCONV_INCOMPLETE_INPUT)
    {
      /* This can only happen if we have less than MAX_NEEDED_INPUT bytes
	 available.  */
      assert (inend != &bytebuf[MAX_NEEDED_INPUT]);

      /* This uses the old byte count, so it must precede the update of
	 STATE->__count below.  */
      *inptrp += inend - bytebuf - (state->__count & 7);
#ifdef STORE_REST
      inptrp = &inptr;

      STORE_REST
#else
      /* We don't have enough input for another complete input
	 character.  BYTEBUF holds the whole sequence seen so far (INPTR
	 still equals BYTEBUF here), so save it from index 0 and record
	 its length in the low bits of the count.  Storing behind the
	 bytes already saved would duplicate them and could run past the
	 end of the state buffer.  */
      assert ((size_t) (inend - inptr) <= sizeof (state->__value.__wchb));
      state->__count = (state->__count & ~7) | (int) (inend - inptr);
      for (inlen = 0; inptr < inend; ++inlen)
	state->__value.__wchb[inlen] = *inptr++;
#endif
    }

  return result;
}
# undef SINGLE
# undef SINGLE2
#endif
429
430
8619129f
UD
/* Drop every customization macro again so that the including module can
   set up fresh definitions and include this file another time to create
   a further function.  */

/* Input and output size limits.  */
#undef MAX_NEEDED_OUTPUT
#undef MIN_NEEDED_OUTPUT
#undef MAX_NEEDED_INPUT
#undef MIN_NEEDED_INPUT

/* Function name, body and parameter handling.  */
#undef LOOPFCT
#undef BODY
#undef EXTRA_LOOP_DECLS
#undef INIT_PARAMS
#undef UPDATE_PARAMS
#undef UNPACK_BYTES

/* Requests for optional local variables.  */
#undef LOOP_NEED_DATA
#undef LOOP_NEED_FLAGS
#undef LOOP_NEED_STATE

/* Memory access helpers.  */
#undef put32
#undef put16
#undef get32
#undef get16
#undef unaligned