]> git.ipfire.org Git - thirdparty/glibc.git/blob - iconv/skeleton.c
Update.
[thirdparty/glibc.git] / iconv / skeleton.c
1 /* Skeleton for a conversion module.
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 /* This file can be included to provide definitions of several things
22 many modules have in common. It can be customized using the following
23 macros:
24
25 DEFINE_INIT define the default initializer. This requires the
26 following symbol to be defined.
27
28 CHARSET_NAME string with official name of the coded character
29 set (in all-caps)
30
31 DEFINE_FINI define the default destructor function.
32
33 MIN_NEEDED_FROM minimal number of bytes needed for the from-charset.
34 MIN_NEEDED_TO likewise for the to-charset.
35
36 MAX_NEEDED_FROM maximal number of bytes needed for the from-charset.
37 This macro is optional, it defaults to MIN_NEEDED_FROM.
38 MAX_NEEDED_TO likewise for the to-charset.
39
40 DEFINE_DIRECTION_OBJECTS
41 two objects will be defined to be used when the
42 `gconv' function must only distinguish two
43 directions. This is implied by DEFINE_INIT.
44 If this macro is not defined the following
45 macro must be available.
46
47 FROM_DIRECTION this macro is supposed to return a value != 0
48 if we convert from the current character set,
49 otherwise it returns 0.
50
51 EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it
52 defines some code which writes out a sequence
53 of characters which bring the current state into
54 the initial state.
55
56 FROM_LOOP name of the function implementing the conversion
57 from the current characters.
58 TO_LOOP likewise for the other direction
59
60 SAVE_RESET_STATE in case of an error we must reset the state for
61 the rerun so this macro must be defined for
62 stateful encodings. It takes an argument which
63 is nonzero when saving.
64
65 RESET_INPUT_BUFFER If the input character sets allow this the macro
66 can be defined to reset the input buffer pointers
67 to cover only those characters up to the error.
68
69 FUNCTION_NAME if not set the conversion function is named `gconv'.
70
71 PREPARE_LOOP optional code preparing the conversion loop. Can
72 contain variable definitions.
73 END_LOOP also optional, may be used to store information
74
75 EXTRA_LOOP_ARGS optional macro specifying extra arguments passed
76 to loop function.
77 */
78
79 #include <assert.h>
80 #include <gconv.h>
81 #include <string.h>
82 #define __need_size_t
83 #define __need_NULL
84 #include <stddef.h>
85
86 #ifndef STATIC_GCONV
87 # include <dlfcn.h>
88 #endif
89
90 #ifndef DL_CALL_FCT
91 # define DL_CALL_FCT(fct, args) fct args
92 #endif
93
94 /* The direction objects. */
95 #if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT
96 static int from_object;
97 static int to_object;
98
99 # ifndef FROM_DIRECTION
100 # define FROM_DIRECTION (step->__data == &from_object)
101 # endif
102 #else
103 # ifndef FROM_DIRECTION
104 # error "FROM_DIRECTION must be provided if direction objects are not used"
105 # endif
106 #endif
107
108
/* Upper bound for the number of bytes of one character in the
   from-charset.  Falls back to the lower bound if the module did not
   specify it.  */
#if !defined MAX_NEEDED_FROM
# define MAX_NEEDED_FROM MIN_NEEDED_FROM
#endif

/* The to-charset is treated the same way.  */
#if !defined MAX_NEEDED_TO
# define MAX_NEEDED_TO MIN_NEEDED_TO
#endif
118
119
120 /* Define macros which can access unaligned buffers. These macros are
121 supposed to be used only in code outside the inner loops. For the inner
122 loops we have other definitions which allow optimized access. */
123 #ifdef _STRING_ARCH_unaligned
124 /* We can handle unaligned memory access. */
125 # define get16u(addr) *((uint16_t *) (addr))
126 # define get32u(addr) *((uint32_t *) (addr))
127
128 /* We need no special support for writing values either. */
129 # define put16u(addr, val) *((uint16_t *) (addr)) = (val)
130 # define put32u(addr, val) *((uint32_t *) (addr)) = (val)
131 #else
132 /* Distinguish between big endian and little endian. */
133 # if __BYTE_ORDER == __LITTLE_ENDIAN
134 # define get16u(addr) \
135 (((__const unsigned char *) (addr))[1] << 8 \
136 | ((__const unsigned char *) (addr))[0])
137 # define get32u(addr) \
138 (((((__const unsigned char *) (addr))[3] << 8 \
139 | ((__const unsigned char *) (addr))[2]) << 8 \
140 | ((__const unsigned char *) (addr))[1]) << 8 \
141 | ((__const unsigned char *) (addr))[0])
142
143 # define put16u(addr, val) \
144 ({ uint16_t __val = (val); \
145 ((unsigned char *) (addr))[0] = __val; \
146 ((unsigned char *) (addr))[1] = __val >> 8; \
147 (void) 0; })
148 # define put32u(addr, val) \
149 ({ uint32_t __val = (val); \
150 ((unsigned char *) (addr))[0] = __val; \
151 __val >>= 8; \
152 ((unsigned char *) (addr))[1] = __val; \
153 __val >>= 8; \
154 ((unsigned char *) (addr))[2] = __val; \
155 __val >>= 8; \
156 ((unsigned char *) (addr))[3] = __val; \
157 (void) 0; })
158 # else
159 # define get16u(addr) \
160 (((__const unsigned char *) (addr))[0] << 8 \
161 | ((__const unsigned char *) (addr))[1])
162 # define get32u(addr) \
163 (((((__const unsigned char *) (addr))[0] << 8 \
164 | ((__const unsigned char *) (addr))[1]) << 8 \
165 | ((__const unsigned char *) (addr))[2]) << 8 \
166 | ((__const unsigned char *) (addr))[3])
167
168 # define put16u(addr, val) \
169 ({ uint16_t __val = (val); \
170 ((unsigned char *) (addr))[1] = __val; \
171 ((unsigned char *) (addr))[0] = __val >> 8; \
172 (void) 0; })
173 # define put32u(addr, val) \
174 ({ uint32_t __val = (val); \
175 ((unsigned char *) (addr))[3] = __val; \
176 __val >>= 8; \
177 ((unsigned char *) (addr))[2] = __val; \
178 __val >>= 8; \
179 ((unsigned char *) (addr))[1] = __val; \
180 __val >>= 8; \
181 ((unsigned char *) (addr))[0] = __val; \
182 (void) 0; })
183 # endif
184 #endif
185
186
/* For conversions from one fixed width character set to another fixed
   width character set we can define RESET_INPUT_BUFFER ourselves if the
   module did not do it.  The expansion moves *inptrp back by the input
   bytes which correspond to the output between `outerr' and `outbuf'.  */
#if !(defined RESET_INPUT_BUFFER || defined SAVE_RESET_STATE)
# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO
/* The three cases are spelled out with `if's because the compiler cannot
   know that (outbuf - outerr) is always divisible by MIN_NEEDED_TO.  */
#  define RESET_INPUT_BUFFER \
  if (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0) \
    *inptrp -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO); \
  else if (MIN_NEEDED_TO % MIN_NEEDED_FROM == 0) \
    *inptrp -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM); \
  else \
    *inptrp -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM
# endif
#endif
202
203
/* The default init function.  It matches the charset names of the step
   against CHARSET_NAME, lets the step data point to the matching direction
   object (`from_object' or `to_object') and records the minimal and
   maximal number of bytes needed per character for both sides.

   Returns __GCONV_OK on success and __GCONV_NOCONV if neither the source
   nor the destination name of STEP is CHARSET_NAME.  */
#if DEFINE_INIT
# ifndef CHARSET_NAME
#  error "CHARSET_NAME not defined"
# endif

int
gconv_init (struct __gconv_step *step)
{
  /* Determine which direction.  */
  if (strcmp (step->__from_name, CHARSET_NAME) == 0)
    {
      step->__data = &from_object;

      step->__min_needed_from = MIN_NEEDED_FROM;
      step->__max_needed_from = MAX_NEEDED_FROM;
      step->__min_needed_to = MIN_NEEDED_TO;
      step->__max_needed_to = MAX_NEEDED_TO;
    }
  else if (strcmp (step->__to_name, CHARSET_NAME) == 0)
    {
      step->__data = &to_object;

      /* Seen from the step we convert in the opposite direction, so the
         roles of the FROM and TO values are exchanged.  */
      step->__min_needed_from = MIN_NEEDED_TO;
      step->__max_needed_from = MAX_NEEDED_TO;
      step->__min_needed_to = MIN_NEEDED_FROM;
      step->__max_needed_to = MAX_NEEDED_FROM;
    }
  else
    return __GCONV_NOCONV;

  /* The conversion function saves and restores the state through
     SAVE_RESET_STATE, so a module defining that macro is stateful.
     RESET_STATE is still honoured for modules using the older name.  */
#if defined SAVE_RESET_STATE || defined RESET_STATE
  step->__stateful = 1;
#else
  step->__stateful = 0;
#endif

  return __GCONV_OK;
}
#endif
245
246
247 /* The default destructor function does nothing at the moment and so
248 we do not define it at all. But we still provide the macro just in case
249 we need it some day. */
250 #if DEFINE_FINI
251 #endif
252
253
/* Modules which have to pass additional arguments to the loop functions
   define EXTRA_LOOP_ARGS.  Otherwise the macro expands to nothing.  */
#if !defined EXTRA_LOOP_ARGS
# define EXTRA_LOOP_ARGS
#endif


/* Name of the conversion function defined below.  It is `gconv' unless
   the module selected a different name.  */
#if !defined FUNCTION_NAME
# define FUNCTION_NAME gconv
#endif

/* SINGLE (FCT) is the name of the function converting a single character:
   FCT with `_single' appended.  The second macro level makes sure FCT is
   itself macro-expanded before the token is pasted.  */
#define SINGLE2(fct) fct##_single
#define SINGLE(fct) SINGLE2 (fct)
269
270
271 int
272 FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
273 const unsigned char **inptrp, const unsigned char *inend,
274 size_t *written, int do_flush, int consume_incomplete)
275 {
276 struct __gconv_step *next_step = step + 1;
277 struct __gconv_step_data *next_data = data + 1;
278 __gconv_fct fct = data->__is_last ? NULL : next_step->__fct;
279 int status;
280
281 /* If the function is called with no input this means we have to reset
282 to the initial state. The possibly partly converted input is
283 dropped. */
284 if (__builtin_expect (do_flush, 0))
285 {
286 status = __GCONV_OK;
287
288 #ifdef EMIT_SHIFT_TO_INIT
289 /* Emit the escape sequence to reset the state. */
290 EMIT_SHIFT_TO_INIT;
291 #endif
292 /* Call the steps down the chain if there are any but only if we
293 successfully emitted the escape sequence. */
294 if (status == __GCONV_OK && ! data->__is_last)
295 status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
296 written, 1, consume_incomplete));
297 }
298 else
299 {
300 /* We preserve the initial values of the pointer variables. */
301 const unsigned char *inptr = *inptrp;
302 unsigned char *outbuf = data->__outbuf;
303 unsigned char *outend = data->__outbufend;
304 unsigned char *outstart;
305 /* This variable is used to count the number of characters we
306 actually converted. */
307 size_t converted = 0;
308 #if defined _STRING_ARCH_unaligned \
309 || MIN_NEEDED_FROM == 1 || MAX_NEEDED_FROM % MIN_NEEDED_FROM != 0 \
310 || MIN_NEEDED_TO == 1 || MAX_NEEDED_TO % MIN_NEEDED_TO != 0
311 # define unaligned 0
312 #else
313 int unaligned;
314 # define GEN_unaligned(name) GEN_unaligned2 (name)
315 # define GEN_unaligned2(name) name##_unaligned
316 #endif
317
318 #ifdef PREPARE_LOOP
319 PREPARE_LOOP
320 #endif
321
322 #if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
323 /* If the function is used to implement the mb*towc*() or wc*tomb*()
324 functions we must test whether any bytes from the last call are
325 stored in the `state' object. */
326 if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
327 || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
328 && consume_incomplete && (data->__statep->__count & 7) != 0)
329 {
330 /* Yep, we have some bytes left over. Process them now. */
331
332 # if MAX_NEEDED_FROM > 1
333 if (MAX_NEEDED_TO == 1 || FROM_DIRECTION)
334 status = SINGLE(FROM_LOOP) (inptrp, inend, &outbuf, outend,
335 data->__statep, step->__data,
336 &converted EXTRA_LOOP_ARGS);
337 # endif
338 # if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION
339 else
340 # endif
341 # if MAX_NEEDED_TO > 1 && !ONE_DIRECTION
342 status = SINGLE(TO_LOOP) (inptrp, inend, &outbuf, outend,
343 data->__statep, step->__data,
344 &converted EXTRA_LOOP_ARGS);
345 # endif
346
347 if (status != __GCONV_OK)
348 return status;
349 }
350 #endif
351
352 #if !defined _STRING_ARCH_unaligned \
353 && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
354 && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
355 /* The following assumes that encodings, which have a variable length
356 what might unalign a buffer even though it is a aligned in the
357 beginning, either don't have the minimal number of bytes as a divisor
358 of the maximum length or have a minimum length of 1. This is true
359 for all known and supported encodings. */
360 unaligned = ((FROM_DIRECTION
361 && ((uintptr_t) inptr % MIN_NEEDED_FROM != 0
362 || (data->__is_last
363 && (uintptr_t) outbuf % MIN_NEEDED_TO != 0)))
364 || (!FROM_DIRECTION
365 && ((data->__is_last
366 && (uintptr_t) outbuf % MIN_NEEDED_FROM != 0)
367 || (uintptr_t) inptr % MIN_NEEDED_TO != 0)));
368 #endif
369
370 do
371 {
372 /* Remember the start value for this round. */
373 inptr = *inptrp;
374 /* The outbuf buffer is empty. */
375 outstart = outbuf;
376
377 #ifdef SAVE_RESET_STATE
378 SAVE_RESET_STATE (1);
379 #endif
380
381 if (!unaligned)
382 {
383 if (FROM_DIRECTION)
384 /* Run the conversion loop. */
385 status = FROM_LOOP (inptrp, inend, &outbuf, outend,
386 data->__statep, step->__data, &converted
387 EXTRA_LOOP_ARGS);
388 else
389 /* Run the conversion loop. */
390 status = TO_LOOP (inptrp, inend, &outbuf, outend,
391 data->__statep, step->__data, &converted
392 EXTRA_LOOP_ARGS);
393 }
394 #if !defined _STRING_ARCH_unaligned \
395 && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
396 && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
397 else
398 {
399 if (FROM_DIRECTION)
400 /* Run the conversion loop. */
401 status = GEN_unaligned (FROM_LOOP) (inptrp, inend, &outbuf,
402 outend, data->__statep,
403 step->__data, &converted
404 EXTRA_LOOP_ARGS);
405 else
406 /* Run the conversion loop. */
407 status = GEN_unaligned (TO_LOOP) (inptrp, inend, &outbuf,
408 outend, data->__statep,
409 step->__data, &converted
410 EXTRA_LOOP_ARGS);
411 }
412 #endif
413
414 /* We finished one use of the loops. */
415 ++data->__invocation_counter;
416
417 /* If this is the last step leave the loop, there is nothing
418 we can do. */
419 if (data->__is_last)
420 {
421 /* Store information about how many bytes are available. */
422 data->__outbuf = outbuf;
423
424 /* Remember how many non-identical characters we converted. */
425 *written += converted;
426
427 break;
428 }
429
430 /* Write out all output which was produced. */
431 if (outbuf > outstart)
432 {
433 const unsigned char *outerr = data->__outbuf;
434 int result;
435
436 result = DL_CALL_FCT (fct, (next_step, next_data, &outerr,
437 outbuf, written, 0,
438 consume_incomplete));
439
440 if (result != __GCONV_EMPTY_INPUT)
441 {
442 if (__builtin_expect (outerr != outbuf, 0))
443 {
444 #ifdef RESET_INPUT_BUFFER
445 RESET_INPUT_BUFFER;
446 #else
447 /* We have a problem with the in on of the functions
448 below. Undo the conversion upto the error point. */
449 size_t nstatus;
450
451 /* Reload the pointers. */
452 *inptrp = inptr;
453 outbuf = outstart;
454
455 /* Reset the state. */
456 # ifdef SAVE_RESET_STATE
457 SAVE_RESET_STATE (0);
458 # endif
459
460 if (FROM_DIRECTION)
461 /* Run the conversion loop. */
462 nstatus = FROM_LOOP ((const unsigned char **) inptrp,
463 (const unsigned char *) inend,
464 (unsigned char **) &outbuf,
465 (unsigned char *) outerr,
466 data->__statep, step->__data,
467 &converted EXTRA_LOOP_ARGS);
468 else
469 /* Run the conversion loop. */
470 nstatus = TO_LOOP ((const unsigned char **) inptrp,
471 (const unsigned char *) inend,
472 (unsigned char **) &outbuf,
473 (unsigned char *) outerr,
474 data->__statep, step->__data,
475 &converted EXTRA_LOOP_ARGS);
476
477 /* We must run out of output buffer space in this
478 rerun. */
479 assert (outbuf == outerr);
480 assert (nstatus == __GCONV_FULL_OUTPUT);
481
482 /* If we haven't consumed a single byte decrement
483 the invocation counter. */
484 if (outbuf == outstart)
485 --data->__invocation_counter;
486 #endif /* reset input buffer */
487 }
488
489 /* Change the status. */
490 status = result;
491 }
492 else
493 /* All the output is consumed, we can make another run
494 if everything was ok. */
495 if (status == __GCONV_FULL_OUTPUT)
496 status = __GCONV_OK;
497 }
498 }
499 while (status == __GCONV_OK);
500
501 #ifdef END_LOOP
502 END_LOOP
503 #endif
504
505 /* If we are supposed to consume all character store now all of the
506 remaining characters in the `state' object. */
507 #if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
508 if (((MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
509 || (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
510 && consume_incomplete && status == __GCONV_INCOMPLETE_INPUT)
511 {
512 # ifdef STORE_REST
513 mbstate_t *state = data->__statep;
514
515 STORE_REST
516 # else
517 size_t cnt;
518
519 /* Make sure the remaining bytes fit into the state objects
520 buffer. */
521 assert (inend - *inptrp < 4);
522
523 for (cnt = 0; *inptrp < inend; ++cnt)
524 data->__statep->__value.__wchb[cnt] = *(*inptrp)++;
525 data->__statep->__count &= ~7;
526 data->__statep->__count |= cnt;
527 # endif
528 }
529 #endif
530 }
531
532 return status;
533 }
534
/* Remove all customization macros so that the file can be included again
   with a different set of definitions.  This includes SAVE_RESET_STATE,
   which the conversion function uses, and EXTRA_LOOP_ARGS, whose empty
   default would otherwise clash with a definition made by a later
   module.  */
#undef DEFINE_INIT
#undef CHARSET_NAME
#undef DEFINE_FINI
#undef MIN_NEEDED_FROM
#undef MIN_NEEDED_TO
#undef MAX_NEEDED_FROM
#undef MAX_NEEDED_TO
#undef DEFINE_DIRECTION_OBJECTS
#undef FROM_DIRECTION
#undef EMIT_SHIFT_TO_INIT
#undef FROM_LOOP
#undef TO_LOOP
#undef RESET_STATE
#undef SAVE_RESET_STATE
#undef RESET_INPUT_BUFFER
#undef FUNCTION_NAME
#undef PREPARE_LOOP
#undef END_LOOP
#undef EXTRA_LOOP_ARGS
#undef ONE_DIRECTION
#undef STORE_REST