]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/loop.c
associate a deallocator for iconv_open
[thirdparty/glibc.git] / iconv / loop.c
CommitLineData
8619129f 1/* Conversion loop framework.
581c785b 2 Copyright (C) 1998-2022 Free Software Foundation, Inc.
8619129f 3 This file is part of the GNU C Library.
8619129f
UD
4
5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
8619129f
UD
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
8619129f 14
41bdb6e2 15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
8619129f
UD
18
19/* This file provides a frame for the reader loop in all conversion modules.
20 The actual code must (of course) be provided in the actual module source
21 code but certain actions can be written down generically, with some
22 customization options which are these:
23
24 MIN_NEEDED_INPUT minimal number of input bytes needed for the next
25 conversion.
26 MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round
27 of conversion.
28
29 MAX_NEEDED_INPUT you guessed it, this is the maximal number of input
30 bytes needed. It defaults to MIN_NEEDED_INPUT
31 MAX_NEEDED_OUTPUT likewise for output bytes.
32
8619129f
UD
33 LOOPFCT name of the function created. If not specified
34 the name is `loop' but this prevents the use
35 of multiple functions in the same file.
36
8619129f
UD
37 BODY this is supposed to expand to the body of the loop.
38 The user must provide this.
28f1c862 39
382466e0 40 EXTRA_LOOP_DECLS extra arguments passed from conversion loop call.
66175fa8
UD
41
42 INIT_PARAMS code to define and initialize variables from params.
43 UPDATE_PARAMS code to store result in params.
f9ad060c
UD
44
45 ONEBYTE_BODY body of the specialized conversion function for a
46 single byte from the current character set to INTERNAL.
8619129f
UD
47*/
48
fd1b5c0f 49#include <assert.h>
b35e58e4 50#include <endian.h>
7ac6fad9 51#include <iconv/gconv_int.h>
b35e58e4
UD
52#include <stdint.h>
53#include <string.h>
d64b6ad0 54#include <wchar.h>
8619129f
UD
55#include <sys/param.h> /* For MIN. */
56#define __need_size_t
57#include <stddef.h>
9090848d 58#include <libc-diag.h>
8619129f 59
b35e58e4
UD
60/* We have to provide support for machines which are not able to handle
61 unaligned memory accesses. Some of the character encodings have
62 representations with a fixed width of 2 or 4 bytes. But if we cannot
63 access unaligned memory we still have to read byte-wise. */
64#undef FCTNAME2
27822ce6 65#if _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
b35e58e4 66/* We can handle unaligned memory access. */
a784e502
UD
/* Direct accessors: the value is read or written through a pointer cast
   to the fixed-width type.  Every expansion is wrapped in parentheses so
   that it stays a single operand wherever it is used (for example inside
   a conditional expression).  */
# define get16(addr) (*((const uint16_t *) (addr)))
# define get32(addr) (*((const uint32_t *) (addr)))

/* We need no special support for writing values either.  */
# define put16(addr, val) (*((uint16_t *) (addr)) = (val))
# define put32(addr, val) (*((uint32_t *) (addr)) = (val))

/* Function names are used unchanged in this variant.  */
# define FCTNAME2(name) name
75#else
76/* Distinguish between big endian and little endian. */
77# if __BYTE_ORDER == __LITTLE_ENDIAN
78# define get16(addr) \
a784e502
UD
79 (((const unsigned char *) (addr))[1] << 8 \
80 | ((const unsigned char *) (addr))[0])
b35e58e4 81# define get32(addr) \
a784e502
UD
82 (((((const unsigned char *) (addr))[3] << 8 \
83 | ((const unsigned char *) (addr))[2]) << 8 \
84 | ((const unsigned char *) (addr))[1]) << 8 \
85 | ((const unsigned char *) (addr))[0])
b35e58e4 86
cb2c5501 87# define put16(addr, val) \
b35e58e4 88 ({ uint16_t __val = (val); \
cb2c5501
UD
89 ((unsigned char *) (addr))[0] = __val; \
90 ((unsigned char *) (addr))[1] = __val >> 8; \
b35e58e4 91 (void) 0; })
cb2c5501
UD
92# define put32(addr, val) \
93 ({ uint32_t __val = (val); \
94 ((unsigned char *) (addr))[0] = __val; \
b35e58e4 95 __val >>= 8; \
cb2c5501 96 ((unsigned char *) (addr))[1] = __val; \
b35e58e4 97 __val >>= 8; \
cb2c5501 98 ((unsigned char *) (addr))[2] = __val; \
b35e58e4 99 __val >>= 8; \
cb2c5501 100 ((unsigned char *) (addr))[3] = __val; \
b35e58e4
UD
101 (void) 0; })
102# else
103# define get16(addr) \
a784e502
UD
104 (((const unsigned char *) (addr))[0] << 8 \
105 | ((const unsigned char *) (addr))[1])
b35e58e4 106# define get32(addr) \
a784e502
UD
107 (((((const unsigned char *) (addr))[0] << 8 \
108 | ((const unsigned char *) (addr))[1]) << 8 \
109 | ((const unsigned char *) (addr))[2]) << 8 \
110 | ((const unsigned char *) (addr))[3])
b35e58e4 111
cb2c5501 112# define put16(addr, val) \
b35e58e4 113 ({ uint16_t __val = (val); \
cb2c5501 114 ((unsigned char *) (addr))[1] = __val; \
f7ccf2fc 115 ((unsigned char *) (addr))[0] = __val >> 8; \
b35e58e4 116 (void) 0; })
cb2c5501
UD
117# define put32(addr, val) \
118 ({ uint32_t __val = (val); \
119 ((unsigned char *) (addr))[3] = __val; \
b35e58e4 120 __val >>= 8; \
cb2c5501 121 ((unsigned char *) (addr))[2] = __val; \
b35e58e4 122 __val >>= 8; \
cb2c5501 123 ((unsigned char *) (addr))[1] = __val; \
b35e58e4 124 __val >>= 8; \
cb2c5501 125 ((unsigned char *) (addr))[0] = __val; \
b35e58e4
UD
126 (void) 0; })
127# endif
128
/* In this byte-wise access variant the generated function names get an
   `_unaligned' suffix.  */
129# define FCTNAME2(name) name##_unaligned
130#endif
131#define FCTNAME(name) FCTNAME2(name)
132
133
8619129f
UD
134/* We need at least one byte for the next round. */
135#ifndef MIN_NEEDED_INPUT
5aa8ff62 136# error "MIN_NEEDED_INPUT definition missing"
4a0de63b
UD
137#elif MIN_NEEDED_INPUT < 1
138# error "MIN_NEEDED_INPUT must be >= 1"
8619129f
UD
139#endif
140
141/* Let's see how many bytes we produce. */
142#ifndef MAX_NEEDED_INPUT
143# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT
144#endif
145
146/* We produce at least one byte in the next round. */
147#ifndef MIN_NEEDED_OUTPUT
5aa8ff62 148# error "MIN_NEEDED_OUTPUT definition missing"
c0a0f9a3
UD
149#elif MIN_NEEDED_OUTPUT < 1
150# error "MIN_NEEDED_OUTPUT must be >= 1"
8619129f
UD
151#endif
152
153/* Let's see how many bytes we produce. */
154#ifndef MAX_NEEDED_OUTPUT
155# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT
156#endif
157
158/* Default name for the function. */
159#ifndef LOOPFCT
160# define LOOPFCT loop
161#endif
162
163/* Make sure we have a loop body. */
164#ifndef BODY
165# error "Definition of BODY missing for function" LOOPFCT
166#endif
167
8619129f 168
28f1c862
UD
169/* If no arguments have to passed to the loop function define the macro
170 as empty. */
171#ifndef EXTRA_LOOP_DECLS
172# define EXTRA_LOOP_DECLS
173#endif
174
4b1b449d
UD
175/* Allow using UPDATE_PARAMS in macros where #ifdef UPDATE_PARAMS test
176 isn't possible. */
177#ifndef UPDATE_PARAMS
178# define UPDATE_PARAMS do { } while (0)
179#endif
180#ifndef REINIT_PARAMS
181# define REINIT_PARAMS do { } while (0)
182#endif
183
28f1c862 184
85830c4c
UD
/* Convenience predicate for the writers of the modules: nonzero when
   conversion errors are to be ignored, i.e. when the step flags contain
   __GCONV_IGNORE_ERRORS and an IRREVERSIBLE counter is available.
   Expects `flags' and `irreversible' to be in scope at the point of
   use.  */
#define ignore_errors_p() \
  ((flags & __GCONV_IGNORE_ERRORS) && irreversible != NULL)
85830c4c
UD
189
190
e438a468
UD
/* Error handling for the FROM_LOOP direction, with ignoring of errors.
   RESULT is set to __GCONV_ILLEGAL_INPUT.  If errors are not ignored the
   enclosing loop is left with `break'; otherwise INCR input bytes are
   skipped, *IRREVERSIBLE is incremented and the loop goes on with
   `continue'.  The expansion is a plain block, not a do while (0)
   construct, because `break' and `continue' must act on the enclosing
   conversion loop.  */
#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
  { \
    result = __GCONV_ILLEGAL_INPUT; \
    \
    if (ignore_errors_p ()) \
      { \
        /* Skip the invalid input byte sequence.  */ \
        inptr += (Incr); \
        ++*irreversible; \
        /* RESULT stays __GCONV_ILLEGAL_INPUT, because of the constraint \
           that "iconv -c" must give the same exitcode as "iconv".  */ \
        continue; \
      } \
    \
    break; \
  }
208
/* Error handling for the TO_LOOP direction, with use of transliteration/
   transcription functions and ignoring of errors.  Note that we cannot use
   the do while (0) trick since `break' and `continue' must reach certain
   points.

   RESULT is first set to __GCONV_ILLEGAL_INPUT.  The loop is left with
   `break' when IRREVERSIBLE is NULL, when transliteration reports
   __GCONV_FULL_OUTPUT, or when the error is neither transliterated nor
   ignored.  Otherwise the loop goes on with `continue'; if the character
   is ignored, INCR input bytes are skipped and *IRREVERSIBLE is
   incremented.  */
#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
  { \
    result = __GCONV_ILLEGAL_INPUT; \
    \
    if (irreversible == NULL) \
      /* This means we are in call from __gconv_transliterate.  In this \
         case we are not doing any error recovery ourselves.  */ \
      break; \
    \
    /* If needed, flush any conversion state, so that __gconv_transliterate \
       starts with current shift state.  */ \
    UPDATE_PARAMS; \
    \
    /* First try the transliteration methods.  */ \
    if ((step_data->__flags & __GCONV_TRANSLIT) != 0) \
      result = __gconv_transliterate \
        (step, step_data, *inptrp, \
         &inptr, inend, &outptr, irreversible); \
    \
    REINIT_PARAMS; \
    \
    /* If any of them recognized the input continue with the loop.  */ \
    if (result != __GCONV_ILLEGAL_INPUT) \
      { \
        if (__glibc_unlikely (result == __GCONV_FULL_OUTPUT)) \
          break; \
        \
        continue; \
      } \
    \
    /* Next see whether we have to ignore the error.  If not, stop.  */ \
    if (! ignore_errors_p ()) \
      break; \
    \
    /* When we come here it means we ignore the character.  */ \
    ++*irreversible; \
    inptr += (Incr); \
    /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
       that "iconv -c" must give the same exitcode as "iconv".  */ \
    continue; \
  }
254
255
6900d2ca
JM
256/* With GCC 7 when compiling with -Os for 32-bit s390 the compiler
257 warns that the variable 'ch', in the definition of BODY in
258 sysdeps/s390/multiarch/8bit-generic.c, may be used uninitialized in
259 the call to UNICODE_TAG_HANDLER in that macro. This variable is
260 actually always initialized before use, in the prior loop if INDEX
261 is nonzero and in the following 'if' if INDEX is zero. That code
262 has a comment referencing this diagnostic disabling; updates in one
263 place may require updates in the other. */
264DIAG_PUSH_NEEDS_COMMENT;
265DIAG_IGNORE_Os_NEEDS_COMMENT (7, "-Wmaybe-uninitialized");
9a1f6754
UD
266/* Handling of Unicode 3.1 TAG characters. Unicode recommends
267 "If language codes are not relevant to the particular processing
e438a468
UD
268 operation, then they should be ignored." This macro is usually
269 called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr). */
9a1f6754
UD
/* If CHARACTER is a TAG character, skip INCR input bytes and go on with
   the next iteration of the enclosing loop; otherwise do nothing.  The
   expansion is a plain block because `continue' must act on the
   enclosing loop.  Expects `inptr' to be in scope.  */
#define UNICODE_TAG_HANDLER(Character, Incr) \
  { \
    /* TAG characters are those in the range U+E0000..U+E007F.  */ \
    if (((Character) >> 7) == (0xe0000 >> 7)) \
      { \
        inptr += (Incr); \
        continue; \
      } \
  }
6900d2ca 279DIAG_POP_NEEDS_COMMENT;
9a1f6754
UD
280
281
8619129f
UD
282/* The function returns the status, as defined in gconv.h. */
283static inline int
dd9423a6 284__attribute ((always_inline))
55985355
UD
285FCTNAME (LOOPFCT) (struct __gconv_step *step,
286 struct __gconv_step_data *step_data,
287 const unsigned char **inptrp, const unsigned char *inend,
17427edd 288 unsigned char **outptrp, const unsigned char *outend,
38677ace 289 size_t *irreversible EXTRA_LOOP_DECLS)
8619129f 290{
55985355
UD
291#ifdef LOOP_NEED_STATE
292 mbstate_t *state = step_data->__statep;
293#endif
294#ifdef LOOP_NEED_FLAGS
295 int flags = step_data->__flags;
296#endif
297#ifdef LOOP_NEED_DATA
298 void *data = step->__data;
299#endif
300 int result = __GCONV_EMPTY_INPUT;
8619129f
UD
301 const unsigned char *inptr = *inptrp;
302 unsigned char *outptr = *outptrp;
8619129f 303
66175fa8
UD
304#ifdef INIT_PARAMS
305 INIT_PARAMS;
306#endif
307
55985355 308 while (inptr != inend)
8619129f 309 {
55985355 310 /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
ca3c0135 311 compiler generating better code. They will be optimized away
55985355 312 since MIN_NEEDED_OUTPUT is always a constant. */
eb9dc2a2
UD
313 if (MIN_NEEDED_INPUT > 1
314 && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
315 {
316 /* We don't have enough input for another complete input
317 character. */
318 result = __GCONV_INCOMPLETE_INPUT;
319 break;
320 }
55985355
UD
321 if ((MIN_NEEDED_OUTPUT != 1
322 && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
323 || (MIN_NEEDED_OUTPUT == 1
324 && __builtin_expect (outptr >= outend, 0)))
325 {
326 /* Overflow in the output buffer. */
327 result = __GCONV_FULL_OUTPUT;
328 break;
329 }
55985355
UD
330
331 /* Here comes the body the user provides. It can stop with
332 RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
333 input characters vary in size), GCONV_ILLEGAL_INPUT, or
334 GCONV_FULL_OUTPUT (if the output characters vary in size). */
335 BODY
8619129f
UD
336 }
337
8619129f
UD
338 /* Update the pointers pointed to by the parameters. */
339 *inptrp = inptr;
340 *outptrp = outptr;
66175fa8 341 UPDATE_PARAMS;
8619129f
UD
342
343 return result;
344}
345
346
b02b4774
UD
347/* Include the file a second time to define the function to handle
348 unaligned access. */
27822ce6 349#if !defined DEFINE_UNALIGNED && !_STRING_ARCH_unaligned \
4a0de63b
UD
350 && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \
351 && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0
fd1b5c0f
UD
352# undef get16
353# undef get32
354# undef put16
355# undef put32
356# undef unaligned
357
b35e58e4
UD
358# define DEFINE_UNALIGNED
359# include "loop.c"
360# undef DEFINE_UNALIGNED
32bead5b
WN
361#else
# if MAX_NEEDED_INPUT > 1
#  define SINGLE(fct) SINGLE2 (fct)
#  define SINGLE2(fct) fct##_single
/* Convert a single character whose first bytes were saved in the
   conversion state by an earlier call.  The saved bytes and bytes from
   the new input are gathered in a local buffer and BODY is run once on
   that buffer.

   The parameters have the same meaning as for the loop function above.
   Returns __GCONV_OK when a character was converted (*INPTRP and
   *OUTPTRP are advanced and the saved bytes are cleared),
   __GCONV_INCOMPLETE_INPUT when the character is still incomplete (all
   available input is then consumed and saved), __GCONV_FULL_OUTPUT when
   there is no room in the output buffer, or whatever other status BODY
   set.  */
static inline int
__attribute ((always_inline))
SINGLE(LOOPFCT) (struct __gconv_step *step,
                 struct __gconv_step_data *step_data,
                 const unsigned char **inptrp, const unsigned char *inend,
                 unsigned char **outptrp, const unsigned char *outend,
                 size_t *irreversible EXTRA_LOOP_DECLS)
{
  mbstate_t *state = step_data->__statep;
#  ifdef LOOP_NEED_FLAGS
  int flags = step_data->__flags;
#  endif
#  ifdef LOOP_NEED_DATA
  void *data = step->__data;
#  endif
  int result = __GCONV_OK;
  unsigned char bytebuf[MAX_NEEDED_INPUT];
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t inlen;

#  ifdef INIT_PARAMS
  INIT_PARAMS;
#  endif

#  ifdef UNPACK_BYTES
  UNPACK_BYTES
#  else
  /* Add the bytes from the state to the input buffer.  The low three
     bits of __count hold the number of saved bytes.  */
  assert ((state->__count & 7) <= sizeof (state->__value));
  for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
    bytebuf[inlen] = state->__value.__wchb[inlen];
#  endif

  /* Are there enough bytes in the input buffer?  The saved bytes plus the
     available input are compared as byte counts, so that no pointer
     beyond the end of the input is formed.  */
  if (MIN_NEEDED_INPUT > 1
      && __builtin_expect (inlen + (size_t) (inend - inptr)
                           < (size_t) MIN_NEEDED_INPUT, 0))
    {
      *inptrp = inend;
#  ifdef STORE_REST

      /* Building with -O3 GCC emits a `array subscript is above array
         bounds' warning.  GCC BZ #64739 has been opened for this.  */
      DIAG_PUSH_NEEDS_COMMENT;
      DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Warray-bounds");
      while (inptr < inend)
        bytebuf[inlen++] = *inptr++;
      DIAG_POP_NEEDS_COMMENT;

      /* STORE_REST works on the local buffer.  */
      inptr = bytebuf;
      inptrp = &inptr;
      inend = &bytebuf[inlen];

      STORE_REST
#  else
      /* We don't have enough input for another complete input
         character.  Append the new bytes to those already saved.
         NOTE(review): unlike the incomplete-input path further down,
         the byte count in state->__count is not updated here; confirm
         that this is intended.  */
      size_t inlen_after = inlen + (inend - inptr);
      assert (inlen_after <= sizeof (state->__value.__wchb));
      for (; inlen < inlen_after; inlen++)
        state->__value.__wchb[inlen] = *inptr++;
#  endif

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Enough space in output buffer?  Again compare the remaining room
     instead of advancing the pointer.  */
  if ((MIN_NEEDED_OUTPUT != 1
       && outend - outptr < (ptrdiff_t) MIN_NEEDED_OUTPUT)
      || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
    /* Overflow in the output buffer.  */
    return __GCONV_FULL_OUTPUT;

  /* Now add characters from the normal input buffer.  */
  if (inlen >= MAX_NEEDED_INPUT)
    /* Avoid a -Wstringop-overflow= warning when this loop is
       unrolled.  The compiler cannot otherwise see that this is
       unreachable because it depends on (state->__count & 7) not
       being too large after a previous conversion step.  */
    __builtin_unreachable ();
  do
    bytebuf[inlen++] = *inptr++;
  while (inlen < MAX_NEEDED_INPUT && inptr < inend);

  /* From here on BODY reads from the local buffer.  */
  inptr = bytebuf;
  inend = &bytebuf[inlen];

  /* The do while (0) wrapper gives `break' and `continue' inside BODY a
     loop to leave.  */
  do
    {
      BODY
    }
  while (0);

  /* Now we either have produced an output character and consumed all the
     bytes from the state and at least one more, or the character is still
     incomplete, or we have some other error (like illegal input character,
     no space in output buffer).  */
  if (__glibc_likely (inptr != bytebuf))
    {
      /* We found a new character.  */
      assert (inptr - bytebuf > (state->__count & 7));

      /* Only the bytes taken from the caller's input count, not the ones
         that came from the state.  */
      *inptrp += inptr - bytebuf - (state->__count & 7);
      *outptrp = outptr;

      result = __GCONV_OK;

      /* Clear the state buffer.  */
#  ifdef CLEAR_STATE
      CLEAR_STATE;
#  else
      state->__count &= ~7;
#  endif
    }
  else if (result == __GCONV_INCOMPLETE_INPUT)
    {
      /* This can only happen if we have less than MAX_NEEDED_INPUT bytes
         available.  */
      assert (inend != &bytebuf[MAX_NEEDED_INPUT]);

      *inptrp += inend - bytebuf - (state->__count & 7);
#  ifdef STORE_REST
      inptrp = &inptr;

      STORE_REST
#  else
      /* We don't have enough input for another complete input
         character.  Save everything gathered so far in the state.
         NOTE(review): the first assertion masks with ~7 while the byte
         count lives in the low three bits (& 7); confirm which mask is
         meant.  */
      assert (inend - inptr > (state->__count & ~7));
      assert (inend - inptr <= sizeof (state->__value.__wchb));
      state->__count = (state->__count & ~7) | (inend - inptr);
      for (inlen = 0; inlen < inend - inptr; inlen++)
        state->__value.__wchb[inlen] = inptr[inlen];
      inptr = inend;
#  endif
    }

  return result;
}
#  undef SINGLE
#  undef SINGLE2
# endif
fd1b5c0f
UD
506
507
# if defined ONEBYTE_BODY
/* Shortcut function for btowc.  The function body is supplied by the
   including module through ONEBYTE_BODY; the function is published under
   the name FROM_ONEBYTE.  */
static wint_t gconv_btowc (struct __gconv_step *step, unsigned char c)
  ONEBYTE_BODY
#  define FROM_ONEBYTE gconv_btowc
# endif
f9ad060c 515
32bead5b 516#endif
f9ad060c 517
8619129f
UD
518/* We remove the macro definitions so that we can include this file again
519 for the definition of another function. */
520#undef MIN_NEEDED_INPUT
521#undef MAX_NEEDED_INPUT
522#undef MIN_NEEDED_OUTPUT
523#undef MAX_NEEDED_OUTPUT
524#undef LOOPFCT
8619129f
UD
525#undef BODY
526#undef LOOPFCT
28f1c862 527#undef EXTRA_LOOP_DECLS
66175fa8
UD
528#undef INIT_PARAMS
529#undef UPDATE_PARAMS
4b1b449d 530#undef REINIT_PARAMS
f9ad060c 531#undef ONEBYTE_BODY
55985355 532#undef UNPACK_BYTES
41f112ad 533#undef CLEAR_STATE
55985355
UD
534#undef LOOP_NEED_STATE
535#undef LOOP_NEED_FLAGS
536#undef LOOP_NEED_DATA
fd1b5c0f
UD
537#undef get16
538#undef get32
539#undef put16
540#undef put32
541#undef unaligned