/* Conversion loop framework.
- Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1998-2003, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* This file provides a frame for the reader loop in all conversion modules.
The actual code must (of course) be provided in the actual module source
INIT_PARAMS code to define and initialize variables from params.
UPDATE_PARAMS code to store result in params.
+
+ ONEBYTE_BODY body of the specialized conversion function for a
+ single byte from the current character set to INTERNAL.
*/
#include <assert.h>
#undef FCTNAME2
#if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
/* We can handle unaligned memory access. */
-# define get16(addr) *((uint16_t *) (addr))
-# define get32(addr) *((uint32_t *) (addr))
+# define get16(addr) *((const uint16_t *) (addr))
+# define get32(addr) *((const uint32_t *) (addr))
/* We need no special support for writing values either. */
# define put16(addr, val) *((uint16_t *) (addr)) = (val)
/* Distinguish between big endian and little endian. */
# if __BYTE_ORDER == __LITTLE_ENDIAN
# define get16(addr) \
- (((__const unsigned char *) (addr))[1] << 8 \
- | ((__const unsigned char *) (addr))[0])
+ (((const unsigned char *) (addr))[1] << 8 \
+ | ((const unsigned char *) (addr))[0])
# define get32(addr) \
- (((((__const unsigned char *) (addr))[3] << 8 \
- | ((__const unsigned char *) (addr))[2]) << 8 \
- | ((__const unsigned char *) (addr))[1]) << 8 \
- | ((__const unsigned char *) (addr))[0])
+ (((((const unsigned char *) (addr))[3] << 8 \
+ | ((const unsigned char *) (addr))[2]) << 8 \
+ | ((const unsigned char *) (addr))[1]) << 8 \
+ | ((const unsigned char *) (addr))[0])
# define put16(addr, val) \
({ uint16_t __val = (val); \
(void) 0; })
# else
# define get16(addr) \
- (((__const unsigned char *) (addr))[0] << 8 \
- | ((__const unsigned char *) (addr))[1])
+ (((const unsigned char *) (addr))[0] << 8 \
+ | ((const unsigned char *) (addr))[1])
# define get32(addr) \
- (((((__const unsigned char *) (addr))[0] << 8 \
- | ((__const unsigned char *) (addr))[1]) << 8 \
- | ((__const unsigned char *) (addr))[2]) << 8 \
- | ((__const unsigned char *) (addr))[3])
+ (((((const unsigned char *) (addr))[0] << 8 \
+ | ((const unsigned char *) (addr))[1]) << 8 \
+ | ((const unsigned char *) (addr))[2]) << 8 \
+ | ((const unsigned char *) (addr))[3])
# define put16(addr, val) \
({ uint16_t __val = (val); \
((unsigned char *) (addr))[1] = __val; \
- ((unsigned char *) (addr))[2] = __val >> 8; \
+ ((unsigned char *) (addr))[0] = __val >> 8; \
(void) 0; })
# define put32(addr, val) \
({ uint32_t __val = (val); \
/* We need at least one byte for the next round. */
#ifndef MIN_NEEDED_INPUT
# error "MIN_NEEDED_INPUT definition missing"
+#elif MIN_NEEDED_INPUT < 1
+# error "MIN_NEEDED_INPUT must be >= 1"
#endif
/* Let's see how many bytes we produce. */
/* We produce at least one byte in the next round. */
#ifndef MIN_NEEDED_OUTPUT
# error "MIN_NEEDED_OUTPUT definition missing"
+#elif MIN_NEEDED_OUTPUT < 1
+# error "MIN_NEEDED_OUTPUT must be >= 1"
#endif
/* Let's see how many bytes we produce. */
# define EXTRA_LOOP_DECLS
#endif
+/* Allow using UPDATE_PARAMS and REINIT_PARAMS inside macros, where an
+ `#ifdef' test is not possible: each one defaults to an empty statement
+ when it has not been defined before this point. */
+#ifndef UPDATE_PARAMS
+# define UPDATE_PARAMS do { } while (0)
+#endif
+#ifndef REINIT_PARAMS
+# define REINIT_PARAMS do { } while (0)
+#endif
+
/* To make it easier for the writers of the modules, we define a macro
to test whether we have to ignore errors. The expansion refers to
identifiers (such as `flags') that must be in scope wherever the macro
is used. */
-#define ignore_errors_p() (flags & __GCONV_IGNORE_ERRORS)
+#define ignore_errors_p() \
+ (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
+
+
+/* Error handling for the FROM_LOOP direction, with ignoring of errors.
+ The macro sets RESULT to __GCONV_ILLEGAL_INPUT. Unless
+ ignore_errors_p () holds it leaves the enclosing loop with `break';
+ otherwise it advances INPTR by INCR, increments *IRREVERSIBLE and
+ resumes the enclosing loop with `continue'.
+ Note that we cannot use the do while (0) trick since `break' and
+ `continue' must reach certain points. */
+#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ \
+ if (! ignore_errors_p ()) \
+ break; \
+ \
+ /* We ignore the invalid input byte sequence: skip it and count it. */ \
+ inptr += (Incr); \
+ ++*irreversible; \
+ /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+ that "iconv -c" must give the same exitcode as "iconv". */ \
+ continue; \
+ }
+
+/* Error handling for the TO_LOOP direction, with use of transliteration/
+ transcription functions and ignoring of errors. The macro sets RESULT
+ to __GCONV_ILLEGAL_INPUT and, when IRREVERSIBLE is NULL, leaves the
+ enclosing loop at once. Otherwise it walks the list starting at
+ step_data->__trans and calls each entry until one returns something
+ other than __GCONV_ILLEGAL_INPUT. If none does, the character is
+ skipped (INPTR advanced by INCR, *IRREVERSIBLE incremented) when
+ ignore_errors_p () holds, else the loop is left. Note that we cannot use
+ the do while (0) trick since `break' and `continue' must reach certain
+ points. */
+#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
+ { \
+ struct __gconv_trans_data *trans; \
+ \
+ result = __GCONV_ILLEGAL_INPUT; \
+ \
+ if (irreversible == NULL) \
+ /* This means we are in a call from __gconv_transliterate. In this \
+ case we are not doing any error recovery ourselves. */ \
+ break; \
+ \
+ /* If needed, flush any conversion state, so that __gconv_transliterate \
+ starts with the current shift state. */ \
+ UPDATE_PARAMS; \
+ \
+ /* First try the transliteration methods, stopping at the first one \
+ that does not report __GCONV_ILLEGAL_INPUT. */ \
+ for (trans = step_data->__trans; trans != NULL; trans = trans->__next) \
+ { \
+ result = DL_CALL_FCT (trans->__trans_fct, \
+ (step, step_data, trans->__data, *inptrp, \
+ &inptr, inend, &outptr, irreversible)); \
+ if (result != __GCONV_ILLEGAL_INPUT) \
+ break; \
+ } \
+ \
+ REINIT_PARAMS; \
+ \
+ /* If any of them recognized the input, continue with the loop, unless \
+ the output buffer is full, in which case we leave the loop. */ \
+ if (result != __GCONV_ILLEGAL_INPUT) \
+ { \
+ if (__builtin_expect (result == __GCONV_FULL_OUTPUT, 0)) \
+ break; \
+ \
+ continue; \
+ } \
+ \
+ /* Next see whether we have to ignore the error. If not, stop. */ \
+ if (! ignore_errors_p ()) \
+ break; \
+ \
+ /* When we come here it means we ignore the character. */ \
+ ++*irreversible; \
+ inptr += Incr; \
+ /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+ that "iconv -c" must give the same exitcode as "iconv". */ \
+ continue; \
+ }
+
+
+/* Handling of Unicode 3.1 TAG characters. Unicode recommends
+ "If language codes are not relevant to the particular processing
+ operation, then they should be ignored." This macro is usually
+ called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr). When
+ CHARACTER is a TAG character the macro advances INPTR by INCR and
+ resumes the enclosing loop with `continue'; otherwise it does
+ nothing and execution falls through to the code that follows. */
+#define UNICODE_TAG_HANDLER(Character, Incr) \
+ { \
+ /* TAG characters are those in the range U+E0000..U+E007F, i.e. the \
+ values that agree with 0xe0000 in all bits above the low seven. */ \
+ if (((Character) >> 7) == (0xe0000 >> 7)) \
+ { \
+ inptr += Incr; \
+ continue; \
+ } \
+ }
/* The function returns the status, as defined in gconv.h. The
advanced input and output positions are stored back through INPTRP
and OUTPTRP before returning. */
static inline int
-FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
- unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
+__attribute ((always_inline))
+FCTNAME (LOOPFCT) (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
+ unsigned char **outptrp, const unsigned char *outend,
size_t *irreversible EXTRA_LOOP_DECLS)
{
- int result = __GCONV_OK;
+#ifdef LOOP_NEED_STATE
+ mbstate_t *state = step_data->__statep;
+#endif
+#ifdef LOOP_NEED_FLAGS
+ int flags = step_data->__flags;
+#endif
+#ifdef LOOP_NEED_DATA
+ void *data = step->__data;
+#endif
+ int result = __GCONV_EMPTY_INPUT;
const unsigned char *inptr = *inptrp;
unsigned char *outptr = *outptrp;
- /* We run one loop where we avoid checks for underflow/overflow of the
- buffers to speed up the conversion a bit. */
- size_t min_in_rounds = (inend - inptr) / MAX_NEEDED_INPUT;
- size_t min_out_rounds = (outend - outptr) / MAX_NEEDED_OUTPUT;
- size_t min_rounds = MIN (min_in_rounds, min_out_rounds);
-
#ifdef INIT_PARAMS
INIT_PARAMS;
#endif
-#undef NEED_LENGTH_TEST
-#define NEED_LENGTH_TEST 0
- while (min_rounds-- > 0)
- {
- /* Here comes the body the user provides. It can stop with RESULT
- set to GCONV_INCOMPLETE_INPUT (if the size of the input characters
- vary in size), GCONV_ILLEGAL_INPUT, or GCONV_FULL_OUTPUT (if the
- output characters vary in size. */
- BODY
- }
-
- if (result == __GCONV_OK)
+ while (inptr != inend)
{
-#if MIN_NEEDED_INPUT == MAX_NEEDED_INPUT \
- && MIN_NEEDED_OUTPUT == MAX_NEEDED_OUTPUT
- /* We don't need to start another loop since we were able to determine
- the maximal number of characters to copy in advance. What remains
- to be determined is the status. */
- if (inptr == inend)
- /* No more input. */
- result = __GCONV_EMPTY_INPUT;
- else if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
- || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
- /* Overflow in the output buffer. */
- result = __GCONV_FULL_OUTPUT;
- else
- /* We have something left in the input buffer. */
- result = __GCONV_INCOMPLETE_INPUT;
-#else
- result = __GCONV_EMPTY_INPUT;
-
-# undef NEED_LENGTH_TEST
-# define NEED_LENGTH_TEST 1
- while (inptr != inend)
+ /* The separate `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are there to
+ help the compiler generate better code. They will be optimized away
+ since MIN_NEEDED_OUTPUT is always a constant. */
+ if (MIN_NEEDED_INPUT > 1
+ && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
+ {
+ /* We don't have enough input for another complete input
+ character. */
+ result = __GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+ if ((MIN_NEEDED_OUTPUT != 1
+ && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
+ || (MIN_NEEDED_OUTPUT == 1
+ && __builtin_expect (outptr >= outend, 0)))
{
- /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
- compiler generating better code. It will optimized away
- since MIN_NEEDED_OUTPUT is always a constant. */
- if ((MIN_NEEDED_OUTPUT != 1
- && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
- || (MIN_NEEDED_OUTPUT == 1
- && __builtin_expect (outptr >= outend, 0)))
- {
- /* Overflow in the output buffer. */
- result = __GCONV_FULL_OUTPUT;
- break;
- }
- if (MIN_NEEDED_INPUT > 1
- && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
- {
- /* We don't have enough input for another complete input
- character. */
- result = __GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- /* Here comes the body the user provides. It can stop with
- RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
- input characters vary in size), GCONV_ILLEGAL_INPUT, or
- GCONV_FULL_OUTPUT (if the output characters vary in size). */
- BODY
+ /* Overflow in the output buffer. */
+ result = __GCONV_FULL_OUTPUT;
+ break;
}
-#endif /* Input and output charset are not both fixed width. */
+
+ /* Here comes the body the user provides. It can stop with
+ RESULT set to __GCONV_INCOMPLETE_INPUT (if the input characters
+ vary in size), __GCONV_ILLEGAL_INPUT, or __GCONV_FULL_OUTPUT
+ (if the output characters vary in size). */
+ BODY
}
/* Update the pointers pointed to by the parameters. */
*inptrp = inptr;
*outptrp = outptr;
-#ifdef UPDATE_PARAMS
UPDATE_PARAMS;
-#endif
return result;
}
/* Include the file a second time to define the function to handle
unaligned access. */
#if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \
- && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
- && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
+ && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \
+ && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0
# undef get16
# undef get32
# undef put16
# define SINGLE(fct) SINGLE2 (fct)
# define SINGLE2(fct) fct##_single
static inline int
-SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
+__attribute ((always_inline))
+SINGLE(LOOPFCT) (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data, size_t *irreversible
- EXTRA_LOOP_DECLS)
+ size_t *irreversible EXTRA_LOOP_DECLS)
{
+ mbstate_t *state = step_data->__statep;
+#ifdef LOOP_NEED_FLAGS
+ int flags = step_data->__flags;
+#endif
+#ifdef LOOP_NEED_DATA
+ void *data = step->__data;
+#endif
int result = __GCONV_OK;
unsigned char bytebuf[MAX_NEEDED_INPUT];
const unsigned char *inptr = *inptrp;
UNPACK_BYTES
#else
/* Add the bytes from the state to the input buffer. */
- for (inlen = 0; inlen < (state->__count & 7); ++ inlen)
+ assert ((state->__count & 7) <= sizeof (state->__value));
+ for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
bytebuf[inlen] = state->__value.__wchb[inlen];
#endif
/* Are there enough bytes in the input buffer? */
- if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
+ if (MIN_NEEDED_INPUT > 1
+ && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
{
*inptrp = inend;
#ifdef STORE_REST
+ while (inptr < inend)
+ bytebuf[inlen++] = *inptr++;
+
inptr = bytebuf;
inptrp = &inptr;
inend = &bytebuf[inlen];
inptr = bytebuf;
inend = &bytebuf[inlen];
-#undef NEED_LENGTH_TEST
-#define NEED_LENGTH_TEST 1
+
do
{
BODY
result = __GCONV_OK;
/* Clear the state buffer. */
+#ifdef CLEAR_STATE
+ CLEAR_STATE;
+#else
state->__count &= ~7;
+#endif
}
else if (result == __GCONV_INCOMPLETE_INPUT)
{
#else
/* We don't have enough input for another complete input
character. */
+ assert (inend - inptr > (state->__count & ~7));
+ assert (inend - inptr <= sizeof (state->__value));
+ state->__count = (state->__count & ~7) | (inend - inptr);
+ inlen = 0;
while (inptr < inend)
state->__value.__wchb[inlen++] = *inptr++;
#endif
#endif
+#ifdef ONEBYTE_BODY
+/* Define the shortcut function for btowc. The function body is not
+ written here: it is whatever ONEBYTE_BODY expands to. FROM_ONEBYTE
+ is then defined as the name of this function. */
+static wint_t
+gconv_btowc (struct __gconv_step *step, unsigned char c)
+ ONEBYTE_BODY
+# define FROM_ONEBYTE gconv_btowc
+#endif
+
+
/* We remove the macro definitions so that we can include this file again
for the definition of another function. */
#undef MIN_NEEDED_INPUT
#undef EXTRA_LOOP_DECLS
#undef INIT_PARAMS
#undef UPDATE_PARAMS
+#undef REINIT_PARAMS
+#undef ONEBYTE_BODY
+#undef UNPACK_BYTES
+#undef CLEAR_STATE
+#undef LOOP_NEED_STATE
+#undef LOOP_NEED_FLAGS
+#undef LOOP_NEED_DATA
#undef get16
#undef get32
#undef put16
#undef put32
#undef unaligned
-#undef UNPACK_BYTES