/* Simple transformations functions.
- Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1997-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <byteswap.h>
#include <dlfcn.h>
#include <string.h>
#include <wchar.h>
#include <sys/param.h>
+#include <gconv_int.h>
+
+#define BUILTIN_ALIAS(s1, s2) /* nothing: an alias entry produces no declaration here */
+#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
+ MinF, MaxF, MinT, MaxT) \
+ extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
+ const unsigned char **, const unsigned char *, \
+ unsigned char **, size_t *, int, int); /* Only Fct is used: each expansion declares one function with this signature. */
+#include "gconv_builtin.h" /* NOTE(review): presumably invokes the two macros above once per built-in entry -- verify against that header. */
+
#ifndef EILSEQ
# define EILSEQ EINVAL
#endif
+/* Specialized conversion function for a single byte to INTERNAL, recognizing
+ only ASCII characters.
+
+ C is the input byte. A byte below 0x80 is returned unchanged as the
+ wide-character result; any other byte yields WEOF. STEP is not
+ consulted by this function. */
+wint_t
+__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
+{
+ if (c < 0x80)
+ return c; /* 7-bit value: returned as is. */
+ else
+ return WEOF; /* High bit set: not an ASCII character. */
+}
+
+
/* Transform from the internal, UCS4-like format, to UCS4. The
difference between the internal ucs4 format and the real UCS4
format is, if any, the endianness. The Unicode/ISO 10646 says that
#define FROM_LOOP internal_ucs4_loop
#define TO_LOOP internal_ucs4_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_internal_ucs4
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
#if __BYTE_ORDER == __LITTLE_ENDIAN
/* Sigh, we have to do some real work. */
size_t cnt;
+ uint32_t *outptr32 = (uint32_t *) outptr;
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
- *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
+ *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
*inptrp = inptr;
- *outptrp = outptr;
+ *outptrp = (unsigned char *) outptr32;
#elif __BYTE_ORDER == __BIG_ENDIAN
/* Simply copy the data. */
*inptrp = inptr + n_convert * 4;
/* Determine the status. */
if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
- else if (*outptrp == outend)
+ else if (*outptrp + 4 > outend)
result = __GCONV_FULL_OUTPUT;
else
result = __GCONV_INCOMPLETE_INPUT;
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
# endif
/* Determine the status. */
- if (*outptrp == outend)
- result = __GCONV_FULL_OUTPUT;
- else if (*inptrp == inend)
+ if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
+ else if (*outptrp + 4 > outend)
+ result = __GCONV_FULL_OUTPUT;
else
result = __GCONV_INCOMPLETE_INPUT;
static inline int
+__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt, 4) < 4)
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
(*outptrp)[2] = state->__value.__wchb[1];
(*outptrp)[3] = state->__value.__wchb[0];
- *outptrp += 4;
#elif __BYTE_ORDER == __BIG_ENDIAN
/* XXX unaligned */
- *(*((uint32_t **) outptrp)++) = state->__value.__wch;
+ (*outptrp)[0] = state->__value.__wchb[0];
+ (*outptrp)[1] = state->__value.__wchb[1];
+ (*outptrp)[2] = state->__value.__wchb[2];
+ (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
+ *outptrp += 4;
/* Clear the state buffer. */
state->__count &= ~7;
#define FROM_LOOP ucs4_internal_loop
#define TO_LOOP ucs4_internal_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_ucs4_internal
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
uint32_t inval;
#if __BYTE_ORDER == __LITTLE_ENDIAN
- inval = bswap_32 (*(uint32_t *) inptr);
+ inval = bswap_32 (*(const uint32_t *) inptr);
#else
- inval = *(uint32_t *) inptr;
+ inval = *(const uint32_t *) inptr;
#endif
- if (__builtin_expect (inval, 0) > 0x7fffffff)
+ if (__glibc_unlikely (inval > 0x7fffffff))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
return __GCONV_ILLEGAL_INPUT;
}
- *((uint32_t *) outptr)++ = inval;
+ *((uint32_t *) outptr) = inval;
+ outptr += sizeof (uint32_t);
}
*inptrp = inptr;
/* Determine the status. */
if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
- else if (*outptrp == outend)
+ else if (*outptrp + 4 > outend)
result = __GCONV_FULL_OUTPUT;
else
result = __GCONV_INCOMPLETE_INPUT;
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
{
- if (__builtin_expect (inptr[0], 0) > 0x80)
+ if (__glibc_unlikely (inptr[0] > 0x80))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
*inptrp = inptr;
*outptrp = outptr;
- return __GCONV_ILLEGAL_INPUT;
+ return __GCONV_ILLEGAL_INPUT;
}
# if __BYTE_ORDER == __LITTLE_ENDIAN
/* Determine the status. */
if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
- else if (*outptrp == outend)
+ else if (*outptrp + 4 > outend)
result = __GCONV_FULL_OUTPUT;
else
result = __GCONV_INCOMPLETE_INPUT;
static inline int
+__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt, 4) < 4)
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
return __GCONV_INCOMPLETE_INPUT;
}
- if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0], 0)
- > 0x80)
+ if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80,
+ 0))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
#define FROM_LOOP internal_ucs4le_loop
#define TO_LOOP internal_ucs4le_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_internal_ucs4le
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
#if __BYTE_ORDER == __BIG_ENDIAN
/* Sigh, we have to do some real work. */
size_t cnt;
+ uint32_t *outptr32 = (uint32_t *) outptr;
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
- *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
+ *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
+ outptr = (unsigned char *) outptr32;
*inptrp = inptr;
*outptrp = outptr;
/* Determine the status. */
if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
- else if (*outptrp == outend)
+ else if (*outptrp + 4 > outend)
result = __GCONV_FULL_OUTPUT;
else
result = __GCONV_INCOMPLETE_INPUT;
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
/* Determine the status. */
if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
- else if (*outptrp == outend)
- result = __GCONV_FULL_OUTPUT;
- else
+ else if (*inptrp + 4 > inend)
result = __GCONV_INCOMPLETE_INPUT;
+ else
+ {
+ assert (*outptrp + 4 > outend);
+ result = __GCONV_FULL_OUTPUT;
+ }
return result;
}
static inline int
+__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt, 4) < 4)
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
(*outptrp)[2] = state->__value.__wchb[1];
(*outptrp)[3] = state->__value.__wchb[0];
- *outptrp += 4;
#else
/* XXX unaligned */
- *(*((uint32_t **) outptrp)++) = state->__value.__wch;
+ (*outptrp)[0] = state->__value.__wchb[0];
+ (*outptrp)[1] = state->__value.__wchb[1];
+ (*outptrp)[2] = state->__value.__wchb[2];
+ (*outptrp)[3] = state->__value.__wchb[3];
+
#endif
+ *outptrp += 4;
+
/* Clear the state buffer. */
state->__count &= ~7;
#define FROM_LOOP ucs4le_internal_loop
#define TO_LOOP ucs4le_internal_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_ucs4le_internal
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
uint32_t inval;
#if __BYTE_ORDER == __BIG_ENDIAN
- inval = bswap_32 (*(uint32_t *) inptr);
+ inval = bswap_32 (*(const uint32_t *) inptr);
#else
- inval = *(uint32_t *) inptr;
+ inval = *(const uint32_t *) inptr;
#endif
- if (__builtin_expect (inval, 0) > 0x7fffffff)
+ if (__glibc_unlikely (inval > 0x7fffffff))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
continue;
}
+ *inptrp = inptr;
+ *outptrp = outptr;
return __GCONV_ILLEGAL_INPUT;
}
- *((uint32_t *) outptr)++ = inval;
+ *((uint32_t *) outptr) = inval;
+ outptr += sizeof (uint32_t);
}
*inptrp = inptr;
/* Determine the status. */
if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
- else if (*outptrp == outend)
- result = __GCONV_FULL_OUTPUT;
- else
+ else if (*inptrp + 4 > inend)
result = __GCONV_INCOMPLETE_INPUT;
+ else
+ {
+ assert (*outptrp + 4 > outend);
+ result = __GCONV_FULL_OUTPUT;
+ }
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
{
- if (__builtin_expect (inptr[3], 0) > 0x80)
+ if (__glibc_unlikely (inptr[3] > 0x80))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
*inptrp = inptr;
*outptrp = outptr;
- return __GCONV_ILLEGAL_INPUT;
+ return __GCONV_ILLEGAL_INPUT;
}
# if __BYTE_ORDER == __BIG_ENDIAN
/* Determine the status. */
if (*inptrp == inend)
result = __GCONV_EMPTY_INPUT;
- else if (*outptrp == outend)
- result = __GCONV_FULL_OUTPUT;
- else
+ else if (*inptrp + 4 > inend)
result = __GCONV_INCOMPLETE_INPUT;
+ else
+ {
+ assert (*outptrp + 4 > outend);
+ result = __GCONV_FULL_OUTPUT;
+ }
return result;
}
static inline int
+__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt, 4) < 4)
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
return __GCONV_INCOMPLETE_INPUT;
}
- if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3], 0)
- > 0x80)
+ if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80,
+ 0))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
(*outptrp)[1] = state->__value.__wchb[2];
(*outptrp)[2] = state->__value.__wchb[1];
(*outptrp)[3] = state->__value.__wchb[0];
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#else
(*outptrp)[0] = state->__value.__wchb[0];
(*outptrp)[1] = state->__value.__wchb[1];
(*outptrp)[2] = state->__value.__wchb[2];
#define LOOPFCT FROM_LOOP
#define BODY \
{ /* One iteration: widen the input byte at inptr to a 32-bit value \
stored at outptr, or report it if it is above 0x7f. */ \
- if (__builtin_expect (*inptr, 0) > '\x7f') \
+ if (__glibc_unlikely (*inptr > '\x7f')) \
{ \
/* The value is too large. We don't try transliteration here since \
this is not an error because of the lack of possibilities to \
represent the result. This is a genuine bug in the input since \
ASCII does not allow such values. The single offending byte is \
handed to the error-handler macro below. */ \
- if (! ignore_errors_p ()) \
- { \
- /* This is no correct ANSI_X3.4-1968 character. */ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++*irreversible; \
- ++inptr; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
else \
- /* It's an one byte sequence. */ \
- /* XXX unaligned. */ \
- *((uint32_t *) outptr)++ = *inptr++; \
+ { \
+ /* It's a one-byte sequence. NOTE(review): the store goes through \
+ a uint32_t pointer; the replaced code carried an XXX unaligned \
+ marker -- confirm outptr is suitably aligned here. */ \
+ *((uint32_t *) outptr) = *inptr++; \
+ outptr += sizeof (uint32_t); \
+ } \
}
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#define LOOPFCT FROM_LOOP
#define BODY \
{ /* One iteration: read a 32-bit value from inptr and, if it is at most \
0x7f, emit it as a single output byte. */ \
- /* XXX unaligned. */ \
- if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \
+ if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
{ \
- STANDARD_ERR_HANDLER (4); \
+ UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); /* NOTE(review): presumably deals with Unicode tag characters first -- verify against the macro. */ \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); /* 4 = size of the rejected input unit, presumably the skip length -- verify. */ \
} \
else \
- /* It's an one byte sequence. */ \
- *outptr++ = *((uint32_t *) inptr)++; \
+ { \
+ /* It's a one-byte sequence. The value is narrowed to one output \
+ byte and the input advances by a full 32-bit unit. */ \
+ *outptr++ = *((const uint32_t *) inptr); \
+ inptr += sizeof (uint32_t); \
+ } \
}
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t wc = *((uint32_t *) inptr); \
- \
- /* Since we control every character we read this cannot happen. */ \
- assert (wc <= 0x7fffffff); \
+ uint32_t wc = *((const uint32_t *) inptr); \
\
- if (wc < 0x80) \
+ if (__glibc_likely (wc < 0x80)) \
/* It's an one byte sequence. */ \
*outptr++ = (unsigned char) wc; \
- else \
+ else if (__glibc_likely (wc <= 0x7fffffff \
+ && (wc < 0xd800 || wc > 0xdfff))) \
{ \
size_t step; \
- char *start; \
+ unsigned char *start; \
\
for (step = 2; step < 6; ++step) \
if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
break; \
\
- if (__builtin_expect (outptr + step > outend, 0)) \
+ if (__glibc_unlikely (outptr + step > outend)) \
{ \
/* Too long. */ \
result = __GCONV_FULL_OUTPUT; \
start = outptr; \
*outptr = (unsigned char) (~0xff >> step); \
outptr += step; \
- --step; \
do \
{ \
- start[step] = 0x80 | (wc & 0x3f); \
+ start[--step] = 0x80 | (wc & 0x3f); \
wc >>= 6; \
} \
- while (--step > 0); \
+ while (step > 1); \
start[0] |= wc; \
+ } \
+ else \
+ { \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
\
inptr += 4; \
}
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t ch; \
- uint_fast32_t cnt; \
- uint_fast32_t i; \
- \
/* Next input byte. */ \
- ch = *inptr; \
+ uint32_t ch = *inptr; \
\
- if (ch < 0x80) \
+ if (__glibc_likely (ch < 0x80)) \
{ \
/* One byte sequence. */ \
- cnt = 1; \
++inptr; \
} \
else \
{ \
- if (ch >= 0xc2 && ch < 0xe0) \
+ uint_fast32_t cnt; \
+ uint_fast32_t i; \
+ \
+ if (ch >= 0xc2 && ch < 0xe0) \
{ \
- /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
- otherwise the wide character could have been represented \
- using a single byte. */ \
+ /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
+ otherwise the wide character could have been represented \
+ using a single byte. */ \
cnt = 2; \
ch &= 0x1f; \
} \
- else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
+ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
{ \
/* We expect three bytes. */ \
cnt = 3; \
ch &= 0x0f; \
} \
- else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
+ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
{ \
/* We expect four bytes. */ \
cnt = 4; \
ch &= 0x07; \
} \
- else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
+ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
{ \
/* We expect five bytes. */ \
cnt = 5; \
ch &= 0x03; \
} \
- else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc) \
+ else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
{ \
/* We expect six bytes. */ \
cnt = 6; \
} \
else \
{ \
- int skipped; \
- \
- if (! ignore_errors_p ()) \
- { \
- /* This is an illegal encoding. */ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
/* Search the end of this ill-formed UTF-8 character. This \
is the next byte with (x & 0xc0) != 0x80. */ \
- skipped = 0; \
- do \
- { \
- ++inptr; \
- ++skipped; \
- } \
- while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
+ i = 0; \
+ do \
+ ++i; \
+ while (inptr + i < inend \
+ && (*(inptr + i) & 0xc0) == 0x80 \
+ && i < 5); \
\
- continue; \
+ errout: \
+ STANDARD_FROM_LOOP_ERR_HANDLER (i); \
} \
\
- if (__builtin_expect (inptr + cnt > inend, 0)) \
+ if (__glibc_unlikely (inptr + cnt > inend)) \
{ \
/* We don't have enough input. But before we report that check \
that all the bytes are correct. */ \
if ((inptr[i] & 0xc0) != 0x80) \
break; \
\
- if (__builtin_expect (inptr + i == inend, 1)) \
+ if (__glibc_likely (inptr + i == inend)) \
{ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
- if (ignore_errors_p ()) \
- { \
- /* Ignore it. */ \
- inptr += i; \
- ++*irreversible; \
- continue; \
- } \
- \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
+ goto errout; \
} \
\
/* Read the possible remaining bytes. */ \
/* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
have been represented with fewer than cnt bytes. */ \
- if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
+ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
+ /* Do not accept UTF-16 surrogates. */ \
+ || (ch >= 0xd800 && ch <= 0xdfff)) \
{ \
/* This is an illegal encoding. */ \
- if (ignore_errors_p ()) \
- { \
- inptr += i; \
- ++*irreversible; \
- continue; \
- } \
- \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
+ goto errout; \
} \
\
inptr += cnt; \
} \
\
/* Now adjust the pointers and store the result. */ \
- *((uint32_t *) outptr)++ = ch; \
+ *((uint32_t *) outptr) = ch; \
+ outptr += sizeof (uint32_t); \
}
#define LOOP_NEED_FLAGS
correct and that it requires a larger number of bytes than there \
are in the input buffer. */ \
wint_t ch = **inptrp; \
- size_t cnt; \
+ size_t cnt, r; \
\
state->__count = inend - *inptrp; \
\
+ assert (ch != 0xc0 && ch != 0xc1); \
if (ch >= 0xc2 && ch < 0xe0) \
{ \
/* We expect two bytes. The first byte cannot be 0xc0 or \
cnt = 2; \
ch &= 0x1f; \
} \
- else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0) \
+ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
{ \
/* We expect three bytes. */ \
cnt = 3; \
ch &= 0x0f; \
} \
- else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0) \
+ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
{ \
/* We expect four bytes. */ \
cnt = 4; \
ch &= 0x07; \
} \
- else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8) \
+ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
{ \
/* We expect five bytes. */ \
cnt = 5; \
} \
\
/* The first byte is already consumed. */ \
- --cnt; \
+ r = cnt - 1; \
while (++(*inptrp) < inend) \
{ \
ch <<= 6; \
ch |= **inptrp & 0x3f; \
- --cnt; \
+ --r; \
} \
\
/* Shift for the so far missing bytes. */ \
- ch <<= cnt * 6; \
+ ch <<= r * 6; \
+ \
+ /* Store the number of bytes expected for the entire sequence. */ \
+ state->__count |= cnt << 8; \
\
/* Store the value. */ \
state->__value.__wch = ch; \
#define UNPACK_BYTES \
{ \
+ static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
wint_t wch = state->__value.__wch; \
- size_t ntotal; \
- inlen = state->__count; \
+ size_t ntotal = state->__count >> 8; \
\
- if (state->__value.__wch <= 0x7ff) \
- { \
- bytebuf[0] = 0xc0; \
- ntotal = 2; \
- } \
- else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff) \
- { \
- bytebuf[0] = 0xe0; \
- ntotal = 3; \
- } \
- else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff) \
- { \
- bytebuf[0] = 0xf0; \
- ntotal = 4; \
- } \
- else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff) \
- { \
- bytebuf[0] = 0xf8; \
- ntotal = 5; \
- } \
- else \
- { \
- bytebuf[0] = 0xfc; \
- ntotal = 6; \
- } \
+ inlen = state->__count & 255; \
+ \
+ bytebuf[0] = inmask[ntotal - 2]; \
\
do \
{ \
bytebuf[0] |= wch; \
}
+#define CLEAR_STATE \
+ state->__count = 0
+
+
#include <iconv/loop.c>
#include <iconv/skeleton.c>
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
- *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
+ { /* One iteration: fetch a 16-bit unit with get16 and store it \
+ widened to 32 bits at outptr. */ \
+ uint16_t u1 = get16 (inptr); \
+ \
+ if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
+ { \
+ /* Surrogate characters in UCS-2 input are not valid. Reject \
+ them. (Catching this here is not security relevant.) \
+ NOTE(review): the store below is only correct if the handler \
+ leaves this iteration (break or continue) -- verify against \
+ the macro's definition. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
+ } \
+ \
+ *((uint32_t *) outptr) = u1; \
+ outptr += sizeof (uint32_t); \
+ inptr += 2; /* Consume the 16-bit input unit. */ \
+ }
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
#define LOOPFCT FROM_LOOP
#define BODY \
{ /* One iteration: read a 32-bit value from inptr and, if it is below \
0x10000 and not a surrogate, write it as one 16-bit unit with put16. */ \
- if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000) \
+ uint32_t val = *((const uint32_t *) inptr); \
+ \
+ if (__glibc_unlikely (val >= 0x10000)) \
{ \
- STANDARD_ERR_HANDLER (4); \
+ UNICODE_TAG_HANDLER (val, 4); /* NOTE(review): presumably deals with Unicode tag characters first -- verify against the macro. */ \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
+ } \
+ else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
+ { \
+ /* Surrogate characters in UCS-4 input are not valid. \
+ We must catch this, because the UCS-2 output might be \
+ interpreted as UTF-16 by other programs. If we let \
+ surrogates pass through, attackers could make a security \
+ hole exploit by synthesizing any desired plane 1-16 \
+ character. \
+ \
+ Here result is assigned before ignore_errors_p () is consulted, \
+ so it is also set on the path that skips the character. \
+ NOTE(review): the byte-swapped counterpart later in this file \
+ assigns result only when errors are not ignored -- confirm \
+ which of the two is intended. */ \
+ result = __GCONV_ILLEGAL_INPUT; \
+ if (! ignore_errors_p ()) \
+ break; \
+ inptr += 4; /* Skip the rejected 32-bit unit. */ \
+ ++*irreversible; \
+ continue; \
+ } \
+ else \
+ { \
+ put16 (outptr, val); \
+ outptr += sizeof (uint16_t); \
+ inptr += 4; \
} \
- else \
- *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
}
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
- *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr); \
- inptr += 2;
+ { /* One iteration: fetch a 16-bit unit with get16, byte-swap it and \
+ store it widened to 32 bits at outptr. */ \
+ uint16_t u1 = bswap_16 (get16 (inptr)); \
+ \
+ if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
+ { \
+ /* Surrogate characters in UCS-2 input are not valid. Reject \
+ them. (Catching this here is not security relevant.) \
+ \
+ The skip is open-coded here and result is assigned only when \
+ errors are not ignored. NOTE(review): the non-swapped \
+ counterpart earlier in this file uses \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2) instead -- confirm the two \
+ are meant to differ. */ \
+ if (! ignore_errors_p ()) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ inptr += 2; /* Skip the rejected 16-bit unit. */ \
+ ++*irreversible; \
+ continue; \
+ } \
+ \
+ *((uint32_t *) outptr) = u1; \
+ outptr += sizeof (uint32_t); \
+ inptr += 2; \
+ }
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
#define LOOPFCT FROM_LOOP
#define BODY \
{ /* One iteration: read a 32-bit value from inptr and, if it is below \
0x10000 and not a surrogate, write it byte-swapped as one 16-bit unit. */ \
- uint32_t val = *((uint32_t *) inptr); \
- if (__builtin_expect (val, 0) >= 0x10000) \
+ uint32_t val = *((const uint32_t *) inptr); \
+ if (__glibc_unlikely (val >= 0x10000)) \
{ \
- STANDARD_ERR_HANDLER (4); \
+ UNICODE_TAG_HANDLER (val, 4); /* NOTE(review): presumably deals with Unicode tag characters first -- verify against the macro. */ \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
+ } \
+ else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
+ { \
+ /* Surrogate characters in UCS-4 input are not valid. \
+ We must catch this, because the UCS-2 output might be \
+ interpreted as UTF-16 by other programs. If we let \
+ surrogates pass through, attackers could make a security \
+ hole exploit by synthesizing any desired plane 1-16 \
+ character. \
+ \
+ Here result is assigned only when errors are not ignored. \
+ NOTE(review): the non-swapped counterpart earlier in this file \
+ assigns result before consulting ignore_errors_p () -- confirm \
+ which of the two is intended. */ \
+ if (! ignore_errors_p ()) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ break; \
+ } \
+ inptr += 4; /* Skip the rejected 32-bit unit. */ \
+ ++*irreversible; \
+ continue; \
+ } \
+ else \
+ { \
+ put16 (outptr, bswap_16 (val)); \
+ outptr += sizeof (uint16_t); \
+ inptr += 4; \
} \
- *((uint16_t *) outptr)++ = bswap_16 (val); \
- inptr += 4; \
}
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>