/* Simple transformations functions.
- Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1997-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <byteswap.h>
#include <dlfcn.h>
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
MinF, MaxF, MinT, MaxT) \
extern int Fct (struct __gconv_step *, struct __gconv_step_data *, \
- __const unsigned char **, __const unsigned char *, \
+ const unsigned char **, const unsigned char *, \
unsigned char **, size_t *, int, int);
#include "gconv_builtin.h"
#define FROM_LOOP internal_ucs4_loop
#define TO_LOOP internal_ucs4_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_internal_ucs4
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
#if __BYTE_ORDER == __LITTLE_ENDIAN
/* Sigh, we have to do some real work. */
size_t cnt;
+ uint32_t *outptr32 = (uint32_t *) outptr;
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
- *((uint32_t *) outptr)++ = bswap_32 (*(const uint32_t *) inptr);
+ *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
*inptrp = inptr;
- *outptrp = outptr;
+ *outptrp = (unsigned char *) outptr32;
#elif __BYTE_ORDER == __BIG_ENDIAN
/* Simply copy the data. */
*inptrp = inptr + n_convert * 4;
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
static inline int
+__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt < 4, 0))
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
(*outptrp)[2] = state->__value.__wchb[1];
(*outptrp)[3] = state->__value.__wchb[0];
- *outptrp += 4;
#elif __BYTE_ORDER == __BIG_ENDIAN
/* XXX unaligned */
- *(*((uint32_t **) outptrp)++) = state->__value.__wch;
+ (*outptrp)[0] = state->__value.__wchb[0];
+ (*outptrp)[1] = state->__value.__wchb[1];
+ (*outptrp)[2] = state->__value.__wchb[2];
+ (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif
+ *outptrp += 4;
/* Clear the state buffer. */
state->__count &= ~7;
#define FROM_LOOP ucs4_internal_loop
#define TO_LOOP ucs4_internal_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_ucs4_internal
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
inval = *(const uint32_t *) inptr;
#endif
- if (__builtin_expect (inval > 0x7fffffff, 0))
+ if (__glibc_unlikely (inval > 0x7fffffff))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
return __GCONV_ILLEGAL_INPUT;
}
- *((uint32_t *) outptr)++ = inval;
+ *((uint32_t *) outptr) = inval;
+ outptr += sizeof (uint32_t);
}
*inptrp = inptr;
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
{
- if (__builtin_expect (inptr[0] > 0x80, 0))
+ if (__glibc_unlikely (inptr[0] > 0x80))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
static inline int
+__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt < 4, 0))
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
#define FROM_LOOP internal_ucs4le_loop
#define TO_LOOP internal_ucs4le_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_internal_ucs4le
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
#if __BYTE_ORDER == __BIG_ENDIAN
/* Sigh, we have to do some real work. */
size_t cnt;
+ uint32_t *outptr32 = (uint32_t *) outptr;
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
- *((uint32_t *) outptr)++ = bswap_32 (*(const uint32_t *) inptr);
+ *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
+ outptr = (unsigned char *) outptr32;
*inptrp = inptr;
*outptrp = outptr;
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
static inline int
+__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt < 4, 0))
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
(*outptrp)[2] = state->__value.__wchb[1];
(*outptrp)[3] = state->__value.__wchb[0];
- *outptrp += 4;
#else
/* XXX unaligned */
- *(*((uint32_t **) outptrp)++) = state->__value.__wch;
+ (*outptrp)[0] = state->__value.__wchb[0];
+ (*outptrp)[1] = state->__value.__wchb[1];
+ (*outptrp)[2] = state->__value.__wchb[2];
+ (*outptrp)[3] = state->__value.__wchb[3];
+
#endif
+ *outptrp += 4;
+
/* Clear the state buffer. */
state->__count &= ~7;
#define FROM_LOOP ucs4le_internal_loop
#define TO_LOOP ucs4le_internal_loop /* This is not used. */
#define FUNCTION_NAME __gconv_transform_ucs4le_internal
+#define ONE_DIRECTION 0
static inline int
+__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp, const unsigned char *inend,
inval = *(const uint32_t *) inptr;
#endif
- if (__builtin_expect (inval > 0x7fffffff, 0))
+ if (__glibc_unlikely (inval > 0x7fffffff))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
continue;
}
+ *inptrp = inptr;
+ *outptrp = outptr;
return __GCONV_ILLEGAL_INPUT;
}
- *((uint32_t *) outptr)++ = inval;
+ *((uint32_t *) outptr) = inval;
+ outptr += sizeof (uint32_t);
}
*inptrp = inptr;
return result;
}
-#ifndef _STRING_ARCH_unaligned
+#if !_STRING_ARCH_unaligned
static inline int
+__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
{
- if (__builtin_expect (inptr[3] > 0x80, 0))
+ if (__glibc_unlikely (inptr[3] > 0x80))
{
/* The value is too large. We don't try transliteration here since
this is not an error because of the lack of possibilities to
static inline int
+__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char **inptrp,
while (*inptrp < inend && cnt < 4)
state->__value.__wchb[cnt++] = *(*inptrp)++;
- if (__builtin_expect (cnt < 4, 0))
+ if (__glibc_unlikely (cnt < 4))
{
/* Still not enough bytes. Store the ones in the input buffer. */
state->__count &= ~7;
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- if (__builtin_expect (*inptr > '\x7f', 0)) \
+ if (__glibc_unlikely (*inptr > '\x7f')) \
{ \
/* The value is too large. We don't try transliteration here since \
this is not an error because of the lack of possibilities to \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
else \
- /* It's an one byte sequence. */ \
- *((uint32_t *) outptr)++ = *inptr++; \
+ { \
+ /* It's a one-byte sequence. */ \
+ *((uint32_t *) outptr) = *inptr++; \
+ outptr += sizeof (uint32_t); \
+ } \
}
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0)) \
+ if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
{ \
UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
else \
- /* It's an one byte sequence. */ \
- *outptr++ = *((const uint32_t *) inptr)++; \
+ { \
+ /* It's a one-byte sequence. */ \
+ *outptr++ = *((const uint32_t *) inptr); \
+ inptr += sizeof (uint32_t); \
+ } \
}
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
{ \
uint32_t wc = *((const uint32_t *) inptr); \
\
- if (wc < 0x80) \
+ if (__glibc_likely (wc < 0x80)) \
/* It's an one byte sequence. */ \
*outptr++ = (unsigned char) wc; \
- else if (__builtin_expect (wc <= 0x7fffffff, 1)) \
+ else if (__glibc_likely (wc <= 0x7fffffff \
+ && (wc < 0xd800 || wc > 0xdfff))) \
{ \
size_t step; \
- char *start; \
+ unsigned char *start; \
\
for (step = 2; step < 6; ++step) \
if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \
break; \
\
- if (__builtin_expect (outptr + step > outend, 0)) \
+ if (__glibc_unlikely (outptr + step > outend)) \
{ \
/* Too long. */ \
result = __GCONV_FULL_OUTPUT; \
start = outptr; \
*outptr = (unsigned char) (~0xff >> step); \
outptr += step; \
- --step; \
do \
{ \
- start[step] = 0x80 | (wc & 0x3f); \
+ start[--step] = 0x80 | (wc & 0x3f); \
wc >>= 6; \
} \
- while (--step > 0); \
+ while (step > 1); \
start[0] |= wc; \
} \
else \
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t ch; \
- uint_fast32_t cnt; \
- uint_fast32_t i; \
- \
/* Next input byte. */ \
- ch = *inptr; \
+ uint32_t ch = *inptr; \
\
- if (ch < 0x80) \
+ if (__glibc_likely (ch < 0x80)) \
{ \
/* One byte sequence. */ \
- cnt = 1; \
++inptr; \
} \
else \
{ \
+ uint_fast32_t cnt; \
+ uint_fast32_t i; \
+ \
if (ch >= 0xc2 && ch < 0xe0) \
{ \
/* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \
cnt = 2; \
ch &= 0x1f; \
} \
- else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
+ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
{ \
/* We expect three bytes. */ \
cnt = 3; \
ch &= 0x0f; \
} \
- else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
+ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
{ \
/* We expect four bytes. */ \
cnt = 4; \
ch &= 0x07; \
} \
- else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
+ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
{ \
/* We expect five bytes. */ \
cnt = 5; \
ch &= 0x03; \
} \
- else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
+ else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
{ \
/* We expect six bytes. */ \
cnt = 6; \
} \
else \
{ \
- int skipped; \
- \
/* Search the end of this ill-formed UTF-8 character. This \
is the next byte with (x & 0xc0) != 0x80. */ \
- skipped = 0; \
+ i = 0; \
do \
- ++skipped; \
- while (inptr + skipped < inend \
- && (*(inptr + skipped) & 0xc0) == 0x80 \
- && skipped < 5); \
+ ++i; \
+ while (inptr + i < inend \
+ && (*(inptr + i) & 0xc0) == 0x80 \
+ && i < 5); \
\
- STANDARD_FROM_LOOP_ERR_HANDLER (skipped); \
+ errout: \
+ STANDARD_FROM_LOOP_ERR_HANDLER (i); \
} \
\
- if (__builtin_expect (inptr + cnt > inend, 0)) \
+ if (__glibc_unlikely (inptr + cnt > inend)) \
{ \
/* We don't have enough input. But before we report that check \
that all the bytes are correct. */ \
if ((inptr[i] & 0xc0) != 0x80) \
break; \
\
- if (__builtin_expect (inptr + i == inend, 1)) \
+ if (__glibc_likely (inptr + i == inend)) \
{ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
- STANDARD_FROM_LOOP_ERR_HANDLER (i); \
+ goto errout; \
} \
\
/* Read the possible remaining bytes. */ \
/* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
have been represented with fewer than cnt bytes. */ \
- if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
+ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
+ /* Do not accept UTF-16 surrogates. */ \
+ || (ch >= 0xd800 && ch <= 0xdfff)) \
{ \
/* This is an illegal encoding. */ \
- STANDARD_FROM_LOOP_ERR_HANDLER (i); \
+ goto errout; \
} \
\
inptr += cnt; \
} \
\
/* Now adjust the pointers and store the result. */ \
- *((uint32_t *) outptr)++ = ch; \
+ *((uint32_t *) outptr) = ch; \
+ outptr += sizeof (uint32_t); \
}
#define LOOP_NEED_FLAGS
\
state->__count = inend - *inptrp; \
\
+ assert (ch != 0xc0 && ch != 0xc1); \
if (ch >= 0xc2 && ch < 0xe0) \
{ \
/* We expect two bytes. The first byte cannot be 0xc0 or \
cnt = 2; \
ch &= 0x1f; \
} \
- else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
+ else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
{ \
/* We expect three bytes. */ \
cnt = 3; \
ch &= 0x0f; \
} \
- else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
+ else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
{ \
/* We expect four bytes. */ \
cnt = 4; \
ch &= 0x07; \
} \
- else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
+ else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
{ \
/* We expect five bytes. */ \
cnt = 5; \
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint16_t u1 = *((const uint16_t *) inptr); \
+ uint16_t u1 = get16 (inptr); \
\
- if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
+ if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
{ \
/* Surrogate characters in UCS-2 input are not valid. Reject \
them. (Catching this here is not security relevant.) */ \
STANDARD_FROM_LOOP_ERR_HANDLER (2); \
} \
\
- *((uint32_t *) outptr)++ = u1; \
+ *((uint32_t *) outptr) = u1; \
+ outptr += sizeof (uint32_t); \
inptr += 2; \
}
#define LOOP_NEED_FLAGS
{ \
uint32_t val = *((const uint32_t *) inptr); \
\
- if (__builtin_expect (val >= 0x10000, 0)) \
+ if (__glibc_unlikely (val >= 0x10000)) \
{ \
UNICODE_TAG_HANDLER (val, 4); \
STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
- else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
+ else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
{ \
/* Surrogate characters in UCS-4 input are not valid. \
We must catch this, because the UCS-2 output might be \
} \
else \
{ \
- *((uint16_t *) outptr)++ = val; \
+ put16 (outptr, val); \
+ outptr += sizeof (uint16_t); \
inptr += 4; \
} \
}
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
+ uint16_t u1 = bswap_16 (get16 (inptr)); \
\
- if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \
+ if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
{ \
/* Surrogate characters in UCS-2 input are not valid. Reject \
them. (Catching this here is not security relevant.) */ \
continue; \
} \
\
- *((uint32_t *) outptr)++ = u1; \
+ *((uint32_t *) outptr) = u1; \
+ outptr += sizeof (uint32_t); \
inptr += 2; \
}
#define LOOP_NEED_FLAGS
#define BODY \
{ \
uint32_t val = *((const uint32_t *) inptr); \
- if (__builtin_expect (val >= 0x10000, 0)) \
+ if (__glibc_unlikely (val >= 0x10000)) \
{ \
UNICODE_TAG_HANDLER (val, 4); \
STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
- else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \
+ else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
{ \
/* Surrogate characters in UCS-4 input are not valid. \
We must catch this, because the UCS-2 output might be \
} \
else \
{ \
- *((uint16_t *) outptr)++ = bswap_16 (val); \
+ put16 (outptr, bswap_16 (val)); \
+ outptr += sizeof (uint16_t); \
inptr += 4; \
} \
}