[thirdparty/glibc.git] / sysdeps / s390 / s390-64 / utf8-utf32-z9.c

/* Conversion between UTF-8 and UTF-32 BE/internal.

   This module uses the Z9-109 variants of the Convert Unicode
   instructions.
   Copyright (C) 1997-2014 Free Software Foundation, Inc.

   Author: Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
   Based on the work by Ulrich Drepper  <drepper@cygnus.com>, 1997.

   Thanks to Daniel Appich who covered the relevant performance work
   in his diploma thesis.

   This is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <dlfcn.h>
#include <stdint.h>
#include <unistd.h>
#include <dl-procinfo.h>
#include <gconv.h>

/* UTF-32 big endian byte order mark.  */
#define BOM	                0x0000feffu

/* NOTE(review): presumably these tell iconv/skeleton.c not to generate
   its default init/fini functions, since this file defines gconv_init
   and gconv_end itself -- confirm against skeleton.c.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
/* These definitions apply to the UTF-8 to UTF-32 direction.  The
   software implementation for UTF-8 still supports multibyte
   characters up to 6 bytes whereas the hardware variant does not.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		6
#define MIN_NEEDED_TO		4
/* Names of the two loop functions; they are plugged in as LOOPFCT
   before each inclusion of iconv/loop.c.  */
#define FROM_LOOP		from_utf8_loop
#define TO_LOOP			to_utf8_loop
/* True when converting from UTF-8.  `dir' is the local variable
   declared by PREPARE_LOOP.  */
#define FROM_DIRECTION		(dir == from_utf8)
/* Loop prologue.  Reads the direction and the BOM flag from the
   `struct utf8_data' stored in step->__data.  If emit_bom is set,
   data->__internal_use is zero and data->__invocation_counter is 0,
   the UTF-32 byte order mark is written to outbuf and outbuf is
   advanced by 4; if fewer than 4 bytes are free the expansion site
   returns __GCONV_FULL_OUTPUT instead.

   Expects `step', `data', `outbuf' and `outend' to be in scope where
   the macro is expanded.  */
#define PREPARE_LOOP							\
  enum direction dir = ((struct utf8_data *) step->__data)->dir;	\
  int emit_bom = ((struct utf8_data *) step->__data)->emit_bom;		\
									\
  if (emit_bom && !data->__internal_use					\
      && data->__invocation_counter == 0)				\
    {									\
      /* Emit the Byte Order Mark.  */					\
      if (__glibc_unlikely (outbuf + 4 > outend))			      \
	return __GCONV_FULL_OUTPUT;					\
									\
      put32u (outbuf, BOM);						\
      outbuf += 4;							\
    }

/* Direction of the transformation.  */
enum direction
{
  illegal_dir,	/* No supported from/to name pair matched in gconv_init.  */
  to_utf8,	/* UTF-32BE or INTERNAL input, UTF-8 output.  */
  from_utf8	/* UTF-8 input, UTF-32, UTF-32BE or INTERNAL output.  */
};

/* Per-step private data.  Allocated by gconv_init, stored in
   step->__data and released by gconv_end.  */
struct utf8_data
{
  /* Conversion direction selected in gconv_init.  */
  enum direction dir;
  /* Nonzero if the target name is "UTF-32//"; PREPARE_LOOP then
     writes a byte order mark.  */
  int emit_bom;
};


extern int gconv_init (struct __gconv_step *step);
int
gconv_init (struct __gconv_step *step)
{
  /* Determine which direction.  */
  struct utf8_data *new_data;
  enum direction dir = illegal_dir;
  int emit_bom;
  int result;

  emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0);

  if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0
      && (__strcasecmp (step->__to_name, "UTF-32//") == 0
	  || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
	  || __strcasecmp (step->__to_name, "INTERNAL") == 0))
    {
      dir = from_utf8;
    }
  else if (__strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0
	   && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
	       || __strcasecmp (step->__from_name, "INTERNAL") == 0))
    {
      dir = to_utf8;
    }

  result = __GCONV_NOCONV;
  if (dir != illegal_dir)
    {
      new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data));

      result = __GCONV_NOMEM;
      if (new_data != NULL)
	{
	  new_data->dir = dir;
	  new_data->emit_bom = emit_bom;
	  step->__data = new_data;

	  if (dir == from_utf8)
	    {
	      step->__min_needed_from = MIN_NEEDED_FROM;
	      step->__max_needed_from = MIN_NEEDED_FROM;
	      step->__min_needed_to = MIN_NEEDED_TO;
	      step->__max_needed_to = MIN_NEEDED_TO;
	    }
	  else
	    {
	      step->__min_needed_from = MIN_NEEDED_TO;
	      step->__max_needed_from = MIN_NEEDED_TO;
	      step->__min_needed_to = MIN_NEEDED_FROM;
	      step->__max_needed_to = MIN_NEEDED_FROM;
	    }

	  step->__stateful = 0;

	  result = __GCONV_OK;
	}
    }

  return result;
}


/* Release the private data that gconv_init attached to the step DATA.  */
extern void gconv_end (struct __gconv_step *data);
void
gconv_end (struct __gconv_step *data)
{
  void *private_data = data->__data;

  free (private_data);
}

/* The macro for the hardware loop.  This is used for both
   directions.

   INSTRUCTION is the assembler text of the conversion instruction.
   Its operand %0 is bound to register 10 (output pointer, with the
   remaining output length in register 11) and %1 to register 8 (input
   pointer, with the remaining input length in register 9).
   NOTE(review): the instruction presumably consumes these as even/odd
   register pairs -- confirm against the z/Architecture documentation.

   `jo 0b' re-issues the instruction as long as it ends with the
   condition that `jo' tests (NOTE(review): condition code 3, i.e.
   partial completion -- confirm).  `ipm' then stores the condition
   code in CC, and after the shift by 28 it is evaluated:
     1: result = __GCONV_FULL_OUTPUT, leave the enclosing loop;
     2: result = __GCONV_ILLEGAL_INPUT, leave the enclosing loop.
   Any other value falls through to the code following the macro.

   inptr and outptr are updated from the registers in all cases.  The
   macro must be expanded inside a loop with `inptr', `inend',
   `outptr', `outend' and `result' in scope.  */
#define HARDWARE_CONVERT(INSTRUCTION)					\
  {									\
    register const unsigned char* pInput asm ("8") = inptr;		\
    register unsigned long long inlen asm ("9") = inend - inptr;	\
    register unsigned char* pOutput asm ("10") = outptr;		\
    register unsigned long long outlen asm("11") = outend - outptr;	\
    uint64_t cc = 0;							\
									\
    asm volatile (".machine push       \n\t"				\
                  ".machine \"z9-109\" \n\t"				\
		  "0: " INSTRUCTION "  \n\t"				\
                  ".machine pop        \n\t"				\
                  "   jo     0b        \n\t"				\
		  "   ipm    %2        \n"				\
		  : "+a" (pOutput), "+a" (pInput), "+d" (cc),		\
		    "+d" (outlen), "+d" (inlen)				\
		  :							\
		  : "cc", "memory");					\
									\
    inptr = pInput;							\
    outptr = pOutput;							\
    cc >>= 28;								\
									\
    if (cc == 1)							\
      {									\
	result = __GCONV_FULL_OUTPUT;					\
	break;								\
      }									\
    else if (cc == 2)							\
      {									\
	result = __GCONV_ILLEGAL_INPUT;					\
	break;								\
      }									\
  }

/* Conversion function from UTF-8 to UTF-32 internal/BE.  */

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* The software routine is copied from gconv_simple.c.

   Hardware path (HWCAP_S390_ETF3EH set in dl_hwcap): run the cu14
   instruction through HARDWARE_CONVERT.  If input is left over
   afterwards, the bytes following inptr are scanned: when every one of
   them up to inend is a continuation byte (10xxxxxx) the result is
   __GCONV_INCOMPLETE_INPUT, otherwise the sequence is handed to
   STANDARD_FROM_LOOP_ERR_HANDLER with the number of bytes scanned.

   Software path: decode one sequence of 1 to 6 bytes.  Lead bytes
   0xc0/0xc1, bad continuation bytes and over-long encodings go to
   STANDARD_FROM_LOOP_ERR_HANDLER; a sequence cut off by inend yields
   __GCONV_INCOMPLETE_INPUT.  The decoded value is stored as a
   host-order uint32_t at outptr.  */
#define BODY								\
  {									\
    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH)				\
      {									\
	HARDWARE_CONVERT ("cu14 %0, %1, 1");				\
									\
	if (inptr != inend)						\
	  {								\
	    int i;							\
	    for (i = 1; inptr + i < inend; ++i)				\
	      if ((inptr[i] & 0xc0) != 0x80)				\
		break;							\
									\
	    if (__glibc_likely (inptr + i == inend))			      \
	      {								\
		result = __GCONV_INCOMPLETE_INPUT;			\
		break;							\
	      }								\
	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				\
	  }								\
	continue;							\
      }									\
									\
    /* Next input byte.  */						\
    uint32_t ch = *inptr;						\
									\
    if (__glibc_likely (ch < 0x80))					      \
      {									\
	/* One byte sequence.  */					\
	++inptr;							\
      }									\
    else								\
      {									\
	uint_fast32_t cnt;						\
	uint_fast32_t i;						\
									\
	if (ch >= 0xc2 && ch < 0xe0)					\
	  {								\
	    /* We expect two bytes.  The first byte cannot be 0xc0 or	\
	       0xc1, otherwise the wide character could have been	\
	       represented using a single byte.  */			\
	    cnt = 2;							\
	    ch &= 0x1f;							\
	  }								\
        else if (__glibc_likely ((ch & 0xf0) == 0xe0))			      \
	  {								\
	    /* We expect three bytes.  */				\
	    cnt = 3;							\
	    ch &= 0x0f;							\
	  }								\
	else if (__glibc_likely ((ch & 0xf8) == 0xf0))			      \
	  {								\
	    /* We expect four bytes.  */				\
	    cnt = 4;							\
	    ch &= 0x07;							\
	  }								\
	else if (__glibc_likely ((ch & 0xfc) == 0xf8))			      \
	  {								\
	    /* We expect five bytes.  */				\
	    cnt = 5;							\
	    ch &= 0x03;							\
	  }								\
	else if (__glibc_likely ((ch & 0xfe) == 0xfc))			      \
	  {								\
	    /* We expect six bytes.  */					\
	    cnt = 6;							\
	    ch &= 0x01;							\
	  }								\
	else								\
	  {								\
	    /* Search the end of this ill-formed UTF-8 character.  This	\
	       is the next byte with (x & 0xc0) != 0x80.  */		\
	    i = 0;							\
	    do								\
	      ++i;							\
	    while (inptr + i < inend					\
		   && (*(inptr + i) & 0xc0) == 0x80			\
		   && i < 5);						\
									\
	  errout:							\
	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				\
	  }								\
									\
	if (__glibc_unlikely (inptr + cnt > inend))			      \
	  {								\
	    /* We don't have enough input.  But before we report	\
	       that check that all the bytes are correct.  */		\
	    for (i = 1; inptr + i < inend; ++i)				\
	      if ((inptr[i] & 0xc0) != 0x80)				\
		break;							\
									\
	    if (__glibc_likely (inptr + i == inend))			      \
	      {								\
		result = __GCONV_INCOMPLETE_INPUT;			\
		break;							\
	      }								\
									\
	    goto errout;						\
	  }								\
									\
	/* Read the possible remaining bytes.  */			\
	for (i = 1; i < cnt; ++i)					\
	  {								\
	    uint32_t byte = inptr[i];					\
									\
	    if ((byte & 0xc0) != 0x80)					\
	      /* This is an illegal encoding.  */			\
	      break;							\
									\
	    ch <<= 6;							\
	    ch |= byte & 0x3f;						\
	  }								\
									\
	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0.		\
	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could	\
	   have been represented with fewer than cnt bytes.  */		\
	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))		\
	  {								\
	    /* This is an illegal encoding.  */				\
	    goto errout;						\
	  }								\
									\
	inptr += cnt;							\
      }									\
									\
    /* Now adjust the pointers and store the result.  */		\
    *((uint32_t *) outptr) = ch;					\
    outptr += sizeof (uint32_t);					\
  }
#define LOOP_NEED_FLAGS

/* Save an incomplete multibyte sequence found at the end of the input
   in *state so that a later call can resume it:
     - the low 8 bits of state->__count hold the number of bytes that
       were available (inend - *inptrp on entry);
     - the bits from bit 8 upwards hold the total length CNT that the
       lead byte announces;
     - state->__value.__wch holds the payload bits decoded so far,
       shifted left as if the missing bytes contributed zero bits.
   *inptrp is advanced to inend.  Expects `inptrp', `inend' and `state'
   to be in scope at the expansion site.  */
#define STORE_REST							\
  {									      \
    /* We store the remaining bytes while converting them into the UCS4	      \
       format.  We can assume that the first byte in the buffer is	      \
       correct and that it requires a larger number of bytes than there	      \
       are in the input buffer.  */					      \
    wint_t ch = **inptrp;						      \
    size_t cnt, r;							      \
									      \
    state->__count = inend - *inptrp;					      \
									      \
    if (ch >= 0xc2 && ch < 0xe0)					      \
      {									      \
	/* We expect two bytes.  The first byte cannot be 0xc0 or	      \
	   0xc1, otherwise the wide character could have been		      \
	   represented using a single byte.  */				      \
	cnt = 2;							      \
	ch &= 0x1f;							      \
      }									      \
    else if (__glibc_likely ((ch & 0xf0) == 0xe0))			      \
      {									      \
	/* We expect three bytes.  */					      \
	cnt = 3;							      \
	ch &= 0x0f;							      \
      }									      \
    else if (__glibc_likely ((ch & 0xf8) == 0xf0))			      \
      {									      \
	/* We expect four bytes.  */					      \
	cnt = 4;							      \
	ch &= 0x07;							      \
      }									      \
    else if (__glibc_likely ((ch & 0xfc) == 0xf8))			      \
      {									      \
	/* We expect five bytes.  */					      \
	cnt = 5;							      \
	ch &= 0x03;							      \
      }									      \
    else								      \
      {									      \
	/* We expect six bytes.  */					      \
	cnt = 6;							      \
	ch &= 0x01;							      \
      }									      \
									      \
    /* The first byte is already consumed.  */				      \
    r = cnt - 1;							      \
    while (++(*inptrp) < inend)						      \
      {									      \
	ch <<= 6;							      \
	ch |= **inptrp & 0x3f;						      \
	--r;								      \
      }									      \
									      \
    /* Shift for the so far missing bytes.  */				      \
    ch <<= r * 6;							      \
									      \
    /* Store the number of bytes expected for the entire sequence.  */	      \
    state->__count |= cnt << 8;						      \
									      \
    /* Store the value.  */						      \
    state->__value.__wch = ch;						      \
  }

/* Counterpart of STORE_REST: rebuild in bytebuf the bytes of the
   partial sequence recorded in *state.  inlen is set to the number of
   bytes that had been seen (low 8 bits of state->__count); the total
   sequence length comes from the bits above.  bytebuf[0] receives the
   lead byte (length marker from inmask, indexed by total length - 2,
   combined with the top payload bits) and bytebuf[1..inlen-1] the
   continuation bytes.  Expects `state', `inlen' and `bytebuf' to be in
   scope at the expansion site.  */
#define UNPACK_BYTES \
  {									      \
    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };  \
    wint_t wch = state->__value.__wch;					      \
    size_t ntotal = state->__count >> 8;				      \
									      \
    inlen = state->__count & 255;					      \
									      \
    bytebuf[0] = inmask[ntotal - 2];					      \
									      \
    do									      \
      {									      \
	if (--ntotal < inlen)						      \
	  bytebuf[ntotal] = 0x80 | (wch & 0x3f);			      \
	wch >>= 6;							      \
      }									      \
    while (ntotal > 1);							      \
									      \
    bytebuf[0] |= wch;							      \
  }

/* Discard any partial sequence recorded by STORE_REST by zeroing
   state->__count.  */
#define CLEAR_STATE \
  state->__count = 0

#include <iconv/loop.c>

/* Conversion from UTF-32 internal/BE to UTF-8.  */

#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
/* Loop body for one step of the UTF-32 to UTF-8 conversion.

   Hardware path (HWCAP_S390_ETF3EH set in dl_hwcap): run the cu41
   instruction through HARDWARE_CONVERT; input left over afterwards is
   reported as __GCONV_INCOMPLETE_INPUT.

   Software path: read one host-order uint32_t from inptr and write
   its 1 to 4 byte UTF-8 encoding to outptr, reporting
   __GCONV_FULL_OUTPUT when the encoding does not fit.  Values above
   0x10ffff and the surrogate code points 0xd800..0xdfff are handed to
   STANDARD_TO_LOOP_ERR_HANDLER: surrogates are not Unicode scalar
   values and must not appear in UTF-8.  The error handler always
   leaves or restarts the enclosing loop, so the final `inptr += 4' is
   only reached after a character has been written.  */
#define BODY							\
  {								\
    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH)			\
      {								\
	HARDWARE_CONVERT ("cu41 %0, %1");			\
								\
	if (inptr != inend)					\
	  {							\
	    result = __GCONV_INCOMPLETE_INPUT;			\
	    break;						\
	  }							\
	continue;						\
      }								\
								\
    uint32_t wc = *((const uint32_t *) inptr);			\
								\
    if (__glibc_likely (wc <= 0x7f))				\
      {								\
	/* Single UTF-8 char.  */				\
	*outptr = (uint8_t)wc;					\
	outptr++;						\
      }								\
    else if (wc <= 0x7ff)					\
      {								\
	/* Two UTF-8 chars.  */					\
	if (__glibc_unlikely (outptr + 2 > outend))		\
	  {							\
	    /* Overflow in the output buffer.  */		\
	    result = __GCONV_FULL_OUTPUT;			\
	    break;						\
	  }							\
								\
	outptr[0] = 0xc0;					\
	outptr[0] |= wc >> 6;					\
								\
	outptr[1] = 0x80;					\
	outptr[1] |= wc & 0x3f;					\
								\
	outptr += 2;						\
      }								\
    else if (__glibc_unlikely (wc >= 0xd800 && wc <= 0xdfff))	\
      {								\
	/* Surrogate code points are reserved for UTF-16 and	\
	   have no valid UTF-8 encoding.  */			\
	STANDARD_TO_LOOP_ERR_HANDLER (4);			\
      }								\
    else if (wc <= 0xffff)					\
      {								\
	/* Three UTF-8 chars.  */				\
	if (__glibc_unlikely (outptr + 3 > outend))		\
	  {							\
	    /* Overflow in the output buffer.  */		\
	    result = __GCONV_FULL_OUTPUT;			\
	    break;						\
	  }							\
	outptr[0] = 0xe0;					\
	outptr[0] |= wc >> 12;					\
								\
	outptr[1] = 0x80;					\
	outptr[1] |= (wc >> 6) & 0x3f;				\
								\
	outptr[2] = 0x80;					\
	outptr[2] |= wc & 0x3f;					\
								\
	outptr += 3;						\
      }								\
    else if (wc <= 0x10ffff)					\
      {								\
	/* Four UTF-8 chars.  */				\
	if (__glibc_unlikely (outptr + 4 > outend))		\
	  {							\
	    /* Overflow in the output buffer.  */		\
	    result = __GCONV_FULL_OUTPUT;			\
	    break;						\
	  }							\
	outptr[0] = 0xf0;					\
	outptr[0] |= wc >> 18;					\
								\
	outptr[1] = 0x80;					\
	outptr[1] |= (wc >> 12) & 0x3f;				\
								\
	outptr[2] = 0x80;					\
	outptr[2] |= (wc >> 6) & 0x3f;				\
								\
	outptr[3] = 0x80;					\
	outptr[3] |= wc & 0x3f;					\
								\
	outptr += 4;						\
      }								\
    else							\
      {								\
	/* Beyond the Unicode code space.  */			\
	STANDARD_TO_LOOP_ERR_HANDLER (4);			\
      }								\
    inptr += 4;							\
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>

#include <iconv/skeleton.c>
Commit	Line	Data
f957edde AK	1	/* Conversion between UTF-8 and UTF-32 BE/internal.
	2
	3	This module uses the Z9-109 variants of the Convert Unicode
	4	instructions.
d4697bc9	5	Copyright (C) 1997-2014 Free Software Foundation, Inc.
f957edde AK	6
	7	Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
	8	Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
	9
	10	Thanks to Daniel Appich who covered the relevant performance work
	11	in his diploma thesis.
	12
	13	This is free software; you can redistribute it and/or
	14	modify it under the terms of the GNU Lesser General Public
	15	License as published by the Free Software Foundation; either
	16	version 2.1 of the License, or (at your option) any later version.
	17
	18	This is distributed in the hope that it will be useful,
	19	but WITHOUT ANY WARRANTY; without even the implied warranty of
	20	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	21	Lesser General Public License for more details.
	22
	23	You should have received a copy of the GNU Lesser General Public
59ba27a6 PE	24	License along with the GNU C Library; if not, see
59ba27a6 PE	25	<http://www.gnu.org/licenses/>. */
f957edde AK	26
	27	#include <dlfcn.h>
	28	#include <stdint.h>
	29	#include <unistd.h>
	30	#include <dl-procinfo.h>
	31	#include <gconv.h>
	32
	33	/* UTF-32 big endian byte order mark. */
	34	#define BOM 0x0000feffu
	35
	36	#define DEFINE_INIT 0
	37	#define DEFINE_FINI 0
	38	/* These definitions apply to the UTF-8 to UTF-32 direction. The
	39	software implementation for UTF-8 still supports multibyte
	40	characters up to 6 bytes whereas the hardware variant does not. */
	41	#define MIN_NEEDED_FROM 1
	42	#define MAX_NEEDED_FROM 6
	43	#define MIN_NEEDED_TO 4
	44	#define FROM_LOOP from_utf8_loop
	45	#define TO_LOOP to_utf8_loop
	46	#define FROM_DIRECTION (dir == from_utf8)
	47	#define PREPARE_LOOP \
	48	enum direction dir = ((struct utf8_data *) step->__data)->dir; \
	49	int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \
	50	\
	51	if (emit_bom && !data->__internal_use \
	52	&& data->__invocation_counter == 0) \
	53	{ \
	54	/* Emit the Byte Order Mark. */ \
a1ffb40e	55	if (__glibc_unlikely (outbuf + 4 > outend)) \
f957edde	56	return __GCONV_FULL_OUTPUT; \
7c36ced0	57	\
f957edde AK	58	put32u (outbuf, BOM); \
	59	outbuf += 4; \
	60	}
	61
	62	/* Direction of the transformation. */
	63	enum direction
	64	{
	65	illegal_dir,
	66	to_utf8,
	67	from_utf8
	68	};
	69
	70	struct utf8_data
	71	{
	72	enum direction dir;
	73	int emit_bom;
	74	};
	75
	76
	77	extern int gconv_init (struct __gconv_step *step);
	78	int
	79	gconv_init (struct __gconv_step *step)
	80	{
	81	/* Determine which direction. */
	82	struct utf8_data *new_data;
	83	enum direction dir = illegal_dir;
	84	int emit_bom;
	85	int result;
	86
	87	emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0);
	88
	89	if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0
	90	&& (__strcasecmp (step->__to_name, "UTF-32//") == 0
	91	\|\| __strcasecmp (step->__to_name, "UTF-32BE//") == 0
7c36ced0	92	\|\| __strcasecmp (step->__to_name, "INTERNAL") == 0))
f957edde AK	93	{
	94	dir = from_utf8;
	95	}
	96	else if (__strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0
	97	&& (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
	98	\|\| __strcasecmp (step->__from_name, "INTERNAL") == 0))
	99	{
	100	dir = to_utf8;
	101	}
	102
	103	result = __GCONV_NOCONV;
	104	if (dir != illegal_dir)
	105	{
	106	new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data));
	107
	108	result = __GCONV_NOMEM;
	109	if (new_data != NULL)
	110	{
	111	new_data->dir = dir;
	112	new_data->emit_bom = emit_bom;
	113	step->__data = new_data;
	114
	115	if (dir == from_utf8)
	116	{
	117	step->__min_needed_from = MIN_NEEDED_FROM;
	118	step->__max_needed_from = MIN_NEEDED_FROM;
	119	step->__min_needed_to = MIN_NEEDED_TO;
	120	step->__max_needed_to = MIN_NEEDED_TO;
	121	}
	122	else
	123	{
	124	step->__min_needed_from = MIN_NEEDED_TO;
	125	step->__max_needed_from = MIN_NEEDED_TO;
	126	step->__min_needed_to = MIN_NEEDED_FROM;
	127	step->__max_needed_to = MIN_NEEDED_FROM;
	128	}
	129
	130	step->__stateful = 0;
	131
	132	result = __GCONV_OK;
	133	}
	134	}
	135
	136	return result;
	137	}
	138
	139
	140	extern void gconv_end (struct __gconv_step *data);
	141	void
	142	gconv_end (struct __gconv_step *data)
	143	{
	144	free (data->__data);
	145	}
	146
	147	/* The macro for the hardware loop. This is used for both
	148	directions. */
	149	#define HARDWARE_CONVERT(INSTRUCTION) \
	150	{ \
	151	register const unsigned char* pInput asm ("8") = inptr; \
	152	register unsigned long long inlen asm ("9") = inend - inptr; \
	153	register unsigned char* pOutput asm ("10") = outptr; \
	154	register unsigned long long outlen asm("11") = outend - outptr; \
	155	uint64_t cc = 0; \
	156	\
27390476 AK	157	asm volatile (".machine push \n\t" \
	158	".machine \"z9-109\" \n\t" \
	159	"0: " INSTRUCTION " \n\t" \
	160	".machine pop \n\t" \
f957edde AK	161	" jo 0b \n\t" \
	162	" ipm %2 \n" \
	163	: "+a" (pOutput), "+a" (pInput), "+d" (cc), \
	164	"+d" (outlen), "+d" (inlen) \
	165	: \
	166	: "cc", "memory"); \
	167	\
	168	inptr = pInput; \
	169	outptr = pOutput; \
7c36ced0	170	cc >>= 28; \
f957edde AK	171	\
	172	if (cc == 1) \
	173	{ \
	174	result = __GCONV_FULL_OUTPUT; \
	175	break; \
	176	} \
	177	else if (cc == 2) \
	178	{ \
	179	result = __GCONV_ILLEGAL_INPUT; \
	180	break; \
	181	} \
	182	}
	183
	184	/* Conversion function from UTF-8 to UTF-32 internal/BE. */
	185
	186	#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
	187	#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
	188	#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
	189	#define LOOPFCT FROM_LOOP
	190	/* The software routine is copied from gconv_simple.c. */
7c36ced0	191	#define BODY \
f957edde AK	192	{ \
	193	if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
	194	{ \
	195	HARDWARE_CONVERT ("cu14 %0, %1, 1"); \
	196	\
	197	if (inptr != inend) \
	198	{ \
	199	int i; \
	200	for (i = 1; inptr + i < inend; ++i) \
	201	if ((inptr[i] & 0xc0) != 0x80) \
	202	break; \
	203	\
a1ffb40e	204	if (__glibc_likely (inptr + i == inend)) \
f957edde AK	205	{ \
	206	result = __GCONV_INCOMPLETE_INPUT; \
	207	break; \
	208	} \
	209	STANDARD_FROM_LOOP_ERR_HANDLER (i); \
	210	} \
	211	continue; \
	212	} \
	213	\
	214	/* Next input byte. */ \
	215	uint32_t ch = *inptr; \
	216	\
a1ffb40e	217	if (__glibc_likely (ch < 0x80)) \
f957edde AK	218	{ \
	219	/* One byte sequence. */ \
	220	++inptr; \
	221	} \
	222	else \
	223	{ \
	224	uint_fast32_t cnt; \
	225	uint_fast32_t i; \
	226	\
	227	if (ch >= 0xc2 && ch < 0xe0) \
	228	{ \
	229	/* We expect two bytes. The first byte cannot be 0xc0 or \
	230	0xc1, otherwise the wide character could have been \
	231	represented using a single byte. */ \
	232	cnt = 2; \
	233	ch &= 0x1f; \
	234	} \
a1ffb40e	235	else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
f957edde AK	236	{ \
	237	/* We expect three bytes. */ \
	238	cnt = 3; \
	239	ch &= 0x0f; \
	240	} \
a1ffb40e	241	else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
f957edde AK	242	{ \
	243	/* We expect four bytes. */ \
	244	cnt = 4; \
	245	ch &= 0x07; \
	246	} \
a1ffb40e	247	else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
f957edde AK	248	{ \
	249	/* We expect five bytes. */ \
	250	cnt = 5; \
	251	ch &= 0x03; \
	252	} \
a1ffb40e	253	else if (__glibc_likely ((ch & 0xfe) == 0xfc)) \
f957edde AK	254	{ \
	255	/* We expect six bytes. */ \
	256	cnt = 6; \
	257	ch &= 0x01; \
	258	} \
	259	else \
	260	{ \
	261	/* Search the end of this ill-formed UTF-8 character. This \
	262	is the next byte with (x & 0xc0) != 0x80. */ \
	263	i = 0; \
	264	do \
	265	++i; \
	266	while (inptr + i < inend \
	267	&& (*(inptr + i) & 0xc0) == 0x80 \
	268	&& i < 5); \
	269	\
	270	errout: \
	271	STANDARD_FROM_LOOP_ERR_HANDLER (i); \
	272	} \
	273	\
a1ffb40e	274	if (__glibc_unlikely (inptr + cnt > inend)) \
f957edde AK	275	{ \
	276	/* We don't have enough input. But before we report \
	277	that check that all the bytes are correct. */ \
	278	for (i = 1; inptr + i < inend; ++i) \
	279	if ((inptr[i] & 0xc0) != 0x80) \
	280	break; \
	281	\
a1ffb40e	282	if (__glibc_likely (inptr + i == inend)) \
f957edde AK	283	{ \
	284	result = __GCONV_INCOMPLETE_INPUT; \
	285	break; \
	286	} \
	287	\
	288	goto errout; \
	289	} \
	290	\
	291	/* Read the possible remaining bytes. */ \
	292	for (i = 1; i < cnt; ++i) \
	293	{ \
	294	uint32_t byte = inptr[i]; \
	295	\
	296	if ((byte & 0xc0) != 0x80) \
	297	/* This is an illegal encoding. */ \
	298	break; \
	299	\
	300	ch <<= 6; \
	301	ch \|= byte & 0x3f; \
	302	} \
	303	\
	304	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
	305	If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
	306	have been represented with fewer than cnt bytes. */ \
	307	if (i < cnt \|\| (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
	308	{ \
	309	/* This is an illegal encoding. */ \
	310	goto errout; \
	311	} \
	312	\
	313	inptr += cnt; \
	314	} \
	315	\
	316	/* Now adjust the pointers and store the result. */ \
	317	((uint32_t ) outptr) = ch; \
	318	outptr += sizeof (uint32_t); \
	319	}
	320	#define LOOP_NEED_FLAGS
	321
	322	#define STORE_REST \
	323	{ \
	324	/* We store the remaining bytes while converting them into the UCS4 \
	325	format. We can assume that the first byte in the buffer is \
	326	correct and that it requires a larger number of bytes than there \
	327	are in the input buffer. */ \
	328	wint_t ch = **inptrp; \
	329	size_t cnt, r; \
	330	\
	331	state->__count = inend - *inptrp; \
	332	\
	333	if (ch >= 0xc2 && ch < 0xe0) \
	334	{ \
	335	/* We expect two bytes. The first byte cannot be 0xc0 or \
	336	0xc1, otherwise the wide character could have been \
	337	represented using a single byte. */ \
	338	cnt = 2; \
	339	ch &= 0x1f; \
	340	} \
a1ffb40e	341	else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
f957edde AK	342	{ \
	343	/* We expect three bytes. */ \
	344	cnt = 3; \
	345	ch &= 0x0f; \
	346	} \
a1ffb40e	347	else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
f957edde AK	348	{ \
	349	/* We expect four bytes. */ \
	350	cnt = 4; \
	351	ch &= 0x07; \
	352	} \
a1ffb40e	353	else if (__glibc_likely ((ch & 0xfc) == 0xf8)) \
f957edde AK	354	{ \
	355	/* We expect five bytes. */ \
	356	cnt = 5; \
	357	ch &= 0x03; \
	358	} \
	359	else \
	360	{ \
	361	/* We expect six bytes. */ \
	362	cnt = 6; \
	363	ch &= 0x01; \
	364	} \
	365	\
	366	/* The first byte is already consumed. */ \
	367	r = cnt - 1; \
	368	while (++(*inptrp) < inend) \
	369	{ \
	370	ch <<= 6; \
	371	ch \|= **inptrp & 0x3f; \
	372	--r; \
	373	} \
	374	\
	375	/* Shift for the so far missing bytes. */ \
	376	ch <<= r * 6; \
	377	\
	378	/* Store the number of bytes expected for the entire sequence. */ \
	379	state->__count \|= cnt << 8; \
	380	\
	381	/* Store the value. */ \
	382	state->__value.__wch = ch; \
	383	}
	384
	385	#define UNPACK_BYTES \
	386	{ \
	387	static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
	388	wint_t wch = state->__value.__wch; \
	389	size_t ntotal = state->__count >> 8; \
	390	\
	391	inlen = state->__count & 255; \
	392	\
	393	bytebuf[0] = inmask[ntotal - 2]; \
	394	\
	395	do \
	396	{ \
	397	if (--ntotal < inlen) \
	398	bytebuf[ntotal] = 0x80 \| (wch & 0x3f); \
	399	wch >>= 6; \
	400	} \
	401	while (ntotal > 1); \
	402	\
	403	bytebuf[0] \|= wch; \
	404	}
	405
	406	#define CLEAR_STATE \
	407	state->__count = 0
	408
	409	#include <iconv/loop.c>
	410
	411	/* Conversion from UTF-32 internal/BE to UTF-8. */
	412
	413	#define MIN_NEEDED_INPUT MIN_NEEDED_TO
	414	#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
	415	#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
	416	#define LOOPFCT TO_LOOP
	417	/* The software routine mimics the S/390 cu41 instruction. */
418	#define BODY \
419	{ \
420	if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
421	{ \
422	HARDWARE_CONVERT ("cu41 %0, %1"); \
423	\
424	if (inptr != inend) \
425	{ \
426	result = __GCONV_INCOMPLETE_INPUT; \
427	break; \
428	} \
429	continue; \
430	} \
431	\
432	uint32_t wc = ((const uint32_t ) inptr); \
433	\
a1ffb40e	434	if (__glibc_likely (wc <= 0x7f)) \
f957edde AK	435	{ \
	436	/* Single UTF-8 char. */ \
	437	*outptr = (uint8_t)wc; \
	438	outptr++; \
	439	} \
	440	else if (wc <= 0x7ff) \
	441	{ \
	442	/* Two UTF-8 chars. */ \
a1ffb40e	443	if (__glibc_unlikely (outptr + 2 > outend)) \
f957edde AK	444	{ \
	445	/* Overflow in the output buffer. */ \
	446	result = __GCONV_FULL_OUTPUT; \
	447	break; \
	448	} \
	449	\
	450	outptr[0] = 0xc0; \
	451	outptr[0] \|= wc >> 6; \
	452	\
	453	outptr[1] = 0x80; \
	454	outptr[1] \|= wc & 0x3f; \
	455	\
	456	outptr += 2; \
	457	} \
	458	else if (wc <= 0xffff) \
	459	{ \
	460	/* Three UTF-8 chars. */ \
a1ffb40e	461	if (__glibc_unlikely (outptr + 3 > outend)) \
f957edde AK	462	{ \
	463	/* Overflow in the output buffer. */ \
	464	result = __GCONV_FULL_OUTPUT; \
	465	break; \
	466	} \
	467	outptr[0] = 0xe0; \
	468	outptr[0] \|= wc >> 12; \
	469	\
	470	outptr[1] = 0x80; \
	471	outptr[1] \|= (wc >> 6) & 0x3f; \
	472	\
	473	outptr[2] = 0x80; \
	474	outptr[2] \|= wc & 0x3f; \
	475	\
	476	outptr += 3; \
	477	} \
	478	else if (wc <= 0x10ffff) \
	479	{ \
	480	/* Four UTF-8 chars. */ \
a1ffb40e	481	if (__glibc_unlikely (outptr + 4 > outend)) \
f957edde AK	482	{ \
	483	/* Overflow in the output buffer. */ \
	484	result = __GCONV_FULL_OUTPUT; \
	485	break; \
	486	} \
	487	outptr[0] = 0xf0; \
	488	outptr[0] \|= wc >> 18; \
	489	\
	490	outptr[1] = 0x80; \
	491	outptr[1] \|= (wc >> 12) & 0x3f; \
	492	\
	493	outptr[2] = 0x80; \
	494	outptr[2] \|= (wc >> 6) & 0x3f; \
	495	\
	496	outptr[3] = 0x80; \
	497	outptr[3] \|= wc & 0x3f; \
	498	\
	499	outptr += 4; \
	500	} \
	501	else \
	502	{ \
	503	STANDARD_TO_LOOP_ERR_HANDLER (4); \
	504	} \
	505	inptr += 4; \
	506	}
	507	#define LOOP_NEED_FLAGS
	508	#include <iconv/loop.c>
	509
	510	#include <iconv/skeleton.c>