/* Conversion from and to EUC-JISX0213.
- Copyright (C) 2002 Free Software Foundation, Inc.
+ Copyright (C) 2002-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Bruno Haible <bruno@clisp.org>, 2002.
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
#include <dlfcn.h>
#include <stdint.h>
#define TO_LOOP to_euc_jisx0213
#define DEFINE_INIT 1
#define DEFINE_FINI 1
-#define MIN_NEEDED_FROM 1
-#define MAX_NEEDED_FROM 3
-#define MIN_NEEDED_TO 4
+#define ONE_DIRECTION 0
+#define FROM_LOOP_MIN_NEEDED_FROM 1 /* Plain ASCII input is a single byte. */
+#define FROM_LOOP_MAX_NEEDED_FROM 3 /* 0x8f-prefixed (plane 2) input is three bytes. */
+#define FROM_LOOP_MIN_NEEDED_TO 4 /* One UCS-4 character. */
+#define FROM_LOOP_MAX_NEEDED_TO 8 /* A combining pair yields two UCS-4 characters. */
+#define TO_LOOP_MIN_NEEDED_FROM 4 /* Input is consumed one UCS-4 character (4 bytes) at a time. */
+#define TO_LOOP_MAX_NEEDED_FROM 4
+#define TO_LOOP_MIN_NEEDED_TO 1
+#define TO_LOOP_MAX_NEEDED_TO 3 /* Plane 2 output needs three bytes. */
#define PREPARE_LOOP \
int saved_state; \
int *statep = &data->__statep->__count;
*statep = saved_state
-/* During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
+/* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state
+ contains the last UCS-4 character, shifted by 3 bits.
+ During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
contains the last two bytes to be output, shifted by 3 bits. */
/* Since this is a stateful encoding we have to provide code which resets
if (data->__statep->__count != 0) \
{ \
if (FROM_DIRECTION) \
- /* We don't use shift states in the FROM_DIRECTION. */ \
- data->__statep->__count = 0; \
+ { \
+ if (__glibc_likely (outbuf + 4 <= outend)) \
+ { \
+ /* Write out the last character. */ \
+ *((uint32_t *) outbuf) = data->__statep->__count >> 3; \
+ outbuf += sizeof (uint32_t); \
+ data->__statep->__count = 0; \
+ } \
+ else \
+ /* We don't have enough room in the output buffer. */ \
+ status = __GCONV_FULL_OUTPUT; \
+ } \
else \
{ \
- if (__builtin_expect (outbuf + 2 <= outend, 1)) \
+ if (__glibc_likely (outbuf + 2 <= outend)) \
{ \
/* Write out the last character. */ \
uint32_t lasttwo = data->__statep->__count >> 3; \
/* First define the conversion function from EUC-JISX0213 to UCS-4. */
-#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
-#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
-#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
+#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t ch = *inptr; \
+ uint32_t ch; \
\
- if (ch < 0x80) \
- /* Plain ASCII character. */ \
- ++inptr; \
- else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
+ /* Determine whether there is a buffered character pending. */ \
+ ch = *statep >> 3; \
+ if (__glibc_likely (ch == 0)) \
{ \
- /* Two or three byte character. */ \
- uint32_t ch2; \
+ /* No - so look at the next input byte. */ \
+ ch = *inptr; \
\
- if (__builtin_expect (inptr + 1 >= inend, 0)) \
+ if (ch < 0x80) \
+ /* Plain ASCII character. */ \
+ ++inptr; \
+ else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
{ \
- /* The second byte is not available. */ \
- result = __GCONV_INCOMPLETE_INPUT; \
- break; \
- } \
- \
- ch2 = inptr[1]; \
+ /* Two or three byte character. */ \
+ uint32_t ch2; \
\
- /* The second byte must be >= 0xa1 and <= 0xfe. */ \
- if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \
- { \
- /* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
+ if (__glibc_unlikely (inptr + 1 >= inend)) \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
+ /* The second byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
- ++inptr; \
- ++*irreversible; \
- break; \
- } \
+ ch2 = inptr[1]; \
\
- if (ch == 0x8e) \
- { \
- /* Half-width katakana. */ \
- if (__builtin_expect (ch2 > 0xdf, 0)) \
+ /* The second byte must be >= 0xa1 and <= 0xfe. */ \
+ if (__glibc_unlikely (ch2 < 0xa1 || ch2 > 0xfe)) \
{ \
/* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
- break; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
\
- ch = ch2 + 0xfec0; \
- inptr += 2; \
- } \
- else \
- { \
- const unsigned char *endp; \
- \
- if (ch == 0x8f) \
+ if (ch == 0x8e) \
{ \
- /* JISX 0213 plane 2. */ \
- uint32_t ch3; \
- \
- if (__builtin_expect (inptr + 2 >= inend, 0)) \
- { \
- /* The third byte is not available. */ \
- result = __GCONV_INCOMPLETE_INPUT; \
- break; \
- } \
+ /* Half-width katakana. */ \
+ if (__glibc_unlikely (ch2 > 0xdf)) \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
- ch3 = inptr[2]; \
- endp = inptr + 3; \
- \
- ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
+ ch = ch2 + 0xfec0; \
+ inptr += 2; \
} \
else \
{ \
- /* JISX 0213 plane 1. */ \
- endp = inptr + 2; \
- \
- ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
- } \
+ const unsigned char *endp; \
\
- if (ch == 0) \
- { \
- /* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
+ if (ch == 0x8f) \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
+ /* JISX 0213 plane 2. */ \
+ uint32_t ch3; \
+ \
+ if (__glibc_unlikely (inptr + 2 >= inend)) \
+ { \
+ /* The third byte is not available. */ \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ \
+ ch3 = inptr[2]; \
+ endp = inptr + 3; \
+ \
+ ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
} \
+ else \
+ { \
+ /* JISX 0213 plane 1. */ \
+ endp = inptr + 2; \
\
- ++inptr; \
- ++*irreversible; \
- break; \
- } \
+ ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
+ } \
\
- inptr = endp; \
+ if (ch == 0) \
+ /* This is an illegal character. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
- if (ch < 0x80) \
- { \
- /* It's a combining character. */ \
- uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
- uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
+ inptr = endp; \
\
- /* See whether we have room for two characters. */ \
- if (outptr + 8 <= outend) \
+ if (ch < 0x80) \
{ \
+ /* It's a combining character. */ \
+ uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
+ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
+ \
put32 (outptr, u1); \
outptr += 4; \
- put32 (outptr, u2); \
- outptr += 4; \
- continue; \
- } \
- else \
- { \
+ \
+ /* See whether we have room for the second character. */ \
+ if (outptr + 4 <= outend) \
+ { \
+ put32 (outptr, u2); \
+ outptr += 4; \
+ continue; \
+ } \
+ \
+ /* Otherwise store only the first character now, and \
+ put the second one into the queue. */ \
+ *statep = u2 << 3; \
+ /* Tell the caller why we terminate the loop. */ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
} \
- } \
- else \
- { \
- /* This is illegal. */ \
- if (! ignore_errors_p ()) \
+ else \
{ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
+ /* This is illegal. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
- \
- ++inptr; \
- ++*irreversible; \
- continue; \
} \
\
put32 (outptr, ch); \
}
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , int *statep
+#define ONEBYTE_BODY \
+ { /* Single-byte shortcut; `c` is presumably supplied by the including loop code - confirm. */ \
+ if (c < 0x80) \
+ return c; /* Values below 0x80 are returned unchanged. */ \
+ else \
+ return WEOF; /* Any other value has no single-byte result here. */ \
+ }
#include <iconv/loop.c>
{ 0xa6f5, 0xa6f8 }, /* 0x12678 = 0x12675 U+309A */
};
-#define MIN_NEEDED_INPUT MIN_NEEDED_TO
-#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
-#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
+#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
+#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
#define BODY \
{ \
if (len > 0) \
{ \
/* Output the combined character. */ \
- if (__builtin_expect (outptr + 1 >= outend, 0)) \
+ if (__glibc_unlikely (outptr + 1 >= outend)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
\
not_combining: \
/* Output the buffered character. */ \
- if (__builtin_expect (outptr + 1 >= outend, 0)) \
+ if (__glibc_unlikely (outptr + 1 >= outend)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
else if (ch >= 0xff61 && ch <= 0xff9f) \
{ \
/* Half-width katakana. */ \
- if (__builtin_expect (outptr + 1 >= outend, 0)) \
+ if (__glibc_unlikely (outptr + 1 >= outend)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
else \
{ \
- ch = ucs4_to_jisx0213 (ch); \
- if (ch == 0) \
+ uint32_t jch = ucs4_to_jisx0213 (ch); \
+ if (jch == 0) \
{ \
UNICODE_TAG_HANDLER (ch, 4); \
\
/* Illegal character. */ \
- STANDARD_ERR_HANDLER (4); \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
\
- if (ch & 0x0080) \
+ if (jch & 0x0080) \
{ \
/* A possible match in comp_table_data. We have to buffer it. */\
\
/* We know it's a JISX 0213 plane 1 character. */ \
- assert ((ch & 0x8000) == 0); \
+ assert ((jch & 0x8000) == 0); \
\
- *statep = (ch | 0x8080) << 3; \
+ *statep = (jch | 0x8080) << 3; \
inptr += 4; \
continue; \
} \
\
- if (ch & 0x8000) \
+ if (jch & 0x8000) \
{ \
/* JISX 0213 plane 2. */ \
- if (__builtin_expect (outptr + 2 >= outend, 0)) \
+ if (__glibc_unlikely (outptr + 2 >= outend)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
else \
{ \
/* JISX 0213 plane 1. */ \
- if (__builtin_expect (outptr + 1 >= outend, 0)) \
+ if (__glibc_unlikely (outptr + 1 >= outend)) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
- *outptr++ = (ch >> 8) | 0x80; \
- *outptr++ = (ch & 0xff) | 0x80; \
+ *outptr++ = (jch >> 8) | 0x80; \
+ *outptr++ = (jch & 0xff) | 0x80; \
} \
\
inptr += 4; \