/* Mapping tables for GBK handling.
- Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1999-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Sean Chen <sean.chen@turbolinux.com>, 1999.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+#include <dlfcn.h>
#include <gconv.h>
#include <stdint.h>
#include <stdlib.h>
#include <wchar.h>
#include <assert.h>
+/* Unicode 3.0.1 does not contain all the characters in GBK. Define
+ USE_PRIVATE_AREA to 1 in order to use mappings from/to the Unicode
+ Private Use area. Until we see other systems using the same mappings,
+ it is disabled. */
+#define USE_PRIVATE_AREA 0
+
/* The conversion table to UCS4 has almost no holes. It can be generated with:
perl tab.pl < gbk.txt
[0x17fb] = 0x72d6, [0x17fc] = 0x72d8, [0x17fd] = 0x72da, [0x17fe] = 0x72db,
[0x1861] = 0x3000, [0x1862] = 0x3001, [0x1863] = 0x3002, [0x1864] = 0x00b7,
[0x1865] = 0x02c9, [0x1866] = 0x02c7, [0x1867] = 0x00a8, [0x1868] = 0x3003,
- [0x1869] = 0x3005, [0x186a] = 0x2015, [0x186b] = 0xff5e, [0x186c] = 0x2016,
+ [0x1869] = 0x3005, [0x186a] = 0x2014, [0x186b] = 0xff5e, [0x186c] = 0x2016,
[0x186d] = 0x2026, [0x186e] = 0x2018, [0x186f] = 0x2019, [0x1870] = 0x201c,
[0x1871] = 0x201d, [0x1872] = 0x3014, [0x1873] = 0x3015, [0x1874] = 0x3008,
[0x1875] = 0x3009, [0x1876] = 0x300a, [0x1877] = 0x300b, [0x1878] = 0x300c,
[0x1d2a] = 0x0448, [0x1d2b] = 0x0449, [0x1d2c] = 0x044a, [0x1d2d] = 0x044b,
[0x1d2e] = 0x044c, [0x1d2f] = 0x044d, [0x1d30] = 0x044e, [0x1d31] = 0x044f,
[0x1d40] = 0x02ca, [0x1d41] = 0x02cb, [0x1d42] = 0x02d9, [0x1d43] = 0x2013,
- [0x1d44] = 0x2014, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
+ [0x1d44] = 0x2015, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
[0x1d48] = 0x2109, [0x1d49] = 0x2196, [0x1d4a] = 0x2197, [0x1d4b] = 0x2198,
[0x1d4c] = 0x2199, [0x1d4d] = 0x2215, [0x1d4e] = 0x221f, [0x1d4f] = 0x2223,
[0x1d50] = 0x2252, [0x1d51] = 0x2266, [0x1d52] = 0x2267, [0x1d53] = 0x22bf,
[0x1db0] = 0x00f2, [0x1db1] = 0x016b, [0x1db2] = 0x00fa, [0x1db3] = 0x01d4,
[0x1db4] = 0x00f9, [0x1db5] = 0x01d6, [0x1db6] = 0x01d8, [0x1db7] = 0x01da,
[0x1db8] = 0x01dc, [0x1db9] = 0x00fc, [0x1dba] = 0x00ea, [0x1dbb] = 0x0251,
- [0x1dbc] = 0xe7c7, [0x1dbd] = 0x0144, [0x1dbe] = 0x0148, [0x1dbf] = 0xe7c8,
+#if USE_PRIVATE_AREA
+ [0x1dbc] = 0xe7c7,
+#endif
+ [0x1dbd] = 0x0144, [0x1dbe] = 0x0148,
+#if USE_PRIVATE_AREA
+ [0x1dbf] = 0xe7c8,
+#endif
[0x1dc0] = 0x0261, [0x1dc5] = 0x3105, [0x1dc6] = 0x3106, [0x1dc7] = 0x3107,
[0x1dc8] = 0x3108, [0x1dc9] = 0x3109, [0x1dca] = 0x310a, [0x1dcb] = 0x310b,
[0x1dcc] = 0x310c, [0x1dcd] = 0x310d, [0x1dce] = 0x310e, [0x1dcf] = 0x310f,
[0x1e3b] = 0xfe5e, [0x1e3c] = 0xfe5f, [0x1e3d] = 0xfe60, [0x1e3e] = 0xfe61,
[0x1e40] = 0xfe62, [0x1e41] = 0xfe63, [0x1e42] = 0xfe64, [0x1e43] = 0xfe65,
[0x1e44] = 0xfe66, [0x1e45] = 0xfe68, [0x1e46] = 0xfe69, [0x1e47] = 0xfe6a,
- [0x1e48] = 0xfe6b, [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
+ [0x1e48] = 0xfe6b,
+#if USE_PRIVATE_AREA
+ [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
[0x1e4c] = 0xe7ea, [0x1e4d] = 0xe7eb, [0x1e4e] = 0xe7ec, [0x1e4f] = 0xe7ed,
[0x1e50] = 0xe7ee, [0x1e51] = 0xe7ef, [0x1e52] = 0xe7f0, [0x1e53] = 0xe7f1,
- [0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3, [0x1e56] = 0x3007, [0x1e64] = 0x2500,
+ [0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3,
+#endif
+ [0x1e56] = 0x3007, [0x1e64] = 0x2500,
[0x1e65] = 0x2501, [0x1e66] = 0x2502, [0x1e67] = 0x2503, [0x1e68] = 0x2504,
[0x1e69] = 0x2505, [0x1e6a] = 0x2506, [0x1e6b] = 0x2507, [0x1e6c] = 0x2508,
[0x1e6d] = 0x2509, [0x1e6e] = 0x250a, [0x1e6f] = 0x250b, [0x1e70] = 0x250c,
[0x5dc2] = 0xfa0e, [0x5dc3] = 0xfa0f, [0x5dc4] = 0xfa11, [0x5dc5] = 0xfa13,
[0x5dc6] = 0xfa14, [0x5dc7] = 0xfa18, [0x5dc8] = 0xfa1f, [0x5dc9] = 0xfa20,
[0x5dca] = 0xfa21, [0x5dcb] = 0xfa23, [0x5dcc] = 0xfa24, [0x5dcd] = 0xfa27,
- [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
+ [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29,
+#if USE_PRIVATE_AREA
+ [0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
[0x5dd2] = 0xe817, [0x5dd3] = 0xe818, [0x5dd4] = 0xe819, [0x5dd5] = 0xe81a,
[0x5dd6] = 0xe81b, [0x5dd7] = 0xe81c, [0x5dd8] = 0xe81d, [0x5dd9] = 0xe81e,
[0x5dda] = 0xe81f, [0x5ddb] = 0xe820, [0x5ddc] = 0xe821, [0x5ddd] = 0xe822,
[0x5e17] = 0xe85b, [0x5e18] = 0xe85c, [0x5e19] = 0xe85d, [0x5e1a] = 0xe85e,
[0x5e1b] = 0xe85f, [0x5e1c] = 0xe860, [0x5e1d] = 0xe861, [0x5e1e] = 0xe862,
[0x5e1f] = 0xe863, [0x5e20] = 0xe864,
+#else
+ [0x5e20] = 0x0000,
+#endif
};
/* The table can be created using
*/
static const char __gbk_from_ucs4_tab4[][2] =
{
- [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa8\x44",
- [0x0005] = "\xa1\xaa", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
+ [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa1\xaa",
+ [0x0005] = "\xa8\x44", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
[0x0009] = "\xa1\xaf", [0x000c] = "\xa1\xb0", [0x000d] = "\xa1\xb1",
[0x0015] = "\xa8\x45", [0x0016] = "\xa1\xad", [0x0020] = "\xa1\xeb",
[0x0022] = "\xa1\xe4", [0x0023] = "\xa1\xe5", [0x0025] = "\xa8\x46",
*/
static const char __gbk_from_ucs4_tab9[][2] =
{
+#if USE_PRIVATE_AREA
[0x0000] = "\xa8\xbc", [0x0001] = "\xa8\xbf", [0x0020] = "\xa9\x89",
[0x0021] = "\xa9\x8a", [0x0022] = "\xa9\x8b", [0x0023] = "\xa9\x8c",
[0x0024] = "\xa9\x8d", [0x0025] = "\xa9\x8e", [0x0026] = "\xa9\x8f",
[0x0096] = "\xfe\x99", [0x0097] = "\xfe\x9a", [0x0098] = "\xfe\x9b",
[0x0099] = "\xfe\x9c", [0x009a] = "\xfe\x9d", [0x009b] = "\xfe\x9e",
[0x009c] = "\xfe\x9f", [0x009d] = "\xfe\xa0",
+#endif
};
/* The table can be created using
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 2
#define MIN_NEEDED_TO 4
+#define ONE_DIRECTION 0
-/* First define the conversion function from ISO 8859-1 to UCS4. */
+/* First define the conversion function from GBK to UCS4. */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
if (ch <= 0x7f) \
++inptr; \
else \
- if (__builtin_expect (ch, 0x81) <= 0x80 \
- || __builtin_expect (ch, 0x81) > 0xfe) \
+ if (__builtin_expect (ch <= 0x80, 0) \
+ || __builtin_expect (ch > 0xfe, 0)) \
{ \
/* This is illegal. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
- continue; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
else \
{ \
/* Two or more byte character. First test whether the \
- next character is also available. */ \
+ next byte is also available. */ \
uint32_t ch2; \
int idx; \
\
- if (NEED_LENGTH_TEST && __builtin_expect (inptr + 1 >= inend, 0)) \
+ if (__glibc_unlikely (inptr + 1 >= inend)) \
{ \
/* The second character is not available. Store \
the intermediate result. */ \
\
ch2 = inptr[1]; \
\
- /* All second bytes of a multibyte character must be >= 0x40. */ \
- if (__builtin_expect (ch2, 0x41) < 0x40) \
+ /* All second bytes of a multibyte character must be >= 0x40, and \
+ the __gbk_to_ucs table only covers the range up to 0xfe 0xa0. */ \
+ if (__builtin_expect (ch2 < 0x40, 0) \
+ || (__builtin_expect (ch, 0x81) == 0xfe && ch2 > 0xa0)) \
{ \
/* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- /* This is an illegal character. */ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
- continue; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
\
/* This is code set 1: GBK. */ \
if (__builtin_expect (ch, 1) == 0 && *inptr != '\0') \
{ \
/* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- /* This is an illegal character. */ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- inptr += 2; \
- ++*irreversible; \
- continue; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
} \
\
inptr += 2; \
put32 (outptr, ch); \
outptr += 4; \
}
+#define LOOP_NEED_FLAGS
+#define ONEBYTE_BODY \
+ { \
+ if (c < 0x80) \
+ return c; \
+ else \
+ return WEOF; \
+ }
#include <iconv/loop.c>
#define BODY \
{ \
uint32_t ch = get32 (inptr); \
- unsigned char buf[2]; \
- const unsigned char *cp = buf; \
+ char buf[2]; \
+ const char *cp = buf; \
\
if (ch <= L'\x7f') \
/* It's plain ASCII. */ \
cp = __gbk_from_ucs4_tab8[ch - 0x4e00]; \
break; \
case 0xe7c7 ... 0xe864: \
- cp = __gbk_from_ucs4_tab9[ch - 0xe7c7]; \
+ cp = USE_PRIVATE_AREA ? __gbk_from_ucs4_tab9[ch - 0xe7c7] : "\0\0"; \
break; \
case 0xf92c: \
cp = "\xfd\x9c"; \
cp = "\xa1\xe9\0\0\xa1\xea\0\0\xa9\x56\0\0\xa3\xfe\0\0\xa9\x57\0\0\xa3\xa4" + ((ch - 0xffe0) * 4); \
break; \
default: \
+ UNICODE_TAG_HANDLER (ch, 4); \
cp = ""; \
break; \
} \
if (__builtin_expect (cp[0], '\1') == '\0' && ch != 0) \
{ \
/* Illegal character. */ \
- if (! ignore_errors_p ()) \
- { \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++*irreversible; \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
/* See whether there is enough room for the second byte we write. */ \
- else if (NEED_LENGTH_TEST && cp[1] != '\0' \
- && __builtin_expect (outptr + 1 >= outend, 0)) \
+ else if (cp[1] != '\0' && __builtin_expect (outptr + 1 >= outend, 0)) \
{ \
/* We have not enough room. */ \
result = __GCONV_FULL_OUTPUT; \
\
inptr += 4; \
}
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>