git.ipfire.org Git - thirdparty/glibc.git/blobdiff - iconvdata/gbk.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / iconvdata / gbk.c
index 952f76ab730068b9dcd63a375d06fa78cdf63049..f838dc11f11e5c8bb721c7d5667eba12c8e40ae8 100644 (file)
@@ -1,23 +1,23 @@
 /* Mapping tables for GBK handling.
-   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1999-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Sean Chen <sean.chen@turbolinux.com>, 1999.
 
    The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
+#include <dlfcn.h>
 #include <gconv.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <wchar.h>
 #include <assert.h>
 
+/* Unicode 3.0.1 does not contain all the characters in GBK.  Define
+   USE_PRIVATE_AREA to 1 in order to use mappings from/to the Unicode
+   Private Use area.  Until we see other systems using the same mappings,
+   it is disabled.  */
+#define USE_PRIVATE_AREA 0
+
 /* The conversion table to UCS4 has almost no holes.  It can be generated with:
 
    perl tab.pl < gbk.txt
@@ -1569,7 +1575,7 @@ static const uint16_t __gbk_to_ucs[] =
   [0x17fb] = 0x72d6, [0x17fc] = 0x72d8, [0x17fd] = 0x72da, [0x17fe] = 0x72db,
   [0x1861] = 0x3000, [0x1862] = 0x3001, [0x1863] = 0x3002, [0x1864] = 0x00b7,
   [0x1865] = 0x02c9, [0x1866] = 0x02c7, [0x1867] = 0x00a8, [0x1868] = 0x3003,
-  [0x1869] = 0x3005, [0x186a] = 0x2015, [0x186b] = 0xff5e, [0x186c] = 0x2016,
+  [0x1869] = 0x3005, [0x186a] = 0x2014, [0x186b] = 0xff5e, [0x186c] = 0x2016,
   [0x186d] = 0x2026, [0x186e] = 0x2018, [0x186f] = 0x2019, [0x1870] = 0x201c,
   [0x1871] = 0x201d, [0x1872] = 0x3014, [0x1873] = 0x3015, [0x1874] = 0x3008,
   [0x1875] = 0x3009, [0x1876] = 0x300a, [0x1877] = 0x300b, [0x1878] = 0x300c,
@@ -1711,7 +1717,7 @@ static const uint16_t __gbk_to_ucs[] =
   [0x1d2a] = 0x0448, [0x1d2b] = 0x0449, [0x1d2c] = 0x044a, [0x1d2d] = 0x044b,
   [0x1d2e] = 0x044c, [0x1d2f] = 0x044d, [0x1d30] = 0x044e, [0x1d31] = 0x044f,
   [0x1d40] = 0x02ca, [0x1d41] = 0x02cb, [0x1d42] = 0x02d9, [0x1d43] = 0x2013,
-  [0x1d44] = 0x2014, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
+  [0x1d44] = 0x2015, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
   [0x1d48] = 0x2109, [0x1d49] = 0x2196, [0x1d4a] = 0x2197, [0x1d4b] = 0x2198,
   [0x1d4c] = 0x2199, [0x1d4d] = 0x2215, [0x1d4e] = 0x221f, [0x1d4f] = 0x2223,
   [0x1d50] = 0x2252, [0x1d51] = 0x2266, [0x1d52] = 0x2267, [0x1d53] = 0x22bf,
@@ -1738,7 +1744,13 @@ static const uint16_t __gbk_to_ucs[] =
   [0x1db0] = 0x00f2, [0x1db1] = 0x016b, [0x1db2] = 0x00fa, [0x1db3] = 0x01d4,
   [0x1db4] = 0x00f9, [0x1db5] = 0x01d6, [0x1db6] = 0x01d8, [0x1db7] = 0x01da,
   [0x1db8] = 0x01dc, [0x1db9] = 0x00fc, [0x1dba] = 0x00ea, [0x1dbb] = 0x0251,
-  [0x1dbc] = 0xe7c7, [0x1dbd] = 0x0144, [0x1dbe] = 0x0148, [0x1dbf] = 0xe7c8,
+#if USE_PRIVATE_AREA
+  [0x1dbc] = 0xe7c7,
+#endif
+                     [0x1dbd] = 0x0144, [0x1dbe] = 0x0148,
+#if USE_PRIVATE_AREA
+                                                           [0x1dbf] = 0xe7c8,
+#endif
   [0x1dc0] = 0x0261, [0x1dc5] = 0x3105, [0x1dc6] = 0x3106, [0x1dc7] = 0x3107,
   [0x1dc8] = 0x3108, [0x1dc9] = 0x3109, [0x1dca] = 0x310a, [0x1dcb] = 0x310b,
   [0x1dcc] = 0x310c, [0x1dcd] = 0x310d, [0x1dce] = 0x310e, [0x1dcf] = 0x310f,
@@ -1765,10 +1777,14 @@ static const uint16_t __gbk_to_ucs[] =
   [0x1e3b] = 0xfe5e, [0x1e3c] = 0xfe5f, [0x1e3d] = 0xfe60, [0x1e3e] = 0xfe61,
   [0x1e40] = 0xfe62, [0x1e41] = 0xfe63, [0x1e42] = 0xfe64, [0x1e43] = 0xfe65,
   [0x1e44] = 0xfe66, [0x1e45] = 0xfe68, [0x1e46] = 0xfe69, [0x1e47] = 0xfe6a,
-  [0x1e48] = 0xfe6b, [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
+  [0x1e48] = 0xfe6b,
+#if USE_PRIVATE_AREA
+                     [0x1e49] = 0xe7e7, [0x1e4a] = 0xe7e8, [0x1e4b] = 0xe7e9,
   [0x1e4c] = 0xe7ea, [0x1e4d] = 0xe7eb, [0x1e4e] = 0xe7ec, [0x1e4f] = 0xe7ed,
   [0x1e50] = 0xe7ee, [0x1e51] = 0xe7ef, [0x1e52] = 0xe7f0, [0x1e53] = 0xe7f1,
-  [0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3, [0x1e56] = 0x3007, [0x1e64] = 0x2500,
+  [0x1e54] = 0xe7f2, [0x1e55] = 0xe7f3,
+#endif
+                                        [0x1e56] = 0x3007, [0x1e64] = 0x2500,
   [0x1e65] = 0x2501, [0x1e66] = 0x2502, [0x1e67] = 0x2503, [0x1e68] = 0x2504,
   [0x1e69] = 0x2505, [0x1e6a] = 0x2506, [0x1e6b] = 0x2507, [0x1e6c] = 0x2508,
   [0x1e6d] = 0x2509, [0x1e6e] = 0x250a, [0x1e6f] = 0x250b, [0x1e70] = 0x250c,
@@ -5498,7 +5514,9 @@ static const uint16_t __gbk_to_ucs[] =
   [0x5dc2] = 0xfa0e, [0x5dc3] = 0xfa0f, [0x5dc4] = 0xfa11, [0x5dc5] = 0xfa13,
   [0x5dc6] = 0xfa14, [0x5dc7] = 0xfa18, [0x5dc8] = 0xfa1f, [0x5dc9] = 0xfa20,
   [0x5dca] = 0xfa21, [0x5dcb] = 0xfa23, [0x5dcc] = 0xfa24, [0x5dcd] = 0xfa27,
-  [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
+  [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29,
+#if USE_PRIVATE_AREA
+                                        [0x5dd0] = 0xe815, [0x5dd1] = 0xe816,
   [0x5dd2] = 0xe817, [0x5dd3] = 0xe818, [0x5dd4] = 0xe819, [0x5dd5] = 0xe81a,
   [0x5dd6] = 0xe81b, [0x5dd7] = 0xe81c, [0x5dd8] = 0xe81d, [0x5dd9] = 0xe81e,
   [0x5dda] = 0xe81f, [0x5ddb] = 0xe820, [0x5ddc] = 0xe821, [0x5ddd] = 0xe822,
@@ -5519,6 +5537,9 @@ static const uint16_t __gbk_to_ucs[] =
   [0x5e17] = 0xe85b, [0x5e18] = 0xe85c, [0x5e19] = 0xe85d, [0x5e1a] = 0xe85e,
   [0x5e1b] = 0xe85f, [0x5e1c] = 0xe860, [0x5e1d] = 0xe861, [0x5e1e] = 0xe862,
   [0x5e1f] = 0xe863, [0x5e20] = 0xe864,
+#else
+  [0x5e20] = 0x0000,
+#endif
 };
 
 /* The table can be created using
@@ -5660,8 +5681,8 @@ static const char __gbk_from_ucs4_tab3[][2] =
 */
 static const char __gbk_from_ucs4_tab4[][2] =
 {
-  [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa8\x44",
-  [0x0005] = "\xa1\xaa", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
+  [0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa1\xaa",
+  [0x0005] = "\xa8\x44", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
   [0x0009] = "\xa1\xaf", [0x000c] = "\xa1\xb0", [0x000d] = "\xa1\xb1",
   [0x0015] = "\xa8\x45", [0x0016] = "\xa1\xad", [0x0020] = "\xa1\xeb",
   [0x0022] = "\xa1\xe4", [0x0023] = "\xa1\xe5", [0x0025] = "\xa8\x46",
@@ -12935,6 +12956,7 @@ static const char __gbk_from_ucs4_tab8[][2] =
 */
 static const char __gbk_from_ucs4_tab9[][2] =
 {
+#if USE_PRIVATE_AREA
   [0x0000] = "\xa8\xbc", [0x0001] = "\xa8\xbf", [0x0020] = "\xa9\x89",
   [0x0021] = "\xa9\x8a", [0x0022] = "\xa9\x8b", [0x0023] = "\xa9\x8c",
   [0x0024] = "\xa9\x8d", [0x0025] = "\xa9\x8e", [0x0026] = "\xa9\x8f",
@@ -12967,6 +12989,7 @@ static const char __gbk_from_ucs4_tab9[][2] =
   [0x0096] = "\xfe\x99", [0x0097] = "\xfe\x9a", [0x0098] = "\xfe\x9b",
   [0x0099] = "\xfe\x9c", [0x009a] = "\xfe\x9d", [0x009b] = "\xfe\x9e",
   [0x009c] = "\xfe\x9f", [0x009d] = "\xfe\xa0",
+#endif
 };
 
 /* The table can be created using
@@ -13107,9 +13130,10 @@ static const char __gbk_from_ucs4_tab12[][2] =
 #define MIN_NEEDED_FROM                1
 #define MAX_NEEDED_FROM                2
 #define MIN_NEEDED_TO          4
+#define ONE_DIRECTION          0
 
 
-/* First define the conversion function from ISO 8859-1 to UCS4.  */
+/* First define the conversion function from GBK to UCS4.  */
 #define MIN_NEEDED_INPUT       MIN_NEEDED_FROM
 #define MAX_NEEDED_INPUT       MAX_NEEDED_FROM
 #define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
@@ -13121,28 +13145,20 @@ static const char __gbk_from_ucs4_tab12[][2] =
     if (ch <= 0x7f)                                                          \
       ++inptr;                                                               \
     else                                                                     \
-      if (__builtin_expect (ch, 0x81) <= 0x80                                \
-         || __builtin_expect (ch, 0x81) > 0xfe)                              \
+      if (__builtin_expect (ch <= 0x80, 0)                                   \
+         || __builtin_expect (ch > 0xfe, 0))                                 \
        {                                                                     \
          /* This is illegal.  */                                             \
-         if (! ignore_errors_p ())                                           \
-           {                                                                 \
-             result = __GCONV_ILLEGAL_INPUT;                                 \
-             break;                                                          \
-           }                                                                 \
-                                                                             \
-         ++inptr;                                                            \
-         ++*irreversible;                                                    \
-         continue;                                                           \
+         STANDARD_FROM_LOOP_ERR_HANDLER (1);                                 \
        }                                                                     \
       else                                                                   \
        {                                                                     \
          /* Two or more byte character.  First test whether the              \
-            next character is also available.  */                            \
+            next byte is also available.  */                                 \
          uint32_t ch2;                                                       \
          int idx;                                                            \
                                                                              \
-         if (NEED_LENGTH_TEST && __builtin_expect (inptr + 1 >= inend, 0))   \
+         if (__glibc_unlikely (inptr + 1 >= inend))                          \
            {                                                                 \
              /* The second character is not available.  Store                \
                 the intermediate result.  */                                 \
@@ -13152,20 +13168,13 @@ static const char __gbk_from_ucs4_tab12[][2] =
                                                                              \
          ch2 = inptr[1];                                                     \
                                                                              \
-         /* All second bytes of a multibyte character must be >= 0x40. */    \
-         if (__builtin_expect (ch2, 0x41) < 0x40)                            \
+         /* All second bytes of a multibyte character must be >= 0x40, and   \
+            the __gbk_to_ucs table only covers the range up to 0xfe 0xa0. */ \
+         if (__builtin_expect (ch2 < 0x40, 0)                                \
+             || (__builtin_expect (ch, 0x81) == 0xfe && ch2 > 0xa0))         \
            {                                                                 \
              /* This is an illegal character.  */                            \
-             if (! ignore_errors_p ())                                       \
-               {                                                             \
-                 /* This is an illegal character.  */                        \
-                 result = __GCONV_ILLEGAL_INPUT;                             \
-                 break;                                                      \
-               }                                                             \
-                                                                             \
-             ++inptr;                                                        \
-             ++*irreversible;                                                \
-             continue;                                                       \
+             STANDARD_FROM_LOOP_ERR_HANDLER (1);                             \
            }                                                                 \
                                                                              \
          /* This is code set 1: GBK.  */                                     \
@@ -13176,16 +13185,7 @@ static const char __gbk_from_ucs4_tab12[][2] =
          if (__builtin_expect (ch, 1) == 0 && *inptr != '\0')                \
            {                                                                 \
              /* This is an illegal character.  */                            \
-             if (! ignore_errors_p ())                                       \
-               {                                                             \
-                 /* This is an illegal character.  */                        \
-                 result = __GCONV_ILLEGAL_INPUT;                             \
-                 break;                                                      \
-               }                                                             \
-                                                                             \
-             inptr += 2;                                                     \
-             ++*irreversible;                                                \
-             continue;                                                       \
+             STANDARD_FROM_LOOP_ERR_HANDLER (2);                             \
            }                                                                 \
                                                                              \
          inptr += 2;                                                         \
@@ -13194,6 +13194,14 @@ static const char __gbk_from_ucs4_tab12[][2] =
     put32 (outptr, ch);                                                              \
     outptr += 4;                                                             \
   }
+#define LOOP_NEED_FLAGS
+#define ONEBYTE_BODY \
+  {                                                                          \
+    if (c < 0x80)                                                            \
+      return c;                                                                      \
+    else                                                                     \
+      return WEOF;                                                           \
+  }
 #include <iconv/loop.c>
 
 
@@ -13205,8 +13213,8 @@ static const char __gbk_from_ucs4_tab12[][2] =
 #define BODY \
   {                                                                          \
     uint32_t ch = get32 (inptr);                                             \
-    unsigned char buf[2];                                                    \
-    const unsigned char *cp = buf;                                           \
+    char buf[2];                                                             \
+    const char *cp = buf;                                                    \
                                                                              \
     if (ch <= L'\x7f')                                                       \
       /* It's plain ASCII.  */                                               \
@@ -13414,7 +13422,7 @@ static const char __gbk_from_ucs4_tab12[][2] =
          cp = __gbk_from_ucs4_tab8[ch - 0x4e00];                             \
          break;                                                              \
        case 0xe7c7 ... 0xe864:                                               \
-         cp = __gbk_from_ucs4_tab9[ch - 0xe7c7];                             \
+         cp = USE_PRIVATE_AREA ? __gbk_from_ucs4_tab9[ch - 0xe7c7] : "\0\0"; \
          break;                                                              \
        case 0xf92c:                                                          \
          cp = "\xfd\x9c";                                                    \
@@ -13444,23 +13452,17 @@ static const char __gbk_from_ucs4_tab12[][2] =
          cp = "\xa1\xe9\0\0\xa1\xea\0\0\xa9\x56\0\0\xa3\xfe\0\0\xa9\x57\0\0\xa3\xa4" + ((ch - 0xffe0) * 4); \
          break;                                                              \
        default:                                                              \
+         UNICODE_TAG_HANDLER (ch, 4);                                        \
          cp = "";                                                            \
          break;                                                              \
        }                                                                     \
       if (__builtin_expect (cp[0], '\1') == '\0' && ch != 0)                 \
        {                                                                     \
          /* Illegal character.  */                                           \
-         if (! ignore_errors_p ())                                           \
-           {                                                                 \
-             result = __GCONV_ILLEGAL_INPUT;                                 \
-             break;                                                          \
-           }                                                                 \
-                                                                             \
-         ++*irreversible;                                                    \
+         STANDARD_TO_LOOP_ERR_HANDLER (4);                                   \
        }                                                                     \
       /* See whether there is enough room for the second byte we write.  */   \
-      else if (NEED_LENGTH_TEST && cp[1] != '\0'                             \
-              && __builtin_expect (outptr + 1 >= outend, 0))                 \
+      else if (cp[1] != '\0' && __builtin_expect (outptr + 1 >= outend, 0))   \
        {                                                                     \
          /* We have not enough room.  */                                     \
          result = __GCONV_FULL_OUTPUT;                                       \
@@ -13476,6 +13478,7 @@ static const char __gbk_from_ucs4_tab12[][2] =
                                                                              \
     inptr += 4;                                                               \
   }
+#define LOOP_NEED_FLAGS
 #include <iconv/loop.c>