git.ipfire.org Git - thirdparty/glibc.git/blobdiff - iconvdata/sjis.c
alloc_buffer: Return unqualified pointer type in alloc_buffer_next
[thirdparty/glibc.git] / iconvdata / sjis.c
index aa51259355ca8072ee21e81e641d72cdb063fbb0..91dffa6fda3f602341e18de0147502e0ad46d40b 100644 (file)
@@ -1,39 +1,26 @@
 /* Mapping tables for SJIS handling.
-   Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1997-2019 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
    The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
 #include <dlfcn.h>
 #include <stdint.h>
 #include <wchar.h>
 
-static const uint32_t halfkana_to_ucs4[] =
-{
-  0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68,
-  0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70,
-  0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78,
-  0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80,
-  0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88,
-  0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90,
-  0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98,
-  0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0xff9e, 0xff9f
-};
-
 
 /* The following table can be generated from the file
        unix/mappings/eastasia/jis/shiftjis.txt
@@ -1891,6 +1878,11 @@ static const uint16_t cjk_block4[2021] =
 
    The only problem is that the shiftjis.txt file does not contain the
    mapping for the characters 0x00 to 0x1f.  We add them ourself.
+
+   A much bigger problem is the mapping of the tilde and the backslash
+   characters.  There are no such characters in SJIS.  The proposed
+   handling is to simply map the two input values to the corresponding
+   byte values of ASCII.  Things fall more or less in place this way.
 */
 static const char from_ucs4_lat1[0xf8][2] =
 {
@@ -1936,7 +1928,7 @@ static const char from_ucs4_lat1[0xf8][2] =
   [0x0075] = "\x75\x00", [0x0076] = "\x76\x00", [0x0077] = "\x77\x00",
   [0x0078] = "\x78\x00", [0x0079] = "\x79\x00", [0x007a] = "\x7a\x00",
   [0x007b] = "\x7b\x00", [0x007c] = "\x7c\x00", [0x007d] = "\x7d\x00",
-  [0x007e] = "\x7e\x00",
+  [0x007e] = "\x7e\x00", [0x007f] = "\x7f\x00",
   [0x00a2] = "\x81\x91", [0x00a3] = "\x81\x92", [0x00a5] = "\x5c\x00",
   [0x00a7] = "\x81\x98", [0x00a8] = "\x81\x4e", [0x00ac] = "\x81\xca",
   [0x00b0] = "\x81\x8b", [0x00b1] = "\x81\x7d", [0x00b4] = "\x81\x4c",
@@ -4328,6 +4320,7 @@ static const char from_ucs4_extra[0x100][2] =
 #define MIN_NEEDED_FROM                1
 #define MAX_NEEDED_FROM                2
 #define MIN_NEEDED_TO          4
+#define ONE_DIRECTION          0
 
 /* First define the conversion function from SJIS to UCS4.  */
 #define MIN_NEEDED_INPUT       MIN_NEEDED_FROM
@@ -4348,39 +4341,30 @@ static const char from_ucs4_extra[0x100][2] =
        ch = 0x203e;                                                          \
        ++inptr;                                                              \
       }                                                                              \
-    else if (ch < 0x7e)                                                              \
+    else if (ch < 0x80)                                                              \
       ++inptr;                                                               \
     else if (ch >= 0xa1 && ch <= 0xdf)                                       \
       {                                                                              \
-       ch = halfkana_to_ucs4[ch - 0xa1];                                     \
+       ch += 0xfec0;                                                         \
        ++inptr;                                                              \
       }                                                                              \
-    else if (__builtin_expect (ch, 0) > 0xea                                 \
+    else if (__builtin_expect (ch > 0xea, 0)                                 \
             || __builtin_expect (ch, 0) == 0xa0                              \
-            || __builtin_expect (ch, 0x81) <= 0x80)                          \
+            || __builtin_expect (ch <= 0x80, 0))                             \
       {                                                                              \
        /* These are illegal.  */                                             \
-       if (! ignore_errors_p ())                                             \
-         {                                                                   \
-           /* This is an illegal character.  */                              \
-           result = __GCONV_ILLEGAL_INPUT;                                   \
-           break;                                                            \
-         }                                                                   \
-                                                                             \
-       ++inptr;                                                              \
-       ++*irreversible;                                                      \
-       continue;                                                             \
+       STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
       }                                                                              \
     else                                                                     \
       {                                                                              \
-       /* Two-byte character.  First test whether the next character         \
+       /* Two-byte character.  First test whether the next byte              \
           is also available.  */                                             \
        uint32_t ch2;                                                         \
        uint_fast32_t idx;                                                    \
                                                                              \
-       if (__builtin_expect (inptr + 1 >= inend, 0))                         \
+       if (__glibc_unlikely (inptr + 1 >= inend))                            \
          {                                                                   \
-           /* The second character is not available.  Store                  \
+           /* The second byte is not available.  Store                       \
               the intermediate result.  */                                   \
            result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
@@ -4388,23 +4372,18 @@ static const char from_ucs4_extra[0x100][2] =
                                                                              \
        ch2 = inptr[1];                                                       \
        idx = ch * 256 + ch2;                                                 \
-       if (__builtin_expect (idx, 0x8140) < 0x8140                           \
-           || (__builtin_expect (idx, 0x8140) > 0x84be && idx < 0x889f)      \
-           || (__builtin_expect (idx, 0x8140) > 0x88fc && idx < 0x8940)      \
-           || (__builtin_expect (idx, 0x8140) > 0x9ffc && idx < 0xe040)      \
-           || __builtin_expect (idx, 0x8140) > 0xeaa4)                       \
+       if (__glibc_unlikely (ch2 < 0x40))                                    \
          {                                                                   \
            /* This is illegal.  */                                           \
-           if (! ignore_errors_p ())                                         \
-             {                                                               \
-               /* This is an illegal character.  */                          \
-               result = __GCONV_ILLEGAL_INPUT;                               \
-               break;                                                        \
-             }                                                               \
-                                                                             \
-           ++inptr;                                                          \
-           ++*irreversible;                                                  \
-           continue;                                                         \
+           STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
+         }                                                                   \
+       else if ((__builtin_expect (idx > 0x84be && idx < 0x889f, 0))         \
+                || (__builtin_expect (idx > 0x88fc && idx < 0x8940, 0))      \
+                || (__builtin_expect (idx > 0x9ffc && idx < 0xe040, 0))      \
+                || __builtin_expect (idx > 0xeaa4, 0))                       \
+         {                                                                   \
+           /* This is illegal.  */                                           \
+           STANDARD_FROM_LOOP_ERR_HANDLER (2);                               \
          }                                                                   \
        else                                                                  \
          {                                                                   \
@@ -4420,22 +4399,13 @@ static const char from_ucs4_extra[0x100][2] =
            else                                                              \
              ch = cjk_block4[(ch - 0xe0) * 192 + ch2 - 0x40];                \
                                                                              \
-           inptr += 2;                                                       \
-         }                                                                   \
-                                                                             \
-       if (__builtin_expect (ch, 1) == 0)                                    \
-         {                                                                   \
-           /* This is an illegal character.  */                              \
-           if (! ignore_errors_p ())                                         \
+           if (__glibc_unlikely (ch == 0))                                   \
              {                                                               \
-               /* This is an illegal character.  */                          \
-               result = __GCONV_ILLEGAL_INPUT;                               \
-               break;                                                        \
+               /* This is an illegal character.  */                          \
+               STANDARD_FROM_LOOP_ERR_HANDLER (2);                           \
              }                                                               \
                                                                              \
            inptr += 2;                                                       \
-           ++*irreversible;                                                  \
-           continue;                                                         \
          }                                                                   \
       }                                                                              \
                                                                              \
@@ -4443,6 +4413,20 @@ static const char from_ucs4_extra[0x100][2] =
     outptr += 4;                                                             \
   }
 #define LOOP_NEED_FLAGS
+#define ONEBYTE_BODY \
+  {                                                                          \
+    if (c < 0x80)                                                            \
+      {                                                                              \
+       if (c == 0x5c)                                                        \
+         return 0xa5;                                                        \
+       if (c == 0x7e)                                                        \
+         return 0x203e;                                                      \
+       return c;                                                             \
+      }                                                                              \
+    if (c >= 0xa1 && c <= 0xdf)                                                      \
+      return 0xfec0 + c;                                                     \
+    return WEOF;                                                             \
+  }
 #include <iconv/loop.c>
 
 
@@ -4462,75 +4446,39 @@ static const char from_ucs4_extra[0x100][2] =
          cp = from_ucs4_greek[ch - 0x391];                                   \
        else if (ch >= 0x2010 && ch <= 0x9fa0)                                \
          cp = from_ucs4_cjk[ch - 0x02010];                                   \
-       else if (__builtin_expect (ch, 0xff01) >= 0xff01                      \
-                && __builtin_expect (ch, 0xff01) <= 0xffef)                  \
+       else if (__builtin_expect (ch >= 0xff01, 1)                           \
+                && __builtin_expect (ch <= 0xffef, 1))                       \
          cp = from_ucs4_extra[ch - 0xff00];                                  \
        else                                                                  \
          {                                                                   \
+           UNICODE_TAG_HANDLER (ch, 4);                                      \
            /* Illegal character.  */                                         \
-           if (step_data->__trans.__trans_fct != NULL)                       \
-             {                                                               \
-               result = DL_CALL_FCT (step_data->__trans.__trans_fct,         \
-                                     (step, step_data, *inptrp, &inptr,      \
-                                      inend, &outptr, irreversible));        \
-               if (result != __GCONV_OK)                                     \
-                 break;                                                      \
-             }                                                               \
-           else if (! ignore_errors_p ())                                    \
-             {                                                               \
-               /* This is an illegal character.  */                          \
-               result = __GCONV_ILLEGAL_INPUT;                               \
-               break;                                                        \
-             }                                                               \
-           else                                                              \
-             {                                                               \
-               inptr += 4;                                                   \
-               ++*irreversible;                                              \
-             }                                                               \
-           continue;                                                         \
+           cp = "";                                                          \
          }                                                                   \
       }                                                                              \
     else                                                                     \
       cp = from_ucs4_lat1[ch];                                               \
                                                                              \
-    if (__builtin_expect (cp[0], '\1') == '\0' && ch != 0)                   \
+    if (__builtin_expect (cp[0] == '\0', 0) && ch != 0)                              \
       {                                                                              \
        /* Illegal character.  */                                             \
-       if (step_data->__trans.__trans_fct != NULL)                           \
-         {                                                                   \
-           result = DL_CALL_FCT (step_data->__trans.__trans_fct,             \
-                                 (step, step_data, *inptrp, &inptr, inend,   \
-                                  &outptr, irreversible));                   \
-           if (result != __GCONV_OK)                                         \
-             break;                                                          \
-         }                                                                   \
-       else if (! ignore_errors_p ())                                        \
-         {                                                                   \
-           /* This is an illegal character.  */                              \
-           result = __GCONV_ILLEGAL_INPUT;                                   \
-           break;                                                            \
-         }                                                                   \
-       else                                                                  \
-         {                                                                   \
-           inptr += 4;                                                       \
-           ++*irreversible;                                                  \
-         }                                                                   \
-       continue;                                                             \
+       STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
       }                                                                              \
     else                                                                     \
       {                                                                              \
-       *outptr++ = cp[0];                                                    \
+       *outptr = cp[0];                                                      \
        /* Now test for a possible second byte and write this if possible.  */\
        if (cp[1] != '\0')                                                    \
          {                                                                   \
-           if (__builtin_expect (outptr >= outend, 0))                       \
+           if (__glibc_unlikely (outptr + 1 >= outend))                      \
              {                                                               \
                /* The result does not fit into the buffer.  */               \
                result = __GCONV_FULL_OUTPUT;                                 \
                break;                                                        \
              }                                                               \
-           *outptr++ = cp[1];                                                \
+           *++outptr = cp[1];                                                \
          }                                                                   \
+       ++outptr;                                                             \
       }                                                                              \
                                                                              \
     inptr += 4;                                                                      \