]> git.ipfire.org Git - thirdparty/glibc.git/blobdiff - iconvdata/unicode.c
ChangeLog: Correct two dates.
[thirdparty/glibc.git] / iconvdata / unicode.c
index 34c563bd796cf2ebb18a9607de1706914de8b18b..78e511ac081864938916e250a3e4eb3bb1a1dac6 100644 (file)
@@ -1,24 +1,24 @@
 /* Conversion module for Unicode
-   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1999-2019 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
 
    The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
 #include <byteswap.h>
+#include <dlfcn.h>
 #include <gconv.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -38,6 +38,7 @@
 #define DEFINE_FINI            0
 #define MIN_NEEDED_FROM                2
 #define MIN_NEEDED_TO          4
+#define ONE_DIRECTION          0
 #define FROM_DIRECTION         (dir == from_unicode)
 #define PREPARE_LOOP \
   enum direction dir = ((struct unicode_data *) step->__data)->dir;          \
        {                                                                     \
          /* We have to find out which byte order the file is encoded in.  */ \
          if (inptr + 2 > inend)                                              \
-           return __GCONV_EMPTY_INPUT;                                       \
+           return (inptr == inend                                            \
+                   ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);        \
                                                                              \
          if (get16u (inptr) == BOM)                                          \
            /* Simply ignore the BOM character.  */                           \
-           inptr += 2;                                                       \
+           *inptrp = inptr += 2;                                             \
          else if (get16u (inptr) == BOM_OE)                                  \
            {                                                                 \
-             ((struct unicode_data *) step->__data)->swap = 1;               \
-             inptr += 2;                                                     \
+             data->__flags |= __GCONV_SWAP;                                  \
+             *inptrp = inptr += 2;                                           \
            }                                                                 \
        }                                                                     \
     }                                                                        \
   else if (!data->__internal_use && data->__invocation_counter == 0)         \
     {                                                                        \
       /* Emit the Byte Order Mark.  */                                       \
-      if (__builtin_expect (outbuf + 2 > outend, 0))                         \
+      if (__glibc_unlikely (outbuf + 2 > outend))                            \
        return __GCONV_FULL_OUTPUT;                                           \
                                                                              \
       put16u (outbuf, BOM);                                                  \
       outbuf += 2;                                                           \
     }                                                                        \
-  swap = ((struct unicode_data *) step->__data)->swap;
-#define EXTRA_LOOP_ARGS                , data, swap
+  swap = data->__flags & __GCONV_SWAP;
+#define EXTRA_LOOP_ARGS                , swap
 
 
 /* Direction of the transformation.  */
@@ -84,10 +86,10 @@ enum direction
 struct unicode_data
 {
   enum direction dir;
-  int swap;
 };
 
 
+extern int gconv_init (struct __gconv_step *step);
 int
 gconv_init (struct __gconv_step *step)
 {
@@ -96,7 +98,7 @@ gconv_init (struct __gconv_step *step)
   enum direction dir = illegal_dir;
   int result;
 
-  if (__strcasecmp (step->__from_name, "UNICODE") == 0)
+  if (strcmp (step->__from_name, "UNICODE//") == 0)
     dir = from_unicode;
   else
     dir = to_unicode;
@@ -107,7 +109,6 @@ gconv_init (struct __gconv_step *step)
   if (new_data != NULL)
     {
       new_data->dir = dir;
-      new_data->swap = 0;
       step->__data = new_data;
 
       if (dir == from_unicode)
@@ -134,6 +135,7 @@ gconv_init (struct __gconv_step *step)
 }
 
 
+extern void gconv_end (struct __gconv_step *data);
 void
 gconv_end (struct __gconv_step *data)
 {
@@ -149,16 +151,25 @@ gconv_end (struct __gconv_step *data)
   {                                                                          \
     uint32_t c = get32 (inptr);                                                      \
                                                                              \
-    if (__builtin_expect (c, 0) >= 0x10000)                                  \
+    if (__glibc_unlikely (c >= 0x10000))                                     \
       {                                                                              \
+       UNICODE_TAG_HANDLER (c, 4);                                           \
+       STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
+      }                                                                              \
+    else if (__glibc_unlikely (c >= 0xd800 && c < 0xe000))                   \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this, because the UCS-2 output might be              \
+          interpreted as UTF-16 by other programs.  If we let                \
+          surrogates pass through, attackers could exploit this as           \
+          a security hole by synthesizing any desired plane 1-16             \
+          character.  */                                                     \
+       result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
-         {                                                                   \
-           /* This is an illegal character.  */                              \
-           result = __GCONV_ILLEGAL_INPUT;                                   \
-           break;                                                            \
-         }                                                                   \
-                                                                             \
-       ++*converted;                                                         \
+         break;                                                              \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
       }                                                                              \
     else                                                                     \
       {                                                                              \
@@ -168,8 +179,9 @@ gconv_end (struct __gconv_step *data)
                                                                              \
     inptr += 4;                                                                      \
   }
+#define LOOP_NEED_FLAGS
 #define EXTRA_LOOP_DECLS \
-       , struct __gconv_step_data *step_data, int swap
+       , int swap
 #include <iconv/loop.c>
 
 
@@ -184,13 +196,21 @@ gconv_end (struct __gconv_step *data)
     if (swap)                                                                \
       u1 = bswap_16 (u1);                                                    \
                                                                              \
+    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))                              \
+      {                                                                              \
+       /* Surrogate characters in UCS-2 input are not valid.  Reject         \
+          them.  (Catching this here is not security relevant.)  */          \
+       STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
+      }                                                                              \
+                                                                             \
     put32 (outptr, u1);                                                              \
                                                                              \
     inptr += 2;                                                                      \
     outptr += 4;                                                             \
   }
+#define LOOP_NEED_FLAGS
 #define EXTRA_LOOP_DECLS \
-       , struct __gconv_step_data *step_data, int swap
+       , int swap
 #include <iconv/loop.c>