CVE-2014-6040: Crashes on invalid input in IBM gconv modules [BZ #17325]

[thirdparty/glibc.git] / iconvdata / utf-16.c
diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c

index a7a01a83022ac152d86eafca2e1d72fd4af9aa75..31c2d6e8d9c6c1fb7589455cfd86f1105639bcab 100644 (file)
--- a/iconvdata/utf-16.c
+++ b/iconvdata/utf-16.c
@@ -1,24 +1,24 @@
  /* Conversion module for UTF-16.
-   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1999-2014 Free Software Foundation, Inc.
     This file is part of the GNU C Library.
     Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
  
     The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
  
     The GNU C Library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
  
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
  
  #include <byteswap.h>
+#include <dlfcn.h>
  #include <gconv.h>
  #include <stddef.h>
  #include <stdint.h>
@@ -39,40 +39,48 @@
  #define MIN_NEEDED_FROM                2
  #define MAX_NEEDED_FROM                4
  #define MIN_NEEDED_TO          4
+#define ONE_DIRECTION          0
  #define FROM_DIRECTION         (dir == from_utf16)
  #define PREPARE_LOOP \
    enum direction dir = ((struct utf16_data *) step->__data)->dir;            \
    enum variant var = ((struct utf16_data *) step->__data)->var;                      \
-  int swap = ((struct utf16_data *) step->__data)->swap;                     \
-  if (FROM_DIRECTION || var == UTF_16)                                       \
+  if (__glibc_unlikely (data->__invocation_counter == 0))                    \
      {                                                                        \
-      if (data->__invocation_counter == 0)                                   \
+      if (var == UTF_16)                                                     \
         {                                                                     \
-         /* We have to find out which byte order the file is encoded in.  */ \
-         if (inptr + 2 > inend)                                              \
-           return __GCONV_EMPTY_INPUT;                                       \
+         if (FROM_DIRECTION)                                                 \
+           {                                                                 \
+             /* We have to find out which byte order the file is             \
+                encoded in.  */                                              \
+             if (inptr + 2 > inend)                                          \
+               return (inptr == inend                                        \
+                       ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);    \
                                                                               \
-         if (get16u (inptr) == BOM)                                          \
-           /* Simply ignore the BOM character.  */                           \
-           inptr += 2;                                                       \
-         else if (get16u (inptr) == BOM_OE)                                  \
+             if (get16u (inptr) == BOM)                                      \
+               /* Simply ignore the BOM character.  */                       \
+               *inptrp = inptr += 2;                                         \
+             else if (get16u (inptr) == BOM_OE)                              \
+               {                                                             \
+                 data->__flags |= __GCONV_SWAP;                              \
+                 *inptrp = inptr += 2;                                       \
+               }                                                             \
+           }                                                                 \
+         else if (!FROM_DIRECTION && !data->__internal_use)                  \
             {                                                                 \
-             ((struct utf16_data *) step->__data)->swap = 1;                 \
-             inptr += 2;                                                     \
+             /* Emit the Byte Order Mark.  */                                \
+             if (__glibc_unlikely (outbuf + 2 > outend))                     \
+               return __GCONV_FULL_OUTPUT;                                   \
+                                                                             \
+             put16u (outbuf, BOM);                                           \
+             outbuf += 2;                                                    \
             }                                                                 \
         }                                                                     \
+      else if ((var == UTF_16LE && BYTE_ORDER == BIG_ENDIAN)                 \
+              || (var == UTF_16BE && BYTE_ORDER == LITTLE_ENDIAN))           \
+       data->__flags |= __GCONV_SWAP;                                        \
      }                                                                        \
-  else if (!FROM_DIRECTION && var == UTF_16 && !data->__internal_use         \
-          && data->__invocation_counter == 0)                                \
-    {                                                                        \
-      /* Emit the Byte Order Mark.  */                                       \
-      if (outbuf + 2 > outend)                                               \
-       return __GCONV_FULL_OUTPUT;                                           \
-                                                                             \
-      put16u (outbuf, BOM);                                                  \
-      outbuf += 2;                                                           \
-    }
-#define EXTRA_LOOP_ARGS                , var, data, swap
+  const int swap = data->__flags & __GCONV_SWAP;
+#define EXTRA_LOOP_ARGS                , swap
  
  
  /* Direction of the transformation.  */
@@ -95,10 +103,10 @@ struct utf16_data
  {
    enum direction dir;
    enum variant var;
-  int swap;
  };
  
  
+extern int gconv_init (struct __gconv_step *step);
  int
  gconv_init (struct __gconv_step *step)
  {
@@ -108,39 +116,39 @@ gconv_init (struct __gconv_step *step)
    enum variant var = illegal_var;
    int result;
  
-  if (__strcasecmp (step->__from_name, "UTF-16") == 0)
+  if (__strcasecmp (step->__from_name, "UTF-16//") == 0)
      {
        dir = from_utf16;
        var = UTF_16;
      }
-  else if (__strcasecmp (step->__to_name, "UTF-16") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16//") == 0)
      {
        dir = to_utf16;
        var = UTF_16;
      }
-  else if (__strcasecmp (step->__from_name, "UTF-16BE") == 0)
+  else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0)
      {
        dir = from_utf16;
        var = UTF_16BE;
      }
-  else if (__strcasecmp (step->__to_name, "UTF-16BE") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16BE//") == 0)
      {
        dir = to_utf16;
        var = UTF_16BE;
      }
-  else if (__strcasecmp (step->__from_name, "UTF-16LE") == 0)
+  else if (__strcasecmp (step->__from_name, "UTF-16LE//") == 0)
      {
        dir = from_utf16;
        var = UTF_16LE;
      }
-  else if (__strcasecmp (step->__to_name, "UTF-16LE") == 0)
+  else if (__strcasecmp (step->__to_name, "UTF-16LE//") == 0)
      {
        dir = to_utf16;
        var = UTF_16LE;
      }
  
    result = __GCONV_NOCONV;
-  if (dir != illegal_dir)
+  if (__builtin_expect (dir, to_utf16) != illegal_dir)
      {
        new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
  
@@ -149,15 +157,12 @@ gconv_init (struct __gconv_step *step)
         {
           new_data->dir = dir;
           new_data->var = var;
-         new_data->swap = ((var == UTF_16LE && BYTE_ORDER == BIG_ENDIAN)
-                           || (var == UTF_16BE
-                               && BYTE_ORDER == LITTLE_ENDIAN));
           step->__data = new_data;
  
           if (dir == from_utf16)
             {
               step->__min_needed_from = MIN_NEEDED_FROM;
-             step->__max_needed_from = MIN_NEEDED_FROM;
+             step->__max_needed_from = MAX_NEEDED_FROM;
               step->__min_needed_to = MIN_NEEDED_TO;
               step->__max_needed_to = MIN_NEEDED_TO;
             }
@@ -166,7 +171,7 @@ gconv_init (struct __gconv_step *step)
               step->__min_needed_from = MIN_NEEDED_TO;
               step->__max_needed_from = MIN_NEEDED_TO;
               step->__min_needed_to = MIN_NEEDED_FROM;
-             step->__max_needed_to = MIN_NEEDED_FROM;
+             step->__max_needed_to = MAX_NEEDED_FROM;
             }
  
           step->__stateful = 0;
@@ -179,6 +184,7 @@ gconv_init (struct __gconv_step *step)
  }
  
  
+extern void gconv_end (struct __gconv_step *data);
  void
  gconv_end (struct __gconv_step *data)
  {
@@ -195,18 +201,31 @@ gconv_end (struct __gconv_step *data)
    {                                                                          \
      uint32_t c = get32 (inptr);                                                      \
                                                                               \
+    if (__glibc_unlikely (c >= 0xd800 && c < 0xe000))                        \
+      {                                                                              \
+       /* Surrogate characters in UCS-4 input are not valid.                 \
+          We must catch this.  If we let surrogates pass through,            \
+          attackers could make a security hole exploit by                    \
+          synthesizing any desired plane 1-16 character.  */                 \
+       result = __GCONV_ILLEGAL_INPUT;                                       \
+       if (! ignore_errors_p ())                                             \
+         break;                                                              \
+       inptr += 4;                                                           \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
      if (swap)                                                                \
        {                                                                              \
-       if (c >= 0x10000)                                                     \
+       if (__glibc_unlikely (c >= 0x10000))                                  \
           {                                                                   \
-           if (c >= 0x110000)                                                \
+           if (__glibc_unlikely (c >= 0x110000))                             \
               {                                                               \
-               result = __GCONV_ILLEGAL_INPUT;                               \
-               break;                                                        \
+               STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
               }                                                               \
                                                                               \
             /* Generate a surrogate character.  */                            \
-           if (NEED_LENGTH_TEST && outptr + 4 > outend)                      \
+           if (__glibc_unlikely (outptr + 4 > outend))                       \
               {                                                               \
                 /* Overflow in the output buffer.  */                         \
                 result = __GCONV_FULL_OUTPUT;                                 \
@@ -222,16 +241,15 @@ gconv_end (struct __gconv_step *data)
        }                                                                              \
      else                                                                     \
        {                                                                              \
-       if (c >= 0x10000)                                                     \
+       if (__glibc_unlikely (c >= 0x10000))                                  \
           {                                                                   \
-           if (c >= 0x110000)                                                \
+           if (__glibc_unlikely (c >= 0x110000))                             \
               {                                                               \
-               result = __GCONV_ILLEGAL_INPUT;                               \
-               break;                                                        \
+               STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
               }                                                               \
                                                                               \
             /* Generate a surrogate character.  */                            \
-           if (NEED_LENGTH_TEST && outptr + 4 > outend)                      \
+           if (__glibc_unlikely (outptr + 4 > outend))                       \
               {                                                               \
                 /* Overflow in the output buffer.  */                         \
                 result = __GCONV_FULL_OUTPUT;                                 \
@@ -248,8 +266,9 @@ gconv_end (struct __gconv_step *data)
      outptr += 2;                                                             \
      inptr += 4;                                                                      \
    }
+#define LOOP_NEED_FLAGS
  #define EXTRA_LOOP_DECLS \
-       , enum variant var, struct __gconv_step_data *step_data, int swap
+       , int swap
  #include <iconv/loop.c>
  
  
@@ -266,10 +285,10 @@ gconv_end (struct __gconv_step *data)
        {                                                                              \
         u1 = bswap_16 (u1);                                                   \
                                                                               \
-       if (u1 < 0xd800 || u1 > 0xdfff)                                       \
+       if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff)                 \
           {                                                                   \
             /* No surrogate.  */                                              \
-           put32 (outptr, u1);                               \
+           put32 (outptr, u1);                                               \
             inptr += 2;                                                       \
           }                                                                   \
         else                                                                  \
@@ -278,7 +297,7 @@ gconv_end (struct __gconv_step *data)
                                                                               \
             /* It's a surrogate character.  At least the first word says      \
                it is.  */                                                     \
-           if (NEED_LENGTH_TEST && inptr + 4 > inend)                        \
+           if (__glibc_unlikely (inptr + 4 > inend))                         \
               {                                                               \
                 /* We don't have enough input for another complete input      \
                    character.  */                                             \
@@ -288,12 +307,12 @@ gconv_end (struct __gconv_step *data)
                                                                               \
             inptr += 2;                                                       \
             u2 = bswap_16 (get16 (inptr));                                    \
-           if (u2 < 0xdc00 || u2 >= 0xdfff)                                  \
+           if (__builtin_expect (u2 < 0xdc00, 0)                             \
+               || __builtin_expect (u2 > 0xdfff, 0))                         \
               {                                                               \
                 /* This is no valid second word for a surrogate.  */          \
-               result = __GCONV_ILLEGAL_INPUT;                               \
                 inptr -= 2;                                                   \
-               break;                                                        \
+               STANDARD_FROM_LOOP_ERR_HANDLER (2);                           \
               }                                                               \
                                                                               \
             put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00));            \
@@ -302,7 +321,7 @@ gconv_end (struct __gconv_step *data)
        }                                                                              \
      else                                                                     \
        {                                                                              \
-       if (u1 < 0xd800 || u1 > 0xdfff)                                       \
+       if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff)                 \
           {                                                                   \
             /* No surrogate.  */                                              \
             put32 (outptr, u1);                                               \
@@ -310,11 +329,9 @@ gconv_end (struct __gconv_step *data)
           }                                                                   \
         else                                                                  \
           {                                                                   \
-           uint16_t u2;                                                      \
-                                                                             \
             /* It's a surrogate character.  At least the first word says      \
                it is.  */                                                     \
-           if (NEED_LENGTH_TEST && inptr + 4 > inend)                        \
+           if (__glibc_unlikely (inptr + 4 > inend))                         \
               {                                                               \
                 /* We don't have enough input for another complete input      \
                    character.  */                                             \
@@ -323,13 +340,13 @@ gconv_end (struct __gconv_step *data)
               }                                                               \
                                                                               \
             inptr += 2;                                                       \
-           u2 = get16 (inptr);                                               \
-           if (u2 < 0xdc00 || u2 >= 0xdfff)                                  \
+           uint16_t u2 = get16 (inptr);                                      \
+           if (__builtin_expect (u2 < 0xdc00, 0)                             \
+               || __builtin_expect (u2 > 0xdfff, 0))                         \
               {                                                               \
                 /* This is no valid second word for a surrogate.  */          \
-               result = __GCONV_ILLEGAL_INPUT;                               \
                 inptr -= 2;                                                   \
-               break;                                                        \
+               STANDARD_FROM_LOOP_ERR_HANDLER (2);                           \
               }                                                               \
                                                                               \
             put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00));            \
@@ -338,8 +355,9 @@ gconv_end (struct __gconv_step *data)
        }                                                                              \
      outptr += 4;                                                             \
    }
+#define LOOP_NEED_FLAGS
  #define EXTRA_LOOP_DECLS \
-       , enum variant var, struct __gconv_step_data *step_data, int swap
+       , int swap
  #include <iconv/loop.c>