git.ipfire.org Git - thirdparty/glibc.git/commitdiff
gconv: Fix assertion failure in ISO-2022-JP-3 module (bug 27256)
author: Florian Weimer <fweimer@redhat.com>
Wed, 27 Jan 2021 12:36:12 +0000 (13:36 +0100)
committer: Florian Weimer <fweimer@redhat.com>
Wed, 27 Jan 2021 14:51:17 +0000 (15:51 +0100)
The conversion loop to the internal encoding does not follow
the interface contract that __GCONV_FULL_OUTPUT is only returned
after the internal wchar_t buffer has been filled completely.  This
is enforced by the first of the two asserts in iconv/skeleton.c:

      /* We must run out of output buffer space in this
         rerun.  */
      assert (outbuf == outerr);
      assert (nstatus == __GCONV_FULL_OUTPUT);

This commit solves this issue by queuing a second wide character
which cannot be written immediately in the state variable, like
other converters already do (e.g., BIG5-HKSCS or TSCII).

Reported-by: Tavis Ormandy <taviso@gmail.com>
(cherry picked from commit 7d88c6142c6efc160c0ee5e4f85cde382c072888)

iconvdata/Makefile
iconvdata/bug-iconv14.c [new file with mode: 0644]
iconvdata/iso-2022-jp-3.c

index 06e161d9b8f6711879908359c8de99b4c23c0888..36dd5d12c38d775fae4d422de7ce929d67b6aada 100644 (file)
@@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules))
 ifeq (yes,$(build-shared))
 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
        tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
-       bug-iconv10 bug-iconv11 bug-iconv12
+       bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv14
 ifeq ($(have-thread-library),yes)
 tests += bug-iconv3
 endif
@@ -316,6 +316,8 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
                          $(addprefix $(objpfx),$(modules.so))
 $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
                          $(addprefix $(objpfx),$(modules.so))
+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
+                         $(addprefix $(objpfx),$(modules.so))
 
 $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
                         $(addprefix $(objpfx),$(modules.so)) \
diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
new file mode 100644 (file)
index 0000000..902f140
--- /dev/null
@@ -0,0 +1,127 @@
+/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <string.h>
+#include <errno.h>
+#include <support/check.h>
+
+/* Pending character is emitted by the input's final ESC ( B sequence.  */
+static void
+with_escape_sequence (void)
+{
+  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
+  TEST_VERIFY_EXIT (c != (iconv_t) -1);
+
+  char in[] = "\e$(O+D\e(B";
+  char *inbuf = in;
+  size_t inleft = strlen (in);
+  char out[3];                  /* Room for one 2-byte result, not two.  */
+  char *outbuf;
+  size_t outleft;
+
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
+  TEST_COMPARE (errno, E2BIG);
+  TEST_COMPARE (inleft, 3);
+  TEST_COMPARE (inbuf - in, strlen (in) - 3);
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xc3);
+  TEST_COMPARE (out[1] & 0xff, 0xa6);
+
+  /* Only the 3-byte ESC ( B remains.  Consuming it returns to the
+     initial shift state and produces the pending character.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
+  TEST_COMPARE (inleft, 0);
+  TEST_COMPARE (inbuf - in, strlen (in));
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  /* Resetting again must write nothing; out keeps its previous bytes.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+  TEST_COMPARE (outleft, sizeof (out));
+  TEST_COMPARE (outbuf - out, 0);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  TEST_COMPARE (iconv_close (c), 0);
+}
+
+/* Pending character is emitted by an explicit iconv state reset.  */
+static void
+with_flush (void)
+{
+  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
+  TEST_VERIFY_EXIT (c != (iconv_t) -1);
+
+  char in[] = "\e$(O+D";
+  char *inbuf = in;
+  size_t inleft = strlen (in);
+  char out[3];                  /* Room for one 2-byte result, not two.  */
+  char *outbuf;
+  size_t outleft;
+
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
+  TEST_COMPARE (errno, E2BIG);
+  TEST_COMPARE (inleft, 0);
+  TEST_COMPARE (inbuf - in, strlen (in));
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xc3);
+  TEST_COMPARE (out[1] & 0xff, 0xa6);
+
+  /* The NULL-input call flushes the pending character.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  /* Resetting again must write nothing; out keeps its previous bytes.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+  TEST_COMPARE (outleft, sizeof (out));
+  TEST_COMPARE (outbuf - out, 0);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  TEST_COMPARE (iconv_close (c), 0);
+}
+
+static int
+do_test (void)
+{
+  with_escape_sequence ();      /* Drain via ESC ( B in the input.  */
+  with_flush ();                /* Drain via iconv (c, NULL, ...).  */
+  return 0;
+}
+
+#include <support/test-driver.c>
index de259580c3f378bbaf36702a67a752e8edc8fddd..047fab8e8dfbde7eb2e54e19403cb43125566304 100644 (file)
@@ -67,23 +67,34 @@ enum
   CURRENT_SEL_MASK = 7 << 3
 };
 
-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
-   also contains the last two bytes to be output, shifted by 6 bits, and a
-   one-bit indicator whether they must be preceded by the shift sequence,
-   in bit 22.  */
+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
+   state also contains the last two bytes to be output, shifted by 6
+   bits, and a one-bit indicator whether they must be preceded by the
+   shift sequence, in bit 22.  During ISO-2022-JP-3 to UCS-4
+   conversion, COUNT may also contain a non-zero pending wide
+   character, shifted by six bits.  This happens for certain inputs in
+   JISX0213_1_2004_set and JISX0213_2_set if the second wide character
+   in a combining sequence cannot be written because the buffer is
+   full.  */
 
 /* Since this is a stateful encoding we have to provide code which resets
    the output state to the initial state.  This has to be done during the
    flushing.  */
 #define EMIT_SHIFT_TO_INIT \
-  if ((data->__statep->__count & ~7) != ASCII_set)                           \
+  if (data->__statep->__count != ASCII_set)                          \
     {                                                                        \
       if (FROM_DIRECTION)                                                    \
        {                                                                     \
-         /* It's easy, we don't have to emit anything, we just reset the     \
-            state for the input.  */                                         \
-         data->__statep->__count &= 7;                                       \
-         data->__statep->__count |= ASCII_set;                               \
+         if (__glibc_likely (outbuf + 4 <= outend))                          \
+           {                                                                 \
+             /* Write out the last character.  */                            \
+             *((uint32_t *) outbuf) = data->__statep->__count >> 6;          \
+             outbuf += sizeof (uint32_t);                                    \
+             data->__statep->__count = ASCII_set;                      \
+           }                                                                 \
+         else                                                                \
+           /* We don't have enough room in the output buffer.  */            \
+           status = __GCONV_FULL_OUTPUT;                                     \
        }                                                                     \
       else                                                                   \
        {                                                                     \
@@ -151,7 +162,21 @@ enum
 #define LOOPFCT                        FROM_LOOP
 #define BODY \
   {                                                                          \
-    uint32_t ch = *inptr;                                                    \
+    uint32_t ch;                                                             \
+                                                                             \
+    /* Output any pending character.  */                                     \
+    ch = set >> 6;                                                           \
+    if (__glibc_unlikely (ch != 0))                                          \
+      {                                                                              \
+       put32 (outptr, ch);                                                   \
+       outptr += 4;                                                          \
+       /* Remove the pending character, but preserve state bits.  */         \
+       set &= (1 << 6) - 1;                                                  \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
+    /* Otherwise read the next input byte.  */                               \
+    ch = *inptr;                                                             \
                                                                              \
     /* Recognize escape sequences.  */                                       \
     if (__glibc_unlikely (ch == ESC))                                        \
@@ -297,21 +322,25 @@ enum
            uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];             \
            uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];             \
                                                                              \
+           inptr += 2;                                                       \
+                                                                             \
+           put32 (outptr, u1);                                               \
+           outptr += 4;                                                      \
+                                                                             \
            /* See whether we have room for two characters.  */               \
-           if (outptr + 8 <= outend)                                         \
+           if (outptr + 4 <= outend)                                         \
              {                                                               \
-               inptr += 2;                                                   \
-               put32 (outptr, u1);                                           \
-               outptr += 4;                                                  \
                put32 (outptr, u2);                                           \
                outptr += 4;                                                  \
                continue;                                                     \
              }                                                               \
-           else                                                              \
-             {                                                               \
-               result = __GCONV_FULL_OUTPUT;                                 \
-               break;                                                        \
-             }                                                               \
+                                                                             \
+           /* Otherwise store only the first character now, and              \
+              put the second one into the queue.  */                         \
+           set |= u2 << 6;                                                   \
+           /* Tell the caller why we terminate the loop.  */                 \
+           result = __GCONV_FULL_OUTPUT;                                     \
+           break;                                                            \
          }                                                                   \
                                                                              \
        inptr += 2;                                                           \