From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 3 Feb 2013 15:07:32 +0000 (+0200)
Subject: Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
X-Git-Tag: v3.3.1rc1~236^2
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d03ce4ae3d71ba84e1e30cf0cf221ec89ad5aa67;p=thirdparty%2FPython%2Fcpython.git

Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
stream or a decoder produces data of an unexpected type (i.e. when
io.TextIOWrapper is initialized with a text stream or uses a bytes-to-bytes
codec).
---

d03ce4ae3d71ba84e1e30cf0cf221ec89ad5aa67
diff --cc Modules/_io/textio.c
index ffaa945b557b,a8231bebb65d..cff9c6e9372d
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@@ -257,6 -236,21 +257,25 @@@ incrementalnewlinedecoder_dealloc(nldec
      Py_TYPE(self)->tp_free((PyObject *)self);
  }
  
+ static int
+ check_decoded(PyObject *decoded)
+ {
+     if (decoded == NULL)
+         return -1;
+     if (!PyUnicode_Check(decoded)) {
+         PyErr_Format(PyExc_TypeError,
+                      "decoder should return a string result, not '%.200s'",
+                      Py_TYPE(decoded)->tp_name);
+         Py_DECREF(decoded);
+         return -1;
+     }
++    if (PyUnicode_READY(decoded) < 0) {
++        Py_DECREF(decoded);
++        return -1;
++    }
+     return 0;
+ }
+ 
  #define SEEN_CR   1
  #define SEEN_LF   2
  #define SEEN_CRLF 4
@@@ -286,35 -280,21 +305,26 @@@ _PyIncrementalNewlineDecoder_decode(PyO
          Py_INCREF(output);
      }
  
-     if (output == NULL)
+     if (check_decoded(output) < 0)
          return NULL;
  
-     if (!PyUnicode_Check(output)) {
-         PyErr_SetString(PyExc_TypeError,
-                         "decoder should return a string result");
-         goto error;
-     }
- 
-     if (PyUnicode_READY(output) == -1)
-         goto error;
- 
 -    output_len = PyUnicode_GET_SIZE(output);
 +    output_len = PyUnicode_GET_LENGTH(output);
      if (self->pendingcr && (final || output_len > 0)) {
 -        Py_UNICODE *out;
 -        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
 +        /* Prefix output with CR */
 +        int kind;
 +        PyObject *modified;
 +        char *out;
 +
 +        modified = PyUnicode_New(output_len + 1,
 +                                 PyUnicode_MAX_CHAR_VALUE(output));
          if (modified == NULL)
              goto error;
 -        out = PyUnicode_AS_UNICODE(modified);
 -        out[0] = '\r';
 -        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
 -               output_len * sizeof(Py_UNICODE));
 +        kind = PyUnicode_KIND(modified);
 +        out = PyUnicode_DATA(modified);
 +        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
 +        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
          Py_DECREF(output);
 -        output = modified;
 +        output = modified; /* output remains ready */
          self->pendingcr = 0;
          output_len++;
      }
@@@ -1458,10 -1463,15 +1468,16 @@@ textiowrapper_read_chunk(textio *self, 
      Py_DECREF(chunk_size);
      if (input_chunk == NULL)
          goto fail;
-     assert(PyBytes_Check(input_chunk));
+     if (!PyBytes_Check(input_chunk)) {
+         PyErr_Format(PyExc_TypeError,
+                      "underlying %s() should have returned a bytes object, "
+                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
+                      Py_TYPE(input_chunk)->tp_name);
+         goto fail;
+     }
  
 -    eof = (PyBytes_Size(input_chunk) == 0);
 +    nbytes = PyBytes_Size(input_chunk);
 +    eof = (nbytes == 0);
  
      if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
          decoded_chars = _PyIncrementalNewlineDecoder_decode(
@@@ -1472,18 -1482,10 +1488,15 @@@
              _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
      }
  
-     /* TODO sanity check: isinstance(decoded_chars, unicode) */
-     if (decoded_chars == NULL)
-         goto fail;
-     if (PyUnicode_READY(decoded_chars) == -1)
+     if (check_decoded(decoded_chars) < 0)
          goto fail;
      textiowrapper_set_decoded_chars(self, decoded_chars);
 -    if (PyUnicode_GET_SIZE(decoded_chars) > 0)
 +    nchars = PyUnicode_GET_LENGTH(decoded_chars);
 +    if (nchars > 0)
 +        self->b2cratio = (double) nbytes / nchars;
 +    else
 +        self->b2cratio = 0.0;
 +    if (nchars > 0)
          eof = 0;
  
      if (self->telling) {
@@@ -1534,15 -1543,10 +1554,15 @@@ textiowrapper_read(textio *self, PyObje
          PyObject *decoded;
          if (bytes == NULL)
              goto fail;
 -        decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
 -                                             bytes, Py_True, NULL);
 +
 +        if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
 +            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
 +                                                          bytes, 1);
 +        else
 +            decoded = PyObject_CallMethodObjArgs(
 +                self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
          Py_DECREF(bytes);
-         if (decoded == NULL)
+         if (check_decoded(decoded) < 0)
              goto fail;
  
          result = textiowrapper_get_decoded_chars(self, -1);
@@@ -2159,15 -2159,11 +2186,11 @@@ textiowrapper_seek(textio *self, PyObje
              goto fail;
          }
  
 -        decoded = PyObject_CallMethod(self->decoder, "decode",
 -                                      "Oi", input_chunk, (int)cookie.need_eof);
 +        decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
 +            "Oi", input_chunk, (int)cookie.need_eof);
  
-         if (decoded == NULL)
+         if (check_decoded(decoded) < 0)
              goto fail;
-         if (PyUnicode_READY(decoded) == -1) {
-             Py_DECREF(decoded);
-             goto fail;
-         }
  
          textiowrapper_set_decoded_chars(self, decoded);
  
@@@ -2271,74 -2265,13 +2294,72 @@@ textiowrapper_tell(textio *self, PyObje
      if (saved_state == NULL)
          goto fail;
  
 -    /* Note our initial start point. */
 -    if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
 -        goto fail;
 +#define DECODER_GETSTATE() do { \
 +        PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
 +            _PyIO_str_getstate, NULL); \
 +        if (_state == NULL) \
 +            goto fail; \
 +        if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
 +            Py_DECREF(_state); \
 +            goto fail; \
 +        } \
 +        Py_DECREF(_state); \
 +    } while (0)
 +
-     /* TODO: replace assert with exception */
 +#define DECODER_DECODE(start, len, res) do { \
 +        PyObject *_decoded = _PyObject_CallMethodId( \
 +            self->decoder, &PyId_decode, "y#", start, len); \
-         if (_decoded == NULL) \
++        if (check_decoded(_decoded) < 0) \
 +            goto fail; \
-         assert (PyUnicode_Check(_decoded)); \
 +        res = PyUnicode_GET_LENGTH(_decoded); \
 +        Py_DECREF(_decoded); \
 +    } while (0)
 +
 +    /* Fast search for an acceptable start point, close to our
 +       current pos */
 +    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
 +    skip_back = 1;
 +    assert(skip_back <= PyBytes_GET_SIZE(next_input));
 +    input = PyBytes_AS_STRING(next_input);
 +    while (skip_bytes > 0) {
 +        /* Decode up to temptative start point */
 +        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
 +            goto fail;
 +        DECODER_DECODE(input, skip_bytes, chars_decoded);
 +        if (chars_decoded <= chars_to_skip) {
 +            DECODER_GETSTATE();
 +            if (dec_buffer_len == 0) {
 +                /* Before pos and no bytes buffered in decoder => OK */
 +                cookie.dec_flags = dec_flags;
 +                chars_to_skip -= chars_decoded;
 +                break;
 +            }
 +            /* Skip back by buffered amount and reset heuristic */
 +            skip_bytes -= dec_buffer_len;
 +            skip_back = 1;
 +        }
 +        else {
 +            /* We're too far ahead, skip back a bit */
 +            skip_bytes -= skip_back;
 +            skip_back *= 2;
 +        }
 +    }
 +    if (skip_bytes <= 0) {
 +        skip_bytes = 0;
 +        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
 +            goto fail;
 +    }
  
 -    /* Feed the decoder one byte at a time.  As we go, note the
 -     * nearest "safe start point" before the current location
 -     * (a point where the decoder has nothing buffered, so seek()
 +    /* Note our initial start point. */
 +    cookie.start_pos += skip_bytes;
 +    cookie.chars_to_skip = chars_to_skip;
 +    if (chars_to_skip == 0)
 +        goto finally;
 +
 +    /* We should be close to the desired position.  Now feed the decoder one
 +     * byte at a time until we reach the `chars_to_skip` target.
 +     * As we go, note the nearest "safe start point" before the current
 +     * location (a point where the decoder has nothing buffered, so seek()
       * can safely start from there and advance to this location).
       */
      chars_decoded = 0;
@@@ -2368,12 -2316,11 +2389,11 @@@
      }
      if (input == input_end) {
          /* We didn't get enough decoded data; signal EOF to get more. */
 -        PyObject *decoded = PyObject_CallMethod(
 -            self->decoder, "decode", "yi", "", /* final = */ 1);
 +        PyObject *decoded = _PyObject_CallMethodId(
 +            self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
-         if (decoded == NULL)
+         if (check_decoded(decoded) < 0)
              goto fail;
-         assert (PyUnicode_Check(decoded));
 -        chars_decoded += PyUnicode_GET_SIZE(decoded);
 +        chars_decoded += PyUnicode_GET_LENGTH(decoded);
          Py_DECREF(decoded);
          cookie.need_eof = 1;