From: Serhiy Storchaka Date: Sun, 3 Feb 2013 15:07:32 +0000 (+0200) Subject: Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying X-Git-Tag: v3.3.1rc1~236^2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d03ce4ae3d71ba84e1e30cf0cf221ec89ad5aa67;p=thirdparty%2FPython%2Fcpython.git Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying stream or a decoder produces data of an unexpected type (i.e. when io.TextIOWrapper initialized with text stream or use bytes-to-bytes codec). --- d03ce4ae3d71ba84e1e30cf0cf221ec89ad5aa67 diff --cc Modules/_io/textio.c index ffaa945b557b,a8231bebb65d..cff9c6e9372d --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@@ -257,6 -236,21 +257,25 @@@ incrementalnewlinedecoder_dealloc(nldec Py_TYPE(self)->tp_free((PyObject *)self); } + static int + check_decoded(PyObject *decoded) + { + if (decoded == NULL) + return -1; + if (!PyUnicode_Check(decoded)) { + PyErr_Format(PyExc_TypeError, + "decoder should return a string result, not '%.200s'", + Py_TYPE(decoded)->tp_name); + Py_DECREF(decoded); + return -1; + } ++ if (PyUnicode_READY(decoded) < 0) { ++ Py_DECREF(decoded); ++ return -1; ++ } + return 0; + } + #define SEEN_CR 1 #define SEEN_LF 2 #define SEEN_CRLF 4 @@@ -286,35 -280,21 +305,26 @@@ _PyIncrementalNewlineDecoder_decode(PyO Py_INCREF(output); } - if (output == NULL) + if (check_decoded(output) < 0) return NULL; - if (!PyUnicode_Check(output)) { - PyErr_SetString(PyExc_TypeError, - "decoder should return a string result"); - goto error; - } - - if (PyUnicode_READY(output) == -1) - goto error; - - output_len = PyUnicode_GET_SIZE(output); + output_len = PyUnicode_GET_LENGTH(output); if (self->pendingcr && (final || output_len > 0)) { - Py_UNICODE *out; - PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1); + /* Prefix output with CR */ + int kind; + PyObject *modified; + char *out; + + modified = PyUnicode_New(output_len + 1, + PyUnicode_MAX_CHAR_VALUE(output)); if (modified == NULL) goto error; - out = PyUnicode_AS_UNICODE(modified); - out[0] = '\r'; - memcpy(out + 1, PyUnicode_AS_UNICODE(output), - output_len * sizeof(Py_UNICODE)); + kind = PyUnicode_KIND(modified); + out = PyUnicode_DATA(modified); + PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r'); + memcpy(out + kind, PyUnicode_DATA(output), kind * output_len); Py_DECREF(output); - output = modified; + output = modified; /* output remains ready */ self->pendingcr = 0; output_len++; } @@@ -1458,10 -1463,15 +1468,16 @@@ textiowrapper_read_chunk(textio *self, Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; - assert(PyBytes_Check(input_chunk)); + if (!PyBytes_Check(input_chunk)) { + PyErr_Format(PyExc_TypeError, + "underlying %s() should have returned a bytes object, " + "not '%.200s'", (self->has_read1 ? "read1": "read"), + Py_TYPE(input_chunk)->tp_name); + goto fail; + } - eof = (PyBytes_Size(input_chunk) == 0); + nbytes = PyBytes_Size(input_chunk); + eof = (nbytes == 0); if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { decoded_chars = _PyIncrementalNewlineDecoder_decode( @@@ -1472,18 -1482,10 +1488,15 @@@ _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); } - /* TODO sanity check: isinstance(decoded_chars, unicode) */ - if (decoded_chars == NULL) - goto fail; - if (PyUnicode_READY(decoded_chars) == -1) + if (check_decoded(decoded_chars) < 0) goto fail; textiowrapper_set_decoded_chars(self, decoded_chars); - if (PyUnicode_GET_SIZE(decoded_chars) > 0) + nchars = PyUnicode_GET_LENGTH(decoded_chars); + if (nchars > 0) + self->b2cratio = (double) nbytes / nchars; + else + self->b2cratio = 0.0; + if (nchars > 0) eof = 0; if (self->telling) { @@@ -1534,15 -1543,10 +1554,15 @@@ textiowrapper_read(textio *self, PyObje PyObject *decoded; if (bytes == NULL) goto fail; - decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, - bytes, Py_True, NULL); + + if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) + decoded = _PyIncrementalNewlineDecoder_decode(self->decoder, + bytes, 1); + else + decoded = PyObject_CallMethodObjArgs( + self->decoder, _PyIO_str_decode, bytes, Py_True, NULL); Py_DECREF(bytes); - if (decoded == NULL) + if (check_decoded(decoded) < 0) goto fail; result = textiowrapper_get_decoded_chars(self, -1); @@@ -2159,15 -2159,11 +2186,11 @@@ textiowrapper_seek(textio *self, PyObje goto fail; } - decoded = PyObject_CallMethod(self->decoder, "decode", - "Oi", input_chunk, (int)cookie.need_eof); + decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode, + "Oi", input_chunk, (int)cookie.need_eof); - if (decoded == NULL) + if (check_decoded(decoded) < 0) goto fail; - if (PyUnicode_READY(decoded) == -1) { - Py_DECREF(decoded); - goto fail; - } textiowrapper_set_decoded_chars(self, decoded); @@@ -2271,74 -2265,13 +2294,72 @@@ textiowrapper_tell(textio *self, PyObje if (saved_state == NULL) goto fail; - /* Note our initial start point. */ - if (_textiowrapper_decoder_setstate(self, &cookie) < 0) - goto fail; +#define DECODER_GETSTATE() do { \ + PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \ + _PyIO_str_getstate, NULL); \ + if (_state == NULL) \ + goto fail; \ + if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \ + Py_DECREF(_state); \ + goto fail; \ + } \ + Py_DECREF(_state); \ + } while (0) + - /* TODO: replace assert with exception */ +#define DECODER_DECODE(start, len, res) do { \ + PyObject *_decoded = _PyObject_CallMethodId( \ + self->decoder, &PyId_decode, "y#", start, len); \ - if (_decoded == NULL) \ ++ if (check_decoded(_decoded) < 0) \ + goto fail; \ - assert (PyUnicode_Check(_decoded)); \ + res = PyUnicode_GET_LENGTH(_decoded); \ + Py_DECREF(_decoded); \ + } while (0) + + /* Fast search for an acceptable start point, close to our + current pos */ + skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); + skip_back = 1; + assert(skip_back <= PyBytes_GET_SIZE(next_input)); + input = PyBytes_AS_STRING(next_input); + while (skip_bytes > 0) { + /* Decode up to temptative start point */ + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + DECODER_DECODE(input, skip_bytes, chars_decoded); + if (chars_decoded <= chars_to_skip) { + DECODER_GETSTATE(); + if (dec_buffer_len == 0) { + /* Before pos and no bytes buffered in decoder => OK */ + cookie.dec_flags = dec_flags; + chars_to_skip -= chars_decoded; + break; + } + /* Skip back by buffered amount and reset heuristic */ + skip_bytes -= dec_buffer_len; + skip_back = 1; + } + else { + /* We're too far ahead, skip back a bit */ + skip_bytes -= skip_back; + skip_back *= 2; + } + } + if (skip_bytes <= 0) { + skip_bytes = 0; + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + } - /* Feed the decoder one byte at a time. As we go, note the - * nearest "safe start point" before the current location - * (a point where the decoder has nothing buffered, so seek() + /* Note our initial start point. */ + cookie.start_pos += skip_bytes; + cookie.chars_to_skip = chars_to_skip; + if (chars_to_skip == 0) + goto finally; + + /* We should be close to the desired position. Now feed the decoder one + * byte at a time until we reach the `chars_to_skip` target. + * As we go, note the nearest "safe start point" before the current + * location (a point where the decoder has nothing buffered, so seek() * can safely start from there and advance to this location). */ chars_decoded = 0; @@@ -2368,12 -2316,11 +2389,11 @@@ } if (input == input_end) { /* We didn't get enough decoded data; signal EOF to get more. */ - PyObject *decoded = PyObject_CallMethod( - self->decoder, "decode", "yi", "", /* final = */ 1); + PyObject *decoded = _PyObject_CallMethodId( + self->decoder, &PyId_decode, "yi", "", /* final = */ 1); - if (decoded == NULL) + if (check_decoded(decoded) < 0) goto fail; - assert (PyUnicode_Check(decoded)); - chars_decoded += PyUnicode_GET_SIZE(decoded); + chars_decoded += PyUnicode_GET_LENGTH(decoded); Py_DECREF(decoded); cookie.need_eof = 1;