From: Benjamin Peterson Date: Mon, 29 Sep 2014 22:50:06 +0000 (-0400) Subject: merge 3.3 (closes #22518) X-Git-Tag: v3.4.3rc1~601 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2b76ce6d27c5395d88e7aef3a2bab811afc5d8cb;p=thirdparty%2FPython%2Fcpython.git merge 3.3 (closes #22518) --- 2b76ce6d27c5395d88e7aef3a2bab811afc5d8cb diff --cc Objects/unicodeobject.c index d9c131cff9f8,07832bacfa01..3da09ef92c0c --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -4086,118 -4162,71 +4086,127 @@@ unicode_decode_call_errorhandler_wchar goto onError; } - if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) { - /* need more space? (at least enough for what we - have+the replacement+the rest of the string (starting - at the new input position), so we won't have to check space - when there are no errors in the rest of the string) */ - Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos; - if (requiredsize > PY_SSIZE_T_MAX - replen) - goto overflow; - requiredsize += replen; - if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) - goto overflow; - requiredsize += insize - newpos; - if (requiredsize > outsize) { - if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) - goto onError; - } - if (unicode_widen(output, *outpos, - PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) - goto onError; - _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen); - *outpos += replen; - } - else { - wchar_t *repwstr; - Py_ssize_t repwlen; - repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); - if (repwstr == NULL) + repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); + if (repwstr == NULL) + goto onError; + /* need more space? (at least enough for what we + have+the replacement+the rest of the string (starting + at the new input position), so we won't have to check space + when there are no errors in the rest of the string) */ - requiredsize = *outpos + repwlen + insize-newpos; ++ requiredsize = *outpos; ++ if (requiredsize > PY_SSIZE_T_MAX - repwlen) ++ goto overflow; ++ requiredsize += repwlen; ++ if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) ++ goto overflow; ++ requiredsize += insize - newpos; + if (requiredsize > outsize) { - if (requiredsize < 2*outsize) ++ if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) + requiredsize = 2*outsize; + if (unicode_resize(output, requiredsize) < 0) goto onError; - /* need more space? (at least enough for what we - have+the replacement+the rest of the string (starting - at the new input position), so we won't have to check space - when there are no errors in the rest of the string) */ - requiredsize = *outpos; - if (requiredsize > PY_SSIZE_T_MAX - repwlen) - goto overflow; - requiredsize += repwlen; - if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) - goto overflow; - requiredsize += insize - newpos; - if (requiredsize > outsize) { - if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) - goto onError; - } - wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); - *outpos += repwlen; } + wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); + *outpos += repwlen; - *endinpos = newpos; *inptr = *input + newpos; /* we made it! */ - res = 0; - - onError: Py_XDECREF(restuple); - return res; + return 0; + overflow: + PyErr_SetString(PyExc_OverflowError, + "decoded result is too long for a Python string"); - goto onError; ++ + onError: + Py_XDECREF(restuple); + return -1; +} +#endif /* HAVE_MBCS */ + +static int +unicode_decode_call_errorhandler_writer( + const char *errors, PyObject **errorHandler, + const char *encoding, const char *reason, + const char **input, const char **inend, Py_ssize_t *startinpos, + Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, + _PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */) +{ + static char *argparse = "O!n;decoding error handler must return (str, int) tuple"; + + PyObject *restuple = NULL; + PyObject *repunicode = NULL; + Py_ssize_t insize; + Py_ssize_t newpos; + Py_ssize_t replen; + PyObject *inputobj = NULL; + + if (*errorHandler == NULL) { + *errorHandler = PyCodec_LookupError(errors); + if (*errorHandler == NULL) + goto onError; + } + + make_decode_exception(exceptionObject, + encoding, + *input, *inend - *input, + *startinpos, *endinpos, + reason); + if (*exceptionObject == NULL) + goto onError; + + restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); + if (restuple == NULL) + goto onError; + if (!PyTuple_Check(restuple)) { + PyErr_SetString(PyExc_TypeError, &argparse[4]); + goto onError; + } + if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) + goto onError; + + /* Copy back the bytes variables, which might have been modified by the + callback */ + inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); + if (!inputobj) + goto onError; + if (!PyBytes_Check(inputobj)) { + PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); + } + *input = PyBytes_AS_STRING(inputobj); + insize = PyBytes_GET_SIZE(inputobj); + *inend = *input + insize; + /* we can DECREF safely, as the exception has another reference, + so the object won't go away. */ + Py_DECREF(inputobj); + + if (newpos<0) + newpos = insize+newpos; + if (newpos<0 || newpos>insize) { + PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos); + goto onError; + } + + if (PyUnicode_READY(repunicode) < 0) + goto onError; + replen = PyUnicode_GET_LENGTH(repunicode); + writer->min_length += replen; + if (replen > 1) + writer->overallocate = 1; + if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1) + goto onError; + + *endinpos = newpos; + *inptr = *input + newpos; + + /* we made it! */ + Py_XDECREF(restuple); + return 0; + + onError: + Py_XDECREF(restuple); + return -1; } /* --- UTF-7 Codec -------------------------------------------------------- */