From: Martin v. Löwis Date: Sat, 13 Dec 2008 13:20:46 +0000 (+0000) Subject: Backported r55839 and r61350 X-Git-Tag: v2.4.6c1~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9b8de84a89601b24bbff56c33b2a4581cfa8c72a;p=thirdparty%2FPython%2Fcpython.git Backported r55839 and r61350 Issue #4469: Prevent expandtabs() on string and unicode objects from causing a segfault when a large width is passed on 32-bit platforms. --- diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 82632f10ebb8..bc1976b6ac96 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1,4 +1,6 @@ + import unittest +import sys from test import test_support, string_tests @@ -19,6 +21,15 @@ class StrTest( string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) self.assertRaises(OverflowError, '%c'.__mod__, 0x1234) + def test_expandtabs_overflows_gracefully(self): + # This test only affects 32-bit platforms because expandtabs can only take + # an int as the max value, not a 64-bit C long. If expandtabs is changed + # to take a 64-bit long, this test should apply to all platforms. + if sys.maxint > (1 << 32): + return + self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxint) + + def test_main(): test_support.run_unittest(StrTest) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 7c3e4d63e002..d93c56cd29c9 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -756,6 +756,14 @@ class UnicodeTest( self.assertEqual(repr(s1()), '\\n') self.assertEqual(repr(s2()), '\\n') + def test_expandtabs_overflows_gracefully(self): + # This test only affects 32-bit platforms because expandtabs can only take + # an int as the max value, not a 64-bit C long. If expandtabs is changed + # to take a 64-bit long, this test should apply to all platforms. + if sys.maxint > (1 << 32): + return + self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint) + def test_main(): test_support.run_unittest(UnicodeTest) diff --git a/Misc/NEWS b/Misc/NEWS index 7c97134d44dc..9dca280d641b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 2.4.6c1? Core and builtins ----------------- +- Issue #4469: Prevent expandtabs() on string and unicode + objects from causing a segfault when a large width is passed + on 32-bit platforms. + - Issue #4317: Fixed a crash in the imageop.rgb2rgb8() function. - Issue #4230: Fix a crash when a class has a custom __getattr__ and an diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 8a2530aaf379..4e4f26940fb5 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -2759,9 +2759,9 @@ If tabsize is not given, a tab size of 8 characters is assumed."); static PyObject* string_expandtabs(PyStringObject *self, PyObject *args) { - const char *e, *p; + const char *e, *p, *qe; char *q; - int i, j; + int i, j, incr; PyObject *u; int tabsize = 8; @@ -2769,46 +2769,70 @@ string_expandtabs(PyStringObject *self, PyObject *args) return NULL; /* First pass: determine size of output string */ - i = j = 0; - e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); + i = 0; /* chars up to and including most recent \n or \r */ + j = 0; /* chars since most recent \n or \r (use in tab calculations) */ + e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */ for (p = PyString_AS_STRING(self); p < e; p++) if (*p == '\t') { - if (tabsize > 0) - j += tabsize - (j % tabsize); + if (tabsize > 0) { + incr = tabsize - (j % tabsize); + if (j > INT_MAX - incr) + goto overflow1; + j += incr; + } } else { + if (j > INT_MAX - 1) + goto overflow1; j++; if (*p == '\n' || *p == '\r') { + if (i > INT_MAX - j) + goto overflow1; i += j; j = 0; } } + if (i > INT_MAX - j) + goto overflow1; + /* Second pass: create output string and fill it */ u = PyString_FromStringAndSize(NULL, i + j); if (!u) return NULL; - j = 0; - q = PyString_AS_STRING(u); + j = 0; /* same as in first pass */ + q = PyString_AS_STRING(u); /* next output char */ + qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */ for (p = PyString_AS_STRING(self); p < e; p++) if (*p == '\t') { if (tabsize > 0) { i = tabsize - (j % tabsize); j += i; - while (i--) + while (i--) { + if (q >= qe) + goto overflow2; *q++ = ' '; + } } } else { - j++; + if (q >= qe) + goto overflow2; *q++ = *p; + j++; if (*p == '\n' || *p == '\r') j = 0; } return u; + + overflow2: + Py_DECREF(u); + overflow1: + PyErr_SetString(PyExc_OverflowError, "new string is too long"); + return NULL; } static PyObject * diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b29e7ff5442e..e941076bac82 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5207,7 +5207,8 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args) Py_UNICODE *e; Py_UNICODE *p; Py_UNICODE *q; - int i, j; + Py_UNICODE *qe; + int i, j, incr; PyUnicodeObject *u; int tabsize = 8; @@ -5215,46 +5216,70 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args) return NULL; /* First pass: determine size of output string */ - i = j = 0; - e = self->str + self->length; + i = 0; /* chars up to and including most recent \n or \r */ + j = 0; /* chars since most recent \n or \r (use in tab calculations) */ + e = self->str + self->length; /* end of input */ for (p = self->str; p < e; p++) if (*p == '\t') { - if (tabsize > 0) - j += tabsize - (j % tabsize); + if (tabsize > 0) { + incr = tabsize - (j % tabsize); /* cannot overflow */ + if (j > INT_MAX - incr) + goto overflow1; + j += incr; + } } else { + if (j > INT_MAX - 1) + goto overflow1; j++; if (*p == '\n' || *p == '\r') { + if (i > INT_MAX - j) + goto overflow1; i += j; j = 0; } } + if (i > INT_MAX - j) + goto overflow1; + /* Second pass: create output string and fill it */ u = _PyUnicode_New(i + j); if (!u) return NULL; - j = 0; - q = u->str; + j = 0; /* same as in first pass */ + q = u->str; /* next output char */ + qe = u->str + u->length; /* end of output */ for (p = self->str; p < e; p++) if (*p == '\t') { if (tabsize > 0) { i = tabsize - (j % tabsize); j += i; - while (i--) + while (i--) { + if (q >= qe) + goto overflow2; *q++ = ' '; + } } } else { - j++; + if (q >= qe) + goto overflow2; *q++ = *p; + j++; if (*p == '\n' || *p == '\r') j = 0; } return (PyObject*) u; + + overflow2: + Py_DECREF(u); + overflow1: + PyErr_SetString(PyExc_OverflowError, "new string is too long"); + return NULL; } PyDoc_STRVAR(find__doc__,