Backported r55839 and r61350

author Martin v. Löwis <martin@v.loewis.de>

Sat, 13 Dec 2008 13:20:46 +0000 (13:20 +0000)

committer Martin v. Löwis <martin@v.loewis.de>

Sat, 13 Dec 2008 13:20:46 +0000 (13:20 +0000)
author Martin v. Löwis <martin@v.loewis.de>
Sat, 13 Dec 2008 13:20:46 +0000 (13:20 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Sat, 13 Dec 2008 13:20:46 +0000 (13:20 +0000)
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py

index 82632f10ebb86c86f4c20b21be5de1c90fc45a4d..bc1976b6ac96c6c7713581609244365002989e62 100644 (file)
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -1,4 +1,6 @@
+
  import unittest
+import sys
  from test import test_support, string_tests
  
  
@@ -19,6 +21,15 @@ class StrTest(
          string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
          self.assertRaises(OverflowError, '%c'.__mod__, 0x1234)
  
+    def test_expandtabs_overflows_gracefully(self):
+        # This test only applies to 32-bit platforms because expandtabs takes
+        # its tab size as a C int, not a 64-bit C long.  If expandtabs is changed
+        # to take a 64-bit long, this test should apply to all platforms.
+        if sys.maxint > (1 << 32):
+            return
+        self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxint)
+
+
  def test_main():
      test_support.run_unittest(StrTest)
  
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 7c3e4d63e0025844156aabe28fd40a25c4223440..d93c56cd29c96f7798654c2e71b5eb5dfcd31c32 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -756,6 +756,14 @@ class UnicodeTest(
          self.assertEqual(repr(s1()), '\\n')
          self.assertEqual(repr(s2()), '\\n')
  
+    def test_expandtabs_overflows_gracefully(self):
+        # This test only applies to 32-bit platforms because expandtabs takes
+        # its tab size as a C int, not a 64-bit C long.  If expandtabs is changed
+        # to take a 64-bit long, this test should apply to all platforms.
+        if sys.maxint > (1 << 32):
+            return
+        self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
+
  def test_main():
      test_support.run_unittest(UnicodeTest)
  
diff --git a/Misc/NEWS b/Misc/NEWS

index 7c97134d44dc503aec1aa58c97ca4c8ebebb0f5c..9dca280d641b29ab71aaec9abbef6bada79ad944 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's New in Python 2.4.6c1?
  Core and builtins
  -----------------
  
+- Issue #4469: Prevent expandtabs() on string and unicode objects from
+  causing a segfault when a large width is passed on 32-bit platforms;
+  an OverflowError is raised instead.
+
  - Issue #4317: Fixed a crash in the imageop.rgb2rgb8() function.
  
  - Issue #4230: Fix a crash when a class has a custom __getattr__ and an
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 8a2530aaf3797eebd5ce3e5d4769e6f080ef748e..4e4f26940fb511c292a8e49822bdef42957e924f 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -2759,9 +2759,9 @@ If tabsize is not given, a tab size of 8 characters is assumed.");
  static PyObject*
  string_expandtabs(PyStringObject *self, PyObject *args)
  {
-    const char *e, *p;
+    const char *e, *p, *qe;
      char *q;
-    int i, j;
+    int i, j, incr;
      PyObject *u;
      int tabsize = 8;
  
@@ -2769,46 +2769,70 @@ string_expandtabs(PyStringObject *self, PyObject *args)
         return NULL;
  
      /* First pass: determine size of output string */
-    i = j = 0;
-    e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
+    i = 0; /* chars up to and including most recent \n or \r */
+    j = 0; /* chars since most recent \n or \r (use in tab calculations) */
+    e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
      for (p = PyString_AS_STRING(self); p < e; p++)
          if (*p == '\t') {
-           if (tabsize > 0)
-               j += tabsize - (j % tabsize);
+           if (tabsize > 0) {
+               incr = tabsize - (j % tabsize);
+               if (j > INT_MAX - incr)
+                   goto overflow1;
+               j += incr;
+            }
         }
          else {
+           if (j > INT_MAX - 1)
+               goto overflow1;
              j++;
              if (*p == '\n' || *p == '\r') {
+               if (i > INT_MAX - j)
+                   goto overflow1;
                  i += j;
                  j = 0;
              }
          }
  
+    if (i > INT_MAX - j)
+       goto overflow1;
+
      /* Second pass: create output string and fill it */
      u = PyString_FromStringAndSize(NULL, i + j);
      if (!u)
          return NULL;
  
-    j = 0;
-    q = PyString_AS_STRING(u);
+    j = 0; /* same as in first pass */
+    q = PyString_AS_STRING(u); /* next output char */
+    qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
  
      for (p = PyString_AS_STRING(self); p < e; p++)
          if (*p == '\t') {
             if (tabsize > 0) {
                 i = tabsize - (j % tabsize);
                 j += i;
-               while (i--)
+               while (i--) {
+                   if (q >= qe)
+                       goto overflow2;
                     *q++ = ' ';
+               }
             }
         }
         else {
-            j++;
+           if (q >= qe)
+               goto overflow2;
             *q++ = *p;
+            j++;
              if (*p == '\n' || *p == '\r')
                  j = 0;
          }
  
      return u;
+
+  overflow2:
+    Py_DECREF(u);
+  overflow1:
+    PyErr_SetString(PyExc_OverflowError, "new string is too long");
+    return NULL;
  }
  
  static PyObject *
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index b29e7ff5442e423cf00e85262f3480e83d941e5f..e941076bac8202d513d1c8e3eddac113dbaa6ea8 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5207,7 +5207,8 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
      Py_UNICODE *e;
      Py_UNICODE *p;
      Py_UNICODE *q;
-    int i, j;
+    Py_UNICODE *qe;
+    int i, j, incr;
      PyUnicodeObject *u;
      int tabsize = 8;
  
@@ -5215,46 +5216,70 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
         return NULL;
  
      /* First pass: determine size of output string */
-    i = j = 0;
-    e = self->str + self->length;
+    i = 0; /* chars up to and including most recent \n or \r */
+    j = 0; /* chars since most recent \n or \r (use in tab calculations) */
+    e = self->str + self->length; /* end of input */
      for (p = self->str; p < e; p++)
          if (*p == '\t') {
-           if (tabsize > 0)
-               j += tabsize - (j % tabsize);
+           if (tabsize > 0) {
+               incr = tabsize - (j % tabsize); /* cannot overflow */
+               if (j > INT_MAX - incr)
+                   goto overflow1;
+               j += incr;
+            }
         }
          else {
+           if (j > INT_MAX - 1)
+               goto overflow1;
              j++;
              if (*p == '\n' || *p == '\r') {
+               if (i > INT_MAX - j)
+                   goto overflow1;
                  i += j;
                  j = 0;
              }
          }
  
+    if (i > INT_MAX - j)
+       goto overflow1;
+
      /* Second pass: create output string and fill it */
      u = _PyUnicode_New(i + j);
      if (!u)
          return NULL;
  
-    j = 0;
-    q = u->str;
+    j = 0; /* same as in first pass */
+    q = u->str; /* next output char */
+    qe = u->str + u->length; /* end of output */
  
      for (p = self->str; p < e; p++)
          if (*p == '\t') {
             if (tabsize > 0) {
                 i = tabsize - (j % tabsize);
                 j += i;
-               while (i--)
+               while (i--) {
+                   if (q >= qe)
+                       goto overflow2;
                     *q++ = ' ';
+                }
             }
         }
         else {
-            j++;
+           if (q >= qe)
+               goto overflow2;
             *q++ = *p;
+            j++;
              if (*p == '\n' || *p == '\r')
                  j = 0;
          }
  
      return (PyObject*) u;
+
+  overflow2:
+    Py_DECREF(u);
+  overflow1:
+    PyErr_SetString(PyExc_OverflowError, "new string is too long");
+    return NULL;
  }
  
  PyDoc_STRVAR(find__doc__,
author	Martin v. Löwis <martin@v.loewis.de>
	Sat, 13 Dec 2008 13:20:46 +0000 (13:20 +0000)
committer	Martin v. Löwis <martin@v.loewis.de>
	Sat, 13 Dec 2008 13:20:46 +0000 (13:20 +0000)
Lib/test/test_str.py		patch \| blob \| blame \| history
Lib/test/test_unicode.py		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history
Objects/stringobject.c		patch \| blob \| blame \| history
Objects/unicodeobject.c		patch \| blob \| blame \| history