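Backport rev 51448: Patch #1541585, fix buffer overrun when performing repr() on a unicode string in a build with wide unicode (UCS-4) support.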

author Georg Brandl <georg@python.org>

Tue, 22 Aug 2006 08:25:33 +0000 (08:25 +0000)

committer Georg Brandl <georg@python.org>

Tue, 22 Aug 2006 08:25:33 +0000 (08:25 +0000)
author Georg Brandl <georg@python.org>
Tue, 22 Aug 2006 08:25:33 +0000 (08:25 +0000)
committer Georg Brandl <georg@python.org>
Tue, 22 Aug 2006 08:25:33 +0000 (08:25 +0000)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index f70da9d0ab8c8e17aac85abd7b420df5cbce14f0..7c3e4d63e0025844156aabe28fd40a25c4223440 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -92,6 +92,10 @@ class UnicodeTest(
                  "\\xfe\\xff'")
              testrepr = repr(u''.join(map(unichr, xrange(256))))
              self.assertEqual(testrepr, latin1repr)
+            # Test repr works on wide unicode escapes without overflow.
+            self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096),
+                             repr(u"\U00010000" * 39 + u"\uffff" * 4096))
+
  
      def test_count(self):
          string_tests.CommonTest.test_count(self)
diff --git a/Misc/ACKS b/Misc/ACKS

index e1ebb9a891f6f5368edf3c5475c139fc24c735ce..8806c801afd509bc5019ca67d8b6d452b1d2d557 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -348,6 +348,7 @@ Detlef Lannert
  Soren Larsen
  Piers Lauder
  Ben Laurie
+Simon Law
  Chris Lawrence
  Christopher Lee
  Inyeol Lee
diff --git a/Misc/NEWS b/Misc/NEWS

index 4cb07a87b2274c36cd220457dadaab772825e18c..dd8cdf610eab5c5555b1f416e137055c463524e2 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.4.4c1?
  Core and builtins
  -----------------
  
+- Patch #1541585: fix buffer overrun when performing repr() on
+  a unicode string in a build with wide unicode (UCS-4) support.
+
  - Bug #1536786: buffer comparison could emit a RuntimeWarning.
  
  - Bug #1535165: fixed a segfault in input() and raw_input() when
@@ -33,6 +36,7 @@ Core and builtins
  
  - Patch #1488312, Fix memory alignment problem on SPARC in unicode
  
+
  Extension Modules
  -----------------
  
@@ -72,6 +76,7 @@ Extension Modules
    methods now allow their database parameter to be None as the
    sleepycat API allows.
  
+
  Library
  -------
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 7c69d68a9d33589b848200208a7659f2e85c7f9f..bb6a7cbf515bba8806f0ad556ba7b894eb8dbabf 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1970,7 +1970,28 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
  
      static const char *hexdigit = "0123456789abcdef";
  
-    repr = PyString_FromStringAndSize(NULL, 2 + 6*size + 1);
+    /* Initial allocation is based on the longest-possible unichr
+       escape.
+
+       In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
+       unichr, so in this case it's the longest unichr escape. In
+       narrow (UTF-16) builds this is five chars per source unichr
+       since there are two unichrs in the surrogate pair, so in narrow
+       (UTF-16) builds it's not the longest unichr escape.
+
+       In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
+       so in the narrow (UTF-16) build case it's the longest unichr
+       escape.
+    */
+
+    repr = PyString_FromStringAndSize(NULL,
+        2
+#ifdef Py_UNICODE_WIDE
+        + 10*size
+#else
+        + 6*size
+#endif
+        + 1);
      if (repr == NULL)
          return NULL;
  
@@ -1995,15 +2016,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
  #ifdef Py_UNICODE_WIDE
          /* Map 21-bit characters to '\U00xxxxxx' */
          else if (ch >= 0x10000) {
-           int offset = p - PyString_AS_STRING(repr);
-
-           /* Resize the string if necessary */
-           if (offset + 12 > PyString_GET_SIZE(repr)) {
-               if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100))
-                   return NULL;
-               p = PyString_AS_STRING(repr) + offset;
-           }
-
              *p++ = '\\';
              *p++ = 'U';
              *p++ = hexdigit[(ch >> 28) & 0x0000000F];
@@ -2016,8 +2028,8 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
              *p++ = hexdigit[ch & 0x0000000F];
             continue;
          }
-#endif
-       /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
+#else
+       /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
         else if (ch >= 0xD800 && ch < 0xDC00) {
             Py_UNICODE ch2;
             Py_UCS4 ucs;
@@ -2042,6 +2054,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
             s--;
             size++;
         }
+#endif
  
          /* Map 16-bit characters to '\uxxxx' */
          if (ch >= 256) {
author	Georg Brandl <georg@python.org>
	Tue, 22 Aug 2006 08:25:33 +0000 (08:25 +0000)
committer	Georg Brandl <georg@python.org>
	Tue, 22 Aug 2006 08:25:33 +0000 (08:25 +0000)
Lib/test/test_unicode.py		patch | blob | blame | history
Misc/ACKS		patch | blob | blame | history
Misc/NEWS		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history