From 79ba8e53aa19c76bd77345fd4782ad6b3372eaf8 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Tue, 22 Aug 2006 08:25:33 +0000 Subject: [PATCH] Backport rev 51448: - Patch #1541585: fix buffer overrun when performing repr() on a unicode string in a build with wide unicode (UCS-4) support. --- Lib/test/test_unicode.py | 4 ++++ Misc/ACKS | 1 + Misc/NEWS | 5 +++++ Objects/unicodeobject.c | 37 +++++++++++++++++++++++++------------ 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index f70da9d0ab8c..7c3e4d63e002 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -92,6 +92,10 @@ class UnicodeTest( "\\xfe\\xff'") testrepr = repr(u''.join(map(unichr, xrange(256)))) self.assertEqual(testrepr, latin1repr) + # Test repr works on wide unicode escapes without overflow. + self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096), + repr(u"\U00010000" * 39 + u"\uffff" * 4096)) + def test_count(self): string_tests.CommonTest.test_count(self) diff --git a/Misc/ACKS b/Misc/ACKS index e1ebb9a891f6..8806c801afd5 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -348,6 +348,7 @@ Detlef Lannert Soren Larsen Piers Lauder Ben Laurie +Simon Law Chris Lawrence Christopher Lee Inyeol Lee diff --git a/Misc/NEWS b/Misc/NEWS index 4cb07a87b227..dd8cdf610eab 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.4.4c1? Core and builtins ----------------- +- Patch #1541585: fix buffer overrun when performing repr() on + a unicode string in a build with wide unicode (UCS-4) support. + - Bug #1536786: buffer comparison could emit a RuntimeWarning. - Bug #1535165: fixed a segfault in input() and raw_input() when @@ -33,6 +36,7 @@ Core and builtins - Patch #1488312, Fix memory alignment problem on SPARC in unicode + Extension Modules ----------------- @@ -72,6 +76,7 @@ Extension Modules methods now allow their database parameter to be None as the sleepycat API allows. + Library ------- diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7c69d68a9d33..bb6a7cbf515b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1970,7 +1970,28 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, static const char *hexdigit = "0123456789abcdef"; - repr = PyString_FromStringAndSize(NULL, 2 + 6*size + 1); + /* Initial allocation is based on the longest-possible unichr + escape. + + In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source + unichr, so in this case it's the longest unichr escape. In + narrow (UTF-16) builds this is five chars per source unichr + since there are two unichrs in the surrogate pair, so in narrow + (UTF-16) builds it's not the longest unichr escape. + + In wide or narrow builds '\uxxxx' is 6 chars per source unichr, + so in the narrow (UTF-16) build case it's the longest unichr + escape. + */ + + repr = PyString_FromStringAndSize(NULL, + 2 +#ifdef Py_UNICODE_WIDE + + 10*size +#else + + 6*size +#endif + + 1); if (repr == NULL) return NULL; @@ -1995,15 +2016,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, #ifdef Py_UNICODE_WIDE /* Map 21-bit characters to '\U00xxxxxx' */ else if (ch >= 0x10000) { - int offset = p - PyString_AS_STRING(repr); - - /* Resize the string if necessary */ - if (offset + 12 > PyString_GET_SIZE(repr)) { - if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100)) - return NULL; - p = PyString_AS_STRING(repr) + offset; - } - *p++ = '\\'; *p++ = 'U'; *p++ = hexdigit[(ch >> 28) & 0x0000000F]; @@ -2016,8 +2028,8 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, *p++ = hexdigit[ch & 0x0000000F]; continue; } -#endif - /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ +#else + /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ else if (ch >= 0xD800 && ch < 0xDC00) { Py_UNICODE ch2; Py_UCS4 ucs; @@ -2042,6 +2054,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, s--; size++; } +#endif /* Map 16-bit characters to '\uxxxx' */ if (ch >= 256) { -- 2.47.3