From: Serhiy Storchaka Date: Thu, 24 Oct 2013 19:02:58 +0000 (+0300) Subject: Issue #19327: Fixed the working of regular expressions with too big charset. X-Git-Tag: v3.3.3rc1~23 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=be80fc9a843e3c51d1030d3eab52d6287e5aef3a;p=thirdparty%2FPython%2Fcpython.git Issue #19327: Fixed the working of regular expressions with too big charset. --- diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index b6b377f25bc7..a80c74dc6005 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -345,7 +345,7 @@ def _optimize_unicode(charset, fixup): else: code = 'I' # Convert block indices to byte array of 256 bytes - mapping = array.array('b', mapping).tobytes() + mapping = array.array('B', mapping).tobytes() # Convert byte array to word array mapping = array.array(code, mapping) assert mapping.itemsize == _sre.CODESIZE diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 2104437408df..f09381244262 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -428,6 +428,9 @@ class ReTests(unittest.TestCase): "\u2222").group(1), "\u2222") self.assertEqual(re.match("([\u2222\u2223])", "\u2222", re.UNICODE).group(1), "\u2222") + r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255))) + self.assertEqual(re.match(r, + "\uff01", re.UNICODE).group(), "\uff01") def test_big_codesize(self): # Issue #1160 diff --git a/Misc/NEWS b/Misc/NEWS index 58745f9fd88d..c05bffc882d3 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -81,6 +81,8 @@ Core and Builtins Library ------- +- Issue #19327: Fixed the working of regular expressions with too big charset. + - Issue #19350: Increasing the test coverage of macurl2path. Patch by Colin Williams. diff --git a/Modules/_sre.c b/Modules/_sre.c index 5bcc387977ae..787809fd0f51 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -451,7 +451,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) count = *(set++); if (sizeof(SRE_CODE) == 2) { - block = ((char*)set)[ch >> 8]; + block = ((unsigned char*)set)[ch >> 8]; set += 128; if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15))) return ok; @@ -461,7 +461,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids * warnings when c's type supports only numbers < N+1 */ if (!(ch & ~65535)) - block = ((char*)set)[ch >> 8]; + block = ((unsigned char*)set)[ch >> 8]; else block = -1; set += 64;