Issue #19327: Fixed the working of regular expressions with too big charset.

author Serhiy Storchaka <storchaka@gmail.com>

Thu, 24 Oct 2013 19:02:58 +0000 (22:02 +0300)

committer Serhiy Storchaka <storchaka@gmail.com>

Thu, 24 Oct 2013 19:02:58 +0000 (22:02 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 24 Oct 2013 19:02:58 +0000 (22:02 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Thu, 24 Oct 2013 19:02:58 +0000 (22:02 +0300)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py

index b6b377f25bc742fafc6ff95bf59734a8d964d980..a80c74dc6005725be99aded605b99ae2a4161f9c 100644 (file)
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -345,7 +345,7 @@ def _optimize_unicode(charset, fixup):
      else:
          code = 'I'
      # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tobytes()
+    mapping = array.array('B', mapping).tobytes()
      # Convert byte array to word array
      mapping = array.array(code, mapping)
      assert mapping.itemsize == _sre.CODESIZE
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py

index 2104437408df89fb46fdb7769bf80c65df85dc44..f093812442623d791741276af20a55ff548d35d3 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -428,6 +428,9 @@ class ReTests(unittest.TestCase):
                                    "\u2222").group(1), "\u2222")
          self.assertEqual(re.match("([\u2222\u2223])",
                                    "\u2222", re.UNICODE).group(1), "\u2222")
+        r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
+        self.assertEqual(re.match(r,
+                                  "\uff01", re.UNICODE).group(), "\uff01")
  
      def test_big_codesize(self):
          # Issue #1160
diff --git a/Misc/NEWS b/Misc/NEWS

index 58745f9fd88d233ddfc3685f781440116770a0d6..c05bffc882d3d62deca8e177dffda73ffc3c8ed8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -81,6 +81,8 @@ Core and Builtins
  Library
  -------
  
+- Issue #19327: Fixed the working of regular expressions with too big charset.
+
  - Issue #19350: Increasing the test coverage of macurl2path. Patch by Colin
    Williams.
  
diff --git a/Modules/_sre.c b/Modules/_sre.c

index 5bcc387977aec5aec2e5c473871f5672071ee10a..787809fd0f51aac602930241b0412c009b93fb6d 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -451,7 +451,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
              count = *(set++);
  
              if (sizeof(SRE_CODE) == 2) {
-                block = ((char*)set)[ch >> 8];
+                block = ((unsigned char*)set)[ch >> 8];
                  set += 128;
                  if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
                      return ok;
@@ -461,7 +461,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
                  /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
                   * warnings when c's type supports only numbers < N+1 */
                  if (!(ch & ~65535))
-                    block = ((char*)set)[ch >> 8];
+                    block = ((unsigned char*)set)[ch >> 8];
                  else
                      block = -1;
                  set += 64;
author	Serhiy Storchaka <storchaka@gmail.com>
	Thu, 24 Oct 2013 19:02:58 +0000 (22:02 +0300)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Thu, 24 Oct 2013 19:02:58 +0000 (22:02 +0300)
Lib/sre_compile.py		patch | blob | blame | history
Lib/test/test_re.py		patch | blob | blame | history
Misc/NEWS		patch | blob | blame | history
Modules/_sre.c		patch | blob | blame | history