git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #1160: Fix compiling large regular expressions on UCS2 builds.
author: Antoine Pitrou <solipsis@pitrou.net>
Tue, 20 Nov 2012 21:30:42 +0000 (22:30 +0100)
committer: Antoine Pitrou <solipsis@pitrou.net>
Tue, 20 Nov 2012 21:30:42 +0000 (22:30 +0100)
Patch by Serhiy Storchaka.

Lib/test/test_re.py
Misc/NEWS
Modules/_sre.c
Modules/sre.h

index 0bceaa292fb40e7a78b1ff28edacac55db9ef03a..ff2c953517840be7f42c054a0e924398d13352c1 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -425,6 +425,12 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.match(u"([\u2222\u2223])",
                                   u"\u2222", re.UNICODE).group(1), u"\u2222")
 
+    def test_big_codesize(self):
+        # Issue #1160
+        r = re.compile('|'.join(('%d'%x for x in range(10000))))
+        self.assertIsNotNone(r.match('1000'))
+        self.assertIsNotNone(r.match('9999'))
+
     def test_anyall(self):
         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                          "a\nb")
index b5b7a5042ce373ede439bb50adfd48a6723766bd..f69dd3c0976cbac6df9bc0fcef2a1a1fb9ed5825 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -151,6 +151,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
+  Patch by Serhiy Storchaka.
+
 - Issue #14313: zipfile now raises NotImplementedError when the compression
   type is unknown.
 
index cd9591796283a0dfd0c99c7bcc14c172cea5c6f8..ab4f269be58ccd10c19d1f41d76566b07d4bff93 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2675,6 +2675,13 @@ _compile(PyObject* self_, PyObject* args)
         PyObject *o = PyList_GET_ITEM(code, i);
         unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
                                               : PyLong_AsUnsignedLong(o);
+        if (value == (unsigned long)-1 && PyErr_Occurred()) {
+            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "regular expression code size limit exceeded");
+            }
+            break;
+        }
         self->code[i] = (SRE_CODE) value;
         if ((unsigned long) self->code[i] != value) {
             PyErr_SetString(PyExc_OverflowError,
@@ -3035,10 +3042,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
                 code += skip-4;
@@ -3056,10 +3061,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
                 code += skip-3;
index d4af05c045e2065d78d3fba38d77a9e5d2e65792..9bfdf7fdfcdab2f5081894e5ce9b07377e2e8bab 100644 (file)
--- a/Modules/sre.h
+++ b/Modules/sre.h
 #include "sre_constants.h"
 
 /* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
+   large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
 
 typedef struct {
     PyObject_VAR_HEAD