]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.10] gh-98740: Fix validation of conditional expressions in RE (GH-98764) (GH-99046)
authorSerhiy Storchaka <storchaka@gmail.com>
Thu, 3 Nov 2022 10:18:50 +0000 (12:18 +0200)
committerGitHub <noreply@github.com>
Thu, 3 Nov 2022 10:18:50 +0000 (12:18 +0200)
In very rare circumstances the JUMP opcode could be confused with the
argument of the opcode in the "then" part which doesn't end with the
JUMP opcode. This led to incorrect detection of the final JUMP opcode
and incorrect calculation of the size of the subexpression.

NOTE: Changed return value of functions _validate_inner() and
_validate_charset() in Modules/_sre/sre.c.  Now they return 0 on success,
-1 on failure, and 1 if the last op is JUMP (which usually is a failure).
Previously they returned 1 on success and 0 on failure.
(cherry picked from commit e9ac890c0273aee413aa528cc202c3efa29f1d7a)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Doc/library/re.rst
Lib/test/test_re.py
Misc/NEWS.d/next/Library/2022-10-27-12-56-38.gh-issue-98740.ZoqqGM.rst [new file with mode: 0644]
Modules/_sre.c

index e7d1f0560b2ab8b0174e4f2b0bb1130722d4bfdb..40e2dccaae3fac73fe997a8b7b6dbf3d23854ad2 100644 (file)
@@ -421,6 +421,9 @@ The special characters are:
    some fixed length.  Patterns which start with negative lookbehind assertions may
    match at the beginning of the string being searched.
 
+.. _re-conditional-expression:
+.. index:: single: (?(; in regular expressions
+
 ``(?(id/name)yes-pattern|no-pattern)``
    Will try to match with ``yes-pattern`` if the group with given *id* or
    *name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is
index 62bfc3a7aa43b565e77d411ec319e25029dd0dfa..010c52e75c91c372a62caab9a57393656c0df84b 100644 (file)
@@ -578,6 +578,11 @@ class ReTests(unittest.TestCase):
         self.checkPatternError(r'()(?(2)a)',
                                "invalid group reference 2", 5)
 
+    def test_re_groupref_exists_validation_bug(self):
+        for i in range(256):
+            with self.subTest(code=i):
+                re.compile(r'()(?(1)\x%02x?)' % i)
+
     def test_re_groupref_overflow(self):
         from sre_constants import MAXGROUPS
         self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
diff --git a/Misc/NEWS.d/next/Library/2022-10-27-12-56-38.gh-issue-98740.ZoqqGM.rst b/Misc/NEWS.d/next/Library/2022-10-27-12-56-38.gh-issue-98740.ZoqqGM.rst
new file mode 100644 (file)
index 0000000..887d506
--- /dev/null
@@ -0,0 +1,3 @@
+Fix internal error in the :mod:`re` module which in very rare circumstances
+prevented compilation of a regular expression containing a :ref:`conditional
+expression <re-conditional-expression>` without the "else" branch.
index 911626dafc977f0ffb26b67fc25d8d9e224035b2..2dfbf854db0d4f8e713484d9bdda7b642ef7e6b8 100644 (file)
@@ -1519,7 +1519,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
 #endif
 
 /* Report failure */
-#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
+#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
 
 /* Extract opcode, argument, or skip count from code array */
 #define GET_OP                                          \
@@ -1543,7 +1543,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
         skip = *code;                                   \
         VTRACE(("%lu (skip to %p)\n",                   \
                (unsigned long)skip, code+skip));        \
-        if (skip-adj > (uintptr_t)(end - code))      \
+        if (skip-adj > (uintptr_t)(end - code))         \
             FAIL;                                       \
         code++;                                         \
     } while (0)
@@ -1632,9 +1632,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
         }
     }
 
-    return 1;
+    return 0;
 }
 
+/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
 static int
 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
 {
@@ -1712,7 +1713,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
         case SRE_OP_IN_LOC_IGNORE:
             GET_SKIP;
             /* Stop 1 before the end; we check the FAILURE below */
-            if (!_validate_charset(code, code+skip-2))
+            if (_validate_charset(code, code+skip-2))
                 FAIL;
             if (code[skip-2] != SRE_OP_FAILURE)
                 FAIL;
@@ -1766,7 +1767,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 }
                 /* Validate the charset */
                 if (flags & SRE_INFO_CHARSET) {
-                    if (!_validate_charset(code, newcode-1))
+                    if (_validate_charset(code, newcode-1))
                         FAIL;
                     if (newcode[-1] != SRE_OP_FAILURE)
                         FAIL;
@@ -1787,7 +1788,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                     if (skip == 0)
                         break;
                     /* Stop 2 before the end; we check the JUMP below */
-                    if (!_validate_inner(code, code+skip-3, groups))
+                    if (_validate_inner(code, code+skip-3, groups))
                         FAIL;
                     code += skip-3;
                     /* Check that it ends with a JUMP, and that each JUMP
@@ -1801,6 +1802,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                     else if (code+skip-1 != target)
                         FAIL;
                 }
+                if (code != target)
+                    FAIL;
             }
             break;
 
@@ -1815,7 +1818,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                     FAIL;
                 if (max > SRE_MAXREPEAT)
                     FAIL;
-                if (!_validate_inner(code, code+skip-4, groups))
+                if (_validate_inner(code, code+skip-4, groups))
                     FAIL;
                 code += skip-4;
                 GET_OP;
@@ -1834,7 +1837,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                     FAIL;
                 if (max > SRE_MAXREPEAT)
                     FAIL;
-                if (!_validate_inner(code, code+skip-3, groups))
+                if (_validate_inner(code, code+skip-3, groups))
                     FAIL;
                 code += skip-3;
                 GET_OP;
@@ -1886,24 +1889,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                to allow arbitrary jumps anywhere in the code; so we just look
                for a JUMP opcode preceding our skip target.
             */
-            if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
-                code[skip-3] == SRE_OP_JUMP)
-            {
-                VTRACE(("both then and else parts present\n"));
-                if (!_validate_inner(code+1, code+skip-3, groups))
-                    FAIL;
+            VTRACE(("then part:\n"));
+            int rc = _validate_inner(code+1, code+skip-1, groups);
+            if (rc == 1) {
+                VTRACE(("else part:\n"));
                 code += skip-2; /* Position after JUMP, at <skipno> */
                 GET_SKIP;
-                if (!_validate_inner(code, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
-            }
-            else {
-                VTRACE(("only a then part present\n"));
-                if (!_validate_inner(code+1, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
+                rc = _validate_inner(code, code+skip-1, groups);
             }
+            if (rc)
+                FAIL;
+            code += skip-1;
             break;
 
         case SRE_OP_ASSERT:
@@ -1914,7 +1910,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
             if (arg & 0x80000000)
                 FAIL; /* Width too large */
             /* Stop 1 before the end; we check the SUCCESS below */
-            if (!_validate_inner(code+1, code+skip-2, groups))
+            if (_validate_inner(code+1, code+skip-2, groups))
                 FAIL;
             code += skip-2;
             GET_OP;
@@ -1922,6 +1918,12 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 FAIL;
             break;
 
+        case SRE_OP_JUMP:
+            if (code + 1 != end)
+                FAIL;
+            VTRACE(("JUMP: %d\n", __LINE__));
+            return 1;
+
         default:
             FAIL;
 
@@ -1929,7 +1931,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
     }
 
     VTRACE(("okay\n"));
-    return 1;
+    return 0;
 }
 
 static int
@@ -1944,7 +1946,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
 static int
 _validate(PatternObject *self)
 {
-    if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
+    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
     {
         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
         return 0;