git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.6] bpo-30375: Correct the stacklevel of regex compiling warnings. (GH-1595) (...
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 16 May 2017 15:16:15 +0000 (18:16 +0300)
committer GitHub <noreply@github.com>
Tue, 16 May 2017 15:16:15 +0000 (18:16 +0300)
Warnings emitted when compiling a regular expression now always point
to the line in the user code.  Previously they could point into the internals
of the re module if emitted from inside of groups or conditionals.
(cherry picked from commit c7ac7280c321b3c1679fe5f657a6be0f86adf173)

Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS

index e1c749634b71fe79b35c7dc9eac61e9122b7ff26..608f9a26642f05f3e645a6d0783669b3fedf1d34 100644 (file)
@@ -404,7 +404,7 @@ def _escape(source, escape, state):
         pass
     raise source.error("bad escape %s" % escape, len(escape))
 
-def _parse_sub(source, state, verbose, nested=True):
+def _parse_sub(source, state, verbose, nested):
     # parse an alternation: a|b|c
 
     items = []
@@ -412,7 +412,8 @@ def _parse_sub(source, state, verbose, nested=True):
     sourcematch = source.match
     start = source.tell()
     while True:
-        itemsappend(_parse(source, state, verbose, not nested and not items))
+        itemsappend(_parse(source, state, verbose, nested + 1,
+                           not nested and not items))
         if not sourcematch("|"):
             break
 
@@ -454,10 +455,10 @@ def _parse_sub(source, state, verbose, nested=True):
     subpattern.append((BRANCH, (None, items)))
     return subpattern
 
-def _parse_sub_cond(source, state, condgroup, verbose):
-    item_yes = _parse(source, state, verbose)
+def _parse_sub_cond(source, state, condgroup, verbose, nested):
+    item_yes = _parse(source, state, verbose, nested + 1)
     if source.match("|"):
-        item_no = _parse(source, state, verbose)
+        item_no = _parse(source, state, verbose, nested + 1)
         if source.next == "|":
             raise source.error("conditional backref with more than two branches")
     else:
@@ -466,7 +467,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
     subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
     return subpattern
 
-def _parse(source, state, verbose, first=False):
+def _parse(source, state, verbose, nested, first=False):
     # parse a simple pattern
     subpattern = SubPattern(state)
 
@@ -692,7 +693,7 @@ def _parse(source, state, verbose, first=False):
                         lookbehindgroups = state.lookbehindgroups
                         if lookbehindgroups is None:
                             state.lookbehindgroups = state.groups
-                    p = _parse_sub(source, state, verbose)
+                    p = _parse_sub(source, state, verbose, nested + 1)
                     if dir < 0:
                         if lookbehindgroups is None:
                             state.lookbehindgroups = None
@@ -739,7 +740,7 @@ def _parse(source, state, verbose, first=False):
                                     source.string[:20],  # truncate long regexes
                                     ' (truncated)' if len(source.string) > 20 else '',
                                 ),
-                                DeprecationWarning, stacklevel=7
+                                DeprecationWarning, stacklevel=nested + 6
                             )
                         if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
                             raise Verbose
@@ -757,11 +758,11 @@ def _parse(source, state, verbose, first=False):
                 except error as err:
                     raise source.error(err.msg, len(name) + 1) from None
             if condgroup:
-                p = _parse_sub_cond(source, state, condgroup, verbose)
+                p = _parse_sub_cond(source, state, condgroup, verbose, nested + 1)
             else:
                 sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
                                not (del_flags & SRE_FLAG_VERBOSE))
-                p = _parse_sub(source, state, sub_verbose)
+                p = _parse_sub(source, state, sub_verbose, nested + 1)
             if not source.match(")"):
                 raise source.error("missing ), unterminated subpattern",
                                    source.tell() - start)
@@ -851,7 +852,7 @@ def parse(str, flags=0, pattern=None):
     pattern.str = str
 
     try:
-        p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False)
+        p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
     except Verbose:
         # the VERBOSE flag was switched on inside the pattern.  to be
         # on the safe side, we'll parse the whole thing again...
@@ -859,7 +860,7 @@ def parse(str, flags=0, pattern=None):
         pattern.flags = flags | SRE_FLAG_VERBOSE
         pattern.str = str
         source.seek(0)
-        p = _parse_sub(source, pattern, True, False)
+        p = _parse_sub(source, pattern, True, 0)
 
     p.pattern.flags = fix_flags(str, p.pattern.flags)
 
index c52417ba000ebca8d3c1cf23f682e909f0887d9b..e88d0b3dcf2a78a73c0705c42699175757003e2f 100644 (file)
@@ -1348,6 +1348,7 @@ class ReTests(unittest.TestCase):
             str(warns.warnings[0].message),
             'Flags not at the start of the expression %s' % p
         )
+        self.assertEqual(warns.warnings[0].filename, __file__)
 
         p = upper_char + '(?i)%s' % ('.?' * 100)
         with self.assertWarns(DeprecationWarning) as warns:
@@ -1356,6 +1357,7 @@ class ReTests(unittest.TestCase):
             str(warns.warnings[0].message),
             'Flags not at the start of the expression %s (truncated)' % p[:20]
         )
+        self.assertEqual(warns.warnings[0].filename, __file__)
 
         with self.assertWarns(DeprecationWarning):
             self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
@@ -1367,14 +1369,23 @@ class ReTests(unittest.TestCase):
             self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
         with self.assertWarns(DeprecationWarning):
             self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
-        with self.assertWarns(DeprecationWarning):
+        with self.assertWarns(DeprecationWarning) as warns:
             self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
-        with self.assertWarns(DeprecationWarning):
+        self.assertRegex(str(warns.warnings[0].message),
+                         'Flags not at the start')
+        self.assertEqual(warns.warnings[0].filename, __file__)
+        with self.assertWarns(DeprecationWarning) as warns:
             self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
                                          lower_char))
-        with self.assertWarns(DeprecationWarning):
+        self.assertRegex(str(warns.warnings[0].message),
+                         'Flags not at the start')
+        self.assertEqual(warns.warnings[0].filename, __file__)
+        with self.assertWarns(DeprecationWarning) as warns:
             self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
                                          lower_char))
+        self.assertRegex(str(warns.warnings[0].message),
+                         'Flags not at the start')
+        self.assertEqual(warns.warnings[0].filename, __file__)
 
 
     def test_dollar_matches_twice(self):
index bfb14c136531110d502e353311ab91ef608f4ba6..567f40976e2cdea38f1d93a979b29959eefcbecd 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -36,6 +36,10 @@ Core and Builtins
 Library
 -------
 
+- bpo-30375: Warnings emitted when compile a regular expression now always
+  point to the line in the user code.  Previously they could point into inners
+  of the re module if emitted from inside of groups or conditionals.
+
 - bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is
   running coroutine and the coroutine returned without any more ``await``.