Issue #22493: Inline flags now should be used only at the start of the regular expression.

author Serhiy Storchaka <storchaka@gmail.com>

Sun, 11 Sep 2016 09:50:02 +0000 (12:50 +0300)

committer Serhiy Storchaka <storchaka@gmail.com>

Sun, 11 Sep 2016 09:50:02 +0000 (12:50 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 11 Sep 2016 09:50:02 +0000 (12:50 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 11 Sep 2016 09:50:02 +0000 (12:50 +0300)
diff --git a/Doc/library/re.rst b/Doc/library/re.rst

index 5297f0b52deaadf250b452d52b38adf4949ccfb2..87cd5536010c79c84a4850d2a2279dd839825755 100644 (file)
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -224,12 +224,8 @@ The special characters are:
     flags are described in :ref:`contents-of-module-re`.) This
     is useful if you wish to include the flags as part of the regular
     expression, instead of passing a *flag* argument to the
-   :func:`re.compile` function.
-
-   Note that the ``(?x)`` flag changes how the expression is parsed. It should be
-   used first in the expression string, or after one or more whitespace characters.
-   If there are non-whitespace characters before the flag, the results are
-   undefined.
+   :func:`re.compile` function.  Flags should be used first in the
+   expression string.
  
  ``(?:...)``
     A non-capturing version of regular parentheses.  Matches whatever regular
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst

index 6bb34690e8cbbb8052fcca21252274a27300d208..8752b83e63891086f1199098bb5bfef794608842 100644 (file)
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -1124,6 +1124,15 @@ Deprecated features
    that will not be for several Python releases.  (Contributed by Emanuel Barry
    in :issue:`27364`.)
  
+* Inline flags ``(?letters)`` should now be used only at the start of the
+  regular expression.  Inline flags in the middle of a regular expression
+  still set the global flags in Python's :mod:`re` module.  This differs from
+  other regular expression engines, which either apply such flags to only
+  part of the regular expression or treat them as an error.  To avoid this
+  ambiguity, inline flags in the middle of a regular expression now emit a
+  deprecation warning.  This will be an error in future Python releases.
+  (Contributed by Serhiy Storchaka in :issue:`22493`.)
+
  
  Deprecated Python behavior
  --------------------------
diff --git a/Lib/distutils/filelist.py b/Lib/distutils/filelist.py

index 6522e69f06c99ba8b027bbdc5d6734c627dbd15b..c92d5fdba393bb4bab0718ae7006d54eb80e1ec5 100644 (file)
--- a/Lib/distutils/filelist.py
+++ b/Lib/distutils/filelist.py
@@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
          else:
              return pattern
  
+    # ditch start and end characters
+    start, _, end = glob_to_re('_').partition('_')
+
      if pattern:
          pattern_re = glob_to_re(pattern)
+        assert pattern_re.startswith(start) and pattern_re.endswith(end)
      else:
          pattern_re = ''
  
      if prefix is not None:
-        # ditch end of pattern character
-        empty_pattern = glob_to_re('')
-        prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
+        prefix_re = glob_to_re(prefix)
+        assert prefix_re.startswith(start) and prefix_re.endswith(end)
+        prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
          sep = os.sep
          if os.sep == '\\':
              sep = r'\\'
-        pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
+        pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
+        pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
      else:                               # no prefix -- respect anchor flag
          if anchor:
-            pattern_re = "^" + pattern_re
+            pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
  
      return re.compile(pattern_re)
diff --git a/Lib/distutils/tests/test_filelist.py b/Lib/distutils/tests/test_filelist.py

index 391af3cba23f30d52dcb9c6006bef69bdce54b04..c71342d0dc4e7b0bceb32fce42ff25a30c2be727 100644 (file)
--- a/Lib/distutils/tests/test_filelist.py
+++ b/Lib/distutils/tests/test_filelist.py
@@ -51,14 +51,14 @@ class FileListTestCase(support.LoggingSilencer,
  
          for glob, regex in (
              # simple cases
-            ('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
-            ('foo?', r'foo[^%(sep)s]\Z(?ms)'),
-            ('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
+            ('foo*', r'(?s:foo[^%(sep)s]*)\Z'),
+            ('foo?', r'(?s:foo[^%(sep)s])\Z'),
+            ('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'),
              # special cases
-            (r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
-            (r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
-            ('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
-            (r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
+            (r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'),
+            (r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'),
+            ('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'),
+            (r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')):
              regex = regex % {'sep': sep}
              self.assertEqual(glob_to_re(glob), regex)
  
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py

index 07b12295df7650e5d23a11ce70ed2db0d833dd35..fd3b5142e3487a2a8d34408494225ef31de67ef1 100644 (file)
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -106,4 +106,4 @@ def translate(pat):
                  res = '%s[%s]' % (res, stuff)
          else:
              res = res + re.escape(c)
-    return res + r'\Z(?ms)'
+    return r'(?s:%s)\Z' % res
diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py

index f078da54696a5edd5b59a5e8becbb26f50cda7b3..be3b080aa3d7517028e1901d7c3986d31e862a00 100644 (file)
--- a/Lib/http/cookies.py
+++ b/Lib/http/cookies.py
@@ -458,7 +458,6 @@ class Morsel(dict):
  _LegalKeyChars  = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
  _LegalValueChars = _LegalKeyChars + r'\[\]'
  _CookiePattern = re.compile(r"""
-    (?x)                           # This is a verbose pattern
      \s*                            # Optional whitespace at start of cookie
      (?P<key>                       # Start of group 'key'
      [""" + _LegalKeyChars + r"""]+?   # Any word of at least one letter
@@ -475,7 +474,7 @@ _CookiePattern = re.compile(r"""
      )?                             # End of optional value group
      \s*                            # Any number of spaces.
      (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
-    """, re.ASCII)                 # May be removed if safe.
+    """, re.ASCII | re.VERBOSE)    # re.ASCII may be removed if safe.
  
  
  # At long last, here is the cookie class.  Using this class is almost just like
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py

index d74e93ff5c854f52dff22b80cf17e3b916112bd7..4a77f0c9a7d221d32e15e97b4fb23f6b6956a835 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -279,6 +279,9 @@ class Tokenizer:
                  break
              result += c
          return result
+    @property
+    def pos(self):
+        return self.index - len(self.next or '')
      def tell(self):
          return self.index - len(self.next or '')
      def seek(self, index):
@@ -727,8 +730,13 @@ def _parse(source, state, verbose):
                      state.checklookbehindgroup(condgroup, source)
                  elif char in FLAGS or char == "-":
                      # flags
+                    pos = source.pos
                      flags = _parse_flags(source, state, char)
                      if flags is None:  # global flags
+                        if pos != 3:  # "(?x"
+                            import warnings
+                            warnings.warn('Flags not at the start of the expression',
+                                          DeprecationWarning, stacklevel=7)
                          continue
                      add_flags, del_flags = flags
                      group = None
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py

index d3692f859a0870fd05d7a6864631bc0d19b7e777..a379d33aec61d193e563f59aa4e94eef6fd26887 100755 (executable)
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -106,8 +106,8 @@ tests = [
      ('a.*b', 'acc\nccb', FAIL),
      ('a.{4,5}b', 'acc\nccb', FAIL),
      ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
-    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
-    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+    ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
      ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
      ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
  
@@ -563,7 +563,7 @@ tests = [
      # Check odd placement of embedded pattern modifiers
  
      # not an error under PCRE/PRE:
-    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
+    ('(?i)w', 'W', SUCCEED, 'found', 'W'),
      # ('w(?i)', 'W', SYNTAX_ERROR),
  
      # Comments using the x embedded pattern modifier
@@ -627,7 +627,7 @@ xyzabc
      # bug 114033: nothing to repeat
      (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
      # bug 115040: rescan if flags are modified inside pattern
-    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
+    (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
      # bug 115618: negative lookahead
      (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
      # bug 116251: character class bug
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py

index a5f583254424934f5b9d2759ed5592dbd69a64a7..fb7424624bb041d892185944e1952e28eb7ee1aa 100644 (file)
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -62,14 +62,14 @@ class FnmatchTestCase(unittest.TestCase):
  class TranslateTestCase(unittest.TestCase):
  
      def test_translate(self):
-        self.assertEqual(translate('*'), r'.*\Z(?ms)')
-        self.assertEqual(translate('?'), r'.\Z(?ms)')
-        self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)')
-        self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)')
-        self.assertEqual(translate('[]]'), r'[]]\Z(?ms)')
-        self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)')
-        self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)')
-        self.assertEqual(translate('[x'), r'\[x\Z(?ms)')
+        self.assertEqual(translate('*'), r'(?s:.*)\Z')
+        self.assertEqual(translate('?'), r'(?s:.)\Z')
+        self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
+        self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
+        self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
+        self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
+        self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
+        self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
  
  
  class FilterTestCase(unittest.TestCase):
diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py

index 06c10c17af1146b0154ad370ee5a6ff7e97c8e63..2cff1c526ed455c4ec0ae095aaf1633c40f455c3 100644 (file)
--- a/Lib/test/test_pyclbr.py
+++ b/Lib/test/test_pyclbr.py
@@ -158,7 +158,7 @@ class PyclbrTest(TestCase):
          cm('cgi', ignore=('log',))      # set with = in module
          cm('pickle', ignore=('partial',))
          cm('aifc', ignore=('openfp', '_aifc_params'))  # set with = in module
-        cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property
+        cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
          cm('pdb')
          cm('pydoc')
  
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py

index afe8738e8372a6709c1d86d5e2beb222515c7d5d..79a7a057a04e7438ba1d56535bb4810312a3698b 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1279,6 +1279,9 @@ class ReTests(unittest.TestCase):
          self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
          self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
  
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.match(upper_char + '(?i)', lower_char))
+
      def test_dollar_matches_twice(self):
          "$ matches the end of string, and just before the terminating \n"
          pattern = re.compile('$')
diff --git a/Misc/NEWS b/Misc/NEWS

index 6b30a103970e055123666656f6d4d0d94bc4a192..fe5fab147de236d5bf078725e647afc32601b1ad 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -143,6 +143,10 @@ Core and Builtins
  Library
  -------
  
+- Issue #22493: Inline flags should now be used only at the start of the
+  regular expression.  A deprecation warning is emitted if they are used in
+  the middle of the regular expression.
+
  - Issue #26885: xmlrpc now supports unmarshalling additional data types used
    by Apache XML-RPC implementation for numerics and None.
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 11 Sep 2016 09:50:02 +0000 (12:50 +0300)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 11 Sep 2016 09:50:02 +0000 (12:50 +0300)
Doc/library/re.rst		patch \| blob \| blame \| history
Doc/whatsnew/3.6.rst		patch \| blob \| blame \| history
Lib/distutils/filelist.py		patch \| blob \| blame \| history
Lib/distutils/tests/test_filelist.py		patch \| blob \| blame \| history
Lib/fnmatch.py		patch \| blob \| blame \| history
Lib/http/cookies.py		patch \| blob \| blame \| history
Lib/sre_parse.py		patch \| blob \| blame \| history
Lib/test/re_tests.py		patch \| blob \| blame \| history
Lib/test/test_fnmatch.py		patch \| blob \| blame \| history
Lib/test/test_pyclbr.py		patch \| blob \| blame \| history
Lib/test/test_re.py		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history