git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-40480: restore ability to join fnmatch.translate() results (GH-20049)
author: Tim Peters <tim.peters@gmail.com>
Tue, 12 May 2020 02:19:20 +0000 (21:19 -0500)
committer: GitHub <noreply@github.com>
Tue, 12 May 2020 02:19:20 +0000 (21:19 -0500)
In translate(), generate unique group names across calls.

This restores the undocumented ability to get a valid regexp
by joining multiple translate() results via `|`.

Lib/fnmatch.py
Lib/test/test_fnmatch.py

index d7d915d51314da4f9c690c81fc757542c577bed9..0eb1802bdb53c5d8594e06a08cfe4d0e5f8f6a94 100644 (file)
@@ -16,6 +16,12 @@ import functools
 
 __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
 
+# Build a thread-safe incrementing counter to help create unique regexp group
+# names across calls.
+from itertools import count
+_nextgroupnum = count().__next__
+del count
+
 def fnmatch(name, pat):
     """Test whether FILENAME matches PATTERN.
 
@@ -148,9 +154,12 @@ def translate(pat):
     # in a lookahead assertion, save the matched part in a group, then
     # consume that group via a backreference. If the overall match fails,
     # the lookahead assertion won't try alternatives. So the translation is:
-    #     (?=(P<name>.*?fixed))(?P=name)
-    # Group names are created as needed: g1, g2, g3, ...
-    groupnum = 0
+    #     (?=(?P<name>.*?fixed))(?P=name)
+    # Group names are created as needed: g0, g1, g2, ...
+    # The numbers are obtained from _nextgroupnum() to ensure they're unique
+    # across calls and across threads. This is because people rely on the
+    # undocumented ability to join multiple translate() results together via
+    # "|" to build large regexps matching "one of many" shell patterns.
     while i < n:
         assert inp[i] is STAR
         i += 1
@@ -167,7 +176,7 @@ def translate(pat):
             add(".*")
             add(fixed)
         else:
-            groupnum += 1
+            groupnum = _nextgroupnum()
             add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
     assert i == n
     res = "".join(res)
index 4c173069503cc622bc667caa3aa3955341296c3a..10668e4f6103aa0e7e06e221dafee9050d735b5d 100644 (file)
@@ -106,6 +106,7 @@ class FnmatchTestCase(unittest.TestCase):
 class TranslateTestCase(unittest.TestCase):
 
     def test_translate(self):
+        import re
         self.assertEqual(translate('*'), r'(?s:.*)\Z')
         self.assertEqual(translate('?'), r'(?s:.)\Z')
         self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
@@ -122,9 +123,26 @@ class TranslateTestCase(unittest.TestCase):
         self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
         self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
         # fancy translation to prevent exponential-time match failure
-        self.assertEqual(translate('**a*a****a'),
-             r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
-
+        t = translate('**a*a****a')
+        digits = re.findall(r'\d+', t)
+        self.assertEqual(len(digits), 4)
+        self.assertEqual(digits[0], digits[1])
+        self.assertEqual(digits[2], digits[3])
+        g1 = f"g{digits[0]}"  # e.g., group name "g4"
+        g2 = f"g{digits[2]}"  # e.g., group name "g5"
+        self.assertEqual(t,
+         fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
+        # and try pasting multiple translate results - it's an undocumented
+        # feature that this works; all the pain of generating unique group
+        # names across calls exists to support this
+        r1 = translate('**a**a**a*')
+        r2 = translate('**b**b**b*')
+        r3 = translate('*c*c*c*')
+        fatre = "|".join([r1, r2, r3])
+        self.assertTrue(re.match(fatre, 'abaccad'))
+        self.assertTrue(re.match(fatre, 'abxbcab'))
+        self.assertTrue(re.match(fatre, 'cbabcaxc'))
+        self.assertFalse(re.match(fatre, 'dabccbad'))
 
 class FilterTestCase(unittest.TestCase):