getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
+@functools.cache
def _is_case_sensitive(flavour):
return flavour.normcase('Aa') == 'Aa'
# Globbing helpers
#
+
+# fnmatch.translate() returns a regular expression that includes a prefix and
+# a suffix, which enable matching newlines and ensure the end of the string is
+# matched, respectively. These features are undesirable for our implementation
+# of PurePatch.match(), which represents path separators as newlines and joins
+# pattern segments together. As a workaround, we define a slice object that
+# can remove the prefix and suffix from any translate() result. See the
+# _compile_pattern_lines() function for more details.
+_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_')
+_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))
+_SWAP_SEP_AND_NEWLINE = {
+ '/': str.maketrans({'/': '\n', '\n': '/'}),
+ '\\': str.maketrans({'\\': '\n', '\n': '\\'}),
+}
+
+
@functools.lru_cache()
def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0]
return re.compile(fnmatch.translate(pat), flags).match
+@functools.lru_cache()
+def _compile_pattern_lines(pattern_lines, case_sensitive):
+ """Compile the given pattern lines to an `re.Pattern` object.
+
+ The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
+ its path separators and newlines swapped (e.g. '**\n*.py`). By using
+ newlines to separate path components, and not setting `re.DOTALL`, we
+ ensure that the `*` wildcard cannot match path separators.
+
+ The returned `re.Pattern` object may have its `match()` method called to
+ match a complete pattern, or `search()` to match from the right. The
+ argument supplied to these methods must also have its path separators and
+ newlines swapped.
+ """
+
+ # Match the start of the path, or just after a path separator
+ parts = ['^']
+ for part in pattern_lines.splitlines(keepends=True):
+ if part == '**\n':
+ # '**/' component: we use '[\s\S]' rather than '.' so that path
+ # separators (i.e. newlines) are matched. The trailing '^' ensures
+ # we terminate after a path separator (i.e. on a new line).
+ part = r'[\s\S]*^'
+ elif part == '**':
+ # '**' component.
+ part = r'[\s\S]*'
+ elif '**' in part:
+ raise ValueError("Invalid pattern: '**' can only be an entire path component")
+ else:
+ # Any other component: pass to fnmatch.translate(). We slice off
+ # the common prefix and suffix added by translate() to ensure that
+ # re.DOTALL is not set, and the end of the string not matched,
+ # respectively. With DOTALL not set, '*' wildcards will not match
+ # path separators, because the '.' characters in the pattern will
+ # not match newlines.
+ part = fnmatch.translate(part)[_FNMATCH_SLICE]
+ parts.append(part)
+ # Match the end of the path, always.
+ parts.append(r'\Z')
+ flags = re.MULTILINE
+ if not case_sensitive:
+ flags |= re.IGNORECASE
+ return re.compile(''.join(parts), flags=flags)
+
+
class _Selector:
"""A selector matches a specific glob pattern part against the children
of a given path."""
# to implement comparison methods like `__lt__()`.
'_parts_normcase_cached',
+ # The `_lines_cached` slot stores the string path with path separators
+ # and newlines swapped. This is used to implement `match()`.
+ '_lines_cached',
+
# The `_hash` slot stores the hash of the case-normalized string
# path. It's set when `__hash__()` is called for the first time.
'_hash',
self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
return self._parts_normcase_cached
+ @property
+ def _lines(self):
+ # Path with separators and newlines swapped, for pattern matching.
+ try:
+ return self._lines_cached
+ except AttributeError:
+ trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]
+ self._lines_cached = str(self).translate(trans)
+ return self._lines_cached
+
def __eq__(self, other):
if not isinstance(other, PurePath):
return NotImplemented
"""
Return True if this path matches the given pattern.
"""
+ if not isinstance(path_pattern, PurePath):
+ path_pattern = self.with_segments(path_pattern)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self._flavour)
- pat = self.with_segments(path_pattern)
- if not pat.parts:
+ pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
+ if path_pattern.drive or path_pattern.root:
+ return pattern.match(self._lines) is not None
+ elif path_pattern._tail:
+ return pattern.search(self._lines) is not None
+ else:
raise ValueError("empty pattern")
- pat_parts = pat.parts
- parts = self.parts
- if pat.drive or pat.root:
- if len(pat_parts) != len(parts):
- return False
- elif len(pat_parts) > len(parts):
- return False
- for part, pat in zip(reversed(parts), reversed(pat_parts)):
- match = _compile_pattern(pat, case_sensitive)
- if not match(part):
- return False
- return True
+
# Subclassing os.PathLike makes isinstance() checks slower,
# which in turn makes Path construction slower. Register instead!
self.assertFalse(P('/ab.py').match('/a/*.py'))
self.assertFalse(P('/a/b/c.py').match('/a/*.py'))
# Multi-part glob-style pattern.
- self.assertFalse(P('/a/b/c.py').match('/**/*.py'))
+ self.assertTrue(P('a').match('**'))
+ self.assertTrue(P('c.py').match('**'))
+ self.assertTrue(P('a/b/c.py').match('**'))
+ self.assertTrue(P('/a/b/c.py').match('**'))
+ self.assertTrue(P('/a/b/c.py').match('/**'))
+ self.assertTrue(P('/a/b/c.py').match('**/'))
+ self.assertTrue(P('/a/b/c.py').match('/a/**'))
+ self.assertTrue(P('/a/b/c.py').match('**/*.py'))
+ self.assertTrue(P('/a/b/c.py').match('/**/*.py'))
self.assertTrue(P('/a/b/c.py').match('/a/**/*.py'))
+ self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py'))
+ self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py'))
+ self.assertFalse(P('c.py').match('**/a.py'))
+ self.assertFalse(P('c.py').match('c/**'))
+ self.assertFalse(P('a/b/c.py').match('**/a'))
+ self.assertFalse(P('a/b/c.py').match('**/a/b'))
+ self.assertFalse(P('a/b/c.py').match('**/a/b/c'))
+ self.assertFalse(P('a/b/c.py').match('**/a/b/c.'))
+ self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
+ self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
+ self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**'))
+ self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py'))
+ self.assertRaises(ValueError, P('a').match, '**a/b/c')
+ self.assertRaises(ValueError, P('a').match, 'a/b/c**')
# Case-sensitive flag
self.assertFalse(P('A.py').match('a.PY', case_sensitive=True))
self.assertTrue(P('A.py').match('a.PY', case_sensitive=False))