"""Filename globbing utility."""
+import contextlib
import os
import re
import fnmatch
import functools
+import itertools
import operator
+import stat
import sys
"""
sys.audit("glob.glob", pathname, recursive)
sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
- pathname = os.fspath(pathname)
- if isinstance(pathname, bytes):
- pathname = os.fsdecode(pathname)
- if root_dir is not None:
- root_dir = os.fsdecode(root_dir)
- paths = _iglob(pathname, root_dir, dir_fd, recursive, include_hidden)
- return map(os.fsencode, paths)
+ if root_dir is not None:
+ root_dir = os.fspath(root_dir)
+ else:
+ root_dir = pathname[:0]
+ it = _iglob(pathname, root_dir, dir_fd, recursive, False,
+ include_hidden=include_hidden)
+ if not pathname or recursive and _isrecursive(pathname[:2]):
+ try:
+ s = next(it) # skip empty string
+ if s:
+ it = itertools.chain((s,), it)
+ except StopIteration:
+ pass
+ return it
+
+def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
+ include_hidden=False):
+ dirname, basename = os.path.split(pathname)
+ if not has_magic(pathname):
+ assert not dironly
+ if basename:
+ if _lexists(_join(root_dir, pathname), dir_fd):
+ yield pathname
+ else:
+ # Patterns ending with a slash should match only directories
+ if _isdir(_join(root_dir, dirname), dir_fd):
+ yield pathname
+ return
+ if not dirname:
+ if recursive and _isrecursive(basename):
+ yield from _glob2(root_dir, basename, dir_fd, dironly,
+ include_hidden=include_hidden)
+ else:
+ yield from _glob1(root_dir, basename, dir_fd, dironly,
+ include_hidden=include_hidden)
+ return
+ # `os.path.split()` returns the argument itself as a dirname if it is a
+ # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
+ # contains magic characters (i.e. r'\\?\C:').
+ if dirname != pathname and has_magic(dirname):
+ dirs = _iglob(dirname, root_dir, dir_fd, recursive, True,
+ include_hidden=include_hidden)
+ else:
+ dirs = [dirname]
+ if has_magic(basename):
+ if recursive and _isrecursive(basename):
+ glob_in_dir = _glob2
+ else:
+ glob_in_dir = _glob1
else:
- return _iglob(pathname, root_dir, dir_fd, recursive, include_hidden)
-
-def _iglob(pathname, root_dir, dir_fd, recursive, include_hidden):
- if os.path.altsep:
- pathname = pathname.replace(os.path.altsep, os.path.sep)
- drive, root, tail = os.path.splitroot(pathname)
- parts = tail.split(os.path.sep)[::-1] if tail else []
- globber = _StringGlobber(recursive=recursive, include_hidden=include_hidden)
- select = globber.selector(parts)
- if drive:
- root = drive + root
- return select(root, dir_fd, root)
- elif root:
- return select(root, dir_fd, root, exists=True)
- elif not root_dir:
- return select(root, dir_fd, root, empty=True)
+ glob_in_dir = _glob0
+ for dirname in dirs:
+ for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly,
+ include_hidden=include_hidden):
+ yield os.path.join(dirname, name)
+
+# These 2 helper functions non-recursively glob inside a literal directory.
+# They return a list of basenames. _glob1 accepts a pattern while _glob0
+# takes a literal basename (so it only has to check for its existence).
+
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the basenames inside *dirname* that match wildcard *pattern*.

    Candidate names come from ``_listdir(dirname, dir_fd, dironly)``.
    Names for which ``_ishidden()`` is true are discarded before matching,
    unless *include_hidden* is true or *pattern* itself is hidden.  The
    surviving names are matched with ``fnmatch.filter()`` and returned as
    a list.
    """
    names = _listdir(dirname, dir_fd, dironly)
    if not (include_hidden or _ishidden(pattern)):
        # A hidden pattern (e.g. ".*") is an explicit request for hidden
        # names, so the filter is applied only for non-hidden patterns.
        names = [name for name in names if not _ishidden(name)]
    return fnmatch.filter(names, pattern)
+
+def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
+ if basename:
+ if _lexists(_join(dirname, basename), dir_fd):
+ return [basename]
else:
- root = os.path.join(root_dir, '')
- root_len = len(root)
- paths = select(root, dir_fd, root, empty=True)
- return (path[root_len:] for path in paths)
+ # `os.path.split()` returns an empty basename for paths ending with a
+ # directory separator. 'q*x/' should match only directories.
+ if _isdir(dirname, dir_fd):
+ return [basename]
+ return []
_deprecated_function_message = (
"{name} is deprecated and will be removed in Python {remove}. Use "
def glob0(dirname, pattern):
import warnings
warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15))
- dirname = os.path.join(dirname, '')
- select = _StringGlobber().literal_selector(pattern, [])
- return [path[len(dirname):] for path in select(dirname)]
+ return _glob0(dirname, pattern, None, False)
def glob1(dirname, pattern):
import warnings
warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15))
- dirname = os.path.join(dirname, '')
- select = _StringGlobber().wildcard_selector(pattern, [])
- return [path[len(dirname):] for path in select(dirname)]
+ return _glob1(dirname, pattern, None, False)
+
+# This helper function recursively yields relative pathnames inside a literal
+# directory.
+
def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Yield relative pathnames below *dirname* for a recursive pattern.

    *pattern* must satisfy ``_isrecursive()``.  When *dirname* is empty or
    names a directory, an empty string/bytes of the same type as *pattern*
    is yielded first; every pathname produced by ``_rlistdir()`` follows.
    """
    assert _isrecursive(pattern)
    if not dirname or _isdir(dirname, dir_fd):
        # pattern[:0] is '' or b'' depending on the pattern type; the
        # empty match stands for *dirname* itself.
        yield pattern[:0]
    yield from _rlistdir(dirname, dir_fd, dironly,
                         include_hidden=include_hidden)
+
+# If dironly is false, yields all file names inside a directory.
+# If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the entry names found in directory *dirname*.

    If *dironly* is true, only entries for which ``entry.is_dir()`` is true
    are yielded.  If *dir_fd* is not None, *dirname* is opened relative to
    it (or *dir_fd* itself is scanned when *dirname* is empty).  An empty
    *dirname* without *dir_fd* means the current directory.  Any OSError
    raised while opening or scanning ends the iteration silently.
    """
    try:
        fd = None
        fsencode = None
        if dir_fd is not None:
            if dirname:
                # Keep the descriptor in *fd* so it is closed in the
                # ``finally`` clause below; a caller-supplied dir_fd is
                # never closed here.
                fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
            else:
                arg = dir_fd
            if isinstance(dirname, bytes):
                # Scanning a file descriptor yields str names; convert
                # them back so the result type follows *dirname*.
                fsencode = os.fsencode
        elif dirname:
            arg = dirname
        elif isinstance(dirname, bytes):
            arg = bytes(os.curdir, 'ASCII')
        else:
            arg = os.curdir
        try:
            with os.scandir(arg) as it:
                for entry in it:
                    try:
                        if not dironly or entry.is_dir():
                            if fsencode is not None:
                                yield fsencode(entry.name)
                            else:
                                yield entry.name
                    except OSError:
                        # Skip entries whose type cannot be determined.
                        pass
        finally:
            if fd is not None:
                os.close(fd)
    except OSError:
        return
+
def _listdir(dirname, dir_fd, dironly):
    """Return the names produced by ``_iterdir()`` as a list."""
    # closing() finalizes the generator on exit, so the cleanup in
    # _iterdir() runs even if list() is interrupted part-way.
    with contextlib.closing(_iterdir(dirname, dir_fd, dironly)) as it:
        return list(it)
+
+# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Recursively yield pathnames, relative to *dirname*, found below it.

    Entries are produced depth-first: each name is yielded before the
    names beneath it.  Hidden names (see ``_ishidden()``) are neither
    yielded nor descended into unless *include_hidden* is true.  Every
    yielded name is also listed as a directory; names that cannot be
    listed simply contribute nothing further.

    The traversal uses an explicit stack instead of recursive generators,
    so the depth of the directory tree is not bounded by the interpreter's
    recursion limit.
    """
    # Each frame: (path to list, that path relative to *dirname*,
    # iterator over the names in it that have not been visited yet).
    stack = [(dirname, None, iter(_listdir(dirname, dir_fd, dironly)))]
    while stack:
        parent, prefix, names = stack[-1]
        for name in names:
            if include_hidden or not _ishidden(name):
                # _join() returns *name* unchanged while *prefix* is empty.
                rel = _join(prefix, name)
                yield rel
                path = _join(parent, name) if parent else name
                stack.append(
                    (path, rel, iter(_listdir(path, dir_fd, dironly))))
                # Descend first; the parent iterator resumes afterwards.
                break
        else:
            # This directory is exhausted.
            stack.pop()
+
+
def _lexists(pathname, dir_fd):
    """Same as os.path.lexists(), but with *dir_fd* support.

    With ``dir_fd=None`` this defers to ``os.path.lexists()``.  Otherwise
    *pathname* is checked with ``os.lstat(pathname, dir_fd=dir_fd)``; an
    OSError or ValueError from that call means "does not exist".
    """
    if dir_fd is None:
        return os.path.lexists(pathname)
    try:
        os.lstat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    return True
+
def _isdir(pathname, dir_fd):
    """Same as os.path.isdir(), but with *dir_fd* support.

    With ``dir_fd=None`` this defers to ``os.path.isdir()``.  Otherwise
    *pathname* is examined with ``os.stat(pathname, dir_fd=dir_fd)``; an
    OSError or ValueError from that call means "not a directory".
    """
    if dir_fd is None:
        return os.path.isdir(pathname)
    try:
        st = os.stat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    return stat.S_ISDIR(st.st_mode)
+
def _join(dirname, basename):
    """Join two path components, short-circuiting when either is empty.

    If *dirname* or *basename* is empty, the other one is returned
    unchanged (no separator is added); otherwise the result is
    ``os.path.join(dirname, basename)``.
    """
    # It is common for dirname or basename to be empty.
    if not dirname or not basename:
        return dirname or basename
    return os.path.join(dirname, basename)
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')
match = magic_check.search(s)
return match is not None
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is not hidden.
    """
    # Indexing bytes yields an int, hence the comparison against b'.'[0];
    # this avoids comparing str with bytes.  The emptiness check keeps an
    # empty name or pattern from raising IndexError.
    return bool(path) and path[0] in ('.', b'.'[0])
+
def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard ``**``.

    Both str and bytes patterns are supported; each is compared against
    the literal of its own type.
    """
    if isinstance(pattern, bytes):
        return pattern == b'**'
    return pattern == '**'
+
def escape(pathname):
"""Escape all special characters.
"""
return fr'(?s:{res})\Z'
-@functools.lru_cache(maxsize=1024)
-def _compile_pattern(pat, sep, case_sensitive, recursive, include_hidden):
+@functools.lru_cache(maxsize=512)
+def _compile_pattern(pat, sep, case_sensitive, recursive=True):
"""Compile given glob pattern to a re.Pattern object (observing case
sensitivity)."""
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
- regex = translate(pat, recursive=recursive,
- include_hidden=include_hidden, seps=sep)
+ regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
return re.compile(regex, flags=flags).match
"""Abstract class providing shell-style pattern matching and globbing.
"""
- def __init__(self, sep=os.path.sep, case_sensitive=os.name != 'nt',
- case_pedantic=False, recursive=False, include_hidden=False):
+ def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
self.sep = sep
self.case_sensitive = case_sensitive
self.case_pedantic = case_pedantic
self.recursive = recursive
- self.include_hidden = include_hidden
# Abstract methods
"""
raise NotImplementedError
- @staticmethod
- def lstat(path, dir_fd=None):
- """Implements os.lstat()
- """
- raise NotImplementedError
-
- @staticmethod
- def open(path, flags, dir_fd=None):
- """Implements os.open()
- """
- raise NotImplementedError
-
@staticmethod
def scandir(path):
"""Like os.scandir(), but generates (entry, name, path) tuples.
"""
raise NotImplementedError
- @staticmethod
- def scandir_cwd():
- raise NotImplementedError
-
- @staticmethod
- def scandir_fd(fd, prefix):
- raise NotImplementedError
-
- @staticmethod
- def close(fd):
- """Implements os.close().
- """
- raise NotImplementedError
-
@staticmethod
def concat_path(path, text):
"""Implements path concatenation.
# High-level methods
def compile(self, pat):
- return _compile_pattern(pat, self.sep, self.case_sensitive,
- self.recursive, self.include_hidden)
+ return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)
def selector(self, parts):
"""Returns a function that selects from a given path, walking and
if parts:
part += self.sep
select_next = self.selector(parts)
- if not part:
- return select_next
- def select_special(path, dir_fd=None, rel_path=None, exists=False, empty=False):
+ def select_special(path, exists=False):
path = self.concat_path(path, part)
- if dir_fd is not None:
- rel_path = self.concat_path(rel_path, part)
- return select_next(path, dir_fd, rel_path, exists)
+ return select_next(path, exists)
return select_special
def literal_selector(self, part, parts):
select_next = self.selector(parts)
- def select_literal(path, dir_fd=None, rel_path=None, exists=False, empty=False):
+ def select_literal(path, exists=False):
path = self.concat_path(path, part)
- if dir_fd is not None:
- rel_path = self.concat_path(rel_path, part)
- return select_next(path, dir_fd, rel_path)
+ return select_next(path, exists=False)
return select_literal
def wildcard_selector(self, part, parts):
filtering by pattern.
"""
- match = None if self.include_hidden and part == '*' else self.compile(part)
+ match = None if part == '*' else self.compile(part)
dir_only = bool(parts)
if dir_only:
select_next = self.selector(parts)
- def select_wildcard(path, dir_fd=None, rel_path=None, exists=False, empty=False):
- close_fd = False
+ def select_wildcard(path, exists=False):
try:
- if dir_fd is None:
- fd = None
- entries = self.scandir(path) if path else self.scandir_cwd()
- elif not rel_path:
- fd = dir_fd
- entries = self.scandir_fd(fd, path)
- else:
- fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd)
- close_fd = True
- entries = self.scandir_fd(fd, path)
+ entries = self.scandir(path)
except OSError:
pass
else:
except OSError:
continue
entry_path = self.concat_path(entry_path, self.sep)
- if fd is not None:
- entry_name = entry_name + self.sep
- yield from select_next(
- entry_path, fd, entry_name, exists=True)
+ yield from select_next(entry_path, exists=True)
else:
- # Optimization: directly yield the path if this is
- # last pattern part.
yield entry_path
- finally:
- if close_fd:
- self.close(fd)
return select_wildcard
def recursive_selector(self, part, parts):
while parts and parts[-1] not in _special_parts:
part += self.sep + parts.pop()
- match = None if self.include_hidden and part == '**' else self.compile(part)
+ match = None if part == '**' else self.compile(part)
dir_only = bool(parts)
select_next = self.selector(parts)
- def select_recursive(path, dir_fd=None, rel_path=None, exists=False, empty=False):
+ def select_recursive(path, exists=False):
match_pos = len(str(path))
if match is None or match(str(path), match_pos):
- yield from select_next(path, dir_fd, rel_path, exists, empty)
- stack = [(path, dir_fd, rel_path)]
- try:
- while stack:
- yield from select_recursive_step(stack, match_pos)
- finally:
- # Close any file descriptors still on the stack.
- while stack:
- path, dir_fd, _rel_path = stack.pop()
- if path is None:
- try:
- self.close(dir_fd)
- except OSError:
- pass
+ yield from select_next(path, exists)
+ stack = [path]
+ while stack:
+ yield from select_recursive_step(stack, match_pos)
def select_recursive_step(stack, match_pos):
- path, dir_fd, rel_path = stack.pop()
+ path = stack.pop()
try:
- if path is None:
- self.close(dir_fd)
- return
- elif dir_fd is None:
- fd = None
- entries = self.scandir(path) if path else self.scandir_cwd()
- elif not rel_path:
- fd = dir_fd
- entries = self.scandir_fd(fd, path)
- else:
- fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd)
- # Schedule the file descriptor to be closed next step.
- stack.append((None, fd, None))
- entries = self.scandir_fd(fd, path)
+ entries = self.scandir(path)
except OSError:
pass
else:
- for entry, entry_name, entry_path in entries:
+ for entry, _entry_name, entry_path in entries:
is_dir = False
try:
if entry.is_dir(follow_symlinks=follow_symlinks):
entry_path_str = str(entry_path)
if dir_only:
entry_path = self.concat_path(entry_path, self.sep)
- if fd is not None:
- entry_name = entry_name + self.sep
if match is None or match(entry_path_str, match_pos):
if dir_only:
- yield from select_next(
- entry_path, fd, entry_name, exists=True)
+ yield from select_next(entry_path, exists=True)
else:
# Optimization: directly yield the path if this is
# last pattern part.
yield entry_path
if is_dir:
- stack.append((entry_path, fd, entry_name))
+ stack.append(entry_path)
return select_recursive
- def select_exists(self, path, dir_fd=None, rel_path=None, exists=False, empty=False):
- """Yields the given path, if it exists. If *dir_fd* is given, we check
- whether *rel_path* exists relative to the fd.
+ def select_exists(self, path, exists=False):
+ """Yields the given path, if it exists.
"""
- if empty:
- # Suppress initial path so iglob() doesn't yield the empty string.
- pass
- elif exists:
+ if exists:
# Optimization: this path is already known to exist, e.g. because
# it was returned from os.scandir(), so we skip calling lstat().
yield path
- elif dir_fd is not None:
- try:
- self.lstat(rel_path, dir_fd=dir_fd)
- yield path
- except OSError:
- pass
elif self.lexists(path):
yield path
"""Provides shell-style pattern matching and globbing for string paths.
"""
lexists = staticmethod(os.path.lexists)
- lstat = staticmethod(os.lstat)
- open = staticmethod(os.open)
- close = staticmethod(os.close)
concat_path = operator.add
@staticmethod
entries = list(scandir_it)
return ((entry, entry.name, entry.path) for entry in entries)
- @staticmethod
- def scandir_cwd():
- with os.scandir() as scandir_it:
- entries = list(scandir_it)
- # Suppress leading dot when scanning current directory.
- return ((entry, entry.name, entry.name) for entry in entries)
-
- @staticmethod
- def scandir_fd(fd, prefix):
- prefix = os.path.join(prefix, prefix[:0])
- with os.scandir(fd) as scandir_it:
- entries = list(scandir_it)
- return ((entry, entry.name, prefix + entry.name) for entry in entries)
-
class _PathGlobber(_GlobberBase):
"""Provides shell-style pattern matching and globbing for pathlib paths.
import shutil
import sys
import unittest
-import unittest.mock
import warnings
from test import support
-from test.support import is_wasi, Py_DEBUG, infinite_recursion
+from test.support import is_wasi, Py_DEBUG
from test.support.os_helper import (TESTFN, skip_unless_symlink,
can_symlink, create_empty_file, change_cwd)
-_supports_dir_fd = {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd
-
-
class GlobTests(unittest.TestCase):
dir_fd = None
def open_dirfd(self):
if self.dir_fd is not None:
os.close(self.dir_fd)
- if _supports_dir_fd:
+ if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd:
self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY)
else:
self.dir_fd = None
self.assertEqual(glob.glob(self.norm('Z*Z') + sep), [])
self.assertEqual(glob.glob(self.norm('ZZZ') + sep), [])
self.assertEqual(glob.glob(self.norm('aaa') + sep),
- [self.norm('aaa') + os.sep])
- # Redundant separators are preserved and normalized
+ [self.norm('aaa') + sep])
+ # Preserving the redundant separators is an implementation detail.
self.assertEqual(glob.glob(self.norm('aaa') + sep*2),
- [self.norm('aaa') + os.sep*2])
+ [self.norm('aaa') + sep*2])
# When there is a wildcard pattern which ends with a pathname
# separator, glob() doesn't blow.
# The result should end with the pathname separator.
+ # Normalizing the trailing separator is an implementation detail.
eq = self.assertSequencesEqual_noorder
eq(glob.glob(self.norm('aa*') + sep),
[self.norm('aaa') + os.sep, self.norm('aab') + os.sep])
+ # Stripping the redundant separators is an implementation detail.
eq(glob.glob(self.norm('aa*') + sep*2),
- [self.norm('aaa') + os.sep*2, self.norm('aab') + os.sep*2])
+ [self.norm('aaa') + os.sep, self.norm('aab') + os.sep])
def test_glob_bytes_directory_with_trailing_slash(self):
# Same as test_glob_directory_with_trailing_slash, but with a
self.assertEqual(glob.glob(os.fsencode(self.norm('Z*Z') + sep)), [])
self.assertEqual(glob.glob(os.fsencode(self.norm('ZZZ') + sep)), [])
self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep)),
- [os.fsencode(self.norm('aaa') + os.sep)])
+ [os.fsencode(self.norm('aaa') + sep)])
self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep*2)),
- [os.fsencode(self.norm('aaa') + os.sep*2)])
+ [os.fsencode(self.norm('aaa') + sep*2)])
eq = self.assertSequencesEqual_noorder
eq(glob.glob(os.fsencode(self.norm('aa*') + sep)),
[os.fsencode(self.norm('aaa') + os.sep),
os.fsencode(self.norm('aab') + os.sep)])
eq(glob.glob(os.fsencode(self.norm('aa*') + sep*2)),
- [os.fsencode(self.norm('aaa') + os.sep*2),
- os.fsencode(self.norm('aab') + os.sep*2)])
+ [os.fsencode(self.norm('aaa') + os.sep),
+ os.fsencode(self.norm('aab') + os.sep)])
@skip_unless_symlink
def test_glob_symlinks(self):
with change_cwd(self.tempdir):
join = os.path.join
eq(glob.glob('**', recursive=True), [join(*i) for i in full])
- eq(glob.glob(join('**', '**'), recursive=True),
- [join(*i) for i in full])
eq(glob.glob(join('**', ''), recursive=True),
[join(*i) for i in dirs])
- eq(glob.glob(join('**', '**', ''), recursive=True),
- [join(*i) for i in dirs])
eq(glob.glob(join('**', '*'), recursive=True),
[join(*i) for i in full])
eq(glob.glob(join(os.curdir, '**'), recursive=True),
for it in iters:
self.assertEqual(next(it), p)
- def test_glob_above_recursion_limit(self):
- depth = 30
- base = os.path.join(self.tempdir, 'deep')
- p = os.path.join(base, *(['d']*depth))
- os.makedirs(p)
- pattern = os.path.join(base, '**', 'd')
- with infinite_recursion(depth - 5):
- glob.glob(pattern, recursive=True)
-
- @unittest.skipUnless(_supports_dir_fd, "Needs support for iglob(dir_fd=...)")
- def test_iglob_iter_close(self):
- base = os.path.join(self.tempdir, 'deep')
- p = os.path.join(base, *(['d'] * 10))
- os.makedirs(p)
- with (
- unittest.mock.patch("glob._StringGlobber.open", wraps=os.open) as os_open,
- unittest.mock.patch("glob._StringGlobber.close", wraps=os.close) as os_close
- ):
- self.assertEqual(os_open.call_count, os_close.call_count)
- iter = glob.iglob('**/*/d', dir_fd=self.dir_fd, recursive=True)
- self.assertEqual(os_open.call_count, os_close.call_count)
- self.assertEqual(next(iter), 'deep/d')
- self.assertEqual(next(iter), 'deep/d/d')
- self.assertGreater(os_open.call_count, os_close.call_count)
- iter.close()
- self.assertEqual(os_open.call_count, os_close.call_count)
-
def test_glob0(self):
with self.assertWarns(DeprecationWarning):
glob.glob0(self.tempdir, 'a')