git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-125413: Add `pathlib.Path.scandir()` method (#126060)
author: Barney Gale <barney.gale@gmail.com>
Fri, 1 Nov 2024 01:19:01 +0000 (01:19 +0000)
committer: GitHub <noreply@github.com>
Fri, 1 Nov 2024 01:19:01 +0000 (01:19 +0000)
Add `pathlib.Path.scandir()` as a trivial wrapper of `os.scandir()`. This
will be used to implement several `PathBase` methods more efficiently,
including methods that provide `Path.copy()`.

Doc/library/pathlib.rst
Doc/whatsnew/3.14.rst
Lib/pathlib/_abc.py
Lib/pathlib/_local.py
Lib/test/test_pathlib/test_pathlib_abc.py
Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst [new file with mode: 0644]

index 4380122eb1be7d06b65ab5bf439adbf82edad15d..b6fb36554f7cec7dd10afb49d61d395ef0dce1c7 100644 (file)
@@ -1289,6 +1289,35 @@ Reading directories
    raised.
 
 
+.. method:: Path.scandir()
+
+   When the path points to a directory, return an iterator of
+   :class:`os.DirEntry` objects corresponding to entries in the directory. The
+   returned iterator supports the :term:`context manager` protocol. It is
+   implemented using :func:`os.scandir` and gives the same guarantees.
+
+   Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
+   significantly increase the performance of code that also needs file type or
+   file attribute information, because :class:`os.DirEntry` objects expose
+   this information if the operating system provides it when scanning a
+   directory.
+
+   The following example displays the names of subdirectories. The
+   ``entry.is_dir()`` check will generally not make an additional system call::
+
+      >>> p = Path('docs')
+      >>> with p.scandir() as entries:
+      ...     for entry in entries:
+      ...         if entry.is_dir():
+      ...             entry.name
+      ...
+      '_templates'
+      '_build'
+      '_static'
+
+   .. versionadded:: 3.14
+
+
 .. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)
 
    Glob the given relative *pattern* in the directory represented by this path,
index 7f9e3107a6e1a0c28c01b8219189c0333559bb9a..48314f9c98c03617b8bc59d7108388ed199333e9 100644 (file)
@@ -380,6 +380,12 @@ pathlib
 
   (Contributed by Barney Gale in :gh:`73991`.)
 
+* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
+  of :class:`os.DirEntry` objects. This is exactly equivalent to calling
+  :func:`os.scandir` on a path object.
+
+  (Contributed by Barney Gale in :gh:`125413`.)
+
 
 pdb
 ---
index 11c8018b28f26bf9a01442b192c922688e0c50bd..dfff8b460d1bf106104bf6eb65489e3acc19f8ae 100644 (file)
@@ -639,13 +639,23 @@ class PathBase(PurePathBase):
         with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
             return f.write(data)
 
+    def scandir(self):
+        """Yield os.DirEntry objects of the directory contents.
+
+        The children are yielded in arbitrary order, and the
+        special entries '.' and '..' are not included.
+        """
+        raise UnsupportedOperation(self._unsupported_msg('scandir()'))
+
     def iterdir(self):
         """Yield path objects of the directory contents.
 
         The children are yielded in arbitrary order, and the
         special entries '.' and '..' are not included.
         """
-        raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
+        with self.scandir() as entries:
+            names = [entry.name for entry in entries]
+        return map(self.joinpath, names)
 
     def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
         if case_sensitive is None:
index a78997179820b1bb6ac3045b1cd96c47291a826c..ef072b83d969045761d757ea35eaa638f8279846 100644 (file)
@@ -615,6 +615,14 @@ class Path(PathBase, PurePath):
                 path_str = path_str[:-1]
             yield path_str
 
+    def scandir(self):
+        """Yield os.DirEntry objects of the directory contents.
+
+        The children are yielded in arbitrary order, and the
+        special entries '.' and '..' are not included.
+        """
+        return os.scandir(self)
+
     def iterdir(self):
         """Yield path objects of the directory contents.
 
index 08355a71453807bdb10b4ae7ebe3d5202eb33b04..11e34f5d378a58cba5e417e498d3a5c71cc684c0 100644 (file)
@@ -1,4 +1,5 @@
 import collections
+import contextlib
 import io
 import os
 import errno
@@ -1424,6 +1425,24 @@ DummyPathStatResult = collections.namedtuple(
     'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime')
 
 
+class DummyDirEntry:
+    """
+    Minimal os.DirEntry-like object. Returned from DummyPath.scandir().
+    """
+    __slots__ = ('name', '_is_symlink', '_is_dir')
+
+    def __init__(self, name, is_symlink, is_dir):
+        self.name = name
+        self._is_symlink = is_symlink
+        self._is_dir = is_dir
+
+    def is_symlink(self):
+        return self._is_symlink
+
+    def is_dir(self, *, follow_symlinks=True):
+        return self._is_dir and (follow_symlinks or not self._is_symlink)
+
+
 class DummyPath(PathBase):
     """
     Simple implementation of PathBase that keeps files and directories in
@@ -1491,14 +1510,25 @@ class DummyPath(PathBase):
             stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline)
         return stream
 
-    def iterdir(self):
-        path = str(self.resolve())
-        if path in self._files:
-            raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
-        elif path in self._directories:
-            return iter([self / name for name in self._directories[path]])
+    @contextlib.contextmanager
+    def scandir(self):
+        path = self.resolve()
+        path_str = str(path)
+        if path_str in self._files:
+            raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str)
+        elif path_str in self._directories:
+            yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]])
         else:
-            raise FileNotFoundError(errno.ENOENT, "File not found", path)
+            raise FileNotFoundError(errno.ENOENT, "File not found", path_str)
+
+    @property
+    def _dir_entry(self):
+        path_str = str(self)
+        is_symlink = path_str in self._symlinks
+        is_directory = (path_str in self._directories
+                        if not is_symlink
+                        else self._symlinks[path_str][1])
+        return DummyDirEntry(self.name, is_symlink, is_directory)
 
     def mkdir(self, mode=0o777, parents=False, exist_ok=False):
         path = str(self.parent.resolve() / self.name)
@@ -1602,7 +1632,7 @@ class DummyPathTest(DummyPurePathTest):
         if self.can_symlink:
             p.joinpath('linkA').symlink_to('fileA')
             p.joinpath('brokenLink').symlink_to('non-existing')
-            p.joinpath('linkB').symlink_to('dirB')
+            p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
             p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
             p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
             p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
@@ -2187,6 +2217,23 @@ class DummyPathTest(DummyPurePathTest):
         self.assertIn(cm.exception.errno, (errno.ENOTDIR,
                                            errno.ENOENT, errno.EINVAL))
 
+    def test_scandir(self):
+        p = self.cls(self.base)
+        with p.scandir() as entries:
+            self.assertTrue(list(entries))
+        with p.scandir() as entries:
+            for entry in entries:
+                child = p / entry.name
+                self.assertIsNotNone(entry)
+                self.assertEqual(entry.name, child.name)
+                self.assertEqual(entry.is_symlink(),
+                                 child.is_symlink())
+                self.assertEqual(entry.is_dir(follow_symlinks=False),
+                                 child.is_dir(follow_symlinks=False))
+                if entry.name != 'brokenLinkLoop':
+                    self.assertEqual(entry.is_dir(), child.is_dir())
+
+
     def test_glob_common(self):
         def _check(glob, expected):
             self.assertEqual(set(glob), { P(self.base, q) for q in expected })
@@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath):
     def readlink(self):
         path = str(self.parent.resolve() / self.name)
         if path in self._symlinks:
-            return self.with_segments(self._symlinks[path])
+            return self.with_segments(self._symlinks[path][0])
         elif path in self._files or path in self._directories:
             raise OSError(errno.EINVAL, "Not a symlink", path)
         else:
@@ -3050,7 +3097,7 @@ class DummyPathWithSymlinks(DummyPath):
         if path in self._symlinks:
             raise FileExistsError(errno.EEXIST, "File exists", path)
         self._directories[parent].add(self.name)
-        self._symlinks[path] = str(target)
+        self._symlinks[path] = str(target), target_is_directory
 
 
 class DummyPathWithSymlinksTest(DummyPathTest):
diff --git a/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst b/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst
new file mode 100644 (file)
index 0000000..ddf1f97
--- /dev/null
@@ -0,0 +1,3 @@
+Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory
+children and their file attributes. This is a trivial wrapper of
+:func:`os.scandir`.