git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-73991: Add `pathlib.Path.copy()` (#119058)
author Barney Gale <barney.gale@gmail.com>
Fri, 14 Jun 2024 16:15:49 +0000 (17:15 +0100)
committer GitHub <noreply@github.com>
Fri, 14 Jun 2024 16:15:49 +0000 (17:15 +0100)
Add a `Path.copy()` method that copies the content of one file to another.

This method is similar to `shutil.copyfile()` but differs in the following ways:

- Uses `fcntl.FICLONE` where available (see GH-81338)
- Uses `os.copy_file_range` where available (see GH-81340)
- Uses `_winapi.CopyFile2` where available, even though this copies more metadata than the other implementations. This makes `WindowsPath.copy()` more similar to `shutil.copy2()`.

The method is presently _less_ specified than the `shutil` functions to allow OS-specific optimizations that might copy more or less metadata.

Incorporates code from GH-81338 and GH-93152.

Co-authored-by: Eryk Sun <eryksun@gmail.com>
Doc/library/pathlib.rst
Doc/whatsnew/3.14.rst
Lib/pathlib/_abc.py
Lib/pathlib/_local.py
Lib/pathlib/_os.py [new file with mode: 0644]
Lib/test/test_pathlib/test_pathlib_abc.py
Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst [new file with mode: 0644]

index 278851549c6c3bb5beb0e6b8f7c197c7986f9558..c8a3272d7bab4c71b5b587104706b2c839816f4e 100644 (file)
@@ -1429,8 +1429,22 @@ Creating files and directories
       available. In previous versions, :exc:`NotImplementedError` was raised.
 
 
-Renaming and deleting
-^^^^^^^^^^^^^^^^^^^^^
+Copying, renaming and deleting
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. method:: Path.copy(target)
+
+   Copy the contents of this file to the *target* file. If *target* specifies
+   a file that already exists, it will be replaced.
+
+   .. note::
+      This method uses operating system functionality to copy file content
+      efficiently. The OS might also copy some metadata, such as file
+      permissions. After the copy is complete, users may wish to call
+      :meth:`Path.chmod` to set the permissions of the target file.
+
+   .. versionadded:: 3.14
+
 
 .. method:: Path.rename(target)
 
index b357553735e8bbfe6e4ab7e3c119f60346b66087..a102af13a08362652a442ac3aea82d001d274057 100644 (file)
@@ -100,6 +100,13 @@ os
   by :func:`os.unsetenv`, or made outside Python in the same process.
   (Contributed by Victor Stinner in :gh:`120057`.)
 
+pathlib
+-------
+
+* Add :meth:`pathlib.Path.copy`, which copies the content of one file to
+  another, like :func:`shutil.copyfile`.
+  (Contributed by Barney Gale in :gh:`73991`.)
+
 symtable
 --------
 
index ecea8e88d1a2e3ad34974c861f9065f1bd1d2cdd..586145ead384ea6e1d25408bbb30b97f47b0abc5 100644 (file)
@@ -16,6 +16,7 @@ import operator
 import posixpath
 from glob import _GlobberBase, _no_recurse_symlinks
 from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
+from ._os import copyfileobj
 
 
 __all__ = ["UnsupportedOperation"]
@@ -563,6 +564,15 @@ class PathBase(PurePathBase):
         return (st.st_ino == other_st.st_ino and
                 st.st_dev == other_st.st_dev)
 
+    def _samefile_safe(self, other_path):
+        """
+        Like samefile(), but returns False rather than raising OSError.
+        """
+        try:
+            return self.samefile(other_path)
+        except (OSError, ValueError):
+            return False
+
     def open(self, mode='r', buffering=-1, encoding=None,
              errors=None, newline=None):
         """
@@ -780,6 +790,26 @@ class PathBase(PurePathBase):
         """
         raise UnsupportedOperation(self._unsupported_msg('mkdir()'))
 
+    def copy(self, target):
+        """
+        Copy the contents of this file to the given target.
+        """
+        if not isinstance(target, PathBase):
+            target = self.with_segments(target)
+        if self._samefile_safe(target):
+            raise OSError(f"{self!r} and {target!r} are the same file")
+        with self.open('rb') as source_f:
+            try:
+                with target.open('wb') as target_f:
+                    copyfileobj(source_f, target_f)
+            except IsADirectoryError as e:
+                if not target.exists():
+                    # Raise a less confusing exception.
+                    raise FileNotFoundError(
+                        f'Directory does not exist: {target}') from e
+                else:
+                    raise
+
     def rename(self, target):
         """
         Rename this path to the target path.
index 473fd525768b509b6dc5f59237a4258bb6d531d1..cffed10dbd12076cec4ca5030c6f94a10d7c0ffc 100644 (file)
@@ -18,6 +18,7 @@ except ImportError:
     grp = None
 
 from ._abc import UnsupportedOperation, PurePathBase, PathBase
+from ._os import copyfile
 
 
 __all__ = [
@@ -780,6 +781,21 @@ class Path(PathBase, PurePath):
             if not exist_ok or not self.is_dir():
                 raise
 
+    if copyfile:
+        def copy(self, target):
+            """
+            Copy the contents of this file to the given target.
+            """
+            try:
+                target = os.fspath(target)
+            except TypeError:
+                if isinstance(target, PathBase):
+                    # Target is an instance of PathBase but not os.PathLike.
+                    # Use generic implementation from PathBase.
+                    return PathBase.copy(self, target)
+                raise
+            copyfile(os.fspath(self), target)
+
     def chmod(self, mode, *, follow_symlinks=True):
         """
         Change the permissions of the path, like os.chmod().
diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py
new file mode 100644 (file)
index 0000000..1771d54
--- /dev/null
@@ -0,0 +1,138 @@
+"""
+Low-level OS functionality wrappers used by pathlib.
+"""
+
+from errno import EBADF, EOPNOTSUPP, ETXTBSY, EXDEV
+import os
+import sys
+try:
+    import fcntl
+except ImportError:
+    fcntl = None
+try:
+    import posix
+except ImportError:
+    posix = None
+try:
+    import _winapi
+except ImportError:
+    _winapi = None
+
+
+def get_copy_blocksize(infd):
+    """Determine blocksize for fastcopying on Linux.
+    Hopefully the whole file will be copied in a single call.
+    The copying itself should be performed in a loop 'till EOF is
+    reached (0 return) so a blocksize smaller or bigger than the actual
+    file size should not make any difference, also in case the file
+    content changes while being copied.
+    """
+    try:
+        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8 MiB
+    except OSError:
+        blocksize = 2 ** 27  # 128 MiB
+    # On 32-bit architectures truncate to 1 GiB to avoid OverflowError,
+    # see gh-82500.
+    if sys.maxsize < 2 ** 32:
+        blocksize = min(blocksize, 2 ** 30)
+    return blocksize
+
+
+if fcntl and hasattr(fcntl, 'FICLONE'):
+    def clonefd(source_fd, target_fd):
+        """
+        Perform a lightweight copy of two files, where the data blocks are
+        copied only when modified. This is known as Copy on Write (CoW),
+        instantaneous copy or reflink.
+        """
+        fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd)
+else:
+    clonefd = None
+
+
+if posix and hasattr(posix, '_fcopyfile'):
+    def copyfd(source_fd, target_fd):
+        """
+        Copy a regular file content using high-performance fcopyfile(3)
+        syscall (macOS).
+        """
+        posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA)
+elif hasattr(os, 'copy_file_range'):
+    def copyfd(source_fd, target_fd):
+        """
+        Copy data from one regular mmap-like fd to another by using a
+        high-performance copy_file_range(2) syscall that gives filesystems
+        an opportunity to implement the use of reflinks or server-side
+        copy.
+        This should work on Linux >= 4.5 only.
+        """
+        blocksize = get_copy_blocksize(source_fd)
+        offset = 0
+        while True:
+            sent = os.copy_file_range(source_fd, target_fd, blocksize,
+                                      offset_dst=offset)
+            if sent == 0:
+                break  # EOF
+            offset += sent
+elif hasattr(os, 'sendfile'):
+    def copyfd(source_fd, target_fd):
+        """Copy data from one regular mmap-like fd to another by using
+        high-performance sendfile(2) syscall.
+        This should work on Linux >= 2.6.33 only.
+        """
+        blocksize = get_copy_blocksize(source_fd)
+        offset = 0
+        while True:
+            sent = os.sendfile(target_fd, source_fd, offset, blocksize)
+            if sent == 0:
+                break  # EOF
+            offset += sent
+else:
+    copyfd = None
+
+
+if _winapi and hasattr(_winapi, 'CopyFile2'):
+    def copyfile(source, target):
+        """
+        Copy from one file to another using CopyFile2 (Windows only).
+        """
+        _winapi.CopyFile2(source, target, 0)
+else:
+    copyfile = None
+
+
+def copyfileobj(source_f, target_f):
+    """
+    Copy data from file-like object source_f to file-like object target_f.
+    """
+    try:
+        source_fd = source_f.fileno()
+        target_fd = target_f.fileno()
+    except Exception:
+        pass  # Fall through to generic code.
+    else:
+        try:
+            # Use OS copy-on-write where available.
+            if clonefd:
+                try:
+                    clonefd(source_fd, target_fd)
+                    return
+                except OSError as err:
+                    if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV):
+                        raise err
+
+            # Use OS copy where available.
+            if copyfd:
+                copyfd(source_fd, target_fd)
+                return
+        except OSError as err:
+            # Produce more useful error messages.
+            err.filename = source_f.name
+            err.filename2 = target_f.name
+            raise err
+
+    # Last resort: copy with fileobj read() and write().
+    read_source = source_f.read
+    write_target = target_f.write
+    while buf := read_source(1024 * 1024):
+        write_target(buf)
index 57cc1612c03468af47ebc6927a665f464dc3d952..fd71284159d5c0d4d78c5d1447d861c451d27bd5 100644 (file)
@@ -1696,6 +1696,68 @@ class DummyPathTest(DummyPurePathTest):
         self.assertEqual((p / 'fileA').read_bytes(),
                           b'abcde' + os_linesep_byte + b'fghlk' + os_linesep_byte + b'\rmnopq')
 
+    def test_copy_file(self):
+        base = self.cls(self.base)
+        source = base / 'fileA'
+        target = base / 'copyA'
+        source.copy(target)
+        self.assertTrue(target.exists())
+        self.assertEqual(source.read_text(), target.read_text())
+
+    def test_copy_directory(self):
+        base = self.cls(self.base)
+        source = base / 'dirA'
+        target = base / 'copyA'
+        with self.assertRaises(OSError):
+            source.copy(target)
+
+    @needs_symlinks
+    def test_copy_symlink(self):
+        base = self.cls(self.base)
+        source = base / 'linkA'
+        target = base / 'copyA'
+        source.copy(target)
+        self.assertTrue(target.exists())
+        self.assertFalse(target.is_symlink())
+        self.assertEqual(source.read_text(), target.read_text())
+
+    def test_copy_to_existing_file(self):
+        base = self.cls(self.base)
+        source = base / 'fileA'
+        target = base / 'dirB' / 'fileB'
+        source.copy(target)
+        self.assertTrue(target.exists())
+        self.assertEqual(source.read_text(), target.read_text())
+
+    def test_copy_to_existing_directory(self):
+        base = self.cls(self.base)
+        source = base / 'fileA'
+        target = base / 'dirA'
+        with self.assertRaises(OSError):
+            source.copy(target)
+
+    @needs_symlinks
+    def test_copy_to_existing_symlink(self):
+        base = self.cls(self.base)
+        source = base / 'dirB' / 'fileB'
+        target = base / 'linkA'
+        real_target = base / 'fileA'
+        source.copy(target)
+        self.assertTrue(target.exists())
+        self.assertTrue(target.is_symlink())
+        self.assertTrue(real_target.exists())
+        self.assertFalse(real_target.is_symlink())
+        self.assertEqual(source.read_text(), real_target.read_text())
+
+    def test_copy_empty(self):
+        base = self.cls(self.base)
+        source = base / 'empty'
+        target = base / 'copyA'
+        source.write_bytes(b'')
+        source.copy(target)
+        self.assertTrue(target.exists())
+        self.assertEqual(target.read_bytes(), b'')
+
     def test_iterdir(self):
         P = self.cls
         p = P(self.base)
diff --git a/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst b/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst
new file mode 100644 (file)
index 0000000..c2953c6
--- /dev/null
@@ -0,0 +1,2 @@
+Add :meth:`pathlib.Path.copy`, which copies the content of one file to another,
+like :func:`shutil.copyfile`.