1 /* SPDX-License-Identifier: LGPL-2.1+ */
8 #include <sys/sendfile.h>
12 #include "alloc-util.h"
13 #include "btrfs-util.h"
14 #include "chattr-util.h"
16 #include "dirent-util.h"
21 #include "missing_syscall.h"
22 #include "mountpoint-util.h"
23 #include "stat-util.h"
24 #include "string-util.h"
26 #include "time-util.h"
27 #include "tmpfile-util.h"
28 #include "umask-util.h"
29 #include "user-util.h"
30 #include "xattr-util.h"
32 #define COPY_BUFFER_SIZE (16U*1024U)
34 /* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
35 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
36 * case of bind mount cycles and suchlike. */
37 #define COPY_DEPTH_MAX 2048U
39 static ssize_t
try_copy_file_range(
40 int fd_in
, loff_t
*off_in
,
41 int fd_out
, loff_t
*off_out
,
51 r
= copy_file_range(fd_in
, off_in
, fd_out
, off_out
, len
, flags
);
53 have
= r
>= 0 || errno
!= ENOSYS
;
63 FD_IS_NONBLOCKING_PIPE
,
66 static int fd_is_nonblock_pipe(int fd
) {
70 /* Checks whether the specified file descriptor refers to a pipe, and if so if O_NONBLOCK is set. */
72 if (fstat(fd
, &st
) < 0)
75 if (!S_ISFIFO(st
.st_mode
))
78 flags
= fcntl(fd
, F_GETFL
);
82 return FLAGS_SET(flags
, O_NONBLOCK
) ? FD_IS_NONBLOCKING_PIPE
: FD_IS_BLOCKING_PIPE
;
85 static int sigint_pending(void) {
88 assert_se(sigemptyset(&ss
) >= 0);
89 assert_se(sigaddset(&ss
, SIGINT
) >= 0);
91 if (sigtimedwait(&ss
, NULL
, &(struct timespec
) { 0, 0 }) < 0) {
104 CopyFlags copy_flags
,
106 size_t *ret_remains_size
,
107 copy_progress_bytes_t progress
,
110 bool try_cfr
= true, try_sendfile
= true, try_splice
= true;
111 int r
, nonblock_pipe
= -1;
112 size_t m
= SSIZE_MAX
; /* that is the maximum that sendfile and c_f_r accept */
117 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
118 * of 'max_bytes', which may be specified as UINT64_MAX, in which case no maximum is applied. Returns negative on
119 * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
120 * reason but we read but didn't yet write some data and ret_remains/ret_remains_size are not NULL, then it will
121 * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
122 * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
123 * these parameters if non-NULL are set to NULL. */
127 if (ret_remains_size
)
128 *ret_remains_size
= 0;
130 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
131 * source and destination first. */
132 if ((copy_flags
& COPY_REFLINK
)) {
135 foffset
= lseek(fdf
, 0, SEEK_CUR
);
139 toffset
= lseek(fdt
, 0, SEEK_CUR
);
142 if (foffset
== 0 && toffset
== 0 && max_bytes
== UINT64_MAX
)
143 r
= btrfs_reflink(fdf
, fdt
); /* full file reflink */
145 r
= btrfs_clone_range(fdf
, foffset
, fdt
, toffset
, max_bytes
== UINT64_MAX
? 0 : max_bytes
); /* partial reflink */
149 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
150 if (max_bytes
== UINT64_MAX
) {
152 /* We cloned to the end of the source file, let's position the read
153 * pointer there, and query it at the same time. */
154 t
= lseek(fdf
, 0, SEEK_END
);
160 /* Let's adjust the destination file write pointer by the same number
162 t
= lseek(fdt
, toffset
+ (t
- foffset
), SEEK_SET
);
166 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
168 t
= lseek(fdf
, foffset
+ max_bytes
, SEEK_SET
);
172 t
= lseek(fdt
, toffset
+ max_bytes
, SEEK_SET
);
176 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
187 return 1; /* return > 0 if we hit the max_bytes limit */
189 if (FLAGS_SET(copy_flags
, COPY_SIGINT
)) {
190 r
= sigint_pending();
197 if (max_bytes
!= UINT64_MAX
&& m
> max_bytes
)
200 /* First try copy_file_range(), unless we already tried */
202 n
= try_copy_file_range(fdf
, NULL
, fdt
, NULL
, m
, 0u);
204 if (!IN_SET(n
, -EINVAL
, -ENOSYS
, -EXDEV
, -EBADF
))
208 /* use fallback below */
209 } else if (n
== 0) /* EOF */
216 /* Then try sendfile(), unless we already tried */
218 n
= sendfile(fdt
, fdf
, NULL
, m
);
220 if (!IN_SET(errno
, EINVAL
, ENOSYS
))
223 try_sendfile
= false;
224 /* use fallback below */
225 } else if (n
== 0) /* EOF */
232 /* Then try splice, unless we already tried. */
235 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
236 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
237 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
238 * check if either of the specified fds are a pipe, and if so, let's pass the flag
239 * automatically, depending on O_NONBLOCK being set.
241 * Here's a twist though: when we use it to move data between two pipes of which one has
242 * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
243 * behaviour. Hence in that case we can't use splice() and still guarantee systematic
244 * O_NONBLOCK behaviour, hence don't. */
246 if (nonblock_pipe
< 0) {
249 /* Check if either of these fds is a pipe, and if so non-blocking or not */
250 a
= fd_is_nonblock_pipe(fdf
);
254 b
= fd_is_nonblock_pipe(fdt
);
258 if ((a
== FD_IS_NO_PIPE
&& b
== FD_IS_NO_PIPE
) ||
259 (a
== FD_IS_BLOCKING_PIPE
&& b
== FD_IS_NONBLOCKING_PIPE
) ||
260 (a
== FD_IS_NONBLOCKING_PIPE
&& b
== FD_IS_BLOCKING_PIPE
))
262 /* splice() only works if one of the fds is a pipe. If neither is, let's skip
263 * this step right-away. As mentioned above, if one of the two fds refers to a
264 * blocking pipe and the other to a non-blocking pipe, we can't use splice()
265 * either, hence don't try either. This hence means we can only use splice() if
266 * either only one of the two fds is a pipe, or if both are pipes with the same
267 * nonblocking flag setting. */
271 nonblock_pipe
= a
== FD_IS_NONBLOCKING_PIPE
|| b
== FD_IS_NONBLOCKING_PIPE
;
276 n
= splice(fdf
, NULL
, fdt
, NULL
, m
, nonblock_pipe
? SPLICE_F_NONBLOCK
: 0);
278 if (!IN_SET(errno
, EINVAL
, ENOSYS
))
282 /* use fallback below */
283 } else if (n
== 0) /* EOF */
290 /* As a fallback just copy bits by hand */
292 uint8_t buf
[MIN(m
, COPY_BUFFER_SIZE
)], *p
= buf
;
295 n
= read(fdf
, buf
, sizeof buf
);
298 if (n
== 0) /* EOF */
305 k
= write(fdt
, p
, z
);
319 if (ret_remains_size
)
320 *ret_remains_size
= z
;
333 r
= progress(n
, userdata
);
338 if (max_bytes
!= (uint64_t) -1) {
339 assert(max_bytes
>= (uint64_t) n
);
343 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy,
344 * so reduce our maximum by the amount we already copied,
345 * but don't go below our copy buffer size, unless we are
346 * close to the limit of bytes we are allowed to copy. */
347 m
= MAX(MIN(COPY_BUFFER_SIZE
, max_bytes
), m
- n
);
350 return 0; /* return 0 if we hit EOF earlier than the size limit */
353 static int fd_copy_symlink(
356 const struct stat
*st
,
361 CopyFlags copy_flags
) {
363 _cleanup_free_
char *target
= NULL
;
370 r
= readlinkat_malloc(df
, from
, &target
);
374 if (symlinkat(target
, dt
, to
) < 0)
378 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
379 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
,
380 AT_SYMLINK_NOFOLLOW
) < 0)
386 static int fd_copy_regular(
389 const struct stat
*st
,
394 CopyFlags copy_flags
,
395 copy_progress_bytes_t progress
,
398 _cleanup_close_
int fdf
= -1, fdt
= -1;
399 struct timespec ts
[2];
406 fdf
= openat(df
, from
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
);
410 fdt
= openat(dt
, to
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
, st
->st_mode
& 07777);
414 r
= copy_bytes_full(fdf
, fdt
, (uint64_t) -1, copy_flags
, NULL
, NULL
, progress
, userdata
);
416 (void) unlinkat(dt
, to
, 0);
421 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
422 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
) < 0)
425 if (fchmod(fdt
, st
->st_mode
& 07777) < 0)
430 (void) futimens(fdt
, ts
);
431 (void) copy_xattr(fdf
, fdt
);
438 (void) unlinkat(dt
, to
, 0);
444 static int fd_copy_fifo(
447 const struct stat
*st
,
452 CopyFlags copy_flags
) {
459 r
= mkfifoat(dt
, to
, st
->st_mode
& 07777);
464 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
465 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
,
466 AT_SYMLINK_NOFOLLOW
) < 0)
469 if (fchmodat(dt
, to
, st
->st_mode
& 07777, 0) < 0)
475 static int fd_copy_node(
478 const struct stat
*st
,
483 CopyFlags copy_flags
) {
490 r
= mknodat(dt
, to
, st
->st_mode
, st
->st_rdev
);
495 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
496 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
,
497 AT_SYMLINK_NOFOLLOW
) < 0)
500 if (fchmodat(dt
, to
, st
->st_mode
& 07777, 0) < 0)
506 static int fd_copy_directory(
509 const struct stat
*st
,
512 dev_t original_device
,
516 CopyFlags copy_flags
,
517 const char *display_path
,
518 copy_progress_path_t progress_path
,
519 copy_progress_bytes_t progress_bytes
,
522 _cleanup_close_
int fdf
= -1, fdt
= -1;
523 _cleanup_closedir_
DIR *d
= NULL
;
525 bool exists
, created
;
532 return -ENAMETOOLONG
;
535 fdf
= openat(df
, from
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
);
537 fdf
= fcntl(df
, F_DUPFD_CLOEXEC
, 3);
547 if (copy_flags
& COPY_MERGE_EMPTY
) {
548 r
= dir_is_empty_at(dt
, to
);
549 if (r
< 0 && r
!= -ENOENT
)
558 r
= mkdirat(dt
, to
, st
->st_mode
& 07777);
561 else if (errno
== EEXIST
&& (copy_flags
& COPY_MERGE
))
567 fdt
= openat(dt
, to
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
);
573 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
574 const char *child_display_path
= NULL
;
575 _cleanup_free_
char *dp
= NULL
;
579 if (dot_or_dot_dot(de
->d_name
))
582 if (FLAGS_SET(copy_flags
, COPY_SIGINT
)) {
583 r
= sigint_pending();
590 if (fstatat(dirfd(d
), de
->d_name
, &buf
, AT_SYMLINK_NOFOLLOW
) < 0) {
597 child_display_path
= dp
= path_join(display_path
, de
->d_name
);
599 child_display_path
= de
->d_name
;
601 r
= progress_path(child_display_path
, &buf
, userdata
);
606 if (S_ISDIR(buf
.st_mode
)) {
608 * Don't descend into directories on other file systems, if this is requested. We do a simple
609 * .st_dev check here, which basically comes for free. Note that we do this check only on
610 * directories, not other kind of file system objects, for two reasons:
612 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
613 * propagates the .st_dev field of the file system a file originates from all the way up
614 * through the stack to stat(). It doesn't do that for directories however. This means that
615 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
616 * confusion we hence avoid relying on this check for regular files.
618 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
619 * where we really want to avoid descending down in all eternity. However the .st_dev check
620 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
621 * file system onto itself can't be detected that way. (Note we also do a recursion depth
622 * check, which is probably the better protection in this regard, which is why
623 * COPY_SAME_MOUNT is optional).
626 if (FLAGS_SET(copy_flags
, COPY_SAME_MOUNT
)) {
627 if (buf
.st_dev
!= original_device
)
630 r
= fd_is_mount_point(dirfd(d
), de
->d_name
, 0);
637 q
= fd_copy_directory(dirfd(d
), de
->d_name
, &buf
, fdt
, de
->d_name
, original_device
, depth_left
-1, override_uid
, override_gid
, copy_flags
, child_display_path
, progress_path
, progress_bytes
, userdata
);
638 } else if (S_ISREG(buf
.st_mode
))
639 q
= fd_copy_regular(dirfd(d
), de
->d_name
, &buf
, fdt
, de
->d_name
, override_uid
, override_gid
, copy_flags
, progress_bytes
, userdata
);
640 else if (S_ISLNK(buf
.st_mode
))
641 q
= fd_copy_symlink(dirfd(d
), de
->d_name
, &buf
, fdt
, de
->d_name
, override_uid
, override_gid
, copy_flags
);
642 else if (S_ISFIFO(buf
.st_mode
))
643 q
= fd_copy_fifo(dirfd(d
), de
->d_name
, &buf
, fdt
, de
->d_name
, override_uid
, override_gid
, copy_flags
);
644 else if (S_ISBLK(buf
.st_mode
) || S_ISCHR(buf
.st_mode
) || S_ISSOCK(buf
.st_mode
))
645 q
= fd_copy_node(dirfd(d
), de
->d_name
, &buf
, fdt
, de
->d_name
, override_uid
, override_gid
, copy_flags
);
649 if (q
== -EINTR
) /* Propagate SIGINT up instantly */
651 if (q
== -EEXIST
&& (copy_flags
& COPY_MERGE
))
658 struct timespec ut
[2] = {
664 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
665 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
) < 0)
668 if (fchmod(fdt
, st
->st_mode
& 07777) < 0)
671 (void) copy_xattr(dirfd(d
), fdt
);
672 (void) futimens(fdt
, ut
);
678 int copy_tree_at_full(
685 CopyFlags copy_flags
,
686 copy_progress_path_t progress_path
,
687 copy_progress_bytes_t progress_bytes
,
695 if (fstatat(fdf
, from
, &st
, AT_SYMLINK_NOFOLLOW
) < 0)
698 if (S_ISREG(st
.st_mode
))
699 return fd_copy_regular(fdf
, from
, &st
, fdt
, to
, override_uid
, override_gid
, copy_flags
, progress_bytes
, userdata
);
700 else if (S_ISDIR(st
.st_mode
))
701 return fd_copy_directory(fdf
, from
, &st
, fdt
, to
, st
.st_dev
, COPY_DEPTH_MAX
, override_uid
, override_gid
, copy_flags
, NULL
, progress_path
, progress_bytes
, userdata
);
702 else if (S_ISLNK(st
.st_mode
))
703 return fd_copy_symlink(fdf
, from
, &st
, fdt
, to
, override_uid
, override_gid
, copy_flags
);
704 else if (S_ISFIFO(st
.st_mode
))
705 return fd_copy_fifo(fdf
, from
, &st
, fdt
, to
, override_uid
, override_gid
, copy_flags
);
706 else if (S_ISBLK(st
.st_mode
) || S_ISCHR(st
.st_mode
) || S_ISSOCK(st
.st_mode
))
707 return fd_copy_node(fdf
, from
, &st
, fdt
, to
, override_uid
, override_gid
, copy_flags
);
712 int copy_directory_fd_full(
715 CopyFlags copy_flags
,
716 copy_progress_path_t progress_path
,
717 copy_progress_bytes_t progress_bytes
,
725 if (fstat(dirfd
, &st
) < 0)
728 if (!S_ISDIR(st
.st_mode
))
731 return fd_copy_directory(dirfd
, NULL
, &st
, AT_FDCWD
, to
, st
.st_dev
, COPY_DEPTH_MAX
, UID_INVALID
, GID_INVALID
, copy_flags
, NULL
, progress_path
, progress_bytes
, userdata
);
734 int copy_directory_full(
737 CopyFlags copy_flags
,
738 copy_progress_path_t progress_path
,
739 copy_progress_bytes_t progress_bytes
,
747 if (lstat(from
, &st
) < 0)
750 if (!S_ISDIR(st
.st_mode
))
753 return fd_copy_directory(AT_FDCWD
, from
, &st
, AT_FDCWD
, to
, st
.st_dev
, COPY_DEPTH_MAX
, UID_INVALID
, GID_INVALID
, copy_flags
, NULL
, progress_path
, progress_bytes
, userdata
);
756 int copy_file_fd_full(
759 CopyFlags copy_flags
,
760 copy_progress_bytes_t progress_bytes
,
763 _cleanup_close_
int fdf
= -1;
769 fdf
= open(from
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
773 r
= copy_bytes_full(fdf
, fdt
, (uint64_t) -1, copy_flags
, NULL
, NULL
, progress_bytes
, userdata
);
775 (void) copy_times(fdf
, fdt
, copy_flags
);
776 (void) copy_xattr(fdf
, fdt
);
786 unsigned chattr_flags
,
787 unsigned chattr_mask
,
788 CopyFlags copy_flags
,
789 copy_progress_bytes_t progress_bytes
,
797 RUN_WITH_UMASK(0000) {
798 fdt
= open(to
, flags
|O_WRONLY
|O_CREAT
|O_CLOEXEC
|O_NOCTTY
, mode
);
803 if (chattr_mask
!= 0)
804 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& CHATTR_EARLY_FL
, NULL
);
806 r
= copy_file_fd_full(from
, fdt
, copy_flags
, progress_bytes
, userdata
);
813 if (chattr_mask
!= 0)
814 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& ~CHATTR_EARLY_FL
, NULL
);
816 if (close(fdt
) < 0) {
824 int copy_file_atomic_full(
828 unsigned chattr_flags
,
829 unsigned chattr_mask
,
830 CopyFlags copy_flags
,
831 copy_progress_bytes_t progress_bytes
,
834 _cleanup_(unlink_and_freep
) char *t
= NULL
;
835 _cleanup_close_
int fdt
= -1;
841 /* We try to use O_TMPFILE here to create the file if we can. Note that that only works if COPY_REPLACE is not
842 * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
843 * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
844 * system right-away and unconditionally which we then can renameat() to the right name after we completed
847 if (copy_flags
& COPY_REPLACE
) {
848 r
= tempfn_random(to
, NULL
, &t
);
852 fdt
= open(t
, O_CREAT
|O_EXCL
|O_NOFOLLOW
|O_NOCTTY
|O_WRONLY
|O_CLOEXEC
, 0600);
858 fdt
= open_tmpfile_linkable(to
, O_WRONLY
|O_CLOEXEC
, &t
);
863 if (chattr_mask
!= 0)
864 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& CHATTR_EARLY_FL
, NULL
);
866 r
= copy_file_fd_full(from
, fdt
, copy_flags
, progress_bytes
, userdata
);
870 if (fchmod(fdt
, mode
) < 0)
873 if (copy_flags
& COPY_REPLACE
) {
874 if (renameat(AT_FDCWD
, t
, AT_FDCWD
, to
) < 0)
877 r
= link_tmpfile(fdt
, t
, to
);
882 if (chattr_mask
!= 0)
883 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& ~CHATTR_EARLY_FL
, NULL
);
889 int copy_times(int fdf
, int fdt
, CopyFlags flags
) {
890 struct timespec ut
[2];
896 if (fstat(fdf
, &st
) < 0)
902 if (futimens(fdt
, ut
) < 0)
905 if (FLAGS_SET(flags
, COPY_CRTIME
)) {
908 if (fd_getcrtime(fdf
, &crtime
) >= 0)
909 (void) fd_setcrtime(fdt
, crtime
);
915 int copy_xattr(int fdf
, int fdt
) {
916 _cleanup_free_
char *bufa
= NULL
, *bufb
= NULL
;
917 size_t sza
= 100, szb
= 100;
927 n
= flistxattr(fdf
, bufa
, sza
);
945 assert(l
< (size_t) n
);
947 if (startswith(p
, "user.")) {
956 m
= fgetxattr(fdf
, p
, bufb
, szb
);
958 if (errno
== ERANGE
) {
967 if (fsetxattr(fdt
, p
, bufb
, m
, 0) < 0)