1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
5 #include <linux/btrfs.h>
10 #include <sys/ioctl.h>
11 #include <sys/sendfile.h>
12 #include <sys/xattr.h>
15 #include "alloc-util.h"
16 #include "btrfs-util.h"
17 #include "chattr-util.h"
19 #include "dirent-util.h"
25 #include "missing_fs.h"
26 #include "missing_syscall.h"
27 #include "mkdir-label.h"
28 #include "mountpoint-util.h"
29 #include "nulstr-util.h"
31 #include "selinux-util.h"
32 #include "signal-util.h"
33 #include "stat-util.h"
34 #include "stdio-util.h"
35 #include "string-util.h"
37 #include "sync-util.h"
38 #include "time-util.h"
39 #include "tmpfile-util.h"
40 #include "umask-util.h"
41 #include "user-util.h"
42 #include "xattr-util.h"
44 #define COPY_BUFFER_SIZE (16U*1024U)
46 /* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
47 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
48 * case of bind mount cycles and suchlike. */
49 #define COPY_DEPTH_MAX 2048U
51 static ssize_t
try_copy_file_range(
52 int fd_in
, loff_t
*off_in
,
53 int fd_out
, loff_t
*off_out
,
63 r
= copy_file_range(fd_in
, off_in
, fd_out
, off_out
, len
, flags
);
65 have
= r
>= 0 || errno
!= ENOSYS
;
75 FD_IS_NONBLOCKING_PIPE
,
78 static int fd_is_nonblock_pipe(int fd
) {
82 /* Checks whether the specified file descriptor refers to a pipe, and if so, whether O_NONBLOCK is set. */
84 if (fstat(fd
, &st
) < 0)
87 if (!S_ISFIFO(st
.st_mode
))
90 flags
= fcntl(fd
, F_GETFL
);
94 return FLAGS_SET(flags
, O_NONBLOCK
) ? FD_IS_NONBLOCKING_PIPE
: FD_IS_BLOCKING_PIPE
;
97 static int look_for_signals(CopyFlags copy_flags
) {
100 if ((copy_flags
& (COPY_SIGINT
|COPY_SIGTERM
)) == 0)
103 r
= pop_pending_signal(copy_flags
& COPY_SIGINT
? SIGINT
: 0,
104 copy_flags
& COPY_SIGTERM
? SIGTERM
: 0);
108 return log_debug_errno(SYNTHETIC_ERRNO(EINTR
),
109 "Got %s, cancelling copy operation.", signal_to_string(r
));
114 static int create_hole(int fd
, off_t size
) {
118 offset
= lseek(fd
, 0, SEEK_CUR
);
122 end
= lseek(fd
, 0, SEEK_END
);
126 /* If we're not at the end of the target file, try to punch a hole in the existing space using fallocate(). */
129 fallocate(fd
, FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
, offset
, MIN(size
, end
- offset
)) < 0 &&
130 !ERRNO_IS_NOT_SUPPORTED(errno
))
133 if (end
- offset
>= size
) {
134 /* If we've created the full hole, set the file pointer to the end of the hole we created and exit. */
135 if (lseek(fd
, offset
+ size
, SEEK_SET
) < 0)
141 /* If we haven't created the full hole, use ftruncate() to grow the file (and the hole) to the
142 * required size and move the file pointer to the end of the file. */
144 size
-= end
- offset
;
146 if (ftruncate(fd
, end
+ size
) < 0)
149 if (lseek(fd
, 0, SEEK_END
) < 0)
158 CopyFlags copy_flags
,
160 size_t *ret_remains_size
,
161 copy_progress_bytes_t progress
,
164 _cleanup_close_
int fdf_opened
= -EBADF
, fdt_opened
= -EBADF
;
165 bool try_cfr
= true, try_sendfile
= true, try_splice
= true, copied_something
= false;
166 int r
, nonblock_pipe
= -1;
167 size_t m
= SSIZE_MAX
; /* that is the maximum that sendfile and c_f_r accept */
171 assert(!FLAGS_SET(copy_flags
, COPY_LOCK_BSD
));
173 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
174 * of 'max_bytes', which may be specified as UINT64_MAX, in which case no maximum is applied. Returns negative on
175 * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
176 * reason but we read but didn't yet write some data and ret_remains/ret_remains_size is not NULL, then it will
177 * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
178 * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
179 * these parameters if non-NULL are set to NULL. */
183 if (ret_remains_size
)
184 *ret_remains_size
= 0;
186 fdf
= fd_reopen_condition(fdf
, O_CLOEXEC
| O_NOCTTY
| O_RDONLY
, O_PATH
, &fdf_opened
);
189 fdt
= fd_reopen_condition(fdt
, O_CLOEXEC
| O_NOCTTY
| O_RDWR
, O_PATH
, &fdt_opened
);
193 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
194 * source and destination first. */
195 if ((copy_flags
& COPY_REFLINK
)) {
198 foffset
= lseek(fdf
, 0, SEEK_CUR
);
202 toffset
= lseek(fdt
, 0, SEEK_CUR
);
205 if (foffset
== 0 && toffset
== 0 && max_bytes
== UINT64_MAX
)
206 r
= reflink(fdf
, fdt
); /* full file reflink */
208 r
= reflink_range(fdf
, foffset
, fdt
, toffset
, max_bytes
== UINT64_MAX
? 0 : max_bytes
); /* partial reflink */
212 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
213 if (max_bytes
== UINT64_MAX
) {
215 /* We cloned to the end of the source file, let's position the read
216 * pointer there, and query it at the same time. */
217 t
= lseek(fdf
, 0, SEEK_END
);
223 /* Let's adjust the destination file write pointer by the same number of bytes. */
225 t
= lseek(fdt
, toffset
+ (t
- foffset
), SEEK_SET
);
229 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
231 t
= lseek(fdf
, foffset
+ max_bytes
, SEEK_SET
);
235 t
= lseek(fdt
, toffset
+ max_bytes
, SEEK_SET
);
239 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
252 r
= look_for_signals(copy_flags
);
256 if (max_bytes
!= UINT64_MAX
&& m
> max_bytes
)
259 if (copy_flags
& COPY_HOLES
) {
262 c
= lseek(fdf
, 0, SEEK_CUR
);
266 /* To see if we're in a hole, we search for the next data offset. */
267 e
= lseek(fdf
, c
, SEEK_DATA
);
268 if (e
< 0 && errno
== ENXIO
)
269 /* If errno == ENXIO, that means we've reached the final hole of the file and
270 * that hole isn't followed by more data. */
271 e
= lseek(fdf
, 0, SEEK_END
);
275 /* If we're in a hole (current offset is not a data offset), create a hole of the
276 * same size in the target file. */
278 /* Make sure our new hole doesn't go over the maximum size we're allowed to copy. */
279 n
= MIN(max_bytes
, (uint64_t) e
- c
);
280 r
= create_hole(fdt
, n
);
284 /* Make sure holes are taken into account in the maximum size we're supposed to copy. */
285 if (max_bytes
!= UINT64_MAX
) {
291 /* Update the size we're supposed to copy in this iteration if needed. */
296 c
= e
; /* Set c to the start of the data segment. */
298 /* After copying a potential hole, find the end of the data segment by looking for
299 * the next hole. If we get ENXIO, we're at EOF. */
300 e
= lseek(fdf
, c
, SEEK_HOLE
);
307 /* SEEK_HOLE modifies the file offset so we need to move back to the initial offset. */
308 if (lseek(fdf
, c
, SEEK_SET
) < 0)
311 /* Make sure we're not copying more than the current data segment. */
312 m
= MIN(m
, (size_t) e
- c
);
315 /* First try copy_file_range(), unless we already tried */
317 n
= try_copy_file_range(fdf
, NULL
, fdt
, NULL
, m
, 0u);
319 if (!IN_SET(n
, -EINVAL
, -ENOSYS
, -EXDEV
, -EBADF
))
323 /* use fallback below */
324 } else if (n
== 0) { /* likely EOF */
326 if (copied_something
)
329 /* So, we hit EOF immediately, without having copied a single byte. This
330 * could indicate two things: the file is actually empty, or we are on some
331 * virtual file system such as procfs/sysfs where the syscall actually
332 * doesn't work but doesn't return an error. Try to handle that, by falling
333 * back to simple read()s in case we encounter empty files.
335 * See: https://lwn.net/Articles/846403/ */
336 try_cfr
= try_sendfile
= try_splice
= false;
342 /* First try sendfile(), unless we already tried */
344 n
= sendfile(fdt
, fdf
, NULL
, m
);
346 if (!IN_SET(errno
, EINVAL
, ENOSYS
))
349 try_sendfile
= false;
350 /* use fallback below */
351 } else if (n
== 0) { /* likely EOF */
353 if (copied_something
)
356 try_sendfile
= try_splice
= false; /* same logic as above for copy_file_range() */
362 /* Then try splice, unless we already tried. */
365 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
366 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
367 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour
368 * here, and check if either of the specified fds are a pipe, and if so, let's pass
369 * the flag automatically, depending on O_NONBLOCK being set.
371 * Here's a twist though: when we use it to move data between two pipes of which one
372 * has O_NONBLOCK set and the other has not, then we have no individual control over
373 * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee
374 * systematic O_NONBLOCK behaviour, hence don't. */
376 if (nonblock_pipe
< 0) {
379 /* Check if either of these fds is a pipe, and if so non-blocking or not */
380 a
= fd_is_nonblock_pipe(fdf
);
384 b
= fd_is_nonblock_pipe(fdt
);
388 if ((a
== FD_IS_NO_PIPE
&& b
== FD_IS_NO_PIPE
) ||
389 (a
== FD_IS_BLOCKING_PIPE
&& b
== FD_IS_NONBLOCKING_PIPE
) ||
390 (a
== FD_IS_NONBLOCKING_PIPE
&& b
== FD_IS_BLOCKING_PIPE
))
392 /* splice() only works if one of the fds is a pipe. If neither is,
393 * let's skip this step right-away. As mentioned above, if one of the
394 * two fds refers to a blocking pipe and the other to a non-blocking
395 * pipe, we can't use splice() either, hence don't try either. This
396 * hence means we can only use splice() if either only one of the two
397 * fds is a pipe, or if both are pipes with the same nonblocking flag setting. */
402 nonblock_pipe
= a
== FD_IS_NONBLOCKING_PIPE
|| b
== FD_IS_NONBLOCKING_PIPE
;
407 n
= splice(fdf
, NULL
, fdt
, NULL
, m
, nonblock_pipe
? SPLICE_F_NONBLOCK
: 0);
409 if (!IN_SET(errno
, EINVAL
, ENOSYS
))
413 /* use fallback below */
414 } else if (n
== 0) { /* likely EOF */
416 if (copied_something
)
419 try_splice
= false; /* same logic as above for copy_file_range() + sendfile() */
425 /* As a fallback just copy bits by hand */
427 uint8_t buf
[MIN(m
, COPY_BUFFER_SIZE
)], *p
= buf
;
430 n
= read(fdf
, buf
, sizeof buf
);
433 if (n
== 0) /* EOF */
440 k
= write(fdt
, p
, z
);
454 if (ret_remains_size
)
455 *ret_remains_size
= z
;
468 r
= progress(n
, userdata
);
473 if (max_bytes
!= UINT64_MAX
) {
474 assert(max_bytes
>= (uint64_t) n
);
478 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the
479 * amount we already copied, but don't go below our copy buffer size, unless we are close to the
480 * limit of bytes we are allowed to copy. */
481 m
= MAX(MIN(COPY_BUFFER_SIZE
, max_bytes
), m
- n
);
483 copied_something
= true;
486 if (copy_flags
& COPY_TRUNCATE
) {
487 off_t off
= lseek(fdt
, 0, SEEK_CUR
);
491 if (ftruncate(fdt
, off
) < 0)
495 return max_bytes
<= 0; /* return 0 if we hit EOF earlier than the size limit */
498 static int fd_copy_symlink(
501 const struct stat
*st
,
506 CopyFlags copy_flags
) {
508 _cleanup_free_
char *target
= NULL
;
515 r
= readlinkat_malloc(df
, from
, &target
);
519 if (copy_flags
& COPY_MAC_CREATE
) {
520 r
= mac_selinux_create_file_prepare_at(dt
, to
, S_IFLNK
);
524 r
= RET_NERRNO(symlinkat(target
, dt
, to
));
525 if (copy_flags
& COPY_MAC_CREATE
)
526 mac_selinux_create_file_clear();
528 if (FLAGS_SET(copy_flags
, COPY_GRACEFUL_WARN
) && (ERRNO_IS_PRIVILEGE(r
) || ERRNO_IS_NOT_SUPPORTED(r
))) {
529 log_notice_errno(r
, "Failed to copy symlink '%s', ignoring: %m", from
);
537 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
538 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
,
539 AT_SYMLINK_NOFOLLOW
) < 0)
542 (void) copy_xattr(df
, from
, dt
, to
, copy_flags
);
543 (void) utimensat(dt
, to
, (struct timespec
[]) { st
->st_atim
, st
->st_mtim
}, AT_SYMLINK_NOFOLLOW
);
547 /* Encapsulates the database we store potential hardlink targets in */
548 typedef struct HardlinkContext
{
549 int dir_fd
; /* An fd to the directory we use as lookup table. Never AT_FDCWD. Lazily created, when
550 * we add the first entry. */
552 /* These two fields are used to create the hardlink repository directory above — via
553 * mkdirat(parent_fd, subdir) — and are kept so that we can automatically remove the directory again
554 * when we are done. */
555 int parent_fd
; /* Possibly AT_FDCWD */
559 static int hardlink_context_setup(
563 CopyFlags copy_flags
) {
565 _cleanup_close_
int dt_copy
= -EBADF
;
569 assert(c
->dir_fd
< 0 && c
->dir_fd
!= AT_FDCWD
);
570 assert(c
->parent_fd
< 0);
573 /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
574 * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
575 * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
576 * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
577 * temporary directory consisting of hardlinks of every inode we copied that might be subject of
578 * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
579 * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
580 * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
581 * let's keep things simple, and just use the disk as lookup table for inodes.
583 * Note that this should have zero performance impact as long as .st_nlink of all files copied remains
584 * <= 1, because in that case we will not actually allocate the hardlink inode lookup table directory
585 * on disk (we do so lazily, when the first candidate with .st_nlink > 1 is seen). This means, in the
586 * common case where hardlinks are not used at all or only for few files the fact that we store the
587 * table on disk shouldn't matter performance-wise. */
589 if (!FLAGS_SET(copy_flags
, COPY_HARDLINKS
))
597 dt_copy
= fcntl(dt
, F_DUPFD_CLOEXEC
, 3);
602 r
= tempfn_random_child(to
, "hardlink", &c
->subdir
);
606 c
->parent_fd
= TAKE_FD(dt_copy
);
608 /* We don't actually create the directory we keep the table in here, that's done on-demand when the
609 * first entry is added, using hardlink_context_realize() below. */
613 static int hardlink_context_realize(HardlinkContext
*c
) {
617 if (c
->dir_fd
>= 0) /* Already realized */
620 if (c
->parent_fd
< 0 && c
->parent_fd
!= AT_FDCWD
) /* Not configured */
625 c
->dir_fd
= open_mkdir_at(c
->parent_fd
, c
->subdir
, O_EXCL
|O_CLOEXEC
, 0700);
632 static void hardlink_context_destroy(HardlinkContext
*c
) {
637 /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
638 * _cleanup_() so that we really delete this, even on failure. */
640 if (c
->dir_fd
>= 0) {
641 r
= rm_rf_children(TAKE_FD(c
->dir_fd
), REMOVE_PHYSICAL
, NULL
); /* consumes dir_fd in all cases, even on failure */
643 log_debug_errno(r
, "Failed to remove hardlink store (%s) contents, ignoring: %m", c
->subdir
);
645 assert(c
->parent_fd
>= 0 || c
->parent_fd
== AT_FDCWD
);
648 if (unlinkat(c
->parent_fd
, c
->subdir
, AT_REMOVEDIR
) < 0)
649 log_debug_errno(errno
, "Failed to remove hardlink store (%s) directory, ignoring: %m", c
->subdir
);
652 assert_cc(AT_FDCWD
< 0);
653 c
->parent_fd
= safe_close(c
->parent_fd
);
655 c
->subdir
= mfree(c
->subdir
);
658 static int try_hardlink(
660 const struct stat
*st
,
664 char dev_ino
[DECIMAL_STR_MAX(dev_t
)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
667 assert(dt
>= 0 || dt
== AT_FDCWD
);
670 if (!c
) /* No temporary hardlink directory, don't bother */
673 if (st
->st_nlink
<= 1) /* Source not hardlinked, don't bother */
676 if (c
->dir_fd
< 0) /* not yet realized, hence empty */
679 xsprintf(dev_ino
, "%u:%u:%" PRIu64
, major(st
->st_dev
), minor(st
->st_dev
), (uint64_t) st
->st_ino
);
680 if (linkat(c
->dir_fd
, dev_ino
, dt
, to
, 0) < 0) {
681 if (errno
!= ENOENT
) /* doesn't exist in store yet */
682 log_debug_errno(errno
, "Failed to hardlink %s to %s, ignoring: %m", dev_ino
, to
);
689 static int memorize_hardlink(
691 const struct stat
*st
,
695 char dev_ino
[DECIMAL_STR_MAX(dev_t
)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
699 assert(dt
>= 0 || dt
== AT_FDCWD
);
702 if (!c
) /* No temporary hardlink directory, don't bother */
705 if (st
->st_nlink
<= 1) /* Source not hardlinked, don't bother */
708 r
= hardlink_context_realize(c
); /* Create the hardlink store lazily */
712 xsprintf(dev_ino
, "%u:%u:%" PRIu64
, major(st
->st_dev
), minor(st
->st_dev
), (uint64_t) st
->st_ino
);
713 if (linkat(dt
, to
, c
->dir_fd
, dev_ino
, 0) < 0) {
714 log_debug_errno(errno
, "Failed to hardlink %s to %s, ignoring: %m", to
, dev_ino
);
721 static int fd_copy_tree_generic(
724 const struct stat
*st
,
727 dev_t original_device
,
731 CopyFlags copy_flags
,
734 HardlinkContext
*hardlink_context
,
735 const char *display_path
,
736 copy_progress_path_t progress_path
,
737 copy_progress_bytes_t progress_bytes
,
740 static int fd_copy_regular(
743 const struct stat
*st
,
748 CopyFlags copy_flags
,
749 HardlinkContext
*hardlink_context
,
750 copy_progress_bytes_t progress
,
753 _cleanup_close_
int fdf
= -EBADF
, fdt
= -EBADF
;
760 r
= try_hardlink(hardlink_context
, st
, dt
, to
);
763 if (r
> 0) /* worked! */
766 fdf
= openat(df
, from
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
);
770 if (copy_flags
& COPY_MAC_CREATE
) {
771 r
= mac_selinux_create_file_prepare_at(dt
, to
, S_IFREG
);
775 fdt
= openat(dt
, to
, O_WRONLY
|O_CREAT
|O_EXCL
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
, st
->st_mode
& 07777);
776 if (copy_flags
& COPY_MAC_CREATE
)
777 mac_selinux_create_file_clear();
781 r
= copy_bytes_full(fdf
, fdt
, UINT64_MAX
, copy_flags
, NULL
, NULL
, progress
, userdata
);
786 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
787 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
) < 0)
790 if (fchmod(fdt
, st
->st_mode
& 07777) < 0)
793 (void) futimens(fdt
, (struct timespec
[]) { st
->st_atim
, st
->st_mtim
});
794 (void) copy_xattr(fdf
, NULL
, fdt
, NULL
, copy_flags
);
796 if (copy_flags
& COPY_FSYNC
) {
797 if (fsync(fdt
) < 0) {
803 q
= close_nointr(TAKE_FD(fdt
)); /* even if this fails, the fd is now invalidated */
809 (void) memorize_hardlink(hardlink_context
, st
, dt
, to
);
813 (void) unlinkat(dt
, to
, 0);
817 static int fd_copy_fifo(
820 const struct stat
*st
,
825 CopyFlags copy_flags
,
826 HardlinkContext
*hardlink_context
) {
833 r
= try_hardlink(hardlink_context
, st
, dt
, to
);
836 if (r
> 0) /* worked! */
839 if (copy_flags
& COPY_MAC_CREATE
) {
840 r
= mac_selinux_create_file_prepare_at(dt
, to
, S_IFIFO
);
844 r
= RET_NERRNO(mkfifoat(dt
, to
, st
->st_mode
& 07777));
845 if (copy_flags
& COPY_MAC_CREATE
)
846 mac_selinux_create_file_clear();
848 if (FLAGS_SET(copy_flags
, COPY_GRACEFUL_WARN
) && (ERRNO_IS_PRIVILEGE(r
) || ERRNO_IS_NOT_SUPPORTED(r
))) {
849 log_notice_errno(r
, "Failed to copy fifo '%s', ignoring: %m", from
);
857 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
858 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
,
859 AT_SYMLINK_NOFOLLOW
) < 0)
862 if (fchmodat(dt
, to
, st
->st_mode
& 07777, 0) < 0)
865 (void) utimensat(dt
, to
, (struct timespec
[]) { st
->st_atim
, st
->st_mtim
}, AT_SYMLINK_NOFOLLOW
);
867 (void) memorize_hardlink(hardlink_context
, st
, dt
, to
);
871 static int fd_copy_node(
874 const struct stat
*st
,
879 CopyFlags copy_flags
,
880 HardlinkContext
*hardlink_context
) {
887 r
= try_hardlink(hardlink_context
, st
, dt
, to
);
890 if (r
> 0) /* worked! */
893 if (copy_flags
& COPY_MAC_CREATE
) {
894 r
= mac_selinux_create_file_prepare_at(dt
, to
, st
->st_mode
& S_IFMT
);
898 r
= RET_NERRNO(mknodat(dt
, to
, st
->st_mode
, st
->st_rdev
));
899 if (copy_flags
& COPY_MAC_CREATE
)
900 mac_selinux_create_file_clear();
902 if (FLAGS_SET(copy_flags
, COPY_GRACEFUL_WARN
) && (ERRNO_IS_PRIVILEGE(r
) || ERRNO_IS_NOT_SUPPORTED(r
))) {
903 log_notice_errno(r
, "Failed to copy node '%s', ignoring: %m", from
);
911 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
912 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
,
913 AT_SYMLINK_NOFOLLOW
) < 0)
916 if (fchmodat(dt
, to
, st
->st_mode
& 07777, 0) < 0)
919 (void) utimensat(dt
, to
, (struct timespec
[]) { st
->st_atim
, st
->st_mtim
}, AT_SYMLINK_NOFOLLOW
);
921 (void) memorize_hardlink(hardlink_context
, st
, dt
, to
);
925 static int fd_copy_directory(
928 const struct stat
*st
,
931 dev_t original_device
,
935 CopyFlags copy_flags
,
938 HardlinkContext
*hardlink_context
,
939 const char *display_path
,
940 copy_progress_path_t progress_path
,
941 copy_progress_bytes_t progress_bytes
,
944 _cleanup_(hardlink_context_destroy
) HardlinkContext our_hardlink_context
= {
949 _cleanup_close_
int fdf
= -EBADF
, fdt
= -EBADF
;
950 _cleanup_closedir_
DIR *d
= NULL
;
958 return -ENAMETOOLONG
;
961 fdf
= openat(df
, from
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
);
963 fdf
= fcntl(df
, F_DUPFD_CLOEXEC
, 3);
967 if (!hardlink_context
) {
968 /* If recreating hardlinks is requested let's set up a context for that now. */
969 r
= hardlink_context_setup(&our_hardlink_context
, dt
, to
, copy_flags
);
972 if (r
> 0) /* It's enabled and allocated, let's now use the same context for all recursive
973 * invocations from here down */
974 hardlink_context
= &our_hardlink_context
;
977 d
= take_fdopendir(&fdf
);
981 r
= dir_is_empty_at(dt
, to
, /* ignore_hidden_or_backup= */ false);
982 if (r
< 0 && r
!= -ENOENT
)
984 if ((r
> 0 && !(copy_flags
& (COPY_MERGE
|COPY_MERGE_EMPTY
))) || (r
== 0 && !FLAGS_SET(copy_flags
, COPY_MERGE
)))
989 fdt
= xopenat_lock(dt
, to
,
990 O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
|(exists
? 0 : O_CREAT
|O_EXCL
),
991 (copy_flags
& COPY_MAC_CREATE
? XO_LABEL
: 0)|(set_contains(subvolumes
, st
) ? XO_SUBVOLUME
: 0),
993 copy_flags
& COPY_LOCK_BSD
? LOCK_BSD
: LOCK_NONE
,
1000 if (PTR_TO_INT(hashmap_get(denylist
, st
)) == DENY_CONTENTS
) {
1001 log_debug("%s is in the denylist, not recursing", from
);
1005 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
1006 const char *child_display_path
= NULL
;
1007 _cleanup_free_
char *dp
= NULL
;
1011 if (dot_or_dot_dot(de
->d_name
))
1014 r
= look_for_signals(copy_flags
);
1018 if (fstatat(dirfd(d
), de
->d_name
, &buf
, AT_SYMLINK_NOFOLLOW
) < 0) {
1023 if (progress_path
) {
1025 child_display_path
= dp
= path_join(display_path
, de
->d_name
);
1027 child_display_path
= de
->d_name
;
1029 r
= progress_path(child_display_path
, &buf
, userdata
);
1034 if (PTR_TO_INT(hashmap_get(denylist
, &buf
)) == DENY_INODE
) {
1035 log_debug("%s/%s is in the denylist, ignoring", from
, de
->d_name
);
1039 if (S_ISDIR(buf
.st_mode
)) {
1041 /* Don't descend into directories on other file systems, if this is requested. We do a simple
1042 * .st_dev check here, which basically comes for free. Note that we do this check only on
1043 * directories, not other kinds of file system objects, for two reasons:
1045 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
1046 * propagates the .st_dev field of the file system a file originates from all the way up
1047 * through the stack to stat(). It doesn't do that for directories however. This means that
1048 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
1049 * confusion we hence avoid relying on this check for regular files.
1051 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
1052 * where we really want to avoid descending down in all eternity. However the .st_dev check
1053 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
1054 * file system onto itself can't be detected that way. (Note we also do a recursion depth
1055 * check, which is probably the better protection in this regard, which is why
1056 * COPY_SAME_MOUNT is optional). */
1059 if (FLAGS_SET(copy_flags
, COPY_SAME_MOUNT
)) {
1060 if (buf
.st_dev
!= original_device
)
1063 r
= fd_is_mount_point(dirfd(d
), de
->d_name
, 0);
1071 q
= fd_copy_tree_generic(dirfd(d
), de
->d_name
, &buf
, fdt
, de
->d_name
, original_device
,
1072 depth_left
-1, override_uid
, override_gid
, copy_flags
& ~COPY_LOCK_BSD
,
1073 denylist
, subvolumes
, hardlink_context
, child_display_path
, progress_path
,
1074 progress_bytes
, userdata
);
1076 if (q
== -EINTR
) /* Propagate SIGINT/SIGTERM up instantly */
1078 if (q
== -EEXIST
&& (copy_flags
& COPY_MERGE
))
1087 uid_is_valid(override_uid
) ? override_uid
: st
->st_uid
,
1088 gid_is_valid(override_gid
) ? override_gid
: st
->st_gid
) < 0)
1091 if (fchmod(fdt
, st
->st_mode
& 07777) < 0)
1094 (void) copy_xattr(dirfd(d
), NULL
, fdt
, NULL
, copy_flags
);
1095 (void) futimens(fdt
, (struct timespec
[]) { st
->st_atim
, st
->st_mtim
});
1098 if (copy_flags
& COPY_FSYNC_FULL
) {
1106 return copy_flags
& COPY_LOCK_BSD
? TAKE_FD(fdt
) : 0;
1109 static int fd_copy_leaf(
1112 const struct stat
*st
,
1117 CopyFlags copy_flags
,
1118 HardlinkContext
*hardlink_context
,
1119 const char *display_path
,
1120 copy_progress_bytes_t progress_bytes
,
1124 if (S_ISREG(st
->st_mode
))
1125 r
= fd_copy_regular(df
, from
, st
, dt
, to
, override_uid
, override_gid
, copy_flags
, hardlink_context
, progress_bytes
, userdata
);
1126 else if (S_ISLNK(st
->st_mode
))
1127 r
= fd_copy_symlink(df
, from
, st
, dt
, to
, override_uid
, override_gid
, copy_flags
);
1128 else if (S_ISFIFO(st
->st_mode
))
1129 r
= fd_copy_fifo(df
, from
, st
, dt
, to
, override_uid
, override_gid
, copy_flags
, hardlink_context
);
1130 else if (S_ISBLK(st
->st_mode
) || S_ISCHR(st
->st_mode
) || S_ISSOCK(st
->st_mode
))
1131 r
= fd_copy_node(df
, from
, st
, dt
, to
, override_uid
, override_gid
, copy_flags
, hardlink_context
);
1138 static int fd_copy_tree_generic(
1141 const struct stat
*st
,
1144 dev_t original_device
,
1145 unsigned depth_left
,
1148 CopyFlags copy_flags
,
1151 HardlinkContext
*hardlink_context
,
1152 const char *display_path
,
1153 copy_progress_path_t progress_path
,
1154 copy_progress_bytes_t progress_bytes
,
1159 assert(!FLAGS_SET(copy_flags
, COPY_LOCK_BSD
));
1161 if (S_ISDIR(st
->st_mode
))
1162 return fd_copy_directory(df
, from
, st
, dt
, to
, original_device
, depth_left
-1, override_uid
,
1163 override_gid
, copy_flags
, denylist
, subvolumes
, hardlink_context
,
1164 display_path
, progress_path
, progress_bytes
, userdata
);
1166 DenyType t
= PTR_TO_INT(hashmap_get(denylist
, st
));
1167 if (t
== DENY_INODE
) {
1168 log_debug("%s is in the denylist, ignoring", from
);
1170 } else if (t
== DENY_CONTENTS
)
1171 log_debug("%s is configured to have its contents excluded, but is not a directory", from
);
1173 r
= fd_copy_leaf(df
, from
, st
, dt
, to
, override_uid
, override_gid
, copy_flags
, hardlink_context
, display_path
, progress_bytes
, userdata
);
1174 /* We just tried to copy a leaf node of the tree. If it failed because the node already exists *and* the COPY_REPLACE flag has been provided, we should unlink the node and re-copy. */
1175 if (r
== -EEXIST
&& (copy_flags
& COPY_REPLACE
)) {
1176 /* This codepath is us trying to recover from a failed copy; if the unlink fails, let's just return the original error. */
1177 if (unlinkat(dt
, to
, 0) < 0)
1180 r
= fd_copy_leaf(df
, from
, st
, dt
, to
, override_uid
, override_gid
, copy_flags
, hardlink_context
, display_path
, progress_bytes
, userdata
);
1186 int copy_tree_at_full(
1193 CopyFlags copy_flags
,
1196 copy_progress_path_t progress_path
,
1197 copy_progress_bytes_t progress_bytes
,
1205 assert(!FLAGS_SET(copy_flags
, COPY_LOCK_BSD
));
1207 if (fstatat(fdf
, from
, &st
, AT_SYMLINK_NOFOLLOW
) < 0)
1210 r
= fd_copy_tree_generic(fdf
, from
, &st
, fdt
, to
, st
.st_dev
, COPY_DEPTH_MAX
, override_uid
,
1211 override_gid
, copy_flags
, denylist
, subvolumes
, NULL
, NULL
, progress_path
,
1212 progress_bytes
, userdata
);
1216 if (S_ISDIR(st
.st_mode
) && (copy_flags
& COPY_SYNCFS
)) {
1217 /* If the top-level inode is a directory run syncfs() now. */
1218 r
= syncfs_path(fdt
, to
);
1221 } else if ((copy_flags
& (COPY_FSYNC_FULL
|COPY_SYNCFS
)) != 0) {
1222 /* fsync() the parent dir of what we just copied if COPY_FSYNC_FULL is set. Also do this in
1223 * case COPY_SYNCFS is set but the top-level inode wasn't actually a directory. We do this so that
1224 * COPY_SYNCFS provides reasonable synchronization semantics on any kind of inode: when the
1225 * copy operation is done the whole inode — regardless of its type — and all its children
1226 * will be synchronized to disk. */
1227 r
= fsync_parent_at(fdt
, to
);
1235 static int sync_dir_by_flags(int dir_fd
, const char *path
, CopyFlags copy_flags
) {
1236 assert(dir_fd
>= 0 || dir_fd
== AT_FDCWD
);
1239 if (copy_flags
& COPY_SYNCFS
)
1240 return syncfs_path(dir_fd
, path
);
1241 if (copy_flags
& COPY_FSYNC_FULL
)
1242 return fsync_parent_at(dir_fd
, path
);
1247 int copy_directory_at_full(
1252 CopyFlags copy_flags
,
1253 copy_progress_path_t progress_path
,
1254 copy_progress_bytes_t progress_bytes
,
1257 _cleanup_close_
int fdt
= -EBADF
;
1261 assert(dir_fdf
>= 0 || dir_fdf
== AT_FDCWD
);
1262 assert(dir_fdt
>= 0 || dir_fdt
== AT_FDCWD
);
1265 if (fstatat(dir_fdf
, strempty(from
), &st
, AT_SYMLINK_NOFOLLOW
|(isempty(from
) ? AT_EMPTY_PATH
: 0)) < 0)
1268 r
= stat_verify_directory(&st
);
1272 r
= fd_copy_directory(
1278 UID_INVALID
, GID_INVALID
,
1280 NULL
, NULL
, NULL
, NULL
,
1287 if (FLAGS_SET(copy_flags
, COPY_LOCK_BSD
))
1290 r
= sync_dir_by_flags(dir_fdt
, to
, copy_flags
);
1294 return FLAGS_SET(copy_flags
, COPY_LOCK_BSD
) ? TAKE_FD(fdt
) : 0;
1297 int copy_file_fd_at_full(
1301 CopyFlags copy_flags
,
1302 copy_progress_bytes_t progress_bytes
,
1305 _cleanup_close_
int fdf
= -EBADF
;
1309 assert(dir_fdf
>= 0 || dir_fdf
== AT_FDCWD
);
1312 assert(!FLAGS_SET(copy_flags
, COPY_LOCK_BSD
));
1314 fdf
= openat(dir_fdf
, from
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
1318 r
= fd_verify_regular(fdf
);
1322 if (fstat(fdt
, &st
) < 0)
1325 r
= copy_bytes_full(fdf
, fdt
, UINT64_MAX
, copy_flags
, NULL
, NULL
, progress_bytes
, userdata
);
1329 /* Make sure to copy file attributes over only if the target is a regular
1330 * file (so that copying a file to /dev/null won't alter the access
1331 * mode/ownership of that device node...) */
1332 if (S_ISREG(st
.st_mode
)) {
1333 (void) copy_times(fdf
, fdt
, copy_flags
);
1334 (void) copy_xattr(fdf
, NULL
, fdt
, NULL
, copy_flags
);
1337 if (copy_flags
& COPY_FSYNC_FULL
) {
1338 r
= fsync_full(fdt
);
1341 } else if (copy_flags
& COPY_FSYNC
) {
1349 int copy_file_at_full(
1356 unsigned chattr_flags
,
1357 unsigned chattr_mask
,
1358 CopyFlags copy_flags
,
1359 copy_progress_bytes_t progress_bytes
,
1362 _cleanup_close_
int fdf
= -EBADF
, fdt
= -EBADF
;
1366 assert(dir_fdf
>= 0 || dir_fdf
== AT_FDCWD
);
1367 assert(dir_fdt
>= 0 || dir_fdt
== AT_FDCWD
);
1371 fdf
= openat(dir_fdf
, from
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
);
1375 if (fstat(fdf
, &st
) < 0)
1378 r
= stat_verify_regular(&st
);
1383 fdt
= xopenat_lock(dir_fdt
, to
,
1384 flags
|O_WRONLY
|O_CREAT
|O_CLOEXEC
|O_NOCTTY
,
1385 (copy_flags
& COPY_MAC_CREATE
? XO_LABEL
: 0),
1386 mode
!= MODE_INVALID
? mode
: st
.st_mode
,
1387 copy_flags
& COPY_LOCK_BSD
? LOCK_BSD
: LOCK_NONE
, LOCK_EX
);
1392 if (!FLAGS_SET(flags
, O_EXCL
)) { /* if O_EXCL was used we created the thing as regular file, no need to check again */
1393 r
= fd_verify_regular(fdt
);
1398 if (chattr_mask
!= 0)
1399 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& CHATTR_EARLY_FL
, NULL
);
1401 r
= copy_bytes_full(fdf
, fdt
, UINT64_MAX
, copy_flags
& ~COPY_LOCK_BSD
, NULL
, NULL
, progress_bytes
, userdata
);
1405 (void) copy_times(fdf
, fdt
, copy_flags
);
1406 (void) copy_xattr(fdf
, NULL
, fdt
, NULL
, copy_flags
);
1408 if (chattr_mask
!= 0)
1409 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& ~CHATTR_EARLY_FL
, NULL
);
1411 if (copy_flags
& (COPY_FSYNC
|COPY_FSYNC_FULL
)) {
1412 if (fsync(fdt
) < 0) {
1418 if (!FLAGS_SET(copy_flags
, COPY_LOCK_BSD
)) {
1419 r
= close_nointr(TAKE_FD(fdt
)); /* even if this fails, the fd is now invalidated */
1424 if (copy_flags
& COPY_FSYNC_FULL
) {
1425 r
= fsync_parent_at(dir_fdt
, to
);
1430 return copy_flags
& COPY_LOCK_BSD
? TAKE_FD(fdt
) : 0;
1433 /* Only unlink if we definitely are the ones who created the file */
1434 if (FLAGS_SET(flags
, O_EXCL
))
1435 (void) unlinkat(dir_fdt
, to
, 0);
/* Copies the file "from" (looked up relative to dir_fdf) to "to" (relative to dir_fdt) by way of a
 * temporary file: the data is written to a file obtained from open_tmpfile_linkable_at() and only
 * afterwards linked to its final name with link_tmpfile_at(). COPY_LOCK_BSD must not be set in
 * copy_flags (see the assert() below). */
1440 int copy_file_atomic_at_full(
1446 unsigned chattr_flags
,
1447 unsigned chattr_mask
,
1448 CopyFlags copy_flags
,
1449 copy_progress_bytes_t progress_bytes
,
/* "t" receives the path of the temporary file from open_tmpfile_linkable_at(); it is released through
 * the unlink_and_freep() cleanup handler. */
1452 _cleanup_(unlink_and_freep
) char *t
= NULL
;
/* The temporary file's fd; closed automatically unless ownership is taken with TAKE_FD() first. */
1453 _cleanup_close_
int fdt
= -EBADF
;
/* BSD locking is not supported by this variant. */
1458 assert(!FLAGS_SET(copy_flags
, COPY_LOCK_BSD
));
/* If COPY_MAC_CREATE is set, prepare the SELinux file creation context for a regular file at the
 * target location before the temporary file is opened. */
1460 if (copy_flags
& COPY_MAC_CREATE
) {
1461 r
= mac_selinux_create_file_prepare_at(dir_fdt
, to
, S_IFREG
);
/* Open the temporary file next to the target, write-only. */
1465 fdt
= open_tmpfile_linkable_at(dir_fdt
, to
, O_WRONLY
|O_CLOEXEC
, &t
);
/* Undo the SELinux preparation again, right after the open call. */
1466 if (copy_flags
& COPY_MAC_CREATE
)
1467 mac_selinux_create_file_clear();
/* Apply the subset of the requested chattr flags selected by CHATTR_EARLY_FL before any data is
 * written. Best-effort: the result is deliberately ignored. */
1471 if (chattr_mask
!= 0)
1472 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& CHATTR_EARLY_FL
, NULL
);
/* Copy the source file into the temporary file's fd. */
1474 r
= copy_file_fd_at_full(dir_fdf
, from
, fdt
, copy_flags
, progress_bytes
, userdata
);
/* Apply the caller-specified access mode to the temporary file. */
1478 if (fchmod(fdt
, mode
) < 0)
/* Syncing was requested via COPY_FSYNC or COPY_FSYNC_FULL. */
1481 if ((copy_flags
& (COPY_FSYNC
|COPY_FSYNC_FULL
))) {
/* Link the temporary file to its final name; LINK_TMPFILE_REPLACE is passed only if the caller set
 * COPY_REPLACE. */
1487 r
= link_tmpfile_at(fdt
, dir_fdt
, t
, to
, (copy_flags
& COPY_REPLACE
) ? LINK_TMPFILE_REPLACE
: 0);
/* Now apply the remaining (non-early) chattr flags, again ignoring failures. */
1493 if (chattr_mask
!= 0)
1494 (void) chattr_fd(fdt
, chattr_flags
, chattr_mask
& ~CHATTR_EARLY_FL
, NULL
);
/* Close explicitly (instead of relying on the cleanup handler) so that the result of close is
 * available in r. */
1496 r
= close_nointr(TAKE_FD(fdt
)); /* even if this fails, the fd is now invalidated */
1500 if (copy_flags
& COPY_FSYNC_FULL
) {
1501 /* Sync the parent directory */
1502 r
= fsync_parent_at(dir_fdt
, to
);
/* Remove the target path again, ignoring errors.
 * NOTE(review): presumably this is only reached on the failure path after linking — confirm. */
1510 (void) unlinkat(dir_fdt
, to
, 0);
/* Copies the access and modification timestamps of fdf to fdt. If COPY_CRTIME is set in flags, the
 * creation time is copied as well, but only on a best-effort basis. */
1514 int copy_times(int fdf
, int fdt
, CopyFlags flags
) {
/* Query the source's timestamps. */
1520 if (fstat(fdf
, &st
) < 0)
/* futimens() takes the access time first and the modification time second. */
1523 if (futimens(fdt
, (struct timespec
[2]) { st
.st_atim
, st
.st_mtim
}) < 0)
1526 if (FLAGS_SET(flags
, COPY_CRTIME
)) {
/* Only attempt to set the creation time if it could be read from the source; the result of setting
 * it is deliberately ignored. */
1529 if (fd_getcrtime(fdf
, &crtime
) >= 0)
1530 (void) fd_setcrtime(fdt
, crtime
);
/* Applies the access mode of the file referenced by fdf to the file referenced by fdt. */
1536 int copy_access(int fdf
, int fdt
) {
1542 /* Copies just the access mode (and not the ownership) from fdf to fdt */
1544 if (fstat(fdf
, &st
) < 0)
/* Mask with 07777 so that only the permission, set-UID, set-GID and sticky bits are passed to
 * fchmod(), not the file type bits; RET_NERRNO() wraps the fchmod() result for the return value. */
1547 return RET_NERRNO(fchmod(fdt
, st
.st_mode
& 07777));
/* Applies the access mode and the owning user and group of the file referenced by fdf to the target,
 * which is identified by both the fd fdt and the path patht. */
1550 int copy_rights_with_fallback(int fdf
, int fdt
, const char *patht
) {
1556 /* Copies both access mode and ownership from fdf to fdt */
1558 if (fstat(fdf
, &st
) < 0)
/* Pass on the mode (without the file type bits, hence the 07777 mask) plus UID and GID of the
 * source; the helper's return value is returned as-is. */
1561 return fchmod_and_chown_with_fallback(fdt
, patht
, st
.st_mode
& 07777, st
.st_uid
, st
.st_gid
);
/* Copies extended attributes from the source, identified by df and from, to the destination,
 * identified by dt and to. The COPY_ALL_XATTRS bit in copy_flags controls whether attributes outside
 * the "user." namespace are treated like those inside it. */
1564 int copy_xattr(int df
, const char *from
, int dt
, const char *to
, CopyFlags copy_flags
) {
/* Buffer with the attribute names, freed automatically; it is walked with NULSTR_FOREACH() below. */
1565 _cleanup_free_
char *names
= NULL
;
/* Retrieve the names of all extended attributes of the source. */
1568 r
= listxattr_at_malloc(df
, from
, 0, &names
);
1572 NULSTR_FOREACH(p
, names
) {
/* Per-attribute value buffer, freed automatically. */
1573 _cleanup_free_
char *value
= NULL
;
/* Attribute is outside of the "user." namespace and COPY_ALL_XATTRS is not set.
 * NOTE(review): presumably such attributes are skipped — confirm. */
1575 if (!FLAGS_SET(copy_flags
, COPY_ALL_XATTRS
) && !startswith(p
, "user."))
/* Read the attribute's value from the source. */
1578 r
= getxattr_at_malloc(df
, from
, p
, 0, &value
);
1580 continue; /* gone by now */
/* Write the attribute to the destination, passing the result of getxattr_at_malloc() (r) along with
 * the value. */
1584 if (xsetxattr(dt
, to
, p
, value
, r
, 0) < 0)
/* Issues the FICLONE ioctl on outfd with infd as argument, after verifying that outfd refers to a
 * regular file. The ioctl result is converted with RET_NERRNO() for the return value. */
1591 int reflink(int infd
, int outfd
) {
1597 /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
1599 r
= fd_verify_regular(outfd
);
1603 /* FICLONE was introduced in Linux 4.5 but it uses the same number as BTRFS_IOC_CLONE introduced earlier */
/* Compile-time check that the generic and the btrfs-specific ioctl numbers really are the same. */
1605 assert_cc(FICLONE
== BTRFS_IOC_CLONE
);
1607 return RET_NERRNO(ioctl(outfd
, FICLONE
, infd
));
/* Compile-time check that the generic clone range structure and the btrfs-specific one have the
 * same size. */
1610 assert_cc(sizeof(struct file_clone_range
) == sizeof(struct btrfs_ioctl_clone_range_args
));
/* Issues the FICLONERANGE ioctl on outfd, with in_offset as source offset and out_offset as
 * destination offset. If both offsets are zero and sz is 0 or UINT64_MAX, the request is handed to
 * reflink() instead. */
1612 int reflink_range(int infd
, uint64_t in_offset
, int outfd
, uint64_t out_offset
, uint64_t sz
) {
/* Argument structure handed to the FICLONERANGE ioctl below. */
1613 struct file_clone_range args
= {
1615 .src_offset
= in_offset
,
1617 .dest_offset
= out_offset
,
1624 /* Inside the kernel, FICLONE is identical to FICLONERANGE with offsets and size set to zero, let's
1625 * simplify things and use the simple ioctl in that case. Also, do the same if the size is
1626 * UINT64_MAX, which is how we usually encode "everything". */
1627 if (in_offset
== 0 && out_offset
== 0 && IN_SET(sz
, 0, UINT64_MAX
))
1628 return reflink(infd
, outfd
);
/* Verify that outfd refers to a regular file before the ioctl is issued on it. */
1630 r
= fd_verify_regular(outfd
);
/* Compile-time check that the generic and the btrfs-specific ioctl numbers are the same. */
1634 assert_cc(FICLONERANGE
== BTRFS_IOC_CLONE_RANGE
);
1636 return RET_NERRNO(ioctl(outfd
, FICLONERANGE
, &args
));