src/shared/copy.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <linux/btrfs.h>
6 #include <stddef.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/file.h>
10 #include <sys/ioctl.h>
11 #include <sys/sendfile.h>
12 #include <sys/xattr.h>
13 #include <unistd.h>
14
15 #include "alloc-util.h"
16 #include "btrfs-util.h"
17 #include "chattr-util.h"
18 #include "copy.h"
19 #include "dirent-util.h"
20 #include "fd-util.h"
21 #include "fileio.h"
22 #include "fs-util.h"
23 #include "io-util.h"
24 #include "macro.h"
25 #include "missing_fs.h"
26 #include "missing_syscall.h"
27 #include "mkdir-label.h"
28 #include "mountpoint-util.h"
29 #include "nulstr-util.h"
30 #include "rm-rf.h"
31 #include "selinux-util.h"
32 #include "signal-util.h"
33 #include "stat-util.h"
34 #include "stdio-util.h"
35 #include "string-util.h"
36 #include "strv.h"
37 #include "sync-util.h"
38 #include "time-util.h"
39 #include "tmpfile-util.h"
40 #include "umask-util.h"
41 #include "user-util.h"
42 #include "xattr-util.h"
43
44 #define COPY_BUFFER_SIZE (16U*1024U)
45
46 /* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
47 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
48 * case of bind mount cycles and suchlike. */
49 #define COPY_DEPTH_MAX 2048U
50
51 static ssize_t try_copy_file_range(
52 int fd_in, loff_t *off_in,
53 int fd_out, loff_t *off_out,
54 size_t len,
55 unsigned flags) {
56
57 static int have = -1;
58 ssize_t r;
59
60 if (have == 0)
61 return -ENOSYS;
62
63 r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
64 if (have < 0)
65 have = r >= 0 || errno != ENOSYS;
66 if (r < 0)
67 return -errno;
68
69 return r;
70 }
71
72 enum {
73 FD_IS_NO_PIPE,
74 FD_IS_BLOCKING_PIPE,
75 FD_IS_NONBLOCKING_PIPE,
76 };
77
78 static int fd_is_nonblock_pipe(int fd) {
79 struct stat st;
80 int flags;
81
82         /* Checks whether the specified file descriptor refers to a pipe, and if so, whether O_NONBLOCK is set. */
83
84 if (fstat(fd, &st) < 0)
85 return -errno;
86
87 if (!S_ISFIFO(st.st_mode))
88 return FD_IS_NO_PIPE;
89
90 flags = fcntl(fd, F_GETFL);
91 if (flags < 0)
92 return -errno;
93
94 return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
95 }
96
97 static int look_for_signals(CopyFlags copy_flags) {
98 int r;
99
100 if ((copy_flags & (COPY_SIGINT|COPY_SIGTERM)) == 0)
101 return 0;
102
103 r = pop_pending_signal(copy_flags & COPY_SIGINT ? SIGINT : 0,
104 copy_flags & COPY_SIGTERM ? SIGTERM : 0);
105 if (r < 0)
106 return r;
107 if (r != 0)
108 return log_debug_errno(SYNTHETIC_ERRNO(EINTR),
109 "Got %s, cancelling copy operation.", signal_to_string(r));
110
111 return 0;
112 }
113
114 static int create_hole(int fd, off_t size) {
115 off_t offset;
116 off_t end;
117
118 offset = lseek(fd, 0, SEEK_CUR);
119 if (offset < 0)
120 return -errno;
121
122 end = lseek(fd, 0, SEEK_END);
123 if (end < 0)
124 return -errno;
125
126 /* If we're not at the end of the target file, try to punch a hole in the existing space using fallocate(). */
127
128 if (offset < end &&
129 fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, MIN(size, end - offset)) < 0 &&
130 !ERRNO_IS_NOT_SUPPORTED(errno))
131 return -errno;
132
133 if (end - offset >= size) {
134 /* If we've created the full hole, set the file pointer to the end of the hole we created and exit. */
135 if (lseek(fd, offset + size, SEEK_SET) < 0)
136 return -errno;
137
138 return 0;
139 }
140
141 /* If we haven't created the full hole, use ftruncate() to grow the file (and the hole) to the
142 * required size and move the file pointer to the end of the file. */
143
144 size -= end - offset;
145
146 if (ftruncate(fd, end + size) < 0)
147 return -errno;
148
149 if (lseek(fd, 0, SEEK_END) < 0)
150 return -errno;
151
152 return 0;
153 }
154
155 int copy_bytes_full(
156 int fdf, int fdt,
157 uint64_t max_bytes,
158 CopyFlags copy_flags,
159 void **ret_remains,
160 size_t *ret_remains_size,
161 copy_progress_bytes_t progress,
162 void *userdata) {
163
164 _cleanup_close_ int fdf_opened = -EBADF, fdt_opened = -EBADF;
165 bool try_cfr = true, try_sendfile = true, try_splice = true, copied_something = false;
166 int r, nonblock_pipe = -1;
167 size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
168
169 assert(fdf >= 0);
170 assert(fdt >= 0);
171 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
172
173         /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
174          * of 'max_bytes', which may be specified as UINT64_MAX, in which case no maximum is applied. Returns negative
175          * on error, zero if EOF is hit before the byte limit is reached, and positive otherwise. If the copy fails for
176          * some reason, but some data was already read and not yet written, and ret_remains/ret_remains_size are not
177          * NULL, then they will be initialized with an allocated buffer containing this "remaining" data. Note that
178          * these two parameters are initialized with a valid buffer only on failure, and only if some data was actually
179          * already read; otherwise, if non-NULL, they are set to NULL. (A short usage sketch follows this function.) */
180
181 if (ret_remains)
182 *ret_remains = NULL;
183 if (ret_remains_size)
184 *ret_remains_size = 0;
185
186 fdf = fd_reopen_condition(fdf, O_CLOEXEC | O_NOCTTY | O_RDONLY, O_PATH, &fdf_opened);
187 if (fdf < 0)
188 return fdf;
189 fdt = fd_reopen_condition(fdt, O_CLOEXEC | O_NOCTTY | O_RDWR, O_PATH, &fdt_opened);
190 if (fdt < 0)
191 return fdt;
192
193 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
194 * source and destination first. */
195 if ((copy_flags & COPY_REFLINK)) {
196 off_t foffset;
197
198 foffset = lseek(fdf, 0, SEEK_CUR);
199 if (foffset >= 0) {
200 off_t toffset;
201
202 toffset = lseek(fdt, 0, SEEK_CUR);
203 if (toffset >= 0) {
204
205 if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
206 r = reflink(fdf, fdt); /* full file reflink */
207 else
208 r = reflink_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
209 if (r >= 0) {
210 off_t t;
211
212 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
213 if (max_bytes == UINT64_MAX) {
214
215 /* We cloned to the end of the source file, let's position the read
216 * pointer there, and query it at the same time. */
217 t = lseek(fdf, 0, SEEK_END);
218 if (t < 0)
219 return -errno;
220 if (t < foffset)
221 return -ESPIPE;
222
223 /* Let's adjust the destination file write pointer by the same number
224 * of bytes. */
225 t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
226 if (t < 0)
227 return -errno;
228
229 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
230 } else {
231 t = lseek(fdf, foffset + max_bytes, SEEK_SET);
232 if (t < 0)
233 return -errno;
234
235 t = lseek(fdt, toffset + max_bytes, SEEK_SET);
236 if (t < 0)
237 return -errno;
238
239 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
240 }
241 }
242 }
243 }
244 }
245
246 for (;;) {
247 ssize_t n;
248
249 if (max_bytes <= 0)
250 break;
251
252 r = look_for_signals(copy_flags);
253 if (r < 0)
254 return r;
255
256 if (max_bytes != UINT64_MAX && m > max_bytes)
257 m = max_bytes;
258
259 if (copy_flags & COPY_HOLES) {
260 off_t c, e;
261
262 c = lseek(fdf, 0, SEEK_CUR);
263 if (c < 0)
264 return -errno;
265
266 /* To see if we're in a hole, we search for the next data offset. */
267 e = lseek(fdf, c, SEEK_DATA);
268 if (e < 0 && errno == ENXIO)
269 /* If errno == ENXIO, that means we've reached the final hole of the file and
270 * that hole isn't followed by more data. */
271 e = lseek(fdf, 0, SEEK_END);
272 if (e < 0)
273 return -errno;
274
275 /* If we're in a hole (current offset is not a data offset), create a hole of the
276 * same size in the target file. */
277 if (e > c) {
278 /* Make sure our new hole doesn't go over the maximum size we're allowed to copy. */
279 n = MIN(max_bytes, (uint64_t) e - c);
280 r = create_hole(fdt, n);
281 if (r < 0)
282 return r;
283
284 /* Make sure holes are taken into account in the maximum size we're supposed to copy. */
285 if (max_bytes != UINT64_MAX) {
286 max_bytes -= n;
287 if (max_bytes <= 0)
288 break;
289 }
290
291 /* Update the size we're supposed to copy in this iteration if needed. */
292 if (m > max_bytes)
293 m = max_bytes;
294 }
295
296 c = e; /* Set c to the start of the data segment. */
297
298 /* After copying a potential hole, find the end of the data segment by looking for
299 * the next hole. If we get ENXIO, we're at EOF. */
300 e = lseek(fdf, c, SEEK_HOLE);
301 if (e < 0) {
302 if (errno == ENXIO)
303 break;
304 return -errno;
305 }
306
307 /* SEEK_HOLE modifies the file offset so we need to move back to the initial offset. */
308 if (lseek(fdf, c, SEEK_SET) < 0)
309 return -errno;
310
311 /* Make sure we're not copying more than the current data segment. */
312 m = MIN(m, (size_t) e - c);
313 }
314
315 /* First try copy_file_range(), unless we already tried */
316 if (try_cfr) {
317 n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
318 if (n < 0) {
319 if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
320 return n;
321
322 try_cfr = false;
323 /* use fallback below */
324 } else if (n == 0) { /* likely EOF */
325
326 if (copied_something)
327 break;
328
329 /* So, we hit EOF immediately, without having copied a single byte. This
330 * could indicate two things: the file is actually empty, or we are on some
331 * virtual file system such as procfs/sysfs where the syscall actually
332 * doesn't work but doesn't return an error. Try to handle that, by falling
333                                  * doesn't work but doesn't return an error. Try to handle that by falling
334 *
335 * See: https://lwn.net/Articles/846403/ */
336 try_cfr = try_sendfile = try_splice = false;
337 } else
338 /* Success! */
339 goto next;
340 }
341
342                 /* Then try sendfile(), unless we already tried */
343 if (try_sendfile) {
344 n = sendfile(fdt, fdf, NULL, m);
345 if (n < 0) {
346 if (!IN_SET(errno, EINVAL, ENOSYS))
347 return -errno;
348
349 try_sendfile = false;
350 /* use fallback below */
351 } else if (n == 0) { /* likely EOF */
352
353 if (copied_something)
354 break;
355
356 try_sendfile = try_splice = false; /* same logic as above for copy_file_range() */
357 } else
358 /* Success! */
359 goto next;
360 }
361
362 /* Then try splice, unless we already tried. */
363 if (try_splice) {
364
365 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
366 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
367 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour
368                          * here, and check if either of the specified fds is a pipe, and if so, let's pass
369 * the flag automatically, depending on O_NONBLOCK being set.
370 *
371 * Here's a twist though: when we use it to move data between two pipes of which one
372 * has O_NONBLOCK set and the other has not, then we have no individual control over
373 * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee
374 * systematic O_NONBLOCK behaviour, hence don't. */
375
376 if (nonblock_pipe < 0) {
377 int a, b;
378
379 /* Check if either of these fds is a pipe, and if so non-blocking or not */
380 a = fd_is_nonblock_pipe(fdf);
381 if (a < 0)
382 return a;
383
384 b = fd_is_nonblock_pipe(fdt);
385 if (b < 0)
386 return b;
387
388 if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
389 (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
390 (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
391
392 /* splice() only works if one of the fds is a pipe. If neither is,
393                                                          * let's skip this step right away. As mentioned above, if one of the
394                                                          * two fds refers to a blocking pipe and the other to a non-blocking
395                                                          * pipe, we can't use splice() either, hence don't try it. This means
396                                                          * we can only use splice() if either just one of the two
397 * fds is a pipe, or if both are pipes with the same nonblocking flag
398 * setting. */
399
400 try_splice = false;
401 else
402 nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
403 }
404 }
405
406 if (try_splice) {
407 n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
408 if (n < 0) {
409 if (!IN_SET(errno, EINVAL, ENOSYS))
410 return -errno;
411
412 try_splice = false;
413 /* use fallback below */
414 } else if (n == 0) { /* likely EOF */
415
416 if (copied_something)
417 break;
418
419 try_splice = false; /* same logic as above for copy_file_range() + sendfile() */
420 } else
421 /* Success! */
422 goto next;
423 }
424
425 /* As a fallback just copy bits by hand */
426 {
427 uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
428 ssize_t z;
429
430 n = read(fdf, buf, sizeof buf);
431 if (n < 0)
432 return -errno;
433 if (n == 0) /* EOF */
434 break;
435
436 z = (size_t) n;
437 do {
438 ssize_t k;
439
440 k = write(fdt, p, z);
441 if (k < 0) {
442 r = -errno;
443
444 if (ret_remains) {
445 void *copy;
446
447 copy = memdup(p, z);
448 if (!copy)
449 return -ENOMEM;
450
451 *ret_remains = copy;
452 }
453
454 if (ret_remains_size)
455 *ret_remains_size = z;
456
457 return r;
458 }
459
460 assert(k <= z);
461 z -= k;
462 p += k;
463 } while (z > 0);
464 }
465
466 next:
467 if (progress) {
468 r = progress(n, userdata);
469 if (r < 0)
470 return r;
471 }
472
473 if (max_bytes != UINT64_MAX) {
474 assert(max_bytes >= (uint64_t) n);
475 max_bytes -= n;
476 }
477
478 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the
479                          * amount we already copied, but don't go below our copy buffer size, unless we are close to the
480 * limit of bytes we are allowed to copy. */
481 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
482
483 copied_something = true;
484 }
485
486 if (copy_flags & COPY_TRUNCATE) {
487 off_t off = lseek(fdt, 0, SEEK_CUR);
488 if (off < 0)
489 return -errno;
490
491 if (ftruncate(fdt, off) < 0)
492 return -errno;
493 }
494
495 return max_bytes <= 0; /* return 0 if we hit EOF earlier than the size limit */
496 }
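
/* An illustrative usage sketch of copy_bytes_full() (not taken from the callers in this file; the fd
 * names are placeholders): copy everything from one already-open fd to another, preferring reflinks and
 * reproducing holes, without collecting the "remaining" buffer or reporting progress. */
static int example_copy_everything(int from_fd, int to_fd) {
        int r;

        r = copy_bytes_full(from_fd, to_fd, UINT64_MAX,
                            COPY_REFLINK|COPY_HOLES,
                            /* ret_remains= */ NULL, /* ret_remains_size= */ NULL,
                            /* progress= */ NULL, /* userdata= */ NULL);
        if (r < 0)
                return r;

        /* With UINT64_MAX as the limit, hitting EOF (r == 0) is the expected outcome. */
        return 0;
}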
497
498 static int fd_copy_symlink(
499 int df,
500 const char *from,
501 const struct stat *st,
502 int dt,
503 const char *to,
504 uid_t override_uid,
505 gid_t override_gid,
506 CopyFlags copy_flags) {
507
508 _cleanup_free_ char *target = NULL;
509 int r;
510
511 assert(from);
512 assert(st);
513 assert(to);
514
515 r = readlinkat_malloc(df, from, &target);
516 if (r < 0)
517 return r;
518
519 if (copy_flags & COPY_MAC_CREATE) {
520 r = mac_selinux_create_file_prepare_at(dt, to, S_IFLNK);
521 if (r < 0)
522 return r;
523 }
524 r = RET_NERRNO(symlinkat(target, dt, to));
525 if (copy_flags & COPY_MAC_CREATE)
526 mac_selinux_create_file_clear();
527 if (r < 0) {
528 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
529 log_notice_errno(r, "Failed to copy symlink '%s', ignoring: %m", from);
530 return 0;
531 }
532
533 return r;
534 }
535
536 if (fchownat(dt, to,
537 uid_is_valid(override_uid) ? override_uid : st->st_uid,
538 gid_is_valid(override_gid) ? override_gid : st->st_gid,
539 AT_SYMLINK_NOFOLLOW) < 0)
540 r = -errno;
541
542 (void) copy_xattr(df, from, dt, to, copy_flags);
543 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
544 return r;
545 }
546
547 /* Encapsulates the database we store potential hardlink targets in */
548 typedef struct HardlinkContext {
549 int dir_fd; /* An fd to the directory we use as lookup table. Never AT_FDCWD. Lazily created, when
550 * we add the first entry. */
551
552 /* These two fields are used to create the hardlink repository directory above — via
553 * mkdirat(parent_fd, subdir) — and are kept so that we can automatically remove the directory again
554 * when we are done. */
555 int parent_fd; /* Possibly AT_FDCWD */
556 char *subdir;
557 } HardlinkContext;
558
559 static int hardlink_context_setup(
560 HardlinkContext *c,
561 int dt,
562 const char *to,
563 CopyFlags copy_flags) {
564
565 _cleanup_close_ int dt_copy = -EBADF;
566 int r;
567
568 assert(c);
569 assert(c->dir_fd < 0 && c->dir_fd != AT_FDCWD);
570 assert(c->parent_fd < 0);
571 assert(!c->subdir);
572
573 /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
574 * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
575 * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
576 * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
577 * temporary directory consisting of hardlinks of every inode we copied that might be subject of
578 * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
579 * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
580 * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
581 * let's keep things simple, and just use the disk as lookup table for inodes.
582 *
583 * Note that this should have zero performance impact as long as .n_link of all files copied remains
584                  * <= 1, because in that case we will not actually allocate the hardlink inode lookup table directory
585 * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the
586 * common case where hardlinks are not used at all or only for few files the fact that we store the
587                  * table on disk shouldn't matter performance-wise. */
588
589 if (!FLAGS_SET(copy_flags, COPY_HARDLINKS))
590 return 0;
591
592 if (dt == AT_FDCWD)
593 dt_copy = AT_FDCWD;
594 else if (dt < 0)
595 return -EBADF;
596 else {
597 dt_copy = fcntl(dt, F_DUPFD_CLOEXEC, 3);
598 if (dt_copy < 0)
599 return -errno;
600 }
601
602 r = tempfn_random_child(to, "hardlink", &c->subdir);
603 if (r < 0)
604 return r;
605
606 c->parent_fd = TAKE_FD(dt_copy);
607
608 /* We don't actually create the directory we keep the table in here, that's done on-demand when the
609 * first entry is added, using hardlink_context_realize() below. */
610 return 1;
611 }
612
613 static int hardlink_context_realize(HardlinkContext *c) {
614 if (!c)
615 return 0;
616
617 if (c->dir_fd >= 0) /* Already realized */
618 return 1;
619
620 if (c->parent_fd < 0 && c->parent_fd != AT_FDCWD) /* Not configured */
621 return 0;
622
623 assert(c->subdir);
624
625 c->dir_fd = open_mkdir_at(c->parent_fd, c->subdir, O_EXCL|O_CLOEXEC, 0700);
626 if (c->dir_fd < 0)
627 return c->dir_fd;
628
629 return 1;
630 }
631
632 static void hardlink_context_destroy(HardlinkContext *c) {
633 int r;
634
635 assert(c);
636
637 /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
638 * _cleanup_() so that we really delete this, even on failure. */
639
640 if (c->dir_fd >= 0) {
641                 /* <dir_fd> might already have been used for reading, so we need to rewind it. */
642 if (lseek(c->dir_fd, 0, SEEK_SET) < 0)
643 log_debug_errno(errno, "Failed to lseek on file descriptor, ignoring: %m");
644
645 r = rm_rf_children(TAKE_FD(c->dir_fd), REMOVE_PHYSICAL, NULL); /* consumes dir_fd in all cases, even on failure */
646 if (r < 0)
647 log_debug_errno(r, "Failed to remove hardlink store (%s) contents, ignoring: %m", c->subdir);
648
649 assert(c->parent_fd >= 0 || c->parent_fd == AT_FDCWD);
650 assert(c->subdir);
651
652 if (unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR) < 0)
653 log_debug_errno(errno, "Failed to remove hardlink store (%s) directory, ignoring: %m", c->subdir);
654 }
655
656 assert_cc(AT_FDCWD < 0);
657 c->parent_fd = safe_close(c->parent_fd);
658
659 c->subdir = mfree(c->subdir);
660 }
661
662 static int try_hardlink(
663 HardlinkContext *c,
664 const struct stat *st,
665 int dt,
666 const char *to) {
667
668 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
669
670 assert(st);
671 assert(dt >= 0 || dt == AT_FDCWD);
672 assert(to);
673
674 if (!c) /* No temporary hardlink directory, don't bother */
675 return 0;
676
677 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
678 return 0;
679
680 if (c->dir_fd < 0) /* not yet realized, hence empty */
681 return 0;
682
683 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
684 if (linkat(c->dir_fd, dev_ino, dt, to, 0) < 0) {
685 if (errno != ENOENT) /* doesn't exist in store yet */
686 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", dev_ino, to);
687 return 0;
688 }
689
690 return 1;
691 }
692
693 static int memorize_hardlink(
694 HardlinkContext *c,
695 const struct stat *st,
696 int dt,
697 const char *to) {
698
699 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
700 int r;
701
702 assert(st);
703 assert(dt >= 0 || dt == AT_FDCWD);
704 assert(to);
705
706 if (!c) /* No temporary hardlink directory, don't bother */
707 return 0;
708
709 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
710 return 0;
711
712 r = hardlink_context_realize(c); /* Create the hardlink store lazily */
713 if (r < 0)
714 return r;
715
716 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
717 if (linkat(dt, to, c->dir_fd, dev_ino, 0) < 0) {
718 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", to, dev_ino);
719 return 0;
720 }
721
722 return 1;
723 }
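
/* A sketch of the intended call pattern around the hardlink store, as used by the fd_copy_*() helpers
 * below (the helper name here is made up for illustration): first ask try_hardlink() whether this inode
 * was copied before and can simply be linked again; otherwise copy the payload and memorize_hardlink()
 * the result so that later occurrences of the same inode become hardlinks. */
static int example_copy_or_link(HardlinkContext *c, const struct stat *st, int dt, const char *to) {
        int r;

        r = try_hardlink(c, st, dt, to);
        if (r != 0)
                return r < 0 ? r : 0; /* error, or an existing copy of this inode was linked into place */

        /* … copy the inode's payload here, e.g. via copy_bytes_full() … */

        (void) memorize_hardlink(c, st, dt, to);
        return 0;
}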
724
725 static int fd_copy_tree_generic(
726 int df,
727 const char *from,
728 const struct stat *st,
729 int dt,
730 const char *to,
731 dev_t original_device,
732 unsigned depth_left,
733 uid_t override_uid,
734 gid_t override_gid,
735 CopyFlags copy_flags,
736 Hashmap *denylist,
737 Set *subvolumes,
738 HardlinkContext *hardlink_context,
739 const char *display_path,
740 copy_progress_path_t progress_path,
741 copy_progress_bytes_t progress_bytes,
742 void *userdata);
743
744 static int fd_copy_regular(
745 int df,
746 const char *from,
747 const struct stat *st,
748 int dt,
749 const char *to,
750 uid_t override_uid,
751 gid_t override_gid,
752 CopyFlags copy_flags,
753 HardlinkContext *hardlink_context,
754 copy_progress_bytes_t progress,
755 void *userdata) {
756
757 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
758 int r, q;
759
760 assert(from);
761 assert(st);
762 assert(to);
763
764 r = try_hardlink(hardlink_context, st, dt, to);
765 if (r < 0)
766 return r;
767 if (r > 0) /* worked! */
768 return 0;
769
770 fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
771 if (fdf < 0)
772 return -errno;
773
774 if (copy_flags & COPY_MAC_CREATE) {
775 r = mac_selinux_create_file_prepare_at(dt, to, S_IFREG);
776 if (r < 0)
777 return r;
778 }
779 fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
780 if (copy_flags & COPY_MAC_CREATE)
781 mac_selinux_create_file_clear();
782 if (fdt < 0)
783 return -errno;
784
785 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress, userdata);
786 if (r < 0)
787 goto fail;
788
789 if (fchown(fdt,
790 uid_is_valid(override_uid) ? override_uid : st->st_uid,
791 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
792 r = -errno;
793
794 if (fchmod(fdt, st->st_mode & 07777) < 0)
795 r = -errno;
796
797 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
798 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
799
800 if (copy_flags & COPY_FSYNC) {
801 if (fsync(fdt) < 0) {
802 r = -errno;
803 goto fail;
804 }
805 }
806
807 q = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
808 if (q < 0) {
809 r = q;
810 goto fail;
811 }
812
813 (void) memorize_hardlink(hardlink_context, st, dt, to);
814 return r;
815
816 fail:
817 (void) unlinkat(dt, to, 0);
818 return r;
819 }
820
821 static int fd_copy_fifo(
822 int df,
823 const char *from,
824 const struct stat *st,
825 int dt,
826 const char *to,
827 uid_t override_uid,
828 gid_t override_gid,
829 CopyFlags copy_flags,
830 HardlinkContext *hardlink_context) {
831 int r;
832
833 assert(from);
834 assert(st);
835 assert(to);
836
837 r = try_hardlink(hardlink_context, st, dt, to);
838 if (r < 0)
839 return r;
840 if (r > 0) /* worked! */
841 return 0;
842
843 if (copy_flags & COPY_MAC_CREATE) {
844 r = mac_selinux_create_file_prepare_at(dt, to, S_IFIFO);
845 if (r < 0)
846 return r;
847 }
848 r = RET_NERRNO(mkfifoat(dt, to, st->st_mode & 07777));
849 if (copy_flags & COPY_MAC_CREATE)
850 mac_selinux_create_file_clear();
851 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_NEG_PRIVILEGE(r) || ERRNO_IS_NEG_NOT_SUPPORTED(r))) {
852 log_notice_errno(r, "Failed to copy fifo '%s', ignoring: %m", from);
853 return 0;
854 } else if (r < 0)
855 return r;
856
857 if (fchownat(dt, to,
858 uid_is_valid(override_uid) ? override_uid : st->st_uid,
859 gid_is_valid(override_gid) ? override_gid : st->st_gid,
860 AT_SYMLINK_NOFOLLOW) < 0)
861 r = -errno;
862
863 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
864 r = -errno;
865
866 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
867
868 (void) memorize_hardlink(hardlink_context, st, dt, to);
869 return r;
870 }
871
872 static int fd_copy_node(
873 int df,
874 const char *from,
875 const struct stat *st,
876 int dt,
877 const char *to,
878 uid_t override_uid,
879 gid_t override_gid,
880 CopyFlags copy_flags,
881 HardlinkContext *hardlink_context) {
882 int r;
883
884 assert(from);
885 assert(st);
886 assert(to);
887
888 r = try_hardlink(hardlink_context, st, dt, to);
889 if (r < 0)
890 return r;
891 if (r > 0) /* worked! */
892 return 0;
893
894 if (copy_flags & COPY_MAC_CREATE) {
895 r = mac_selinux_create_file_prepare_at(dt, to, st->st_mode & S_IFMT);
896 if (r < 0)
897 return r;
898 }
899 r = RET_NERRNO(mknodat(dt, to, st->st_mode, st->st_rdev));
900 if (copy_flags & COPY_MAC_CREATE)
901 mac_selinux_create_file_clear();
902 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_NEG_PRIVILEGE(r) || ERRNO_IS_NEG_NOT_SUPPORTED(r))) {
903 log_notice_errno(r, "Failed to copy node '%s', ignoring: %m", from);
904 return 0;
905 } else if (r < 0)
906 return r;
907
908 if (fchownat(dt, to,
909 uid_is_valid(override_uid) ? override_uid : st->st_uid,
910 gid_is_valid(override_gid) ? override_gid : st->st_gid,
911 AT_SYMLINK_NOFOLLOW) < 0)
912 r = -errno;
913
914 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
915 r = -errno;
916
917 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
918
919 (void) memorize_hardlink(hardlink_context, st, dt, to);
920 return r;
921 }
922
923 static int fd_copy_directory(
924 int df,
925 const char *from,
926 const struct stat *st,
927 int dt,
928 const char *to,
929 dev_t original_device,
930 unsigned depth_left,
931 uid_t override_uid,
932 gid_t override_gid,
933 CopyFlags copy_flags,
934 Hashmap *denylist,
935 Set *subvolumes,
936 HardlinkContext *hardlink_context,
937 const char *display_path,
938 copy_progress_path_t progress_path,
939 copy_progress_bytes_t progress_bytes,
940 void *userdata) {
941
942 _cleanup_(hardlink_context_destroy) HardlinkContext our_hardlink_context = {
943 .dir_fd = -EBADF,
944 .parent_fd = -EBADF,
945 };
946
947 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
948 _cleanup_closedir_ DIR *d = NULL;
949 bool exists;
950 int r;
951
952 assert(st);
953 assert(to);
954
955 if (depth_left == 0)
956 return -ENAMETOOLONG;
957
958 if (from)
959 fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
960 else
961 fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
962 if (fdf < 0)
963 return -errno;
964
965 if (!hardlink_context) {
966 /* If recreating hardlinks is requested let's set up a context for that now. */
967 r = hardlink_context_setup(&our_hardlink_context, dt, to, copy_flags);
968 if (r < 0)
969 return r;
970 if (r > 0) /* It's enabled and allocated, let's now use the same context for all recursive
971 * invocations from here down */
972 hardlink_context = &our_hardlink_context;
973 }
974
975 d = take_fdopendir(&fdf);
976 if (!d)
977 return -errno;
978
979 r = dir_is_empty_at(dt, to, /* ignore_hidden_or_backup= */ false);
980 if (r < 0 && r != -ENOENT)
981 return r;
982 if ((r > 0 && !(copy_flags & (COPY_MERGE|COPY_MERGE_EMPTY))) || (r == 0 && !FLAGS_SET(copy_flags, COPY_MERGE)))
983 return -EEXIST;
984
985 exists = r >= 0;
986
987 fdt = xopenat_lock(dt, to,
988 O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|(exists ? 0 : O_CREAT|O_EXCL),
989 (copy_flags & COPY_MAC_CREATE ? XO_LABEL : 0)|(set_contains(subvolumes, st) ? XO_SUBVOLUME : 0),
990 st->st_mode & 07777,
991 copy_flags & COPY_LOCK_BSD ? LOCK_BSD : LOCK_NONE,
992 LOCK_EX);
993 if (fdt < 0)
994 return fdt;
995
996 r = 0;
997
998 if (PTR_TO_INT(hashmap_get(denylist, st)) == DENY_CONTENTS) {
999 log_debug("%s is in the denylist, not recursing", from);
1000 goto finish;
1001 }
1002
1003 FOREACH_DIRENT_ALL(de, d, return -errno) {
1004 const char *child_display_path = NULL;
1005 _cleanup_free_ char *dp = NULL;
1006 struct stat buf;
1007 int q;
1008
1009 if (dot_or_dot_dot(de->d_name))
1010 continue;
1011
1012 r = look_for_signals(copy_flags);
1013 if (r < 0)
1014 return r;
1015
1016 if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
1017 r = -errno;
1018 continue;
1019 }
1020
1021 if (progress_path) {
1022 if (display_path)
1023 child_display_path = dp = path_join(display_path, de->d_name);
1024 else
1025 child_display_path = de->d_name;
1026
1027 r = progress_path(child_display_path, &buf, userdata);
1028 if (r < 0)
1029 return r;
1030 }
1031
1032 if (PTR_TO_INT(hashmap_get(denylist, &buf)) == DENY_INODE) {
1033 log_debug("%s/%s is in the denylist, ignoring", from, de->d_name);
1034 continue;
1035 }
1036
1037 if (S_ISDIR(buf.st_mode)) {
1038 /*
1039 * Don't descend into directories on other file systems, if this is requested. We do a simple
1040 * .st_dev check here, which basically comes for free. Note that we do this check only on
1041 * directories, not other kind of file system objects, for two reason:
1042                          * directories, not other kinds of file system objects, for two reasons:
1043 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
1044 * propagates the .st_dev field of the file system a file originates from all the way up
1045 * through the stack to stat(). It doesn't do that for directories however. This means that
1046 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
1047 * confusion we hence avoid relying on this check for regular files.
1048 *
1049 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
1050 * where we really want to avoid descending down in all eternity. However the .st_dev check
1051 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
1052 * file system onto itself can't be detected that way. (Note we also do a recursion depth
1053 * check, which is probably the better protection in this regard, which is why
1054 * COPY_SAME_MOUNT is optional).
1055 */
1056
1057 if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
1058 if (buf.st_dev != original_device)
1059 continue;
1060
1061 r = fd_is_mount_point(dirfd(d), de->d_name, 0);
1062 if (r < 0)
1063 return r;
1064 if (r > 0)
1065 continue;
1066 }
1067 }
1068
1069 q = fd_copy_tree_generic(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device,
1070 depth_left-1, override_uid, override_gid, copy_flags & ~COPY_LOCK_BSD,
1071 denylist, subvolumes, hardlink_context, child_display_path, progress_path,
1072 progress_bytes, userdata);
1073
1074 if (q == -EINTR) /* Propagate SIGINT/SIGTERM up instantly */
1075 return q;
1076 if (q == -EEXIST && (copy_flags & COPY_MERGE))
1077 q = 0;
1078 if (q < 0)
1079 r = q;
1080 }
1081
1082 finish:
1083 if (!exists) {
1084 if (fchown(fdt,
1085 uid_is_valid(override_uid) ? override_uid : st->st_uid,
1086 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
1087 r = -errno;
1088
1089 if (fchmod(fdt, st->st_mode & 07777) < 0)
1090 r = -errno;
1091
1092 (void) copy_xattr(dirfd(d), NULL, fdt, NULL, copy_flags);
1093 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
1094 }
1095
1096 if (copy_flags & COPY_FSYNC_FULL) {
1097 if (fsync(fdt) < 0)
1098 return -errno;
1099 }
1100
1101 if (r < 0)
1102 return r;
1103
1104 return copy_flags & COPY_LOCK_BSD ? TAKE_FD(fdt) : 0;
1105 }
1106
1107 static int fd_copy_leaf(
1108 int df,
1109 const char *from,
1110 const struct stat *st,
1111 int dt,
1112 const char *to,
1113 uid_t override_uid,
1114 gid_t override_gid,
1115 CopyFlags copy_flags,
1116 HardlinkContext *hardlink_context,
1117 const char *display_path,
1118 copy_progress_bytes_t progress_bytes,
1119 void *userdata) {
1120 int r;
1121
1122 if (S_ISREG(st->st_mode))
1123 r = fd_copy_regular(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, progress_bytes, userdata);
1124 else if (S_ISLNK(st->st_mode))
1125 r = fd_copy_symlink(df, from, st, dt, to, override_uid, override_gid, copy_flags);
1126 else if (S_ISFIFO(st->st_mode))
1127 r = fd_copy_fifo(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context);
1128 else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode) || S_ISSOCK(st->st_mode))
1129 r = fd_copy_node(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context);
1130 else
1131 r = -EOPNOTSUPP;
1132
1133 return r;
1134 }
1135
1136 static int fd_copy_tree_generic(
1137 int df,
1138 const char *from,
1139 const struct stat *st,
1140 int dt,
1141 const char *to,
1142 dev_t original_device,
1143 unsigned depth_left,
1144 uid_t override_uid,
1145 gid_t override_gid,
1146 CopyFlags copy_flags,
1147 Hashmap *denylist,
1148 Set *subvolumes,
1149 HardlinkContext *hardlink_context,
1150 const char *display_path,
1151 copy_progress_path_t progress_path,
1152 copy_progress_bytes_t progress_bytes,
1153 void *userdata) {
1154
1155 int r;
1156
1157 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1158
1159 if (S_ISDIR(st->st_mode))
1160 return fd_copy_directory(df, from, st, dt, to, original_device, depth_left-1, override_uid,
1161 override_gid, copy_flags, denylist, subvolumes, hardlink_context,
1162 display_path, progress_path, progress_bytes, userdata);
1163
1164 DenyType t = PTR_TO_INT(hashmap_get(denylist, st));
1165 if (t == DENY_INODE) {
1166 log_debug("%s is in the denylist, ignoring", from);
1167 return 0;
1168 } else if (t == DENY_CONTENTS)
1169 log_debug("%s is configured to have its contents excluded, but is not a directory", from);
1170
1171 r = fd_copy_leaf(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, display_path, progress_bytes, userdata);
1172 /* We just tried to copy a leaf node of the tree. If it failed because the node already exists *and* the COPY_REPLACE flag has been provided, we should unlink the node and re-copy. */
1173 if (r == -EEXIST && (copy_flags & COPY_REPLACE)) {
1174                 /* We are already handling a failed copy here; if the unlink fails as well, let's just return the original error. */
1175 if (unlinkat(dt, to, 0) < 0)
1176 return r;
1177
1178 r = fd_copy_leaf(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, display_path, progress_bytes, userdata);
1179 }
1180
1181 return r;
1182 }
1183
1184 int copy_tree_at_full(
1185 int fdf,
1186 const char *from,
1187 int fdt,
1188 const char *to,
1189 uid_t override_uid,
1190 gid_t override_gid,
1191 CopyFlags copy_flags,
1192 Hashmap *denylist,
1193 Set *subvolumes,
1194 copy_progress_path_t progress_path,
1195 copy_progress_bytes_t progress_bytes,
1196 void *userdata) {
1197
1198 struct stat st;
1199 int r;
1200
1201 assert(from);
1202 assert(to);
1203 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1204
1205 if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
1206 return -errno;
1207
1208 r = fd_copy_tree_generic(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid,
1209 override_gid, copy_flags, denylist, subvolumes, NULL, NULL, progress_path,
1210 progress_bytes, userdata);
1211 if (r < 0)
1212 return r;
1213
1214 if (S_ISDIR(st.st_mode) && (copy_flags & COPY_SYNCFS)) {
1215                 /* If the top-level inode is a directory, run syncfs() now. */
1216 r = syncfs_path(fdt, to);
1217 if (r < 0)
1218 return r;
1219 } else if ((copy_flags & (COPY_FSYNC_FULL|COPY_SYNCFS)) != 0) {
1220 /* fsync() the parent dir of what we just copied if COPY_FSYNC_FULL is set. Also do this in
1221 * case COPY_SYNCFS is set but the top-level inode wasn't actually a directory. We do this so that
1222 * COPY_SYNCFS provides reasonable synchronization semantics on any kind of inode: when the
1223 * copy operation is done the whole inode — regardless of its type — and all its children
1224 * will be synchronized to disk. */
1225 r = fsync_parent_at(fdt, to);
1226 if (r < 0)
1227 return r;
1228 }
1229
1230 return 0;
1231 }
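
/* An illustrative caller sketch for copy_tree_at_full() (the paths are placeholders): recursively copy
 * a tree relative to the current working directory, keeping the original ownership, recreating
 * hardlinks, merging into an existing destination, and syncing the file system when done. */
static int example_copy_tree(void) {
        return copy_tree_at_full(AT_FDCWD, "src-tree",
                                 AT_FDCWD, "dst-tree",
                                 UID_INVALID, GID_INVALID,
                                 COPY_REFLINK|COPY_HARDLINKS|COPY_MERGE|COPY_SYNCFS,
                                 /* denylist= */ NULL, /* subvolumes= */ NULL,
                                 /* progress_path= */ NULL, /* progress_bytes= */ NULL,
                                 /* userdata= */ NULL);
}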
1232
1233 static int sync_dir_by_flags(int dir_fd, const char *path, CopyFlags copy_flags) {
1234 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
1235 assert(path);
1236
1237 if (copy_flags & COPY_SYNCFS)
1238 return syncfs_path(dir_fd, path);
1239 if (copy_flags & COPY_FSYNC_FULL)
1240 return fsync_parent_at(dir_fd, path);
1241
1242 return 0;
1243 }
1244
1245 int copy_directory_at_full(
1246 int dir_fdf,
1247 const char *from,
1248 int dir_fdt,
1249 const char *to,
1250 CopyFlags copy_flags,
1251 copy_progress_path_t progress_path,
1252 copy_progress_bytes_t progress_bytes,
1253 void *userdata) {
1254
1255 _cleanup_close_ int fdt = -EBADF;
1256 struct stat st;
1257 int r;
1258
1259 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1260 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
1261 assert(to);
1262
1263 if (fstatat(dir_fdf, strempty(from), &st, AT_SYMLINK_NOFOLLOW|(isempty(from) ? AT_EMPTY_PATH : 0)) < 0)
1264 return -errno;
1265
1266 r = stat_verify_directory(&st);
1267 if (r < 0)
1268 return r;
1269
1270 r = fd_copy_directory(
1271 dir_fdf, from,
1272 &st,
1273 dir_fdt, to,
1274 st.st_dev,
1275 COPY_DEPTH_MAX,
1276 UID_INVALID, GID_INVALID,
1277 copy_flags,
1278 NULL, NULL, NULL, NULL,
1279 progress_path,
1280 progress_bytes,
1281 userdata);
1282 if (r < 0)
1283 return r;
1284
1285 if (FLAGS_SET(copy_flags, COPY_LOCK_BSD))
1286 fdt = r;
1287
1288 r = sync_dir_by_flags(dir_fdt, to, copy_flags);
1289 if (r < 0)
1290 return r;
1291
1292 return FLAGS_SET(copy_flags, COPY_LOCK_BSD) ? TAKE_FD(fdt) : 0;
1293 }
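
/* A minimal sketch of copy_directory_at_full() with COPY_LOCK_BSD (the paths are placeholders): on
 * success the call returns the destination directory fd with a BSD LOCK_EX lock held, so other
 * lock-aware users stay out until the caller closes it. */
static int example_copy_directory_locked(void) {
        _cleanup_close_ int locked_fd = -EBADF;

        locked_fd = copy_directory_at_full(AT_FDCWD, "some-dir",
                                           AT_FDCWD, "some-dir.copy",
                                           COPY_REFLINK|COPY_MERGE|COPY_LOCK_BSD,
                                           /* progress_path= */ NULL, /* progress_bytes= */ NULL,
                                           /* userdata= */ NULL);
        if (locked_fd < 0)
                return locked_fd;

        /* … work on the freshly copied directory while the lock is held … */
        return 0;
}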
1294
1295 int copy_file_fd_at_full(
1296 int dir_fdf,
1297 const char *from,
1298 int fdt,
1299 CopyFlags copy_flags,
1300 copy_progress_bytes_t progress_bytes,
1301 void *userdata) {
1302
1303 _cleanup_close_ int fdf = -EBADF;
1304 struct stat st;
1305 int r;
1306
1307 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1308 assert(from);
1309 assert(fdt >= 0);
1310 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1311
1312 fdf = openat(dir_fdf, from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1313 if (fdf < 0)
1314 return -errno;
1315
1316 r = fd_verify_regular(fdf);
1317 if (r < 0)
1318 return r;
1319
1320 if (fstat(fdt, &st) < 0)
1321 return -errno;
1322
1323 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
1324 if (r < 0)
1325 return r;
1326
1327         /* Make sure to copy file attributes over only if the target is a regular
1328 * file (so that copying a file to /dev/null won't alter the access
1329 * mode/ownership of that device node...) */
1330 if (S_ISREG(st.st_mode)) {
1331 (void) copy_times(fdf, fdt, copy_flags);
1332 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
1333 }
1334
1335 if (copy_flags & COPY_FSYNC_FULL) {
1336 r = fsync_full(fdt);
1337 if (r < 0)
1338 return r;
1339 } else if (copy_flags & COPY_FSYNC) {
1340 if (fsync(fdt) < 0)
1341 return -errno;
1342 }
1343
1344 return 0;
1345 }
1346
1347 int copy_file_at_full(
1348 int dir_fdf,
1349 const char *from,
1350 int dir_fdt,
1351 const char *to,
1352 int flags,
1353 mode_t mode,
1354 unsigned chattr_flags,
1355 unsigned chattr_mask,
1356 CopyFlags copy_flags,
1357 copy_progress_bytes_t progress_bytes,
1358 void *userdata) {
1359
1360 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
1361 struct stat st;
1362 int r;
1363
1364 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1365 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
1366 assert(from);
1367 assert(to);
1368
1369 fdf = openat(dir_fdf, from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1370 if (fdf < 0)
1371 return -errno;
1372
1373 if (fstat(fdf, &st) < 0)
1374 return -errno;
1375
1376 r = stat_verify_regular(&st);
1377 if (r < 0)
1378 return r;
1379
1380 WITH_UMASK(0000) {
1381 fdt = xopenat_lock(dir_fdt, to,
1382 flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY,
1383 (copy_flags & COPY_MAC_CREATE ? XO_LABEL : 0),
1384 mode != MODE_INVALID ? mode : st.st_mode,
1385 copy_flags & COPY_LOCK_BSD ? LOCK_BSD : LOCK_NONE, LOCK_EX);
1386 if (fdt < 0)
1387 return fdt;
1388 }
1389
1390 if (!FLAGS_SET(flags, O_EXCL)) { /* if O_EXCL was used we created the thing as regular file, no need to check again */
1391 r = fd_verify_regular(fdt);
1392 if (r < 0)
1393 goto fail;
1394 }
1395
1396 if (chattr_mask != 0)
1397 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1398
1399 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags & ~COPY_LOCK_BSD, NULL, NULL, progress_bytes, userdata);
1400 if (r < 0)
1401 goto fail;
1402
1403 (void) copy_times(fdf, fdt, copy_flags);
1404 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
1405
1406 if (chattr_mask != 0)
1407 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1408
1409 if (copy_flags & (COPY_FSYNC|COPY_FSYNC_FULL)) {
1410 if (fsync(fdt) < 0) {
1411 r = -errno;
1412 goto fail;
1413 }
1414 }
1415
1416 if (!FLAGS_SET(copy_flags, COPY_LOCK_BSD)) {
1417 r = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
1418 if (r < 0)
1419 goto fail;
1420 }
1421
1422 if (copy_flags & COPY_FSYNC_FULL) {
1423 r = fsync_parent_at(dir_fdt, to);
1424 if (r < 0)
1425 goto fail;
1426 }
1427
1428 return copy_flags & COPY_LOCK_BSD ? TAKE_FD(fdt) : 0;
1429
1430 fail:
1431 /* Only unlink if we definitely are the ones who created the file */
1432 if (FLAGS_SET(flags, O_EXCL))
1433 (void) unlinkat(dir_fdt, to, 0);
1434
1435 return r;
1436 }
1437
1438 int copy_file_atomic_at_full(
1439 int dir_fdf,
1440 const char *from,
1441 int dir_fdt,
1442 const char *to,
1443 mode_t mode,
1444 unsigned chattr_flags,
1445 unsigned chattr_mask,
1446 CopyFlags copy_flags,
1447 copy_progress_bytes_t progress_bytes,
1448 void *userdata) {
1449
1450 _cleanup_(unlink_and_freep) char *t = NULL;
1451 _cleanup_close_ int fdt = -EBADF;
1452 int r;
1453
1454 assert(from);
1455 assert(to);
1456 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1457
1458 if (copy_flags & COPY_MAC_CREATE) {
1459 r = mac_selinux_create_file_prepare_at(dir_fdt, to, S_IFREG);
1460 if (r < 0)
1461 return r;
1462 }
1463 fdt = open_tmpfile_linkable_at(dir_fdt, to, O_WRONLY|O_CLOEXEC, &t);
1464 if (copy_flags & COPY_MAC_CREATE)
1465 mac_selinux_create_file_clear();
1466 if (fdt < 0)
1467 return fdt;
1468
1469 if (chattr_mask != 0)
1470 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1471
1472 r = copy_file_fd_at_full(dir_fdf, from, fdt, copy_flags, progress_bytes, userdata);
1473 if (r < 0)
1474 return r;
1475
1476 if (fchmod(fdt, mode) < 0)
1477 return -errno;
1478
1479 if ((copy_flags & (COPY_FSYNC|COPY_FSYNC_FULL))) {
1480 /* Sync the file */
1481 if (fsync(fdt) < 0)
1482 return -errno;
1483 }
1484
1485 r = link_tmpfile_at(fdt, dir_fdt, t, to, (copy_flags & COPY_REPLACE) ? LINK_TMPFILE_REPLACE : 0);
1486 if (r < 0)
1487 return r;
1488
1489 t = mfree(t);
1490
1491 if (chattr_mask != 0)
1492 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1493
1494 r = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
1495 if (r < 0)
1496 goto fail;
1497
1498 if (copy_flags & COPY_FSYNC_FULL) {
1499 /* Sync the parent directory */
1500 r = fsync_parent_at(dir_fdt, to);
1501 if (r < 0)
1502 goto fail;
1503 }
1504
1505 return 0;
1506
1507 fail:
1508 (void) unlinkat(dir_fdt, to, 0);
1509 return r;
1510 }
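
/* An illustrative sketch for copy_file_atomic_at_full() (the file names are placeholders): the data is
 * written to a temporary file first and only linked into place at the end, so readers never observe a
 * partially written destination; COPY_REPLACE allows overwriting an existing file. */
static int example_copy_file_atomically(void) {
        return copy_file_atomic_at_full(AT_FDCWD, "input.conf",
                                        AT_FDCWD, "output.conf",
                                        0644,
                                        /* chattr_flags= */ 0, /* chattr_mask= */ 0,
                                        COPY_REPLACE|COPY_FSYNC_FULL,
                                        /* progress_bytes= */ NULL, /* userdata= */ NULL);
}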
1511
1512 int copy_times(int fdf, int fdt, CopyFlags flags) {
1513 struct stat st;
1514
1515 assert(fdf >= 0);
1516 assert(fdt >= 0);
1517
1518 if (fstat(fdf, &st) < 0)
1519 return -errno;
1520
1521 if (futimens(fdt, (struct timespec[2]) { st.st_atim, st.st_mtim }) < 0)
1522 return -errno;
1523
1524 if (FLAGS_SET(flags, COPY_CRTIME)) {
1525 usec_t crtime;
1526
1527 if (fd_getcrtime(fdf, &crtime) >= 0)
1528 (void) fd_setcrtime(fdt, crtime);
1529 }
1530
1531 return 0;
1532 }
1533
1534 int copy_access(int fdf, int fdt) {
1535 struct stat st;
1536
1537 assert(fdf >= 0);
1538 assert(fdt >= 0);
1539
1540 /* Copies just the access mode (and not the ownership) from fdf to fdt */
1541
1542 if (fstat(fdf, &st) < 0)
1543 return -errno;
1544
1545 return RET_NERRNO(fchmod(fdt, st.st_mode & 07777));
1546 }
1547
1548 int copy_rights_with_fallback(int fdf, int fdt, const char *patht) {
1549 struct stat st;
1550
1551 assert(fdf >= 0);
1552 assert(fdt >= 0);
1553
1554 /* Copies both access mode and ownership from fdf to fdt */
1555
1556 if (fstat(fdf, &st) < 0)
1557 return -errno;
1558
1559 return fchmod_and_chown_with_fallback(fdt, patht, st.st_mode & 07777, st.st_uid, st.st_gid);
1560 }
1561
1562 int copy_xattr(int df, const char *from, int dt, const char *to, CopyFlags copy_flags) {
1563 _cleanup_free_ char *names = NULL;
1564 int ret = 0, r;
1565
1566 r = listxattr_at_malloc(df, from, 0, &names);
1567 if (r < 0)
1568 return r;
1569
1570 NULSTR_FOREACH(p, names) {
1571 _cleanup_free_ char *value = NULL;
1572
1573 if (!FLAGS_SET(copy_flags, COPY_ALL_XATTRS) && !startswith(p, "user."))
1574 continue;
1575
1576 r = getxattr_at_malloc(df, from, p, 0, &value);
1577 if (r == -ENODATA)
1578 continue; /* gone by now */
1579 if (r < 0)
1580 return r;
1581
1582 if (xsetxattr(dt, to, p, value, r, 0) < 0)
1583 ret = -errno;
1584 }
1585
1586 return ret;
1587 }
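
/* An illustrative use of copy_xattr() between two already-open fds (passing NULL for both path
 * arguments operates on the fds themselves, as the callers above do): without COPY_ALL_XATTRS only the
 * user.* namespace is copied. */
static int example_copy_user_xattrs(int from_fd, int to_fd) {
        return copy_xattr(from_fd, /* from= */ NULL, to_fd, /* to= */ NULL, /* copy_flags= */ 0);
}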
1588
1589 int reflink(int infd, int outfd) {
1590 int r;
1591
1592 assert(infd >= 0);
1593 assert(outfd >= 0);
1594
1595 /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
1596
1597 r = fd_verify_regular(outfd);
1598 if (r < 0)
1599 return r;
1600
1601 /* FICLONE was introduced in Linux 4.5 but it uses the same number as BTRFS_IOC_CLONE introduced earlier */
1602
1603 assert_cc(FICLONE == BTRFS_IOC_CLONE);
1604
1605 return RET_NERRNO(ioctl(outfd, FICLONE, infd));
1606 }
1607
1608 assert_cc(sizeof(struct file_clone_range) == sizeof(struct btrfs_ioctl_clone_range_args));
1609
1610 int reflink_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
1611 struct file_clone_range args = {
1612 .src_fd = infd,
1613 .src_offset = in_offset,
1614 .src_length = sz,
1615 .dest_offset = out_offset,
1616 };
1617 int r;
1618
1619 assert(infd >= 0);
1620 assert(outfd >= 0);
1621
1622         /* Inside the kernel, FICLONE is identical to FICLONERANGE with offsets and size set to zero, hence let's
1623 * simplify things and use the simple ioctl in that case. Also, do the same if the size is
1624 * UINT64_MAX, which is how we usually encode "everything". */
1625 if (in_offset == 0 && out_offset == 0 && IN_SET(sz, 0, UINT64_MAX))
1626 return reflink(infd, outfd);
1627
1628 r = fd_verify_regular(outfd);
1629 if (r < 0)
1630 return r;
1631
1632 assert_cc(FICLONERANGE == BTRFS_IOC_CLONE_RANGE);
1633
1634 return RET_NERRNO(ioctl(outfd, FICLONERANGE, &args));
1635 }
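
/* A small illustrative sketch of reflink_range() (the fds and offsets are arbitrary): clone 1 MiB
 * starting at offset 0 of the source into offset 4096 of the destination. This only succeeds on file
 * systems supporting FICLONERANGE (e.g. btrfs, or XFS with reflink enabled); otherwise an error such as
 * -EOPNOTSUPP or -EXDEV is returned. */
static int example_reflink_some(int from_fd, int to_fd) {
        return reflink_range(from_fd, /* in_offset= */ 0, to_fd, /* out_offset= */ 4096, 1024U * 1024U);
}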