1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <linux/btrfs.h>
6 #include <stddef.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/file.h>
10 #include <sys/ioctl.h>
11 #include <sys/sendfile.h>
12 #include <sys/xattr.h>
13 #include <unistd.h>
14
15 #include "alloc-util.h"
16 #include "btrfs-util.h"
17 #include "chattr-util.h"
18 #include "copy.h"
19 #include "dirent-util.h"
20 #include "fd-util.h"
21 #include "fileio.h"
22 #include "fs-util.h"
23 #include "io-util.h"
24 #include "macro.h"
25 #include "missing_fs.h"
26 #include "missing_syscall.h"
27 #include "mkdir-label.h"
28 #include "mountpoint-util.h"
29 #include "nulstr-util.h"
30 #include "rm-rf.h"
31 #include "selinux-util.h"
32 #include "signal-util.h"
33 #include "stat-util.h"
34 #include "stdio-util.h"
35 #include "string-util.h"
36 #include "strv.h"
37 #include "sync-util.h"
38 #include "time-util.h"
39 #include "tmpfile-util.h"
40 #include "umask-util.h"
41 #include "user-util.h"
42 #include "xattr-util.h"
43
44 #define COPY_BUFFER_SIZE (16U*1024U)
45
46 /* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
47 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
48 * case of bind mount cycles and suchlike. */
49 #define COPY_DEPTH_MAX 2048U
50
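/* Thin wrapper around copy_file_range() that remembers whether the syscall is available at all: after the
 * first failure with ENOSYS the static 'have' flag is set to zero, so that later calls return -ENOSYS right
 * away instead of issuing the syscall again. */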
51 static ssize_t try_copy_file_range(
52 int fd_in, loff_t *off_in,
53 int fd_out, loff_t *off_out,
54 size_t len,
55 unsigned flags) {
56
57 static int have = -1;
58 ssize_t r;
59
60 if (have == 0)
61 return -ENOSYS;
62
63 r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
64 if (have < 0)
65 have = r >= 0 || errno != ENOSYS;
66 if (r < 0)
67 return -errno;
68
69 return r;
70 }
71
72 enum {
73 FD_IS_NO_PIPE,
74 FD_IS_BLOCKING_PIPE,
75 FD_IS_NONBLOCKING_PIPE,
76 };
77
78 static int fd_is_nonblock_pipe(int fd) {
79 struct stat st;
80 int flags;
81
82         /* Checks whether the specified file descriptor refers to a pipe, and if so, whether O_NONBLOCK is set. */
83
84 if (fstat(fd, &st) < 0)
85 return -errno;
86
87 if (!S_ISFIFO(st.st_mode))
88 return FD_IS_NO_PIPE;
89
90 flags = fcntl(fd, F_GETFL);
91 if (flags < 0)
92 return -errno;
93
94 return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
95 }
96
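/* If the caller requested cancellation on SIGINT/SIGTERM via the corresponding copy flags, check whether
 * such a signal is pending and return -EINTR if so, so that long-running copy operations can be aborted
 * cleanly. */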
97 static int look_for_signals(CopyFlags copy_flags) {
98 int r;
99
100 if ((copy_flags & (COPY_SIGINT|COPY_SIGTERM)) == 0)
101 return 0;
102
103 r = pop_pending_signal(copy_flags & COPY_SIGINT ? SIGINT : 0,
104 copy_flags & COPY_SIGTERM ? SIGTERM : 0);
105 if (r < 0)
106 return r;
107 if (r != 0)
108 return log_debug_errno(SYNTHETIC_ERRNO(EINTR),
109 "Got %s, cancelling copy operation.", signal_to_string(r));
110
111 return 0;
112 }
113
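/* Creates a hole of the specified size at the current file offset of 'fd': within already allocated space
 * the hole is punched with fallocate(), and if that does not cover the full size the file is grown with
 * ftruncate(). On success the file offset points to the end of the newly created hole. */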
114 static int create_hole(int fd, off_t size) {
115 off_t offset;
116 off_t end;
117
118 offset = lseek(fd, 0, SEEK_CUR);
119 if (offset < 0)
120 return -errno;
121
122 end = lseek(fd, 0, SEEK_END);
123 if (end < 0)
124 return -errno;
125
126 /* If we're not at the end of the target file, try to punch a hole in the existing space using fallocate(). */
127
128 if (offset < end &&
129 fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, MIN(size, end - offset)) < 0 &&
130 !ERRNO_IS_NOT_SUPPORTED(errno))
131 return -errno;
132
133 if (end - offset >= size) {
134 /* If we've created the full hole, set the file pointer to the end of the hole we created and exit. */
135 if (lseek(fd, offset + size, SEEK_SET) < 0)
136 return -errno;
137
138 return 0;
139 }
140
141 /* If we haven't created the full hole, use ftruncate() to grow the file (and the hole) to the
142 * required size and move the file pointer to the end of the file. */
143
144 size -= end - offset;
145
146 if (ftruncate(fd, end + size) < 0)
147 return -errno;
148
149 if (lseek(fd, 0, SEEK_END) < 0)
150 return -errno;
151
152 return 0;
153 }
154
155 int copy_bytes_full(
156 int fdf, int fdt,
157 uint64_t max_bytes,
158 CopyFlags copy_flags,
159 void **ret_remains,
160 size_t *ret_remains_size,
161 copy_progress_bytes_t progress,
162 void *userdata) {
163
164 _cleanup_close_ int fdf_opened = -EBADF, fdt_opened = -EBADF;
165 bool try_cfr = true, try_sendfile = true, try_splice = true, copied_something = false;
166 int r, nonblock_pipe = -1;
167 size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
168
169 assert(fdf >= 0);
170 assert(fdt >= 0);
171 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
172
173 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
174          * of 'max_bytes', which may be specified as UINT64_MAX, in which case no maximum is applied. Returns negative on
175          * error, zero if EOF is reached before the byte limit is hit, and positive otherwise. If the copy fails for some
176          * reason, but we already read some data that we didn't write yet, and ret_remains/ret_remains_size are not NULL,
177          * then they will be initialized with an allocated buffer containing this "remaining" data. Note that these two
178          * parameters are initialized with a valid buffer only on failure and only if there's actually data already read.
179          * Otherwise these parameters, if non-NULL, are set to NULL. */
180
181 if (ret_remains)
182 *ret_remains = NULL;
183 if (ret_remains_size)
184 *ret_remains_size = 0;
185
186 fdf = fd_reopen_condition(fdf, O_CLOEXEC | O_NOCTTY | O_RDONLY, O_PATH, &fdf_opened);
187 if (fdf < 0)
188 return fdf;
189 fdt = fd_reopen_condition(fdt, O_CLOEXEC | O_NOCTTY | O_RDWR, O_PATH, &fdt_opened);
190 if (fdt < 0)
191 return fdt;
192
193 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
194 * source and destination first. */
195 if ((copy_flags & COPY_REFLINK)) {
196 off_t foffset;
197
198 foffset = lseek(fdf, 0, SEEK_CUR);
199 if (foffset >= 0) {
200 off_t toffset;
201
202 toffset = lseek(fdt, 0, SEEK_CUR);
203 if (toffset >= 0) {
204
205 if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
206 r = reflink(fdf, fdt); /* full file reflink */
207 else
208 r = reflink_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
209 if (r >= 0) {
210 off_t t;
211
212 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
213 if (max_bytes == UINT64_MAX) {
214
215 /* We cloned to the end of the source file, let's position the read
216 * pointer there, and query it at the same time. */
217 t = lseek(fdf, 0, SEEK_END);
218 if (t < 0)
219 return -errno;
220 if (t < foffset)
221 return -ESPIPE;
222
223 /* Let's adjust the destination file write pointer by the same number
224 * of bytes. */
225 t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
226 if (t < 0)
227 return -errno;
228
229 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
230 } else {
231 t = lseek(fdf, foffset + max_bytes, SEEK_SET);
232 if (t < 0)
233 return -errno;
234
235 t = lseek(fdt, toffset + max_bytes, SEEK_SET);
236 if (t < 0)
237 return -errno;
238
239 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
240 }
241 }
242 }
243 }
244 }
245
246 for (;;) {
247 ssize_t n;
248
249 if (max_bytes <= 0)
250 break;
251
252 r = look_for_signals(copy_flags);
253 if (r < 0)
254 return r;
255
256 if (max_bytes != UINT64_MAX && m > max_bytes)
257 m = max_bytes;
258
259 if (copy_flags & COPY_HOLES) {
260 off_t c, e;
261
262 c = lseek(fdf, 0, SEEK_CUR);
263 if (c < 0)
264 return -errno;
265
266 /* To see if we're in a hole, we search for the next data offset. */
267 e = lseek(fdf, c, SEEK_DATA);
268 if (e < 0 && errno == ENXIO)
269 /* If errno == ENXIO, that means we've reached the final hole of the file and
270 * that hole isn't followed by more data. */
271 e = lseek(fdf, 0, SEEK_END);
272 if (e < 0)
273 return -errno;
274
275 /* If we're in a hole (current offset is not a data offset), create a hole of the
276 * same size in the target file. */
277 if (e > c) {
278 /* Make sure our new hole doesn't go over the maximum size we're allowed to copy. */
279 n = MIN(max_bytes, (uint64_t) e - c);
280 r = create_hole(fdt, n);
281 if (r < 0)
282 return r;
283
284 /* Make sure holes are taken into account in the maximum size we're supposed to copy. */
285 if (max_bytes != UINT64_MAX) {
286 max_bytes -= n;
287 if (max_bytes <= 0)
288 break;
289 }
290
291 /* Update the size we're supposed to copy in this iteration if needed. */
292 if (m > max_bytes)
293 m = max_bytes;
294 }
295
296 c = e; /* Set c to the start of the data segment. */
297
298 /* After copying a potential hole, find the end of the data segment by looking for
299 * the next hole. If we get ENXIO, we're at EOF. */
300 e = lseek(fdf, c, SEEK_HOLE);
301 if (e < 0) {
302 if (errno == ENXIO)
303 break;
304 return -errno;
305 }
306
307 /* SEEK_HOLE modifies the file offset so we need to move back to the initial offset. */
308 if (lseek(fdf, c, SEEK_SET) < 0)
309 return -errno;
310
311 /* Make sure we're not copying more than the current data segment. */
312 m = MIN(m, (size_t) e - c);
313 }
314
315 /* First try copy_file_range(), unless we already tried */
316 if (try_cfr) {
317 n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
318 if (n < 0) {
319 if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
320 return n;
321
322 try_cfr = false;
323 /* use fallback below */
324 } else if (n == 0) { /* likely EOF */
325
326 if (copied_something)
327 break;
328
329 /* So, we hit EOF immediately, without having copied a single byte. This
330 * could indicate two things: the file is actually empty, or we are on some
331 * virtual file system such as procfs/sysfs where the syscall actually
332 * doesn't work but doesn't return an error. Try to handle that, by falling
333 * back to simple read()s in case we encounter empty files.
334 *
335 * See: https://lwn.net/Articles/846403/ */
336 try_cfr = try_sendfile = try_splice = false;
337 } else
338 /* Success! */
339 goto next;
340 }
341
342                 /* Next, try sendfile(), unless we already tried */
343 if (try_sendfile) {
344 n = sendfile(fdt, fdf, NULL, m);
345 if (n < 0) {
346 if (!IN_SET(errno, EINVAL, ENOSYS))
347 return -errno;
348
349 try_sendfile = false;
350 /* use fallback below */
351 } else if (n == 0) { /* likely EOF */
352
353 if (copied_something)
354 break;
355
356 try_sendfile = try_splice = false; /* same logic as above for copy_file_range() */
357 } else
358 /* Success! */
359 goto next;
360 }
361
362 /* Then try splice, unless we already tried. */
363 if (try_splice) {
364
365 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
366 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
367 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour
368                          * here, and check if either of the specified fds is a pipe, and if so, let's pass
369 * the flag automatically, depending on O_NONBLOCK being set.
370 *
371 * Here's a twist though: when we use it to move data between two pipes of which one
372 * has O_NONBLOCK set and the other has not, then we have no individual control over
373 * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee
374 * systematic O_NONBLOCK behaviour, hence don't. */
375
376 if (nonblock_pipe < 0) {
377 int a, b;
378
379 /* Check if either of these fds is a pipe, and if so non-blocking or not */
380 a = fd_is_nonblock_pipe(fdf);
381 if (a < 0)
382 return a;
383
384 b = fd_is_nonblock_pipe(fdt);
385 if (b < 0)
386 return b;
387
388 if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
389 (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
390 (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
391
392 /* splice() only works if one of the fds is a pipe. If neither is,
393 * let's skip this step right-away. As mentioned above, if one of the
394 * two fds refers to a blocking pipe and the other to a non-blocking
395 * pipe, we can't use splice() either, hence don't try either. This
396 * hence means we can only use splice() if either only one of the two
397 * fds is a pipe, or if both are pipes with the same nonblocking flag
398 * setting. */
399
400 try_splice = false;
401 else
402 nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
403 }
404 }
405
406 if (try_splice) {
407 n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
408 if (n < 0) {
409 if (!IN_SET(errno, EINVAL, ENOSYS))
410 return -errno;
411
412 try_splice = false;
413 /* use fallback below */
414 } else if (n == 0) { /* likely EOF */
415
416 if (copied_something)
417 break;
418
419 try_splice = false; /* same logic as above for copy_file_range() + sendfile() */
420 } else
421 /* Success! */
422 goto next;
423 }
424
425 /* As a fallback just copy bits by hand */
426 {
427 uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
428 ssize_t z;
429
430 n = read(fdf, buf, sizeof buf);
431 if (n < 0)
432 return -errno;
433 if (n == 0) /* EOF */
434 break;
435
436 z = (size_t) n;
437 do {
438 ssize_t k;
439
440 k = write(fdt, p, z);
441 if (k < 0) {
442 r = -errno;
443
444 if (ret_remains) {
445 void *copy;
446
447 copy = memdup(p, z);
448 if (!copy)
449 return -ENOMEM;
450
451 *ret_remains = copy;
452 }
453
454 if (ret_remains_size)
455 *ret_remains_size = z;
456
457 return r;
458 }
459
460 assert(k <= z);
461 z -= k;
462 p += k;
463 } while (z > 0);
464 }
465
466 next:
467 if (progress) {
468 r = progress(n, userdata);
469 if (r < 0)
470 return r;
471 }
472
473 if (max_bytes != UINT64_MAX) {
474 assert(max_bytes >= (uint64_t) n);
475 max_bytes -= n;
476 }
477
478 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the
479                  * amount we already copied, but don't go below our copy buffer size, unless we are close to the
480 * limit of bytes we are allowed to copy. */
481 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
482
483 copied_something = true;
484 }
485
486 if (copy_flags & COPY_TRUNCATE) {
487 off_t off = lseek(fdt, 0, SEEK_CUR);
488 if (off < 0)
489 return -errno;
490
491 if (ftruncate(fdt, off) < 0)
492 return -errno;
493 }
494
495 return max_bytes <= 0; /* return 0 if we hit EOF earlier than the size limit */
496 }
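
/* Illustrative usage sketch, not part of the original file: with two hypothetical, already-open file
 * descriptors 'src_fd' and 'dst_fd', a full copy via copy_bytes_full() that also reproduces holes might
 * look roughly like this:
 *
 *         r = copy_bytes_full(src_fd, dst_fd, UINT64_MAX, COPY_REFLINK|COPY_HOLES,
 *                             NULL, NULL, NULL, NULL);
 *         if (r < 0)
 *                 return r;
 *
 * A return of 0 means EOF was reached (i.e. everything was copied), a positive return means the byte limit
 * was reached first. */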
497
498 static int fd_copy_symlink(
499 int df,
500 const char *from,
501 const struct stat *st,
502 int dt,
503 const char *to,
504 uid_t override_uid,
505 gid_t override_gid,
506 CopyFlags copy_flags) {
507
508 _cleanup_free_ char *target = NULL;
509 int r;
510
511 assert(from);
512 assert(st);
513 assert(to);
514
515 r = readlinkat_malloc(df, from, &target);
516 if (r < 0)
517 return r;
518
519 if (copy_flags & COPY_MAC_CREATE) {
520 r = mac_selinux_create_file_prepare_at(dt, to, S_IFLNK);
521 if (r < 0)
522 return r;
523 }
524 r = RET_NERRNO(symlinkat(target, dt, to));
525 if (copy_flags & COPY_MAC_CREATE)
526 mac_selinux_create_file_clear();
527 if (r < 0) {
528 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
529 log_notice_errno(r, "Failed to copy symlink '%s', ignoring: %m", from);
530 return 0;
531 }
532
533 return r;
534 }
535
536 if (fchownat(dt, to,
537 uid_is_valid(override_uid) ? override_uid : st->st_uid,
538 gid_is_valid(override_gid) ? override_gid : st->st_gid,
539 AT_SYMLINK_NOFOLLOW) < 0)
540 r = -errno;
541
542 (void) copy_xattr(df, from, dt, to, copy_flags);
543 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
544 return r;
545 }
546
547 /* Encapsulates the database we store potential hardlink targets in */
548 typedef struct HardlinkContext {
549 int dir_fd; /* An fd to the directory we use as lookup table. Never AT_FDCWD. Lazily created, when
550 * we add the first entry. */
551
552 /* These two fields are used to create the hardlink repository directory above — via
553 * mkdirat(parent_fd, subdir) — and are kept so that we can automatically remove the directory again
554 * when we are done. */
555 int parent_fd; /* Possibly AT_FDCWD */
556 char *subdir;
557 } HardlinkContext;
558
559 static int hardlink_context_setup(
560 HardlinkContext *c,
561 int dt,
562 const char *to,
563 CopyFlags copy_flags) {
564
565 _cleanup_close_ int dt_copy = -EBADF;
566 int r;
567
568 assert(c);
569 assert(c->dir_fd < 0 && c->dir_fd != AT_FDCWD);
570 assert(c->parent_fd < 0);
571 assert(!c->subdir);
572
573 /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
574 * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
575 * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
576 * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
577          * temporary directory consisting of hardlinks of every inode we copied that might be subject to
578 * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
579 * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
580 * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
581 * let's keep things simple, and just use the disk as lookup table for inodes.
582 *
583 * Note that this should have zero performance impact as long as .n_link of all files copied remains
584          * <= 1, because in that case we will not actually allocate the hardlink inode lookup table directory
585          * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the
586          * common case where hardlinks are not used at all or only for a few files, the fact that we store the
587          * table on disk shouldn't matter performance-wise. */
588
589 if (!FLAGS_SET(copy_flags, COPY_HARDLINKS))
590 return 0;
591
592 if (dt == AT_FDCWD)
593 dt_copy = AT_FDCWD;
594 else if (dt < 0)
595 return -EBADF;
596 else {
597 dt_copy = fcntl(dt, F_DUPFD_CLOEXEC, 3);
598 if (dt_copy < 0)
599 return -errno;
600 }
601
602 r = tempfn_random_child(to, "hardlink", &c->subdir);
603 if (r < 0)
604 return r;
605
606 c->parent_fd = TAKE_FD(dt_copy);
607
608 /* We don't actually create the directory we keep the table in here, that's done on-demand when the
609 * first entry is added, using hardlink_context_realize() below. */
610 return 1;
611 }
612
613 static int hardlink_context_realize(HardlinkContext *c) {
614 if (!c)
615 return 0;
616
617 if (c->dir_fd >= 0) /* Already realized */
618 return 1;
619
620 if (c->parent_fd < 0 && c->parent_fd != AT_FDCWD) /* Not configured */
621 return 0;
622
623 assert(c->subdir);
624
625 c->dir_fd = open_mkdir_at(c->parent_fd, c->subdir, O_EXCL|O_CLOEXEC, 0700);
626 if (c->dir_fd < 0)
627 return c->dir_fd;
628
629 return 1;
630 }
631
632 static void hardlink_context_destroy(HardlinkContext *c) {
633 int r;
634
635 assert(c);
636
637 /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
638 * _cleanup_() so that we really delete this, even on failure. */
639
640 if (c->dir_fd >= 0) {
641 r = rm_rf_children(TAKE_FD(c->dir_fd), REMOVE_PHYSICAL, NULL); /* consumes dir_fd in all cases, even on failure */
642 if (r < 0)
643 log_debug_errno(r, "Failed to remove hardlink store (%s) contents, ignoring: %m", c->subdir);
644
645 assert(c->parent_fd >= 0 || c->parent_fd == AT_FDCWD);
646 assert(c->subdir);
647
648 if (unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR) < 0)
649 log_debug_errno(errno, "Failed to remove hardlink store (%s) directory, ignoring: %m", c->subdir);
650 }
651
652 assert_cc(AT_FDCWD < 0);
653 c->parent_fd = safe_close(c->parent_fd);
654
655 c->subdir = mfree(c->subdir);
656 }
657
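/* Both helpers below address entries in the hardlink store by a "major:minor:inode" file name derived from
 * the source inode; e.g. inode 4711 on a device with major/minor 8:1 is stored under the name "8:1:4711". */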
658 static int try_hardlink(
659 HardlinkContext *c,
660 const struct stat *st,
661 int dt,
662 const char *to) {
663
664 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
665
666 assert(st);
667 assert(dt >= 0 || dt == AT_FDCWD);
668 assert(to);
669
670 if (!c) /* No temporary hardlink directory, don't bother */
671 return 0;
672
673 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
674 return 0;
675
676 if (c->dir_fd < 0) /* not yet realized, hence empty */
677 return 0;
678
679 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
680 if (linkat(c->dir_fd, dev_ino, dt, to, 0) < 0) {
681 if (errno != ENOENT) /* doesn't exist in store yet */
682 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", dev_ino, to);
683 return 0;
684 }
685
686 return 1;
687 }
688
689 static int memorize_hardlink(
690 HardlinkContext *c,
691 const struct stat *st,
692 int dt,
693 const char *to) {
694
695 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
696 int r;
697
698 assert(st);
699 assert(dt >= 0 || dt == AT_FDCWD);
700 assert(to);
701
702 if (!c) /* No temporary hardlink directory, don't bother */
703 return 0;
704
705 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
706 return 0;
707
708 r = hardlink_context_realize(c); /* Create the hardlink store lazily */
709 if (r < 0)
710 return r;
711
712 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
713 if (linkat(dt, to, c->dir_fd, dev_ino, 0) < 0) {
714 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", to, dev_ino);
715 return 0;
716 }
717
718 return 1;
719 }
720
721 static int fd_copy_tree_generic(
722 int df,
723 const char *from,
724 const struct stat *st,
725 int dt,
726 const char *to,
727 dev_t original_device,
728 unsigned depth_left,
729 uid_t override_uid,
730 gid_t override_gid,
731 CopyFlags copy_flags,
732 Hashmap *denylist,
733 Set *subvolumes,
734 HardlinkContext *hardlink_context,
735 const char *display_path,
736 copy_progress_path_t progress_path,
737 copy_progress_bytes_t progress_bytes,
738 void *userdata);
739
740 static int fd_copy_regular(
741 int df,
742 const char *from,
743 const struct stat *st,
744 int dt,
745 const char *to,
746 uid_t override_uid,
747 gid_t override_gid,
748 CopyFlags copy_flags,
749 HardlinkContext *hardlink_context,
750 copy_progress_bytes_t progress,
751 void *userdata) {
752
753 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
754 int r, q;
755
756 assert(from);
757 assert(st);
758 assert(to);
759
760 r = try_hardlink(hardlink_context, st, dt, to);
761 if (r < 0)
762 return r;
763 if (r > 0) /* worked! */
764 return 0;
765
766 fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
767 if (fdf < 0)
768 return -errno;
769
770 if (copy_flags & COPY_MAC_CREATE) {
771 r = mac_selinux_create_file_prepare_at(dt, to, S_IFREG);
772 if (r < 0)
773 return r;
774 }
775 fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
776 if (copy_flags & COPY_MAC_CREATE)
777 mac_selinux_create_file_clear();
778 if (fdt < 0)
779 return -errno;
780
781 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress, userdata);
782 if (r < 0)
783 goto fail;
784
785 if (fchown(fdt,
786 uid_is_valid(override_uid) ? override_uid : st->st_uid,
787 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
788 r = -errno;
789
790 if (fchmod(fdt, st->st_mode & 07777) < 0)
791 r = -errno;
792
793 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
794 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
795
796 if (copy_flags & COPY_FSYNC) {
797 if (fsync(fdt) < 0) {
798 r = -errno;
799 goto fail;
800 }
801 }
802
803 q = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
804 if (q < 0) {
805 r = q;
806 goto fail;
807 }
808
809 (void) memorize_hardlink(hardlink_context, st, dt, to);
810 return r;
811
812 fail:
813 (void) unlinkat(dt, to, 0);
814 return r;
815 }
816
817 static int fd_copy_fifo(
818 int df,
819 const char *from,
820 const struct stat *st,
821 int dt,
822 const char *to,
823 uid_t override_uid,
824 gid_t override_gid,
825 CopyFlags copy_flags,
826 HardlinkContext *hardlink_context) {
827 int r;
828
829 assert(from);
830 assert(st);
831 assert(to);
832
833 r = try_hardlink(hardlink_context, st, dt, to);
834 if (r < 0)
835 return r;
836 if (r > 0) /* worked! */
837 return 0;
838
839 if (copy_flags & COPY_MAC_CREATE) {
840 r = mac_selinux_create_file_prepare_at(dt, to, S_IFIFO);
841 if (r < 0)
842 return r;
843 }
844 r = RET_NERRNO(mkfifoat(dt, to, st->st_mode & 07777));
845 if (copy_flags & COPY_MAC_CREATE)
846 mac_selinux_create_file_clear();
847 if (r < 0) {
848 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
849 log_notice_errno(r, "Failed to copy fifo '%s', ignoring: %m", from);
850 return 0;
851 }
852
853 return r;
854 }
855
856 if (fchownat(dt, to,
857 uid_is_valid(override_uid) ? override_uid : st->st_uid,
858 gid_is_valid(override_gid) ? override_gid : st->st_gid,
859 AT_SYMLINK_NOFOLLOW) < 0)
860 r = -errno;
861
862 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
863 r = -errno;
864
865 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
866
867 (void) memorize_hardlink(hardlink_context, st, dt, to);
868 return r;
869 }
870
871 static int fd_copy_node(
872 int df,
873 const char *from,
874 const struct stat *st,
875 int dt,
876 const char *to,
877 uid_t override_uid,
878 gid_t override_gid,
879 CopyFlags copy_flags,
880 HardlinkContext *hardlink_context) {
881 int r;
882
883 assert(from);
884 assert(st);
885 assert(to);
886
887 r = try_hardlink(hardlink_context, st, dt, to);
888 if (r < 0)
889 return r;
890 if (r > 0) /* worked! */
891 return 0;
892
893 if (copy_flags & COPY_MAC_CREATE) {
894 r = mac_selinux_create_file_prepare_at(dt, to, st->st_mode & S_IFMT);
895 if (r < 0)
896 return r;
897 }
898 r = RET_NERRNO(mknodat(dt, to, st->st_mode, st->st_rdev));
899 if (copy_flags & COPY_MAC_CREATE)
900 mac_selinux_create_file_clear();
901 if (r < 0) {
902 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
903 log_notice_errno(r, "Failed to copy node '%s', ignoring: %m", from);
904 return 0;
905 }
906
907 return r;
908 }
909
910 if (fchownat(dt, to,
911 uid_is_valid(override_uid) ? override_uid : st->st_uid,
912 gid_is_valid(override_gid) ? override_gid : st->st_gid,
913 AT_SYMLINK_NOFOLLOW) < 0)
914 r = -errno;
915
916 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
917 r = -errno;
918
919 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
920
921 (void) memorize_hardlink(hardlink_context, st, dt, to);
922 return r;
923 }
924
925 static int fd_copy_directory(
926 int df,
927 const char *from,
928 const struct stat *st,
929 int dt,
930 const char *to,
931 dev_t original_device,
932 unsigned depth_left,
933 uid_t override_uid,
934 gid_t override_gid,
935 CopyFlags copy_flags,
936 Hashmap *denylist,
937 Set *subvolumes,
938 HardlinkContext *hardlink_context,
939 const char *display_path,
940 copy_progress_path_t progress_path,
941 copy_progress_bytes_t progress_bytes,
942 void *userdata) {
943
944 _cleanup_(hardlink_context_destroy) HardlinkContext our_hardlink_context = {
945 .dir_fd = -EBADF,
946 .parent_fd = -EBADF,
947 };
948
949 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
950 _cleanup_closedir_ DIR *d = NULL;
951 bool exists;
952 int r;
953
954 assert(st);
955 assert(to);
956
957 if (depth_left == 0)
958 return -ENAMETOOLONG;
959
960 if (from)
961 fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
962 else
963 fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
964 if (fdf < 0)
965 return -errno;
966
967 if (!hardlink_context) {
968 /* If recreating hardlinks is requested let's set up a context for that now. */
969 r = hardlink_context_setup(&our_hardlink_context, dt, to, copy_flags);
970 if (r < 0)
971 return r;
972 if (r > 0) /* It's enabled and allocated, let's now use the same context for all recursive
973 * invocations from here down */
974 hardlink_context = &our_hardlink_context;
975 }
976
977 d = take_fdopendir(&fdf);
978 if (!d)
979 return -errno;
980
981 r = dir_is_empty_at(dt, to, /* ignore_hidden_or_backup= */ false);
982 if (r < 0 && r != -ENOENT)
983 return r;
984 if ((r > 0 && !(copy_flags & (COPY_MERGE|COPY_MERGE_EMPTY))) || (r == 0 && !FLAGS_SET(copy_flags, COPY_MERGE)))
985 return -EEXIST;
986
987 exists = r >= 0;
988
989 fdt = xopenat_lock(dt, to,
990 O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|(exists ? 0 : O_CREAT|O_EXCL),
991 (copy_flags & COPY_MAC_CREATE ? XO_LABEL : 0)|(set_contains(subvolumes, st) ? XO_SUBVOLUME : 0),
992 st->st_mode & 07777,
993 copy_flags & COPY_LOCK_BSD ? LOCK_BSD : LOCK_NONE,
994 LOCK_EX);
995 if (fdt < 0)
996 return fdt;
997
998 r = 0;
999
1000 if (PTR_TO_INT(hashmap_get(denylist, st)) == DENY_CONTENTS) {
1001 log_debug("%s is in the denylist, not recursing", from);
1002 goto finish;
1003 }
1004
1005 FOREACH_DIRENT_ALL(de, d, return -errno) {
1006 const char *child_display_path = NULL;
1007 _cleanup_free_ char *dp = NULL;
1008 struct stat buf;
1009 int q;
1010
1011 if (dot_or_dot_dot(de->d_name))
1012 continue;
1013
1014 r = look_for_signals(copy_flags);
1015 if (r < 0)
1016 return r;
1017
1018 if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
1019 r = -errno;
1020 continue;
1021 }
1022
1023 if (progress_path) {
1024 if (display_path)
1025 child_display_path = dp = path_join(display_path, de->d_name);
1026 else
1027 child_display_path = de->d_name;
1028
1029 r = progress_path(child_display_path, &buf, userdata);
1030 if (r < 0)
1031 return r;
1032 }
1033
1034 if (PTR_TO_INT(hashmap_get(denylist, &buf)) == DENY_INODE) {
1035 log_debug("%s/%s is in the denylist, ignoring", from, de->d_name);
1036 continue;
1037 }
1038
1039 if (S_ISDIR(buf.st_mode)) {
1040 /*
1041 * Don't descend into directories on other file systems, if this is requested. We do a simple
1042 * .st_dev check here, which basically comes for free. Note that we do this check only on
1043                         * directories, not other kinds of file system objects, for two reasons:
1044 *
1045 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
1046 * propagates the .st_dev field of the file system a file originates from all the way up
1047 * through the stack to stat(). It doesn't do that for directories however. This means that
1048 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
1049 * confusion we hence avoid relying on this check for regular files.
1050 *
1051 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
1052 * where we really want to avoid descending down in all eternity. However the .st_dev check
1053 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
1054 * file system onto itself can't be detected that way. (Note we also do a recursion depth
1055 * check, which is probably the better protection in this regard, which is why
1056 * COPY_SAME_MOUNT is optional).
1057 */
1058
1059 if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
1060 if (buf.st_dev != original_device)
1061 continue;
1062
1063 r = fd_is_mount_point(dirfd(d), de->d_name, 0);
1064 if (r < 0)
1065 return r;
1066 if (r > 0)
1067 continue;
1068 }
1069 }
1070
1071 q = fd_copy_tree_generic(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device,
1072 depth_left-1, override_uid, override_gid, copy_flags & ~COPY_LOCK_BSD,
1073 denylist, subvolumes, hardlink_context, child_display_path, progress_path,
1074 progress_bytes, userdata);
1075
1076 if (q == -EINTR) /* Propagate SIGINT/SIGTERM up instantly */
1077 return q;
1078 if (q == -EEXIST && (copy_flags & COPY_MERGE))
1079 q = 0;
1080 if (q < 0)
1081 r = q;
1082 }
1083
1084 finish:
1085 if (!exists) {
1086 if (fchown(fdt,
1087 uid_is_valid(override_uid) ? override_uid : st->st_uid,
1088 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
1089 r = -errno;
1090
1091 if (fchmod(fdt, st->st_mode & 07777) < 0)
1092 r = -errno;
1093
1094 (void) copy_xattr(dirfd(d), NULL, fdt, NULL, copy_flags);
1095 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
1096 }
1097
1098 if (copy_flags & COPY_FSYNC_FULL) {
1099 if (fsync(fdt) < 0)
1100 return -errno;
1101 }
1102
1103 if (r < 0)
1104 return r;
1105
1106 return copy_flags & COPY_LOCK_BSD ? TAKE_FD(fdt) : 0;
1107 }
1108
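/* Dispatches to the type-specific copy helpers above for everything that is not a directory. */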
1109 static int fd_copy_leaf(
1110 int df,
1111 const char *from,
1112 const struct stat *st,
1113 int dt,
1114 const char *to,
1115 uid_t override_uid,
1116 gid_t override_gid,
1117 CopyFlags copy_flags,
1118 HardlinkContext *hardlink_context,
1119 const char *display_path,
1120 copy_progress_bytes_t progress_bytes,
1121 void *userdata) {
1122 int r;
1123
1124 if (S_ISREG(st->st_mode))
1125 r = fd_copy_regular(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, progress_bytes, userdata);
1126 else if (S_ISLNK(st->st_mode))
1127 r = fd_copy_symlink(df, from, st, dt, to, override_uid, override_gid, copy_flags);
1128 else if (S_ISFIFO(st->st_mode))
1129 r = fd_copy_fifo(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context);
1130 else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode) || S_ISSOCK(st->st_mode))
1131 r = fd_copy_node(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context);
1132 else
1133 r = -EOPNOTSUPP;
1134
1135 return r;
1136 }
1137
1138 static int fd_copy_tree_generic(
1139 int df,
1140 const char *from,
1141 const struct stat *st,
1142 int dt,
1143 const char *to,
1144 dev_t original_device,
1145 unsigned depth_left,
1146 uid_t override_uid,
1147 gid_t override_gid,
1148 CopyFlags copy_flags,
1149 Hashmap *denylist,
1150 Set *subvolumes,
1151 HardlinkContext *hardlink_context,
1152 const char *display_path,
1153 copy_progress_path_t progress_path,
1154 copy_progress_bytes_t progress_bytes,
1155 void *userdata) {
1156
1157 int r;
1158
1159 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1160
1161 if (S_ISDIR(st->st_mode))
1162 return fd_copy_directory(df, from, st, dt, to, original_device, depth_left-1, override_uid,
1163 override_gid, copy_flags, denylist, subvolumes, hardlink_context,
1164 display_path, progress_path, progress_bytes, userdata);
1165
1166 DenyType t = PTR_TO_INT(hashmap_get(denylist, st));
1167 if (t == DENY_INODE) {
1168 log_debug("%s is in the denylist, ignoring", from);
1169 return 0;
1170 } else if (t == DENY_CONTENTS)
1171 log_debug("%s is configured to have its contents excluded, but is not a directory", from);
1172
1173 r = fd_copy_leaf(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, display_path, progress_bytes, userdata);
1174 /* We just tried to copy a leaf node of the tree. If it failed because the node already exists *and* the COPY_REPLACE flag has been provided, we should unlink the node and re-copy. */
1175 if (r == -EEXIST && (copy_flags & COPY_REPLACE)) {
1176                 /* This codepath is us trying to recover from a failed copy: if the unlink fails too, let's just return the original error. */
1177 if (unlinkat(dt, to, 0) < 0)
1178 return r;
1179
1180 r = fd_copy_leaf(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, display_path, progress_bytes, userdata);
1181 }
1182
1183 return r;
1184 }
1185
1186 int copy_tree_at_full(
1187 int fdf,
1188 const char *from,
1189 int fdt,
1190 const char *to,
1191 uid_t override_uid,
1192 gid_t override_gid,
1193 CopyFlags copy_flags,
1194 Hashmap *denylist,
1195 Set *subvolumes,
1196 copy_progress_path_t progress_path,
1197 copy_progress_bytes_t progress_bytes,
1198 void *userdata) {
1199
1200 struct stat st;
1201 int r;
1202
1203 assert(from);
1204 assert(to);
1205 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1206
1207 if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
1208 return -errno;
1209
1210 r = fd_copy_tree_generic(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid,
1211 override_gid, copy_flags, denylist, subvolumes, NULL, NULL, progress_path,
1212 progress_bytes, userdata);
1213 if (r < 0)
1214 return r;
1215
1216 if (S_ISDIR(st.st_mode) && (copy_flags & COPY_SYNCFS)) {
1217 /* If the top-level inode is a directory run syncfs() now. */
1218 r = syncfs_path(fdt, to);
1219 if (r < 0)
1220 return r;
1221 } else if ((copy_flags & (COPY_FSYNC_FULL|COPY_SYNCFS)) != 0) {
1222 /* fsync() the parent dir of what we just copied if COPY_FSYNC_FULL is set. Also do this in
1223 * case COPY_SYNCFS is set but the top-level inode wasn't actually a directory. We do this so that
1224 * COPY_SYNCFS provides reasonable synchronization semantics on any kind of inode: when the
1225 * copy operation is done the whole inode — regardless of its type — and all its children
1226 * will be synchronized to disk. */
1227 r = fsync_parent_at(fdt, to);
1228 if (r < 0)
1229 return r;
1230 }
1231
1232 return 0;
1233 }
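
/* Illustrative usage sketch, not part of the original file: recursively copying a directory tree with
 * copy_tree_at_full(), using hypothetical source and destination paths, merging into an existing
 * destination, recreating hardlinks and reflinking file contents where the file system supports it, might
 * look roughly like this:
 *
 *         r = copy_tree_at_full(AT_FDCWD, "/var/tmp/src",
 *                               AT_FDCWD, "/var/tmp/dst",
 *                               UID_INVALID, GID_INVALID,
 *                               COPY_REFLINK|COPY_MERGE|COPY_HARDLINKS,
 *                               NULL, NULL, NULL, NULL, NULL);
 */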
1234
1235 static int sync_dir_by_flags(int dir_fd, const char *path, CopyFlags copy_flags) {
1236 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
1237 assert(path);
1238
1239 if (copy_flags & COPY_SYNCFS)
1240 return syncfs_path(dir_fd, path);
1241 if (copy_flags & COPY_FSYNC_FULL)
1242 return fsync_parent_at(dir_fd, path);
1243
1244 return 0;
1245 }
1246
1247 int copy_directory_at_full(
1248 int dir_fdf,
1249 const char *from,
1250 int dir_fdt,
1251 const char *to,
1252 CopyFlags copy_flags,
1253 copy_progress_path_t progress_path,
1254 copy_progress_bytes_t progress_bytes,
1255 void *userdata) {
1256
1257 _cleanup_close_ int fdt = -EBADF;
1258 struct stat st;
1259 int r;
1260
1261 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1262 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
1263 assert(to);
1264
1265 if (fstatat(dir_fdf, strempty(from), &st, AT_SYMLINK_NOFOLLOW|(isempty(from) ? AT_EMPTY_PATH : 0)) < 0)
1266 return -errno;
1267
1268 r = stat_verify_directory(&st);
1269 if (r < 0)
1270 return r;
1271
1272 r = fd_copy_directory(
1273 dir_fdf, from,
1274 &st,
1275 dir_fdt, to,
1276 st.st_dev,
1277 COPY_DEPTH_MAX,
1278 UID_INVALID, GID_INVALID,
1279 copy_flags,
1280 NULL, NULL, NULL, NULL,
1281 progress_path,
1282 progress_bytes,
1283 userdata);
1284 if (r < 0)
1285 return r;
1286
1287 if (FLAGS_SET(copy_flags, COPY_LOCK_BSD))
1288 fdt = r;
1289
1290 r = sync_dir_by_flags(dir_fdt, to, copy_flags);
1291 if (r < 0)
1292 return r;
1293
1294 return FLAGS_SET(copy_flags, COPY_LOCK_BSD) ? TAKE_FD(fdt) : 0;
1295 }
1296
1297 int copy_file_fd_at_full(
1298 int dir_fdf,
1299 const char *from,
1300 int fdt,
1301 CopyFlags copy_flags,
1302 copy_progress_bytes_t progress_bytes,
1303 void *userdata) {
1304
1305 _cleanup_close_ int fdf = -EBADF;
1306 struct stat st;
1307 int r;
1308
1309 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1310 assert(from);
1311 assert(fdt >= 0);
1312 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1313
1314 fdf = openat(dir_fdf, from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1315 if (fdf < 0)
1316 return -errno;
1317
1318 r = fd_verify_regular(fdf);
1319 if (r < 0)
1320 return r;
1321
1322 if (fstat(fdt, &st) < 0)
1323 return -errno;
1324
1325 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
1326 if (r < 0)
1327 return r;
1328
1329         /* Make sure to copy file attributes over only if the target is a regular
1330 * file (so that copying a file to /dev/null won't alter the access
1331 * mode/ownership of that device node...) */
1332 if (S_ISREG(st.st_mode)) {
1333 (void) copy_times(fdf, fdt, copy_flags);
1334 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
1335 }
1336
1337 if (copy_flags & COPY_FSYNC_FULL) {
1338 r = fsync_full(fdt);
1339 if (r < 0)
1340 return r;
1341 } else if (copy_flags & COPY_FSYNC) {
1342 if (fsync(fdt) < 0)
1343 return -errno;
1344 }
1345
1346 return 0;
1347 }
1348
1349 int copy_file_at_full(
1350 int dir_fdf,
1351 const char *from,
1352 int dir_fdt,
1353 const char *to,
1354 int flags,
1355 mode_t mode,
1356 unsigned chattr_flags,
1357 unsigned chattr_mask,
1358 CopyFlags copy_flags,
1359 copy_progress_bytes_t progress_bytes,
1360 void *userdata) {
1361
1362 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
1363 struct stat st;
1364 int r;
1365
1366 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1367 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
1368 assert(from);
1369 assert(to);
1370
1371 fdf = openat(dir_fdf, from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1372 if (fdf < 0)
1373 return -errno;
1374
1375 if (fstat(fdf, &st) < 0)
1376 return -errno;
1377
1378 r = stat_verify_regular(&st);
1379 if (r < 0)
1380 return r;
1381
1382 WITH_UMASK(0000) {
1383 fdt = xopenat_lock(dir_fdt, to,
1384 flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY,
1385 (copy_flags & COPY_MAC_CREATE ? XO_LABEL : 0),
1386 mode != MODE_INVALID ? mode : st.st_mode,
1387 copy_flags & COPY_LOCK_BSD ? LOCK_BSD : LOCK_NONE, LOCK_EX);
1388 if (fdt < 0)
1389 return fdt;
1390 }
1391
1392 if (!FLAGS_SET(flags, O_EXCL)) { /* if O_EXCL was used we created the thing as regular file, no need to check again */
1393 r = fd_verify_regular(fdt);
1394 if (r < 0)
1395 goto fail;
1396 }
1397
1398 if (chattr_mask != 0)
1399 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1400
1401 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags & ~COPY_LOCK_BSD, NULL, NULL, progress_bytes, userdata);
1402 if (r < 0)
1403 goto fail;
1404
1405 (void) copy_times(fdf, fdt, copy_flags);
1406 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
1407
1408 if (chattr_mask != 0)
1409 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1410
1411 if (copy_flags & (COPY_FSYNC|COPY_FSYNC_FULL)) {
1412 if (fsync(fdt) < 0) {
1413 r = -errno;
1414 goto fail;
1415 }
1416 }
1417
1418 if (!FLAGS_SET(copy_flags, COPY_LOCK_BSD)) {
1419 r = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
1420 if (r < 0)
1421 goto fail;
1422 }
1423
1424 if (copy_flags & COPY_FSYNC_FULL) {
1425 r = fsync_parent_at(dir_fdt, to);
1426 if (r < 0)
1427 goto fail;
1428 }
1429
1430 return copy_flags & COPY_LOCK_BSD ? TAKE_FD(fdt) : 0;
1431
1432 fail:
1433 /* Only unlink if we definitely are the ones who created the file */
1434 if (FLAGS_SET(flags, O_EXCL))
1435 (void) unlinkat(dir_fdt, to, 0);
1436
1437 return r;
1438 }
1439
1440 int copy_file_atomic_at_full(
1441 int dir_fdf,
1442 const char *from,
1443 int dir_fdt,
1444 const char *to,
1445 mode_t mode,
1446 unsigned chattr_flags,
1447 unsigned chattr_mask,
1448 CopyFlags copy_flags,
1449 copy_progress_bytes_t progress_bytes,
1450 void *userdata) {
1451
1452 _cleanup_(unlink_and_freep) char *t = NULL;
1453 _cleanup_close_ int fdt = -EBADF;
1454 int r;
1455
1456 assert(from);
1457 assert(to);
1458 assert(!FLAGS_SET(copy_flags, COPY_LOCK_BSD));
1459
1460 if (copy_flags & COPY_MAC_CREATE) {
1461 r = mac_selinux_create_file_prepare_at(dir_fdt, to, S_IFREG);
1462 if (r < 0)
1463 return r;
1464 }
1465 fdt = open_tmpfile_linkable_at(dir_fdt, to, O_WRONLY|O_CLOEXEC, &t);
1466 if (copy_flags & COPY_MAC_CREATE)
1467 mac_selinux_create_file_clear();
1468 if (fdt < 0)
1469 return fdt;
1470
1471 if (chattr_mask != 0)
1472 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1473
1474 r = copy_file_fd_at_full(dir_fdf, from, fdt, copy_flags, progress_bytes, userdata);
1475 if (r < 0)
1476 return r;
1477
1478 if (fchmod(fdt, mode) < 0)
1479 return -errno;
1480
1481 if ((copy_flags & (COPY_FSYNC|COPY_FSYNC_FULL))) {
1482 /* Sync the file */
1483 if (fsync(fdt) < 0)
1484 return -errno;
1485 }
1486
1487 r = link_tmpfile_at(fdt, dir_fdt, t, to, (copy_flags & COPY_REPLACE) ? LINK_TMPFILE_REPLACE : 0);
1488 if (r < 0)
1489 return r;
1490
1491 t = mfree(t);
1492
1493 if (chattr_mask != 0)
1494 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1495
1496 r = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
1497 if (r < 0)
1498 goto fail;
1499
1500 if (copy_flags & COPY_FSYNC_FULL) {
1501 /* Sync the parent directory */
1502 r = fsync_parent_at(dir_fdt, to);
1503 if (r < 0)
1504 goto fail;
1505 }
1506
1507 return 0;
1508
1509 fail:
1510 (void) unlinkat(dir_fdt, to, 0);
1511 return r;
1512 }
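
/* Illustrative usage sketch, not part of the original file: atomically replacing a file with a copy of
 * another via copy_file_atomic_at_full(), using hypothetical paths and making sure the result reaches the
 * disk, might look roughly like this:
 *
 *         r = copy_file_atomic_at_full(AT_FDCWD, "/run/foo.conf.new",
 *                                      AT_FDCWD, "/etc/foo.conf",
 *                                      0644, 0, 0,
 *                                      COPY_REPLACE|COPY_FSYNC_FULL,
 *                                      NULL, NULL);
 */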
1513
1514 int copy_times(int fdf, int fdt, CopyFlags flags) {
1515 struct stat st;
1516
1517 assert(fdf >= 0);
1518 assert(fdt >= 0);
1519
1520 if (fstat(fdf, &st) < 0)
1521 return -errno;
1522
1523 if (futimens(fdt, (struct timespec[2]) { st.st_atim, st.st_mtim }) < 0)
1524 return -errno;
1525
1526 if (FLAGS_SET(flags, COPY_CRTIME)) {
1527 usec_t crtime;
1528
1529 if (fd_getcrtime(fdf, &crtime) >= 0)
1530 (void) fd_setcrtime(fdt, crtime);
1531 }
1532
1533 return 0;
1534 }
1535
1536 int copy_access(int fdf, int fdt) {
1537 struct stat st;
1538
1539 assert(fdf >= 0);
1540 assert(fdt >= 0);
1541
1542 /* Copies just the access mode (and not the ownership) from fdf to fdt */
1543
1544 if (fstat(fdf, &st) < 0)
1545 return -errno;
1546
1547 return RET_NERRNO(fchmod(fdt, st.st_mode & 07777));
1548 }
1549
1550 int copy_rights_with_fallback(int fdf, int fdt, const char *patht) {
1551 struct stat st;
1552
1553 assert(fdf >= 0);
1554 assert(fdt >= 0);
1555
1556 /* Copies both access mode and ownership from fdf to fdt */
1557
1558 if (fstat(fdf, &st) < 0)
1559 return -errno;
1560
1561 return fchmod_and_chown_with_fallback(fdt, patht, st.st_mode & 07777, st.st_uid, st.st_gid);
1562 }
1563
1564 int copy_xattr(int df, const char *from, int dt, const char *to, CopyFlags copy_flags) {
1565 _cleanup_free_ char *names = NULL;
1566 int ret = 0, r;
1567
1568 r = listxattr_at_malloc(df, from, 0, &names);
1569 if (r < 0)
1570 return r;
1571
1572 NULSTR_FOREACH(p, names) {
1573 _cleanup_free_ char *value = NULL;
1574
1575 if (!FLAGS_SET(copy_flags, COPY_ALL_XATTRS) && !startswith(p, "user."))
1576 continue;
1577
1578 r = getxattr_at_malloc(df, from, p, 0, &value);
1579 if (r == -ENODATA)
1580 continue; /* gone by now */
1581 if (r < 0)
1582 return r;
1583
1584 if (xsetxattr(dt, to, p, value, r, 0) < 0)
1585 ret = -errno;
1586 }
1587
1588 return ret;
1589 }
1590
1591 int reflink(int infd, int outfd) {
1592 int r;
1593
1594 assert(infd >= 0);
1595 assert(outfd >= 0);
1596
1597 /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
1598
1599 r = fd_verify_regular(outfd);
1600 if (r < 0)
1601 return r;
1602
1603 /* FICLONE was introduced in Linux 4.5 but it uses the same number as BTRFS_IOC_CLONE introduced earlier */
1604
1605 assert_cc(FICLONE == BTRFS_IOC_CLONE);
1606
1607 return RET_NERRNO(ioctl(outfd, FICLONE, infd));
1608 }
1609
1610 assert_cc(sizeof(struct file_clone_range) == sizeof(struct btrfs_ioctl_clone_range_args));
1611
1612 int reflink_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
1613 struct file_clone_range args = {
1614 .src_fd = infd,
1615 .src_offset = in_offset,
1616 .src_length = sz,
1617 .dest_offset = out_offset,
1618 };
1619 int r;
1620
1621 assert(infd >= 0);
1622 assert(outfd >= 0);
1623
1624 /* Inside the kernel, FICLONE is identical to FICLONERANGE with offsets and size set to zero, let's
1625 * simplify things and use the simple ioctl in that case. Also, do the same if the size is
1626 * UINT64_MAX, which is how we usually encode "everything". */
1627 if (in_offset == 0 && out_offset == 0 && IN_SET(sz, 0, UINT64_MAX))
1628 return reflink(infd, outfd);
1629
1630 r = fd_verify_regular(outfd);
1631 if (r < 0)
1632 return r;
1633
1634 assert_cc(FICLONERANGE == BTRFS_IOC_CLONE_RANGE);
1635
1636 return RET_NERRNO(ioctl(outfd, FICLONERANGE, &args));
1637 }