]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/copy.c
login: respect install_sysconfdir_samples in meson file
[thirdparty/systemd.git] / src / shared / copy.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <stddef.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <sys/sendfile.h>
9 #include <sys/xattr.h>
10 #include <unistd.h>
11
12 #include "alloc-util.h"
13 #include "btrfs-util.h"
14 #include "chattr-util.h"
15 #include "copy.h"
16 #include "dirent-util.h"
17 #include "fd-util.h"
18 #include "fileio.h"
19 #include "fs-util.h"
20 #include "io-util.h"
21 #include "macro.h"
22 #include "missing_syscall.h"
23 #include "mountpoint-util.h"
24 #include "nulstr-util.h"
25 #include "rm-rf.h"
26 #include "selinux-util.h"
27 #include "signal-util.h"
28 #include "stat-util.h"
29 #include "stdio-util.h"
30 #include "string-util.h"
31 #include "strv.h"
32 #include "time-util.h"
33 #include "tmpfile-util.h"
34 #include "umask-util.h"
35 #include "user-util.h"
36 #include "xattr-util.h"
37
38 #define COPY_BUFFER_SIZE (16U*1024U)
39
40 /* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
41 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
42 * case of bind mount cycles and suchlike. */
43 #define COPY_DEPTH_MAX 2048U
44
45 static ssize_t try_copy_file_range(
46 int fd_in, loff_t *off_in,
47 int fd_out, loff_t *off_out,
48 size_t len,
49 unsigned flags) {
50
51 static int have = -1;
52 ssize_t r;
53
54 if (have == 0)
55 return -ENOSYS;
56
57 r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
58 if (have < 0)
59 have = r >= 0 || errno != ENOSYS;
60 if (r < 0)
61 return -errno;
62
63 return r;
64 }
65
/* Possible (non-error) results of fd_is_nonblock_pipe() */
enum {
        FD_IS_NO_PIPE,
        FD_IS_BLOCKING_PIPE,
        FD_IS_NONBLOCKING_PIPE,
};

/* Classifies the specified file descriptor: returns FD_IS_NO_PIPE if it does not refer to a FIFO, and otherwise
 * FD_IS_NONBLOCKING_PIPE or FD_IS_BLOCKING_PIPE depending on whether O_NONBLOCK is set on it. Returns a negative
 * errno-style error if fstat() or fcntl() fail. */
static int fd_is_nonblock_pipe(int fd) {
        struct stat info;
        int fl;

        if (fstat(fd, &info) < 0)
                return -errno;

        if (!S_ISFIFO(info.st_mode))
                return FD_IS_NO_PIPE;

        fl = fcntl(fd, F_GETFL);
        if (fl < 0)
                return -errno;

        if (FLAGS_SET(fl, O_NONBLOCK))
                return FD_IS_NONBLOCKING_PIPE;

        return FD_IS_BLOCKING_PIPE;
}
90
91 int copy_bytes_full(
92 int fdf, int fdt,
93 uint64_t max_bytes,
94 CopyFlags copy_flags,
95 void **ret_remains,
96 size_t *ret_remains_size,
97 copy_progress_bytes_t progress,
98 void *userdata) {
99
100 bool try_cfr = true, try_sendfile = true, try_splice = true, copied_something = false;
101 int r, nonblock_pipe = -1;
102 size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
103
104 assert(fdf >= 0);
105 assert(fdt >= 0);
106
107 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
108 * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on
109 * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
110 * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will
111 * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
112 * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
113 * these parameters if non-NULL are set to NULL. */
114
115 if (ret_remains)
116 *ret_remains = NULL;
117 if (ret_remains_size)
118 *ret_remains_size = 0;
119
120 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
121 * source and destination first. */
122 if ((copy_flags & COPY_REFLINK)) {
123 off_t foffset;
124
125 foffset = lseek(fdf, 0, SEEK_CUR);
126 if (foffset >= 0) {
127 off_t toffset;
128
129 toffset = lseek(fdt, 0, SEEK_CUR);
130 if (toffset >= 0) {
131
132 if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
133 r = btrfs_reflink(fdf, fdt); /* full file reflink */
134 else
135 r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
136 if (r >= 0) {
137 off_t t;
138
139 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
140 if (max_bytes == UINT64_MAX) {
141
142 /* We cloned to the end of the source file, let's position the read
143 * pointer there, and query it at the same time. */
144 t = lseek(fdf, 0, SEEK_END);
145 if (t < 0)
146 return -errno;
147 if (t < foffset)
148 return -ESPIPE;
149
150 /* Let's adjust the destination file write pointer by the same number
151 * of bytes. */
152 t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
153 if (t < 0)
154 return -errno;
155
156 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
157 } else {
158 t = lseek(fdf, foffset + max_bytes, SEEK_SET);
159 if (t < 0)
160 return -errno;
161
162 t = lseek(fdt, toffset + max_bytes, SEEK_SET);
163 if (t < 0)
164 return -errno;
165
166 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
167 }
168 }
169 }
170 }
171 }
172
173 for (;;) {
174 ssize_t n;
175
176 if (max_bytes <= 0)
177 return 1; /* return > 0 if we hit the max_bytes limit */
178
179 if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
180 r = pop_pending_signal(SIGINT);
181 if (r < 0)
182 return r;
183 if (r > 0)
184 return -EINTR;
185 }
186
187 if (max_bytes != UINT64_MAX && m > max_bytes)
188 m = max_bytes;
189
190 /* First try copy_file_range(), unless we already tried */
191 if (try_cfr) {
192 n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
193 if (n < 0) {
194 if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
195 return n;
196
197 try_cfr = false;
198 /* use fallback below */
199 } else if (n == 0) { /* likely EOF */
200
201 if (copied_something)
202 break;
203
204 /* So, we hit EOF immediately, without having copied a single byte. This
205 * could indicate two things: the file is actually empty, or we are on some
206 * virtual file system such as procfs/sysfs where the syscall actually
207 * doesn't work but doesn't return an error. Try to handle that, by falling
208 * back to simple read()s in case we encounter empty files.
209 *
210 * See: https://lwn.net/Articles/846403/ */
211 try_cfr = try_sendfile = try_splice = false;
212 } else
213 /* Success! */
214 goto next;
215 }
216
217 /* First try sendfile(), unless we already tried */
218 if (try_sendfile) {
219 n = sendfile(fdt, fdf, NULL, m);
220 if (n < 0) {
221 if (!IN_SET(errno, EINVAL, ENOSYS))
222 return -errno;
223
224 try_sendfile = false;
225 /* use fallback below */
226 } else if (n == 0) { /* likely EOF */
227
228 if (copied_something)
229 break;
230
231 try_sendfile = try_splice = false; /* same logic as above for copy_file_range() */
232 } else
233 /* Success! */
234 goto next;
235 }
236
237 /* Then try splice, unless we already tried. */
238 if (try_splice) {
239
240 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
241 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
242 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour
243 * here, and check if either of the specified fds are a pipe, and if so, let's pass
244 * the flag automatically, depending on O_NONBLOCK being set.
245 *
246 * Here's a twist though: when we use it to move data between two pipes of which one
247 * has O_NONBLOCK set and the other has not, then we have no individual control over
248 * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee
249 * systematic O_NONBLOCK behaviour, hence don't. */
250
251 if (nonblock_pipe < 0) {
252 int a, b;
253
254 /* Check if either of these fds is a pipe, and if so non-blocking or not */
255 a = fd_is_nonblock_pipe(fdf);
256 if (a < 0)
257 return a;
258
259 b = fd_is_nonblock_pipe(fdt);
260 if (b < 0)
261 return b;
262
263 if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
264 (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
265 (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
266
267 /* splice() only works if one of the fds is a pipe. If neither is,
268 * let's skip this step right-away. As mentioned above, if one of the
269 * two fds refers to a blocking pipe and the other to a non-blocking
270 * pipe, we can't use splice() either, hence don't try either. This
271 * hence means we can only use splice() if either only one of the two
272 * fds is a pipe, or if both are pipes with the same nonblocking flag
273 * setting. */
274
275 try_splice = false;
276 else
277 nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
278 }
279 }
280
281 if (try_splice) {
282 n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
283 if (n < 0) {
284 if (!IN_SET(errno, EINVAL, ENOSYS))
285 return -errno;
286
287 try_splice = false;
288 /* use fallback below */
289 } else if (n == 0) { /* likely EOF */
290
291 if (copied_something)
292 break;
293
294 try_splice = false; /* same logic as above for copy_file_range() + sendfile() */
295 } else
296 /* Success! */
297 goto next;
298 }
299
300 /* As a fallback just copy bits by hand */
301 {
302 uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
303 ssize_t z;
304
305 n = read(fdf, buf, sizeof buf);
306 if (n < 0)
307 return -errno;
308 if (n == 0) /* EOF */
309 break;
310
311 z = (size_t) n;
312 do {
313 ssize_t k;
314
315 k = write(fdt, p, z);
316 if (k < 0) {
317 r = -errno;
318
319 if (ret_remains) {
320 void *copy;
321
322 copy = memdup(p, z);
323 if (!copy)
324 return -ENOMEM;
325
326 *ret_remains = copy;
327 }
328
329 if (ret_remains_size)
330 *ret_remains_size = z;
331
332 return r;
333 }
334
335 assert(k <= z);
336 z -= k;
337 p += k;
338 } while (z > 0);
339 }
340
341 next:
342 if (progress) {
343 r = progress(n, userdata);
344 if (r < 0)
345 return r;
346 }
347
348 if (max_bytes != UINT64_MAX) {
349 assert(max_bytes >= (uint64_t) n);
350 max_bytes -= n;
351 }
352
353 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the
354 * amount we already copied, but don't go below our copy buffer size, unless we are close the
355 * limit of bytes we are allowed to copy. */
356 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
357
358 copied_something = true;
359 }
360
361 return 0; /* return 0 if we hit EOF earlier than the size limit */
362 }
363
364 static int fd_copy_symlink(
365 int df,
366 const char *from,
367 const struct stat *st,
368 int dt,
369 const char *to,
370 uid_t override_uid,
371 gid_t override_gid,
372 CopyFlags copy_flags) {
373
374 _cleanup_free_ char *target = NULL;
375 int r;
376
377 assert(from);
378 assert(st);
379 assert(to);
380
381 r = readlinkat_malloc(df, from, &target);
382 if (r < 0)
383 return r;
384
385 if (copy_flags & COPY_MAC_CREATE) {
386 r = mac_selinux_create_file_prepare_at(dt, to, S_IFLNK);
387 if (r < 0)
388 return r;
389 }
390 r = symlinkat(target, dt, to);
391 if (copy_flags & COPY_MAC_CREATE)
392 mac_selinux_create_file_clear();
393 if (r < 0)
394 return -errno;
395
396 if (fchownat(dt, to,
397 uid_is_valid(override_uid) ? override_uid : st->st_uid,
398 gid_is_valid(override_gid) ? override_gid : st->st_gid,
399 AT_SYMLINK_NOFOLLOW) < 0)
400 r = -errno;
401
402 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
403 return r;
404 }
405
/* State of the on-disk lookup table we keep potential hardlink sources in while copying a tree */
typedef struct HardlinkContext {
        /* An fd referring to the directory that serves as lookup table. Never AT_FDCWD. The directory is
         * created lazily, i.e. this stays negative until the first entry is added. */
        int dir_fd;

        /* The lookup table directory is created via mkdirat(parent_fd, subdir). Both are remembered so that
         * the directory can be removed again automatically once we are done. */
        int parent_fd; /* Possibly AT_FDCWD */
        char *subdir;
} HardlinkContext;
417
418 static int hardlink_context_setup(
419 HardlinkContext *c,
420 int dt,
421 const char *to,
422 CopyFlags copy_flags) {
423
424 _cleanup_close_ int dt_copy = -1;
425 int r;
426
427 assert(c);
428 assert(c->dir_fd < 0 && c->dir_fd != AT_FDCWD);
429 assert(c->parent_fd < 0);
430 assert(!c->subdir);
431
432 /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
433 * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
434 * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
435 * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
436 * temporary directory consisting of hardlinks of every inode we copied that might be subject of
437 * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
438 * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
439 * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
440 * let's keep things simple, and just use the disk as lookup table for inodes.
441 *
442 * Note that this should have zero performance impact as long as .n_link of all files copied remains
443 * <= 0, because in that case we will not actually allocate the hardlink inode lookup table directory
444 * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the
445 * common case where hardlinks are not used at all or only for few files the fact that we store the
446 * table on disk shouldn't matter perfomance-wise. */
447
448 if (!FLAGS_SET(copy_flags, COPY_HARDLINKS))
449 return 0;
450
451 if (dt == AT_FDCWD)
452 dt_copy = AT_FDCWD;
453 else if (dt < 0)
454 return -EBADF;
455 else {
456 dt_copy = fcntl(dt, F_DUPFD_CLOEXEC, 3);
457 if (dt_copy < 0)
458 return -errno;
459 }
460
461 r = tempfn_random_child(to, "hardlink", &c->subdir);
462 if (r < 0)
463 return r;
464
465 c->parent_fd = TAKE_FD(dt_copy);
466
467 /* We don't actually create the directory we keep the table in here, that's done on-demand when the
468 * first entry is added, using hardlink_context_realize() below. */
469 return 1;
470 }
471
472 static int hardlink_context_realize(HardlinkContext *c) {
473 int r;
474
475 if (!c)
476 return 0;
477
478 if (c->dir_fd >= 0) /* Already realized */
479 return 1;
480
481 if (c->parent_fd < 0 && c->parent_fd != AT_FDCWD) /* Not configured */
482 return 0;
483
484 assert(c->subdir);
485
486 if (mkdirat(c->parent_fd, c->subdir, 0700) < 0)
487 return -errno;
488
489 c->dir_fd = openat(c->parent_fd, c->subdir, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
490 if (c->dir_fd < 0) {
491 r = -errno;
492 (void) unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR);
493 return r;
494 }
495
496 return 1;
497 }
498
499 static void hardlink_context_destroy(HardlinkContext *c) {
500 int r;
501
502 assert(c);
503
504 /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
505 * _cleanup_() so that we really delete this, even on failure. */
506
507 if (c->dir_fd >= 0) {
508 r = rm_rf_children(TAKE_FD(c->dir_fd), REMOVE_PHYSICAL, NULL); /* consumes dir_fd in all cases, even on failure */
509 if (r < 0)
510 log_debug_errno(r, "Failed to remove hardlink store (%s) contents, ignoring: %m", c->subdir);
511
512 assert(c->parent_fd >= 0 || c->parent_fd == AT_FDCWD);
513 assert(c->subdir);
514
515 if (unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR) < 0)
516 log_debug_errno(errno, "Failed to remove hardlink store (%s) directory, ignoring: %m", c->subdir);
517 }
518
519 assert_cc(AT_FDCWD < 0);
520 c->parent_fd = safe_close(c->parent_fd);
521
522 c->subdir = mfree(c->subdir);
523 }
524
525 static int try_hardlink(
526 HardlinkContext *c,
527 const struct stat *st,
528 int dt,
529 const char *to) {
530
531 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
532
533 assert(st);
534 assert(dt >= 0 || dt == AT_FDCWD);
535 assert(to);
536
537 if (!c) /* No temporary hardlink directory, don't bother */
538 return 0;
539
540 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
541 return 0;
542
543 if (c->dir_fd < 0) /* not yet realized, hence empty */
544 return 0;
545
546 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
547 if (linkat(c->dir_fd, dev_ino, dt, to, 0) < 0) {
548 if (errno != ENOENT) /* doesn't exist in store yet */
549 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", dev_ino, to);
550 return 0;
551 }
552
553 return 1;
554 }
555
556 static int memorize_hardlink(
557 HardlinkContext *c,
558 const struct stat *st,
559 int dt,
560 const char *to) {
561
562 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
563 int r;
564
565 assert(st);
566 assert(dt >= 0 || dt == AT_FDCWD);
567 assert(to);
568
569 if (!c) /* No temporary hardlink directory, don't bother */
570 return 0;
571
572 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
573 return 0;
574
575 r = hardlink_context_realize(c); /* Create the hardlink store lazily */
576 if (r < 0)
577 return r;
578
579 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
580 if (linkat(dt, to, c->dir_fd, dev_ino, 0) < 0) {
581 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", to, dev_ino);
582 return 0;
583 }
584
585 return 1;
586 }
587
588 static int fd_copy_regular(
589 int df,
590 const char *from,
591 const struct stat *st,
592 int dt,
593 const char *to,
594 uid_t override_uid,
595 gid_t override_gid,
596 CopyFlags copy_flags,
597 HardlinkContext *hardlink_context,
598 copy_progress_bytes_t progress,
599 void *userdata) {
600
601 _cleanup_close_ int fdf = -1, fdt = -1;
602 int r, q;
603
604 assert(from);
605 assert(st);
606 assert(to);
607
608 r = try_hardlink(hardlink_context, st, dt, to);
609 if (r < 0)
610 return r;
611 if (r > 0) /* worked! */
612 return 0;
613
614 fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
615 if (fdf < 0)
616 return -errno;
617
618 if (copy_flags & COPY_MAC_CREATE) {
619 r = mac_selinux_create_file_prepare_at(dt, to, S_IFREG);
620 if (r < 0)
621 return r;
622 }
623 fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
624 if (copy_flags & COPY_MAC_CREATE)
625 mac_selinux_create_file_clear();
626 if (fdt < 0)
627 return -errno;
628
629 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress, userdata);
630 if (r < 0) {
631 (void) unlinkat(dt, to, 0);
632 return r;
633 }
634
635 if (fchown(fdt,
636 uid_is_valid(override_uid) ? override_uid : st->st_uid,
637 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
638 r = -errno;
639
640 if (fchmod(fdt, st->st_mode & 07777) < 0)
641 r = -errno;
642
643 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
644 (void) copy_xattr(fdf, fdt);
645
646 q = close(fdt);
647 fdt = -1;
648
649 if (q < 0) {
650 r = -errno;
651 (void) unlinkat(dt, to, 0);
652 }
653
654 (void) memorize_hardlink(hardlink_context, st, dt, to);
655 return r;
656 }
657
658 static int fd_copy_fifo(
659 int df,
660 const char *from,
661 const struct stat *st,
662 int dt,
663 const char *to,
664 uid_t override_uid,
665 gid_t override_gid,
666 CopyFlags copy_flags,
667 HardlinkContext *hardlink_context) {
668 int r;
669
670 assert(from);
671 assert(st);
672 assert(to);
673
674 r = try_hardlink(hardlink_context, st, dt, to);
675 if (r < 0)
676 return r;
677 if (r > 0) /* worked! */
678 return 0;
679
680 if (copy_flags & COPY_MAC_CREATE) {
681 r = mac_selinux_create_file_prepare_at(dt, to, S_IFIFO);
682 if (r < 0)
683 return r;
684 }
685 r = mkfifoat(dt, to, st->st_mode & 07777);
686 if (copy_flags & COPY_MAC_CREATE)
687 mac_selinux_create_file_clear();
688 if (r < 0)
689 return -errno;
690
691 if (fchownat(dt, to,
692 uid_is_valid(override_uid) ? override_uid : st->st_uid,
693 gid_is_valid(override_gid) ? override_gid : st->st_gid,
694 AT_SYMLINK_NOFOLLOW) < 0)
695 r = -errno;
696
697 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
698 r = -errno;
699
700 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
701
702 (void) memorize_hardlink(hardlink_context, st, dt, to);
703 return r;
704 }
705
706 static int fd_copy_node(
707 int df,
708 const char *from,
709 const struct stat *st,
710 int dt,
711 const char *to,
712 uid_t override_uid,
713 gid_t override_gid,
714 CopyFlags copy_flags,
715 HardlinkContext *hardlink_context) {
716 int r;
717
718 assert(from);
719 assert(st);
720 assert(to);
721
722 r = try_hardlink(hardlink_context, st, dt, to);
723 if (r < 0)
724 return r;
725 if (r > 0) /* worked! */
726 return 0;
727
728 if (copy_flags & COPY_MAC_CREATE) {
729 r = mac_selinux_create_file_prepare_at(dt, to, st->st_mode & S_IFMT);
730 if (r < 0)
731 return r;
732 }
733 r = mknodat(dt, to, st->st_mode, st->st_rdev);
734 if (copy_flags & COPY_MAC_CREATE)
735 mac_selinux_create_file_clear();
736 if (r < 0)
737 return -errno;
738
739 if (fchownat(dt, to,
740 uid_is_valid(override_uid) ? override_uid : st->st_uid,
741 gid_is_valid(override_gid) ? override_gid : st->st_gid,
742 AT_SYMLINK_NOFOLLOW) < 0)
743 r = -errno;
744
745 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
746 r = -errno;
747
748 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
749
750 (void) memorize_hardlink(hardlink_context, st, dt, to);
751 return r;
752 }
753
754 static int fd_copy_directory(
755 int df,
756 const char *from,
757 const struct stat *st,
758 int dt,
759 const char *to,
760 dev_t original_device,
761 unsigned depth_left,
762 uid_t override_uid,
763 gid_t override_gid,
764 CopyFlags copy_flags,
765 HardlinkContext *hardlink_context,
766 const char *display_path,
767 copy_progress_path_t progress_path,
768 copy_progress_bytes_t progress_bytes,
769 void *userdata) {
770
771 _cleanup_(hardlink_context_destroy) HardlinkContext our_hardlink_context = {
772 .dir_fd = -1,
773 .parent_fd = -1,
774 };
775
776 _cleanup_close_ int fdf = -1, fdt = -1;
777 _cleanup_closedir_ DIR *d = NULL;
778 struct dirent *de;
779 bool exists, created;
780 int r;
781
782 assert(st);
783 assert(to);
784
785 if (depth_left == 0)
786 return -ENAMETOOLONG;
787
788 if (from)
789 fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
790 else
791 fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
792 if (fdf < 0)
793 return -errno;
794
795 if (!hardlink_context) {
796 /* If recreating hardlinks is requested let's set up a context for that now. */
797 r = hardlink_context_setup(&our_hardlink_context, dt, to, copy_flags);
798 if (r < 0)
799 return r;
800 if (r > 0) /* It's enabled and allocated, let's now use the same context for all recursive
801 * invocations from here down */
802 hardlink_context = &our_hardlink_context;
803 }
804
805 d = take_fdopendir(&fdf);
806 if (!d)
807 return -errno;
808
809 exists = false;
810 if (copy_flags & COPY_MERGE_EMPTY) {
811 r = dir_is_empty_at(dt, to);
812 if (r < 0 && r != -ENOENT)
813 return r;
814 else if (r == 1)
815 exists = true;
816 }
817
818 if (exists)
819 created = false;
820 else {
821 if (copy_flags & COPY_MAC_CREATE)
822 r = mkdirat_label(dt, to, st->st_mode & 07777);
823 else
824 r = mkdirat(dt, to, st->st_mode & 07777);
825 if (r >= 0)
826 created = true;
827 else if (errno == EEXIST && (copy_flags & COPY_MERGE))
828 created = false;
829 else
830 return -errno;
831 }
832
833 fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
834 if (fdt < 0)
835 return -errno;
836
837 r = 0;
838
839 FOREACH_DIRENT_ALL(de, d, return -errno) {
840 const char *child_display_path = NULL;
841 _cleanup_free_ char *dp = NULL;
842 struct stat buf;
843 int q;
844
845 if (dot_or_dot_dot(de->d_name))
846 continue;
847
848 if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
849 r = pop_pending_signal(SIGINT);
850 if (r < 0)
851 return r;
852 if (r > 0)
853 return -EINTR;
854 }
855
856 if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
857 r = -errno;
858 continue;
859 }
860
861 if (progress_path) {
862 if (display_path)
863 child_display_path = dp = path_join(display_path, de->d_name);
864 else
865 child_display_path = de->d_name;
866
867 r = progress_path(child_display_path, &buf, userdata);
868 if (r < 0)
869 return r;
870 }
871
872 if (S_ISDIR(buf.st_mode)) {
873 /*
874 * Don't descend into directories on other file systems, if this is requested. We do a simple
875 * .st_dev check here, which basically comes for free. Note that we do this check only on
876 * directories, not other kind of file system objects, for two reason:
877 *
878 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
879 * propagates the .st_dev field of the file system a file originates from all the way up
880 * through the stack to stat(). It doesn't do that for directories however. This means that
881 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
882 * confusion we hence avoid relying on this check for regular files.
883 *
884 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
885 * where we really want to avoid descending down in all eternity. However the .st_dev check
886 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
887 * file system onto itself can't be detected that way. (Note we also do a recursion depth
888 * check, which is probably the better protection in this regard, which is why
889 * COPY_SAME_MOUNT is optional).
890 */
891
892 if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
893 if (buf.st_dev != original_device)
894 continue;
895
896 r = fd_is_mount_point(dirfd(d), de->d_name, 0);
897 if (r < 0)
898 return r;
899 if (r > 0)
900 continue;
901 }
902
903 q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags, hardlink_context, child_display_path, progress_path, progress_bytes, userdata);
904 } else if (S_ISREG(buf.st_mode))
905 q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context, progress_bytes, userdata);
906 else if (S_ISLNK(buf.st_mode))
907 q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
908 else if (S_ISFIFO(buf.st_mode))
909 q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
910 else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode))
911 q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
912 else
913 q = -EOPNOTSUPP;
914
915 if (q == -EINTR) /* Propagate SIGINT up instantly */
916 return q;
917 if (q == -EEXIST && (copy_flags & COPY_MERGE))
918 q = 0;
919 if (q < 0)
920 r = q;
921 }
922
923 if (created) {
924 if (fchown(fdt,
925 uid_is_valid(override_uid) ? override_uid : st->st_uid,
926 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
927 r = -errno;
928
929 if (fchmod(fdt, st->st_mode & 07777) < 0)
930 r = -errno;
931
932 (void) copy_xattr(dirfd(d), fdt);
933 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
934 }
935
936 return r;
937 }
938
939 int copy_tree_at_full(
940 int fdf,
941 const char *from,
942 int fdt,
943 const char *to,
944 uid_t override_uid,
945 gid_t override_gid,
946 CopyFlags copy_flags,
947 copy_progress_path_t progress_path,
948 copy_progress_bytes_t progress_bytes,
949 void *userdata) {
950
951 struct stat st;
952
953 assert(from);
954 assert(to);
955
956 if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
957 return -errno;
958
959 if (S_ISREG(st.st_mode))
960 return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL, progress_bytes, userdata);
961 else if (S_ISDIR(st.st_mode))
962 return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
963 else if (S_ISLNK(st.st_mode))
964 return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
965 else if (S_ISFIFO(st.st_mode))
966 return fd_copy_fifo(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
967 else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) || S_ISSOCK(st.st_mode))
968 return fd_copy_node(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
969 else
970 return -EOPNOTSUPP;
971 }
972
973 int copy_directory_fd_full(
974 int dirfd,
975 const char *to,
976 CopyFlags copy_flags,
977 copy_progress_path_t progress_path,
978 copy_progress_bytes_t progress_bytes,
979 void *userdata) {
980
981 struct stat st;
982 int r;
983
984 assert(dirfd >= 0);
985 assert(to);
986
987 if (fstat(dirfd, &st) < 0)
988 return -errno;
989
990 r = stat_verify_directory(&st);
991 if (r < 0)
992 return r;
993
994 return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
995 }
996
997 int copy_directory_full(
998 const char *from,
999 const char *to,
1000 CopyFlags copy_flags,
1001 copy_progress_path_t progress_path,
1002 copy_progress_bytes_t progress_bytes,
1003 void *userdata) {
1004
1005 struct stat st;
1006 int r;
1007
1008 assert(from);
1009 assert(to);
1010
1011 if (lstat(from, &st) < 0)
1012 return -errno;
1013
1014 r = stat_verify_directory(&st);
1015 if (r < 0)
1016 return r;
1017
1018 return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
1019 }
1020
1021 int copy_file_fd_full(
1022 const char *from,
1023 int fdt,
1024 CopyFlags copy_flags,
1025 copy_progress_bytes_t progress_bytes,
1026 void *userdata) {
1027
1028 _cleanup_close_ int fdf = -1;
1029 int r;
1030
1031 assert(from);
1032 assert(fdt >= 0);
1033
1034 fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1035 if (fdf < 0)
1036 return -errno;
1037
1038 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
1039
1040 (void) copy_times(fdf, fdt, copy_flags);
1041 (void) copy_xattr(fdf, fdt);
1042
1043 return r;
1044 }
1045
1046 int copy_file_full(
1047 const char *from,
1048 const char *to,
1049 int flags,
1050 mode_t mode,
1051 unsigned chattr_flags,
1052 unsigned chattr_mask,
1053 CopyFlags copy_flags,
1054 copy_progress_bytes_t progress_bytes,
1055 void *userdata) {
1056
1057 _cleanup_close_ int fdf = -1;
1058 struct stat st;
1059 int r, fdt = -1; /* avoid false maybe-uninitialized warning */
1060
1061 assert(from);
1062 assert(to);
1063
1064 fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1065 if (fdf < 0)
1066 return -errno;
1067
1068 if (mode == MODE_INVALID)
1069 if (fstat(fdf, &st) < 0)
1070 return -errno;
1071
1072 RUN_WITH_UMASK(0000) {
1073 if (copy_flags & COPY_MAC_CREATE) {
1074 r = mac_selinux_create_file_prepare(to, S_IFREG);
1075 if (r < 0)
1076 return r;
1077 }
1078 fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY,
1079 mode != MODE_INVALID ? mode : st.st_mode);
1080 if (copy_flags & COPY_MAC_CREATE)
1081 mac_selinux_create_file_clear();
1082 if (fdt < 0)
1083 return -errno;
1084 }
1085
1086 if (chattr_mask != 0)
1087 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1088
1089 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
1090 if (r < 0) {
1091 close(fdt);
1092 (void) unlink(to);
1093 return r;
1094 }
1095
1096 (void) copy_times(fdf, fdt, copy_flags);
1097 (void) copy_xattr(fdf, fdt);
1098
1099 if (chattr_mask != 0)
1100 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1101
1102 if (close(fdt) < 0) {
1103 unlink_noerrno(to);
1104 return -errno;
1105 }
1106
1107 return 0;
1108 }
1109
1110 int copy_file_atomic_full(
1111 const char *from,
1112 const char *to,
1113 mode_t mode,
1114 unsigned chattr_flags,
1115 unsigned chattr_mask,
1116 CopyFlags copy_flags,
1117 copy_progress_bytes_t progress_bytes,
1118 void *userdata) {
1119
1120 _cleanup_(unlink_and_freep) char *t = NULL;
1121 _cleanup_close_ int fdt = -1;
1122 int r;
1123
1124 assert(from);
1125 assert(to);
1126
1127 /* We try to use O_TMPFILE here to create the file if we can. Note that this only works if COPY_REPLACE is not
1128 * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
1129 * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
1130 * system right-away and unconditionally which we then can renameat() to the right name after we completed
1131 * writing it. */
1132
1133 if (copy_flags & COPY_REPLACE) {
1134 _cleanup_free_ char *f = NULL;
1135
1136 r = tempfn_random(to, NULL, &f);
1137 if (r < 0)
1138 return r;
1139
1140 if (copy_flags & COPY_MAC_CREATE) {
1141 r = mac_selinux_create_file_prepare(to, S_IFREG);
1142 if (r < 0)
1143 return r;
1144 }
1145 fdt = open(f, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
1146 if (copy_flags & COPY_MAC_CREATE)
1147 mac_selinux_create_file_clear();
1148 if (fdt < 0)
1149 return -errno;
1150
1151 t = TAKE_PTR(f);
1152 } else {
1153 if (copy_flags & COPY_MAC_CREATE) {
1154 r = mac_selinux_create_file_prepare(to, S_IFREG);
1155 if (r < 0)
1156 return r;
1157 }
1158 fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
1159 if (copy_flags & COPY_MAC_CREATE)
1160 mac_selinux_create_file_clear();
1161 if (fdt < 0)
1162 return fdt;
1163 }
1164
1165 if (chattr_mask != 0)
1166 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1167
1168 r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
1169 if (r < 0)
1170 return r;
1171
1172 if (fchmod(fdt, mode) < 0)
1173 return -errno;
1174
1175 if (copy_flags & COPY_REPLACE) {
1176 if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
1177 return -errno;
1178 } else {
1179 r = link_tmpfile(fdt, t, to);
1180 if (r < 0)
1181 return r;
1182 }
1183
1184 if (chattr_mask != 0)
1185 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1186
1187 t = mfree(t);
1188 return 0;
1189 }
1190
1191 int copy_times(int fdf, int fdt, CopyFlags flags) {
1192 struct stat st;
1193
1194 assert(fdf >= 0);
1195 assert(fdt >= 0);
1196
1197 if (fstat(fdf, &st) < 0)
1198 return -errno;
1199
1200 if (futimens(fdt, (struct timespec[2]) { st.st_atim, st.st_mtim }) < 0)
1201 return -errno;
1202
1203 if (FLAGS_SET(flags, COPY_CRTIME)) {
1204 usec_t crtime;
1205
1206 if (fd_getcrtime(fdf, &crtime) >= 0)
1207 (void) fd_setcrtime(fdt, crtime);
1208 }
1209
1210 return 0;
1211 }
1212
1213 int copy_access(int fdf, int fdt) {
1214 struct stat st;
1215
1216 assert(fdf >= 0);
1217 assert(fdt >= 0);
1218
1219 /* Copies just the access mode (and not the ownership) from fdf to fdt */
1220
1221 if (fstat(fdf, &st) < 0)
1222 return -errno;
1223
1224 if (fchmod(fdt, st.st_mode & 07777) < 0)
1225 return -errno;
1226
1227 return 0;
1228 }
1229
1230 int copy_rights_with_fallback(int fdf, int fdt, const char *patht) {
1231 struct stat st;
1232
1233 assert(fdf >= 0);
1234 assert(fdt >= 0);
1235
1236 /* Copies both access mode and ownership from fdf to fdt */
1237
1238 if (fstat(fdf, &st) < 0)
1239 return -errno;
1240
1241 return fchmod_and_chown_with_fallback(fdt, patht, st.st_mode & 07777, st.st_uid, st.st_gid);
1242 }
1243
1244 int copy_xattr(int fdf, int fdt) {
1245 _cleanup_free_ char *names = NULL;
1246 int ret = 0, r;
1247 const char *p;
1248
1249 r = flistxattr_malloc(fdf, &names);
1250 if (r < 0)
1251 return r;
1252
1253 NULSTR_FOREACH(p, names) {
1254 _cleanup_free_ char *value = NULL;
1255
1256 if (!startswith(p, "user."))
1257 continue;
1258
1259 r = fgetxattr_malloc(fdf, p, &value);
1260 if (r == -ENODATA)
1261 continue; /* gone by now */
1262 if (r < 0)
1263 return r;
1264
1265 if (fsetxattr(fdt, p, value, r, 0) < 0)
1266 ret = -errno;
1267 }
1268
1269 return ret;
1270 }