]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/copy.c
tree-wide: use UINT64_MAX or friends
[thirdparty/systemd.git] / src / basic / copy.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <stddef.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <sys/sendfile.h>
9 #include <sys/xattr.h>
10 #include <unistd.h>
11
12 #include "alloc-util.h"
13 #include "btrfs-util.h"
14 #include "chattr-util.h"
15 #include "copy.h"
16 #include "dirent-util.h"
17 #include "fd-util.h"
18 #include "fileio.h"
19 #include "fs-util.h"
20 #include "io-util.h"
21 #include "macro.h"
22 #include "missing_syscall.h"
23 #include "mountpoint-util.h"
24 #include "nulstr-util.h"
25 #include "rm-rf.h"
26 #include "selinux-util.h"
27 #include "stat-util.h"
28 #include "stdio-util.h"
29 #include "string-util.h"
30 #include "strv.h"
31 #include "time-util.h"
32 #include "tmpfile-util.h"
33 #include "umask-util.h"
34 #include "user-util.h"
35 #include "xattr-util.h"
36
/* Upper bound for the on-stack bounce buffer used by the plain read()/write() fallback copy loop. */
#define COPY_BUFFER_SIZE (16U * 1024U)

/* Recursion limit applied when descending into a directory tree to copy it. PATH_MAX is 4096 on Linux,
 * hence the deepest path that can validly be constructed has roughly 2048 components. We use that figure
 * as a safety net, so that bind mount cycles and the like cannot make us recurse forever. */
#define COPY_DEPTH_MAX 2048U
43
44 static ssize_t try_copy_file_range(
45 int fd_in, loff_t *off_in,
46 int fd_out, loff_t *off_out,
47 size_t len,
48 unsigned flags) {
49
50 static int have = -1;
51 ssize_t r;
52
53 if (have == 0)
54 return -ENOSYS;
55
56 r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
57 if (have < 0)
58 have = r >= 0 || errno != ENOSYS;
59 if (r < 0)
60 return -errno;
61
62 return r;
63 }
64
/* Non-negative results of fd_is_nonblock_pipe(): classifies a file descriptor with regard to pipes. */
enum {
        FD_IS_NO_PIPE          = 0, /* not a FIFO/pipe at all */
        FD_IS_BLOCKING_PIPE    = 1, /* a pipe without O_NONBLOCK */
        FD_IS_NONBLOCKING_PIPE = 2, /* a pipe with O_NONBLOCK set */
};
70
71 static int fd_is_nonblock_pipe(int fd) {
72 struct stat st;
73 int flags;
74
75 /* Checks whether the specified file descriptor refers to a pipe, and if so if O_NONBLOCK is set. */
76
77 if (fstat(fd, &st) < 0)
78 return -errno;
79
80 if (!S_ISFIFO(st.st_mode))
81 return FD_IS_NO_PIPE;
82
83 flags = fcntl(fd, F_GETFL);
84 if (flags < 0)
85 return -errno;
86
87 return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
88 }
89
90 static int sigint_pending(void) {
91 sigset_t ss;
92
93 assert_se(sigemptyset(&ss) >= 0);
94 assert_se(sigaddset(&ss, SIGINT) >= 0);
95
96 if (sigtimedwait(&ss, NULL, &(struct timespec) { 0, 0 }) < 0) {
97 if (errno == EAGAIN)
98 return false;
99
100 return -errno;
101 }
102
103 return true;
104 }
105
106 int copy_bytes_full(
107 int fdf, int fdt,
108 uint64_t max_bytes,
109 CopyFlags copy_flags,
110 void **ret_remains,
111 size_t *ret_remains_size,
112 copy_progress_bytes_t progress,
113 void *userdata) {
114
115 bool try_cfr = true, try_sendfile = true, try_splice = true, copied_something = false;
116 int r, nonblock_pipe = -1;
117 size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
118
119 assert(fdf >= 0);
120 assert(fdt >= 0);
121
122 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
123 * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on
124 * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
125 * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will
126 * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
127 * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
128 * these parameters if non-NULL are set to NULL. */
129
130 if (ret_remains)
131 *ret_remains = NULL;
132 if (ret_remains_size)
133 *ret_remains_size = 0;
134
135 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
136 * source and destination first. */
137 if ((copy_flags & COPY_REFLINK)) {
138 off_t foffset;
139
140 foffset = lseek(fdf, 0, SEEK_CUR);
141 if (foffset >= 0) {
142 off_t toffset;
143
144 toffset = lseek(fdt, 0, SEEK_CUR);
145 if (toffset >= 0) {
146
147 if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
148 r = btrfs_reflink(fdf, fdt); /* full file reflink */
149 else
150 r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
151 if (r >= 0) {
152 off_t t;
153
154 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
155 if (max_bytes == UINT64_MAX) {
156
157 /* We cloned to the end of the source file, let's position the read
158 * pointer there, and query it at the same time. */
159 t = lseek(fdf, 0, SEEK_END);
160 if (t < 0)
161 return -errno;
162 if (t < foffset)
163 return -ESPIPE;
164
165 /* Let's adjust the destination file write pointer by the same number
166 * of bytes. */
167 t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
168 if (t < 0)
169 return -errno;
170
171 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
172 } else {
173 t = lseek(fdf, foffset + max_bytes, SEEK_SET);
174 if (t < 0)
175 return -errno;
176
177 t = lseek(fdt, toffset + max_bytes, SEEK_SET);
178 if (t < 0)
179 return -errno;
180
181 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
182 }
183 }
184 }
185 }
186 }
187
188 for (;;) {
189 ssize_t n;
190
191 if (max_bytes <= 0)
192 return 1; /* return > 0 if we hit the max_bytes limit */
193
194 if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
195 r = sigint_pending();
196 if (r < 0)
197 return r;
198 if (r > 0)
199 return -EINTR;
200 }
201
202 if (max_bytes != UINT64_MAX && m > max_bytes)
203 m = max_bytes;
204
205 /* First try copy_file_range(), unless we already tried */
206 if (try_cfr) {
207 n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
208 if (n < 0) {
209 if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
210 return n;
211
212 try_cfr = false;
213 /* use fallback below */
214 } else if (n == 0) { /* likely EOF */
215
216 if (copied_something)
217 break;
218
219 /* So, we hit EOF immediately, without having copied a single byte. This
220 * could indicate two things: the file is actually empty, or we are on some
221 * virtual file system such as procfs/sysfs where the syscall actually
222 * doesn't work but doesn't return an error. Try to handle that, by falling
223 * back to simple read()s in case we encounter empty files.
224 *
225 * See: https://lwn.net/Articles/846403/ */
226 try_cfr = try_sendfile = try_splice = false;
227 } else
228 /* Success! */
229 goto next;
230 }
231
232 /* First try sendfile(), unless we already tried */
233 if (try_sendfile) {
234 n = sendfile(fdt, fdf, NULL, m);
235 if (n < 0) {
236 if (!IN_SET(errno, EINVAL, ENOSYS))
237 return -errno;
238
239 try_sendfile = false;
240 /* use fallback below */
241 } else if (n == 0) { /* likely EOF */
242
243 if (copied_something)
244 break;
245
246 try_sendfile = try_splice = false; /* same logic as above for copy_file_range() */
247 break;
248 } else
249 /* Success! */
250 goto next;
251 }
252
253 /* Then try splice, unless we already tried. */
254 if (try_splice) {
255
256 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
257 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
258 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour
259 * here, and check if either of the specified fds are a pipe, and if so, let's pass
260 * the flag automatically, depending on O_NONBLOCK being set.
261 *
262 * Here's a twist though: when we use it to move data between two pipes of which one
263 * has O_NONBLOCK set and the other has not, then we have no individual control over
264 * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee
265 * systematic O_NONBLOCK behaviour, hence don't. */
266
267 if (nonblock_pipe < 0) {
268 int a, b;
269
270 /* Check if either of these fds is a pipe, and if so non-blocking or not */
271 a = fd_is_nonblock_pipe(fdf);
272 if (a < 0)
273 return a;
274
275 b = fd_is_nonblock_pipe(fdt);
276 if (b < 0)
277 return b;
278
279 if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
280 (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
281 (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
282
283 /* splice() only works if one of the fds is a pipe. If neither is,
284 * let's skip this step right-away. As mentioned above, if one of the
285 * two fds refers to a blocking pipe and the other to a non-blocking
286 * pipe, we can't use splice() either, hence don't try either. This
287 * hence means we can only use splice() if either only one of the two
288 * fds is a pipe, or if both are pipes with the same nonblocking flag
289 * setting. */
290
291 try_splice = false;
292 else
293 nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
294 }
295 }
296
297 if (try_splice) {
298 n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
299 if (n < 0) {
300 if (!IN_SET(errno, EINVAL, ENOSYS))
301 return -errno;
302
303 try_splice = false;
304 /* use fallback below */
305 } else if (n == 0) { /* likely EOF */
306
307 if (copied_something)
308 break;
309
310 try_splice = false; /* same logic as above for copy_file_range() + sendfile() */
311 } else
312 /* Success! */
313 goto next;
314 }
315
316 /* As a fallback just copy bits by hand */
317 {
318 uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
319 ssize_t z;
320
321 n = read(fdf, buf, sizeof buf);
322 if (n < 0)
323 return -errno;
324 if (n == 0) /* EOF */
325 break;
326
327 z = (size_t) n;
328 do {
329 ssize_t k;
330
331 k = write(fdt, p, z);
332 if (k < 0) {
333 r = -errno;
334
335 if (ret_remains) {
336 void *copy;
337
338 copy = memdup(p, z);
339 if (!copy)
340 return -ENOMEM;
341
342 *ret_remains = copy;
343 }
344
345 if (ret_remains_size)
346 *ret_remains_size = z;
347
348 return r;
349 }
350
351 assert(k <= z);
352 z -= k;
353 p += k;
354 } while (z > 0);
355 }
356
357 next:
358 if (progress) {
359 r = progress(n, userdata);
360 if (r < 0)
361 return r;
362 }
363
364 if (max_bytes != UINT64_MAX) {
365 assert(max_bytes >= (uint64_t) n);
366 max_bytes -= n;
367 }
368
369 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the
370 * amount we already copied, but don't go below our copy buffer size, unless we are close the
371 * limit of bytes we are allowed to copy. */
372 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
373
374 copied_something = true;
375 }
376
377 return 0; /* return 0 if we hit EOF earlier than the size limit */
378 }
379
380 static int fd_copy_symlink(
381 int df,
382 const char *from,
383 const struct stat *st,
384 int dt,
385 const char *to,
386 uid_t override_uid,
387 gid_t override_gid,
388 CopyFlags copy_flags) {
389
390 _cleanup_free_ char *target = NULL;
391 int r;
392
393 assert(from);
394 assert(st);
395 assert(to);
396
397 r = readlinkat_malloc(df, from, &target);
398 if (r < 0)
399 return r;
400
401 if (copy_flags & COPY_MAC_CREATE) {
402 r = mac_selinux_create_file_prepare_at(dt, to, S_IFLNK);
403 if (r < 0)
404 return r;
405 }
406 r = symlinkat(target, dt, to);
407 if (copy_flags & COPY_MAC_CREATE)
408 mac_selinux_create_file_clear();
409 if (r < 0)
410 return -errno;
411
412 if (fchownat(dt, to,
413 uid_is_valid(override_uid) ? override_uid : st->st_uid,
414 gid_is_valid(override_gid) ? override_gid : st->st_gid,
415 AT_SYMLINK_NOFOLLOW) < 0)
416 r = -errno;
417
418 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
419 return r;
420 }
421
/* State of the on-disk lookup table in which we remember inodes that are potential hardlink targets */
typedef struct HardlinkContext {
        /* fd of the directory used as lookup table. Never AT_FDCWD. The directory is created lazily, when
         * the first entry is added. */
        int dir_fd;

        /* The lookup table directory is created via mkdirat(parent_fd, subdir). Both values are kept
         * around so that the directory can be removed again automatically when we are done. */
        int parent_fd; /* Possibly AT_FDCWD */
        char *subdir;
} HardlinkContext;
433
434 static int hardlink_context_setup(
435 HardlinkContext *c,
436 int dt,
437 const char *to,
438 CopyFlags copy_flags) {
439
440 _cleanup_close_ int dt_copy = -1;
441 int r;
442
443 assert(c);
444 assert(c->dir_fd < 0 && c->dir_fd != AT_FDCWD);
445 assert(c->parent_fd < 0);
446 assert(!c->subdir);
447
448 /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
449 * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
450 * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
451 * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
452 * temporary directory consisting of hardlinks of every inode we copied that might be subject of
453 * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
454 * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
455 * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
456 * let's keep things simple, and just use the disk as lookup table for inodes.
457 *
458 * Note that this should have zero performance impact as long as .n_link of all files copied remains
459 * <= 0, because in that case we will not actually allocate the hardlink inode lookup table directory
460 * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the
461 * common case where hardlinks are not used at all or only for few files the fact that we store the
462 * table on disk shouldn't matter perfomance-wise. */
463
464 if (!FLAGS_SET(copy_flags, COPY_HARDLINKS))
465 return 0;
466
467 if (dt == AT_FDCWD)
468 dt_copy = AT_FDCWD;
469 else if (dt < 0)
470 return -EBADF;
471 else {
472 dt_copy = fcntl(dt, F_DUPFD_CLOEXEC, 3);
473 if (dt_copy < 0)
474 return -errno;
475 }
476
477 r = tempfn_random_child(to, "hardlink", &c->subdir);
478 if (r < 0)
479 return r;
480
481 c->parent_fd = TAKE_FD(dt_copy);
482
483 /* We don't actually create the directory we keep the table in here, that's done on-demand when the
484 * first entry is added, using hardlink_context_realize() below. */
485 return 1;
486 }
487
488 static int hardlink_context_realize(HardlinkContext *c) {
489 int r;
490
491 if (!c)
492 return 0;
493
494 if (c->dir_fd >= 0) /* Already realized */
495 return 1;
496
497 if (c->parent_fd < 0 && c->parent_fd != AT_FDCWD) /* Not configured */
498 return 0;
499
500 assert(c->subdir);
501
502 if (mkdirat(c->parent_fd, c->subdir, 0700) < 0)
503 return -errno;
504
505 c->dir_fd = openat(c->parent_fd, c->subdir, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
506 if (c->dir_fd < 0) {
507 r = -errno;
508 (void) unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR);
509 return r;
510 }
511
512 return 1;
513 }
514
515 static void hardlink_context_destroy(HardlinkContext *c) {
516 int r;
517
518 assert(c);
519
520 /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
521 * _cleanup_() so that we really delete this, even on failure. */
522
523 if (c->dir_fd >= 0) {
524 r = rm_rf_children(TAKE_FD(c->dir_fd), REMOVE_PHYSICAL, NULL); /* consumes dir_fd in all cases, even on failure */
525 if (r < 0)
526 log_debug_errno(r, "Failed to remove hardlink store (%s) contents, ignoring: %m", c->subdir);
527
528 assert(c->parent_fd >= 0 || c->parent_fd == AT_FDCWD);
529 assert(c->subdir);
530
531 if (unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR) < 0)
532 log_debug_errno(errno, "Failed to remove hardlink store (%s) directory, ignoring: %m", c->subdir);
533 }
534
535 assert_cc(AT_FDCWD < 0);
536 c->parent_fd = safe_close(c->parent_fd);
537
538 c->subdir = mfree(c->subdir);
539 }
540
541 static int try_hardlink(
542 HardlinkContext *c,
543 const struct stat *st,
544 int dt,
545 const char *to) {
546
547 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
548
549 assert(st);
550 assert(dt >= 0 || dt == AT_FDCWD);
551 assert(to);
552
553 if (!c) /* No temporary hardlink directory, don't bother */
554 return 0;
555
556 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
557 return 0;
558
559 if (c->dir_fd < 0) /* not yet realized, hence empty */
560 return 0;
561
562 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
563 if (linkat(c->dir_fd, dev_ino, dt, to, 0) < 0) {
564 if (errno != ENOENT) /* doesn't exist in store yet */
565 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", dev_ino, to);
566 return 0;
567 }
568
569 return 1;
570 }
571
572 static int memorize_hardlink(
573 HardlinkContext *c,
574 const struct stat *st,
575 int dt,
576 const char *to) {
577
578 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
579 int r;
580
581 assert(st);
582 assert(dt >= 0 || dt == AT_FDCWD);
583 assert(to);
584
585 if (!c) /* No temporary hardlink directory, don't bother */
586 return 0;
587
588 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
589 return 0;
590
591 r = hardlink_context_realize(c); /* Create the hardlink store lazily */
592 if (r < 0)
593 return r;
594
595 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
596 if (linkat(dt, to, c->dir_fd, dev_ino, 0) < 0) {
597 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", to, dev_ino);
598 return 0;
599 }
600
601 return 1;
602 }
603
604 static int fd_copy_regular(
605 int df,
606 const char *from,
607 const struct stat *st,
608 int dt,
609 const char *to,
610 uid_t override_uid,
611 gid_t override_gid,
612 CopyFlags copy_flags,
613 HardlinkContext *hardlink_context,
614 copy_progress_bytes_t progress,
615 void *userdata) {
616
617 _cleanup_close_ int fdf = -1, fdt = -1;
618 int r, q;
619
620 assert(from);
621 assert(st);
622 assert(to);
623
624 r = try_hardlink(hardlink_context, st, dt, to);
625 if (r < 0)
626 return r;
627 if (r > 0) /* worked! */
628 return 0;
629
630 fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
631 if (fdf < 0)
632 return -errno;
633
634 if (copy_flags & COPY_MAC_CREATE) {
635 r = mac_selinux_create_file_prepare_at(dt, to, S_IFREG);
636 if (r < 0)
637 return r;
638 }
639 fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
640 if (copy_flags & COPY_MAC_CREATE)
641 mac_selinux_create_file_clear();
642 if (fdt < 0)
643 return -errno;
644
645 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress, userdata);
646 if (r < 0) {
647 (void) unlinkat(dt, to, 0);
648 return r;
649 }
650
651 if (fchown(fdt,
652 uid_is_valid(override_uid) ? override_uid : st->st_uid,
653 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
654 r = -errno;
655
656 if (fchmod(fdt, st->st_mode & 07777) < 0)
657 r = -errno;
658
659 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
660 (void) copy_xattr(fdf, fdt);
661
662 q = close(fdt);
663 fdt = -1;
664
665 if (q < 0) {
666 r = -errno;
667 (void) unlinkat(dt, to, 0);
668 }
669
670 (void) memorize_hardlink(hardlink_context, st, dt, to);
671 return r;
672 }
673
674 static int fd_copy_fifo(
675 int df,
676 const char *from,
677 const struct stat *st,
678 int dt,
679 const char *to,
680 uid_t override_uid,
681 gid_t override_gid,
682 CopyFlags copy_flags,
683 HardlinkContext *hardlink_context) {
684 int r;
685
686 assert(from);
687 assert(st);
688 assert(to);
689
690 r = try_hardlink(hardlink_context, st, dt, to);
691 if (r < 0)
692 return r;
693 if (r > 0) /* worked! */
694 return 0;
695
696 if (copy_flags & COPY_MAC_CREATE) {
697 r = mac_selinux_create_file_prepare_at(dt, to, S_IFIFO);
698 if (r < 0)
699 return r;
700 }
701 r = mkfifoat(dt, to, st->st_mode & 07777);
702 if (copy_flags & COPY_MAC_CREATE)
703 mac_selinux_create_file_clear();
704 if (r < 0)
705 return -errno;
706
707 if (fchownat(dt, to,
708 uid_is_valid(override_uid) ? override_uid : st->st_uid,
709 gid_is_valid(override_gid) ? override_gid : st->st_gid,
710 AT_SYMLINK_NOFOLLOW) < 0)
711 r = -errno;
712
713 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
714 r = -errno;
715
716 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
717
718 (void) memorize_hardlink(hardlink_context, st, dt, to);
719 return r;
720 }
721
722 static int fd_copy_node(
723 int df,
724 const char *from,
725 const struct stat *st,
726 int dt,
727 const char *to,
728 uid_t override_uid,
729 gid_t override_gid,
730 CopyFlags copy_flags,
731 HardlinkContext *hardlink_context) {
732 int r;
733
734 assert(from);
735 assert(st);
736 assert(to);
737
738 r = try_hardlink(hardlink_context, st, dt, to);
739 if (r < 0)
740 return r;
741 if (r > 0) /* worked! */
742 return 0;
743
744 if (copy_flags & COPY_MAC_CREATE) {
745 r = mac_selinux_create_file_prepare_at(dt, to, st->st_mode & S_IFMT);
746 if (r < 0)
747 return r;
748 }
749 r = mknodat(dt, to, st->st_mode, st->st_rdev);
750 if (copy_flags & COPY_MAC_CREATE)
751 mac_selinux_create_file_clear();
752 if (r < 0)
753 return -errno;
754
755 if (fchownat(dt, to,
756 uid_is_valid(override_uid) ? override_uid : st->st_uid,
757 gid_is_valid(override_gid) ? override_gid : st->st_gid,
758 AT_SYMLINK_NOFOLLOW) < 0)
759 r = -errno;
760
761 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
762 r = -errno;
763
764 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
765
766 (void) memorize_hardlink(hardlink_context, st, dt, to);
767 return r;
768 }
769
770 static int fd_copy_directory(
771 int df,
772 const char *from,
773 const struct stat *st,
774 int dt,
775 const char *to,
776 dev_t original_device,
777 unsigned depth_left,
778 uid_t override_uid,
779 gid_t override_gid,
780 CopyFlags copy_flags,
781 HardlinkContext *hardlink_context,
782 const char *display_path,
783 copy_progress_path_t progress_path,
784 copy_progress_bytes_t progress_bytes,
785 void *userdata) {
786
787 _cleanup_(hardlink_context_destroy) HardlinkContext our_hardlink_context = {
788 .dir_fd = -1,
789 .parent_fd = -1,
790 };
791
792 _cleanup_close_ int fdf = -1, fdt = -1;
793 _cleanup_closedir_ DIR *d = NULL;
794 struct dirent *de;
795 bool exists, created;
796 int r;
797
798 assert(st);
799 assert(to);
800
801 if (depth_left == 0)
802 return -ENAMETOOLONG;
803
804 if (from)
805 fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
806 else
807 fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
808 if (fdf < 0)
809 return -errno;
810
811 if (!hardlink_context) {
812 /* If recreating hardlinks is requested let's set up a context for that now. */
813 r = hardlink_context_setup(&our_hardlink_context, dt, to, copy_flags);
814 if (r < 0)
815 return r;
816 if (r > 0) /* It's enabled and allocated, let's now use the same context for all recursive
817 * invocations from here down */
818 hardlink_context = &our_hardlink_context;
819 }
820
821 d = take_fdopendir(&fdf);
822 if (!d)
823 return -errno;
824
825 exists = false;
826 if (copy_flags & COPY_MERGE_EMPTY) {
827 r = dir_is_empty_at(dt, to);
828 if (r < 0 && r != -ENOENT)
829 return r;
830 else if (r == 1)
831 exists = true;
832 }
833
834 if (exists)
835 created = false;
836 else {
837 if (copy_flags & COPY_MAC_CREATE)
838 r = mkdirat_label(dt, to, st->st_mode & 07777);
839 else
840 r = mkdirat(dt, to, st->st_mode & 07777);
841 if (r >= 0)
842 created = true;
843 else if (errno == EEXIST && (copy_flags & COPY_MERGE))
844 created = false;
845 else
846 return -errno;
847 }
848
849 fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
850 if (fdt < 0)
851 return -errno;
852
853 r = 0;
854
855 FOREACH_DIRENT_ALL(de, d, return -errno) {
856 const char *child_display_path = NULL;
857 _cleanup_free_ char *dp = NULL;
858 struct stat buf;
859 int q;
860
861 if (dot_or_dot_dot(de->d_name))
862 continue;
863
864 if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
865 r = sigint_pending();
866 if (r < 0)
867 return r;
868 if (r > 0)
869 return -EINTR;
870 }
871
872 if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
873 r = -errno;
874 continue;
875 }
876
877 if (progress_path) {
878 if (display_path)
879 child_display_path = dp = path_join(display_path, de->d_name);
880 else
881 child_display_path = de->d_name;
882
883 r = progress_path(child_display_path, &buf, userdata);
884 if (r < 0)
885 return r;
886 }
887
888 if (S_ISDIR(buf.st_mode)) {
889 /*
890 * Don't descend into directories on other file systems, if this is requested. We do a simple
891 * .st_dev check here, which basically comes for free. Note that we do this check only on
892 * directories, not other kind of file system objects, for two reason:
893 *
894 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
895 * propagates the .st_dev field of the file system a file originates from all the way up
896 * through the stack to stat(). It doesn't do that for directories however. This means that
897 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
898 * confusion we hence avoid relying on this check for regular files.
899 *
900 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
901 * where we really want to avoid descending down in all eternity. However the .st_dev check
902 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
903 * file system onto itself can't be detected that way. (Note we also do a recursion depth
904 * check, which is probably the better protection in this regard, which is why
905 * COPY_SAME_MOUNT is optional).
906 */
907
908 if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
909 if (buf.st_dev != original_device)
910 continue;
911
912 r = fd_is_mount_point(dirfd(d), de->d_name, 0);
913 if (r < 0)
914 return r;
915 if (r > 0)
916 continue;
917 }
918
919 q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags, hardlink_context, child_display_path, progress_path, progress_bytes, userdata);
920 } else if (S_ISREG(buf.st_mode))
921 q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context, progress_bytes, userdata);
922 else if (S_ISLNK(buf.st_mode))
923 q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
924 else if (S_ISFIFO(buf.st_mode))
925 q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
926 else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode))
927 q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
928 else
929 q = -EOPNOTSUPP;
930
931 if (q == -EINTR) /* Propagate SIGINT up instantly */
932 return q;
933 if (q == -EEXIST && (copy_flags & COPY_MERGE))
934 q = 0;
935 if (q < 0)
936 r = q;
937 }
938
939 if (created) {
940 if (fchown(fdt,
941 uid_is_valid(override_uid) ? override_uid : st->st_uid,
942 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
943 r = -errno;
944
945 if (fchmod(fdt, st->st_mode & 07777) < 0)
946 r = -errno;
947
948 (void) copy_xattr(dirfd(d), fdt);
949 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
950 }
951
952 return r;
953 }
954
955 int copy_tree_at_full(
956 int fdf,
957 const char *from,
958 int fdt,
959 const char *to,
960 uid_t override_uid,
961 gid_t override_gid,
962 CopyFlags copy_flags,
963 copy_progress_path_t progress_path,
964 copy_progress_bytes_t progress_bytes,
965 void *userdata) {
966
967 struct stat st;
968
969 assert(from);
970 assert(to);
971
972 if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
973 return -errno;
974
975 if (S_ISREG(st.st_mode))
976 return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL, progress_bytes, userdata);
977 else if (S_ISDIR(st.st_mode))
978 return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
979 else if (S_ISLNK(st.st_mode))
980 return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
981 else if (S_ISFIFO(st.st_mode))
982 return fd_copy_fifo(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
983 else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) || S_ISSOCK(st.st_mode))
984 return fd_copy_node(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
985 else
986 return -EOPNOTSUPP;
987 }
988
989 int copy_directory_fd_full(
990 int dirfd,
991 const char *to,
992 CopyFlags copy_flags,
993 copy_progress_path_t progress_path,
994 copy_progress_bytes_t progress_bytes,
995 void *userdata) {
996
997 struct stat st;
998 int r;
999
1000 assert(dirfd >= 0);
1001 assert(to);
1002
1003 if (fstat(dirfd, &st) < 0)
1004 return -errno;
1005
1006 r = stat_verify_directory(&st);
1007 if (r < 0)
1008 return r;
1009
1010 return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
1011 }
1012
1013 int copy_directory_full(
1014 const char *from,
1015 const char *to,
1016 CopyFlags copy_flags,
1017 copy_progress_path_t progress_path,
1018 copy_progress_bytes_t progress_bytes,
1019 void *userdata) {
1020
1021 struct stat st;
1022 int r;
1023
1024 assert(from);
1025 assert(to);
1026
1027 if (lstat(from, &st) < 0)
1028 return -errno;
1029
1030 r = stat_verify_directory(&st);
1031 if (r < 0)
1032 return r;
1033
1034 return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
1035 }
1036
1037 int copy_file_fd_full(
1038 const char *from,
1039 int fdt,
1040 CopyFlags copy_flags,
1041 copy_progress_bytes_t progress_bytes,
1042 void *userdata) {
1043
1044 _cleanup_close_ int fdf = -1;
1045 int r;
1046
1047 assert(from);
1048 assert(fdt >= 0);
1049
1050 fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1051 if (fdf < 0)
1052 return -errno;
1053
1054 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
1055
1056 (void) copy_times(fdf, fdt, copy_flags);
1057 (void) copy_xattr(fdf, fdt);
1058
1059 return r;
1060 }
1061
1062 int copy_file_full(
1063 const char *from,
1064 const char *to,
1065 int flags,
1066 mode_t mode,
1067 unsigned chattr_flags,
1068 unsigned chattr_mask,
1069 CopyFlags copy_flags,
1070 copy_progress_bytes_t progress_bytes,
1071 void *userdata) {
1072
1073 _cleanup_close_ int fdf = -1;
1074 struct stat st;
1075 int fdt = -1, r;
1076
1077 assert(from);
1078 assert(to);
1079
1080 fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1081 if (fdf < 0)
1082 return -errno;
1083
1084 if (mode == MODE_INVALID)
1085 if (fstat(fdf, &st) < 0)
1086 return -errno;
1087
1088 RUN_WITH_UMASK(0000) {
1089 if (copy_flags & COPY_MAC_CREATE) {
1090 r = mac_selinux_create_file_prepare(to, S_IFREG);
1091 if (r < 0)
1092 return r;
1093 }
1094 fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY,
1095 mode != MODE_INVALID ? mode : st.st_mode);
1096 if (copy_flags & COPY_MAC_CREATE)
1097 mac_selinux_create_file_clear();
1098 if (fdt < 0)
1099 return -errno;
1100 }
1101
1102 if (chattr_mask != 0)
1103 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1104
1105 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
1106 if (r < 0) {
1107 close(fdt);
1108 (void) unlink(to);
1109 return r;
1110 }
1111
1112 (void) copy_times(fdf, fdt, copy_flags);
1113 (void) copy_xattr(fdf, fdt);
1114
1115 if (chattr_mask != 0)
1116 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1117
1118 if (close(fdt) < 0) {
1119 unlink_noerrno(to);
1120 return -errno;
1121 }
1122
1123 return 0;
1124 }
1125
1126 int copy_file_atomic_full(
1127 const char *from,
1128 const char *to,
1129 mode_t mode,
1130 unsigned chattr_flags,
1131 unsigned chattr_mask,
1132 CopyFlags copy_flags,
1133 copy_progress_bytes_t progress_bytes,
1134 void *userdata) {
1135
1136 _cleanup_(unlink_and_freep) char *t = NULL;
1137 _cleanup_close_ int fdt = -1;
1138 int r;
1139
1140 assert(from);
1141 assert(to);
1142
1143 /* We try to use O_TMPFILE here to create the file if we can. Note that this only works if COPY_REPLACE is not
1144 * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
1145 * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
1146 * system right-away and unconditionally which we then can renameat() to the right name after we completed
1147 * writing it. */
1148
1149 if (copy_flags & COPY_REPLACE) {
1150 r = tempfn_random(to, NULL, &t);
1151 if (r < 0)
1152 return r;
1153
1154 if (copy_flags & COPY_MAC_CREATE) {
1155 r = mac_selinux_create_file_prepare(to, S_IFREG);
1156 if (r < 0) {
1157 t = mfree(t);
1158 return r;
1159 }
1160 }
1161 fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
1162 if (copy_flags & COPY_MAC_CREATE)
1163 mac_selinux_create_file_clear();
1164 if (fdt < 0) {
1165 t = mfree(t);
1166 return -errno;
1167 }
1168 } else {
1169 if (copy_flags & COPY_MAC_CREATE) {
1170 r = mac_selinux_create_file_prepare(to, S_IFREG);
1171 if (r < 0)
1172 return r;
1173 }
1174 fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
1175 if (copy_flags & COPY_MAC_CREATE)
1176 mac_selinux_create_file_clear();
1177 if (fdt < 0)
1178 return fdt;
1179 }
1180
1181 if (chattr_mask != 0)
1182 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1183
1184 r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
1185 if (r < 0)
1186 return r;
1187
1188 if (fchmod(fdt, mode) < 0)
1189 return -errno;
1190
1191 if (copy_flags & COPY_REPLACE) {
1192 if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
1193 return -errno;
1194 } else {
1195 r = link_tmpfile(fdt, t, to);
1196 if (r < 0)
1197 return r;
1198 }
1199
1200 if (chattr_mask != 0)
1201 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1202
1203 t = mfree(t);
1204 return 0;
1205 }
1206
1207 int copy_times(int fdf, int fdt, CopyFlags flags) {
1208 struct stat st;
1209
1210 assert(fdf >= 0);
1211 assert(fdt >= 0);
1212
1213 if (fstat(fdf, &st) < 0)
1214 return -errno;
1215
1216 if (futimens(fdt, (struct timespec[2]) { st.st_atim, st.st_mtim }) < 0)
1217 return -errno;
1218
1219 if (FLAGS_SET(flags, COPY_CRTIME)) {
1220 usec_t crtime;
1221
1222 if (fd_getcrtime(fdf, &crtime) >= 0)
1223 (void) fd_setcrtime(fdt, crtime);
1224 }
1225
1226 return 0;
1227 }
1228
/* Applies the access mode of the file referenced by 'fdf' to the file referenced by 'fdt'.
 *
 * Only the lower twelve mode bits (mask 07777) of the source's st_mode are passed to fchmod(); the file type
 * bits are dropped.
 *
 * Returns 0 on success, -errno if fstat() or fchmod() fails. */
int copy_access(int fdf, int fdt) {
        struct stat source_st;
        mode_t access_bits;

        assert(fdf >= 0);
        assert(fdt >= 0);

        if (fstat(fdf, &source_st) < 0)
                return -errno;

        access_bits = source_st.st_mode & 07777;

        if (fchmod(fdt, access_bits) < 0)
                return -errno;

        return 0;
}
1243
1244 int copy_xattr(int fdf, int fdt) {
1245 _cleanup_free_ char *names = NULL;
1246 int ret = 0, r;
1247 const char *p;
1248
1249 r = flistxattr_malloc(fdf, &names);
1250 if (r < 0)
1251 return r;
1252
1253 NULSTR_FOREACH(p, names) {
1254 _cleanup_free_ char *value = NULL;
1255
1256 if (!startswith(p, "user."))
1257 continue;
1258
1259 r = fgetxattr_malloc(fdf, p, &value);
1260 if (r == -ENODATA)
1261 continue; /* gone by now */
1262 if (r < 0)
1263 return r;
1264
1265 if (fsetxattr(fdt, p, value, r, 0) < 0)
1266 ret = -errno;
1267 }
1268
1269 return ret;
1270 }