]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/copy.c
license: LGPL-2.1+ -> LGPL-2.1-or-later
[thirdparty/systemd.git] / src / basic / copy.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <stddef.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <sys/sendfile.h>
9 #include <sys/xattr.h>
10 #include <unistd.h>
11
12 #include "alloc-util.h"
13 #include "btrfs-util.h"
14 #include "chattr-util.h"
15 #include "copy.h"
16 #include "dirent-util.h"
17 #include "fd-util.h"
18 #include "fileio.h"
19 #include "fs-util.h"
20 #include "io-util.h"
21 #include "macro.h"
22 #include "missing_syscall.h"
23 #include "mountpoint-util.h"
24 #include "nulstr-util.h"
25 #include "rm-rf.h"
26 #include "selinux-util.h"
27 #include "stat-util.h"
28 #include "stdio-util.h"
29 #include "string-util.h"
30 #include "strv.h"
31 #include "time-util.h"
32 #include "tmpfile-util.h"
33 #include "umask-util.h"
34 #include "user-util.h"
35 #include "xattr-util.h"
36
/* Upper bound for the on-stack buffer used by the read()/write() fallback in copy_bytes_full(); also the floor the
 * per-iteration chunk size is kept at there. */
#define COPY_BUFFER_SIZE (16U*1024U)

/* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
 * case of bind mount cycles and suchlike. */
#define COPY_DEPTH_MAX 2048U
43
44 static ssize_t try_copy_file_range(
45 int fd_in, loff_t *off_in,
46 int fd_out, loff_t *off_out,
47 size_t len,
48 unsigned flags) {
49
50 static int have = -1;
51 ssize_t r;
52
53 if (have == 0)
54 return -ENOSYS;
55
56 r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
57 if (have < 0)
58 have = r >= 0 || errno != ENOSYS;
59 if (r < 0)
60 return -errno;
61
62 return r;
63 }
64
/* Non-negative result codes of fd_is_nonblock_pipe(); negative return values of that function are errno-style
 * errors. */
enum {
        FD_IS_NO_PIPE,          /* The fd does not refer to a FIFO/pipe */
        FD_IS_BLOCKING_PIPE,    /* The fd is a FIFO/pipe and O_NONBLOCK is not set on it */
        FD_IS_NONBLOCKING_PIPE, /* The fd is a FIFO/pipe and O_NONBLOCK is set on it */
};
70
71 static int fd_is_nonblock_pipe(int fd) {
72 struct stat st;
73 int flags;
74
75 /* Checks whether the specified file descriptor refers to a pipe, and if so if O_NONBLOCK is set. */
76
77 if (fstat(fd, &st) < 0)
78 return -errno;
79
80 if (!S_ISFIFO(st.st_mode))
81 return FD_IS_NO_PIPE;
82
83 flags = fcntl(fd, F_GETFL);
84 if (flags < 0)
85 return -errno;
86
87 return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
88 }
89
90 static int sigint_pending(void) {
91 sigset_t ss;
92
93 assert_se(sigemptyset(&ss) >= 0);
94 assert_se(sigaddset(&ss, SIGINT) >= 0);
95
96 if (sigtimedwait(&ss, NULL, &(struct timespec) { 0, 0 }) < 0) {
97 if (errno == EAGAIN)
98 return false;
99
100 return -errno;
101 }
102
103 return true;
104 }
105
106 int copy_bytes_full(
107 int fdf, int fdt,
108 uint64_t max_bytes,
109 CopyFlags copy_flags,
110 void **ret_remains,
111 size_t *ret_remains_size,
112 copy_progress_bytes_t progress,
113 void *userdata) {
114
115 bool try_cfr = true, try_sendfile = true, try_splice = true;
116 int r, nonblock_pipe = -1;
117 size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
118
119 assert(fdf >= 0);
120 assert(fdt >= 0);
121
122 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
123 * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on
124 * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
125 * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will
126 * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
127 * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
128 * these parameters if non-NULL are set to NULL. */
129
130 if (ret_remains)
131 *ret_remains = NULL;
132 if (ret_remains_size)
133 *ret_remains_size = 0;
134
135 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
136 * source and destination first. */
137 if ((copy_flags & COPY_REFLINK)) {
138 off_t foffset;
139
140 foffset = lseek(fdf, 0, SEEK_CUR);
141 if (foffset >= 0) {
142 off_t toffset;
143
144 toffset = lseek(fdt, 0, SEEK_CUR);
145 if (toffset >= 0) {
146
147 if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
148 r = btrfs_reflink(fdf, fdt); /* full file reflink */
149 else
150 r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
151 if (r >= 0) {
152 off_t t;
153
154 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
155 if (max_bytes == UINT64_MAX) {
156
157 /* We cloned to the end of the source file, let's position the read
158 * pointer there, and query it at the same time. */
159 t = lseek(fdf, 0, SEEK_END);
160 if (t < 0)
161 return -errno;
162 if (t < foffset)
163 return -ESPIPE;
164
165 /* Let's adjust the destination file write pointer by the same number
166 * of bytes. */
167 t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
168 if (t < 0)
169 return -errno;
170
171 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
172 } else {
173 t = lseek(fdf, foffset + max_bytes, SEEK_SET);
174 if (t < 0)
175 return -errno;
176
177 t = lseek(fdt, toffset + max_bytes, SEEK_SET);
178 if (t < 0)
179 return -errno;
180
181 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
182 }
183 }
184 }
185 }
186 }
187
188 for (;;) {
189 ssize_t n;
190
191 if (max_bytes <= 0)
192 return 1; /* return > 0 if we hit the max_bytes limit */
193
194 if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
195 r = sigint_pending();
196 if (r < 0)
197 return r;
198 if (r > 0)
199 return -EINTR;
200 }
201
202 if (max_bytes != UINT64_MAX && m > max_bytes)
203 m = max_bytes;
204
205 /* First try copy_file_range(), unless we already tried */
206 if (try_cfr) {
207 n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
208 if (n < 0) {
209 if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
210 return n;
211
212 try_cfr = false;
213 /* use fallback below */
214 } else if (n == 0) /* EOF */
215 break;
216 else
217 /* Success! */
218 goto next;
219 }
220
221 /* First try sendfile(), unless we already tried */
222 if (try_sendfile) {
223 n = sendfile(fdt, fdf, NULL, m);
224 if (n < 0) {
225 if (!IN_SET(errno, EINVAL, ENOSYS))
226 return -errno;
227
228 try_sendfile = false;
229 /* use fallback below */
230 } else if (n == 0) /* EOF */
231 break;
232 else
233 /* Success! */
234 goto next;
235 }
236
237 /* Then try splice, unless we already tried. */
238 if (try_splice) {
239
240 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
241 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
242 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
243 * check if either of the specified fds are a pipe, and if so, let's pass the flag
244 * automatically, depending on O_NONBLOCK being set.
245 *
246 * Here's a twist though: when we use it to move data between two pipes of which one has
247 * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
248 * behaviour. Hence in that case we can't use splice() and still guarantee systematic
249 * O_NONBLOCK behaviour, hence don't. */
250
251 if (nonblock_pipe < 0) {
252 int a, b;
253
254 /* Check if either of these fds is a pipe, and if so non-blocking or not */
255 a = fd_is_nonblock_pipe(fdf);
256 if (a < 0)
257 return a;
258
259 b = fd_is_nonblock_pipe(fdt);
260 if (b < 0)
261 return b;
262
263 if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
264 (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
265 (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
266
267 /* splice() only works if one of the fds is a pipe. If neither is, let's skip
268 * this step right-away. As mentioned above, if one of the two fds refers to a
269 * blocking pipe and the other to a non-blocking pipe, we can't use splice()
270 * either, hence don't try either. This hence means we can only use splice() if
271 * either only one of the two fds is a pipe, or if both are pipes with the same
272 * nonblocking flag setting. */
273
274 try_splice = false;
275 else
276 nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
277 }
278 }
279
280 if (try_splice) {
281 n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
282 if (n < 0) {
283 if (!IN_SET(errno, EINVAL, ENOSYS))
284 return -errno;
285
286 try_splice = false;
287 /* use fallback below */
288 } else if (n == 0) /* EOF */
289 break;
290 else
291 /* Success! */
292 goto next;
293 }
294
295 /* As a fallback just copy bits by hand */
296 {
297 uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
298 ssize_t z;
299
300 n = read(fdf, buf, sizeof buf);
301 if (n < 0)
302 return -errno;
303 if (n == 0) /* EOF */
304 break;
305
306 z = (size_t) n;
307 do {
308 ssize_t k;
309
310 k = write(fdt, p, z);
311 if (k < 0) {
312 r = -errno;
313
314 if (ret_remains) {
315 void *copy;
316
317 copy = memdup(p, z);
318 if (!copy)
319 return -ENOMEM;
320
321 *ret_remains = copy;
322 }
323
324 if (ret_remains_size)
325 *ret_remains_size = z;
326
327 return r;
328 }
329
330 assert(k <= z);
331 z -= k;
332 p += k;
333 } while (z > 0);
334 }
335
336 next:
337 if (progress) {
338 r = progress(n, userdata);
339 if (r < 0)
340 return r;
341 }
342
343 if (max_bytes != (uint64_t) -1) {
344 assert(max_bytes >= (uint64_t) n);
345 max_bytes -= n;
346 }
347
348 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy,
349 * so reduce our maximum by the amount we already copied,
350 * but don't go below our copy buffer size, unless we are
351 * close the limit of bytes we are allowed to copy. */
352 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
353 }
354
355 return 0; /* return 0 if we hit EOF earlier than the size limit */
356 }
357
358 static int fd_copy_symlink(
359 int df,
360 const char *from,
361 const struct stat *st,
362 int dt,
363 const char *to,
364 uid_t override_uid,
365 gid_t override_gid,
366 CopyFlags copy_flags) {
367
368 _cleanup_free_ char *target = NULL;
369 int r;
370
371 assert(from);
372 assert(st);
373 assert(to);
374
375 r = readlinkat_malloc(df, from, &target);
376 if (r < 0)
377 return r;
378
379 if (copy_flags & COPY_MAC_CREATE) {
380 r = mac_selinux_create_file_prepare_at(dt, to, S_IFLNK);
381 if (r < 0)
382 return r;
383 }
384 r = symlinkat(target, dt, to);
385 if (copy_flags & COPY_MAC_CREATE)
386 mac_selinux_create_file_clear();
387 if (r < 0)
388 return -errno;
389
390 if (fchownat(dt, to,
391 uid_is_valid(override_uid) ? override_uid : st->st_uid,
392 gid_is_valid(override_gid) ? override_gid : st->st_gid,
393 AT_SYMLINK_NOFOLLOW) < 0)
394 return -errno;
395
396 return 0;
397 }
398
/* Encapsulates the database we store potential hardlink targets in. The database is a directory on disk whose
 * entries are hardlinks to already-copied inodes; see try_hardlink() and memorize_hardlink() for how entries are
 * looked up and added. */
typedef struct HardlinkContext {
        int dir_fd;    /* An fd to the directory we use as lookup table. Never AT_FDCWD. Lazily created, when
                        * we add the first entry. Negative as long as the directory has not been created. */

        /* These two fields are used to create the hardlink repository directory above — via
         * mkdirat(parent_fd, subdir) — and are kept so that we can automatically remove the directory again
         * when we are done. */
        int parent_fd; /* Possibly AT_FDCWD */
        char *subdir;  /* Name of the repository directory, relative to parent_fd */
} HardlinkContext;
410
411 static int hardlink_context_setup(
412 HardlinkContext *c,
413 int dt,
414 const char *to,
415 CopyFlags copy_flags) {
416
417 _cleanup_close_ int dt_copy = -1;
418 int r;
419
420 assert(c);
421 assert(c->dir_fd < 0 && c->dir_fd != AT_FDCWD);
422 assert(c->parent_fd < 0);
423 assert(!c->subdir);
424
425 /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
426 * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
427 * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
428 * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
429 * temporary directory consisting of hardlinks of every inode we copied that might be subject of
430 * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
431 * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
432 * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
433 * let's keep things simple, and just use the disk as lookup table for inodes.
434 *
435 * Note that this should have zero performance impact as long as .n_link of all files copied remains
436 * <= 0, because in that case we will not actually allocate the hardlink inode lookup table directory
437 * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the
438 * common case where hardlinks are not used at all or only for few files the fact that we store the
439 * table on disk shouldn't matter perfomance-wise. */
440
441 if (!FLAGS_SET(copy_flags, COPY_HARDLINKS))
442 return 0;
443
444 if (dt == AT_FDCWD)
445 dt_copy = AT_FDCWD;
446 else if (dt < 0)
447 return -EBADF;
448 else {
449 dt_copy = fcntl(dt, F_DUPFD_CLOEXEC, 3);
450 if (dt_copy < 0)
451 return -errno;
452 }
453
454 r = tempfn_random_child(to, "hardlink", &c->subdir);
455 if (r < 0)
456 return r;
457
458 c->parent_fd = TAKE_FD(dt_copy);
459
460 /* We don't actually create the directory we keep the table in here, that's done on-demand when the
461 * first entry is added, using hardlink_context_realize() below. */
462 return 1;
463 }
464
465 static int hardlink_context_realize(HardlinkContext *c) {
466 int r;
467
468 if (!c)
469 return 0;
470
471 if (c->dir_fd >= 0) /* Already realized */
472 return 1;
473
474 if (c->parent_fd < 0 && c->parent_fd != AT_FDCWD) /* Not configured */
475 return 0;
476
477 assert(c->subdir);
478
479 if (mkdirat(c->parent_fd, c->subdir, 0700) < 0)
480 return -errno;
481
482 c->dir_fd = openat(c->parent_fd, c->subdir, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
483 if (c->dir_fd < 0) {
484 r = -errno;
485 (void) unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR);
486 return r;
487 }
488
489 return 1;
490 }
491
492 static void hardlink_context_destroy(HardlinkContext *c) {
493 int r;
494
495 assert(c);
496
497 /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
498 * _cleanup_() so that we really delete this, even on failure. */
499
500 if (c->dir_fd >= 0) {
501 r = rm_rf_children(TAKE_FD(c->dir_fd), REMOVE_PHYSICAL, NULL); /* consumes dir_fd in all cases, even on failure */
502 if (r < 0)
503 log_debug_errno(r, "Failed to remove hardlink store (%s) contents, ignoring: %m", c->subdir);
504
505 assert(c->parent_fd >= 0 || c->parent_fd == AT_FDCWD);
506 assert(c->subdir);
507
508 if (unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR) < 0)
509 log_debug_errno(errno, "Failed to remove hardlink store (%s) directory, ignoring: %m", c->subdir);
510 }
511
512 assert_cc(AT_FDCWD < 0);
513 c->parent_fd = safe_close(c->parent_fd);
514
515 c->subdir = mfree(c->subdir);
516 }
517
518 static int try_hardlink(
519 HardlinkContext *c,
520 const struct stat *st,
521 int dt,
522 const char *to) {
523
524 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
525
526 assert(st);
527 assert(dt >= 0 || dt == AT_FDCWD);
528 assert(to);
529
530 if (!c) /* No temporary hardlink directory, don't bother */
531 return 0;
532
533 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
534 return 0;
535
536 if (c->dir_fd < 0) /* not yet realized, hence empty */
537 return 0;
538
539 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
540 if (linkat(c->dir_fd, dev_ino, dt, to, 0) < 0) {
541 if (errno != ENOENT) /* doesn't exist in store yet */
542 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", dev_ino, to);
543 return 0;
544 }
545
546 return 1;
547 }
548
549 static int memorize_hardlink(
550 HardlinkContext *c,
551 const struct stat *st,
552 int dt,
553 const char *to) {
554
555 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
556 int r;
557
558 assert(st);
559 assert(dt >= 0 || dt == AT_FDCWD);
560 assert(to);
561
562 if (!c) /* No temporary hardlink directory, don't bother */
563 return 0;
564
565 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
566 return 0;
567
568 r = hardlink_context_realize(c); /* Create the hardlink store lazily */
569 if (r < 0)
570 return r;
571
572 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
573 if (linkat(dt, to, c->dir_fd, dev_ino, 0) < 0) {
574 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", to, dev_ino);
575 return 0;
576 }
577
578 return 1;
579 }
580
581 static int fd_copy_regular(
582 int df,
583 const char *from,
584 const struct stat *st,
585 int dt,
586 const char *to,
587 uid_t override_uid,
588 gid_t override_gid,
589 CopyFlags copy_flags,
590 HardlinkContext *hardlink_context,
591 copy_progress_bytes_t progress,
592 void *userdata) {
593
594 _cleanup_close_ int fdf = -1, fdt = -1;
595 struct timespec ts[2];
596 int r, q;
597
598 assert(from);
599 assert(st);
600 assert(to);
601
602 r = try_hardlink(hardlink_context, st, dt, to);
603 if (r < 0)
604 return r;
605 if (r > 0) /* worked! */
606 return 0;
607
608 fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
609 if (fdf < 0)
610 return -errno;
611
612 if (copy_flags & COPY_MAC_CREATE) {
613 r = mac_selinux_create_file_prepare_at(dt, to, S_IFREG);
614 if (r < 0)
615 return r;
616 }
617 fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
618 if (copy_flags & COPY_MAC_CREATE)
619 mac_selinux_create_file_clear();
620 if (fdt < 0)
621 return -errno;
622
623 r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress, userdata);
624 if (r < 0) {
625 (void) unlinkat(dt, to, 0);
626 return r;
627 }
628
629 if (fchown(fdt,
630 uid_is_valid(override_uid) ? override_uid : st->st_uid,
631 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
632 r = -errno;
633
634 if (fchmod(fdt, st->st_mode & 07777) < 0)
635 r = -errno;
636
637 ts[0] = st->st_atim;
638 ts[1] = st->st_mtim;
639 (void) futimens(fdt, ts);
640 (void) copy_xattr(fdf, fdt);
641
642 q = close(fdt);
643 fdt = -1;
644
645 if (q < 0) {
646 r = -errno;
647 (void) unlinkat(dt, to, 0);
648 }
649
650 (void) memorize_hardlink(hardlink_context, st, dt, to);
651 return r;
652 }
653
654 static int fd_copy_fifo(
655 int df,
656 const char *from,
657 const struct stat *st,
658 int dt,
659 const char *to,
660 uid_t override_uid,
661 gid_t override_gid,
662 CopyFlags copy_flags,
663 HardlinkContext *hardlink_context) {
664 int r;
665
666 assert(from);
667 assert(st);
668 assert(to);
669
670 r = try_hardlink(hardlink_context, st, dt, to);
671 if (r < 0)
672 return r;
673 if (r > 0) /* worked! */
674 return 0;
675
676 if (copy_flags & COPY_MAC_CREATE) {
677 r = mac_selinux_create_file_prepare_at(dt, to, S_IFIFO);
678 if (r < 0)
679 return r;
680 }
681 r = mkfifoat(dt, to, st->st_mode & 07777);
682 if (copy_flags & COPY_MAC_CREATE)
683 mac_selinux_create_file_clear();
684 if (r < 0)
685 return -errno;
686
687 if (fchownat(dt, to,
688 uid_is_valid(override_uid) ? override_uid : st->st_uid,
689 gid_is_valid(override_gid) ? override_gid : st->st_gid,
690 AT_SYMLINK_NOFOLLOW) < 0)
691 r = -errno;
692
693 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
694 r = -errno;
695
696 (void) memorize_hardlink(hardlink_context, st, dt, to);
697 return r;
698 }
699
700 static int fd_copy_node(
701 int df,
702 const char *from,
703 const struct stat *st,
704 int dt,
705 const char *to,
706 uid_t override_uid,
707 gid_t override_gid,
708 CopyFlags copy_flags,
709 HardlinkContext *hardlink_context) {
710 int r;
711
712 assert(from);
713 assert(st);
714 assert(to);
715
716 r = try_hardlink(hardlink_context, st, dt, to);
717 if (r < 0)
718 return r;
719 if (r > 0) /* worked! */
720 return 0;
721
722 if (copy_flags & COPY_MAC_CREATE) {
723 r = mac_selinux_create_file_prepare_at(dt, to, st->st_mode & S_IFMT);
724 if (r < 0)
725 return r;
726 }
727 r = mknodat(dt, to, st->st_mode, st->st_rdev);
728 if (copy_flags & COPY_MAC_CREATE)
729 mac_selinux_create_file_clear();
730 if (r < 0)
731 return -errno;
732
733 if (fchownat(dt, to,
734 uid_is_valid(override_uid) ? override_uid : st->st_uid,
735 gid_is_valid(override_gid) ? override_gid : st->st_gid,
736 AT_SYMLINK_NOFOLLOW) < 0)
737 r = -errno;
738
739 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
740 r = -errno;
741
742 (void) memorize_hardlink(hardlink_context, st, dt, to);
743 return r;
744 }
745
746 static int fd_copy_directory(
747 int df,
748 const char *from,
749 const struct stat *st,
750 int dt,
751 const char *to,
752 dev_t original_device,
753 unsigned depth_left,
754 uid_t override_uid,
755 gid_t override_gid,
756 CopyFlags copy_flags,
757 HardlinkContext *hardlink_context,
758 const char *display_path,
759 copy_progress_path_t progress_path,
760 copy_progress_bytes_t progress_bytes,
761 void *userdata) {
762
763 _cleanup_(hardlink_context_destroy) HardlinkContext our_hardlink_context = {
764 .dir_fd = -1,
765 .parent_fd = -1,
766 };
767
768 _cleanup_close_ int fdf = -1, fdt = -1;
769 _cleanup_closedir_ DIR *d = NULL;
770 struct dirent *de;
771 bool exists, created;
772 int r;
773
774 assert(st);
775 assert(to);
776
777 if (depth_left == 0)
778 return -ENAMETOOLONG;
779
780 if (from)
781 fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
782 else
783 fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
784 if (fdf < 0)
785 return -errno;
786
787 if (!hardlink_context) {
788 /* If recreating hardlinks is requested let's set up a context for that now. */
789 r = hardlink_context_setup(&our_hardlink_context, dt, to, copy_flags);
790 if (r < 0)
791 return r;
792 if (r > 0) /* It's enabled and allocated, let's now use the same context for all recursive
793 * invocations from here down */
794 hardlink_context = &our_hardlink_context;
795 }
796
797 d = take_fdopendir(&fdf);
798 if (!d)
799 return -errno;
800
801 exists = false;
802 if (copy_flags & COPY_MERGE_EMPTY) {
803 r = dir_is_empty_at(dt, to);
804 if (r < 0 && r != -ENOENT)
805 return r;
806 else if (r == 1)
807 exists = true;
808 }
809
810 if (exists)
811 created = false;
812 else {
813 if (copy_flags & COPY_MAC_CREATE)
814 r = mkdirat_label(dt, to, st->st_mode & 07777);
815 else
816 r = mkdirat(dt, to, st->st_mode & 07777);
817 if (r >= 0)
818 created = true;
819 else if (errno == EEXIST && (copy_flags & COPY_MERGE))
820 created = false;
821 else
822 return -errno;
823 }
824
825 fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
826 if (fdt < 0)
827 return -errno;
828
829 r = 0;
830
831 FOREACH_DIRENT_ALL(de, d, return -errno) {
832 const char *child_display_path = NULL;
833 _cleanup_free_ char *dp = NULL;
834 struct stat buf;
835 int q;
836
837 if (dot_or_dot_dot(de->d_name))
838 continue;
839
840 if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
841 r = sigint_pending();
842 if (r < 0)
843 return r;
844 if (r > 0)
845 return -EINTR;
846 }
847
848 if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
849 r = -errno;
850 continue;
851 }
852
853 if (progress_path) {
854 if (display_path)
855 child_display_path = dp = path_join(display_path, de->d_name);
856 else
857 child_display_path = de->d_name;
858
859 r = progress_path(child_display_path, &buf, userdata);
860 if (r < 0)
861 return r;
862 }
863
864 if (S_ISDIR(buf.st_mode)) {
865 /*
866 * Don't descend into directories on other file systems, if this is requested. We do a simple
867 * .st_dev check here, which basically comes for free. Note that we do this check only on
868 * directories, not other kind of file system objects, for two reason:
869 *
870 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
871 * propagates the .st_dev field of the file system a file originates from all the way up
872 * through the stack to stat(). It doesn't do that for directories however. This means that
873 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
874 * confusion we hence avoid relying on this check for regular files.
875 *
876 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
877 * where we really want to avoid descending down in all eternity. However the .st_dev check
878 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
879 * file system onto itself can't be detected that way. (Note we also do a recursion depth
880 * check, which is probably the better protection in this regard, which is why
881 * COPY_SAME_MOUNT is optional).
882 */
883
884 if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
885 if (buf.st_dev != original_device)
886 continue;
887
888 r = fd_is_mount_point(dirfd(d), de->d_name, 0);
889 if (r < 0)
890 return r;
891 if (r > 0)
892 continue;
893 }
894
895 q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags, hardlink_context, child_display_path, progress_path, progress_bytes, userdata);
896 } else if (S_ISREG(buf.st_mode))
897 q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context, progress_bytes, userdata);
898 else if (S_ISLNK(buf.st_mode))
899 q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
900 else if (S_ISFIFO(buf.st_mode))
901 q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
902 else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode))
903 q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
904 else
905 q = -EOPNOTSUPP;
906
907 if (q == -EINTR) /* Propagate SIGINT up instantly */
908 return q;
909 if (q == -EEXIST && (copy_flags & COPY_MERGE))
910 q = 0;
911 if (q < 0)
912 r = q;
913 }
914
915 if (created) {
916 struct timespec ut[2] = {
917 st->st_atim,
918 st->st_mtim
919 };
920
921 if (fchown(fdt,
922 uid_is_valid(override_uid) ? override_uid : st->st_uid,
923 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
924 r = -errno;
925
926 if (fchmod(fdt, st->st_mode & 07777) < 0)
927 r = -errno;
928
929 (void) copy_xattr(dirfd(d), fdt);
930 (void) futimens(fdt, ut);
931 }
932
933 return r;
934 }
935
936 int copy_tree_at_full(
937 int fdf,
938 const char *from,
939 int fdt,
940 const char *to,
941 uid_t override_uid,
942 gid_t override_gid,
943 CopyFlags copy_flags,
944 copy_progress_path_t progress_path,
945 copy_progress_bytes_t progress_bytes,
946 void *userdata) {
947
948 struct stat st;
949
950 assert(from);
951 assert(to);
952
953 if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
954 return -errno;
955
956 if (S_ISREG(st.st_mode))
957 return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL, progress_bytes, userdata);
958 else if (S_ISDIR(st.st_mode))
959 return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
960 else if (S_ISLNK(st.st_mode))
961 return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
962 else if (S_ISFIFO(st.st_mode))
963 return fd_copy_fifo(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
964 else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) || S_ISSOCK(st.st_mode))
965 return fd_copy_node(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
966 else
967 return -EOPNOTSUPP;
968 }
969
970 int copy_directory_fd_full(
971 int dirfd,
972 const char *to,
973 CopyFlags copy_flags,
974 copy_progress_path_t progress_path,
975 copy_progress_bytes_t progress_bytes,
976 void *userdata) {
977
978 struct stat st;
979
980 assert(dirfd >= 0);
981 assert(to);
982
983 if (fstat(dirfd, &st) < 0)
984 return -errno;
985
986 if (!S_ISDIR(st.st_mode))
987 return -ENOTDIR;
988
989 return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
990 }
991
992 int copy_directory_full(
993 const char *from,
994 const char *to,
995 CopyFlags copy_flags,
996 copy_progress_path_t progress_path,
997 copy_progress_bytes_t progress_bytes,
998 void *userdata) {
999
1000 struct stat st;
1001
1002 assert(from);
1003 assert(to);
1004
1005 if (lstat(from, &st) < 0)
1006 return -errno;
1007
1008 if (!S_ISDIR(st.st_mode))
1009 return -ENOTDIR;
1010
1011 return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
1012 }
1013
1014 int copy_file_fd_full(
1015 const char *from,
1016 int fdt,
1017 CopyFlags copy_flags,
1018 copy_progress_bytes_t progress_bytes,
1019 void *userdata) {
1020
1021 _cleanup_close_ int fdf = -1;
1022 int r;
1023
1024 assert(from);
1025 assert(fdt >= 0);
1026
1027 fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
1028 if (fdf < 0)
1029 return -errno;
1030
1031 r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress_bytes, userdata);
1032
1033 (void) copy_times(fdf, fdt, copy_flags);
1034 (void) copy_xattr(fdf, fdt);
1035
1036 return r;
1037 }
1038
1039 int copy_file_full(
1040 const char *from,
1041 const char *to,
1042 int flags,
1043 mode_t mode,
1044 unsigned chattr_flags,
1045 unsigned chattr_mask,
1046 CopyFlags copy_flags,
1047 copy_progress_bytes_t progress_bytes,
1048 void *userdata) {
1049
1050 int fdt = -1, r;
1051
1052 assert(from);
1053 assert(to);
1054
1055 RUN_WITH_UMASK(0000) {
1056 if (copy_flags & COPY_MAC_CREATE) {
1057 r = mac_selinux_create_file_prepare(to, S_IFREG);
1058 if (r < 0)
1059 return r;
1060 }
1061 fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, mode);
1062 if (copy_flags & COPY_MAC_CREATE)
1063 mac_selinux_create_file_clear();
1064 if (fdt < 0)
1065 return -errno;
1066 }
1067
1068 if (chattr_mask != 0)
1069 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1070
1071 r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
1072 if (r < 0) {
1073 close(fdt);
1074 (void) unlink(to);
1075 return r;
1076 }
1077
1078 if (chattr_mask != 0)
1079 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1080
1081 if (close(fdt) < 0) {
1082 unlink_noerrno(to);
1083 return -errno;
1084 }
1085
1086 return 0;
1087 }
1088
1089 int copy_file_atomic_full(
1090 const char *from,
1091 const char *to,
1092 mode_t mode,
1093 unsigned chattr_flags,
1094 unsigned chattr_mask,
1095 CopyFlags copy_flags,
1096 copy_progress_bytes_t progress_bytes,
1097 void *userdata) {
1098
1099 _cleanup_(unlink_and_freep) char *t = NULL;
1100 _cleanup_close_ int fdt = -1;
1101 int r;
1102
1103 assert(from);
1104 assert(to);
1105
1106 /* We try to use O_TMPFILE here to create the file if we can. Note that that only works if COPY_REPLACE is not
1107 * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
1108 * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
1109 * system right-away and unconditionally which we then can renameat() to the right name after we completed
1110 * writing it. */
1111
1112 if (copy_flags & COPY_REPLACE) {
1113 r = tempfn_random(to, NULL, &t);
1114 if (r < 0)
1115 return r;
1116
1117 if (copy_flags & COPY_MAC_CREATE) {
1118 r = mac_selinux_create_file_prepare(to, S_IFREG);
1119 if (r < 0) {
1120 t = mfree(t);
1121 return r;
1122 }
1123 }
1124 fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
1125 if (copy_flags & COPY_MAC_CREATE)
1126 mac_selinux_create_file_clear();
1127 if (fdt < 0) {
1128 t = mfree(t);
1129 return -errno;
1130 }
1131 } else {
1132 if (copy_flags & COPY_MAC_CREATE) {
1133 r = mac_selinux_create_file_prepare(to, S_IFREG);
1134 if (r < 0)
1135 return r;
1136 }
1137 fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
1138 if (copy_flags & COPY_MAC_CREATE)
1139 mac_selinux_create_file_clear();
1140 if (fdt < 0)
1141 return fdt;
1142 }
1143
1144 if (chattr_mask != 0)
1145 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
1146
1147 r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
1148 if (r < 0)
1149 return r;
1150
1151 if (fchmod(fdt, mode) < 0)
1152 return -errno;
1153
1154 if (copy_flags & COPY_REPLACE) {
1155 if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
1156 return -errno;
1157 } else {
1158 r = link_tmpfile(fdt, t, to);
1159 if (r < 0)
1160 return r;
1161 }
1162
1163 if (chattr_mask != 0)
1164 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1165
1166 t = mfree(t);
1167 return 0;
1168 }
1169
1170 int copy_times(int fdf, int fdt, CopyFlags flags) {
1171 struct timespec ut[2];
1172 struct stat st;
1173
1174 assert(fdf >= 0);
1175 assert(fdt >= 0);
1176
1177 if (fstat(fdf, &st) < 0)
1178 return -errno;
1179
1180 ut[0] = st.st_atim;
1181 ut[1] = st.st_mtim;
1182
1183 if (futimens(fdt, ut) < 0)
1184 return -errno;
1185
1186 if (FLAGS_SET(flags, COPY_CRTIME)) {
1187 usec_t crtime;
1188
1189 if (fd_getcrtime(fdf, &crtime) >= 0)
1190 (void) fd_setcrtime(fdt, crtime);
1191 }
1192
1193 return 0;
1194 }
1195
/* Applies the access mode bits (the lower 07777) of 'fdf' to 'fdt'. Returns 0 on success, negative errno-style
 * error if fstat() or fchmod() fail. */
int copy_access(int fdf, int fdt) {
        struct stat source_st;

        assert(fdf >= 0);
        assert(fdt >= 0);

        /* fchmod() is only attempted if fstat() succeeded; errno is the one of whichever call failed */
        if (fstat(fdf, &source_st) < 0 ||
            fchmod(fdt, source_st.st_mode & 07777) < 0)
                return -errno;

        return 0;
}
1210
1211 int copy_xattr(int fdf, int fdt) {
1212 _cleanup_free_ char *names = NULL;
1213 int ret = 0, r;
1214 const char *p;
1215
1216 r = flistxattr_malloc(fdf, &names);
1217 if (r < 0)
1218 return r;
1219
1220 NULSTR_FOREACH(p, names) {
1221 _cleanup_free_ char *value = NULL;
1222
1223 if (!startswith(p, "user."))
1224 continue;
1225
1226 r = fgetxattr_malloc(fdf, p, &value);
1227 if (r == -ENODATA)
1228 continue; /* gone by now */
1229 if (r < 0)
1230 return r;
1231
1232 if (fsetxattr(fdt, p, value, r, 0) < 0)
1233 ret = -errno;
1234 }
1235
1236 return ret;
1237 }