]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/copy.c
Merge pull request #27882 from DaanDeMeyer/repart-truncate
[thirdparty/systemd.git] / src / shared / copy.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
849958d1 2
11c3a366
TA
3#include <errno.h>
4#include <fcntl.h>
b640e274 5#include <linux/btrfs.h>
11c3a366
TA
6#include <stddef.h>
7#include <stdio.h>
8#include <stdlib.h>
b640e274 9#include <sys/ioctl.h>
cda134ab 10#include <sys/sendfile.h>
e6bd041c 11#include <sys/xattr.h>
11c3a366 12#include <unistd.h>
cda134ab 13
b5efdb8a 14#include "alloc-util.h"
d7c7c334 15#include "btrfs-util.h"
c8b3094d 16#include "chattr-util.h"
3ffd4af2 17#include "copy.h"
a0956174 18#include "dirent-util.h"
3ffd4af2 19#include "fd-util.h"
9f81a592 20#include "fileio.h"
f4f15635 21#include "fs-util.h"
c004493c 22#include "io-util.h"
11c3a366 23#include "macro.h"
b640e274 24#include "missing_fs.h"
f5947a5e 25#include "missing_syscall.h"
35cd0ba5 26#include "mkdir-label.h"
049af8ad 27#include "mountpoint-util.h"
f9bbb4dc 28#include "nulstr-util.h"
dd480f78 29#include "rm-rf.h"
aeec5efa 30#include "selinux-util.h"
215b64b2 31#include "signal-util.h"
609d3473 32#include "stat-util.h"
dd480f78 33#include "stdio-util.h"
07630cea 34#include "string-util.h"
8420fa3a 35#include "strv.h"
bf819d3a 36#include "sync-util.h"
93cc7779 37#include "time-util.h"
e4de7287 38#include "tmpfile-util.h"
affb60b1 39#include "umask-util.h"
d01cd401 40#include "user-util.h"
89a5a90c 41#include "xattr-util.h"
849958d1 42
575a07d2
LP
/* Upper bound for the on-stack buffer used by the plain read()/write() fallback in copy_bytes_full(); also
 * used there as the floor for the per-iteration chunk size. */
#define COPY_BUFFER_SIZE (16U*1024U)

/* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
 * case of bind mount cycles and suchlike. */
#define COPY_DEPTH_MAX 2048U
f2cbe59e 49
75036dce
LP
50static ssize_t try_copy_file_range(
51 int fd_in, loff_t *off_in,
52 int fd_out, loff_t *off_out,
53 size_t len,
14cb109d 54 unsigned flags) {
75036dce 55
a44202e9
ZJS
56 static int have = -1;
57 ssize_t r;
58
75036dce 59 if (have == 0)
a44202e9
ZJS
60 return -ENOSYS;
61
62 r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
75036dce 63 if (have < 0)
a44202e9 64 have = r >= 0 || errno != ENOSYS;
75036dce 65 if (r < 0)
a44202e9 66 return -errno;
75036dce
LP
67
68 return r;
a44202e9
ZJS
69}
70
e0c5c7d8
LP
/* Possible (non-error) results of fd_is_nonblock_pipe() */
enum {
        FD_IS_NO_PIPE,
        FD_IS_BLOCKING_PIPE,
        FD_IS_NONBLOCKING_PIPE,
};

/* Classifies 'fd': returns FD_IS_NO_PIPE if it does not refer to a FIFO, otherwise FD_IS_BLOCKING_PIPE or
 * FD_IS_NONBLOCKING_PIPE depending on whether O_NONBLOCK is set on it. Returns a negative errno-style value
 * if fstat() or fcntl() fail. */
static int fd_is_nonblock_pipe(int fd) {
        struct stat sb;
        int fl;

        if (fstat(fd, &sb) < 0)
                return -errno;

        if (S_ISFIFO(sb.st_mode)) {
                fl = fcntl(fd, F_GETFL);
                if (fl < 0)
                        return -errno;

                if (FLAGS_SET(fl, O_NONBLOCK))
                        return FD_IS_NONBLOCKING_PIPE;

                return FD_IS_BLOCKING_PIPE;
        }

        return FD_IS_NO_PIPE;
}
95
1ac404ca
LP
96static int look_for_signals(CopyFlags copy_flags) {
97 int r;
98
99 if ((copy_flags & (COPY_SIGINT|COPY_SIGTERM)) == 0)
100 return 0;
101
102 r = pop_pending_signal(copy_flags & COPY_SIGINT ? SIGINT : 0,
103 copy_flags & COPY_SIGTERM ? SIGTERM : 0);
104 if (r < 0)
105 return r;
106 if (r != 0)
107 return log_debug_errno(SYNTHETIC_ERRNO(EINTR),
108 "Got %s, cancelling copy operation.", signal_to_string(r));
109
110 return 0;
111}
112
f82f0b99
DDM
113static int create_hole(int fd, off_t size) {
114 off_t offset;
115 off_t end;
116
117 offset = lseek(fd, 0, SEEK_CUR);
118 if (offset < 0)
119 return -errno;
120
121 end = lseek(fd, 0, SEEK_END);
122 if (end < 0)
123 return -errno;
124
5b2d0f9e 125 /* If we're not at the end of the target file, try to punch a hole in the existing space using fallocate(). */
f82f0b99 126
5b2d0f9e
DDM
127 if (offset < end &&
128 fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, MIN(size, end - offset)) < 0 &&
129 !ERRNO_IS_NOT_SUPPORTED(errno))
f82f0b99
DDM
130 return -errno;
131
132 if (end - offset >= size) {
133 /* If we've created the full hole, set the file pointer to the end of the hole we created and exit. */
134 if (lseek(fd, offset + size, SEEK_SET) < 0)
135 return -errno;
136
137 return 0;
138 }
139
140 /* If we haven't created the full hole, use ftruncate() to grow the file (and the hole) to the
141 * required size and move the file pointer to the end of the file. */
142
143 size -= end - offset;
144
145 if (ftruncate(fd, end + size) < 0)
146 return -errno;
147
148 if (lseek(fd, 0, SEEK_END) < 0)
149 return -errno;
150
151 return 0;
152}
153
7a23c7fd
LP
154int copy_bytes_full(
155 int fdf, int fdt,
156 uint64_t max_bytes,
157 CopyFlags copy_flags,
158 void **ret_remains,
b3cade0c
LP
159 size_t *ret_remains_size,
160 copy_progress_bytes_t progress,
161 void *userdata) {
7a23c7fd 162
e63d0703 163 _cleanup_close_ int fdf_opened = -EBADF, fdt_opened = -EBADF;
ee1aa61c 164 bool try_cfr = true, try_sendfile = true, try_splice = true, copied_something = false;
e0c5c7d8 165 int r, nonblock_pipe = -1;
7c2da2ca 166 size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
cda134ab 167
849958d1
LP
168 assert(fdf >= 0);
169 assert(fdt >= 0);
170
78ba8cf7
LP
171 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
172 * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on
7a23c7fd
LP
173 * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
174 * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will
175 * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
176 * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
177 * these parameters if non-NULL are set to NULL. */
178
179 if (ret_remains)
180 *ret_remains = NULL;
181 if (ret_remains_size)
182 *ret_remains_size = 0;
78ba8cf7 183
e63d0703
YW
184 fdf = fd_reopen_condition(fdf, O_CLOEXEC | O_NOCTTY | O_RDONLY, O_PATH, &fdf_opened);
185 if (fdf < 0)
186 return fdf;
187 fdt = fd_reopen_condition(fdt, O_CLOEXEC | O_NOCTTY | O_RDWR, O_PATH, &fdt_opened);
188 if (fdt < 0)
189 return fdt;
190
5de6e116
LP
191 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
192 * source and destination first. */
193 if ((copy_flags & COPY_REFLINK)) {
194 off_t foffset;
195
196 foffset = lseek(fdf, 0, SEEK_CUR);
197 if (foffset >= 0) {
198 off_t toffset;
199
200 toffset = lseek(fdt, 0, SEEK_CUR);
201 if (toffset >= 0) {
202
203 if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
b640e274 204 r = reflink(fdf, fdt); /* full file reflink */
5de6e116 205 else
71e84b4b 206 r = reflink_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
5de6e116
LP
207 if (r >= 0) {
208 off_t t;
209
210 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
211 if (max_bytes == UINT64_MAX) {
212
213 /* We cloned to the end of the source file, let's position the read
214 * pointer there, and query it at the same time. */
215 t = lseek(fdf, 0, SEEK_END);
216 if (t < 0)
217 return -errno;
218 if (t < foffset)
219 return -ESPIPE;
220
221 /* Let's adjust the destination file write pointer by the same number
222 * of bytes. */
223 t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
224 if (t < 0)
225 return -errno;
226
227 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
228 } else {
229 t = lseek(fdf, foffset + max_bytes, SEEK_SET);
230 if (t < 0)
231 return -errno;
232
233 t = lseek(fdt, toffset + max_bytes, SEEK_SET);
234 if (t < 0)
235 return -errno;
236
237 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
238 }
239 }
5de6e116
LP
240 }
241 }
0254b455
LP
242 }
243
849958d1 244 for (;;) {
cda134ab 245 ssize_t n;
93240d3a 246
dd641ad1 247 if (max_bytes <= 0)
cc45eb59 248 break;
93240d3a 249
1ac404ca
LP
250 r = look_for_signals(copy_flags);
251 if (r < 0)
252 return r;
85559592 253
dd641ad1
LP
254 if (max_bytes != UINT64_MAX && m > max_bytes)
255 m = max_bytes;
93240d3a 256
8646b5d6
DDM
257 if (copy_flags & COPY_HOLES) {
258 off_t c, e;
259
260 c = lseek(fdf, 0, SEEK_CUR);
261 if (c < 0)
262 return -errno;
263
264 /* To see if we're in a hole, we search for the next data offset. */
265 e = lseek(fdf, c, SEEK_DATA);
12727c2b 266 if (e < 0 && errno == ENXIO)
8646b5d6
DDM
267 /* If errno == ENXIO, that means we've reached the final hole of the file and
268 * that hole isn't followed by more data. */
269 e = lseek(fdf, 0, SEEK_END);
12727c2b 270 if (e < 0)
8646b5d6
DDM
271 return -errno;
272
12727c2b
DDM
273 /* If we're in a hole (current offset is not a data offset), create a hole of the
274 * same size in the target file. */
f82f0b99
DDM
275 if (e > c) {
276 r = create_hole(fdt, e - c);
277 if (r < 0)
278 return r;
279 }
8646b5d6
DDM
280
281 c = e; /* Set c to the start of the data segment. */
282
12727c2b
DDM
283 /* After copying a potential hole, find the end of the data segment by looking for
284 * the next hole. If we get ENXIO, we're at EOF. */
8646b5d6 285 e = lseek(fdf, c, SEEK_HOLE);
12727c2b
DDM
286 if (e < 0) {
287 if (errno == ENXIO)
288 break;
8646b5d6 289 return -errno;
12727c2b 290 }
8646b5d6
DDM
291
292 /* SEEK_HOLE modifies the file offset so we need to move back to the initial offset. */
293 if (lseek(fdf, c, SEEK_SET) < 0)
294 return -errno;
295
296 /* Make sure we're not copying more than the current data segment. */
297 m = MIN(m, (size_t) e - c);
298 }
299
a44202e9
ZJS
300 /* First try copy_file_range(), unless we already tried */
301 if (try_cfr) {
302 n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
303 if (n < 0) {
6402d5c6 304 if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
a44202e9
ZJS
305 return n;
306
307 try_cfr = false;
308 /* use fallback below */
ee1aa61c
LP
309 } else if (n == 0) { /* likely EOF */
310
311 if (copied_something)
312 break;
313
314 /* So, we hit EOF immediately, without having copied a single byte. This
315 * could indicate two things: the file is actually empty, or we are on some
316 * virtual file system such as procfs/sysfs where the syscall actually
317 * doesn't work but doesn't return an error. Try to handle that, by falling
318 * back to simple read()s in case we encounter empty files.
319 *
320 * See: https://lwn.net/Articles/846403/ */
321 try_cfr = try_sendfile = try_splice = false;
322 } else
a44202e9
ZJS
323 /* Success! */
324 goto next;
325 }
326
cda134ab
LP
327 /* First try sendfile(), unless we already tried */
328 if (try_sendfile) {
cda134ab
LP
329 n = sendfile(fdt, fdf, NULL, m);
330 if (n < 0) {
00a8cf77 331 if (!IN_SET(errno, EINVAL, ENOSYS))
cda134ab
LP
332 return -errno;
333
334 try_sendfile = false;
335 /* use fallback below */
ee1aa61c
LP
336 } else if (n == 0) { /* likely EOF */
337
338 if (copied_something)
339 break;
340
341 try_sendfile = try_splice = false; /* same logic as above for copy_file_range() */
ee1aa61c 342 } else
81d20007
LP
343 /* Success! */
344 goto next;
345 }
346
e0c5c7d8
LP
347 /* Then try splice, unless we already tried. */
348 if (try_splice) {
349
350 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
351 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
ee1aa61c
LP
352 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour
353 * here, and check if either of the specified fds are a pipe, and if so, let's pass
354 * the flag automatically, depending on O_NONBLOCK being set.
e0c5c7d8 355 *
ee1aa61c
LP
356 * Here's a twist though: when we use it to move data between two pipes of which one
357 * has O_NONBLOCK set and the other has not, then we have no individual control over
358 * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee
359 * systematic O_NONBLOCK behaviour, hence don't. */
e0c5c7d8
LP
360
361 if (nonblock_pipe < 0) {
362 int a, b;
363
364 /* Check if either of these fds is a pipe, and if so non-blocking or not */
365 a = fd_is_nonblock_pipe(fdf);
366 if (a < 0)
367 return a;
368
369 b = fd_is_nonblock_pipe(fdt);
370 if (b < 0)
371 return b;
372
373 if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
374 (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
375 (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
376
ee1aa61c
LP
377 /* splice() only works if one of the fds is a pipe. If neither is,
378 * let's skip this step right-away. As mentioned above, if one of the
379 * two fds refers to a blocking pipe and the other to a non-blocking
380 * pipe, we can't use splice() either, hence don't try either. This
381 * hence means we can only use splice() if either only one of the two
382 * fds is a pipe, or if both are pipes with the same nonblocking flag
383 * setting. */
e0c5c7d8
LP
384
385 try_splice = false;
386 else
387 nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
388 }
389 }
390
81d20007 391 if (try_splice) {
e0c5c7d8 392 n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
81d20007 393 if (n < 0) {
00a8cf77 394 if (!IN_SET(errno, EINVAL, ENOSYS))
81d20007
LP
395 return -errno;
396
397 try_splice = false;
398 /* use fallback below */
ee1aa61c
LP
399 } else if (n == 0) { /* likely EOF */
400
401 if (copied_something)
402 break;
403
404 try_splice = false; /* same logic as above for copy_file_range() + sendfile() */
405 } else
81d20007 406 /* Success! */
cda134ab
LP
407 goto next;
408 }
409
410 /* As a fallback just copy bits by hand */
411 {
7a23c7fd
LP
412 uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
413 ssize_t z;
849958d1 414
00a8cf77 415 n = read(fdf, buf, sizeof buf);
cda134ab
LP
416 if (n < 0)
417 return -errno;
418 if (n == 0) /* EOF */
419 break;
420
7a23c7fd
LP
421 z = (size_t) n;
422 do {
423 ssize_t k;
424
425 k = write(fdt, p, z);
426 if (k < 0) {
427 r = -errno;
428
429 if (ret_remains) {
430 void *copy;
431
432 copy = memdup(p, z);
433 if (!copy)
434 return -ENOMEM;
435
436 *ret_remains = copy;
437 }
438
439 if (ret_remains_size)
440 *ret_remains_size = z;
441
442 return r;
443 }
444
445 assert(k <= z);
446 z -= k;
447 p += k;
448 } while (z > 0);
cda134ab 449 }
93240d3a 450
cda134ab 451 next:
b3cade0c
LP
452 if (progress) {
453 r = progress(n, userdata);
454 if (r < 0)
455 return r;
456 }
457
f5fbe71d 458 if (max_bytes != UINT64_MAX) {
59f448cf 459 assert(max_bytes >= (uint64_t) n);
93240d3a
LP
460 max_bytes -= n;
461 }
b3cade0c 462
ee1aa61c
LP
463 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the
464 * amount we already copied, but don't go below our copy buffer size, unless we are close the
465 * limit of bytes we are allowed to copy. */
00a8cf77 466 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
ee1aa61c
LP
467
468 copied_something = true;
849958d1
LP
469 }
470
cc45eb59
DDM
471 if (copy_flags & COPY_TRUNCATE) {
472 off_t off = lseek(fdt, 0, SEEK_CUR);
473 if (off < 0)
474 return -errno;
475
476 if (ftruncate(fdt, off) < 0)
477 return -errno;
478 }
479
480 return max_bytes <= 0; /* return 0 if we hit EOF earlier than the size limit */
849958d1
LP
481}
482
d01cd401
LP
483static int fd_copy_symlink(
484 int df,
485 const char *from,
486 const struct stat *st,
487 int dt,
488 const char *to,
489 uid_t override_uid,
490 gid_t override_gid,
491 CopyFlags copy_flags) {
492
849958d1
LP
493 _cleanup_free_ char *target = NULL;
494 int r;
495
496 assert(from);
497 assert(st);
498 assert(to);
499
500 r = readlinkat_malloc(df, from, &target);
501 if (r < 0)
502 return r;
503
aeec5efa
CG
504 if (copy_flags & COPY_MAC_CREATE) {
505 r = mac_selinux_create_file_prepare_at(dt, to, S_IFLNK);
506 if (r < 0)
507 return r;
508 }
494f7fc0 509 r = RET_NERRNO(symlinkat(target, dt, to));
aeec5efa
CG
510 if (copy_flags & COPY_MAC_CREATE)
511 mac_selinux_create_file_clear();
494f7fc0
DDM
512 if (r < 0) {
513 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
514 log_notice_errno(r, "Failed to copy symlink '%s', ignoring: %m", from);
515 return 0;
516 }
517
518 return r;
519 }
849958d1 520
d01cd401
LP
521 if (fchownat(dt, to,
522 uid_is_valid(override_uid) ? override_uid : st->st_uid,
523 gid_is_valid(override_gid) ? override_gid : st->st_gid,
524 AT_SYMLINK_NOFOLLOW) < 0)
fb934d53 525 r = -errno;
849958d1 526
da486c30 527 (void) copy_xattr(df, from, dt, to, copy_flags);
fb934d53
LP
528 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
529 return r;
849958d1
LP
530}
531
dd480f78
LP
/* Encapsulates the database we store potential hardlink targets in */
typedef struct HardlinkContext {
        int dir_fd;    /* An fd to the directory we use as lookup table. Never AT_FDCWD. Lazily created, when
                        * we add the first entry. Negative as long as the directory has not been created. */

        /* These two fields are used to create the hardlink repository directory above — via
         * mkdirat(parent_fd, subdir) — and are kept so that we can automatically remove the directory again
         * when we are done. */
        int parent_fd; /* Possibly AT_FDCWD */
        char *subdir;  /* Name of the repository directory relative to parent_fd; freed by
                        * hardlink_context_destroy() */
} HardlinkContext;
543
544static int hardlink_context_setup(
545 HardlinkContext *c,
546 int dt,
547 const char *to,
548 CopyFlags copy_flags) {
549
5bb1d7fb 550 _cleanup_close_ int dt_copy = -EBADF;
dd480f78
LP
551 int r;
552
553 assert(c);
554 assert(c->dir_fd < 0 && c->dir_fd != AT_FDCWD);
555 assert(c->parent_fd < 0);
556 assert(!c->subdir);
557
558 /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
559 * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
560 * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
561 * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
562 * temporary directory consisting of hardlinks of every inode we copied that might be subject of
563 * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
564 * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
565 * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
566 * let's keep things simple, and just use the disk as lookup table for inodes.
567 *
69e3234d 568 * Note that this should have zero performance impact as long as .n_link of all files copied remains
dd480f78
LP
569 * <= 0, because in that case we will not actually allocate the hardlink inode lookup table directory
570 * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the
571 * common case where hardlinks are not used at all or only for few files the fact that we store the
572 * table on disk shouldn't matter perfomance-wise. */
573
574 if (!FLAGS_SET(copy_flags, COPY_HARDLINKS))
575 return 0;
576
577 if (dt == AT_FDCWD)
578 dt_copy = AT_FDCWD;
579 else if (dt < 0)
580 return -EBADF;
581 else {
582 dt_copy = fcntl(dt, F_DUPFD_CLOEXEC, 3);
583 if (dt_copy < 0)
584 return -errno;
585 }
586
587 r = tempfn_random_child(to, "hardlink", &c->subdir);
588 if (r < 0)
589 return r;
590
591 c->parent_fd = TAKE_FD(dt_copy);
592
593 /* We don't actually create the directory we keep the table in here, that's done on-demand when the
594 * first entry is added, using hardlink_context_realize() below. */
595 return 1;
596}
597
598static int hardlink_context_realize(HardlinkContext *c) {
dd480f78
LP
599 if (!c)
600 return 0;
601
602 if (c->dir_fd >= 0) /* Already realized */
603 return 1;
604
605 if (c->parent_fd < 0 && c->parent_fd != AT_FDCWD) /* Not configured */
606 return 0;
607
608 assert(c->subdir);
609
96603ea0
LP
610 c->dir_fd = open_mkdir_at(c->parent_fd, c->subdir, O_EXCL|O_CLOEXEC, 0700);
611 if (c->dir_fd < 0)
612 return c->dir_fd;
dd480f78
LP
613
614 return 1;
615}
616
617static void hardlink_context_destroy(HardlinkContext *c) {
618 int r;
619
620 assert(c);
621
622 /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
623 * _cleanup_() so that we really delete this, even on failure. */
624
625 if (c->dir_fd >= 0) {
626 r = rm_rf_children(TAKE_FD(c->dir_fd), REMOVE_PHYSICAL, NULL); /* consumes dir_fd in all cases, even on failure */
627 if (r < 0)
628 log_debug_errno(r, "Failed to remove hardlink store (%s) contents, ignoring: %m", c->subdir);
629
630 assert(c->parent_fd >= 0 || c->parent_fd == AT_FDCWD);
631 assert(c->subdir);
632
633 if (unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR) < 0)
634 log_debug_errno(errno, "Failed to remove hardlink store (%s) directory, ignoring: %m", c->subdir);
635 }
636
637 assert_cc(AT_FDCWD < 0);
638 c->parent_fd = safe_close(c->parent_fd);
639
640 c->subdir = mfree(c->subdir);
641}
642
643static int try_hardlink(
644 HardlinkContext *c,
645 const struct stat *st,
646 int dt,
647 const char *to) {
648
649 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
650
651 assert(st);
652 assert(dt >= 0 || dt == AT_FDCWD);
653 assert(to);
654
655 if (!c) /* No temporary hardlink directory, don't bother */
656 return 0;
657
658 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
659 return 0;
660
661 if (c->dir_fd < 0) /* not yet realized, hence empty */
662 return 0;
663
664 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
665 if (linkat(c->dir_fd, dev_ino, dt, to, 0) < 0) {
666 if (errno != ENOENT) /* doesn't exist in store yet */
667 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", dev_ino, to);
668 return 0;
669 }
670
671 return 1;
672}
673
674static int memorize_hardlink(
675 HardlinkContext *c,
676 const struct stat *st,
677 int dt,
678 const char *to) {
679
680 char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
681 int r;
682
683 assert(st);
684 assert(dt >= 0 || dt == AT_FDCWD);
685 assert(to);
686
687 if (!c) /* No temporary hardlink directory, don't bother */
688 return 0;
689
690 if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
691 return 0;
692
693 r = hardlink_context_realize(c); /* Create the hardlink store lazily */
694 if (r < 0)
695 return r;
696
697 xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
698 if (linkat(dt, to, c->dir_fd, dev_ino, 0) < 0) {
699 log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", to, dev_ino);
700 return 0;
701 }
702
703 return 1;
704}
705
d3e2a7f7
AW
/* Forward declaration, so that functions defined before the actual definition can call it (presumably
 * fd_copy_directory(), for recursing into subdirectories — the definition is further down in the file). */
static int fd_copy_tree_generic(
                int df,
                const char *from,
                const struct stat *st,
                int dt,
                const char *to,
                dev_t original_device,
                unsigned depth_left,
                uid_t override_uid,
                gid_t override_gid,
                CopyFlags copy_flags,
                Hashmap *denylist,
                HardlinkContext *hardlink_context,
                const char *display_path,
                copy_progress_path_t progress_path,
                copy_progress_bytes_t progress_bytes,
                void *userdata);
723
d01cd401
LP
724static int fd_copy_regular(
725 int df,
726 const char *from,
727 const struct stat *st,
728 int dt,
729 const char *to,
730 uid_t override_uid,
731 gid_t override_gid,
b3cade0c 732 CopyFlags copy_flags,
dd480f78 733 HardlinkContext *hardlink_context,
b3cade0c
LP
734 copy_progress_bytes_t progress,
735 void *userdata) {
d01cd401 736
254d1313 737 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
849958d1
LP
738 int r, q;
739
740 assert(from);
741 assert(st);
742 assert(to);
743
dd480f78
LP
744 r = try_hardlink(hardlink_context, st, dt, to);
745 if (r < 0)
746 return r;
747 if (r > 0) /* worked! */
748 return 0;
749
849958d1
LP
750 fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
751 if (fdf < 0)
752 return -errno;
753
aeec5efa
CG
754 if (copy_flags & COPY_MAC_CREATE) {
755 r = mac_selinux_create_file_prepare_at(dt, to, S_IFREG);
756 if (r < 0)
757 return r;
758 }
849958d1 759 fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
aeec5efa
CG
760 if (copy_flags & COPY_MAC_CREATE)
761 mac_selinux_create_file_clear();
e156347e 762 if (fdt < 0)
849958d1 763 return -errno;
849958d1 764
f5fbe71d 765 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress, userdata);
5c9d961e
LP
766 if (r < 0)
767 goto fail;
849958d1 768
d01cd401
LP
769 if (fchown(fdt,
770 uid_is_valid(override_uid) ? override_uid : st->st_uid,
771 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
849958d1
LP
772 r = -errno;
773
774 if (fchmod(fdt, st->st_mode & 07777) < 0)
775 r = -errno;
776
150dcaf2 777 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
c17cfe6e 778 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
e6bd041c 779
06a40b52
LP
780 if (copy_flags & COPY_FSYNC) {
781 if (fsync(fdt) < 0) {
782 r = -errno;
783 goto fail;
784 }
785 }
786
5c9d961e 787 q = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
849958d1 788 if (q < 0) {
5c9d961e
LP
789 r = q;
790 goto fail;
849958d1
LP
791 }
792
dd480f78 793 (void) memorize_hardlink(hardlink_context, st, dt, to);
849958d1 794 return r;
5c9d961e
LP
795
796fail:
797 (void) unlinkat(dt, to, 0);
798 return r;
849958d1
LP
799}
800
d01cd401
LP
801static int fd_copy_fifo(
802 int df,
803 const char *from,
804 const struct stat *st,
805 int dt,
806 const char *to,
807 uid_t override_uid,
808 gid_t override_gid,
dd480f78
LP
809 CopyFlags copy_flags,
810 HardlinkContext *hardlink_context) {
849958d1
LP
811 int r;
812
813 assert(from);
814 assert(st);
815 assert(to);
816
dd480f78
LP
817 r = try_hardlink(hardlink_context, st, dt, to);
818 if (r < 0)
819 return r;
820 if (r > 0) /* worked! */
821 return 0;
822
aeec5efa
CG
823 if (copy_flags & COPY_MAC_CREATE) {
824 r = mac_selinux_create_file_prepare_at(dt, to, S_IFIFO);
825 if (r < 0)
826 return r;
827 }
494f7fc0 828 r = RET_NERRNO(mkfifoat(dt, to, st->st_mode & 07777));
aeec5efa
CG
829 if (copy_flags & COPY_MAC_CREATE)
830 mac_selinux_create_file_clear();
494f7fc0
DDM
831 if (r < 0) {
832 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
833 log_notice_errno(r, "Failed to copy fifo '%s', ignoring: %m", from);
834 return 0;
835 }
836
837 return r;
838 }
849958d1 839
d01cd401
LP
840 if (fchownat(dt, to,
841 uid_is_valid(override_uid) ? override_uid : st->st_uid,
842 gid_is_valid(override_gid) ? override_gid : st->st_gid,
843 AT_SYMLINK_NOFOLLOW) < 0)
849958d1
LP
844 r = -errno;
845
846 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
847 r = -errno;
848
fb934d53
LP
849 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
850
dd480f78 851 (void) memorize_hardlink(hardlink_context, st, dt, to);
849958d1
LP
852 return r;
853}
854
d01cd401
LP
855static int fd_copy_node(
856 int df,
857 const char *from,
858 const struct stat *st,
859 int dt,
860 const char *to,
861 uid_t override_uid,
862 gid_t override_gid,
dd480f78
LP
863 CopyFlags copy_flags,
864 HardlinkContext *hardlink_context) {
849958d1
LP
865 int r;
866
867 assert(from);
868 assert(st);
869 assert(to);
870
dd480f78
LP
871 r = try_hardlink(hardlink_context, st, dt, to);
872 if (r < 0)
873 return r;
874 if (r > 0) /* worked! */
875 return 0;
876
aeec5efa
CG
877 if (copy_flags & COPY_MAC_CREATE) {
878 r = mac_selinux_create_file_prepare_at(dt, to, st->st_mode & S_IFMT);
879 if (r < 0)
880 return r;
881 }
494f7fc0 882 r = RET_NERRNO(mknodat(dt, to, st->st_mode, st->st_rdev));
aeec5efa
CG
883 if (copy_flags & COPY_MAC_CREATE)
884 mac_selinux_create_file_clear();
494f7fc0
DDM
885 if (r < 0) {
886 if (FLAGS_SET(copy_flags, COPY_GRACEFUL_WARN) && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
887 log_notice_errno(r, "Failed to copy node '%s', ignoring: %m", from);
888 return 0;
889 }
890
891 return r;
892 }
849958d1 893
d01cd401
LP
894 if (fchownat(dt, to,
895 uid_is_valid(override_uid) ? override_uid : st->st_uid,
896 gid_is_valid(override_gid) ? override_gid : st->st_gid,
897 AT_SYMLINK_NOFOLLOW) < 0)
849958d1
LP
898 r = -errno;
899
900 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
901 r = -errno;
902
fb934d53
LP
903 (void) utimensat(dt, to, (struct timespec[]) { st->st_atim, st->st_mtim }, AT_SYMLINK_NOFOLLOW);
904
dd480f78 905 (void) memorize_hardlink(hardlink_context, st, dt, to);
849958d1
LP
906 return r;
907}
908
d7c7c334
LP
909static int fd_copy_directory(
910 int df,
911 const char *from,
912 const struct stat *st,
913 int dt,
914 const char *to,
915 dev_t original_device,
575a07d2 916 unsigned depth_left,
d01cd401
LP
917 uid_t override_uid,
918 gid_t override_gid,
b3cade0c 919 CopyFlags copy_flags,
b63bd125 920 Hashmap *denylist,
dd480f78 921 HardlinkContext *hardlink_context,
b3cade0c
LP
922 const char *display_path,
923 copy_progress_path_t progress_path,
924 copy_progress_bytes_t progress_bytes,
925 void *userdata) {
d7c7c334 926
dd480f78 927 _cleanup_(hardlink_context_destroy) HardlinkContext our_hardlink_context = {
254d1313
ZJS
928 .dir_fd = -EBADF,
929 .parent_fd = -EBADF,
dd480f78
LP
930 };
931
254d1313 932 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
849958d1 933 _cleanup_closedir_ DIR *d = NULL;
609d3473 934 bool exists, created;
849958d1
LP
935 int r;
936
849958d1
LP
937 assert(st);
938 assert(to);
939
575a07d2
LP
940 if (depth_left == 0)
941 return -ENAMETOOLONG;
942
d7c7c334
LP
943 if (from)
944 fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
945 else
946 fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
b498c53d
LP
947 if (fdf < 0)
948 return -errno;
849958d1 949
dd480f78
LP
950 if (!hardlink_context) {
951 /* If recreating hardlinks is requested let's set up a context for that now. */
952 r = hardlink_context_setup(&our_hardlink_context, dt, to, copy_flags);
953 if (r < 0)
954 return r;
955 if (r > 0) /* It's enabled and allocated, let's now use the same context for all recursive
956 * invocations from here down */
957 hardlink_context = &our_hardlink_context;
958 }
959
9f81a592 960 d = take_fdopendir(&fdf);
849958d1
LP
961 if (!d)
962 return -errno;
849958d1 963
609d3473
RG
964 exists = false;
965 if (copy_flags & COPY_MERGE_EMPTY) {
db55bbf2 966 r = dir_is_empty_at(dt, to, /* ignore_hidden_or_backup= */ false);
609d3473
RG
967 if (r < 0 && r != -ENOENT)
968 return r;
969 else if (r == 1)
970 exists = true;
971 }
972
973 if (exists)
849958d1 974 created = false;
609d3473 975 else {
aeec5efa
CG
976 if (copy_flags & COPY_MAC_CREATE)
977 r = mkdirat_label(dt, to, st->st_mode & 07777);
978 else
979 r = mkdirat(dt, to, st->st_mode & 07777);
609d3473
RG
980 if (r >= 0)
981 created = true;
982 else if (errno == EEXIST && (copy_flags & COPY_MERGE))
983 created = false;
984 else
985 return -errno;
986 }
849958d1
LP
987
988 fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
989 if (fdt < 0)
990 return -errno;
991
2c455af4
LP
992 r = 0;
993
b63bd125
DDM
994 if (PTR_TO_INT(hashmap_get(denylist, st)) == DENY_CONTENTS) {
995 log_debug("%s is in the denylist, not recursing", from);
996 goto finish;
997 }
998
8420fa3a 999 FOREACH_DIRENT_ALL(de, d, return -errno) {
b3cade0c
LP
1000 const char *child_display_path = NULL;
1001 _cleanup_free_ char *dp = NULL;
849958d1
LP
1002 struct stat buf;
1003 int q;
1004
49bfc877 1005 if (dot_or_dot_dot(de->d_name))
8420fa3a
LP
1006 continue;
1007
1ac404ca
LP
1008 r = look_for_signals(copy_flags);
1009 if (r < 0)
1010 return r;
85559592 1011
849958d1
LP
1012 if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
1013 r = -errno;
1014 continue;
1015 }
1016
b3cade0c
LP
1017 if (progress_path) {
1018 if (display_path)
657ee2d8 1019 child_display_path = dp = path_join(display_path, de->d_name);
b3cade0c
LP
1020 else
1021 child_display_path = de->d_name;
1022
1023 r = progress_path(child_display_path, &buf, userdata);
1024 if (r < 0)
1025 return r;
1026 }
1027
b63bd125
DDM
1028 if (PTR_TO_INT(hashmap_get(denylist, &buf)) == DENY_INODE) {
1029 log_debug("%s/%s is in the denylist, ignoring", from, de->d_name);
a424958a
DDM
1030 continue;
1031 }
1032
ef202b84 1033 if (S_ISDIR(buf.st_mode)) {
f6a77804
LP
1034 /*
1035 * Don't descend into directories on other file systems, if this is requested. We do a simple
1036 * .st_dev check here, which basically comes for free. Note that we do this check only on
1037 * directories, not other kind of file system objects, for two reason:
1038 *
1039 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
1040 * propagates the .st_dev field of the file system a file originates from all the way up
1041 * through the stack to stat(). It doesn't do that for directories however. This means that
1042 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
1043 * confusion we hence avoid relying on this check for regular files.
1044 *
1045 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
1046 * where we really want to avoid descending down in all eternity. However the .st_dev check
1047 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
575a07d2
LP
1048 * file system onto itself can't be detected that way. (Note we also do a recursion depth
1049 * check, which is probably the better protection in this regard, which is why
1050 * COPY_SAME_MOUNT is optional).
f6a77804
LP
1051 */
1052
1053 if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
1054 if (buf.st_dev != original_device)
1055 continue;
1056
1057 r = fd_is_mount_point(dirfd(d), de->d_name, 0);
1058 if (r < 0)
1059 return r;
1060 if (r > 0)
1061 continue;
1062 }
d3e2a7f7 1063 }
f6a77804 1064
a424958a
DDM
1065 q = fd_copy_tree_generic(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device,
1066 depth_left-1, override_uid, override_gid, copy_flags, denylist,
1067 hardlink_context, child_display_path, progress_path, progress_bytes,
1068 userdata);
849958d1 1069
1ac404ca 1070 if (q == -EINTR) /* Propagate SIGINT/SIGTERM up instantly */
85559592 1071 return q;
1c876927 1072 if (q == -EEXIST && (copy_flags & COPY_MERGE))
e156347e 1073 q = 0;
849958d1
LP
1074 if (q < 0)
1075 r = q;
1076 }
1077
b63bd125 1078finish:
3b8483c0 1079 if (created) {
d01cd401
LP
1080 if (fchown(fdt,
1081 uid_is_valid(override_uid) ? override_uid : st->st_uid,
1082 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
3b8483c0
LP
1083 r = -errno;
1084
1085 if (fchmod(fdt, st->st_mode & 07777) < 0)
1086 r = -errno;
1087
c17cfe6e 1088 (void) copy_xattr(dirfd(d), NULL, fdt, NULL, copy_flags);
150dcaf2 1089 (void) futimens(fdt, (struct timespec[]) { st->st_atim, st->st_mtim });
3b8483c0
LP
1090 }
1091
06a40b52
LP
1092 if (copy_flags & COPY_FSYNC_FULL) {
1093 if (fsync(fdt) < 0)
1094 return -errno;
1095 }
1096
849958d1
LP
1097 return r;
1098}
1099
d3e2a7f7
AW
1100static int fd_copy_leaf(
1101 int df,
1102 const char *from,
1103 const struct stat *st,
1104 int dt,
1105 const char *to,
1106 uid_t override_uid,
1107 gid_t override_gid,
1108 CopyFlags copy_flags,
1109 HardlinkContext *hardlink_context,
1110 const char *display_path,
1111 copy_progress_bytes_t progress_bytes,
1112 void *userdata) {
1113 int r;
1114
1115 if (S_ISREG(st->st_mode))
1116 r = fd_copy_regular(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, progress_bytes, userdata);
1117 else if (S_ISLNK(st->st_mode))
1118 r = fd_copy_symlink(df, from, st, dt, to, override_uid, override_gid, copy_flags);
1119 else if (S_ISFIFO(st->st_mode))
1120 r = fd_copy_fifo(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context);
1121 else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode) || S_ISSOCK(st->st_mode))
1122 r = fd_copy_node(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context);
1123 else
1124 r = -EOPNOTSUPP;
1125
1126 return r;
1127}
1128
1129static int fd_copy_tree_generic(
1130 int df,
1131 const char *from,
1132 const struct stat *st,
1133 int dt,
1134 const char *to,
1135 dev_t original_device,
1136 unsigned depth_left,
1137 uid_t override_uid,
1138 gid_t override_gid,
1139 CopyFlags copy_flags,
b63bd125 1140 Hashmap *denylist,
d3e2a7f7
AW
1141 HardlinkContext *hardlink_context,
1142 const char *display_path,
1143 copy_progress_path_t progress_path,
1144 copy_progress_bytes_t progress_bytes,
1145 void *userdata) {
1146 int r;
1147
1148 if (S_ISDIR(st->st_mode))
a424958a
DDM
1149 return fd_copy_directory(df, from, st, dt, to, original_device, depth_left-1, override_uid,
1150 override_gid, copy_flags, denylist, hardlink_context, display_path,
1151 progress_path, progress_bytes, userdata);
92240955 1152
b63bd125
DDM
1153 DenyType t = PTR_TO_INT(hashmap_get(denylist, st));
1154 if (t == DENY_INODE) {
1155 log_debug("%s is in the denylist, ignoring", from);
1156 return 0;
1157 } else if (t == DENY_CONTENTS)
1158 log_debug("%s is configured to have its contents excluded, but is not a directory", from);
1159
92240955
AW
1160 r = fd_copy_leaf(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, display_path, progress_bytes, userdata);
1161 /* We just tried to copy a leaf node of the tree. If it failed because the node already exists *and* the COPY_REPLACE flag has been provided, we should unlink the node and re-copy. */
1162 if (r == -EEXIST && (copy_flags & COPY_REPLACE)) {
1163 /* This codepath is us trying to address an error to copy, if the unlink fails, lets just return the original error. */
1164 if (unlinkat(dt, to, 0) < 0)
1165 return r;
1166
d3e2a7f7 1167 r = fd_copy_leaf(df, from, st, dt, to, override_uid, override_gid, copy_flags, hardlink_context, display_path, progress_bytes, userdata);
92240955 1168 }
d3e2a7f7
AW
1169
1170 return r;
1171}
1172
b3cade0c
LP
1173int copy_tree_at_full(
1174 int fdf,
1175 const char *from,
1176 int fdt,
1177 const char *to,
1178 uid_t override_uid,
1179 gid_t override_gid,
1180 CopyFlags copy_flags,
b63bd125 1181 Hashmap *denylist,
b3cade0c
LP
1182 copy_progress_path_t progress_path,
1183 copy_progress_bytes_t progress_bytes,
1184 void *userdata) {
1185
849958d1 1186 struct stat st;
06a40b52 1187 int r;
849958d1
LP
1188
1189 assert(from);
1190 assert(to);
1191
f2cbe59e 1192 if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
849958d1
LP
1193 return -errno;
1194
a424958a
DDM
1195 r = fd_copy_tree_generic(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid,
1196 override_gid, copy_flags, denylist, NULL, NULL, progress_path,
1197 progress_bytes, userdata);
06a40b52
LP
1198 if (r < 0)
1199 return r;
1200
864e4062
LP
1201 if (S_ISDIR(st.st_mode) && (copy_flags & COPY_SYNCFS)) {
1202 /* If the top-level inode is a directory run syncfs() now. */
1203 r = syncfs_path(fdt, to);
1204 if (r < 0)
1205 return r;
1206 } else if ((copy_flags & (COPY_FSYNC_FULL|COPY_SYNCFS)) != 0) {
1207 /* fsync() the parent dir of what we just copied if COPY_FSYNC_FULL is set. Also do this in
1208 * case COPY_SYNCFS is set but the top-level inode wasn't actually a directory. We do this so that
1209 * COPY_SYNCFS provides reasonable synchronization semantics on any kind of inode: when the
1210 * copy operation is done the whole inode — regardless of its type — and all its children
1211 * will be synchronized to disk. */
06a40b52
LP
1212 r = fsync_parent_at(fdt, to);
1213 if (r < 0)
1214 return r;
1215 }
1216
1217 return 0;
849958d1
LP
1218}
1219
f9f70e06
DDM
1220static int sync_dir_by_flags(int dir_fd, const char *path, CopyFlags copy_flags) {
1221 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
1222 assert(path);
864e4062
LP
1223
1224 if (copy_flags & COPY_SYNCFS)
f9f70e06 1225 return syncfs_path(dir_fd, path);
864e4062 1226 if (copy_flags & COPY_FSYNC_FULL)
f9f70e06 1227 return fsync_parent_at(dir_fd, path);
864e4062
LP
1228
1229 return 0;
1230}
1231
f9f70e06
DDM
1232int copy_directory_at_full(
1233 int dir_fdf,
b3cade0c 1234 const char *from,
f9f70e06 1235 int dir_fdt,
b3cade0c
LP
1236 const char *to,
1237 CopyFlags copy_flags,
1238 copy_progress_path_t progress_path,
1239 copy_progress_bytes_t progress_bytes,
1240 void *userdata) {
1241
9a50e3ca 1242 struct stat st;
90091508 1243 int r;
9a50e3ca 1244
f9f70e06
DDM
1245 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1246 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
9a50e3ca
LP
1247 assert(to);
1248
f9f70e06 1249 if (fstatat(dir_fdf, strempty(from), &st, AT_SYMLINK_NOFOLLOW|(isempty(from) ? AT_EMPTY_PATH : 0)) < 0)
9a50e3ca
LP
1250 return -errno;
1251
90091508
LP
1252 r = stat_verify_directory(&st);
1253 if (r < 0)
1254 return r;
9a50e3ca 1255
06a40b52 1256 r = fd_copy_directory(
f9f70e06 1257 dir_fdf, from,
06a40b52 1258 &st,
f9f70e06 1259 dir_fdt, to,
06a40b52
LP
1260 st.st_dev,
1261 COPY_DEPTH_MAX,
1262 UID_INVALID, GID_INVALID,
1263 copy_flags,
a424958a 1264 NULL, NULL, NULL,
06a40b52
LP
1265 progress_path,
1266 progress_bytes,
1267 userdata);
1268 if (r < 0)
1269 return r;
1270
f9f70e06 1271 r = sync_dir_by_flags(dir_fdt, to, copy_flags);
864e4062
LP
1272 if (r < 0)
1273 return r;
06a40b52
LP
1274
1275 return 0;
9a50e3ca
LP
1276}
1277
427d9c34
DDM
1278int copy_file_fd_at_full(
1279 int dir_fdf,
b3cade0c
LP
1280 const char *from,
1281 int fdt,
1282 CopyFlags copy_flags,
1283 copy_progress_bytes_t progress_bytes,
1284 void *userdata) {
1285
254d1313 1286 _cleanup_close_ int fdf = -EBADF;
2f782044 1287 struct stat st;
e6bd041c 1288 int r;
849958d1 1289
427d9c34 1290 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
849958d1 1291 assert(from);
cda134ab 1292 assert(fdt >= 0);
849958d1 1293
427d9c34 1294 fdf = openat(dir_fdf, from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
849958d1
LP
1295 if (fdf < 0)
1296 return -errno;
1297
2f782044
LP
1298 r = fd_verify_regular(fdf);
1299 if (r < 0)
1300 return r;
1301
1302 if (fstat(fdt, &st) < 0)
1303 return -errno;
1304
f5fbe71d 1305 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
2f782044
LP
1306 if (r < 0)
1307 return r;
e6bd041c 1308
43c893e7
FB
1309 /* Make sure to copy file attributes only over if target is a regular
1310 * file (so that copying a file to /dev/null won't alter the access
1311 * mode/ownership of that device node...) */
1312 if (S_ISREG(st.st_mode)) {
2f782044 1313 (void) copy_times(fdf, fdt, copy_flags);
c17cfe6e 1314 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
2f782044 1315 }
e6bd041c 1316
06a40b52
LP
1317 if (copy_flags & COPY_FSYNC_FULL) {
1318 r = fsync_full(fdt);
1319 if (r < 0)
1320 return r;
1321 } else if (copy_flags & COPY_FSYNC) {
1322 if (fsync(fdt) < 0)
1323 return -errno;
1324 }
1325
2f782044 1326 return 0;
cda134ab
LP
1327}
1328
d9daf0d1
DDM
1329int copy_file_at_full(
1330 int dir_fdf,
b3cade0c 1331 const char *from,
d9daf0d1 1332 int dir_fdt,
b3cade0c
LP
1333 const char *to,
1334 int flags,
1335 mode_t mode,
1336 unsigned chattr_flags,
8a016c74 1337 unsigned chattr_mask,
b3cade0c
LP
1338 CopyFlags copy_flags,
1339 copy_progress_bytes_t progress_bytes,
1340 void *userdata) {
1341
254d1313 1342 _cleanup_close_ int fdf = -EBADF, fdt = -EBADF;
b1b657c4 1343 struct stat st;
5c9d961e 1344 int r;
cda134ab 1345
d9daf0d1
DDM
1346 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1347 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
cda134ab
LP
1348 assert(from);
1349 assert(to);
1350
d9daf0d1 1351 fdf = openat(dir_fdf, from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
b1b657c4
LP
1352 if (fdf < 0)
1353 return -errno;
1354
2f782044
LP
1355 if (fstat(fdf, &st) < 0)
1356 return -errno;
1357
1358 r = stat_verify_regular(&st);
1359 if (r < 0)
1360 return r;
b1b657c4 1361
2053593f 1362 WITH_UMASK(0000) {
aeec5efa 1363 if (copy_flags & COPY_MAC_CREATE) {
d9daf0d1 1364 r = mac_selinux_create_file_prepare_at(dir_fdt, to, S_IFREG);
aeec5efa
CG
1365 if (r < 0)
1366 return r;
1367 }
d9daf0d1 1368 fdt = openat(dir_fdt, to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY,
f5fbe71d 1369 mode != MODE_INVALID ? mode : st.st_mode);
aeec5efa
CG
1370 if (copy_flags & COPY_MAC_CREATE)
1371 mac_selinux_create_file_clear();
ebd93cb6
LP
1372 if (fdt < 0)
1373 return -errno;
1374 }
849958d1 1375
2f782044
LP
1376 if (!FLAGS_SET(flags, O_EXCL)) { /* if O_EXCL was used we created the thing as regular file, no need to check again */
1377 r = fd_verify_regular(fdt);
1378 if (r < 0)
1379 goto fail;
1380 }
1381
8a016c74
LP
1382 if (chattr_mask != 0)
1383 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
f2068bcc 1384
f5fbe71d 1385 r = copy_bytes_full(fdf, fdt, UINT64_MAX, copy_flags, NULL, NULL, progress_bytes, userdata);
5c9d961e
LP
1386 if (r < 0)
1387 goto fail;
849958d1 1388
b1b657c4 1389 (void) copy_times(fdf, fdt, copy_flags);
c17cfe6e 1390 (void) copy_xattr(fdf, NULL, fdt, NULL, copy_flags);
b1b657c4 1391
8a016c74
LP
1392 if (chattr_mask != 0)
1393 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1394
06a40b52
LP
1395 if (copy_flags & (COPY_FSYNC|COPY_FSYNC_FULL)) {
1396 if (fsync(fdt) < 0) {
1397 r = -errno;
1398 goto fail;
1399 }
1400 }
1401
5c9d961e
LP
1402 r = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
1403 if (r < 0)
1404 goto fail;
849958d1 1405
06a40b52 1406 if (copy_flags & COPY_FSYNC_FULL) {
d9daf0d1 1407 r = fsync_parent_at(dir_fdt, to);
06a40b52
LP
1408 if (r < 0)
1409 goto fail;
1410 }
1411
849958d1 1412 return 0;
5c9d961e
LP
1413
1414fail:
1415 /* Only unlink if we definitely are the ones who created the file */
1416 if (FLAGS_SET(flags, O_EXCL))
d9daf0d1 1417 (void) unlinkat(dir_fdt, to, 0);
5c9d961e
LP
1418
1419 return r;
849958d1 1420}
e6bd041c 1421
427d9c34
DDM
1422int copy_file_atomic_at_full(
1423 int dir_fdf,
b3cade0c 1424 const char *from,
427d9c34 1425 int dir_fdt,
b3cade0c
LP
1426 const char *to,
1427 mode_t mode,
1428 unsigned chattr_flags,
8a016c74 1429 unsigned chattr_mask,
b3cade0c
LP
1430 CopyFlags copy_flags,
1431 copy_progress_bytes_t progress_bytes,
1432 void *userdata) {
1433
ec6bdf72 1434 _cleanup_(unlink_and_freep) char *t = NULL;
254d1313 1435 _cleanup_close_ int fdt = -EBADF;
ebd93cb6
LP
1436 int r;
1437
1438 assert(from);
1439 assert(to);
1440
23e208e7 1441 if (copy_flags & COPY_MAC_CREATE) {
427d9c34 1442 r = mac_selinux_create_file_prepare_at(dir_fdt, to, S_IFREG);
ec6bdf72
LP
1443 if (r < 0)
1444 return r;
ec6bdf72 1445 }
427d9c34 1446 fdt = open_tmpfile_linkable_at(dir_fdt, to, O_WRONLY|O_CLOEXEC, &t);
23e208e7
LP
1447 if (copy_flags & COPY_MAC_CREATE)
1448 mac_selinux_create_file_clear();
1449 if (fdt < 0)
1450 return fdt;
ebd93cb6 1451
8a016c74
LP
1452 if (chattr_mask != 0)
1453 (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
ec6bdf72 1454
427d9c34 1455 r = copy_file_fd_at_full(dir_fdf, from, fdt, copy_flags, progress_bytes, userdata);
ebd93cb6
LP
1456 if (r < 0)
1457 return r;
1458
ec6bdf72
LP
1459 if (fchmod(fdt, mode) < 0)
1460 return -errno;
1461
06a40b52
LP
1462 if ((copy_flags & (COPY_FSYNC|COPY_FSYNC_FULL))) {
1463 /* Sync the file */
1464 if (fsync(fdt) < 0)
1465 return -errno;
1466 }
1467
427d9c34 1468 r = link_tmpfile_at(fdt, dir_fdt, t, to, copy_flags & COPY_REPLACE);
23e208e7
LP
1469 if (r < 0)
1470 return r;
ebd93cb6 1471
5c9d961e
LP
1472 t = mfree(t);
1473
8a016c74
LP
1474 if (chattr_mask != 0)
1475 (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
1476
5c9d961e
LP
1477 r = close_nointr(TAKE_FD(fdt)); /* even if this fails, the fd is now invalidated */
1478 if (r < 0)
1479 goto fail;
1480
06a40b52
LP
1481 if (copy_flags & COPY_FSYNC_FULL) {
1482 /* Sync the parent directory */
427d9c34 1483 r = fsync_parent_at(dir_fdt, to);
06a40b52
LP
1484 if (r < 0)
1485 goto fail;
1486 }
1487
ebd93cb6 1488 return 0;
5c9d961e
LP
1489
1490fail:
427d9c34 1491 (void) unlinkat(dir_fdt, to, 0);
5c9d961e 1492 return r;
ebd93cb6
LP
1493}
1494
adc6f43b 1495int copy_times(int fdf, int fdt, CopyFlags flags) {
e6bd041c 1496 struct stat st;
e6bd041c
LP
1497
1498 assert(fdf >= 0);
1499 assert(fdt >= 0);
1500
1501 if (fstat(fdf, &st) < 0)
1502 return -errno;
1503
150dcaf2 1504 if (futimens(fdt, (struct timespec[2]) { st.st_atim, st.st_mtim }) < 0)
e6bd041c
LP
1505 return -errno;
1506
adc6f43b
LP
1507 if (FLAGS_SET(flags, COPY_CRTIME)) {
1508 usec_t crtime;
1509
1510 if (fd_getcrtime(fdf, &crtime) >= 0)
1511 (void) fd_setcrtime(fdt, crtime);
1512 }
e6bd041c
LP
1513
1514 return 0;
1515}
1516
bacf21e9
LP
/* Copies just the access mode (and not the ownership) from 'fdf' to 'fdt'. Only the lower 07777 mode bits are
 * applied. Returns a negative errno if stat'ing the source fails, otherwise the result of the fchmod() call
 * as translated by RET_NERRNO(). */
int copy_access(int fdf, int fdt) {
        struct stat source_st;
        mode_t access_mode;

        assert(fdf >= 0);
        assert(fdt >= 0);

        if (fstat(fdf, &source_st) < 0)
                return -errno;

        access_mode = source_st.st_mode & 07777;

        return RET_NERRNO(fchmod(fdt, access_mode));
}
1530
/* Copies both access mode (lower 07777 bits) and ownership from 'fdf' to 'fdt'. The actual change is
 * delegated to fchmod_and_chown_with_fallback(), which also receives 'patht'. Returns a negative errno if
 * stat'ing the source fails, otherwise whatever that helper returns. */
int copy_rights_with_fallback(int fdf, int fdt, const char *patht) {
        struct stat source_st;
        mode_t access_mode;

        assert(fdf >= 0);
        assert(fdt >= 0);

        if (fstat(fdf, &source_st) < 0)
                return -errno;

        access_mode = source_st.st_mode & 07777;

        return fchmod_and_chown_with_fallback(fdt, patht, access_mode, source_st.st_uid, source_st.st_gid);
}
1544
c17cfe6e 1545int copy_xattr(int df, const char *from, int dt, const char *to, CopyFlags copy_flags) {
f9bbb4dc
LP
1546 _cleanup_free_ char *names = NULL;
1547 int ret = 0, r;
e6bd041c 1548
c17cfe6e 1549 r = listxattr_at_malloc(df, from, 0, &names);
f9bbb4dc
LP
1550 if (r < 0)
1551 return r;
e6bd041c 1552
f9bbb4dc
LP
1553 NULSTR_FOREACH(p, names) {
1554 _cleanup_free_ char *value = NULL;
e6bd041c 1555
e394a6fc 1556 if (!FLAGS_SET(copy_flags, COPY_ALL_XATTRS) && !startswith(p, "user."))
f9bbb4dc 1557 continue;
e6bd041c 1558
c17cfe6e 1559 r = getxattr_at_malloc(df, from, p, 0, &value);
f9bbb4dc
LP
1560 if (r == -ENODATA)
1561 continue; /* gone by now */
1562 if (r < 0)
1563 return r;
e6bd041c 1564
c17cfe6e 1565 if (xsetxattr(dt, to, p, value, r, 0) < 0)
f9bbb4dc 1566 ret = -errno;
e6bd041c
LP
1567 }
1568
1569 return ret;
1570}
b640e274
DDM
1571
1572int reflink(int infd, int outfd) {
1573 int r;
1574
1575 assert(infd >= 0);
1576 assert(outfd >= 0);
1577
1578 /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
1579
1580 r = fd_verify_regular(outfd);
1581 if (r < 0)
1582 return r;
1583
427ad121 1584 /* FICLONE was introduced in Linux 4.5 but it uses the same number as BTRFS_IOC_CLONE introduced earlier */
b640e274 1585
427ad121 1586 assert_cc(FICLONE == BTRFS_IOC_CLONE);
b640e274 1587
427ad121 1588 return RET_NERRNO(ioctl(outfd, FICLONE, infd));
b640e274
DDM
1589}
1590
1591assert_cc(sizeof(struct file_clone_range) == sizeof(struct btrfs_ioctl_clone_range_args));
1592
71e84b4b 1593int reflink_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
b640e274
DDM
1594 struct file_clone_range args = {
1595 .src_fd = infd,
1596 .src_offset = in_offset,
1597 .src_length = sz,
1598 .dest_offset = out_offset,
1599 };
1600 int r;
1601
1602 assert(infd >= 0);
1603 assert(outfd >= 0);
1604
535358ad
LP
1605 /* Inside the kernel, FICLONE is identical to FICLONERANGE with offsets and size set to zero, let's
1606 * simplify things and use the simple ioctl in that case. Also, do the same if the size is
1607 * UINT64_MAX, which is how we usually encode "everything". */
1608 if (in_offset == 0 && out_offset == 0 && IN_SET(sz, 0, UINT64_MAX))
1609 return reflink(infd, outfd);
1610
b640e274
DDM
1611 r = fd_verify_regular(outfd);
1612 if (r < 0)
1613 return r;
1614
427ad121 1615 assert_cc(FICLONERANGE == BTRFS_IOC_CLONE_RANGE);
b640e274 1616
427ad121 1617 return RET_NERRNO(ioctl(outfd, FICLONERANGE, &args));
b640e274 1618}