]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/basic/fs-util.c
logind: Don't match non-leader processes for utmp TTY determination (#38027)
[thirdparty/systemd.git] / src / basic / fs-util.c
... / ...
CommitLineData
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include <linux/falloc.h>
4#include <stdlib.h>
5#include <sys/file.h>
6#include <unistd.h>
7
8#include "alloc-util.h"
9#include "btrfs.h"
10#include "chattr-util.h"
11#include "dirent-util.h"
12#include "errno-util.h"
13#include "fd-util.h"
14#include "fs-util.h"
15#include "hostname-util.h"
16#include "label.h"
17#include "lock-util.h"
18#include "log.h"
19#include "missing_fcntl.h"
20#include "missing_syscall.h"
21#include "mkdir.h"
22#include "path-util.h"
23#include "process-util.h"
24#include "random-util.h"
25#include "ratelimit.h"
26#include "stat-util.h"
27#include "string-util.h"
28#include "strv.h"
29#include "time-util.h"
30#include "tmpfile-util.h"
31#include "umask-util.h"
32
33int rmdir_parents(const char *path, const char *stop) {
34 char *p;
35 int r;
36
37 assert(path);
38 assert(stop);
39
40 if (!path_is_safe(path))
41 return -EINVAL;
42
43 if (!path_is_safe(stop))
44 return -EINVAL;
45
46 p = strdupa_safe(path);
47
48 for (;;) {
49 char *slash = NULL;
50
51 /* skip the last component. */
52 r = path_find_last_component(p, /* accept_dot_dot= */ false, (const char **) &slash, NULL);
53 if (r <= 0)
54 return r;
55 if (slash == p)
56 return 0;
57
58 assert(*slash == '/');
59 *slash = '\0';
60
61 if (path_startswith_full(stop, p, PATH_STARTSWITH_REFUSE_DOT_DOT))
62 return 0;
63
64 if (rmdir(p) < 0 && errno != ENOENT)
65 return -errno;
66 }
67}
68
69int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
70 int r;
71
72 assert(olddirfd >= 0 || olddirfd == AT_FDCWD);
73 assert(oldpath);
74 assert(newdirfd >= 0 || newdirfd == AT_FDCWD);
75 assert(newpath);
76
77 /* Try the ideal approach first */
78 if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
79 return 0;
80
81 /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
82 * fall back to a different method. */
83 if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
84 return -errno;
85
86 /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
87 * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
88 * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
89 if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {
90
91 r = RET_NERRNO(unlinkat(olddirfd, oldpath, 0));
92 if (r < 0) {
93 (void) unlinkat(newdirfd, newpath, 0);
94 return r;
95 }
96
97 return 0;
98 }
99
100 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !IN_SET(errno, EINVAL, EPERM)) /* FAT returns EPERM on link()… */
101 return -errno;
102
103 /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fall back to the racy TOCTOU
104 * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */
105
106 if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
107 return -EEXIST;
108 if (errno != ENOENT)
109 return -errno;
110
111 return RET_NERRNO(renameat(olddirfd, oldpath, newdirfd, newpath));
112}
113
114int readlinkat_malloc(int fd, const char *p, char **ret) {
115 size_t l = PATH_MAX;
116
117 assert(fd >= 0 || fd == AT_FDCWD);
118
119 if (fd < 0 && isempty(p))
120 return -EISDIR; /* In this case, the fd points to the current working directory, and is
121 * definitely not a symlink. Let's return earlier. */
122
123 for (;;) {
124 _cleanup_free_ char *c = NULL;
125 ssize_t n;
126
127 c = new(char, l+1);
128 if (!c)
129 return -ENOMEM;
130
131 n = readlinkat(fd, strempty(p), c, l);
132 if (n < 0)
133 return -errno;
134
135 if ((size_t) n < l) {
136 c[n] = 0;
137
138 if (ret)
139 *ret = TAKE_PTR(c);
140
141 return 0;
142 }
143
144 if (l > (SSIZE_MAX-1)/2) /* readlinkat() returns an ssize_t, and we want an extra byte for a
145 * trailing NUL, hence do an overflow check relative to SSIZE_MAX-1
146 * here */
147 return -EFBIG;
148
149 l *= 2;
150 }
151}
152
153int readlink_value(const char *p, char **ret) {
154 _cleanup_free_ char *link = NULL, *name = NULL;
155 int r;
156
157 assert(p);
158 assert(ret);
159
160 r = readlink_malloc(p, &link);
161 if (r < 0)
162 return r;
163
164 r = path_extract_filename(link, &name);
165 if (r < 0)
166 return r;
167 if (r == O_DIRECTORY)
168 return -EINVAL;
169
170 *ret = TAKE_PTR(name);
171 return 0;
172}
173
174int readlink_and_make_absolute(const char *p, char **ret) {
175 _cleanup_free_ char *target = NULL;
176 int r;
177
178 assert(p);
179 assert(ret);
180
181 r = readlink_malloc(p, &target);
182 if (r < 0)
183 return r;
184
185 return file_in_same_dir(p, target, ret);
186}
187
188int chmod_and_chown_at(int dir_fd, const char *path, mode_t mode, uid_t uid, gid_t gid) {
189 _cleanup_close_ int fd = -EBADF;
190
191 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
192
193 if (path) {
194 /* Let's acquire an O_PATH fd, as precaution to change mode/owner on the same file */
195 fd = openat(dir_fd, path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
196 if (fd < 0)
197 return -errno;
198 dir_fd = fd;
199
200 } else if (dir_fd == AT_FDCWD) {
201 /* Let's acquire an O_PATH fd of the current directory */
202 fd = openat(dir_fd, ".", O_PATH|O_CLOEXEC|O_NOFOLLOW|O_DIRECTORY);
203 if (fd < 0)
204 return -errno;
205 dir_fd = fd;
206 }
207
208 return fchmod_and_chown(dir_fd, mode, uid, gid);
209}
210
211int fchmod_and_chown_with_fallback(int fd, const char *path, mode_t mode, uid_t uid, gid_t gid) {
212 bool do_chown, do_chmod;
213 struct stat st;
214 int r;
215
216 /* Change ownership and access mode of the specified fd. Tries to do so safely, ensuring that at no
217 * point in time the access mode is above the old access mode under the old ownership or the new
218 * access mode under the new ownership. Note: this call tries hard to leave the access mode
219 * unaffected if the uid/gid is changed, i.e. it undoes implicit suid/sgid dropping the kernel does
220 * on chown().
221 *
222 * This call is happy with O_PATH fds.
223 *
224 * If path is given, allow a fallback path which does not use /proc/self/fd/. On any normal system
225 * /proc will be mounted, but in certain improperly assembled environments it might not be. This is
226 * less secure (potential TOCTOU), so should only be used after consideration. */
227
228 if (fstat(fd, &st) < 0)
229 return -errno;
230
231 do_chown =
232 (uid != UID_INVALID && st.st_uid != uid) ||
233 (gid != GID_INVALID && st.st_gid != gid);
234
235 do_chmod =
236 !S_ISLNK(st.st_mode) && /* chmod is not defined on symlinks */
237 ((mode != MODE_INVALID && ((st.st_mode ^ mode) & 07777) != 0) ||
238 do_chown); /* If we change ownership, make sure we reset the mode afterwards, since chown()
239 * modifies the access mode too */
240
241 if (mode == MODE_INVALID)
242 mode = st.st_mode; /* If we only shall do a chown(), save original mode, since chown() might break it. */
243 else if ((mode & S_IFMT) != 0 && ((mode ^ st.st_mode) & S_IFMT) != 0)
244 return -EINVAL; /* insist on the right file type if it was specified */
245
246 if (do_chown && do_chmod) {
247 mode_t minimal = st.st_mode & mode; /* the subset of the old and the new mask */
248
249 if (((minimal ^ st.st_mode) & 07777) != 0) {
250 r = fchmod_opath(fd, minimal & 07777);
251 if (r < 0) {
252 if (!path || r != -ENOSYS)
253 return r;
254
255 /* Fallback path which doesn't use /proc/self/fd/. */
256 if (chmod(path, minimal & 07777) < 0)
257 return -errno;
258 }
259 }
260 }
261
262 if (do_chown)
263 if (fchownat(fd, "", uid, gid, AT_EMPTY_PATH) < 0)
264 return -errno;
265
266 if (do_chmod) {
267 r = fchmod_opath(fd, mode & 07777);
268 if (r < 0) {
269 if (!path || r != -ENOSYS)
270 return r;
271
272 /* Fallback path which doesn't use /proc/self/fd/. */
273 if (chmod(path, mode & 07777) < 0)
274 return -errno;
275 }
276 }
277
278 return do_chown || do_chmod;
279}
280
281int fchmod_umask(int fd, mode_t m) {
282 _cleanup_umask_ mode_t u = umask(0777);
283
284 return RET_NERRNO(fchmod(fd, m & (~u)));
285}
286
287int fchmod_opath(int fd, mode_t m) {
288 /* This function operates also on fd that might have been opened with
289 * O_PATH. The tool set we have is non-intuitive:
290 * - fchmod(2) only operates on open files (i. e., fds with an open file description);
291 * - fchmodat(2) does not have a flag arg like fchownat(2) does, so no way to pass AT_EMPTY_PATH;
292 * + it should not be confused with the libc fchmodat(3) interface, which adds 4th flag argument,
293 * but does not support AT_EMPTY_PATH (only supports AT_SYMLINK_NOFOLLOW);
294 * - fchmodat2(2) supports all the AT_* flags, but is still very recent.
295 *
296 * We try to use fchmodat2(), and, if it is not supported, resort
297 * to the /proc/self/fd dance. */
298
299 assert(fd >= 0);
300
301 if (fchmodat2(fd, "", m, AT_EMPTY_PATH) >= 0)
302 return 0;
303 if (!IN_SET(errno, ENOSYS, EPERM)) /* Some container managers block unknown syscalls with EPERM */
304 return -errno;
305
306 if (chmod(FORMAT_PROC_FD_PATH(fd), m) < 0) {
307 if (errno != ENOENT)
308 return -errno;
309
310 return proc_fd_enoent_errno();
311 }
312
313 return 0;
314}
315
316int futimens_opath(int fd, const struct timespec ts[2]) {
317 /* Similar to fchmod_opath() but for futimens() */
318
319 assert(fd >= 0);
320
321 if (utimensat(fd, "", ts, AT_EMPTY_PATH) >= 0)
322 return 0;
323 if (errno != EINVAL)
324 return -errno;
325
326 /* Support for AT_EMPTY_PATH is added rather late (kernel 5.8), so fall back to going through /proc/
327 * if unavailable. */
328
329 if (utimensat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), ts, /* flags = */ 0) < 0) {
330 if (errno != ENOENT)
331 return -errno;
332
333 return proc_fd_enoent_errno();
334 }
335
336 return 0;
337}
338
/* Logs warnings about questionable access modes of the configuration file 'path', based on the stat data
 * in 'st': any executable bit set, world-writable, and (only when running as PID 1) not readable by group
 * and others. Purely advisory: always returns 0. */
int stat_warn_permissions(const char *path, const struct stat *st) {
        mode_t m;

        assert(path);
        assert(st);

        m = st->st_mode;

        /* Don't complain if we are reading something that is not a regular file, for example /dev/null */
        if (!S_ISREG(m))
                return 0;

        if ((m & 0111) != 0)
                log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);

        if ((m & 0002) != 0)
                log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);

        if (getpid_cached() == 1 && (m & 0044) != 0044)
                log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);

        return 0;
}
358
/* Same as stat_warn_permissions(), but obtains the stat data from the open file descriptor 'fd'; 'path'
 * is only used for the log messages. Returns negative errno if fstat() fails. */
int fd_warn_permissions(const char *path, int fd) {
        struct stat sb;

        assert(path);
        assert(fd >= 0);

        if (fstat(fd, &sb) < 0)
                return -errno;

        return stat_warn_permissions(path, &sb);
}
370
371int access_nofollow(const char *path, int mode) {
372 return RET_NERRNO(faccessat(AT_FDCWD, path, mode, AT_SYMLINK_NOFOLLOW));
373}
374
375int touch_fd(int fd, usec_t stamp) {
376 assert(fd >= 0);
377
378 if (stamp == USEC_INFINITY)
379 return futimens_opath(fd, /* ts= */ NULL);
380
381 struct timespec ts[2];
382 timespec_store(ts + 0, stamp);
383 ts[1] = ts[0];
384 return futimens_opath(fd, ts);
385}
386
387int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
388 _cleanup_close_ int fd = -EBADF;
389 int ret;
390
391 assert(path);
392
393 /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
394 * itself which is updated, not its target
395 *
396 * Returns the first error we encounter, but tries to apply as much as possible. */
397
398 if (parents)
399 (void) mkdir_parents(path, 0755);
400
401 /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
402 * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
403 * won't trigger any driver magic or so. */
404 fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
405 if (fd < 0) {
406 if (errno != ENOENT)
407 return -errno;
408
409 /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
410 * here, and nothing else */
411 fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
412 if (fd < 0)
413 return -errno;
414 }
415
416 /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
417 * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
418 * something fchown(), fchmod(), futimensat() don't allow. */
419 ret = fchmod_and_chown(fd, mode, uid, gid);
420
421 return RET_GATHER(ret, touch_fd(fd, stamp));
422}
423
424int touch(const char *path) {
425 return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
426}
427
428int symlinkat_idempotent(const char *from, int atfd, const char *to, bool make_relative) {
429 _cleanup_free_ char *relpath = NULL;
430 int r;
431
432 assert(from);
433 assert(to);
434
435 if (make_relative) {
436 r = path_make_relative_parent(to, from, &relpath);
437 if (r < 0)
438 return r;
439
440 from = relpath;
441 }
442
443 if (symlinkat(from, atfd, to) < 0) {
444 _cleanup_free_ char *p = NULL;
445
446 if (errno != EEXIST)
447 return -errno;
448
449 r = readlinkat_malloc(atfd, to, &p);
450 if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
451 return -EEXIST;
452 if (r < 0) /* Any other error? In that case propagate it as is */
453 return r;
454
455 if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
456 return -EEXIST;
457 }
458
459 return 0;
460}
461
462int symlinkat_atomic_full(const char *from, int atfd, const char *to, SymlinkFlags flags) {
463 int r;
464
465 assert(from);
466 assert(to);
467
468 _cleanup_free_ char *relpath = NULL;
469 if (FLAGS_SET(flags, SYMLINK_MAKE_RELATIVE)) {
470 r = path_make_relative_parent(to, from, &relpath);
471 if (r < 0)
472 return r;
473
474 from = relpath;
475 }
476
477 _cleanup_free_ char *t = NULL;
478 r = tempfn_random(to, NULL, &t);
479 if (r < 0)
480 return r;
481
482 bool call_label_ops_post = false;
483 if (FLAGS_SET(flags, SYMLINK_LABEL)) {
484 r = label_ops_pre(atfd, to, S_IFLNK);
485 if (r < 0)
486 return r;
487
488 call_label_ops_post = true;
489 }
490
491 r = RET_NERRNO(symlinkat(from, atfd, t));
492 if (call_label_ops_post)
493 RET_GATHER(r, label_ops_post(atfd, t, /* created= */ r >= 0));
494 if (r < 0)
495 return r;
496
497 r = RET_NERRNO(renameat(atfd, t, atfd, to));
498 if (r < 0) {
499 (void) unlinkat(atfd, t, 0);
500 return r;
501 }
502
503 return 0;
504}
505
506int mknodat_atomic(int atfd, const char *path, mode_t mode, dev_t dev) {
507 _cleanup_free_ char *t = NULL;
508 int r;
509
510 assert(path);
511
512 r = tempfn_random(path, NULL, &t);
513 if (r < 0)
514 return r;
515
516 if (mknodat(atfd, t, mode, dev) < 0)
517 return -errno;
518
519 r = RET_NERRNO(renameat(atfd, t, atfd, path));
520 if (r < 0) {
521 (void) unlinkat(atfd, t, 0);
522 return r;
523 }
524
525 return 0;
526}
527
528int mkfifoat_atomic(int atfd, const char *path, mode_t mode) {
529 _cleanup_free_ char *t = NULL;
530 int r;
531
532 assert(path);
533
534 /* We're only interested in the (random) filename. */
535 r = tempfn_random(path, NULL, &t);
536 if (r < 0)
537 return r;
538
539 if (mkfifoat(atfd, t, mode) < 0)
540 return -errno;
541
542 r = RET_NERRNO(renameat(atfd, t, atfd, path));
543 if (r < 0) {
544 (void) unlinkat(atfd, t, 0);
545 return r;
546 }
547
548 return 0;
549}
550
551int get_files_in_directory(const char *path, char ***ret_list) {
552 _cleanup_strv_free_ char **l = NULL;
553 _cleanup_closedir_ DIR *d = NULL;
554 size_t n = 0;
555
556 assert(path);
557
558 /* Returns all files in a directory in *list, and the number
559 * of files as return value. If list is NULL returns only the
560 * number. */
561
562 d = opendir(path);
563 if (!d)
564 return -errno;
565
566 FOREACH_DIRENT_ALL(de, d, return -errno) {
567 if (!dirent_is_file(de))
568 continue;
569
570 if (ret_list) {
571 /* one extra slot is needed for the terminating NULL */
572 if (!GREEDY_REALLOC(l, n + 2))
573 return -ENOMEM;
574
575 l[n] = strdup(de->d_name);
576 if (!l[n])
577 return -ENOMEM;
578
579 l[++n] = NULL;
580 } else
581 n++;
582 }
583
584 if (ret_list)
585 *ret_list = TAKE_PTR(l);
586
587 return n;
588}
589
590static int getenv_tmp_dir(const char **ret_path) {
591 int r, ret = 0;
592
593 assert(ret_path);
594
595 /* We use the same order of environment variables python uses in tempfile.gettempdir():
596 * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
597 FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
598 const char *e;
599
600 e = secure_getenv(n);
601 if (!e)
602 continue;
603 if (!path_is_absolute(e)) {
604 r = -ENOTDIR;
605 goto next;
606 }
607 if (!path_is_normalized(e)) {
608 r = -EPERM;
609 goto next;
610 }
611
612 r = is_dir(e, true);
613 if (r < 0)
614 goto next;
615 if (r == 0) {
616 r = -ENOTDIR;
617 goto next;
618 }
619
620 *ret_path = e;
621 return 1;
622
623 next:
624 /* Remember first error, to make this more debuggable */
625 if (ret >= 0)
626 ret = r;
627 }
628
629 if (ret < 0)
630 return ret;
631
632 *ret_path = NULL;
633 return ret;
634}
635
/* Shared implementation of var_tmp_dir() and tmp_dir(): prefers the directory getenv_tmp_dir() finds in
 * the environment, and otherwise falls back to 'def', provided is_dir() confirms it.
 *
 * Returns 0 and the chosen directory in '*ret' on success. On failure the error from the environment
 * lookup (if there was one) and the error for 'def' are combined with RET_GATHER() and returned. */
static int tmp_dir_internal(const char *def, const char **ret) {
        const char *from_env;
        int r, q;

        assert(def);
        assert(ret);

        r = getenv_tmp_dir(&from_env);
        if (r > 0) {
                *ret = from_env;
                return 0;
        }

        q = is_dir(def, /* follow = */ true);
        if (q == 0)
                q = -ENOTDIR;
        if (q < 0)
                return RET_GATHER(r, q);

        *ret = def;
        return 0;
}
658
/* Returns the location for "larger" temporary files, that is backed by physical storage if available, and
 * thus might even survive a reboot: /var/tmp.
 *
 * If $TMPDIR (or one of the related environment variables) is set, its value is preferred however. Both
 * this function and tmp_dir() are affected by it, which makes $TMPDIR a variable that overrides all
 * temporary file storage locations. */
int var_tmp_dir(const char **ret) {
        const char *fallback = "/var/tmp";

        assert(ret);

        return tmp_dir_internal(fallback, ret);
}
669
/* Returns the location for "smaller" temporary files, which is usually backed by an in-memory file
 * system: /tmp. As in var_tmp_dir(), a usable directory from $TMPDIR or a related environment variable
 * takes precedence. */
int tmp_dir(const char **ret) {
        const char *fallback = "/tmp";

        assert(ret);

        return tmp_dir_internal(fallback, ret);
}
678
/* Removes 'filename' and logs an error if that fails. A file that does not exist is not considered an
 * error. Returns 0 on success (or if there was nothing to remove), otherwise the value returned by
 * log_error_errno(). */
int unlink_or_warn(const char *filename) {
        assert(filename);

        if (unlink(filename) >= 0)
                return 0;

        if (errno == ENOENT)
                return 0;

        /* If the file system is read-only unlink() returns EROFS even if the file doesn't exist. Hence, in
         * that case check separately whether the file is there, and don't complain if it isn't. */
        if (errno == EROFS && access(filename, F_OK) < 0)
                return 0;

        return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);
}
691
692char *rmdir_and_free(char *p) {
693 PROTECT_ERRNO;
694
695 if (!p)
696 return NULL;
697
698 (void) rmdir(p);
699 return mfree(p);
700}
701
702char* unlink_and_free(char *p) {
703 PROTECT_ERRNO;
704
705 if (!p)
706 return NULL;
707
708 (void) unlink(p);
709 return mfree(p);
710}
711
712int access_fd(int fd, int mode) {
713 assert(fd >= 0);
714
715 /* Like access() but operates on an already open fd */
716
717 if (faccessat(fd, "", mode, AT_EMPTY_PATH) >= 0)
718 return 0;
719 if (errno != EINVAL)
720 return -errno;
721
722 /* Support for AT_EMPTY_PATH is added rather late (kernel 5.8), so fall back to going through /proc/
723 * if unavailable. */
724
725 if (access(FORMAT_PROC_FD_PATH(fd), mode) < 0) {
726 if (errno != ENOENT)
727 return -errno;
728
729 return proc_fd_enoent_errno();
730 }
731
732 return 0;
733}
734
735int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) {
736 _cleanup_close_ int truncate_fd = -EBADF;
737 struct stat st;
738 off_t l, bs;
739
740 assert(fd >= 0 || fd == AT_FDCWD);
741 assert(name);
742 assert((flags & ~(UNLINK_REMOVEDIR|UNLINK_ERASE)) == 0);
743
744 /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
745 * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
746 * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
747 * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
748 * returned to the free pool.
749 *
750 * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
751 * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
752 * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
753 * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
754 * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
755 * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
756 *
757 * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
758 * primary job – to delete the file – is accomplished. */
759
760 if (!FLAGS_SET(flags, UNLINK_REMOVEDIR)) {
761 truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
762 if (truncate_fd < 0) {
763
764 /* If this failed because the file doesn't exist propagate the error right-away. Also,
765 * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
766 * returned when this is a directory but we are not supposed to delete those, hence propagate
767 * the error right-away too. */
768 if (IN_SET(errno, ENOENT, EISDIR))
769 return -errno;
770
771 if (errno != ELOOP) /* don't complain if this is a symlink */
772 log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
773 }
774 }
775
776 if (unlinkat(fd, name, FLAGS_SET(flags, UNLINK_REMOVEDIR) ? AT_REMOVEDIR : 0) < 0)
777 return -errno;
778
779 if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
780 return 0;
781
782 if (fstat(truncate_fd, &st) < 0) {
783 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
784 return 0;
785 }
786
787 if (!S_ISREG(st.st_mode))
788 return 0;
789
790 if (FLAGS_SET(flags, UNLINK_ERASE) && st.st_size > 0 && st.st_nlink == 0) {
791 uint64_t left = st.st_size;
792 char buffer[64 * 1024];
793
794 /* If erasing is requested, let's overwrite the file with random data once before deleting
795 * it. This isn't going to give you shred(1) semantics, but hopefully should be good enough
796 * for stuff backed by tmpfs at least.
797 *
798 * Note that we only erase like this if the link count of the file is zero. If it is higher it
799 * is still linked by someone else and we'll leave it to them to remove it securely
800 * eventually! */
801
802 random_bytes(buffer, sizeof(buffer));
803
804 while (left > 0) {
805 ssize_t n;
806
807 n = write(truncate_fd, buffer, MIN(sizeof(buffer), left));
808 if (n < 0) {
809 log_debug_errno(errno, "Failed to erase data in file '%s', ignoring.", name);
810 break;
811 }
812
813 assert(left >= (size_t) n);
814 left -= n;
815 }
816
817 /* Let's refresh metadata */
818 if (fstat(truncate_fd, &st) < 0) {
819 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
820 return 0;
821 }
822 }
823
824 /* Don't deallocate if there's nothing to deallocate or if the file is linked elsewhere */
825 if (st.st_blocks == 0 || st.st_nlink > 0)
826 return 0;
827
828 /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
829 * punch-hole/truncate this to release the disk space. */
830
831 bs = MAX(st.st_blksize, 512);
832 l = ROUND_UP(st.st_size, bs); /* Round up to next block size */
833
834 if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
835 return 0; /* Successfully punched a hole! 😊 */
836
837 /* Fall back to truncation */
838 if (ftruncate(truncate_fd, 0) < 0) {
839 log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
840 return 0;
841 }
842
843 return 0;
844}
845
846int open_parent_at(int dir_fd, const char *path, int flags, mode_t mode) {
847 _cleanup_free_ char *parent = NULL;
848 int r;
849
850 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
851 assert(path);
852
853 r = path_extract_directory(path, &parent);
854 if (r == -EDESTADDRREQ) {
855 parent = strdup(".");
856 if (!parent)
857 return -ENOMEM;
858 } else if (r == -EADDRNOTAVAIL) {
859 parent = strdup(path);
860 if (!parent)
861 return -ENOMEM;
862 } else if (r < 0)
863 return r;
864
865 /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an
866 * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */
867
868 if (FLAGS_SET(flags, O_PATH))
869 flags |= O_DIRECTORY;
870 else if (!FLAGS_SET(flags, O_TMPFILE))
871 flags |= O_DIRECTORY|O_RDONLY;
872
873 return RET_NERRNO(openat(dir_fd, parent, flags, mode));
874}
875
876int conservative_renameat(
877 int olddirfd, const char *oldpath,
878 int newdirfd, const char *newpath) {
879
880 _cleanup_close_ int old_fd = -EBADF, new_fd = -EBADF;
881 struct stat old_stat, new_stat;
882
883 /* Renames the old path to the new path, much like renameat() — except if both are regular files and
884 * have the exact same contents and basic file attributes already. In that case remove the new file
885 * instead. This call is useful for reducing inotify wakeups on files that are updated but don't
886 * actually change. This function is written in a style that we rather rename too often than suppress
887 * too much. I.e. whenever we are in doubt, we rather rename than fail. After all reducing inotify
888 * events is an optimization only, not more. */
889
890 old_fd = openat(olddirfd, oldpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW);
891 if (old_fd < 0)
892 goto do_rename;
893
894 new_fd = openat(newdirfd, newpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW);
895 if (new_fd < 0)
896 goto do_rename;
897
898 if (fstat(old_fd, &old_stat) < 0)
899 goto do_rename;
900
901 if (!S_ISREG(old_stat.st_mode))
902 goto do_rename;
903
904 if (fstat(new_fd, &new_stat) < 0)
905 goto do_rename;
906
907 if (stat_inode_same(&new_stat, &old_stat))
908 goto is_same;
909
910 if (old_stat.st_mode != new_stat.st_mode ||
911 old_stat.st_size != new_stat.st_size ||
912 old_stat.st_uid != new_stat.st_uid ||
913 old_stat.st_gid != new_stat.st_gid)
914 goto do_rename;
915
916 for (;;) {
917 uint8_t buf1[16*1024];
918 uint8_t buf2[sizeof(buf1)];
919 ssize_t l1, l2;
920
921 l1 = read(old_fd, buf1, sizeof(buf1));
922 if (l1 < 0)
923 goto do_rename;
924
925 if (l1 == sizeof(buf1))
926 /* Read the full block, hence read a full block in the other file too */
927
928 l2 = read(new_fd, buf2, l1);
929 else {
930 assert((size_t) l1 < sizeof(buf1));
931
932 /* Short read. This hence was the last block in the first file, and then came
933 * EOF. Read one byte more in the second file, so that we can verify we hit EOF there
934 * too. */
935
936 assert((size_t) (l1 + 1) <= sizeof(buf2));
937 l2 = read(new_fd, buf2, l1 + 1);
938 }
939 if (l2 != l1)
940 goto do_rename;
941
942 if (memcmp(buf1, buf2, l1) != 0)
943 goto do_rename;
944
945 if ((size_t) l1 < sizeof(buf1)) /* We hit EOF on the first file, and the second file too, hence exit
946 * now. */
947 break;
948 }
949
950is_same:
951 /* Everything matches? Then don't rename, instead remove the source file, and leave the existing
952 * destination in place */
953
954 if (unlinkat(olddirfd, oldpath, 0) < 0)
955 goto do_rename;
956
957 return 0;
958
959do_rename:
960 if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
961 return -errno;
962
963 return 1;
964}
965
966int posix_fallocate_loop(int fd, uint64_t offset, uint64_t size) {
967 RateLimit rl;
968 int r;
969
970 r = posix_fallocate(fd, offset, size); /* returns positive errnos on error */
971 if (r != EINTR)
972 return -r; /* Let's return negative errnos, like common in our codebase */
973
974 /* On EINTR try a couple of times more, but protect against busy looping
975 * (not more than 16 times per 10s) */
976 rl = (const RateLimit) { 10 * USEC_PER_SEC, 16 };
977 while (ratelimit_below(&rl)) {
978 r = posix_fallocate(fd, offset, size);
979 if (r != EINTR)
980 return -r;
981 }
982
983 return -EINTR;
984}
985
986int parse_cifs_service(
987 const char *s,
988 char **ret_host,
989 char **ret_service,
990 char **ret_path) {
991
992 _cleanup_free_ char *h = NULL, *ss = NULL, *x = NULL;
993 const char *p, *e, *d;
994 char delimiter;
995
996 /* Parses a CIFS service in form of //host/service/path… and splitting it in three parts. The last
997 * part is optional, in which case NULL is returned there. To maximize compatibility syntax with
998 * backslashes instead of slashes is accepted too. */
999
1000 if (!s)
1001 return -EINVAL;
1002
1003 p = startswith(s, "//");
1004 if (!p) {
1005 p = startswith(s, "\\\\");
1006 if (!p)
1007 return -EINVAL;
1008 }
1009
1010 delimiter = s[0];
1011 e = strchr(p, delimiter);
1012 if (!e)
1013 return -EINVAL;
1014
1015 h = strndup(p, e - p);
1016 if (!h)
1017 return -ENOMEM;
1018
1019 if (!hostname_is_valid(h, 0))
1020 return -EINVAL;
1021
1022 e++;
1023
1024 d = strchrnul(e, delimiter);
1025
1026 ss = strndup(e, d - e);
1027 if (!ss)
1028 return -ENOMEM;
1029
1030 if (!filename_is_valid(ss))
1031 return -EINVAL;
1032
1033 if (!isempty(d)) {
1034 x = strdup(skip_leading_chars(d, CHAR_TO_STR(delimiter)));
1035 if (!x)
1036 return -EINVAL;
1037
1038 /* Make sure to convert Windows-style "\" → Unix-style / */
1039 for (char *i = x; *i; i++)
1040 if (*i == delimiter)
1041 *i = '/';
1042
1043 if (!path_is_valid(x))
1044 return -EINVAL;
1045
1046 path_simplify(x);
1047 if (!path_is_normalized(x))
1048 return -EINVAL;
1049 }
1050
1051 if (ret_host)
1052 *ret_host = TAKE_PTR(h);
1053 if (ret_service)
1054 *ret_service = TAKE_PTR(ss);
1055 if (ret_path)
1056 *ret_path = TAKE_PTR(x);
1057
1058 return 0;
1059}
1060
1061int open_mkdir_at_full(int dirfd, const char *path, int flags, XOpenFlags xopen_flags, mode_t mode) {
1062 _cleanup_close_ int fd = -EBADF, parent_fd = -EBADF;
1063 _cleanup_free_ char *fname = NULL, *parent = NULL;
1064 int r;
1065
1066 /* Creates a directory with mkdirat() and then opens it, in the "most atomic" fashion we can
1067 * do. Guarantees that the returned fd refers to a directory. If O_EXCL is specified will fail if the
1068 * dir already exists. Otherwise will open an existing dir, but only if it is one. */
1069
1070 if (flags & ~(O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_EXCL|O_NOATIME|O_NOFOLLOW|O_PATH))
1071 return -EINVAL;
1072 if ((flags & O_ACCMODE_STRICT) != O_RDONLY)
1073 return -EINVAL;
1074
1075 /* Note that O_DIRECTORY|O_NOFOLLOW is implied, but we allow specifying it anyway. The following
1076 * flags actually make sense to specify: O_CLOEXEC, O_EXCL, O_NOATIME, O_PATH */
1077
1078 /* If this is not a valid filename, it's a path. Let's open the parent directory then, so
1079 * that we can pin it, and operate below it. */
1080 r = path_extract_directory(path, &parent);
1081 if (r < 0) {
1082 if (!IN_SET(r, -EDESTADDRREQ, -EADDRNOTAVAIL))
1083 return r;
1084 } else {
1085 r = path_extract_filename(path, &fname);
1086 if (r < 0)
1087 return r;
1088
1089 parent_fd = openat(dirfd, parent, O_PATH|O_DIRECTORY|O_CLOEXEC);
1090 if (parent_fd < 0)
1091 return -errno;
1092
1093 dirfd = parent_fd;
1094 path = fname;
1095 }
1096
1097 fd = xopenat_full(dirfd, path, flags|O_CREAT|O_DIRECTORY|O_NOFOLLOW, xopen_flags, mode);
1098 if (IN_SET(fd, -ELOOP, -ENOTDIR))
1099 return -EEXIST;
1100 if (fd < 0)
1101 return fd;
1102
1103 return TAKE_FD(fd);
1104}
1105
1106int openat_report_new(int dirfd, const char *pathname, int flags, mode_t mode, bool *ret_newly_created) {
1107 int fd;
1108
1109 /* Just like openat(), but adds one thing: optionally returns whether we created the file anew or if
1110 * it already existed before. This is only relevant if O_CREAT is set without O_EXCL, and thus will
1111 * shortcut to openat() otherwise.
1112 *
1113 * Note that this routine is a bit more strict with symlinks than regular openat() is. If O_NOFOLLOW
1114 * is not specified, then we'll follow the symlink when opening an existing file but we will *not*
1115 * follow it when creating a new one (because that's a terrible UNIX misfeature and generally a
1116 * security hole). */
1117
1118 if (!FLAGS_SET(flags, O_CREAT) || FLAGS_SET(flags, O_EXCL)) {
1119 fd = openat(dirfd, pathname, flags, mode);
1120 if (fd < 0)
1121 return -errno;
1122
1123 if (ret_newly_created)
1124 *ret_newly_created = FLAGS_SET(flags, O_CREAT);
1125 return fd;
1126 }
1127
1128 for (unsigned attempts = 7;;) {
1129 /* First, attempt to open without O_CREAT/O_EXCL, i.e. open existing file */
1130 fd = openat(dirfd, pathname, flags & ~(O_CREAT | O_EXCL), mode);
1131 if (fd >= 0) {
1132 if (ret_newly_created)
1133 *ret_newly_created = false;
1134 return fd;
1135 }
1136 if (errno != ENOENT)
1137 return -errno;
1138
1139 /* So the file didn't exist yet, hence create it with O_CREAT/O_EXCL/O_NOFOLLOW. */
1140 fd = openat(dirfd, pathname, flags | O_CREAT | O_EXCL | O_NOFOLLOW, mode);
1141 if (fd >= 0) {
1142 if (ret_newly_created)
1143 *ret_newly_created = true;
1144 return fd;
1145 }
1146 if (errno != EEXIST)
1147 return -errno;
1148
1149 /* Hmm, so now we got EEXIST? Then someone might have created the file between the first and
1150 * second call to openat(). Let's try again but with a limit so we don't spin forever. */
1151
1152 if (--attempts == 0) /* Give up eventually, somebody is playing with us */
1153 return -EEXIST;
1154 }
1155}
1156
1157int xopenat_full(int dir_fd, const char *path, int open_flags, XOpenFlags xopen_flags, mode_t mode) {
1158 _cleanup_close_ int fd = -EBADF;
1159 bool made_dir = false, made_file = false;
1160 int r;
1161
1162 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
1163
1164 /* An inode cannot be both a directory and a regular file at the same time. */
1165 assert(!(FLAGS_SET(open_flags, O_DIRECTORY) && FLAGS_SET(xopen_flags, XO_REGULAR)));
1166
1167 /* This is like openat(), but has a few tricks up its sleeves, extending behaviour:
1168 *
1169 * • O_DIRECTORY|O_CREAT is supported, which causes a directory to be created, and immediately
1170 * opened. When used with the XO_SUBVOLUME flag this will even create a btrfs subvolume.
1171 *
1172 * • If O_CREAT is used with XO_LABEL, any created file will be immediately relabelled.
1173 *
1174 * • If the path is specified NULL or empty, behaves like fd_reopen().
1175 *
1176 * • If XO_NOCOW is specified will turn on the NOCOW btrfs flag on the file, if available.
1177 *
1178 * • if XO_REGULAR is specified will return an error if inode is not a regular file.
1179 *
1180 * • If mode is specified as MODE_INVALID, we'll use 0755 for dirs, and 0644 for regular files.
1181 */
1182
1183 if (mode == MODE_INVALID)
1184 mode = (open_flags & O_DIRECTORY) ? 0755 : 0644;
1185
1186 if (isempty(path)) {
1187 assert(!FLAGS_SET(open_flags, O_CREAT|O_EXCL));
1188
1189 if (FLAGS_SET(xopen_flags, XO_REGULAR)) {
1190 r = fd_verify_regular(dir_fd);
1191 if (r < 0)
1192 return r;
1193 }
1194
1195 return fd_reopen(dir_fd, open_flags & ~O_NOFOLLOW);
1196 }
1197
1198 bool call_label_ops_post = false;
1199
1200 if (FLAGS_SET(open_flags, O_CREAT) && FLAGS_SET(xopen_flags, XO_LABEL)) {
1201 r = label_ops_pre(dir_fd, path, FLAGS_SET(open_flags, O_DIRECTORY) ? S_IFDIR : S_IFREG);
1202 if (r < 0)
1203 return r;
1204
1205 call_label_ops_post = true;
1206 }
1207
1208 if (FLAGS_SET(open_flags, O_DIRECTORY|O_CREAT)) {
1209 if (FLAGS_SET(xopen_flags, XO_SUBVOLUME))
1210 r = btrfs_subvol_make_fallback(dir_fd, path, mode);
1211 else
1212 r = RET_NERRNO(mkdirat(dir_fd, path, mode));
1213 if (r == -EEXIST) {
1214 if (FLAGS_SET(open_flags, O_EXCL))
1215 return -EEXIST;
1216 } else if (r < 0)
1217 return r;
1218 else
1219 made_dir = true;
1220
1221 open_flags &= ~(O_EXCL|O_CREAT);
1222 }
1223
1224 if (FLAGS_SET(xopen_flags, XO_REGULAR)) {
1225 /* Guarantee we return a regular fd only, and don't open the file unless we verified it
1226 * first */
1227
1228 if (FLAGS_SET(open_flags, O_PATH)) {
1229 fd = openat(dir_fd, path, open_flags, mode);
1230 if (fd < 0) {
1231 r = -errno;
1232 goto error;
1233 }
1234
1235 r = fd_verify_regular(fd);
1236 if (r < 0)
1237 goto error;
1238
1239 } else if (FLAGS_SET(open_flags, O_CREAT|O_EXCL)) {
1240 /* In O_EXCL mode we can just create the thing, everything is dealt with for us */
1241 fd = openat(dir_fd, path, open_flags, mode);
1242 if (fd < 0) {
1243 r = -errno;
1244 goto error;
1245 }
1246
1247 made_file = true;
1248 } else {
1249 /* Otherwise pin the inode first via O_PATH */
1250 _cleanup_close_ int inode_fd = openat(dir_fd, path, O_PATH|O_CLOEXEC|(open_flags & O_NOFOLLOW));
1251 if (inode_fd < 0) {
1252 if (errno != ENOENT || !FLAGS_SET(open_flags, O_CREAT)) {
1253 r = -errno;
1254 goto error;
1255 }
1256
1257 /* Doesn't exist yet, then try to create it */
1258 fd = openat(dir_fd, path, open_flags|O_CREAT|O_EXCL, mode);
1259 if (fd < 0) {
1260 r = -errno;
1261 goto error;
1262 }
1263
1264 made_file = true;
1265 } else {
1266 /* OK, we pinned it. Now verify it's actually a regular file, and then reopen it */
1267 r = fd_verify_regular(inode_fd);
1268 if (r < 0)
1269 goto error;
1270
1271 fd = fd_reopen(inode_fd, open_flags & ~(O_NOFOLLOW|O_CREAT));
1272 if (fd < 0) {
1273 r = fd;
1274 goto error;
1275 }
1276 }
1277 }
1278 } else {
1279 fd = openat_report_new(dir_fd, path, open_flags, mode, &made_file);
1280 if (fd < 0) {
1281 r = fd;
1282 goto error;
1283 }
1284 }
1285
1286 if (call_label_ops_post) {
1287 call_label_ops_post = false;
1288
1289 r = label_ops_post(fd, /* path= */ NULL, made_file || made_dir);
1290 if (r < 0)
1291 goto error;
1292 }
1293
1294 if (FLAGS_SET(xopen_flags, XO_NOCOW)) {
1295 r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL);
1296 if (r < 0 && !ERRNO_IS_IOCTL_NOT_SUPPORTED(r))
1297 goto error;
1298 }
1299
1300 return TAKE_FD(fd);
1301
1302error:
1303 if (call_label_ops_post)
1304 (void) label_ops_post(fd >= 0 ? fd : dir_fd, fd >= 0 ? NULL : path, made_dir || made_file);
1305
1306 if (made_dir || made_file)
1307 (void) unlinkat(dir_fd, path, made_dir ? AT_REMOVEDIR : 0);
1308
1309 return r;
1310}
1311
1312int xopenat_lock_full(
1313 int dir_fd,
1314 const char *path,
1315 int open_flags,
1316 XOpenFlags xopen_flags,
1317 mode_t mode,
1318 LockType locktype,
1319 int operation) {
1320
1321 _cleanup_close_ int fd = -EBADF;
1322 int r;
1323
1324 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
1325 assert(IN_SET(operation & ~LOCK_NB, LOCK_EX, LOCK_SH));
1326
1327 /* POSIX/UNPOSIX locks don't work on directories (errno is set to -EBADF so let's return early with
1328 * the same error here). */
1329 if (FLAGS_SET(open_flags, O_DIRECTORY) && !IN_SET(locktype, LOCK_BSD, LOCK_NONE))
1330 return -EBADF;
1331
1332 for (;;) {
1333 struct stat st;
1334
1335 fd = xopenat_full(dir_fd, path, open_flags, xopen_flags, mode);
1336 if (fd < 0)
1337 return fd;
1338
1339 r = lock_generic(fd, locktype, operation);
1340 if (r < 0)
1341 return r;
1342
1343 /* If we acquired the lock, let's check if the file/directory still exists in the file
1344 * system. If not, then the previous exclusive owner removed it and then closed it. In such a
1345 * case our acquired lock is worthless, hence try again. */
1346
1347 if (fstat(fd, &st) < 0)
1348 return -errno;
1349 if (st.st_nlink > 0)
1350 break;
1351
1352 fd = safe_close(fd);
1353 }
1354
1355 return TAKE_FD(fd);
1356}
1357
1358int link_fd(int fd, int newdirfd, const char *newpath) {
1359 int r;
1360
1361 assert(fd >= 0);
1362 assert(newdirfd >= 0 || newdirfd == AT_FDCWD);
1363 assert(newpath);
1364
1365 /* Try to link via AT_EMPTY_PATH first. This fails with ENOENT if we don't have CAP_DAC_READ_SEARCH
1366 * on kernels < 6.10, in which case we'd then resort to /proc/self/fd/ dance.
1367 *
1368 * See also: https://github.com/torvalds/linux/commit/42bd2af5950456d46fdaa91c3a8fb02e680f19f5 */
1369 r = RET_NERRNO(linkat(fd, "", newdirfd, newpath, AT_EMPTY_PATH));
1370 if (r == -ENOENT) {
1371 r = RET_NERRNO(linkat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), newdirfd, newpath, AT_SYMLINK_FOLLOW));
1372 if (r == -ENOENT && proc_mounted() == 0) /* No proc_fd_enoent_errno() here because we don't
1373 know if it's the target path that's missing. */
1374 return -ENOSYS;
1375 }
1376
1377 return r;
1378}
1379
1380int linkat_replace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
1381 _cleanup_close_ int old_fd = -EBADF;
1382 int r;
1383
1384 assert(olddirfd >= 0 || olddirfd == AT_FDCWD);
1385 assert(newdirfd >= 0 || newdirfd == AT_FDCWD);
1386 assert(!isempty(newpath)); /* source path is optional, but the target path is not */
1387
1388 /* Like linkat() but replaces the target if needed. Is a NOP if source and target already share the
1389 * same inode. */
1390
1391 if (olddirfd == AT_FDCWD && isempty(oldpath)) /* Refuse operating on the cwd (which is a dir, and dirs can't be hardlinked) */
1392 return -EISDIR;
1393
1394 if (path_implies_directory(oldpath)) /* Refuse these definite directories early */
1395 return -EISDIR;
1396
1397 if (path_implies_directory(newpath))
1398 return -EISDIR;
1399
1400 /* First, try to link this directly */
1401 if (oldpath)
1402 r = RET_NERRNO(linkat(olddirfd, oldpath, newdirfd, newpath, 0));
1403 else
1404 r = link_fd(olddirfd, newdirfd, newpath);
1405 if (r >= 0)
1406 return 0;
1407 if (r != -EEXIST)
1408 return r;
1409
1410 old_fd = xopenat(olddirfd, oldpath, O_PATH|O_CLOEXEC);
1411 if (old_fd < 0)
1412 return old_fd;
1413
1414 struct stat old_st;
1415 if (fstat(old_fd, &old_st) < 0)
1416 return -errno;
1417
1418 if (S_ISDIR(old_st.st_mode)) /* Don't bother if we are operating on a directory */
1419 return -EISDIR;
1420
1421 struct stat new_st;
1422 if (fstatat(newdirfd, newpath, &new_st, AT_SYMLINK_NOFOLLOW) < 0)
1423 return -errno;
1424
1425 if (S_ISDIR(new_st.st_mode)) /* Refuse replacing directories */
1426 return -EEXIST;
1427
1428 if (stat_inode_same(&old_st, &new_st)) /* Already the same inode? Then shortcut this */
1429 return 0;
1430
1431 _cleanup_free_ char *tmp_path = NULL;
1432 r = tempfn_random(newpath, /* extra= */ NULL, &tmp_path);
1433 if (r < 0)
1434 return r;
1435
1436 r = link_fd(old_fd, newdirfd, tmp_path);
1437 if (r < 0) {
1438 if (!ERRNO_IS_PRIVILEGE(r))
1439 return r;
1440
1441 /* If that didn't work due to permissions then go via the path of the dentry */
1442 r = RET_NERRNO(linkat(olddirfd, oldpath, newdirfd, tmp_path, 0));
1443 if (r < 0)
1444 return r;
1445 }
1446
1447 r = RET_NERRNO(renameat(newdirfd, tmp_path, newdirfd, newpath));
1448 if (r < 0) {
1449 (void) unlinkat(newdirfd, tmp_path, /* flags= */ 0);
1450 return r;
1451 }
1452
1453 return 0;
1454}