]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/fs-util.c
capability: add new type for maintaining all five cap sets as one
[thirdparty/systemd.git] / src / basic / fs-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
f4f15635 2
11c3a366
TA
3#include <errno.h>
4#include <stddef.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <sys/stat.h>
655f2da0 9#include <linux/magic.h>
11c3a366
TA
10#include <time.h>
11#include <unistd.h>
12
b5efdb8a 13#include "alloc-util.h"
f4f15635
LP
14#include "dirent-util.h"
15#include "fd-util.h"
16#include "fileio.h"
17#include "fs-util.h"
11c3a366
TA
18#include "log.h"
19#include "macro.h"
20#include "missing.h"
93cc7779
TA
21#include "mkdir.h"
22#include "parse-util.h"
23#include "path-util.h"
dccca82b 24#include "process-util.h"
34a8f081 25#include "stat-util.h"
430fbf8e 26#include "stdio-util.h"
f4f15635
LP
27#include "string-util.h"
28#include "strv.h"
93cc7779 29#include "time-util.h"
ee104e11 30#include "user-util.h"
f4f15635
LP
31#include "util.h"
32
33int unlink_noerrno(const char *path) {
34 PROTECT_ERRNO;
35 int r;
36
37 r = unlink(path);
38 if (r < 0)
39 return -errno;
40
41 return 0;
42}
43
44int rmdir_parents(const char *path, const char *stop) {
45 size_t l;
46 int r = 0;
47
48 assert(path);
49 assert(stop);
50
51 l = strlen(path);
52
53 /* Skip trailing slashes */
54 while (l > 0 && path[l-1] == '/')
55 l--;
56
57 while (l > 0) {
58 char *t;
59
60 /* Skip last component */
61 while (l > 0 && path[l-1] != '/')
62 l--;
63
64 /* Skip trailing slashes */
65 while (l > 0 && path[l-1] == '/')
66 l--;
67
68 if (l <= 0)
69 break;
70
71 t = strndup(path, l);
72 if (!t)
73 return -ENOMEM;
74
75 if (path_startswith(stop, t)) {
76 free(t);
77 return 0;
78 }
79
80 r = rmdir(t);
81 free(t);
82
83 if (r < 0)
84 if (errno != ENOENT)
85 return -errno;
86 }
87
88 return 0;
89}
90
f4f15635 91int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
2f15b625 92 int r;
f4f15635 93
2f15b625
LP
94 /* Try the ideal approach first */
95 if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
f4f15635
LP
96 return 0;
97
2f15b625
LP
98 /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
99 * fall back to a different method. */
100 if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY))
f4f15635
LP
101 return -errno;
102
2f15b625
LP
103 /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
104 * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
105 * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
106 if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {
107
108 if (unlinkat(olddirfd, oldpath, 0) < 0) {
109 r = -errno; /* Backup errno before the following unlinkat() alters it */
110 (void) unlinkat(newdirfd, newpath, 0);
111 return r;
112 }
113
114 return 0;
f4f15635
LP
115 }
116
2f15b625 117 if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM)) /* FAT returns EPERM on link()… */
f4f15635
LP
118 return -errno;
119
2f15b625
LP
120 /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fallback to the racy TOCTOU
121 * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */
122
123 if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
124 return -EEXIST;
125 if (errno != ENOENT)
126 return -errno;
127
128 if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
f4f15635 129 return -errno;
f4f15635
LP
130
131 return 0;
132}
133
134int readlinkat_malloc(int fd, const char *p, char **ret) {
8e060ec2 135 size_t l = FILENAME_MAX+1;
f4f15635
LP
136 int r;
137
138 assert(p);
139 assert(ret);
140
141 for (;;) {
142 char *c;
143 ssize_t n;
144
145 c = new(char, l);
146 if (!c)
147 return -ENOMEM;
148
149 n = readlinkat(fd, p, c, l-1);
150 if (n < 0) {
151 r = -errno;
152 free(c);
153 return r;
154 }
155
156 if ((size_t) n < l-1) {
157 c[n] = 0;
158 *ret = c;
159 return 0;
160 }
161
162 free(c);
163 l *= 2;
164 }
165}
166
167int readlink_malloc(const char *p, char **ret) {
168 return readlinkat_malloc(AT_FDCWD, p, ret);
169}
170
171int readlink_value(const char *p, char **ret) {
172 _cleanup_free_ char *link = NULL;
173 char *value;
174 int r;
175
176 r = readlink_malloc(p, &link);
177 if (r < 0)
178 return r;
179
180 value = basename(link);
181 if (!value)
182 return -ENOENT;
183
184 value = strdup(value);
185 if (!value)
186 return -ENOMEM;
187
188 *ret = value;
189
190 return 0;
191}
192
193int readlink_and_make_absolute(const char *p, char **r) {
194 _cleanup_free_ char *target = NULL;
195 char *k;
196 int j;
197
198 assert(p);
199 assert(r);
200
201 j = readlink_malloc(p, &target);
202 if (j < 0)
203 return j;
204
205 k = file_in_same_dir(p, target);
206 if (!k)
207 return -ENOMEM;
208
209 *r = k;
210 return 0;
211}
212
f4f15635
LP
213int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
214 assert(path);
215
216 /* Under the assumption that we are running privileged we
217 * first change the access mode and only then hand out
218 * ownership to avoid a window where access is too open. */
219
220 if (mode != MODE_INVALID)
221 if (chmod(path, mode) < 0)
222 return -errno;
223
224 if (uid != UID_INVALID || gid != GID_INVALID)
225 if (chown(path, uid, gid) < 0)
226 return -errno;
b8da477e
YW
227
228 return 0;
229}
230
231int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
232 /* Under the assumption that we are running privileged we
233 * first change the access mode and only then hand out
234 * ownership to avoid a window where access is too open. */
235
236 if (mode != MODE_INVALID)
237 if (fchmod(fd, mode) < 0)
238 return -errno;
239
240 if (uid != UID_INVALID || gid != GID_INVALID)
241 if (fchown(fd, uid, gid) < 0)
242 return -errno;
f4f15635
LP
243
244 return 0;
245}
246
f4f15635
LP
/* Like fchmod(), but masks the requested mode 'm' with the process umask first. Returns 0 on success or the
 * negative errno from fchmod(). */
int fchmod_umask(int fd, mode_t m) {
        mode_t saved_mask;
        int r = 0;

        /* The only way to query the umask is to set a new one; the previous value is restored below. */
        saved_mask = umask(0777);

        if (fchmod(fd, m & (~saved_mask)) < 0)
                r = -errno;

        umask(saved_mask);

        return r;
}
257
4dfaa528 258int fchmod_opath(int fd, mode_t m) {
22dd8d35 259 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
4dfaa528
FB
260
261 /* This function operates also on fd that might have been opened with
262 * O_PATH. Indeed fchmodat() doesn't have the AT_EMPTY_PATH flag like
263 * fchownat() does. */
264
265 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
266
267 if (chmod(procfs_path, m) < 0)
268 return -errno;
269
270 return 0;
271}
272
f4f15635
LP
/* Inspects the access mode of the configuration file open as 'fd' and logs a warning for each suspicious setting:
 * any executable bit, the world-writable bit, and — only when running as PID 1 — missing group/world read bits.
 * 'path' is used in the log messages only.
 *
 * Returns 0 after the checks, or the negative errno if fstat() fails. */
int fd_warn_permissions(const char *path, int fd) {
        struct stat st;

        if (fstat(fd, &st) < 0)
                return -errno;

        if ((st.st_mode & 0111) != 0)
                log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);

        if ((st.st_mode & 0002) != 0)
                log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);

        if (getpid_cached() == 1 && (st.st_mode & 0044) != 0044)
                log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);

        return 0;
}
290
291int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
9e3fa6e8
LP
292 char fdpath[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
293 _cleanup_close_ int fd = -1;
294 int r, ret = 0;
f4f15635
LP
295
296 assert(path);
297
9e3fa6e8
LP
298 /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
299 * itself which is updated, not its target
300 *
301 * Returns the first error we encounter, but tries to apply as much as possible. */
f4f15635 302
9e3fa6e8
LP
303 if (parents)
304 (void) mkdir_parents(path, 0755);
305
306 /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
307 * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
308 * won't trigger any driver magic or so. */
309 fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
310 if (fd < 0) {
311 if (errno != ENOENT)
f4f15635 312 return -errno;
f4f15635 313
9e3fa6e8
LP
314 /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
315 * here, and nothing else */
316 fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
317 if (fd < 0)
f4f15635
LP
318 return -errno;
319 }
320
9e3fa6e8
LP
321 /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
322 * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
323 * something fchown(), fchmod(), futimensat() don't allow. */
324 xsprintf(fdpath, "/proc/self/fd/%i", fd);
325
326 if (mode != MODE_INVALID)
327 if (chmod(fdpath, mode) < 0)
328 ret = -errno;
329
330 if (uid_is_valid(uid) || gid_is_valid(gid))
331 if (chown(fdpath, uid, gid) < 0 && ret >= 0)
332 ret = -errno;
333
f4f15635
LP
334 if (stamp != USEC_INFINITY) {
335 struct timespec ts[2];
336
337 timespec_store(&ts[0], stamp);
338 ts[1] = ts[0];
9e3fa6e8 339 r = utimensat(AT_FDCWD, fdpath, ts, 0);
f4f15635 340 } else
9e3fa6e8
LP
341 r = utimensat(AT_FDCWD, fdpath, NULL, 0);
342 if (r < 0 && ret >= 0)
f4f15635
LP
343 return -errno;
344
9e3fa6e8 345 return ret;
f4f15635
LP
346}
347
348int touch(const char *path) {
ee735086 349 return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
f4f15635
LP
350}
351
6c9c51e5
YW
352int symlink_idempotent(const char *from, const char *to, bool make_relative) {
353 _cleanup_free_ char *relpath = NULL;
f4f15635
LP
354 int r;
355
356 assert(from);
357 assert(to);
358
6c9c51e5
YW
359 if (make_relative) {
360 _cleanup_free_ char *parent = NULL;
361
362 parent = dirname_malloc(to);
363 if (!parent)
364 return -ENOMEM;
365
366 r = path_make_relative(parent, from, &relpath);
367 if (r < 0)
368 return r;
369
370 from = relpath;
371 }
372
f4f15635 373 if (symlink(from, to) < 0) {
77b79723
LP
374 _cleanup_free_ char *p = NULL;
375
f4f15635
LP
376 if (errno != EEXIST)
377 return -errno;
378
379 r = readlink_malloc(to, &p);
77b79723
LP
380 if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
381 return -EEXIST;
382 if (r < 0) /* Any other error? In that case propagate it as is */
f4f15635
LP
383 return r;
384
77b79723
LP
385 if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
386 return -EEXIST;
f4f15635
LP
387 }
388
389 return 0;
390}
391
392int symlink_atomic(const char *from, const char *to) {
393 _cleanup_free_ char *t = NULL;
394 int r;
395
396 assert(from);
397 assert(to);
398
399 r = tempfn_random(to, NULL, &t);
400 if (r < 0)
401 return r;
402
403 if (symlink(from, t) < 0)
404 return -errno;
405
406 if (rename(t, to) < 0) {
407 unlink_noerrno(t);
408 return -errno;
409 }
410
411 return 0;
412}
413
414int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
415 _cleanup_free_ char *t = NULL;
416 int r;
417
418 assert(path);
419
420 r = tempfn_random(path, NULL, &t);
421 if (r < 0)
422 return r;
423
424 if (mknod(t, mode, dev) < 0)
425 return -errno;
426
427 if (rename(t, path) < 0) {
428 unlink_noerrno(t);
429 return -errno;
430 }
431
432 return 0;
433}
434
435int mkfifo_atomic(const char *path, mode_t mode) {
436 _cleanup_free_ char *t = NULL;
437 int r;
438
439 assert(path);
440
441 r = tempfn_random(path, NULL, &t);
442 if (r < 0)
443 return r;
444
445 if (mkfifo(t, mode) < 0)
446 return -errno;
447
448 if (rename(t, path) < 0) {
4fe3828c
FB
449 unlink_noerrno(t);
450 return -errno;
451 }
452
453 return 0;
454}
455
456int mkfifoat_atomic(int dirfd, const char *path, mode_t mode) {
457 _cleanup_free_ char *t = NULL;
458 int r;
459
460 assert(path);
461
462 if (path_is_absolute(path))
463 return mkfifo_atomic(path, mode);
464
465 /* We're only interested in the (random) filename. */
466 r = tempfn_random_child("", NULL, &t);
467 if (r < 0)
468 return r;
469
470 if (mkfifoat(dirfd, t, mode) < 0)
471 return -errno;
472
473 if (renameat(dirfd, t, dirfd, path) < 0) {
f4f15635
LP
474 unlink_noerrno(t);
475 return -errno;
476 }
477
478 return 0;
479}
480
481int get_files_in_directory(const char *path, char ***list) {
482 _cleanup_closedir_ DIR *d = NULL;
8fb3f009 483 struct dirent *de;
f4f15635
LP
484 size_t bufsize = 0, n = 0;
485 _cleanup_strv_free_ char **l = NULL;
486
487 assert(path);
488
489 /* Returns all files in a directory in *list, and the number
490 * of files as return value. If list is NULL returns only the
491 * number. */
492
493 d = opendir(path);
494 if (!d)
495 return -errno;
496
8fb3f009 497 FOREACH_DIRENT_ALL(de, d, return -errno) {
f4f15635
LP
498 dirent_ensure_type(d, de);
499
500 if (!dirent_is_file(de))
501 continue;
502
503 if (list) {
504 /* one extra slot is needed for the terminating NULL */
505 if (!GREEDY_REALLOC(l, bufsize, n + 2))
506 return -ENOMEM;
507
508 l[n] = strdup(de->d_name);
509 if (!l[n])
510 return -ENOMEM;
511
512 l[++n] = NULL;
513 } else
514 n++;
515 }
516
ae2a15bc
LP
517 if (list)
518 *list = TAKE_PTR(l);
f4f15635
LP
519
520 return n;
521}
430fbf8e 522
992e8f22
LP
523static int getenv_tmp_dir(const char **ret_path) {
524 const char *n;
525 int r, ret = 0;
34a8f081 526
992e8f22 527 assert(ret_path);
34a8f081 528
992e8f22
LP
529 /* We use the same order of environment variables python uses in tempfile.gettempdir():
530 * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
531 FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
532 const char *e;
533
534 e = secure_getenv(n);
535 if (!e)
536 continue;
537 if (!path_is_absolute(e)) {
538 r = -ENOTDIR;
539 goto next;
540 }
99be45a4 541 if (!path_is_normalized(e)) {
992e8f22
LP
542 r = -EPERM;
543 goto next;
544 }
545
546 r = is_dir(e, true);
547 if (r < 0)
548 goto next;
549 if (r == 0) {
550 r = -ENOTDIR;
551 goto next;
552 }
553
554 *ret_path = e;
555 return 1;
556
557 next:
558 /* Remember first error, to make this more debuggable */
559 if (ret >= 0)
560 ret = r;
34a8f081
OW
561 }
562
992e8f22
LP
563 if (ret < 0)
564 return ret;
34a8f081 565
992e8f22
LP
566 *ret_path = NULL;
567 return ret;
568}
34a8f081 569
992e8f22
LP
/* Picks the temporary directory to use: the one configured in the environment (see getenv_tmp_dir()) if there is
 * a usable one, and the default 'def' otherwise, provided is_dir(def, true) succeeds.
 *
 * Returns 0 and sets *ret on success. If neither works, the error from the environment lookup is preferred over
 * the one found for the default. */
static int tmp_dir_internal(const char *def, const char **ret) {
        const char *from_env;
        int env_r, def_r;

        assert(def);
        assert(ret);

        env_r = getenv_tmp_dir(&from_env);
        if (env_r > 0) {
                *ret = from_env;
                return 0;
        }

        def_r = is_dir(def, true);
        if (def_r == 0)
                def_r = -ENOTDIR;
        if (def_r < 0)
                return env_r < 0 ? env_r : def_r;

        *ret = def;
        return 0;
}
592
992e8f22
LP
/* Returns in *ret the location for "larger" temporary files, i.e. one that is backed by physical storage if
 * available and thus might even survive a boot: /var/tmp. If $TMPDIR (or one of the related environment
 * variables) is set, its value is preferred however. Note that both this function and tmp_dir() are affected by
 * $TMPDIR, which makes it a variable that overrides all temporary file storage locations. */
int var_tmp_dir(const char **ret) {
        static const char fallback[] = "/var/tmp";

        return tmp_dir_internal(fallback, ret);
}
602
/* Similar to var_tmp_dir(), but returns in *ret the location for "smaller" temporary files, which is usually
 * backed by an in-memory file system: /tmp. */
int tmp_dir(const char **ret) {
        static const char fallback[] = "/tmp";

        return tmp_dir_internal(fallback, ret);
}
610
af229d7a
ZJS
/* Removes 'filename' and logs an error if that fails. A file that does not exist is not considered a failure.
 * Returns 0 on success, otherwise whatever log_error_errno() returns. */
int unlink_or_warn(const char *filename) {
        if (unlink(filename) >= 0 || errno == ENOENT)
                return 0;

        /* If the file doesn't exist and the fs simply was read-only (in which case unlink() returns EROFS even if
         * the file doesn't exist), don't complain */
        if (errno == EROFS && access(filename, F_OK) < 0)
                return 0;

        return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);
}
621
430fbf8e 622int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
fbd0b64f 623 char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
430fbf8e
LP
624 int r;
625
626 /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
627 xsprintf(path, "/proc/self/fd/%i", what);
628
629 r = inotify_add_watch(fd, path, mask);
630 if (r < 0)
631 return -errno;
632
633 return r;
634}
d944dc95 635
f14f1806
LP
636static bool safe_transition(const struct stat *a, const struct stat *b) {
637 /* Returns true if the transition from a to b is safe, i.e. that we never transition from unprivileged to
638 * privileged files or directories. Why bother? So that unprivileged code can't symlink to privileged files
639 * making us believe we read something safe even though it isn't safe in the specific context we open it in. */
640
641 if (a->st_uid == 0) /* Transitioning from privileged to unprivileged is always fine */
642 return true;
643
644 return a->st_uid == b->st_uid; /* Otherwise we need to stay within the same UID */
645}
646
c4f4fce7 647int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret) {
d944dc95
LP
648 _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
649 _cleanup_close_ int fd = -1;
f10f4215 650 unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */
f14f1806 651 struct stat previous_stat;
a9fb0867 652 bool exists = true;
d944dc95
LP
653 char *todo;
654 int r;
655
656 assert(path);
657
1ed34d75 658 /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
d94a24ca 659 if (FLAGS_SET(flags, CHASE_NONEXISTENT | CHASE_OPEN))
1ed34d75
LP
660 return -EINVAL;
661
d94a24ca 662 if (FLAGS_SET(flags, CHASE_STEP | CHASE_OPEN))
49eb3659
LP
663 return -EINVAL;
664
a49424af
LP
665 if (isempty(path))
666 return -EINVAL;
667
d944dc95
LP
668 /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
669 * symlinks relative to a root directory, instead of the root of the host.
670 *
fc4b68e5 671 * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
c4f4fce7
LP
672 * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
673 * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
674 * prefixed accordingly.
d944dc95
LP
675 *
676 * Algorithmically this operates on two path buffers: "done" are the components of the path we already
677 * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
678 * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning
679 * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
680 * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
fc4b68e5
LP
681 * at a minimum.
682 *
683 * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
684 * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
685 * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
49eb3659
LP
686 * specified path.
687 *
688 * There are three ways to invoke this function:
689 *
690 * 1. Without CHASE_STEP or CHASE_OPEN: in this case the path is resolved and the normalized path is returned
691 * in `ret`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set 0 is returned if the file
692 * doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set >= 0 is returned if the destination was
693 * found, -ENOENT if it doesn't.
694 *
695 * 2. With CHASE_OPEN: in this case the destination is opened after chasing it as O_PATH and this file
696 * descriptor is returned as return value. This is useful to open files relative to some root
697 * directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using
698 * fd_reopen() or such) before it can be used for reading/writing. CHASE_OPEN may not be combined with
699 * CHASE_NONEXISTENT.
700 *
701 * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first
702 * symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if
703 * a caller wants to trace the a path through the file system verbosely. Returns < 0 on error, > 0 if the
704 * path is fully normalized, and == 0 for each normalization step. This may be combined with
705 * CHASE_NONEXISTENT, in which case 1 is returned when a component is not found.
706 *
707 * */
d944dc95 708
22bc57c5 709 /* A root directory of "/" or "" is identical to none */
57ea45e1 710 if (empty_or_root(original_root))
22bc57c5 711 original_root = NULL;
b1bfb848 712
49eb3659 713 if (!original_root && !ret && (flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_OPEN|CHASE_STEP)) == CHASE_OPEN) {
244d2f07
LP
714 /* Shortcut the CHASE_OPEN case if the caller isn't interested in the actual path and has no root set
715 * and doesn't care about any of the other special features we provide either. */
1f56e4ce 716 r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0));
244d2f07
LP
717 if (r < 0)
718 return -errno;
719
720 return r;
721 }
722
c4f4fce7
LP
723 if (original_root) {
724 r = path_make_absolute_cwd(original_root, &root);
d944dc95
LP
725 if (r < 0)
726 return r;
c4f4fce7 727
382a5078
LP
728 if (flags & CHASE_PREFIX_ROOT) {
729
730 /* We don't support relative paths in combination with a root directory */
731 if (!path_is_absolute(path))
732 return -EINVAL;
733
c4f4fce7 734 path = prefix_roota(root, path);
382a5078 735 }
d944dc95
LP
736 }
737
c4f4fce7
LP
738 r = path_make_absolute_cwd(path, &buffer);
739 if (r < 0)
740 return r;
741
d944dc95
LP
742 fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
743 if (fd < 0)
744 return -errno;
745
f14f1806
LP
746 if (flags & CHASE_SAFE) {
747 if (fstat(fd, &previous_stat) < 0)
748 return -errno;
749 }
750
d944dc95
LP
751 todo = buffer;
752 for (;;) {
753 _cleanup_free_ char *first = NULL;
754 _cleanup_close_ int child = -1;
755 struct stat st;
756 size_t n, m;
757
758 /* Determine length of first component in the path */
759 n = strspn(todo, "/"); /* The slashes */
760 m = n + strcspn(todo + n, "/"); /* The entire length of the component */
761
762 /* Extract the first component. */
763 first = strndup(todo, m);
764 if (!first)
765 return -ENOMEM;
766
767 todo += m;
768
b12d25a8
ZJS
769 /* Empty? Then we reached the end. */
770 if (isempty(first))
771 break;
772
d944dc95 773 /* Just a single slash? Then we reached the end. */
b12d25a8
ZJS
774 if (path_equal(first, "/")) {
775 /* Preserve the trailing slash */
62570f6f
LP
776
777 if (flags & CHASE_TRAIL_SLASH)
778 if (!strextend(&done, "/", NULL))
779 return -ENOMEM;
b12d25a8 780
d944dc95 781 break;
b12d25a8 782 }
d944dc95
LP
783
784 /* Just a dot? Then let's eat this up. */
785 if (path_equal(first, "/."))
786 continue;
787
788 /* Two dots? Then chop off the last bit of what we already found out. */
789 if (path_equal(first, "/..")) {
790 _cleanup_free_ char *parent = NULL;
2b6d2dda 791 _cleanup_close_ int fd_parent = -1;
d944dc95 792
a4eaf3cf
LP
793 /* If we already are at the top, then going up will not change anything. This is in-line with
794 * how the kernel handles this. */
57ea45e1 795 if (empty_or_root(done))
a4eaf3cf 796 continue;
d944dc95
LP
797
798 parent = dirname_malloc(done);
799 if (!parent)
800 return -ENOMEM;
801
a4eaf3cf 802 /* Don't allow this to leave the root dir. */
d944dc95
LP
803 if (root &&
804 path_startswith(done, root) &&
805 !path_startswith(parent, root))
a4eaf3cf 806 continue;
d944dc95 807
3b319885 808 free_and_replace(done, parent);
d944dc95 809
49eb3659
LP
810 if (flags & CHASE_STEP)
811 goto chased_one;
812
d944dc95
LP
813 fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
814 if (fd_parent < 0)
815 return -errno;
816
f14f1806
LP
817 if (flags & CHASE_SAFE) {
818 if (fstat(fd_parent, &st) < 0)
819 return -errno;
820
821 if (!safe_transition(&previous_stat, &st))
822 return -EPERM;
823
824 previous_stat = st;
825 }
826
d944dc95 827 safe_close(fd);
c10d6bdb 828 fd = TAKE_FD(fd_parent);
d944dc95
LP
829
830 continue;
831 }
832
833 /* Otherwise let's see what this is. */
834 child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
a9fb0867
LP
835 if (child < 0) {
836
837 if (errno == ENOENT &&
cb638b5e 838 (flags & CHASE_NONEXISTENT) &&
99be45a4 839 (isempty(todo) || path_is_normalized(todo))) {
a9fb0867 840
cb638b5e 841 /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return
a9fb0867
LP
842 * what we got so far. But don't allow this if the remaining path contains "../ or "./"
843 * or something else weird. */
844
a1904a46
YW
845 /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
846 if (streq_ptr(done, "/"))
847 *done = '\0';
848
a9fb0867
LP
849 if (!strextend(&done, first, todo, NULL))
850 return -ENOMEM;
851
852 exists = false;
853 break;
854 }
855
d944dc95 856 return -errno;
a9fb0867 857 }
d944dc95
LP
858
859 if (fstat(child, &st) < 0)
860 return -errno;
f14f1806
LP
861 if ((flags & CHASE_SAFE) &&
862 !safe_transition(&previous_stat, &st))
863 return -EPERM;
864
865 previous_stat = st;
866
655f2da0 867 if ((flags & CHASE_NO_AUTOFS) &&
a66fee2e 868 fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0)
655f2da0 869 return -EREMOTE;
d944dc95 870
1f56e4ce 871 if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) {
877777d7
CCW
872 char *joined;
873
d944dc95
LP
874 _cleanup_free_ char *destination = NULL;
875
876 /* This is a symlink, in this case read the destination. But let's make sure we don't follow
877 * symlinks without bounds. */
878 if (--max_follow <= 0)
879 return -ELOOP;
880
881 r = readlinkat_malloc(fd, first + n, &destination);
882 if (r < 0)
883 return r;
884 if (isempty(destination))
885 return -EINVAL;
886
887 if (path_is_absolute(destination)) {
888
889 /* An absolute destination. Start the loop from the beginning, but use the root
890 * directory as base. */
891
892 safe_close(fd);
893 fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
894 if (fd < 0)
895 return -errno;
896
f14f1806
LP
897 if (flags & CHASE_SAFE) {
898 if (fstat(fd, &st) < 0)
899 return -errno;
900
901 if (!safe_transition(&previous_stat, &st))
902 return -EPERM;
903
904 previous_stat = st;
905 }
906
b539437a
YW
907 free(done);
908
d944dc95
LP
909 /* Note that we do not revalidate the root, we take it as is. */
910 if (isempty(root))
911 done = NULL;
912 else {
913 done = strdup(root);
914 if (!done)
915 return -ENOMEM;
916 }
917
8c4a8ea2
LP
918 /* Prefix what's left to do with what we just read, and start the loop again, but
919 * remain in the current directory. */
920 joined = strjoin(destination, todo);
921 } else
922 joined = strjoin("/", destination, todo);
877777d7
CCW
923 if (!joined)
924 return -ENOMEM;
d944dc95 925
877777d7
CCW
926 free(buffer);
927 todo = buffer = joined;
d944dc95 928
49eb3659
LP
929 if (flags & CHASE_STEP)
930 goto chased_one;
931
d944dc95
LP
932 continue;
933 }
934
935 /* If this is not a symlink, then let's just add the name we read to what we already verified. */
ae2a15bc
LP
936 if (!done)
937 done = TAKE_PTR(first);
938 else {
a1904a46
YW
939 /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
940 if (streq(done, "/"))
941 *done = '\0';
942
d944dc95
LP
943 if (!strextend(&done, first, NULL))
944 return -ENOMEM;
945 }
946
947 /* And iterate again, but go one directory further down. */
948 safe_close(fd);
c10d6bdb 949 fd = TAKE_FD(child);
d944dc95
LP
950 }
951
952 if (!done) {
953 /* Special case, turn the empty string into "/", to indicate the root directory. */
954 done = strdup("/");
955 if (!done)
956 return -ENOMEM;
957 }
958
ae2a15bc
LP
959 if (ret)
960 *ret = TAKE_PTR(done);
d944dc95 961
1ed34d75 962 if (flags & CHASE_OPEN) {
1ed34d75
LP
963 /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a proper fd by
964 * opening /proc/self/fd/xyz. */
965
966 assert(fd >= 0);
c10d6bdb 967 return TAKE_FD(fd);
1ed34d75
LP
968 }
969
49eb3659
LP
970 if (flags & CHASE_STEP)
971 return 1;
972
a9fb0867 973 return exists;
49eb3659
LP
974
975chased_one:
49eb3659
LP
976 if (ret) {
977 char *c;
978
027cc9c9
ZJS
979 c = strjoin(strempty(done), todo);
980 if (!c)
981 return -ENOMEM;
49eb3659
LP
982
983 *ret = c;
984 }
985
986 return 0;
d944dc95 987}
57a4359e 988
21c692e9
LP
989int chase_symlinks_and_open(
990 const char *path,
991 const char *root,
992 unsigned chase_flags,
993 int open_flags,
994 char **ret_path) {
995
996 _cleanup_close_ int path_fd = -1;
997 _cleanup_free_ char *p = NULL;
998 int r;
999
1000 if (chase_flags & CHASE_NONEXISTENT)
1001 return -EINVAL;
1002
57ea45e1 1003 if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
21c692e9
LP
1004 /* Shortcut this call if none of the special features of this call are requested */
1005 r = open(path, open_flags);
1006 if (r < 0)
1007 return -errno;
1008
1009 return r;
1010 }
1011
1012 path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
1013 if (path_fd < 0)
1014 return path_fd;
1015
1016 r = fd_reopen(path_fd, open_flags);
1017 if (r < 0)
1018 return r;
1019
1020 if (ret_path)
1021 *ret_path = TAKE_PTR(p);
1022
1023 return r;
1024}
1025
1026int chase_symlinks_and_opendir(
1027 const char *path,
1028 const char *root,
1029 unsigned chase_flags,
1030 char **ret_path,
1031 DIR **ret_dir) {
1032
1033 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
1034 _cleanup_close_ int path_fd = -1;
1035 _cleanup_free_ char *p = NULL;
1036 DIR *d;
1037
1038 if (!ret_dir)
1039 return -EINVAL;
1040 if (chase_flags & CHASE_NONEXISTENT)
1041 return -EINVAL;
1042
57ea45e1 1043 if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
21c692e9
LP
1044 /* Shortcut this call if none of the special features of this call are requested */
1045 d = opendir(path);
1046 if (!d)
1047 return -errno;
1048
1049 *ret_dir = d;
1050 return 0;
1051 }
1052
1053 path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
1054 if (path_fd < 0)
1055 return path_fd;
1056
1057 xsprintf(procfs_path, "/proc/self/fd/%i", path_fd);
1058 d = opendir(procfs_path);
1059 if (!d)
1060 return -errno;
1061
1062 if (ret_path)
1063 *ret_path = TAKE_PTR(p);
1064
1065 *ret_dir = d;
1066 return 0;
1067}
1068
d2bcd0ba
LP
1069int chase_symlinks_and_stat(
1070 const char *path,
1071 const char *root,
1072 unsigned chase_flags,
1073 char **ret_path,
1074 struct stat *ret_stat) {
1075
1076 _cleanup_close_ int path_fd = -1;
1077 _cleanup_free_ char *p = NULL;
1078
1079 assert(path);
1080 assert(ret_stat);
1081
1082 if (chase_flags & CHASE_NONEXISTENT)
1083 return -EINVAL;
1084
1085 if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
1086 /* Shortcut this call if none of the special features of this call are requested */
1087 if (stat(path, ret_stat) < 0)
1088 return -errno;
1089
1090 return 1;
1091 }
1092
1093 path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
1094 if (path_fd < 0)
1095 return path_fd;
1096
1097 if (fstat(path_fd, ret_stat) < 0)
1098 return -errno;
1099
1100 if (ret_path)
1101 *ret_path = TAKE_PTR(p);
1102
1103 if (chase_flags & CHASE_OPEN)
1104 return TAKE_FD(path_fd);
1105
1106 return 1;
1107}
1108
/* Same as access(), but checks an already open file descriptor, by going through its
 * /proc/self/fd/ entry. Returns the result of access() on success, or a negative errno on failure. */
int access_fd(int fd, int mode) {
        char proc_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
        int rc;

        xsprintf(proc_path, "/proc/self/fd/%i", fd);

        rc = access(proc_path, mode);
        return rc < 0 ? -errno : rc;
}
43767d9d 1122
627d2bac
ZJS
/* Removes the temporary file whose name is stored in the array 'p' points to.
 *
 * mkstemp() (almost always) replaces the ".XXXXXX" suffix of the template when it creates the file.
 * Hence, if the suffix is still in place we assume no file was created and do nothing. The rare case
 * where the generated name happens to match the template is ignored, and so are unlink failures. */
void unlink_tempfilep(char (*p)[]) {
        if (endswith(*p, ".XXXXXX"))
                return; /* Template untouched, nothing to clean up */

        (void) unlink_noerrno(*p);
}
1131
43767d9d
LP
1132int unlinkat_deallocate(int fd, const char *name, int flags) {
1133 _cleanup_close_ int truncate_fd = -1;
1134 struct stat st;
1135 off_t l, bs;
1136
1137 /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
1138 * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
1139 * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
1140 * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
1141 * returned to the free pool.
1142 *
1143 * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
1144 * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
1145 * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
1146 * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
1147 * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
1148 * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
1149 *
1150 * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
1151 * primary job – to delete the file – is accomplished. */
1152
1153 if ((flags & AT_REMOVEDIR) == 0) {
1154 truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
1155 if (truncate_fd < 0) {
1156
1157 /* If this failed because the file doesn't exist propagate the error right-away. Also,
1158 * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
1159 * returned when this is a directory but we are not supposed to delete those, hence propagate
1160 * the error right-away too. */
1161 if (IN_SET(errno, ENOENT, EISDIR))
1162 return -errno;
1163
1164 if (errno != ELOOP) /* don't complain if this is a symlink */
1165 log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
1166 }
1167 }
1168
1169 if (unlinkat(fd, name, flags) < 0)
1170 return -errno;
1171
1172 if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
1173 return 0;
1174
1175 if (fstat(truncate_fd, &st) < 0) {
011723a4 1176 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
43767d9d
LP
1177 return 0;
1178 }
1179
1180 if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0)
1181 return 0;
1182
1183 /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
1184 * punch-hole/truncate this to release the disk space. */
1185
1186 bs = MAX(st.st_blksize, 512);
1187 l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */
1188
1189 if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
1190 return 0; /* Successfully punched a hole! 😊 */
1191
1192 /* Fall back to truncation */
1193 if (ftruncate(truncate_fd, 0) < 0) {
1194 log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
1195 return 0;
1196 }
1197
1198 return 0;
1199}
11b29a96
LP
1200
1201int fsync_directory_of_file(int fd) {
0c462ea4 1202 _cleanup_free_ char *path = NULL;
11b29a96
LP
1203 _cleanup_close_ int dfd = -1;
1204 int r;
1205
1206 r = fd_verify_regular(fd);
1207 if (r < 0)
1208 return r;
1209
1210 r = fd_get_path(fd, &path);
3ceae1bc 1211 if (r < 0) {
b8b846d7
LP
1212 log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m",
1213 fd,
1214 r == -EOPNOTSUPP ? ", ignoring" : "");
3ceae1bc
ZJS
1215
1216 if (r == -EOPNOTSUPP)
1217 /* If /proc is not available, we're most likely running in some
1218 * chroot environment, and syncing the directory is not very
1219 * important in that case. Let's just silently do nothing. */
1220 return 0;
1221
11b29a96 1222 return r;
3ceae1bc 1223 }
11b29a96
LP
1224
1225 if (!path_is_absolute(path))
1226 return -EINVAL;
1227
0c462ea4 1228 dfd = open_parent(path, O_CLOEXEC, 0);
11b29a96 1229 if (dfd < 0)
0c462ea4 1230 return dfd;
11b29a96
LP
1231
1232 if (fsync(dfd) < 0)
1233 return -errno;
1234
1235 return 0;
1236}
ef8becfa 1237
36695e88
LP
1238int fsync_path_at(int at_fd, const char *path) {
1239 _cleanup_close_ int opened_fd = -1;
1240 int fd;
1241
1242 if (isempty(path)) {
1243 if (at_fd == AT_FDCWD) {
1244 opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
1245 if (opened_fd < 0)
1246 return -errno;
1247
1248 fd = opened_fd;
1249 } else
1250 fd = at_fd;
1251 } else {
1252
1253 opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC);
1254 if (opened_fd < 0)
1255 return -errno;
1256
1257 fd = opened_fd;
1258 }
1259
1260 if (fsync(fd) < 0)
1261 return -errno;
1262
1263 return 0;
1264}
1265
ef8becfa
LP
1266int open_parent(const char *path, int flags, mode_t mode) {
1267 _cleanup_free_ char *parent = NULL;
1268 int fd;
1269
1270 if (isempty(path))
1271 return -EINVAL;
1272 if (path_equal(path, "/")) /* requesting the parent of the root dir is fishy, let's prohibit that */
1273 return -EINVAL;
1274
1275 parent = dirname_malloc(path);
1276 if (!parent)
1277 return -ENOMEM;
1278
1279 /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an
1280 * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */
1281
1282 if ((flags & O_PATH) == O_PATH)
1283 flags |= O_DIRECTORY;
1284 else if ((flags & O_TMPFILE) != O_TMPFILE)
1285 flags |= O_DIRECTORY|O_RDONLY;
1286
1287 fd = open(parent, flags, mode);
1288 if (fd < 0)
1289 return -errno;
1290
1291 return fd;
1292}