]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/fs-util.c
Merge pull request #8086 from hdante/sdboot-setmode-v2
[thirdparty/systemd.git] / src / basic / fs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <stddef.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/stat.h>
27 #include <linux/magic.h>
28 #include <time.h>
29 #include <unistd.h>
30
31 #include "alloc-util.h"
32 #include "dirent-util.h"
33 #include "fd-util.h"
34 #include "fileio.h"
35 #include "fs-util.h"
36 #include "log.h"
37 #include "macro.h"
38 #include "missing.h"
39 #include "mkdir.h"
40 #include "parse-util.h"
41 #include "path-util.h"
42 #include "process-util.h"
43 #include "stat-util.h"
44 #include "stdio-util.h"
45 #include "string-util.h"
46 #include "strv.h"
47 #include "time-util.h"
48 #include "user-util.h"
49 #include "util.h"
50
51 int unlink_noerrno(const char *path) {
52 PROTECT_ERRNO;
53 int r;
54
55 r = unlink(path);
56 if (r < 0)
57 return -errno;
58
59 return 0;
60 }
61
62 int rmdir_parents(const char *path, const char *stop) {
63 size_t l;
64 int r = 0;
65
66 assert(path);
67 assert(stop);
68
69 l = strlen(path);
70
71 /* Skip trailing slashes */
72 while (l > 0 && path[l-1] == '/')
73 l--;
74
75 while (l > 0) {
76 char *t;
77
78 /* Skip last component */
79 while (l > 0 && path[l-1] != '/')
80 l--;
81
82 /* Skip trailing slashes */
83 while (l > 0 && path[l-1] == '/')
84 l--;
85
86 if (l <= 0)
87 break;
88
89 t = strndup(path, l);
90 if (!t)
91 return -ENOMEM;
92
93 if (path_startswith(stop, t)) {
94 free(t);
95 return 0;
96 }
97
98 r = rmdir(t);
99 free(t);
100
101 if (r < 0)
102 if (errno != ENOENT)
103 return -errno;
104 }
105
106 return 0;
107 }
108
109 int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
110 struct stat buf;
111 int ret;
112
113 ret = renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE);
114 if (ret >= 0)
115 return 0;
116
117 /* renameat2() exists since Linux 3.15, btrfs added support for it later.
118 * If it is not implemented, fallback to another method. */
119 if (!IN_SET(errno, EINVAL, ENOSYS))
120 return -errno;
121
122 /* The link()/unlink() fallback does not work on directories. But
123 * renameat() without RENAME_NOREPLACE gives the same semantics on
124 * directories, except when newpath is an *empty* directory. This is
125 * good enough. */
126 ret = fstatat(olddirfd, oldpath, &buf, AT_SYMLINK_NOFOLLOW);
127 if (ret >= 0 && S_ISDIR(buf.st_mode)) {
128 ret = renameat(olddirfd, oldpath, newdirfd, newpath);
129 return ret >= 0 ? 0 : -errno;
130 }
131
132 /* If it is not a directory, use the link()/unlink() fallback. */
133 ret = linkat(olddirfd, oldpath, newdirfd, newpath, 0);
134 if (ret < 0)
135 return -errno;
136
137 ret = unlinkat(olddirfd, oldpath, 0);
138 if (ret < 0) {
139 /* backup errno before the following unlinkat() alters it */
140 ret = errno;
141 (void) unlinkat(newdirfd, newpath, 0);
142 errno = ret;
143 return -errno;
144 }
145
146 return 0;
147 }
148
149 int readlinkat_malloc(int fd, const char *p, char **ret) {
150 size_t l = 100;
151 int r;
152
153 assert(p);
154 assert(ret);
155
156 for (;;) {
157 char *c;
158 ssize_t n;
159
160 c = new(char, l);
161 if (!c)
162 return -ENOMEM;
163
164 n = readlinkat(fd, p, c, l-1);
165 if (n < 0) {
166 r = -errno;
167 free(c);
168 return r;
169 }
170
171 if ((size_t) n < l-1) {
172 c[n] = 0;
173 *ret = c;
174 return 0;
175 }
176
177 free(c);
178 l *= 2;
179 }
180 }
181
182 int readlink_malloc(const char *p, char **ret) {
183 return readlinkat_malloc(AT_FDCWD, p, ret);
184 }
185
186 int readlink_value(const char *p, char **ret) {
187 _cleanup_free_ char *link = NULL;
188 char *value;
189 int r;
190
191 r = readlink_malloc(p, &link);
192 if (r < 0)
193 return r;
194
195 value = basename(link);
196 if (!value)
197 return -ENOENT;
198
199 value = strdup(value);
200 if (!value)
201 return -ENOMEM;
202
203 *ret = value;
204
205 return 0;
206 }
207
208 int readlink_and_make_absolute(const char *p, char **r) {
209 _cleanup_free_ char *target = NULL;
210 char *k;
211 int j;
212
213 assert(p);
214 assert(r);
215
216 j = readlink_malloc(p, &target);
217 if (j < 0)
218 return j;
219
220 k = file_in_same_dir(p, target);
221 if (!k)
222 return -ENOMEM;
223
224 *r = k;
225 return 0;
226 }
227
228 int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
229 assert(path);
230
231 /* Under the assumption that we are running privileged we
232 * first change the access mode and only then hand out
233 * ownership to avoid a window where access is too open. */
234
235 if (mode != MODE_INVALID)
236 if (chmod(path, mode) < 0)
237 return -errno;
238
239 if (uid != UID_INVALID || gid != GID_INVALID)
240 if (chown(path, uid, gid) < 0)
241 return -errno;
242
243 return 0;
244 }
245
int fchmod_umask(int fd, mode_t m) {
        mode_t saved_mask;
        int result = 0;

        /* fchmod() with the process umask applied to the requested mode. umask() can only be queried by
         * setting it, hence we set a temporary value, apply the masked mode and then restore the original
         * value. Returns 0 on success, the negated fchmod() errno otherwise. */

        saved_mask = umask(0777);

        if (fchmod(fd, m & (~saved_mask)) < 0)
                result = -errno;

        umask(saved_mask);

        return result;
}
256
int fd_warn_permissions(const char *path, int fd) {
        struct stat st;

        /* Logs warnings if the configuration file open as 'fd' carries unusual permission bits. 'path' is
         * only used for the log messages. Returns 0 if the check could be done (regardless whether anything
         * was logged), or a negated errno if fstat() failed. */

        if (fstat(fd, &st) < 0)
                return -errno;

        /* Don't complain if what we are reading is not a regular file, for example /dev/null: device nodes
         * and the like legitimately carry permission bits (e.g. world-writable) that would only produce
         * spurious warnings here. */
        if (!S_ISREG(st.st_mode))
                return 0;

        if (st.st_mode & 0111)
                log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);

        if (st.st_mode & 0002)
                log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);

        /* Only PID 1 complains about files that are not readable by group and others */
        if (getpid_cached() == 1 && (st.st_mode & 0044) != 0044)
                log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);

        return 0;
}
274
275 int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
276 char fdpath[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
277 _cleanup_close_ int fd = -1;
278 int r, ret = 0;
279
280 assert(path);
281
282 /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
283 * itself which is updated, not its target
284 *
285 * Returns the first error we encounter, but tries to apply as much as possible. */
286
287 if (parents)
288 (void) mkdir_parents(path, 0755);
289
290 /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
291 * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
292 * won't trigger any driver magic or so. */
293 fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
294 if (fd < 0) {
295 if (errno != ENOENT)
296 return -errno;
297
298 /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
299 * here, and nothing else */
300 fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
301 if (fd < 0)
302 return -errno;
303 }
304
305 /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
306 * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
307 * something fchown(), fchmod(), futimensat() don't allow. */
308 xsprintf(fdpath, "/proc/self/fd/%i", fd);
309
310 if (mode != MODE_INVALID)
311 if (chmod(fdpath, mode) < 0)
312 ret = -errno;
313
314 if (uid_is_valid(uid) || gid_is_valid(gid))
315 if (chown(fdpath, uid, gid) < 0 && ret >= 0)
316 ret = -errno;
317
318 if (stamp != USEC_INFINITY) {
319 struct timespec ts[2];
320
321 timespec_store(&ts[0], stamp);
322 ts[1] = ts[0];
323 r = utimensat(AT_FDCWD, fdpath, ts, 0);
324 } else
325 r = utimensat(AT_FDCWD, fdpath, NULL, 0);
326 if (r < 0 && ret >= 0)
327 return -errno;
328
329 return ret;
330 }
331
332 int touch(const char *path) {
333 return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
334 }
335
336 int symlink_idempotent(const char *from, const char *to) {
337 int r;
338
339 assert(from);
340 assert(to);
341
342 if (symlink(from, to) < 0) {
343 _cleanup_free_ char *p = NULL;
344
345 if (errno != EEXIST)
346 return -errno;
347
348 r = readlink_malloc(to, &p);
349 if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
350 return -EEXIST;
351 if (r < 0) /* Any other error? In that case propagate it as is */
352 return r;
353
354 if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
355 return -EEXIST;
356 }
357
358 return 0;
359 }
360
361 int symlink_atomic(const char *from, const char *to) {
362 _cleanup_free_ char *t = NULL;
363 int r;
364
365 assert(from);
366 assert(to);
367
368 r = tempfn_random(to, NULL, &t);
369 if (r < 0)
370 return r;
371
372 if (symlink(from, t) < 0)
373 return -errno;
374
375 if (rename(t, to) < 0) {
376 unlink_noerrno(t);
377 return -errno;
378 }
379
380 return 0;
381 }
382
383 int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
384 _cleanup_free_ char *t = NULL;
385 int r;
386
387 assert(path);
388
389 r = tempfn_random(path, NULL, &t);
390 if (r < 0)
391 return r;
392
393 if (mknod(t, mode, dev) < 0)
394 return -errno;
395
396 if (rename(t, path) < 0) {
397 unlink_noerrno(t);
398 return -errno;
399 }
400
401 return 0;
402 }
403
404 int mkfifo_atomic(const char *path, mode_t mode) {
405 _cleanup_free_ char *t = NULL;
406 int r;
407
408 assert(path);
409
410 r = tempfn_random(path, NULL, &t);
411 if (r < 0)
412 return r;
413
414 if (mkfifo(t, mode) < 0)
415 return -errno;
416
417 if (rename(t, path) < 0) {
418 unlink_noerrno(t);
419 return -errno;
420 }
421
422 return 0;
423 }
424
425 int get_files_in_directory(const char *path, char ***list) {
426 _cleanup_closedir_ DIR *d = NULL;
427 struct dirent *de;
428 size_t bufsize = 0, n = 0;
429 _cleanup_strv_free_ char **l = NULL;
430
431 assert(path);
432
433 /* Returns all files in a directory in *list, and the number
434 * of files as return value. If list is NULL returns only the
435 * number. */
436
437 d = opendir(path);
438 if (!d)
439 return -errno;
440
441 FOREACH_DIRENT_ALL(de, d, return -errno) {
442 dirent_ensure_type(d, de);
443
444 if (!dirent_is_file(de))
445 continue;
446
447 if (list) {
448 /* one extra slot is needed for the terminating NULL */
449 if (!GREEDY_REALLOC(l, bufsize, n + 2))
450 return -ENOMEM;
451
452 l[n] = strdup(de->d_name);
453 if (!l[n])
454 return -ENOMEM;
455
456 l[++n] = NULL;
457 } else
458 n++;
459 }
460
461 if (list) {
462 *list = l;
463 l = NULL; /* avoid freeing */
464 }
465
466 return n;
467 }
468
469 static int getenv_tmp_dir(const char **ret_path) {
470 const char *n;
471 int r, ret = 0;
472
473 assert(ret_path);
474
475 /* We use the same order of environment variables python uses in tempfile.gettempdir():
476 * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
477 FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
478 const char *e;
479
480 e = secure_getenv(n);
481 if (!e)
482 continue;
483 if (!path_is_absolute(e)) {
484 r = -ENOTDIR;
485 goto next;
486 }
487 if (!path_is_normalized(e)) {
488 r = -EPERM;
489 goto next;
490 }
491
492 r = is_dir(e, true);
493 if (r < 0)
494 goto next;
495 if (r == 0) {
496 r = -ENOTDIR;
497 goto next;
498 }
499
500 *ret_path = e;
501 return 1;
502
503 next:
504 /* Remember first error, to make this more debuggable */
505 if (ret >= 0)
506 ret = r;
507 }
508
509 if (ret < 0)
510 return ret;
511
512 *ret_path = NULL;
513 return ret;
514 }
515
static int tmp_dir_internal(const char *def, const char **ret) {
        const char *from_env;
        int env_result, def_result;

        /* Picks the temporary directory to use: the one configured in the environment if there is a usable
         * one, and the default 'def' otherwise. Returns 0 and sets *ret on success. If the default is not
         * usable either, the error from the environment lookup is returned in preference to the one for the
         * default. */

        assert(def);
        assert(ret);

        env_result = getenv_tmp_dir(&from_env);
        if (env_result > 0) {
                *ret = from_env;
                return 0;
        }

        def_result = is_dir(def, true);
        if (def_result > 0) {
                *ret = def;
                return 0;
        }

        if (env_result < 0)
                return env_result;

        return def_result == 0 ? -ENOTDIR : def_result;
}
538
int var_tmp_dir(const char **ret) {

        /* The place for "larger" temporary files: /var/tmp, which is backed by physical storage if
         * available, so that files placed there might even survive a reboot. However, if $TMPDIR (or one of
         * the related environment variables) is set, its value takes precedence. Since the same is true for
         * tmp_dir() below, $TMPDIR is effectively an override for all temporary file storage locations. */

        return tmp_dir_internal("/var/tmp", ret);
}
548
int tmp_dir(const char **ret) {

        /* The counterpart of var_tmp_dir() for "smaller" temporary files: /tmp, which usually is backed by
         * an in-memory file system. */

        return tmp_dir_internal("/tmp", ret);
}
556
int unlink_or_warn(const char *filename) {
        /* Removes 'filename' and logs an error if that fails. A file that is already gone is not considered
         * a failure. Returns 0 if there was nothing to complain about, the result of log_error_errno()
         * otherwise. */

        if (unlink(filename) >= 0 || errno == ENOENT)
                return 0;

        /* On a read-only file system unlink() returns EROFS even if the file doesn't exist. Don't complain
         * in that case either. */
        if (errno == EROFS && access(filename, F_OK) < 0)
                return 0;

        return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);
}
567
568 int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
569 char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
570 int r;
571
572 /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
573 xsprintf(path, "/proc/self/fd/%i", what);
574
575 r = inotify_add_watch(fd, path, mask);
576 if (r < 0)
577 return -errno;
578
579 return r;
580 }
581
static bool safe_transition(const struct stat *a, const struct stat *b) {
        /* Tells whether stepping from the file or directory described by 'a' to the one described by 'b' is
         * safe, judging by their owners: leaving something owned by root (UID 0) is always fine, anything
         * else requires 'b' to be owned by the same UID as 'a'. The point of this is to keep unprivileged
         * code from placing symlinks to privileged files, tricking us into believing we read something safe
         * even though it isn't safe in the specific context we open it in. */

        return a->st_uid == 0 || a->st_uid == b->st_uid;
}
592
/* Resolves the symlinks, "." and ".." components of 'path' one component at a time, optionally relative to the
 * root directory 'original_root'.
 *
 * Flags, as handled in this function:
 *   CHASE_PREFIX_ROOT - prefix 'path' with the root first (then 'path' must be absolute)
 *   CHASE_NONEXISTENT - a missing component is not an error; the unresolved remainder is appended as-is
 *   CHASE_SAFE        - fail with -EPERM on any step that safe_transition() rejects
 *   CHASE_NO_AUTOFS   - fail with -EREMOTE if a component is found to be on autofs
 *   CHASE_OPEN        - return the O_PATH fd of the final object (cannot be combined with CHASE_NONEXISTENT)
 *
 * On success the resolved path is stored in *ret (if 'ret' is non-NULL) as a newly allocated string owned by the
 * caller. Returns a negative errno-style value on failure. Otherwise returns the O_PATH fd if CHASE_OPEN is set,
 * or else 1 if the path exists in full and 0 if it does not (the latter only happens with
 * CHASE_NONEXISTENT). */
int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret) {
        _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
        _cleanup_close_ int fd = -1;
        unsigned max_follow = 32; /* how many symlinks to follow before giving up and returning ELOOP */
        struct stat previous_stat;
        bool exists = true;
        char *todo;
        int r;

        assert(path);

        /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
        if ((flags & (CHASE_NONEXISTENT|CHASE_OPEN)) == (CHASE_NONEXISTENT|CHASE_OPEN))
                return -EINVAL;

        if (isempty(path))
                return -EINVAL;

        /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
         * symlinks relative to a root directory, instead of the root of the host.
         *
         * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
         * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
         * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
         * prefixed accordingly.
         *
         * Algorithmically this operates on two path buffers: "done" are the components of the path we already
         * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
         * process. On each iteration, we move one component from "todo" to "done", processing its special meaning
         * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
         * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
         * at a minimum.
         *
         * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
         * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
         * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
         * specified path. */

        /* A root directory of "/" or "" is identical to none */
        if (isempty(original_root) || path_equal(original_root, "/"))
                original_root = NULL;

        if (original_root) {
                r = path_make_absolute_cwd(original_root, &root);
                if (r < 0)
                        return r;

                if (flags & CHASE_PREFIX_ROOT) {

                        /* We don't support relative paths in combination with a root directory */
                        if (!path_is_absolute(path))
                                return -EINVAL;

                        path = prefix_roota(root, path);
                }
        }

        r = path_make_absolute_cwd(path, &buffer);
        if (r < 0)
                return r;

        /* The walk always starts at the host's root directory; "fd" follows along as we descend */
        fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
        if (fd < 0)
                return -errno;

        if (flags & CHASE_SAFE) {
                if (fstat(fd, &previous_stat) < 0)
                        return -errno;
        }

        todo = buffer;
        for (;;) {
                _cleanup_free_ char *first = NULL;
                _cleanup_close_ int child = -1;
                struct stat st;
                size_t n, m;

                /* Determine length of first component in the path */
                n = strspn(todo, "/");                  /* The slashes */
                m = n + strcspn(todo + n, "/");         /* The entire length of the component */

                /* Extract the first component. */
                first = strndup(todo, m);
                if (!first)
                        return -ENOMEM;

                todo += m;

                /* Empty? Then we reached the end. */
                if (isempty(first))
                        break;

                /* Just a single slash? Then we reached the end. */
                if (path_equal(first, "/")) {
                        /* Preserve the trailing slash */
                        if (!strextend(&done, "/", NULL))
                                return -ENOMEM;

                        break;
                }

                /* Just a dot? Then let's eat this up. */
                if (path_equal(first, "/."))
                        continue;

                /* Two dots? Then chop off the last bit of what we already found out. */
                if (path_equal(first, "/..")) {
                        _cleanup_free_ char *parent = NULL;
                        _cleanup_close_ int fd_parent = -1;

                        /* If we already are at the top, then going up will not change anything. This is in-line with
                         * how the kernel handles this. */
                        if (isempty(done) || path_equal(done, "/"))
                                continue;

                        parent = dirname_malloc(done);
                        if (!parent)
                                return -ENOMEM;

                        /* Don't allow this to leave the root dir.  */
                        if (root &&
                            path_startswith(done, root) &&
                            !path_startswith(parent, root))
                                continue;

                        free_and_replace(done, parent);

                        fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
                        if (fd_parent < 0)
                                return -errno;

                        if (flags & CHASE_SAFE) {
                                if (fstat(fd_parent, &st) < 0)
                                        return -errno;

                                if (!safe_transition(&previous_stat, &st))
                                        return -EPERM;

                                previous_stat = st;
                        }

                        /* Continue the walk from the parent directory */
                        safe_close(fd);
                        fd = fd_parent;
                        fd_parent = -1;

                        continue;
                }

                /* Otherwise let's see what this is. ("first + n" is the component name without its leading
                 * slashes) */
                child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
                if (child < 0) {

                        if (errno == ENOENT &&
                            (flags & CHASE_NONEXISTENT) &&
                            (isempty(todo) || path_is_normalized(todo))) {

                                /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return
                                 * what we got so far. But don't allow this if the remaining path contains "../" or "./"
                                 * or something else weird. */

                                /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
                                if (streq_ptr(done, "/"))
                                        *done = '\0';

                                if (!strextend(&done, first, todo, NULL))
                                        return -ENOMEM;

                                exists = false;
                                break;
                        }

                        return -errno;
                }

                if (fstat(child, &st) < 0)
                        return -errno;
                if ((flags & CHASE_SAFE) &&
                    !safe_transition(&previous_stat, &st))
                        return -EPERM;

                previous_stat = st;

                if ((flags & CHASE_NO_AUTOFS) &&
                    fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0)
                        return -EREMOTE;

                if (S_ISLNK(st.st_mode)) {
                        char *joined;

                        _cleanup_free_ char *destination = NULL;

                        /* This is a symlink, in this case read the destination. But let's make sure we don't follow
                         * symlinks without bounds. */
                        if (--max_follow <= 0)
                                return -ELOOP;

                        r = readlinkat_malloc(fd, first + n, &destination);
                        if (r < 0)
                                return r;
                        if (isempty(destination))
                                return -EINVAL;

                        if (path_is_absolute(destination)) {

                                /* An absolute destination. Start the loop from the beginning, but use the root
                                 * directory as base. */

                                safe_close(fd);
                                fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
                                if (fd < 0)
                                        return -errno;

                                if (flags & CHASE_SAFE) {
                                        if (fstat(fd, &st) < 0)
                                                return -errno;

                                        if (!safe_transition(&previous_stat, &st))
                                                return -EPERM;

                                        previous_stat = st;
                                }

                                free(done);

                                /* Note that we do not revalidate the root, we take it as is. */
                                if (isempty(root))
                                        done = NULL;
                                else {
                                        done = strdup(root);
                                        if (!done)
                                                return -ENOMEM;
                                }

                                /* Prefix what's left to do with what we just read, and start the loop again, but
                                 * remain in the current directory. */
                                joined = strjoin(destination, todo);
                        } else
                                joined = strjoin("/", destination, todo);
                        if (!joined)
                                return -ENOMEM;

                        /* "todo" pointed into the old buffer, hence both are replaced together */
                        free(buffer);
                        todo = buffer = joined;

                        continue;
                }

                /* If this is not a symlink, then let's just add the name we read to what we already verified. */
                if (!done) {
                        done = first;
                        first = NULL;
                } else {
                        /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
                        if (streq(done, "/"))
                                *done = '\0';

                        if (!strextend(&done, first, NULL))
                                return -ENOMEM;
                }

                /* And iterate again, but go one directory further down. */
                safe_close(fd);
                fd = child;
                child = -1;
        }

        if (!done) {
                /* Special case, turn the empty string into "/", to indicate the root directory. */
                done = strdup("/");
                if (!done)
                        return -ENOMEM;
        }

        if (ret) {
                /* Pass ownership of the resolved path to the caller */
                *ret = done;
                done = NULL;
        }

        if (flags & CHASE_OPEN) {
                int q;

                /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a proper fd by
                 * opening /proc/self/fd/xyz. */

                assert(fd >= 0);
                q = fd;
                fd = -1;

                return q;
        }

        return exists;
}
886
int access_fd(int fd, int mode) {
        char p[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
        int result;

        /* access() for a file that is already open: the check is done on the /proc/self/fd/ entry of 'fd'.
         * Returns the access() result on success, the negated errno on failure. */

        xsprintf(p, "/proc/self/fd/%i", fd);

        result = access(p, mode);
        return result < 0 ? -errno : result;
}
901
902 int unlinkat_deallocate(int fd, const char *name, int flags) {
903 _cleanup_close_ int truncate_fd = -1;
904 struct stat st;
905 off_t l, bs;
906
907 /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
908 * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
909 * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
910 * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
911 * returned to the free pool.
912 *
913 * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
914 * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
915 * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
916 * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
917 * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
918 * truncation (đŸ”Ē), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
919 *
920 * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
921 * primary job – to delete the file – is accomplished. */
922
923 if ((flags & AT_REMOVEDIR) == 0) {
924 truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
925 if (truncate_fd < 0) {
926
927 /* If this failed because the file doesn't exist propagate the error right-away. Also,
928 * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
929 * returned when this is a directory but we are not supposed to delete those, hence propagate
930 * the error right-away too. */
931 if (IN_SET(errno, ENOENT, EISDIR))
932 return -errno;
933
934 if (errno != ELOOP) /* don't complain if this is a symlink */
935 log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
936 }
937 }
938
939 if (unlinkat(fd, name, flags) < 0)
940 return -errno;
941
942 if (truncate_fd < 0) /* Don't have a file handle, can't do more ☚ī¸ */
943 return 0;
944
945 if (fstat(truncate_fd, &st) < 0) {
946 log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring.", name);
947 return 0;
948 }
949
950 if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0)
951 return 0;
952
953 /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
954 * punch-hole/truncate this to release the disk space. */
955
956 bs = MAX(st.st_blksize, 512);
957 l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */
958
959 if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
960 return 0; /* Successfully punched a hole! 😊 */
961
962 /* Fall back to truncation */
963 if (ftruncate(truncate_fd, 0) < 0) {
964 log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
965 return 0;
966 }
967
968 return 0;
969 }
970
971 int fsync_directory_of_file(int fd) {
972 _cleanup_free_ char *path = NULL, *dn = NULL;
973 _cleanup_close_ int dfd = -1;
974 int r;
975
976 r = fd_verify_regular(fd);
977 if (r < 0)
978 return r;
979
980 r = fd_get_path(fd, &path);
981 if (r < 0)
982 return r;
983
984 if (!path_is_absolute(path))
985 return -EINVAL;
986
987 dn = dirname_malloc(path);
988 if (!dn)
989 return -ENOMEM;
990
991 dfd = open(dn, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
992 if (dfd < 0)
993 return -errno;
994
995 if (fsync(dfd) < 0)
996 return -errno;
997
998 return 0;
999 }