]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mount-util.c
path-util: introduce path_simplify()
[thirdparty/systemd.git] / src / basic / mount-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
4349cd7c
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
4349cd7c
LP
6***/
7
11c3a366 8#include <errno.h>
35bbbf85 9#include <stdio_ext.h>
11c3a366 10#include <stdlib.h>
4349cd7c
LP
11#include <string.h>
12#include <sys/mount.h>
11c3a366 13#include <sys/stat.h>
4349cd7c 14#include <sys/statvfs.h>
11c3a366 15#include <unistd.h>
4349cd7c 16
9e7f941a
YW
17/* Include later */
18#include <libmount.h>
19
b5efdb8a 20#include "alloc-util.h"
4349cd7c 21#include "escape.h"
9e7f941a 22#include "extract-word.h"
4349cd7c
LP
23#include "fd-util.h"
24#include "fileio.h"
e1873695 25#include "fs-util.h"
93cc7779 26#include "hashmap.h"
4349cd7c
LP
27#include "mount-util.h"
28#include "parse-util.h"
29#include "path-util.h"
30#include "set.h"
15a5e950 31#include "stdio-util.h"
4349cd7c 32#include "string-util.h"
6b7c9f8b 33#include "strv.h"
4349cd7c 34
01a7e0a1
LP
35/* This is the original MAX_HANDLE_SZ definition from the kernel, as of when the API was introduced. We use it in
36 * place of whatever value the current headers define, to future-proof things: if the size is increased in the API
37 * headers and our code is recompiled, it would cease working on old kernels, as those refuse any size larger than
38 * this value with EINVAL right away. Hence, let's disconnect ourselves from any such API changes and stick to the
39 * original definition. We use it as a start value only (see name_to_handle_at_loop() below), and hence can still
40 * deal with larger file handles. */
41#define ORIGINAL_MAX_HANDLE_SZ 128
42
cbfb8679
LP
43int name_to_handle_at_loop(
44 int fd,
45 const char *path,
46 struct file_handle **ret_handle,
47 int *ret_mnt_id,
48 int flags) {
49
93719c6b 50 _cleanup_free_ struct file_handle *h = NULL;
01a7e0a1 51 size_t n = ORIGINAL_MAX_HANDLE_SZ;
cbfb8679
LP
52
53 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
54 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
55 * start value, it is not an upper bound on the buffer size required.
56 *
57 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
58 * as NULL if there's no interest in either. */
59
cbfb8679
LP
60 for (;;) {
61 int mnt_id = -1;
cbfb8679 62
93719c6b
LP
63 h = malloc0(offsetof(struct file_handle, f_handle) + n);
64 if (!h)
65 return -ENOMEM;
66
67 h->handle_bytes = n;
68
cbfb8679
LP
69 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
70
ae2a15bc
LP
71 if (ret_handle)
72 *ret_handle = TAKE_PTR(h);
cbfb8679
LP
73
74 if (ret_mnt_id)
75 *ret_mnt_id = mnt_id;
76
77 return 0;
78 }
79 if (errno != EOVERFLOW)
80 return -errno;
81
82 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
83
fc010b01
LP
84 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
85 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
86 * be filled in, and the caller was interested in only the mount ID an nothing else. */
cbfb8679
LP
87
88 *ret_mnt_id = mnt_id;
89 return 0;
90 }
91
fc010b01
LP
92 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
93 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
94 * buffer. In that case propagate EOVERFLOW */
95 if (h->handle_bytes <= n)
cbfb8679 96 return -EOVERFLOW;
fc010b01
LP
97
98 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
99 n = h->handle_bytes;
100 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
cbfb8679 101 return -EOVERFLOW;
cbfb8679 102
93719c6b 103 h = mfree(h);
cbfb8679
LP
104 }
105}
106
4349cd7c 107static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
fbd0b64f 108 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
4349cd7c
LP
109 _cleanup_free_ char *fdinfo = NULL;
110 _cleanup_close_ int subfd = -1;
111 char *p;
112 int r;
113
114 if ((flags & AT_EMPTY_PATH) && isempty(filename))
115 xsprintf(path, "/proc/self/fdinfo/%i", fd);
116 else {
c4b69156 117 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
4349cd7c
LP
118 if (subfd < 0)
119 return -errno;
120
121 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
122 }
123
124 r = read_full_file(path, &fdinfo, NULL);
125 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
126 return -EOPNOTSUPP;
127 if (r < 0)
0d9bcb7c 128 return r;
4349cd7c
LP
129
130 p = startswith(fdinfo, "mnt_id:");
131 if (!p) {
132 p = strstr(fdinfo, "\nmnt_id:");
133 if (!p) /* The mnt_id field is a relatively new addition */
134 return -EOPNOTSUPP;
135
136 p += 8;
137 }
138
139 p += strspn(p, WHITESPACE);
140 p[strcspn(p, WHITESPACE)] = 0;
141
142 return safe_atoi(p, mnt_id);
143}
144
4349cd7c 145int fd_is_mount_point(int fd, const char *filename, int flags) {
cbfb8679 146 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
4349cd7c
LP
147 int mount_id = -1, mount_id_parent = -1;
148 bool nosupp = false, check_st_dev = true;
149 struct stat a, b;
150 int r;
151
152 assert(fd >= 0);
153 assert(filename);
154
155 /* First we will try the name_to_handle_at() syscall, which
156 * tells us the mount id and an opaque file "handle". It is
157 * not supported everywhere though (kernel compile-time
158 * option, not all file systems are hooked up). If it works
159 * the mount id is usually good enough to tell us whether
160 * something is a mount point.
161 *
162 * If that didn't work we will try to read the mount id from
163 * /proc/self/fdinfo/<fd>. This is almost as good as
164 * name_to_handle_at(), however, does not return the
165 * opaque file handle. The opaque file handle is pretty useful
166 * to detect the root directory, which we should always
167 * consider a mount point. Hence we use this only as
168 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
169 * kernel addition.
170 *
171 * As last fallback we do traditional fstat() based st_dev
172 * comparisons. This is how things were traditionally done,
61233823 173 * but unionfs breaks this since it exposes file
4349cd7c
LP
174 * systems with a variety of st_dev reported. Also, btrfs
175 * subvolumes have different st_dev, even though they aren't
176 * real mounts of their own. */
177
cbfb8679 178 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
976c0478 179 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
c83b20d7
LP
180 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
181 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
976c0478
LP
182 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
183 * (EINVAL): fall back to simpler logic. */
cbfb8679
LP
184 goto fallback_fdinfo;
185 else if (r == -EOPNOTSUPP)
186 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
187 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
188 * logic */
189 nosupp = true;
190 else if (r < 0)
191 return r;
192
193 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
194 if (r == -EOPNOTSUPP) {
195 if (nosupp)
196 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
4349cd7c 197 goto fallback_fdinfo;
4349cd7c 198 else
cbfb8679
LP
199 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
200 * it must be a mount point. */
201 return 1;
202 } else if (r < 0)
4739fc55 203 return r;
4349cd7c
LP
204
205 /* The parent can do name_to_handle_at() but the
206 * directory we are interested in can't? If so, it
207 * must be a mount point. */
208 if (nosupp)
209 return 1;
210
211 /* If the file handle for the directory we are
212 * interested in and its parent are identical, we
213 * assume this is the root directory, which is a mount
214 * point. */
215
cbfb8679
LP
216 if (h->handle_bytes == h_parent->handle_bytes &&
217 h->handle_type == h_parent->handle_type &&
218 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
4349cd7c
LP
219 return 1;
220
221 return mount_id != mount_id_parent;
222
223fallback_fdinfo:
224 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
059c35f5 225 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
4349cd7c
LP
226 goto fallback_fstat;
227 if (r < 0)
228 return r;
229
230 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
231 if (r < 0)
232 return r;
233
234 if (mount_id != mount_id_parent)
235 return 1;
236
237 /* Hmm, so, the mount ids are the same. This leaves one
238 * special case though for the root file system. For that,
239 * let's see if the parent directory has the same inode as we
240 * are interested in. Hence, let's also do fstat() checks now,
241 * too, but avoid the st_dev comparisons, since they aren't
242 * that useful on unionfs mounts. */
243 check_st_dev = false;
244
245fallback_fstat:
246 /* yay for fstatat() taking a different set of flags than the other
247 * _at() above */
248 if (flags & AT_SYMLINK_FOLLOW)
249 flags &= ~AT_SYMLINK_FOLLOW;
250 else
251 flags |= AT_SYMLINK_NOFOLLOW;
252 if (fstatat(fd, filename, &a, flags) < 0)
253 return -errno;
254
255 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
256 return -errno;
257
258 /* A directory with same device and inode as its parent? Must
259 * be the root directory */
260 if (a.st_dev == b.st_dev &&
261 a.st_ino == b.st_ino)
262 return 1;
263
264 return check_st_dev && (a.st_dev != b.st_dev);
265}
266
267/* flags can be AT_SYMLINK_FOLLOW or 0 */
e1873695 268int path_is_mount_point(const char *t, const char *root, int flags) {
4349cd7c 269 _cleanup_free_ char *canonical = NULL, *parent = NULL;
e1873695
LP
270 _cleanup_close_ int fd = -1;
271 int r;
4349cd7c
LP
272
273 assert(t);
b12d25a8 274 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
4349cd7c
LP
275
276 if (path_equal(t, "/"))
277 return 1;
278
279 /* we need to resolve symlinks manually, we can't just rely on
280 * fd_is_mount_point() to do that for us; if we have a structure like
281 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
282 * look at needs to be /usr, not /. */
283 if (flags & AT_SYMLINK_FOLLOW) {
62570f6f 284 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
e1873695
LP
285 if (r < 0)
286 return r;
4349cd7c
LP
287
288 t = canonical;
289 }
290
291 parent = dirname_malloc(t);
292 if (!parent)
293 return -ENOMEM;
294
c4b69156 295 fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
4349cd7c
LP
296 if (fd < 0)
297 return -errno;
298
b12d25a8 299 return fd_is_mount_point(fd, last_path_component(t), flags);
4349cd7c
LP
300}
301
c2a986d5
LP
302int path_get_mnt_id(const char *path, int *ret) {
303 int r;
304
305 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
976c0478 306 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
c2a986d5
LP
307 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
308
309 return r;
310}
311
4349cd7c
LP
312int umount_recursive(const char *prefix, int flags) {
313 bool again;
314 int n = 0, r;
315
316 /* Try to umount everything recursively below a
317 * directory. Also, take care of stacked mounts, and keep
318 * unmounting them until they are gone. */
319
320 do {
321 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
322
323 again = false;
324 r = 0;
325
326 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
327 if (!proc_self_mountinfo)
328 return -errno;
329
35bbbf85
LP
330 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
331
4349cd7c
LP
332 for (;;) {
333 _cleanup_free_ char *path = NULL, *p = NULL;
334 int k;
335
336 k = fscanf(proc_self_mountinfo,
337 "%*s " /* (1) mount id */
338 "%*s " /* (2) parent id */
339 "%*s " /* (3) major:minor */
340 "%*s " /* (4) root */
341 "%ms " /* (5) mount point */
342 "%*s" /* (6) mount options */
343 "%*[^-]" /* (7) optional fields */
344 "- " /* (8) separator */
345 "%*s " /* (9) file system type */
346 "%*s" /* (10) mount source */
347 "%*s" /* (11) mount options 2 */
348 "%*[^\n]", /* some rubbish at the end */
349 &path);
350 if (k != 1) {
351 if (k == EOF)
352 break;
353
354 continue;
355 }
356
357 r = cunescape(path, UNESCAPE_RELAX, &p);
358 if (r < 0)
359 return r;
360
361 if (!path_startswith(p, prefix))
362 continue;
363
364 if (umount2(p, flags) < 0) {
6b7c9f8b 365 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
4349cd7c
LP
366 continue;
367 }
368
6b7c9f8b
LP
369 log_debug("Successfully unmounted %s", p);
370
4349cd7c
LP
371 again = true;
372 n++;
373
374 break;
375 }
376
377 } while (again);
378
379 return r ? r : n;
380}
381
/* Stores the f_flag field that statvfs() reports for 'path' in *flags. Returns 0 on success, -errno on
 * failure, in which case *flags is left untouched. */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs sfs;
        int rc;

        rc = statvfs(path, &sfs);
        if (rc < 0)
                return -errno;

        *flags = sfs.f_flag;
        return 0;
}
390
ac9de0b3
TR
391/* Use this function only if do you have direct access to /proc/self/mountinfo
392 * and need the caller to open it for you. This is the case when /proc is
393 * masked or not mounted. Otherwise, use bind_remount_recursive. */
394int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
4349cd7c
LP
395 _cleanup_set_free_free_ Set *done = NULL;
396 _cleanup_free_ char *cleaned = NULL;
397 int r;
398
ac9de0b3
TR
399 assert(proc_self_mountinfo);
400
6b7c9f8b
LP
401 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
402 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
403 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
404 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
405 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
406 * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
407 * future submounts that have been triggered via autofs.
408 *
409 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
410 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
4349cd7c
LP
411
412 cleaned = strdup(prefix);
413 if (!cleaned)
414 return -ENOMEM;
415
858d36c1 416 path_simplify(cleaned, false);
4349cd7c 417
548f6937 418 done = set_new(&path_hash_ops);
4349cd7c
LP
419 if (!done)
420 return -ENOMEM;
421
422 for (;;) {
4349cd7c
LP
423 _cleanup_set_free_free_ Set *todo = NULL;
424 bool top_autofs = false;
425 char *x;
426 unsigned long orig_flags;
427
548f6937 428 todo = set_new(&path_hash_ops);
4349cd7c
LP
429 if (!todo)
430 return -ENOMEM;
431
ac9de0b3 432 rewind(proc_self_mountinfo);
4349cd7c
LP
433
434 for (;;) {
435 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
436 int k;
437
438 k = fscanf(proc_self_mountinfo,
439 "%*s " /* (1) mount id */
440 "%*s " /* (2) parent id */
441 "%*s " /* (3) major:minor */
442 "%*s " /* (4) root */
443 "%ms " /* (5) mount point */
444 "%*s" /* (6) mount options (superblock) */
445 "%*[^-]" /* (7) optional fields */
446 "- " /* (8) separator */
447 "%ms " /* (9) file system type */
448 "%*s" /* (10) mount source */
449 "%*s" /* (11) mount options (bind mount) */
450 "%*[^\n]", /* some rubbish at the end */
451 &path,
452 &type);
453 if (k != 2) {
454 if (k == EOF)
455 break;
456
457 continue;
458 }
459
460 r = cunescape(path, UNESCAPE_RELAX, &p);
461 if (r < 0)
462 return r;
463
6b7c9f8b
LP
464 if (!path_startswith(p, cleaned))
465 continue;
466
467 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
468 * operate on. */
469 if (!path_equal(cleaned, p)) {
470 bool blacklisted = false;
471 char **i;
472
473 STRV_FOREACH(i, blacklist) {
474
475 if (path_equal(*i, cleaned))
476 continue;
477
478 if (!path_startswith(*i, cleaned))
479 continue;
480
481 if (path_startswith(p, *i)) {
482 blacklisted = true;
483 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
484 break;
485 }
486 }
487 if (blacklisted)
488 continue;
489 }
490
4349cd7c
LP
491 /* Let's ignore autofs mounts. If they aren't
492 * triggered yet, we want to avoid triggering
493 * them, as we don't make any guarantees for
494 * future submounts anyway. If they are
495 * already triggered, then we will find
496 * another entry for this. */
497 if (streq(type, "autofs")) {
498 top_autofs = top_autofs || path_equal(cleaned, p);
499 continue;
500 }
501
6b7c9f8b 502 if (!set_contains(done, p)) {
4349cd7c
LP
503 r = set_consume(todo, p);
504 p = NULL;
4349cd7c
LP
505 if (r == -EEXIST)
506 continue;
507 if (r < 0)
508 return r;
509 }
510 }
511
512 /* If we have no submounts to process anymore and if
513 * the root is either already done, or an autofs, we
514 * are done */
515 if (set_isempty(todo) &&
516 (top_autofs || set_contains(done, cleaned)))
517 return 0;
518
519 if (!set_contains(done, cleaned) &&
520 !set_contains(todo, cleaned)) {
6b7c9f8b 521 /* The prefix directory itself is not yet a mount, make it one. */
4349cd7c
LP
522 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
523 return -errno;
524
525 orig_flags = 0;
526 (void) get_mount_flags(cleaned, &orig_flags);
527 orig_flags &= ~MS_RDONLY;
528
529 if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
530 return -errno;
531
6b7c9f8b
LP
532 log_debug("Made top-level directory %s a mount point.", prefix);
533
4349cd7c
LP
534 x = strdup(cleaned);
535 if (!x)
536 return -ENOMEM;
537
538 r = set_consume(done, x);
539 if (r < 0)
540 return r;
541 }
542
543 while ((x = set_steal_first(todo))) {
544
545 r = set_consume(done, x);
4c701096 546 if (IN_SET(r, 0, -EEXIST))
4349cd7c
LP
547 continue;
548 if (r < 0)
549 return r;
550
6b7c9f8b 551 /* Deal with mount points that are obstructed by a later mount */
e1873695 552 r = path_is_mount_point(x, NULL, 0);
4c701096 553 if (IN_SET(r, 0, -ENOENT))
98df8089
AC
554 continue;
555 if (r < 0)
556 return r;
557
558 /* Try to reuse the original flag set */
4349cd7c
LP
559 orig_flags = 0;
560 (void) get_mount_flags(x, &orig_flags);
561 orig_flags &= ~MS_RDONLY;
562
98df8089
AC
563 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
564 return -errno;
4349cd7c 565
6b7c9f8b 566 log_debug("Remounted %s read-only.", x);
4349cd7c
LP
567 }
568 }
569}
570
ac9de0b3
TR
571int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
572 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
573
574 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
575 if (!proc_self_mountinfo)
576 return -errno;
577
35bbbf85
LP
578 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
579
ac9de0b3
TR
580 return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
581}
582
4349cd7c
LP
/* Moves the mount at 'path' onto "/" and makes it the root directory of the calling process: chdir() into it,
 * MS_MOVE it over "/", chroot(".") and finally chdir("/"). Returns 0 on success, or the negative errno of the
 * first step that failed; later steps are not attempted then. */
int mount_move_root(const char *path) {
        assert(path);

        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
4e036b7a
LP
600
/* Returns true if 'fstype' is one of the file system types listed below. A leading "fuse." is stripped first,
 * so that e.g. "fuse.sshfs" is looked up as "sshfs". */
bool fstype_is_network(const char *fstype) {
        const char *stripped, *name;

        stripped = startswith(fstype, "fuse.");
        name = stripped ? stripped : fstype;

        return STR_IN_SET(name,
                          "afs", "cifs", "smbfs", "sshfs",
                          "ncpfs", "ncp",
                          "nfs", "nfs4",
                          "gfs", "gfs2", "glusterfs",
                          "pvfs2", /* OrangeFS */
                          "ocfs2",
                          "lustre");
}
3f2c0bec 624
/* Returns true if 'fstype' is one of the file system types listed below. */
bool fstype_is_api_vfs(const char *fstype) {
        bool listed;

        listed = STR_IN_SET(fstype,
                            "autofs", "bpf", "cgroup", "cgroup2",
                            "configfs", "cpuset", "debugfs", "devpts",
                            "devtmpfs", "efivarfs", "fusectl", "hugetlbfs",
                            "mqueue", "proc", "pstore", "ramfs",
                            "securityfs", "sysfs", "tmpfs", "tracefs");

        return listed;
}
648
/* Returns true for the Linux file systems that are necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        bool listed;

        listed = STR_IN_SET(fstype, "DM_verity_hash", "iso9660", "squashfs");

        return listed;
}
656
/* Returns true if 'fstype' is one of the file system types listed below. */
bool fstype_can_discard(const char *fstype) {
        bool listed;

        listed = STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs");

        return listed;
}
664
2d3a5a73
LP
/* Returns true for the file systems that have a uid=/gid= mount option which fixates the owners of all files
 * and directories, current and future. */
bool fstype_can_uid_gid(const char *fstype) {
        bool listed;

        listed = STR_IN_SET(fstype,
                            "adfs", "fat", "hfs", "hpfs",
                            "iso9660", "msdos", "ntfs", "vfat");

        return listed;
}
680
3f2c0bec
LP
/* Removes all mounts stacked on the mount point 'path' by calling umount2() until it fails. Failure with
 * EINVAL ends the loop regularly; the result is then 1 if at least one unmount succeeded and 0 if none did.
 * Any other failure is returned as negative errno. */
int repeat_unmount(const char *path, int flags) {
        bool any = false;

        assert(path);

        while (umount2(path, flags) >= 0)
                any = true;

        if (errno == EINVAL)
                return any;

        return -errno;
}
c4b41707
AP
701
702const char* mode_to_inaccessible_node(mode_t mode) {
fe80fcc7
LP
703 /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
704 * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
705 * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
706 * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
707 * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
708 * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
709
c4b41707
AP
710 switch(mode & S_IFMT) {
711 case S_IFREG:
712 return "/run/systemd/inaccessible/reg";
fe80fcc7 713
c4b41707
AP
714 case S_IFDIR:
715 return "/run/systemd/inaccessible/dir";
fe80fcc7 716
c4b41707 717 case S_IFCHR:
b3d1d516
AP
718 if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
719 return "/run/systemd/inaccessible/chr";
720 return "/run/systemd/inaccessible/sock";
fe80fcc7 721
c4b41707 722 case S_IFBLK:
b3d1d516
AP
723 if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
724 return "/run/systemd/inaccessible/blk";
725 return "/run/systemd/inaccessible/sock";
fe80fcc7 726
c4b41707
AP
727 case S_IFIFO:
728 return "/run/systemd/inaccessible/fifo";
fe80fcc7 729
c4b41707
AP
730 case S_IFSOCK:
731 return "/run/systemd/inaccessible/sock";
732 }
733 return NULL;
734}
60e76d48
ZJS
735
736#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
737static char* mount_flags_to_string(long unsigned flags) {
738 char *x;
739 _cleanup_free_ char *y = NULL;
740 long unsigned overflow;
741
742 overflow = flags & ~(MS_RDONLY |
743 MS_NOSUID |
744 MS_NODEV |
745 MS_NOEXEC |
746 MS_SYNCHRONOUS |
747 MS_REMOUNT |
748 MS_MANDLOCK |
749 MS_DIRSYNC |
750 MS_NOATIME |
751 MS_NODIRATIME |
752 MS_BIND |
753 MS_MOVE |
754 MS_REC |
755 MS_SILENT |
756 MS_POSIXACL |
757 MS_UNBINDABLE |
758 MS_PRIVATE |
759 MS_SLAVE |
760 MS_SHARED |
761 MS_RELATIME |
762 MS_KERNMOUNT |
763 MS_I_VERSION |
764 MS_STRICTATIME |
765 MS_LAZYTIME);
766
767 if (flags == 0 || overflow != 0)
768 if (asprintf(&y, "%lx", overflow) < 0)
769 return NULL;
770
771 x = strjoin(FLAG(MS_RDONLY),
772 FLAG(MS_NOSUID),
773 FLAG(MS_NODEV),
774 FLAG(MS_NOEXEC),
775 FLAG(MS_SYNCHRONOUS),
776 FLAG(MS_REMOUNT),
777 FLAG(MS_MANDLOCK),
778 FLAG(MS_DIRSYNC),
779 FLAG(MS_NOATIME),
780 FLAG(MS_NODIRATIME),
781 FLAG(MS_BIND),
782 FLAG(MS_MOVE),
783 FLAG(MS_REC),
784 FLAG(MS_SILENT),
785 FLAG(MS_POSIXACL),
786 FLAG(MS_UNBINDABLE),
787 FLAG(MS_PRIVATE),
788 FLAG(MS_SLAVE),
789 FLAG(MS_SHARED),
790 FLAG(MS_RELATIME),
791 FLAG(MS_KERNMOUNT),
792 FLAG(MS_I_VERSION),
793 FLAG(MS_STRICTATIME),
794 FLAG(MS_LAZYTIME),
605405c6 795 y);
60e76d48
ZJS
796 if (!x)
797 return NULL;
798 if (!y)
799 x[strlen(x) - 1] = '\0'; /* truncate the last | */
800 return x;
801}
802
803int mount_verbose(
804 int error_log_level,
805 const char *what,
806 const char *where,
807 const char *type,
808 unsigned long flags,
809 const char *options) {
810
6ef8df2b
YW
811 _cleanup_free_ char *fl = NULL, *o = NULL;
812 unsigned long f;
813 int r;
814
815 r = mount_option_mangle(options, flags, &f, &o);
816 if (r < 0)
817 return log_full_errno(error_log_level, r,
818 "Failed to mangle mount options %s: %m",
819 strempty(options));
60e76d48 820
6ef8df2b 821 fl = mount_flags_to_string(f);
60e76d48 822
6ef8df2b 823 if ((f & MS_REMOUNT) && !what && !type)
60e76d48 824 log_debug("Remounting %s (%s \"%s\")...",
6ef8df2b 825 where, strnull(fl), strempty(o));
60e76d48
ZJS
826 else if (!what && !type)
827 log_debug("Mounting %s (%s \"%s\")...",
6ef8df2b
YW
828 where, strnull(fl), strempty(o));
829 else if ((f & MS_BIND) && !type)
60e76d48 830 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
831 what, where, strnull(fl), strempty(o));
832 else if (f & MS_MOVE)
afe682bc 833 log_debug("Moving mount %s → %s (%s \"%s\")...",
6ef8df2b 834 what, where, strnull(fl), strempty(o));
60e76d48
ZJS
835 else
836 log_debug("Mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
837 strna(type), where, strnull(fl), strempty(o));
838 if (mount(what, where, type, f, o) < 0)
60e76d48
ZJS
839 return log_full_errno(error_log_level, errno,
840 "Failed to mount %s on %s (%s \"%s\"): %m",
6ef8df2b 841 strna(type), where, strnull(fl), strempty(o));
60e76d48
ZJS
842 return 0;
843}
844
/* Wrapper around umount() that announces the operation at debug level and logs a failure as error. Returns 0
 * on success, otherwise what log_error_errno() returns for the failure. */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
83555251
LP
851
/* Maps the propagation bits (MS_SHARED, MS_SLAVE, MS_PRIVATE) in 'flags' to a name; all other bits are
 * ignored. None of the three set yields "", more than one of them set yields NULL. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";
        if (propagation == MS_SHARED)
                return "shared";
        if (propagation == MS_SLAVE)
                return "slave";
        if (propagation == MS_PRIVATE)
                return "private";

        return NULL;
}
867
/* Parses a propagation mode name into the matching MS_* flag: "shared", "slave" or "private"; a NULL or empty
 * name yields 0. Returns 0 on success, -EINVAL for any other name, in which case *ret is left untouched. */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long parsed;

        if (isempty(name))
                parsed = 0;
        else if (streq(name, "shared"))
                parsed = MS_SHARED;
        else if (streq(name, "slave"))
                parsed = MS_SLAVE;
        else if (streq(name, "private"))
                parsed = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = parsed;
        return 0;
}
9e7f941a
YW
882
883int mount_option_mangle(
884 const char *options,
885 unsigned long mount_flags,
886 unsigned long *ret_mount_flags,
887 char **ret_remaining_options) {
888
889 const struct libmnt_optmap *map;
890 _cleanup_free_ char *ret = NULL;
891 const char *p;
892 int r;
893
894 /* This extracts mount flags from the mount options, and store
895 * non-mount-flag options to '*ret_remaining_options'.
896 * E.g.,
897 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
898 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
899 * "size=1630748k,mode=700,uid=1000,gid=1000".
900 * See more examples in test-mount-utils.c.
901 *
902 * Note that if 'options' does not contain any non-mount-flag options,
903 * then '*ret_remaining_options' is set to NULL instread of empty string.
904 * Note that this does not check validity of options stored in
905 * '*ret_remaining_options'.
906 * Note that if 'options' is NULL, then this just copies 'mount_flags'
907 * to '*ret_mount_flags'. */
908
909 assert(ret_mount_flags);
910 assert(ret_remaining_options);
911
912 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
913 if (!map)
914 return -EINVAL;
915
916 p = options;
917 for (;;) {
918 _cleanup_free_ char *word = NULL;
919 const struct libmnt_optmap *ent;
920
921 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
922 if (r < 0)
923 return r;
924 if (r == 0)
925 break;
926
927 for (ent = map; ent->name; ent++) {
928 /* All entries in MNT_LINUX_MAP do not take any argument.
929 * Thus, ent->name does not contain "=" or "[=]". */
930 if (!streq(word, ent->name))
931 continue;
932
933 if (!(ent->mask & MNT_INVERT))
934 mount_flags |= ent->id;
935 else if (mount_flags & ent->id)
936 mount_flags ^= ent->id;
937
938 break;
939 }
940
941 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
942 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
943 return -ENOMEM;
944 }
945
946 *ret_mount_flags = mount_flags;
ae2a15bc 947 *ret_remaining_options = TAKE_PTR(ret);
9e7f941a
YW
948
949 return 0;
950}