]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mount-util.c
libudev-monitor: rename udev_has_devtmpfs() and move it to mount-util.c
[thirdparty/systemd.git] / src / basic / mount-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
4349cd7c 2
11c3a366 3#include <errno.h>
35bbbf85 4#include <stdio_ext.h>
11c3a366 5#include <stdlib.h>
4349cd7c
LP
6#include <string.h>
7#include <sys/mount.h>
11c3a366 8#include <sys/stat.h>
4349cd7c 9#include <sys/statvfs.h>
11c3a366 10#include <unistd.h>
4349cd7c 11
9e7f941a
YW
12/* Include later */
13#include <libmount.h>
14
b5efdb8a 15#include "alloc-util.h"
4349cd7c 16#include "escape.h"
9e7f941a 17#include "extract-word.h"
4349cd7c
LP
18#include "fd-util.h"
19#include "fileio.h"
e1873695 20#include "fs-util.h"
93cc7779 21#include "hashmap.h"
4349cd7c
LP
22#include "mount-util.h"
23#include "parse-util.h"
24#include "path-util.h"
25#include "set.h"
15a5e950 26#include "stdio-util.h"
4349cd7c 27#include "string-util.h"
6b7c9f8b 28#include "strv.h"
4349cd7c 29
01a7e0a1
LP
30/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
31 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
32 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
33 * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
34 * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
35 * with large file handles anyway. */
36#define ORIGINAL_MAX_HANDLE_SZ 128
37
cbfb8679
LP
38int name_to_handle_at_loop(
39 int fd,
40 const char *path,
41 struct file_handle **ret_handle,
42 int *ret_mnt_id,
43 int flags) {
44
93719c6b 45 _cleanup_free_ struct file_handle *h = NULL;
01a7e0a1 46 size_t n = ORIGINAL_MAX_HANDLE_SZ;
cbfb8679
LP
47
48 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
49 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
50 * start value, it is not an upper bound on the buffer size required.
51 *
52 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
53 * as NULL if there's no interest in either. */
54
cbfb8679
LP
55 for (;;) {
56 int mnt_id = -1;
cbfb8679 57
93719c6b
LP
58 h = malloc0(offsetof(struct file_handle, f_handle) + n);
59 if (!h)
60 return -ENOMEM;
61
62 h->handle_bytes = n;
63
cbfb8679
LP
64 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
65
ae2a15bc
LP
66 if (ret_handle)
67 *ret_handle = TAKE_PTR(h);
cbfb8679
LP
68
69 if (ret_mnt_id)
70 *ret_mnt_id = mnt_id;
71
72 return 0;
73 }
74 if (errno != EOVERFLOW)
75 return -errno;
76
77 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
78
fc010b01
LP
79 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
80 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
81 * be filled in, and the caller was interested in only the mount ID an nothing else. */
cbfb8679
LP
82
83 *ret_mnt_id = mnt_id;
84 return 0;
85 }
86
fc010b01
LP
87 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
88 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
89 * buffer. In that case propagate EOVERFLOW */
90 if (h->handle_bytes <= n)
cbfb8679 91 return -EOVERFLOW;
fc010b01
LP
92
93 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
94 n = h->handle_bytes;
95 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
cbfb8679 96 return -EOVERFLOW;
cbfb8679 97
93719c6b 98 h = mfree(h);
cbfb8679
LP
99 }
100}
101
4349cd7c 102static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
fbd0b64f 103 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
4349cd7c
LP
104 _cleanup_free_ char *fdinfo = NULL;
105 _cleanup_close_ int subfd = -1;
106 char *p;
107 int r;
108
109 if ((flags & AT_EMPTY_PATH) && isempty(filename))
110 xsprintf(path, "/proc/self/fdinfo/%i", fd);
111 else {
c4b69156 112 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
4349cd7c
LP
113 if (subfd < 0)
114 return -errno;
115
116 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
117 }
118
119 r = read_full_file(path, &fdinfo, NULL);
120 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
121 return -EOPNOTSUPP;
122 if (r < 0)
0d9bcb7c 123 return r;
4349cd7c
LP
124
125 p = startswith(fdinfo, "mnt_id:");
126 if (!p) {
127 p = strstr(fdinfo, "\nmnt_id:");
128 if (!p) /* The mnt_id field is a relatively new addition */
129 return -EOPNOTSUPP;
130
131 p += 8;
132 }
133
134 p += strspn(p, WHITESPACE);
135 p[strcspn(p, WHITESPACE)] = 0;
136
137 return safe_atoi(p, mnt_id);
138}
139
4349cd7c 140int fd_is_mount_point(int fd, const char *filename, int flags) {
cbfb8679 141 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
4349cd7c
LP
142 int mount_id = -1, mount_id_parent = -1;
143 bool nosupp = false, check_st_dev = true;
144 struct stat a, b;
145 int r;
146
147 assert(fd >= 0);
148 assert(filename);
149
150 /* First we will try the name_to_handle_at() syscall, which
151 * tells us the mount id and an opaque file "handle". It is
152 * not supported everywhere though (kernel compile-time
153 * option, not all file systems are hooked up). If it works
154 * the mount id is usually good enough to tell us whether
155 * something is a mount point.
156 *
157 * If that didn't work we will try to read the mount id from
158 * /proc/self/fdinfo/<fd>. This is almost as good as
159 * name_to_handle_at(), however, does not return the
160 * opaque file handle. The opaque file handle is pretty useful
161 * to detect the root directory, which we should always
162 * consider a mount point. Hence we use this only as
163 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
164 * kernel addition.
165 *
166 * As last fallback we do traditional fstat() based st_dev
167 * comparisons. This is how things were traditionally done,
61233823 168 * but unionfs breaks this since it exposes file
4349cd7c
LP
169 * systems with a variety of st_dev reported. Also, btrfs
170 * subvolumes have different st_dev, even though they aren't
171 * real mounts of their own. */
172
cbfb8679 173 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
976c0478 174 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
c83b20d7
LP
175 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
176 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
976c0478
LP
177 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
178 * (EINVAL): fall back to simpler logic. */
cbfb8679
LP
179 goto fallback_fdinfo;
180 else if (r == -EOPNOTSUPP)
181 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
182 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
183 * logic */
184 nosupp = true;
185 else if (r < 0)
186 return r;
187
188 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
189 if (r == -EOPNOTSUPP) {
190 if (nosupp)
191 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
4349cd7c 192 goto fallback_fdinfo;
4349cd7c 193 else
cbfb8679
LP
194 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
195 * it must be a mount point. */
196 return 1;
197 } else if (r < 0)
4739fc55 198 return r;
4349cd7c
LP
199
200 /* The parent can do name_to_handle_at() but the
201 * directory we are interested in can't? If so, it
202 * must be a mount point. */
203 if (nosupp)
204 return 1;
205
206 /* If the file handle for the directory we are
207 * interested in and its parent are identical, we
208 * assume this is the root directory, which is a mount
209 * point. */
210
cbfb8679
LP
211 if (h->handle_bytes == h_parent->handle_bytes &&
212 h->handle_type == h_parent->handle_type &&
213 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
4349cd7c
LP
214 return 1;
215
216 return mount_id != mount_id_parent;
217
218fallback_fdinfo:
219 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
059c35f5 220 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
4349cd7c
LP
221 goto fallback_fstat;
222 if (r < 0)
223 return r;
224
225 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
226 if (r < 0)
227 return r;
228
229 if (mount_id != mount_id_parent)
230 return 1;
231
232 /* Hmm, so, the mount ids are the same. This leaves one
233 * special case though for the root file system. For that,
234 * let's see if the parent directory has the same inode as we
235 * are interested in. Hence, let's also do fstat() checks now,
236 * too, but avoid the st_dev comparisons, since they aren't
237 * that useful on unionfs mounts. */
238 check_st_dev = false;
239
240fallback_fstat:
241 /* yay for fstatat() taking a different set of flags than the other
242 * _at() above */
243 if (flags & AT_SYMLINK_FOLLOW)
244 flags &= ~AT_SYMLINK_FOLLOW;
245 else
246 flags |= AT_SYMLINK_NOFOLLOW;
247 if (fstatat(fd, filename, &a, flags) < 0)
248 return -errno;
249
250 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
251 return -errno;
252
253 /* A directory with same device and inode as its parent? Must
254 * be the root directory */
255 if (a.st_dev == b.st_dev &&
256 a.st_ino == b.st_ino)
257 return 1;
258
259 return check_st_dev && (a.st_dev != b.st_dev);
260}
261
262/* flags can be AT_SYMLINK_FOLLOW or 0 */
e1873695 263int path_is_mount_point(const char *t, const char *root, int flags) {
0c462ea4 264 _cleanup_free_ char *canonical = NULL;
e1873695
LP
265 _cleanup_close_ int fd = -1;
266 int r;
4349cd7c
LP
267
268 assert(t);
b12d25a8 269 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
4349cd7c
LP
270
271 if (path_equal(t, "/"))
272 return 1;
273
274 /* we need to resolve symlinks manually, we can't just rely on
275 * fd_is_mount_point() to do that for us; if we have a structure like
276 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
277 * look at needs to be /usr, not /. */
278 if (flags & AT_SYMLINK_FOLLOW) {
62570f6f 279 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
e1873695
LP
280 if (r < 0)
281 return r;
4349cd7c
LP
282
283 t = canonical;
284 }
285
0c462ea4 286 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
4349cd7c
LP
287 if (fd < 0)
288 return -errno;
289
b12d25a8 290 return fd_is_mount_point(fd, last_path_component(t), flags);
4349cd7c
LP
291}
292
c2a986d5
LP
293int path_get_mnt_id(const char *path, int *ret) {
294 int r;
295
296 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
976c0478 297 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
c2a986d5
LP
298 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
299
300 return r;
301}
302
4349cd7c
LP
303int umount_recursive(const char *prefix, int flags) {
304 bool again;
305 int n = 0, r;
306
307 /* Try to umount everything recursively below a
308 * directory. Also, take care of stacked mounts, and keep
309 * unmounting them until they are gone. */
310
311 do {
312 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
313
314 again = false;
315 r = 0;
316
317 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
318 if (!proc_self_mountinfo)
319 return -errno;
320
35bbbf85
LP
321 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
322
4349cd7c
LP
323 for (;;) {
324 _cleanup_free_ char *path = NULL, *p = NULL;
325 int k;
326
327 k = fscanf(proc_self_mountinfo,
328 "%*s " /* (1) mount id */
329 "%*s " /* (2) parent id */
330 "%*s " /* (3) major:minor */
331 "%*s " /* (4) root */
332 "%ms " /* (5) mount point */
333 "%*s" /* (6) mount options */
334 "%*[^-]" /* (7) optional fields */
335 "- " /* (8) separator */
336 "%*s " /* (9) file system type */
337 "%*s" /* (10) mount source */
338 "%*s" /* (11) mount options 2 */
339 "%*[^\n]", /* some rubbish at the end */
340 &path);
341 if (k != 1) {
342 if (k == EOF)
343 break;
344
345 continue;
346 }
347
348 r = cunescape(path, UNESCAPE_RELAX, &p);
349 if (r < 0)
350 return r;
351
352 if (!path_startswith(p, prefix))
353 continue;
354
355 if (umount2(p, flags) < 0) {
6b7c9f8b 356 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
4349cd7c
LP
357 continue;
358 }
359
6b7c9f8b
LP
360 log_debug("Successfully unmounted %s", p);
361
4349cd7c
LP
362 again = true;
363 n++;
364
365 break;
366 }
367
368 } while (again);
369
370 return r ? r : n;
371}
372
/* Stores the f_flag field reported by statvfs() for 'path' in *flags. Returns 0 on success, -errno on failure
 * (in which case *flags is left untouched). */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs sfs;
        int rc;

        rc = statvfs(path, &sfs);
        if (rc < 0)
                return -errno;

        *flags = sfs.f_flag;
        return 0;
}
381
ac9de0b3
TR
382/* Use this function only if do you have direct access to /proc/self/mountinfo
383 * and need the caller to open it for you. This is the case when /proc is
384 * masked or not mounted. Otherwise, use bind_remount_recursive. */
385int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
4349cd7c
LP
386 _cleanup_set_free_free_ Set *done = NULL;
387 _cleanup_free_ char *cleaned = NULL;
388 int r;
389
ac9de0b3
TR
390 assert(proc_self_mountinfo);
391
6b7c9f8b
LP
392 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
393 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
394 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
395 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
396 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
397 * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
398 * future submounts that have been triggered via autofs.
399 *
400 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
401 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
4349cd7c
LP
402
403 cleaned = strdup(prefix);
404 if (!cleaned)
405 return -ENOMEM;
406
858d36c1 407 path_simplify(cleaned, false);
4349cd7c 408
548f6937 409 done = set_new(&path_hash_ops);
4349cd7c
LP
410 if (!done)
411 return -ENOMEM;
412
413 for (;;) {
4349cd7c
LP
414 _cleanup_set_free_free_ Set *todo = NULL;
415 bool top_autofs = false;
416 char *x;
417 unsigned long orig_flags;
418
548f6937 419 todo = set_new(&path_hash_ops);
4349cd7c
LP
420 if (!todo)
421 return -ENOMEM;
422
ac9de0b3 423 rewind(proc_self_mountinfo);
4349cd7c
LP
424
425 for (;;) {
426 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
427 int k;
428
429 k = fscanf(proc_self_mountinfo,
430 "%*s " /* (1) mount id */
431 "%*s " /* (2) parent id */
432 "%*s " /* (3) major:minor */
433 "%*s " /* (4) root */
434 "%ms " /* (5) mount point */
435 "%*s" /* (6) mount options (superblock) */
436 "%*[^-]" /* (7) optional fields */
437 "- " /* (8) separator */
438 "%ms " /* (9) file system type */
439 "%*s" /* (10) mount source */
440 "%*s" /* (11) mount options (bind mount) */
441 "%*[^\n]", /* some rubbish at the end */
442 &path,
443 &type);
444 if (k != 2) {
445 if (k == EOF)
446 break;
447
448 continue;
449 }
450
451 r = cunescape(path, UNESCAPE_RELAX, &p);
452 if (r < 0)
453 return r;
454
6b7c9f8b
LP
455 if (!path_startswith(p, cleaned))
456 continue;
457
458 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
459 * operate on. */
460 if (!path_equal(cleaned, p)) {
461 bool blacklisted = false;
462 char **i;
463
464 STRV_FOREACH(i, blacklist) {
465
466 if (path_equal(*i, cleaned))
467 continue;
468
469 if (!path_startswith(*i, cleaned))
470 continue;
471
472 if (path_startswith(p, *i)) {
473 blacklisted = true;
474 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
475 break;
476 }
477 }
478 if (blacklisted)
479 continue;
480 }
481
4349cd7c
LP
482 /* Let's ignore autofs mounts. If they aren't
483 * triggered yet, we want to avoid triggering
484 * them, as we don't make any guarantees for
485 * future submounts anyway. If they are
486 * already triggered, then we will find
487 * another entry for this. */
488 if (streq(type, "autofs")) {
489 top_autofs = top_autofs || path_equal(cleaned, p);
490 continue;
491 }
492
6b7c9f8b 493 if (!set_contains(done, p)) {
4349cd7c
LP
494 r = set_consume(todo, p);
495 p = NULL;
4349cd7c
LP
496 if (r == -EEXIST)
497 continue;
498 if (r < 0)
499 return r;
500 }
501 }
502
503 /* If we have no submounts to process anymore and if
504 * the root is either already done, or an autofs, we
505 * are done */
506 if (set_isempty(todo) &&
507 (top_autofs || set_contains(done, cleaned)))
508 return 0;
509
510 if (!set_contains(done, cleaned) &&
511 !set_contains(todo, cleaned)) {
6b7c9f8b 512 /* The prefix directory itself is not yet a mount, make it one. */
4349cd7c
LP
513 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
514 return -errno;
515
516 orig_flags = 0;
517 (void) get_mount_flags(cleaned, &orig_flags);
518 orig_flags &= ~MS_RDONLY;
519
ef454fd1 520 if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
4349cd7c
LP
521 return -errno;
522
6b7c9f8b
LP
523 log_debug("Made top-level directory %s a mount point.", prefix);
524
4349cd7c
LP
525 x = strdup(cleaned);
526 if (!x)
527 return -ENOMEM;
528
529 r = set_consume(done, x);
530 if (r < 0)
531 return r;
532 }
533
534 while ((x = set_steal_first(todo))) {
535
536 r = set_consume(done, x);
4c701096 537 if (IN_SET(r, 0, -EEXIST))
4349cd7c
LP
538 continue;
539 if (r < 0)
540 return r;
541
6b7c9f8b 542 /* Deal with mount points that are obstructed by a later mount */
e1873695 543 r = path_is_mount_point(x, NULL, 0);
4c701096 544 if (IN_SET(r, 0, -ENOENT))
98df8089 545 continue;
ef454fd1 546 if (IN_SET(r, -EACCES, -EPERM)) {
53c442ef
YW
547 /* Even if root user invoke this, submounts under private FUSE or NFS mount points
548 * may not be acceessed. E.g.,
549 *
550 * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
551 * $ bindfs --no-allow-other ~/mnt ~/mnt
552 *
553 * Then, root user cannot access the mount point ~/mnt/mnt.
554 * In such cases, the submounts are ignored, as we have no way to manage them. */
ef454fd1
YW
555 log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
556 continue;
557 }
98df8089
AC
558 if (r < 0)
559 return r;
560
561 /* Try to reuse the original flag set */
4349cd7c
LP
562 orig_flags = 0;
563 (void) get_mount_flags(x, &orig_flags);
564 orig_flags &= ~MS_RDONLY;
565
98df8089
AC
566 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
567 return -errno;
4349cd7c 568
6b7c9f8b 569 log_debug("Remounted %s read-only.", x);
4349cd7c
LP
570 }
571 }
572}
573
ac9de0b3
TR
574int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
575 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
576
577 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
578 if (!proc_self_mountinfo)
579 return -errno;
580
35bbbf85
LP
581 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
582
ac9de0b3
TR
583 return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
584}
585
4349cd7c
LP
/* Makes 'path' the new root: changes into it, moves the mount at 'path' onto "/" (MS_MOVE), chroot()s into the
 * current directory and finally changes to "/". Stops at the first step that fails and returns its -errno;
 * returns 0 if all steps succeeded. */
int mount_move_root(const char *path) {
        assert(path);

        /* The || chain short-circuits, so errno always stems from the one call that failed. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
4e036b7a
LP
603
/* Returns true if 'fstype' names one of the network file systems listed below. A leading "fuse." is stripped
 * before the comparison. */
bool fstype_is_network(const char *fstype) {
        const char *stripped, *name;

        stripped = startswith(fstype, "fuse.");
        name = stripped ? stripped : fstype;

        return STR_IN_SET(name,
                          "afs",
                          "cifs",
                          "smbfs",
                          "sshfs",
                          "ncpfs",
                          "ncp",
                          "nfs",
                          "nfs4",
                          "gfs",
                          "gfs2",
                          "glusterfs",
                          "pvfs2", /* OrangeFS */
                          "ocfs2",
                          "lustre");
}
3f2c0bec 627
/* Returns true if 'fstype' is one of the API virtual file systems listed below. */
bool fstype_is_api_vfs(const char *fstype) {
        bool found;

        found = STR_IN_SET(fstype,
                           "autofs",
                           "bpf",
                           "cgroup",
                           "cgroup2",
                           "configfs",
                           "cpuset",
                           "debugfs",
                           "devpts",
                           "devtmpfs",
                           "efivarfs",
                           "fusectl",
                           "hugetlbfs",
                           "mqueue",
                           "proc",
                           "pstore",
                           "ramfs",
                           "securityfs",
                           "sysfs",
                           "tmpfs",
                           "tracefs");

        return found;
}
651
/* Returns true if 'fstype' is one of the Linux file systems that are necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        bool found;

        found = STR_IN_SET(fstype,
                           "DM_verity_hash",
                           "iso9660",
                           "squashfs");

        return found;
}
659
/* Returns true if 'fstype' is one of the file systems listed below as being able to discard. */
bool fstype_can_discard(const char *fstype) {
        bool found;

        found = STR_IN_SET(fstype,
                           "btrfs",
                           "ext4",
                           "vfat",
                           "xfs");

        return found;
}
667
2d3a5a73
LP
/* Returns true if 'fstype' is a file system that has a uid=/gid= mount option that fixates the owners of all
 * files and directories, current and future. */
bool fstype_can_uid_gid(const char *fstype) {
        bool found;

        found = STR_IN_SET(fstype,
                           "adfs",
                           "fat",
                           "hfs",
                           "hpfs",
                           "iso9660",
                           "msdos",
                           "ntfs",
                           "vfat");

        return found;
}
683
3f2c0bec
LP
/* Unmounts 'path' over and over, so that all mounts stacked on that mount point are removed.
 *
 * The loop ends when umount2() fails. If it fails with EINVAL, this returns 1 if at least one unmount succeeded
 * before and 0 if none did. Any other failure is returned as -errno. */
int repeat_unmount(const char *path, int flags) {
        bool done = false;

        assert(path);

        while (umount2(path, flags) >= 0)
                done = true;

        if (errno == EINVAL)
                return done;

        return -errno;
}
c4b41707
AP
704
const char* mode_to_inaccessible_node(mode_t mode) {
        /* Maps the file type bits of 'mode' to the path of the matching inaccessible file node. These nodes are
         * created during early boot by PID 1. The character and block device nodes may be missing if we lacked
         * the privileges to create them (maybe because we run in a userns environment, or miss CAP_SYS_MKNOD,
         * or run with a devices policy that excludes device nodes with major and minor of 0). In that case the
         * AF_UNIX socket node is returned instead, which is not the same, but close enough for most uses: most
         * importantly, the kernel allows bind mounts from socket nodes to any non-directory file nodes.
         *
         * Returns NULL for any other file type. */

        mode_t type = mode & S_IFMT;

        if (type == S_IFREG)
                return "/run/systemd/inaccessible/reg";

        if (type == S_IFDIR)
                return "/run/systemd/inaccessible/dir";

        if (type == S_IFCHR)
                return access("/run/systemd/inaccessible/chr", F_OK) == 0 ?
                        "/run/systemd/inaccessible/chr" :
                        "/run/systemd/inaccessible/sock";

        if (type == S_IFBLK)
                return access("/run/systemd/inaccessible/blk", F_OK) == 0 ?
                        "/run/systemd/inaccessible/blk" :
                        "/run/systemd/inaccessible/sock";

        if (type == S_IFIFO)
                return "/run/systemd/inaccessible/fifo";

        if (type == S_IFSOCK)
                return "/run/systemd/inaccessible/sock";

        return NULL;
}
60e76d48
ZJS
738
739#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
740static char* mount_flags_to_string(long unsigned flags) {
741 char *x;
742 _cleanup_free_ char *y = NULL;
743 long unsigned overflow;
744
745 overflow = flags & ~(MS_RDONLY |
746 MS_NOSUID |
747 MS_NODEV |
748 MS_NOEXEC |
749 MS_SYNCHRONOUS |
750 MS_REMOUNT |
751 MS_MANDLOCK |
752 MS_DIRSYNC |
753 MS_NOATIME |
754 MS_NODIRATIME |
755 MS_BIND |
756 MS_MOVE |
757 MS_REC |
758 MS_SILENT |
759 MS_POSIXACL |
760 MS_UNBINDABLE |
761 MS_PRIVATE |
762 MS_SLAVE |
763 MS_SHARED |
764 MS_RELATIME |
765 MS_KERNMOUNT |
766 MS_I_VERSION |
767 MS_STRICTATIME |
768 MS_LAZYTIME);
769
770 if (flags == 0 || overflow != 0)
771 if (asprintf(&y, "%lx", overflow) < 0)
772 return NULL;
773
774 x = strjoin(FLAG(MS_RDONLY),
775 FLAG(MS_NOSUID),
776 FLAG(MS_NODEV),
777 FLAG(MS_NOEXEC),
778 FLAG(MS_SYNCHRONOUS),
779 FLAG(MS_REMOUNT),
780 FLAG(MS_MANDLOCK),
781 FLAG(MS_DIRSYNC),
782 FLAG(MS_NOATIME),
783 FLAG(MS_NODIRATIME),
784 FLAG(MS_BIND),
785 FLAG(MS_MOVE),
786 FLAG(MS_REC),
787 FLAG(MS_SILENT),
788 FLAG(MS_POSIXACL),
789 FLAG(MS_UNBINDABLE),
790 FLAG(MS_PRIVATE),
791 FLAG(MS_SLAVE),
792 FLAG(MS_SHARED),
793 FLAG(MS_RELATIME),
794 FLAG(MS_KERNMOUNT),
795 FLAG(MS_I_VERSION),
796 FLAG(MS_STRICTATIME),
797 FLAG(MS_LAZYTIME),
605405c6 798 y);
60e76d48
ZJS
799 if (!x)
800 return NULL;
801 if (!y)
802 x[strlen(x) - 1] = '\0'; /* truncate the last | */
803 return x;
804}
805
806int mount_verbose(
807 int error_log_level,
808 const char *what,
809 const char *where,
810 const char *type,
811 unsigned long flags,
812 const char *options) {
813
6ef8df2b
YW
814 _cleanup_free_ char *fl = NULL, *o = NULL;
815 unsigned long f;
816 int r;
817
818 r = mount_option_mangle(options, flags, &f, &o);
819 if (r < 0)
820 return log_full_errno(error_log_level, r,
821 "Failed to mangle mount options %s: %m",
822 strempty(options));
60e76d48 823
6ef8df2b 824 fl = mount_flags_to_string(f);
60e76d48 825
6ef8df2b 826 if ((f & MS_REMOUNT) && !what && !type)
60e76d48 827 log_debug("Remounting %s (%s \"%s\")...",
6ef8df2b 828 where, strnull(fl), strempty(o));
60e76d48
ZJS
829 else if (!what && !type)
830 log_debug("Mounting %s (%s \"%s\")...",
6ef8df2b
YW
831 where, strnull(fl), strempty(o));
832 else if ((f & MS_BIND) && !type)
60e76d48 833 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
834 what, where, strnull(fl), strempty(o));
835 else if (f & MS_MOVE)
afe682bc 836 log_debug("Moving mount %s → %s (%s \"%s\")...",
6ef8df2b 837 what, where, strnull(fl), strempty(o));
60e76d48
ZJS
838 else
839 log_debug("Mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
840 strna(type), where, strnull(fl), strempty(o));
841 if (mount(what, where, type, f, o) < 0)
60e76d48
ZJS
842 return log_full_errno(error_log_level, errno,
843 "Failed to mount %s on %s (%s \"%s\"): %m",
6ef8df2b 844 strna(type), where, strnull(fl), strempty(o));
60e76d48
ZJS
845 return 0;
846}
847
/* Wrapper around umount() that logs the attempt at debug level and a failure at error level. Returns 0 on
 * success, otherwise the value returned by log_error_errno(). */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
83555251
LP
854
/* Maps the propagation bits (MS_SHARED, MS_SLAVE, MS_PRIVATE) of 'flags' to a string; all other bits are
 * ignored. Returns "" if none of the three is set, and NULL if more than one of them is set. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";

        if (propagation == MS_SHARED)
                return "shared";

        if (propagation == MS_SLAVE)
                return "slave";

        if (propagation == MS_PRIVATE)
                return "private";

        return NULL;
}
870
/* Parses a propagation mode name into the matching MS_xyz flag and stores it in *ret. An empty or NULL name
 * yields 0. Returns 0 on success, and -EINVAL (leaving *ret untouched) if the name is not recognized. */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long parsed;

        if (isempty(name))
                parsed = 0;
        else if (streq(name, "shared"))
                parsed = MS_SHARED;
        else if (streq(name, "slave"))
                parsed = MS_SLAVE;
        else if (streq(name, "private"))
                parsed = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = parsed;
        return 0;
}
9e7f941a
YW
885
886int mount_option_mangle(
887 const char *options,
888 unsigned long mount_flags,
889 unsigned long *ret_mount_flags,
890 char **ret_remaining_options) {
891
892 const struct libmnt_optmap *map;
893 _cleanup_free_ char *ret = NULL;
894 const char *p;
895 int r;
896
897 /* This extracts mount flags from the mount options, and store
898 * non-mount-flag options to '*ret_remaining_options'.
899 * E.g.,
900 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
901 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
902 * "size=1630748k,mode=700,uid=1000,gid=1000".
903 * See more examples in test-mount-utils.c.
904 *
905 * Note that if 'options' does not contain any non-mount-flag options,
906 * then '*ret_remaining_options' is set to NULL instread of empty string.
907 * Note that this does not check validity of options stored in
908 * '*ret_remaining_options'.
909 * Note that if 'options' is NULL, then this just copies 'mount_flags'
910 * to '*ret_mount_flags'. */
911
912 assert(ret_mount_flags);
913 assert(ret_remaining_options);
914
915 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
916 if (!map)
917 return -EINVAL;
918
919 p = options;
920 for (;;) {
921 _cleanup_free_ char *word = NULL;
922 const struct libmnt_optmap *ent;
923
924 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
925 if (r < 0)
926 return r;
927 if (r == 0)
928 break;
929
930 for (ent = map; ent->name; ent++) {
931 /* All entries in MNT_LINUX_MAP do not take any argument.
932 * Thus, ent->name does not contain "=" or "[=]". */
933 if (!streq(word, ent->name))
934 continue;
935
936 if (!(ent->mask & MNT_INVERT))
937 mount_flags |= ent->id;
938 else if (mount_flags & ent->id)
939 mount_flags ^= ent->id;
940
941 break;
942 }
943
944 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
945 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
946 return -ENOMEM;
947 }
948
949 *ret_mount_flags = mount_flags;
ae2a15bc 950 *ret_remaining_options = TAKE_PTR(ret);
9e7f941a
YW
951
952 return 0;
953}
be1791ad
YW
954
955int dev_is_devtmpfs(void) {
956 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
957 char line[LINE_MAX], *e;
958 int mount_id, r;
959
960 r = path_get_mnt_id("/dev", &mount_id);
961 if (r < 0)
962 return r;
963
964 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
965 if (!proc_self_mountinfo)
966 return -errno;
967
968 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
969
970 FOREACH_LINE(line, proc_self_mountinfo, return -errno) {
971 int mid;
972
973 if (sscanf(line, "%i", &mid) != 1)
974 continue;
975
976 if (mid != mount_id)
977 continue;
978
979 e = strstr(line, " - ");
980 if (!e)
981 continue;
982
983 /* accept any name that starts with the currently expected type */
984 if (startswith(e + 3, "devtmpfs"))
985 return true;
986 }
987
988 return false;
989}