]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mount-util.c
cgroup-util: add mask definitions for sets of controllers supported by cgroupsv1...
[thirdparty/systemd.git] / src / basic / mount-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
4349cd7c 2
11c3a366 3#include <errno.h>
35bbbf85 4#include <stdio_ext.h>
11c3a366 5#include <stdlib.h>
4349cd7c
LP
6#include <string.h>
7#include <sys/mount.h>
11c3a366 8#include <sys/stat.h>
4349cd7c 9#include <sys/statvfs.h>
11c3a366 10#include <unistd.h>
4349cd7c 11
9e7f941a
YW
12/* Include later */
13#include <libmount.h>
14
b5efdb8a 15#include "alloc-util.h"
f5af75ea 16#include "def.h"
4349cd7c 17#include "escape.h"
9e7f941a 18#include "extract-word.h"
4349cd7c
LP
19#include "fd-util.h"
20#include "fileio.h"
e1873695 21#include "fs-util.h"
93cc7779 22#include "hashmap.h"
4349cd7c
LP
23#include "mount-util.h"
24#include "parse-util.h"
25#include "path-util.h"
26#include "set.h"
15a5e950 27#include "stdio-util.h"
4349cd7c 28#include "string-util.h"
6b7c9f8b 29#include "strv.h"
4349cd7c 30
01a7e0a1
LP
31/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
32 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
33 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
34 * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
35 * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
36 * with large file handles anyway. */
37#define ORIGINAL_MAX_HANDLE_SZ 128
38
cbfb8679
LP
39int name_to_handle_at_loop(
40 int fd,
41 const char *path,
42 struct file_handle **ret_handle,
43 int *ret_mnt_id,
44 int flags) {
45
93719c6b 46 _cleanup_free_ struct file_handle *h = NULL;
01a7e0a1 47 size_t n = ORIGINAL_MAX_HANDLE_SZ;
cbfb8679
LP
48
49 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
50 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
51 * start value, it is not an upper bound on the buffer size required.
52 *
53 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
54 * as NULL if there's no interest in either. */
55
cbfb8679
LP
56 for (;;) {
57 int mnt_id = -1;
cbfb8679 58
93719c6b
LP
59 h = malloc0(offsetof(struct file_handle, f_handle) + n);
60 if (!h)
61 return -ENOMEM;
62
63 h->handle_bytes = n;
64
cbfb8679
LP
65 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
66
ae2a15bc
LP
67 if (ret_handle)
68 *ret_handle = TAKE_PTR(h);
cbfb8679
LP
69
70 if (ret_mnt_id)
71 *ret_mnt_id = mnt_id;
72
73 return 0;
74 }
75 if (errno != EOVERFLOW)
76 return -errno;
77
78 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
79
fc010b01
LP
80 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
81 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
82 * be filled in, and the caller was interested in only the mount ID an nothing else. */
cbfb8679
LP
83
84 *ret_mnt_id = mnt_id;
85 return 0;
86 }
87
fc010b01
LP
88 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
89 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
90 * buffer. In that case propagate EOVERFLOW */
91 if (h->handle_bytes <= n)
cbfb8679 92 return -EOVERFLOW;
fc010b01
LP
93
94 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
95 n = h->handle_bytes;
96 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
cbfb8679 97 return -EOVERFLOW;
cbfb8679 98
93719c6b 99 h = mfree(h);
cbfb8679
LP
100 }
101}
102
4349cd7c 103static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
fbd0b64f 104 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
4349cd7c
LP
105 _cleanup_free_ char *fdinfo = NULL;
106 _cleanup_close_ int subfd = -1;
107 char *p;
108 int r;
109
110 if ((flags & AT_EMPTY_PATH) && isempty(filename))
111 xsprintf(path, "/proc/self/fdinfo/%i", fd);
112 else {
c4b69156 113 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
4349cd7c
LP
114 if (subfd < 0)
115 return -errno;
116
117 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
118 }
119
120 r = read_full_file(path, &fdinfo, NULL);
121 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
122 return -EOPNOTSUPP;
123 if (r < 0)
0d9bcb7c 124 return r;
4349cd7c
LP
125
126 p = startswith(fdinfo, "mnt_id:");
127 if (!p) {
128 p = strstr(fdinfo, "\nmnt_id:");
129 if (!p) /* The mnt_id field is a relatively new addition */
130 return -EOPNOTSUPP;
131
132 p += 8;
133 }
134
135 p += strspn(p, WHITESPACE);
136 p[strcspn(p, WHITESPACE)] = 0;
137
138 return safe_atoi(p, mnt_id);
139}
140
4349cd7c 141int fd_is_mount_point(int fd, const char *filename, int flags) {
cbfb8679 142 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
4349cd7c
LP
143 int mount_id = -1, mount_id_parent = -1;
144 bool nosupp = false, check_st_dev = true;
145 struct stat a, b;
146 int r;
147
148 assert(fd >= 0);
149 assert(filename);
150
151 /* First we will try the name_to_handle_at() syscall, which
152 * tells us the mount id and an opaque file "handle". It is
153 * not supported everywhere though (kernel compile-time
154 * option, not all file systems are hooked up). If it works
155 * the mount id is usually good enough to tell us whether
156 * something is a mount point.
157 *
158 * If that didn't work we will try to read the mount id from
159 * /proc/self/fdinfo/<fd>. This is almost as good as
160 * name_to_handle_at(), however, does not return the
161 * opaque file handle. The opaque file handle is pretty useful
162 * to detect the root directory, which we should always
163 * consider a mount point. Hence we use this only as
164 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
165 * kernel addition.
166 *
167 * As last fallback we do traditional fstat() based st_dev
168 * comparisons. This is how things were traditionally done,
61233823 169 * but unionfs breaks this since it exposes file
4349cd7c
LP
170 * systems with a variety of st_dev reported. Also, btrfs
171 * subvolumes have different st_dev, even though they aren't
172 * real mounts of their own. */
173
cbfb8679 174 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
976c0478 175 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
c83b20d7
LP
176 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
177 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
976c0478
LP
178 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
179 * (EINVAL): fall back to simpler logic. */
cbfb8679
LP
180 goto fallback_fdinfo;
181 else if (r == -EOPNOTSUPP)
182 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
183 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
184 * logic */
185 nosupp = true;
186 else if (r < 0)
187 return r;
188
189 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
190 if (r == -EOPNOTSUPP) {
191 if (nosupp)
192 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
4349cd7c 193 goto fallback_fdinfo;
4349cd7c 194 else
cbfb8679
LP
195 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
196 * it must be a mount point. */
197 return 1;
198 } else if (r < 0)
4739fc55 199 return r;
4349cd7c
LP
200
201 /* The parent can do name_to_handle_at() but the
202 * directory we are interested in can't? If so, it
203 * must be a mount point. */
204 if (nosupp)
205 return 1;
206
207 /* If the file handle for the directory we are
208 * interested in and its parent are identical, we
209 * assume this is the root directory, which is a mount
210 * point. */
211
cbfb8679
LP
212 if (h->handle_bytes == h_parent->handle_bytes &&
213 h->handle_type == h_parent->handle_type &&
214 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
4349cd7c
LP
215 return 1;
216
217 return mount_id != mount_id_parent;
218
219fallback_fdinfo:
220 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
059c35f5 221 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
4349cd7c
LP
222 goto fallback_fstat;
223 if (r < 0)
224 return r;
225
226 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
227 if (r < 0)
228 return r;
229
230 if (mount_id != mount_id_parent)
231 return 1;
232
233 /* Hmm, so, the mount ids are the same. This leaves one
234 * special case though for the root file system. For that,
235 * let's see if the parent directory has the same inode as we
236 * are interested in. Hence, let's also do fstat() checks now,
237 * too, but avoid the st_dev comparisons, since they aren't
238 * that useful on unionfs mounts. */
239 check_st_dev = false;
240
241fallback_fstat:
242 /* yay for fstatat() taking a different set of flags than the other
243 * _at() above */
244 if (flags & AT_SYMLINK_FOLLOW)
245 flags &= ~AT_SYMLINK_FOLLOW;
246 else
247 flags |= AT_SYMLINK_NOFOLLOW;
248 if (fstatat(fd, filename, &a, flags) < 0)
249 return -errno;
250
251 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
252 return -errno;
253
254 /* A directory with same device and inode as its parent? Must
255 * be the root directory */
256 if (a.st_dev == b.st_dev &&
257 a.st_ino == b.st_ino)
258 return 1;
259
260 return check_st_dev && (a.st_dev != b.st_dev);
261}
262
263/* flags can be AT_SYMLINK_FOLLOW or 0 */
e1873695 264int path_is_mount_point(const char *t, const char *root, int flags) {
0c462ea4 265 _cleanup_free_ char *canonical = NULL;
e1873695
LP
266 _cleanup_close_ int fd = -1;
267 int r;
4349cd7c
LP
268
269 assert(t);
b12d25a8 270 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
4349cd7c
LP
271
272 if (path_equal(t, "/"))
273 return 1;
274
275 /* we need to resolve symlinks manually, we can't just rely on
276 * fd_is_mount_point() to do that for us; if we have a structure like
277 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
278 * look at needs to be /usr, not /. */
279 if (flags & AT_SYMLINK_FOLLOW) {
62570f6f 280 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
e1873695
LP
281 if (r < 0)
282 return r;
4349cd7c
LP
283
284 t = canonical;
285 }
286
0c462ea4 287 fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
4349cd7c
LP
288 if (fd < 0)
289 return -errno;
290
b12d25a8 291 return fd_is_mount_point(fd, last_path_component(t), flags);
4349cd7c
LP
292}
293
c2a986d5
LP
294int path_get_mnt_id(const char *path, int *ret) {
295 int r;
296
297 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
976c0478 298 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
c2a986d5
LP
299 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
300
301 return r;
302}
303
4349cd7c
LP
304int umount_recursive(const char *prefix, int flags) {
305 bool again;
306 int n = 0, r;
307
308 /* Try to umount everything recursively below a
309 * directory. Also, take care of stacked mounts, and keep
310 * unmounting them until they are gone. */
311
312 do {
313 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
314
315 again = false;
316 r = 0;
317
318 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
319 if (!proc_self_mountinfo)
320 return -errno;
321
35bbbf85
LP
322 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
323
4349cd7c
LP
324 for (;;) {
325 _cleanup_free_ char *path = NULL, *p = NULL;
326 int k;
327
328 k = fscanf(proc_self_mountinfo,
329 "%*s " /* (1) mount id */
330 "%*s " /* (2) parent id */
331 "%*s " /* (3) major:minor */
332 "%*s " /* (4) root */
333 "%ms " /* (5) mount point */
334 "%*s" /* (6) mount options */
335 "%*[^-]" /* (7) optional fields */
336 "- " /* (8) separator */
337 "%*s " /* (9) file system type */
338 "%*s" /* (10) mount source */
339 "%*s" /* (11) mount options 2 */
340 "%*[^\n]", /* some rubbish at the end */
341 &path);
342 if (k != 1) {
343 if (k == EOF)
344 break;
345
346 continue;
347 }
348
349 r = cunescape(path, UNESCAPE_RELAX, &p);
350 if (r < 0)
351 return r;
352
353 if (!path_startswith(p, prefix))
354 continue;
355
356 if (umount2(p, flags) < 0) {
6b7c9f8b 357 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
4349cd7c
LP
358 continue;
359 }
360
6b7c9f8b
LP
361 log_debug("Successfully unmounted %s", p);
362
4349cd7c
LP
363 again = true;
364 n++;
365
366 break;
367 }
368
369 } while (again);
370
371 return r ? r : n;
372}
373
/* Queries the mount flags of the file system containing path via statvfs() and stores them in *flags.
 * Returns 0 on success, -errno on failure (in which case *flags is left untouched). */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs sfs;

        if (statvfs(path, &sfs) >= 0) {
                *flags = sfs.f_flag;
                return 0;
        }

        return -errno;
}
382
ac9de0b3
TR
383/* Use this function only if do you have direct access to /proc/self/mountinfo
384 * and need the caller to open it for you. This is the case when /proc is
385 * masked or not mounted. Otherwise, use bind_remount_recursive. */
386int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
4349cd7c
LP
387 _cleanup_set_free_free_ Set *done = NULL;
388 _cleanup_free_ char *cleaned = NULL;
389 int r;
390
ac9de0b3
TR
391 assert(proc_self_mountinfo);
392
6b7c9f8b
LP
393 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
394 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
395 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
396 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
397 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
398 * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
399 * future submounts that have been triggered via autofs.
400 *
401 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
402 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
4349cd7c
LP
403
404 cleaned = strdup(prefix);
405 if (!cleaned)
406 return -ENOMEM;
407
858d36c1 408 path_simplify(cleaned, false);
4349cd7c 409
548f6937 410 done = set_new(&path_hash_ops);
4349cd7c
LP
411 if (!done)
412 return -ENOMEM;
413
414 for (;;) {
4349cd7c
LP
415 _cleanup_set_free_free_ Set *todo = NULL;
416 bool top_autofs = false;
417 char *x;
418 unsigned long orig_flags;
419
548f6937 420 todo = set_new(&path_hash_ops);
4349cd7c
LP
421 if (!todo)
422 return -ENOMEM;
423
ac9de0b3 424 rewind(proc_self_mountinfo);
4349cd7c
LP
425
426 for (;;) {
427 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
428 int k;
429
430 k = fscanf(proc_self_mountinfo,
431 "%*s " /* (1) mount id */
432 "%*s " /* (2) parent id */
433 "%*s " /* (3) major:minor */
434 "%*s " /* (4) root */
435 "%ms " /* (5) mount point */
436 "%*s" /* (6) mount options (superblock) */
437 "%*[^-]" /* (7) optional fields */
438 "- " /* (8) separator */
439 "%ms " /* (9) file system type */
440 "%*s" /* (10) mount source */
441 "%*s" /* (11) mount options (bind mount) */
442 "%*[^\n]", /* some rubbish at the end */
443 &path,
444 &type);
445 if (k != 2) {
446 if (k == EOF)
447 break;
448
449 continue;
450 }
451
452 r = cunescape(path, UNESCAPE_RELAX, &p);
453 if (r < 0)
454 return r;
455
6b7c9f8b
LP
456 if (!path_startswith(p, cleaned))
457 continue;
458
459 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
460 * operate on. */
461 if (!path_equal(cleaned, p)) {
462 bool blacklisted = false;
463 char **i;
464
465 STRV_FOREACH(i, blacklist) {
466
467 if (path_equal(*i, cleaned))
468 continue;
469
470 if (!path_startswith(*i, cleaned))
471 continue;
472
473 if (path_startswith(p, *i)) {
474 blacklisted = true;
c745d2bf 475 log_debug("Not remounting %s blacklisted by %s, called for %s", p, *i, cleaned);
6b7c9f8b
LP
476 break;
477 }
478 }
479 if (blacklisted)
480 continue;
481 }
482
4349cd7c
LP
483 /* Let's ignore autofs mounts. If they aren't
484 * triggered yet, we want to avoid triggering
485 * them, as we don't make any guarantees for
486 * future submounts anyway. If they are
487 * already triggered, then we will find
488 * another entry for this. */
489 if (streq(type, "autofs")) {
490 top_autofs = top_autofs || path_equal(cleaned, p);
491 continue;
492 }
493
6b7c9f8b 494 if (!set_contains(done, p)) {
4349cd7c
LP
495 r = set_consume(todo, p);
496 p = NULL;
4349cd7c
LP
497 if (r == -EEXIST)
498 continue;
499 if (r < 0)
500 return r;
501 }
502 }
503
504 /* If we have no submounts to process anymore and if
505 * the root is either already done, or an autofs, we
506 * are done */
507 if (set_isempty(todo) &&
508 (top_autofs || set_contains(done, cleaned)))
509 return 0;
510
511 if (!set_contains(done, cleaned) &&
512 !set_contains(todo, cleaned)) {
6b7c9f8b 513 /* The prefix directory itself is not yet a mount, make it one. */
4349cd7c
LP
514 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
515 return -errno;
516
517 orig_flags = 0;
518 (void) get_mount_flags(cleaned, &orig_flags);
519 orig_flags &= ~MS_RDONLY;
520
ef454fd1 521 if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
4349cd7c
LP
522 return -errno;
523
6b7c9f8b
LP
524 log_debug("Made top-level directory %s a mount point.", prefix);
525
4349cd7c
LP
526 x = strdup(cleaned);
527 if (!x)
528 return -ENOMEM;
529
530 r = set_consume(done, x);
531 if (r < 0)
532 return r;
533 }
534
535 while ((x = set_steal_first(todo))) {
536
537 r = set_consume(done, x);
4c701096 538 if (IN_SET(r, 0, -EEXIST))
4349cd7c
LP
539 continue;
540 if (r < 0)
541 return r;
542
6b7c9f8b 543 /* Deal with mount points that are obstructed by a later mount */
e1873695 544 r = path_is_mount_point(x, NULL, 0);
4c701096 545 if (IN_SET(r, 0, -ENOENT))
98df8089 546 continue;
ef454fd1 547 if (IN_SET(r, -EACCES, -EPERM)) {
53c442ef
YW
548 /* Even if root user invoke this, submounts under private FUSE or NFS mount points
549 * may not be acceessed. E.g.,
550 *
551 * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
552 * $ bindfs --no-allow-other ~/mnt ~/mnt
553 *
554 * Then, root user cannot access the mount point ~/mnt/mnt.
555 * In such cases, the submounts are ignored, as we have no way to manage them. */
ef454fd1
YW
556 log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
557 continue;
558 }
98df8089
AC
559 if (r < 0)
560 return r;
561
562 /* Try to reuse the original flag set */
4349cd7c
LP
563 orig_flags = 0;
564 (void) get_mount_flags(x, &orig_flags);
565 orig_flags &= ~MS_RDONLY;
566
98df8089
AC
567 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
568 return -errno;
4349cd7c 569
6b7c9f8b 570 log_debug("Remounted %s read-only.", x);
4349cd7c
LP
571 }
572 }
573}
574
ac9de0b3
TR
575int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
576 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
577
578 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
579 if (!proc_self_mountinfo)
580 return -errno;
581
35bbbf85
LP
582 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
583
ac9de0b3
TR
584 return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
585}
586
4349cd7c
LP
/* Makes the mount at path the new root: changes into it, moves it onto "/" with MS_MOVE, chroots into the
 * current directory and finally changes to "/". Returns 0 on success, or -errno of the first step that
 * fails; later steps are not attempted then. */
int mount_move_root(const char *path) {
        assert(path);

        /* The || chain stops at the first failing call, hence errno still belongs to that call. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
4e036b7a
LP
604
/* Returns true if the file system type is one of the network file systems listed below. A leading "fuse."
 * is stripped before the lookup. */
bool fstype_is_network(const char *fstype) {
        const char *unprefixed = startswith(fstype, "fuse.");

        return STR_IN_SET(unprefixed ? unprefixed : fstype,
                          "afs",
                          "cifs",
                          "smbfs",
                          "sshfs",
                          "ncpfs",
                          "ncp",
                          "nfs",
                          "nfs4",
                          "gfs",
                          "gfs2",
                          "glusterfs",
                          "pvfs2", /* OrangeFS */
                          "ocfs2",
                          "lustre");
}
3f2c0bec 628
/* Returns true if the file system type is one of the API virtual file systems listed below. */
bool fstype_is_api_vfs(const char *fstype) {
        bool is_api_vfs;

        is_api_vfs = STR_IN_SET(fstype,
                                "autofs",
                                "bpf",
                                "cgroup",
                                "cgroup2",
                                "configfs",
                                "cpuset",
                                "debugfs",
                                "devpts",
                                "devtmpfs",
                                "efivarfs",
                                "fusectl",
                                "hugetlbfs",
                                "mqueue",
                                "proc",
                                "pstore",
                                "ramfs",
                                "securityfs",
                                "sysfs",
                                "tmpfs",
                                "tracefs");

        return is_api_vfs;
}
652
/* Returns true for the Linux file system types that are necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        bool always_ro;

        always_ro = STR_IN_SET(fstype,
                               "DM_verity_hash",
                               "iso9660",
                               "squashfs");

        return always_ro;
}
660
/* Returns true if the file system type is in the list of types considered capable of discard. */
bool fstype_can_discard(const char *fstype) {
        bool can_discard;

        can_discard = STR_IN_SET(fstype,
                                 "btrfs",
                                 "ext4",
                                 "vfat",
                                 "xfs");

        return can_discard;
}
668
2d3a5a73
LP
/* Returns true for the file systems that have a uid=/gid= mount option which fixates the owners of all
 * files and directories, current and future. */
bool fstype_can_uid_gid(const char *fstype) {
        bool has_uid_gid;

        has_uid_gid = STR_IN_SET(fstype,
                                 "adfs",
                                 "fat",
                                 "hfs",
                                 "hpfs",
                                 "iso9660",
                                 "msdos",
                                 "ntfs",
                                 "vfat");

        return has_uid_gid;
}
684
3f2c0bec
LP
/* Removes all mounts stacked on a single mount point by calling umount2() until it fails. If the failure is
 * EINVAL the return value tells whether at least one unmount succeeded (1) or none did (0). Any other
 * failure is returned as -errno. */
int repeat_unmount(const char *path, int flags) {
        bool unmounted_any = false;

        assert(path);

        /* Keep unmounting for as long as it works. */
        while (umount2(path, flags) >= 0)
                unmounted_any = true;

        if (errno == EINVAL)
                return unmounted_any;

        return -errno;
}
c4b41707
AP
705
706const char* mode_to_inaccessible_node(mode_t mode) {
fe80fcc7
LP
707 /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
708 * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
709 * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
710 * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
711 * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
712 * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
713
c4b41707
AP
714 switch(mode & S_IFMT) {
715 case S_IFREG:
716 return "/run/systemd/inaccessible/reg";
fe80fcc7 717
c4b41707
AP
718 case S_IFDIR:
719 return "/run/systemd/inaccessible/dir";
fe80fcc7 720
c4b41707 721 case S_IFCHR:
b3d1d516
AP
722 if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
723 return "/run/systemd/inaccessible/chr";
724 return "/run/systemd/inaccessible/sock";
fe80fcc7 725
c4b41707 726 case S_IFBLK:
b3d1d516
AP
727 if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
728 return "/run/systemd/inaccessible/blk";
729 return "/run/systemd/inaccessible/sock";
fe80fcc7 730
c4b41707
AP
731 case S_IFIFO:
732 return "/run/systemd/inaccessible/fifo";
fe80fcc7 733
c4b41707
AP
734 case S_IFSOCK:
735 return "/run/systemd/inaccessible/sock";
736 }
737 return NULL;
738}
60e76d48
ZJS
739
740#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
741static char* mount_flags_to_string(long unsigned flags) {
742 char *x;
743 _cleanup_free_ char *y = NULL;
744 long unsigned overflow;
745
746 overflow = flags & ~(MS_RDONLY |
747 MS_NOSUID |
748 MS_NODEV |
749 MS_NOEXEC |
750 MS_SYNCHRONOUS |
751 MS_REMOUNT |
752 MS_MANDLOCK |
753 MS_DIRSYNC |
754 MS_NOATIME |
755 MS_NODIRATIME |
756 MS_BIND |
757 MS_MOVE |
758 MS_REC |
759 MS_SILENT |
760 MS_POSIXACL |
761 MS_UNBINDABLE |
762 MS_PRIVATE |
763 MS_SLAVE |
764 MS_SHARED |
765 MS_RELATIME |
766 MS_KERNMOUNT |
767 MS_I_VERSION |
768 MS_STRICTATIME |
769 MS_LAZYTIME);
770
771 if (flags == 0 || overflow != 0)
772 if (asprintf(&y, "%lx", overflow) < 0)
773 return NULL;
774
775 x = strjoin(FLAG(MS_RDONLY),
776 FLAG(MS_NOSUID),
777 FLAG(MS_NODEV),
778 FLAG(MS_NOEXEC),
779 FLAG(MS_SYNCHRONOUS),
780 FLAG(MS_REMOUNT),
781 FLAG(MS_MANDLOCK),
782 FLAG(MS_DIRSYNC),
783 FLAG(MS_NOATIME),
784 FLAG(MS_NODIRATIME),
785 FLAG(MS_BIND),
786 FLAG(MS_MOVE),
787 FLAG(MS_REC),
788 FLAG(MS_SILENT),
789 FLAG(MS_POSIXACL),
790 FLAG(MS_UNBINDABLE),
791 FLAG(MS_PRIVATE),
792 FLAG(MS_SLAVE),
793 FLAG(MS_SHARED),
794 FLAG(MS_RELATIME),
795 FLAG(MS_KERNMOUNT),
796 FLAG(MS_I_VERSION),
797 FLAG(MS_STRICTATIME),
798 FLAG(MS_LAZYTIME),
605405c6 799 y);
60e76d48
ZJS
800 if (!x)
801 return NULL;
802 if (!y)
803 x[strlen(x) - 1] = '\0'; /* truncate the last | */
804 return x;
805}
806
807int mount_verbose(
808 int error_log_level,
809 const char *what,
810 const char *where,
811 const char *type,
812 unsigned long flags,
813 const char *options) {
814
6ef8df2b
YW
815 _cleanup_free_ char *fl = NULL, *o = NULL;
816 unsigned long f;
817 int r;
818
819 r = mount_option_mangle(options, flags, &f, &o);
820 if (r < 0)
821 return log_full_errno(error_log_level, r,
822 "Failed to mangle mount options %s: %m",
823 strempty(options));
60e76d48 824
6ef8df2b 825 fl = mount_flags_to_string(f);
60e76d48 826
6ef8df2b 827 if ((f & MS_REMOUNT) && !what && !type)
60e76d48 828 log_debug("Remounting %s (%s \"%s\")...",
6ef8df2b 829 where, strnull(fl), strempty(o));
60e76d48
ZJS
830 else if (!what && !type)
831 log_debug("Mounting %s (%s \"%s\")...",
6ef8df2b
YW
832 where, strnull(fl), strempty(o));
833 else if ((f & MS_BIND) && !type)
60e76d48 834 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
835 what, where, strnull(fl), strempty(o));
836 else if (f & MS_MOVE)
afe682bc 837 log_debug("Moving mount %s → %s (%s \"%s\")...",
6ef8df2b 838 what, where, strnull(fl), strempty(o));
60e76d48
ZJS
839 else
840 log_debug("Mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
841 strna(type), where, strnull(fl), strempty(o));
842 if (mount(what, where, type, f, o) < 0)
60e76d48 843 return log_full_errno(error_log_level, errno,
3ccf6126
LP
844 "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
845 strna(what), strna(type), where, strnull(fl), strempty(o));
60e76d48
ZJS
846 return 0;
847}
848
/* Wrapper around umount() that logs the attempt at debug level and a failure at error level. Returns 0 on
 * success, otherwise the value returned by log_error_errno(). */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
83555251
LP
855
/* Maps the propagation bits (MS_SHARED, MS_SLAVE, MS_PRIVATE) in flags to a name; all other bits are
 * ignored. Returns "" if none of the three is set, and NULL if more than one of them is set. */
const char *mount_propagation_flags_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";
        if (propagation == MS_SHARED)
                return "shared";
        if (propagation == MS_SLAVE)
                return "slave";
        if (propagation == MS_PRIVATE)
                return "private";

        /* A combination of several propagation flags has no name. */
        return NULL;
}
871
/* Parses a propagation mode name into the matching MS_* flag, stored in *ret. An empty or NULL name yields
 * 0. Returns 0 on success, or -EINVAL for an unknown name, in which case *ret is left untouched. */
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long parsed;

        if (isempty(name))
                parsed = 0;
        else if (streq(name, "shared"))
                parsed = MS_SHARED;
        else if (streq(name, "slave"))
                parsed = MS_SLAVE;
        else if (streq(name, "private"))
                parsed = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = parsed;
        return 0;
}
9e7f941a
YW
886
887int mount_option_mangle(
888 const char *options,
889 unsigned long mount_flags,
890 unsigned long *ret_mount_flags,
891 char **ret_remaining_options) {
892
893 const struct libmnt_optmap *map;
894 _cleanup_free_ char *ret = NULL;
895 const char *p;
896 int r;
897
898 /* This extracts mount flags from the mount options, and store
899 * non-mount-flag options to '*ret_remaining_options'.
900 * E.g.,
901 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
902 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
903 * "size=1630748k,mode=700,uid=1000,gid=1000".
904 * See more examples in test-mount-utils.c.
905 *
906 * Note that if 'options' does not contain any non-mount-flag options,
907 * then '*ret_remaining_options' is set to NULL instread of empty string.
908 * Note that this does not check validity of options stored in
909 * '*ret_remaining_options'.
910 * Note that if 'options' is NULL, then this just copies 'mount_flags'
911 * to '*ret_mount_flags'. */
912
913 assert(ret_mount_flags);
914 assert(ret_remaining_options);
915
916 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
917 if (!map)
918 return -EINVAL;
919
920 p = options;
921 for (;;) {
922 _cleanup_free_ char *word = NULL;
923 const struct libmnt_optmap *ent;
924
925 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
926 if (r < 0)
927 return r;
928 if (r == 0)
929 break;
930
931 for (ent = map; ent->name; ent++) {
932 /* All entries in MNT_LINUX_MAP do not take any argument.
933 * Thus, ent->name does not contain "=" or "[=]". */
934 if (!streq(word, ent->name))
935 continue;
936
937 if (!(ent->mask & MNT_INVERT))
938 mount_flags |= ent->id;
939 else if (mount_flags & ent->id)
940 mount_flags ^= ent->id;
941
942 break;
943 }
944
945 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
946 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
947 return -ENOMEM;
948 }
949
950 *ret_mount_flags = mount_flags;
ae2a15bc 951 *ret_remaining_options = TAKE_PTR(ret);
9e7f941a
YW
952
953 return 0;
954}
be1791ad
YW
955
956int dev_is_devtmpfs(void) {
957 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
be1791ad 958 int mount_id, r;
f5af75ea 959 char *e;
be1791ad
YW
960
961 r = path_get_mnt_id("/dev", &mount_id);
962 if (r < 0)
963 return r;
964
965 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
966 if (!proc_self_mountinfo)
967 return -errno;
968
969 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
970
f5af75ea
LP
971 for (;;) {
972 _cleanup_free_ char *line = NULL;
be1791ad
YW
973 int mid;
974
f5af75ea
LP
975 r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
976 if (r < 0)
977 return r;
978 if (r == 0)
979 break;
980
be1791ad
YW
981 if (sscanf(line, "%i", &mid) != 1)
982 continue;
983
984 if (mid != mount_id)
985 continue;
986
987 e = strstr(line, " - ");
988 if (!e)
989 continue;
990
991 /* accept any name that starts with the currently expected type */
992 if (startswith(e + 3, "devtmpfs"))
993 return true;
994 }
995
996 return false;
997}