]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/mount-util.c
tree-wide: remove Lennart's copyright lines
[thirdparty/systemd.git] / src / basic / mount-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
4349cd7c 2
11c3a366 3#include <errno.h>
35bbbf85 4#include <stdio_ext.h>
11c3a366 5#include <stdlib.h>
4349cd7c
LP
6#include <string.h>
7#include <sys/mount.h>
11c3a366 8#include <sys/stat.h>
4349cd7c 9#include <sys/statvfs.h>
11c3a366 10#include <unistd.h>
4349cd7c 11
9e7f941a
YW
12/* Include later */
13#include <libmount.h>
14
b5efdb8a 15#include "alloc-util.h"
4349cd7c 16#include "escape.h"
9e7f941a 17#include "extract-word.h"
4349cd7c
LP
18#include "fd-util.h"
19#include "fileio.h"
e1873695 20#include "fs-util.h"
93cc7779 21#include "hashmap.h"
4349cd7c
LP
22#include "mount-util.h"
23#include "parse-util.h"
24#include "path-util.h"
25#include "set.h"
15a5e950 26#include "stdio-util.h"
4349cd7c 27#include "string-util.h"
6b7c9f8b 28#include "strv.h"
4349cd7c 29
01a7e0a1
LP
/* The value MAX_HANDLE_SZ had in the kernel headers when the name_to_handle_at() API was first introduced. We
 * deliberately pin it here rather than using whatever the current headers define: if the headers ever raise
 * the value and this code is rebuilt, it would stop working on older kernels, since those reject any size
 * larger than this one with EINVAL right away. The value is only used as the initial buffer size anyway (see
 * name_to_handle_at_loop() below), hence larger file handles can still be dealt with. */
#define ORIGINAL_MAX_HANDLE_SZ 128
37
cbfb8679
LP
38int name_to_handle_at_loop(
39 int fd,
40 const char *path,
41 struct file_handle **ret_handle,
42 int *ret_mnt_id,
43 int flags) {
44
93719c6b 45 _cleanup_free_ struct file_handle *h = NULL;
01a7e0a1 46 size_t n = ORIGINAL_MAX_HANDLE_SZ;
cbfb8679
LP
47
48 /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
49 * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
50 * start value, it is not an upper bound on the buffer size required.
51 *
52 * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
53 * as NULL if there's no interest in either. */
54
cbfb8679
LP
55 for (;;) {
56 int mnt_id = -1;
cbfb8679 57
93719c6b
LP
58 h = malloc0(offsetof(struct file_handle, f_handle) + n);
59 if (!h)
60 return -ENOMEM;
61
62 h->handle_bytes = n;
63
cbfb8679
LP
64 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
65
ae2a15bc
LP
66 if (ret_handle)
67 *ret_handle = TAKE_PTR(h);
cbfb8679
LP
68
69 if (ret_mnt_id)
70 *ret_mnt_id = mnt_id;
71
72 return 0;
73 }
74 if (errno != EOVERFLOW)
75 return -errno;
76
77 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
78
fc010b01
LP
79 /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
80 * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
81 * be filled in, and the caller was interested in only the mount ID an nothing else. */
cbfb8679
LP
82
83 *ret_mnt_id = mnt_id;
84 return 0;
85 }
86
fc010b01
LP
87 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
88 * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
89 * buffer. In that case propagate EOVERFLOW */
90 if (h->handle_bytes <= n)
cbfb8679 91 return -EOVERFLOW;
fc010b01
LP
92
93 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
94 n = h->handle_bytes;
95 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
cbfb8679 96 return -EOVERFLOW;
cbfb8679 97
93719c6b 98 h = mfree(h);
cbfb8679
LP
99 }
100}
101
4349cd7c 102static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
fbd0b64f 103 char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
4349cd7c
LP
104 _cleanup_free_ char *fdinfo = NULL;
105 _cleanup_close_ int subfd = -1;
106 char *p;
107 int r;
108
109 if ((flags & AT_EMPTY_PATH) && isempty(filename))
110 xsprintf(path, "/proc/self/fdinfo/%i", fd);
111 else {
c4b69156 112 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
4349cd7c
LP
113 if (subfd < 0)
114 return -errno;
115
116 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
117 }
118
119 r = read_full_file(path, &fdinfo, NULL);
120 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
121 return -EOPNOTSUPP;
122 if (r < 0)
0d9bcb7c 123 return r;
4349cd7c
LP
124
125 p = startswith(fdinfo, "mnt_id:");
126 if (!p) {
127 p = strstr(fdinfo, "\nmnt_id:");
128 if (!p) /* The mnt_id field is a relatively new addition */
129 return -EOPNOTSUPP;
130
131 p += 8;
132 }
133
134 p += strspn(p, WHITESPACE);
135 p[strcspn(p, WHITESPACE)] = 0;
136
137 return safe_atoi(p, mnt_id);
138}
139
4349cd7c 140int fd_is_mount_point(int fd, const char *filename, int flags) {
cbfb8679 141 _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
4349cd7c
LP
142 int mount_id = -1, mount_id_parent = -1;
143 bool nosupp = false, check_st_dev = true;
144 struct stat a, b;
145 int r;
146
147 assert(fd >= 0);
148 assert(filename);
149
150 /* First we will try the name_to_handle_at() syscall, which
151 * tells us the mount id and an opaque file "handle". It is
152 * not supported everywhere though (kernel compile-time
153 * option, not all file systems are hooked up). If it works
154 * the mount id is usually good enough to tell us whether
155 * something is a mount point.
156 *
157 * If that didn't work we will try to read the mount id from
158 * /proc/self/fdinfo/<fd>. This is almost as good as
159 * name_to_handle_at(), however, does not return the
160 * opaque file handle. The opaque file handle is pretty useful
161 * to detect the root directory, which we should always
162 * consider a mount point. Hence we use this only as
163 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
164 * kernel addition.
165 *
166 * As last fallback we do traditional fstat() based st_dev
167 * comparisons. This is how things were traditionally done,
61233823 168 * but unionfs breaks this since it exposes file
4349cd7c
LP
169 * systems with a variety of st_dev reported. Also, btrfs
170 * subvolumes have different st_dev, even though they aren't
171 * real mounts of their own. */
172
cbfb8679 173 r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
976c0478 174 if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
c83b20d7
LP
175 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
176 * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
976c0478
LP
177 * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
178 * (EINVAL): fall back to simpler logic. */
cbfb8679
LP
179 goto fallback_fdinfo;
180 else if (r == -EOPNOTSUPP)
181 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
182 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
183 * logic */
184 nosupp = true;
185 else if (r < 0)
186 return r;
187
188 r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
189 if (r == -EOPNOTSUPP) {
190 if (nosupp)
191 /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
4349cd7c 192 goto fallback_fdinfo;
4349cd7c 193 else
cbfb8679
LP
194 /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
195 * it must be a mount point. */
196 return 1;
197 } else if (r < 0)
4739fc55 198 return r;
4349cd7c
LP
199
200 /* The parent can do name_to_handle_at() but the
201 * directory we are interested in can't? If so, it
202 * must be a mount point. */
203 if (nosupp)
204 return 1;
205
206 /* If the file handle for the directory we are
207 * interested in and its parent are identical, we
208 * assume this is the root directory, which is a mount
209 * point. */
210
cbfb8679
LP
211 if (h->handle_bytes == h_parent->handle_bytes &&
212 h->handle_type == h_parent->handle_type &&
213 memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
4349cd7c
LP
214 return 1;
215
216 return mount_id != mount_id_parent;
217
218fallback_fdinfo:
219 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
059c35f5 220 if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
4349cd7c
LP
221 goto fallback_fstat;
222 if (r < 0)
223 return r;
224
225 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
226 if (r < 0)
227 return r;
228
229 if (mount_id != mount_id_parent)
230 return 1;
231
232 /* Hmm, so, the mount ids are the same. This leaves one
233 * special case though for the root file system. For that,
234 * let's see if the parent directory has the same inode as we
235 * are interested in. Hence, let's also do fstat() checks now,
236 * too, but avoid the st_dev comparisons, since they aren't
237 * that useful on unionfs mounts. */
238 check_st_dev = false;
239
240fallback_fstat:
241 /* yay for fstatat() taking a different set of flags than the other
242 * _at() above */
243 if (flags & AT_SYMLINK_FOLLOW)
244 flags &= ~AT_SYMLINK_FOLLOW;
245 else
246 flags |= AT_SYMLINK_NOFOLLOW;
247 if (fstatat(fd, filename, &a, flags) < 0)
248 return -errno;
249
250 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
251 return -errno;
252
253 /* A directory with same device and inode as its parent? Must
254 * be the root directory */
255 if (a.st_dev == b.st_dev &&
256 a.st_ino == b.st_ino)
257 return 1;
258
259 return check_st_dev && (a.st_dev != b.st_dev);
260}
261
262/* flags can be AT_SYMLINK_FOLLOW or 0 */
e1873695 263int path_is_mount_point(const char *t, const char *root, int flags) {
4349cd7c 264 _cleanup_free_ char *canonical = NULL, *parent = NULL;
e1873695
LP
265 _cleanup_close_ int fd = -1;
266 int r;
4349cd7c
LP
267
268 assert(t);
b12d25a8 269 assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
4349cd7c
LP
270
271 if (path_equal(t, "/"))
272 return 1;
273
274 /* we need to resolve symlinks manually, we can't just rely on
275 * fd_is_mount_point() to do that for us; if we have a structure like
276 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
277 * look at needs to be /usr, not /. */
278 if (flags & AT_SYMLINK_FOLLOW) {
62570f6f 279 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
e1873695
LP
280 if (r < 0)
281 return r;
4349cd7c
LP
282
283 t = canonical;
284 }
285
286 parent = dirname_malloc(t);
287 if (!parent)
288 return -ENOMEM;
289
c4b69156 290 fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
4349cd7c
LP
291 if (fd < 0)
292 return -errno;
293
b12d25a8 294 return fd_is_mount_point(fd, last_path_component(t), flags);
4349cd7c
LP
295}
296
c2a986d5
LP
297int path_get_mnt_id(const char *path, int *ret) {
298 int r;
299
300 r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
976c0478 301 if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
c2a986d5
LP
302 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
303
304 return r;
305}
306
4349cd7c
LP
307int umount_recursive(const char *prefix, int flags) {
308 bool again;
309 int n = 0, r;
310
311 /* Try to umount everything recursively below a
312 * directory. Also, take care of stacked mounts, and keep
313 * unmounting them until they are gone. */
314
315 do {
316 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
317
318 again = false;
319 r = 0;
320
321 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
322 if (!proc_self_mountinfo)
323 return -errno;
324
35bbbf85
LP
325 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
326
4349cd7c
LP
327 for (;;) {
328 _cleanup_free_ char *path = NULL, *p = NULL;
329 int k;
330
331 k = fscanf(proc_self_mountinfo,
332 "%*s " /* (1) mount id */
333 "%*s " /* (2) parent id */
334 "%*s " /* (3) major:minor */
335 "%*s " /* (4) root */
336 "%ms " /* (5) mount point */
337 "%*s" /* (6) mount options */
338 "%*[^-]" /* (7) optional fields */
339 "- " /* (8) separator */
340 "%*s " /* (9) file system type */
341 "%*s" /* (10) mount source */
342 "%*s" /* (11) mount options 2 */
343 "%*[^\n]", /* some rubbish at the end */
344 &path);
345 if (k != 1) {
346 if (k == EOF)
347 break;
348
349 continue;
350 }
351
352 r = cunescape(path, UNESCAPE_RELAX, &p);
353 if (r < 0)
354 return r;
355
356 if (!path_startswith(p, prefix))
357 continue;
358
359 if (umount2(p, flags) < 0) {
6b7c9f8b 360 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
4349cd7c
LP
361 continue;
362 }
363
6b7c9f8b
LP
364 log_debug("Successfully unmounted %s", p);
365
4349cd7c
LP
366 again = true;
367 n++;
368
369 break;
370 }
371
372 } while (again);
373
374 return r ? r : n;
375}
376
/* Queries the file system containing 'path' via statvfs() and stores its f_flag field in *flags. Returns 0 on
 * success, or a negative errno on failure (in which case *flags is left untouched). */
static int get_mount_flags(const char *path, unsigned long *flags) {
        struct statvfs st;
        int rc;

        rc = statvfs(path, &st);
        if (rc < 0)
                return -errno;

        *flags = st.f_flag;
        return 0;
}
385
ac9de0b3
TR
386/* Use this function only if do you have direct access to /proc/self/mountinfo
387 * and need the caller to open it for you. This is the case when /proc is
388 * masked or not mounted. Otherwise, use bind_remount_recursive. */
389int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
4349cd7c
LP
390 _cleanup_set_free_free_ Set *done = NULL;
391 _cleanup_free_ char *cleaned = NULL;
392 int r;
393
ac9de0b3
TR
394 assert(proc_self_mountinfo);
395
6b7c9f8b
LP
396 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
397 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
398 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
399 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
400 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
401 * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
402 * future submounts that have been triggered via autofs.
403 *
404 * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
405 * remount operation. Note that we'll ignore the blacklist for the top-level path. */
4349cd7c
LP
406
407 cleaned = strdup(prefix);
408 if (!cleaned)
409 return -ENOMEM;
410
858d36c1 411 path_simplify(cleaned, false);
4349cd7c 412
548f6937 413 done = set_new(&path_hash_ops);
4349cd7c
LP
414 if (!done)
415 return -ENOMEM;
416
417 for (;;) {
4349cd7c
LP
418 _cleanup_set_free_free_ Set *todo = NULL;
419 bool top_autofs = false;
420 char *x;
421 unsigned long orig_flags;
422
548f6937 423 todo = set_new(&path_hash_ops);
4349cd7c
LP
424 if (!todo)
425 return -ENOMEM;
426
ac9de0b3 427 rewind(proc_self_mountinfo);
4349cd7c
LP
428
429 for (;;) {
430 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
431 int k;
432
433 k = fscanf(proc_self_mountinfo,
434 "%*s " /* (1) mount id */
435 "%*s " /* (2) parent id */
436 "%*s " /* (3) major:minor */
437 "%*s " /* (4) root */
438 "%ms " /* (5) mount point */
439 "%*s" /* (6) mount options (superblock) */
440 "%*[^-]" /* (7) optional fields */
441 "- " /* (8) separator */
442 "%ms " /* (9) file system type */
443 "%*s" /* (10) mount source */
444 "%*s" /* (11) mount options (bind mount) */
445 "%*[^\n]", /* some rubbish at the end */
446 &path,
447 &type);
448 if (k != 2) {
449 if (k == EOF)
450 break;
451
452 continue;
453 }
454
455 r = cunescape(path, UNESCAPE_RELAX, &p);
456 if (r < 0)
457 return r;
458
6b7c9f8b
LP
459 if (!path_startswith(p, cleaned))
460 continue;
461
462 /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
463 * operate on. */
464 if (!path_equal(cleaned, p)) {
465 bool blacklisted = false;
466 char **i;
467
468 STRV_FOREACH(i, blacklist) {
469
470 if (path_equal(*i, cleaned))
471 continue;
472
473 if (!path_startswith(*i, cleaned))
474 continue;
475
476 if (path_startswith(p, *i)) {
477 blacklisted = true;
478 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
479 break;
480 }
481 }
482 if (blacklisted)
483 continue;
484 }
485
4349cd7c
LP
486 /* Let's ignore autofs mounts. If they aren't
487 * triggered yet, we want to avoid triggering
488 * them, as we don't make any guarantees for
489 * future submounts anyway. If they are
490 * already triggered, then we will find
491 * another entry for this. */
492 if (streq(type, "autofs")) {
493 top_autofs = top_autofs || path_equal(cleaned, p);
494 continue;
495 }
496
6b7c9f8b 497 if (!set_contains(done, p)) {
4349cd7c
LP
498 r = set_consume(todo, p);
499 p = NULL;
4349cd7c
LP
500 if (r == -EEXIST)
501 continue;
502 if (r < 0)
503 return r;
504 }
505 }
506
507 /* If we have no submounts to process anymore and if
508 * the root is either already done, or an autofs, we
509 * are done */
510 if (set_isempty(todo) &&
511 (top_autofs || set_contains(done, cleaned)))
512 return 0;
513
514 if (!set_contains(done, cleaned) &&
515 !set_contains(todo, cleaned)) {
6b7c9f8b 516 /* The prefix directory itself is not yet a mount, make it one. */
4349cd7c
LP
517 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
518 return -errno;
519
520 orig_flags = 0;
521 (void) get_mount_flags(cleaned, &orig_flags);
522 orig_flags &= ~MS_RDONLY;
523
524 if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
525 return -errno;
526
6b7c9f8b
LP
527 log_debug("Made top-level directory %s a mount point.", prefix);
528
4349cd7c
LP
529 x = strdup(cleaned);
530 if (!x)
531 return -ENOMEM;
532
533 r = set_consume(done, x);
534 if (r < 0)
535 return r;
536 }
537
538 while ((x = set_steal_first(todo))) {
539
540 r = set_consume(done, x);
4c701096 541 if (IN_SET(r, 0, -EEXIST))
4349cd7c
LP
542 continue;
543 if (r < 0)
544 return r;
545
6b7c9f8b 546 /* Deal with mount points that are obstructed by a later mount */
e1873695 547 r = path_is_mount_point(x, NULL, 0);
4c701096 548 if (IN_SET(r, 0, -ENOENT))
98df8089
AC
549 continue;
550 if (r < 0)
551 return r;
552
553 /* Try to reuse the original flag set */
4349cd7c
LP
554 orig_flags = 0;
555 (void) get_mount_flags(x, &orig_flags);
556 orig_flags &= ~MS_RDONLY;
557
98df8089
AC
558 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
559 return -errno;
4349cd7c 560
6b7c9f8b 561 log_debug("Remounted %s read-only.", x);
4349cd7c
LP
562 }
563 }
564}
565
ac9de0b3
TR
566int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
567 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
568
569 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
570 if (!proc_self_mountinfo)
571 return -errno;
572
35bbbf85
LP
573 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
574
ac9de0b3
TR
575 return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
576}
577
4349cd7c
LP
int mount_move_root(const char *path) {
        assert(path);

        /* Moves the mount at 'path' onto "/" and makes it the new root directory. The steps are, in order:
         * chdir() into path, MS_MOVE it to "/", chroot() into the current directory, and finally chdir() to
         * the new "/". The first step that fails aborts the sequence and its errno is returned negated;
         * returns 0 if all four succeed. */

        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
4e036b7a
LP
595
bool fstype_is_network(const char *fstype) {
        /* File system types this function classifies as network file systems */
        static const char *const network_types[] = {
                "afs",
                "cifs",
                "smbfs",
                "sshfs",
                "ncpfs",
                "ncp",
                "nfs",
                "nfs4",
                "gfs",
                "gfs2",
                "glusterfs",
                "pvfs2", /* OrangeFS */
                "ocfs2",
                "lustre",
        };
        size_t i;

        /* A leading "fuse." is stripped first, so that e.g. "fuse.sshfs" is matched as "sshfs" */
        if (strncmp(fstype, "fuse.", 5) == 0)
                fstype += 5;

        for (i = 0; i < sizeof(network_types) / sizeof(network_types[0]); i++)
                if (strcmp(fstype, network_types[i]) == 0)
                        return true;

        return false;
}
3f2c0bec 619
bool fstype_is_api_vfs(const char *fstype) {
        /* File system types this function classifies as API virtual file systems */
        static const char *const api_vfs_types[] = {
                "autofs",
                "bpf",
                "cgroup",
                "cgroup2",
                "configfs",
                "cpuset",
                "debugfs",
                "devpts",
                "devtmpfs",
                "efivarfs",
                "fusectl",
                "hugetlbfs",
                "mqueue",
                "proc",
                "pstore",
                "ramfs",
                "securityfs",
                "sysfs",
                "tmpfs",
                "tracefs",
        };
        size_t i;

        /* Exact, case-sensitive match against the table above */
        for (i = 0; i < sizeof(api_vfs_types) / sizeof(api_vfs_types[0]); i++)
                if (strcmp(fstype, api_vfs_types[i]) == 0)
                        return true;

        return false;
}
643
bool fstype_is_ro(const char *fstype) {
        /* All Linux file systems that are necessarily read-only */
        static const char *const ro_types[] = {
                "DM_verity_hash",
                "iso9660",
                "squashfs",
        };
        size_t i;

        /* Exact, case-sensitive match against the table above */
        for (i = 0; i < sizeof(ro_types) / sizeof(ro_types[0]); i++)
                if (strcmp(fstype, ro_types[i]) == 0)
                        return true;

        return false;
}
651
bool fstype_can_discard(const char *fstype) {
        /* File system types this function reports as able to discard */
        static const char *const discard_types[] = {
                "btrfs",
                "ext4",
                "vfat",
                "xfs",
        };
        size_t i;

        /* Exact, case-sensitive match against the table above */
        for (i = 0; i < sizeof(discard_types) / sizeof(discard_types[0]); i++)
                if (strcmp(fstype, discard_types[i]) == 0)
                        return true;

        return false;
}
659
2d3a5a73
LP
bool fstype_can_uid_gid(const char *fstype) {
        /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and
         * directories, current and future. */
        static const char *const uid_gid_types[] = {
                "adfs",
                "fat",
                "hfs",
                "hpfs",
                "iso9660",
                "msdos",
                "ntfs",
                "vfat",
        };
        size_t i;

        /* Exact, case-sensitive match against the table above */
        for (i = 0; i < sizeof(uid_gid_types) / sizeof(uid_gid_types[0]); i++)
                if (strcmp(fstype, uid_gid_types[i]) == 0)
                        return true;

        return false;
}
675
3f2c0bec
LP
int repeat_unmount(const char *path, int flags) {
        bool unmounted_any = false;

        assert(path);

        /* If there are multiple mounts stacked on a mount point, this removes them all: umount2() is called
         * over and over until it fails. */
        while (umount2(path, flags) >= 0)
                unmounted_any = true;

        /* EINVAL ends the loop regularly; in that case we return whether at least one unmount succeeded
         * (1) or not (0). Any other error is passed on as negative errno. */
        if (errno == EINVAL)
                return unmounted_any;

        return -errno;
}
c4b41707
AP
696
697const char* mode_to_inaccessible_node(mode_t mode) {
fe80fcc7
LP
698 /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
699 * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
700 * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
701 * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
702 * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
703 * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
704
c4b41707
AP
705 switch(mode & S_IFMT) {
706 case S_IFREG:
707 return "/run/systemd/inaccessible/reg";
fe80fcc7 708
c4b41707
AP
709 case S_IFDIR:
710 return "/run/systemd/inaccessible/dir";
fe80fcc7 711
c4b41707 712 case S_IFCHR:
b3d1d516
AP
713 if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
714 return "/run/systemd/inaccessible/chr";
715 return "/run/systemd/inaccessible/sock";
fe80fcc7 716
c4b41707 717 case S_IFBLK:
b3d1d516
AP
718 if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
719 return "/run/systemd/inaccessible/blk";
720 return "/run/systemd/inaccessible/sock";
fe80fcc7 721
c4b41707
AP
722 case S_IFIFO:
723 return "/run/systemd/inaccessible/fifo";
fe80fcc7 724
c4b41707
AP
725 case S_IFSOCK:
726 return "/run/systemd/inaccessible/sock";
727 }
728 return NULL;
729}
60e76d48
ZJS
730
731#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
732static char* mount_flags_to_string(long unsigned flags) {
733 char *x;
734 _cleanup_free_ char *y = NULL;
735 long unsigned overflow;
736
737 overflow = flags & ~(MS_RDONLY |
738 MS_NOSUID |
739 MS_NODEV |
740 MS_NOEXEC |
741 MS_SYNCHRONOUS |
742 MS_REMOUNT |
743 MS_MANDLOCK |
744 MS_DIRSYNC |
745 MS_NOATIME |
746 MS_NODIRATIME |
747 MS_BIND |
748 MS_MOVE |
749 MS_REC |
750 MS_SILENT |
751 MS_POSIXACL |
752 MS_UNBINDABLE |
753 MS_PRIVATE |
754 MS_SLAVE |
755 MS_SHARED |
756 MS_RELATIME |
757 MS_KERNMOUNT |
758 MS_I_VERSION |
759 MS_STRICTATIME |
760 MS_LAZYTIME);
761
762 if (flags == 0 || overflow != 0)
763 if (asprintf(&y, "%lx", overflow) < 0)
764 return NULL;
765
766 x = strjoin(FLAG(MS_RDONLY),
767 FLAG(MS_NOSUID),
768 FLAG(MS_NODEV),
769 FLAG(MS_NOEXEC),
770 FLAG(MS_SYNCHRONOUS),
771 FLAG(MS_REMOUNT),
772 FLAG(MS_MANDLOCK),
773 FLAG(MS_DIRSYNC),
774 FLAG(MS_NOATIME),
775 FLAG(MS_NODIRATIME),
776 FLAG(MS_BIND),
777 FLAG(MS_MOVE),
778 FLAG(MS_REC),
779 FLAG(MS_SILENT),
780 FLAG(MS_POSIXACL),
781 FLAG(MS_UNBINDABLE),
782 FLAG(MS_PRIVATE),
783 FLAG(MS_SLAVE),
784 FLAG(MS_SHARED),
785 FLAG(MS_RELATIME),
786 FLAG(MS_KERNMOUNT),
787 FLAG(MS_I_VERSION),
788 FLAG(MS_STRICTATIME),
789 FLAG(MS_LAZYTIME),
605405c6 790 y);
60e76d48
ZJS
791 if (!x)
792 return NULL;
793 if (!y)
794 x[strlen(x) - 1] = '\0'; /* truncate the last | */
795 return x;
796}
797
798int mount_verbose(
799 int error_log_level,
800 const char *what,
801 const char *where,
802 const char *type,
803 unsigned long flags,
804 const char *options) {
805
6ef8df2b
YW
806 _cleanup_free_ char *fl = NULL, *o = NULL;
807 unsigned long f;
808 int r;
809
810 r = mount_option_mangle(options, flags, &f, &o);
811 if (r < 0)
812 return log_full_errno(error_log_level, r,
813 "Failed to mangle mount options %s: %m",
814 strempty(options));
60e76d48 815
6ef8df2b 816 fl = mount_flags_to_string(f);
60e76d48 817
6ef8df2b 818 if ((f & MS_REMOUNT) && !what && !type)
60e76d48 819 log_debug("Remounting %s (%s \"%s\")...",
6ef8df2b 820 where, strnull(fl), strempty(o));
60e76d48
ZJS
821 else if (!what && !type)
822 log_debug("Mounting %s (%s \"%s\")...",
6ef8df2b
YW
823 where, strnull(fl), strempty(o));
824 else if ((f & MS_BIND) && !type)
60e76d48 825 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
826 what, where, strnull(fl), strempty(o));
827 else if (f & MS_MOVE)
afe682bc 828 log_debug("Moving mount %s → %s (%s \"%s\")...",
6ef8df2b 829 what, where, strnull(fl), strempty(o));
60e76d48
ZJS
830 else
831 log_debug("Mounting %s on %s (%s \"%s\")...",
6ef8df2b
YW
832 strna(type), where, strnull(fl), strempty(o));
833 if (mount(what, where, type, f, o) < 0)
60e76d48
ZJS
834 return log_full_errno(error_log_level, errno,
835 "Failed to mount %s on %s (%s \"%s\"): %m",
6ef8df2b 836 strna(type), where, strnull(fl), strempty(o));
60e76d48
ZJS
837 return 0;
838}
839
int umount_verbose(const char *what) {
        /* Wrapper around umount() that announces the operation at debug level and logs a failure at error
         * level. Returns 0 on success, otherwise the value returned by the error log call. */

        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
83555251
LP
846
const char *mount_propagation_flags_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        /* Maps the propagation bits in 'flags' to their name; all other bits are ignored. Returns "" if no
         * propagation bit is set, and NULL if more than one of them is set. */

        if (propagation == 0)
                return "";

        if (propagation == MS_SHARED)
                return "shared";

        if (propagation == MS_SLAVE)
                return "slave";

        if (propagation == MS_PRIVATE)
                return "private";

        return NULL;
}
862
int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
        unsigned long flag;

        /* Parses a propagation mode name into the matching MS_* flag. A NULL or empty name yields 0.
         * Returns 0 on success, or -EINVAL for an unknown name, in which case *ret is left untouched. */

        if (!name || name[0] == '\0')
                flag = 0;
        else if (strcmp(name, "shared") == 0)
                flag = MS_SHARED;
        else if (strcmp(name, "slave") == 0)
                flag = MS_SLAVE;
        else if (strcmp(name, "private") == 0)
                flag = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = flag;
        return 0;
}
9e7f941a
YW
877
878int mount_option_mangle(
879 const char *options,
880 unsigned long mount_flags,
881 unsigned long *ret_mount_flags,
882 char **ret_remaining_options) {
883
884 const struct libmnt_optmap *map;
885 _cleanup_free_ char *ret = NULL;
886 const char *p;
887 int r;
888
889 /* This extracts mount flags from the mount options, and store
890 * non-mount-flag options to '*ret_remaining_options'.
891 * E.g.,
892 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
893 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
894 * "size=1630748k,mode=700,uid=1000,gid=1000".
895 * See more examples in test-mount-utils.c.
896 *
897 * Note that if 'options' does not contain any non-mount-flag options,
898 * then '*ret_remaining_options' is set to NULL instread of empty string.
899 * Note that this does not check validity of options stored in
900 * '*ret_remaining_options'.
901 * Note that if 'options' is NULL, then this just copies 'mount_flags'
902 * to '*ret_mount_flags'. */
903
904 assert(ret_mount_flags);
905 assert(ret_remaining_options);
906
907 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
908 if (!map)
909 return -EINVAL;
910
911 p = options;
912 for (;;) {
913 _cleanup_free_ char *word = NULL;
914 const struct libmnt_optmap *ent;
915
916 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
917 if (r < 0)
918 return r;
919 if (r == 0)
920 break;
921
922 for (ent = map; ent->name; ent++) {
923 /* All entries in MNT_LINUX_MAP do not take any argument.
924 * Thus, ent->name does not contain "=" or "[=]". */
925 if (!streq(word, ent->name))
926 continue;
927
928 if (!(ent->mask & MNT_INVERT))
929 mount_flags |= ent->id;
930 else if (mount_flags & ent->id)
931 mount_flags ^= ent->id;
932
933 break;
934 }
935
936 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
937 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
938 return -ENOMEM;
939 }
940
941 *ret_mount_flags = mount_flags;
ae2a15bc 942 *ret_remaining_options = TAKE_PTR(ret);
9e7f941a
YW
943
944 return 0;
945}