]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/mount-util.c
45fdd3b2da754bad4209c085126c5094ecc7a3e1
[thirdparty/systemd.git] / src / shared / mount-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mount.h>
6 #include <sys/stat.h>
7 #include <sys/statvfs.h>
8 #include <unistd.h>
9
10 #include "alloc-util.h"
11 #include "extract-word.h"
12 #include "fd-util.h"
13 #include "fileio.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "libmount-util.h"
17 #include "mount-util.h"
18 #include "mountpoint-util.h"
19 #include "parse-util.h"
20 #include "path-util.h"
21 #include "set.h"
22 #include "stdio-util.h"
23 #include "string-util.h"
24 #include "strv.h"
25
26 int umount_recursive(const char *prefix, int flags) {
27 int n = 0, r;
28 bool again;
29
30 /* Try to umount everything recursively below a
31 * directory. Also, take care of stacked mounts, and keep
32 * unmounting them until they are gone. */
33
34 do {
35 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
36 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
37
38 again = false;
39
40 r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
41 if (r < 0)
42 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
43
44 for (;;) {
45 struct libmnt_fs *fs;
46 const char *path;
47
48 r = mnt_table_next_fs(table, iter, &fs);
49 if (r == 1)
50 break;
51 if (r < 0)
52 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
53
54 path = mnt_fs_get_target(fs);
55 if (!path)
56 continue;
57
58 if (!path_startswith(path, prefix))
59 continue;
60
61 if (umount2(path, flags) < 0) {
62 r = log_debug_errno(errno, "Failed to umount %s: %m", path);
63 continue;
64 }
65
66 log_debug("Successfully unmounted %s", path);
67
68 again = true;
69 n++;
70
71 break;
72 }
73
74 } while (again);
75
76 return n;
77 }
78
79 static int get_mount_flags(
80 struct libmnt_table *table,
81 const char *path,
82 unsigned long *ret) {
83 struct libmnt_fs *fs;
84 struct statvfs buf;
85 const char *opts;
86 int r = 0;
87
88 /* Get the mount flags for the mountpoint at "path" from "table". We have a fallback using statvfs()
89 * in place (which provides us with mostly the same info), but it's just a fallback, since using it
90 * means triggering autofs or NFS mounts, which we'd rather avoid needlessly. */
91
92 fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD);
93 if (!fs) {
94 log_debug("Could not find '%s' in mount table, ignoring.", path);
95 goto fallback;
96 }
97
98 opts = mnt_fs_get_vfs_options(fs);
99 if (!opts) {
100 *ret = 0;
101 return 0;
102 }
103
104 r = mnt_optstr_get_flags(opts, ret, mnt_get_builtin_optmap(MNT_LINUX_MAP));
105 if (r != 0) {
106 log_debug_errno(r, "Could not get flags for '%s', ignoring: %m", path);
107 goto fallback;
108 }
109
110 /* MS_RELATIME is default and trying to set it in an unprivileged container causes EPERM */
111 *ret &= ~MS_RELATIME;
112 return 0;
113
114 fallback:
115 if (statvfs(path, &buf) < 0)
116 return -errno;
117
118 /* The statvfs() flags and the mount flags mostly have the same values, but for some cases do
119 * not. Hence map the flags manually. (Strictly speaking, ST_RELATIME/MS_RELATIME is the most
120 * prominent one that doesn't match, but that's the one we mask away anyway, see above.) */
121
122 *ret =
123 FLAGS_SET(buf.f_flag, ST_RDONLY) * MS_RDONLY |
124 FLAGS_SET(buf.f_flag, ST_NODEV) * MS_NODEV |
125 FLAGS_SET(buf.f_flag, ST_NOEXEC) * MS_NOEXEC |
126 FLAGS_SET(buf.f_flag, ST_NOSUID) * MS_NOSUID |
127 FLAGS_SET(buf.f_flag, ST_NOATIME) * MS_NOATIME |
128 FLAGS_SET(buf.f_flag, ST_NODIRATIME) * MS_NODIRATIME;
129
130 return 0;
131 }
132
133 /* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
134 * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
135 int bind_remount_recursive_with_mountinfo(
136 const char *prefix,
137 unsigned long new_flags,
138 unsigned long flags_mask,
139 char **deny_list,
140 FILE *proc_self_mountinfo) {
141
142 _cleanup_set_free_free_ Set *done = NULL;
143 _cleanup_free_ char *simplified = NULL;
144 int r;
145
146 assert(prefix);
147 assert(proc_self_mountinfo);
148
149 /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
150 * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
151 * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
152 * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
153 * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
154 * do not have any effect on future submounts that might get propagated, they might be writable. This includes
155 * future submounts that have been triggered via autofs.
156 *
157 * If the "deny_list" parameter is specified it may contain a list of subtrees to exclude from the
158 * remount operation. Note that we'll ignore the deny list for the top-level path. */
159
160 simplified = strdup(prefix);
161 if (!simplified)
162 return -ENOMEM;
163
164 path_simplify(simplified, false);
165
166 done = set_new(&path_hash_ops);
167 if (!done)
168 return -ENOMEM;
169
170 for (;;) {
171 _cleanup_set_free_free_ Set *todo = NULL;
172 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
173 _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
174 bool top_autofs = false;
175 char *x;
176 unsigned long orig_flags;
177
178 todo = set_new(&path_hash_ops);
179 if (!todo)
180 return -ENOMEM;
181
182 rewind(proc_self_mountinfo);
183
184 r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter);
185 if (r < 0)
186 return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
187
188 for (;;) {
189 struct libmnt_fs *fs;
190 const char *path, *type;
191
192 r = mnt_table_next_fs(table, iter, &fs);
193 if (r == 1)
194 break;
195 if (r < 0)
196 return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
197
198 path = mnt_fs_get_target(fs);
199 type = mnt_fs_get_fstype(fs);
200 if (!path || !type)
201 continue;
202
203 if (!path_startswith(path, simplified))
204 continue;
205
206 /* Ignore this mount if it is deny-listed, but only if it isn't the top-level mount
207 * we shall operate on. */
208 if (!path_equal(path, simplified)) {
209 bool deny_listed = false;
210 char **i;
211
212 STRV_FOREACH(i, deny_list) {
213 if (path_equal(*i, simplified))
214 continue;
215
216 if (!path_startswith(*i, simplified))
217 continue;
218
219 if (path_startswith(path, *i)) {
220 deny_listed = true;
221 log_debug("Not remounting %s deny-listed by %s, called for %s",
222 path, *i, simplified);
223 break;
224 }
225 }
226 if (deny_listed)
227 continue;
228 }
229
230 /* Let's ignore autofs mounts. If they aren't
231 * triggered yet, we want to avoid triggering
232 * them, as we don't make any guarantees for
233 * future submounts anyway. If they are
234 * already triggered, then we will find
235 * another entry for this. */
236 if (streq(type, "autofs")) {
237 top_autofs = top_autofs || path_equal(path, simplified);
238 continue;
239 }
240
241 if (!set_contains(done, path)) {
242 r = set_put_strdup(&todo, path);
243 if (r < 0)
244 return r;
245 }
246 }
247
248 /* If we have no submounts to process anymore and if
249 * the root is either already done, or an autofs, we
250 * are done */
251 if (set_isempty(todo) &&
252 (top_autofs || set_contains(done, simplified)))
253 return 0;
254
255 if (!set_contains(done, simplified) &&
256 !set_contains(todo, simplified)) {
257 /* The prefix directory itself is not yet a mount, make it one. */
258 if (mount(simplified, simplified, NULL, MS_BIND|MS_REC, NULL) < 0)
259 return -errno;
260
261 orig_flags = 0;
262 (void) get_mount_flags(table, simplified, &orig_flags);
263
264 if (mount(NULL, simplified, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
265 return -errno;
266
267 log_debug("Made top-level directory %s a mount point.", prefix);
268
269 r = set_put_strdup(&done, simplified);
270 if (r < 0)
271 return r;
272 }
273
274 while ((x = set_steal_first(todo))) {
275
276 r = set_consume(done, x);
277 if (IN_SET(r, 0, -EEXIST))
278 continue;
279 if (r < 0)
280 return r;
281
282 /* Deal with mount points that are obstructed by a later mount */
283 r = path_is_mount_point(x, NULL, 0);
284 if (IN_SET(r, 0, -ENOENT))
285 continue;
286 if (IN_SET(r, -EACCES, -EPERM)) {
287 /* Even if root user invoke this, submounts under private FUSE or NFS mount points
288 * may not be acceessed. E.g.,
289 *
290 * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
291 * $ bindfs --no-allow-other ~/mnt ~/mnt
292 *
293 * Then, root user cannot access the mount point ~/mnt/mnt.
294 * In such cases, the submounts are ignored, as we have no way to manage them. */
295 log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
296 continue;
297 }
298 if (r < 0)
299 return r;
300
301 /* Try to reuse the original flag set */
302 orig_flags = 0;
303 (void) get_mount_flags(table, x, &orig_flags);
304
305 if (mount(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
306 return -errno;
307
308 log_debug("Remounted %s read-only.", x);
309 }
310 }
311 }
312
313 int bind_remount_recursive(
314 const char *prefix,
315 unsigned long new_flags,
316 unsigned long flags_mask,
317 char **deny_list) {
318
319 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
320 int r;
321
322 r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
323 if (r < 0)
324 return r;
325
326 return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, proc_self_mountinfo);
327 }
328
329 int bind_remount_one_with_mountinfo(
330 const char *path,
331 unsigned long new_flags,
332 unsigned long flags_mask,
333 FILE *proc_self_mountinfo) {
334
335 _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
336 unsigned long orig_flags = 0;
337 int r;
338
339 assert(path);
340 assert(proc_self_mountinfo);
341
342 rewind(proc_self_mountinfo);
343
344 table = mnt_new_table();
345 if (!table)
346 return -ENOMEM;
347
348 r = mnt_table_parse_stream(table, proc_self_mountinfo, "/proc/self/mountinfo");
349 if (r < 0)
350 return r;
351
352 /* Try to reuse the original flag set */
353 (void) get_mount_flags(table, path, &orig_flags);
354
355 if (mount(NULL, path, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
356 return -errno;
357
358 return 0;
359 }
360
/* Makes "path" the new root directory: changes into it, moves the mount onto "/", chroots into the
 * current directory and finally changes to the new "/". The steps run strictly in this order and
 * the first one that fails aborts the sequence.
 *
 * Returns 0 on success, or the negated errno of the failing step. */
int mount_move_root(const char *path) {
        assert(path);

        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
378
/* Calls umount2() on "path" over and over until it fails, so that all mounts stacked on the same
 * mount point get removed.
 *
 * If the terminating failure is EINVAL, returns 1 if at least one unmount succeeded and 0 if none
 * did. Any other failure is returned as negative errno. */
int repeat_unmount(const char *path, int flags) {
        bool unmounted_any = false;

        assert(path);

        while (umount2(path, flags) >= 0)
                unmounted_any = true;

        return errno == EINVAL ? unmounted_any : -errno;
}
399
400 int mode_to_inaccessible_node(
401 const char *runtime_dir,
402 mode_t mode,
403 char **ret) {
404
405 /* This function maps a node type to a corresponding inaccessible file node. These nodes are created
406 * during early boot by PID 1. In some cases we lacked the privs to create the character and block
407 * devices (maybe because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a
408 * devices policy that excludes device nodes with major and minor of 0), but that's fine, in that
409 * case we use an AF_UNIX file node instead, which is not the same, but close enough for most
410 * uses. And most importantly, the kernel allows bind mounts from socket nodes to any non-directory
411 * file nodes, and that's the most important thing that matters.
412 *
413 * Note that the runtime directory argument shall be the top-level runtime directory, i.e. /run/ if
414 * we operate in system context and $XDG_RUNTIME_DIR if we operate in user context. */
415
416 _cleanup_free_ char *d = NULL;
417 const char *node = NULL;
418 bool fallback = false;
419
420 assert(ret);
421
422 if (!runtime_dir)
423 runtime_dir = "/run";
424
425 switch(mode & S_IFMT) {
426 case S_IFREG:
427 node = "/systemd/inaccessible/reg";
428 break;
429
430 case S_IFDIR:
431 node = "/systemd/inaccessible/dir";
432 break;
433
434 case S_IFCHR:
435 node = "/systemd/inaccessible/chr";
436 fallback = true;
437 break;
438
439 case S_IFBLK:
440 node = "/systemd/inaccessible/blk";
441 fallback = true;
442 break;
443
444 case S_IFIFO:
445 node = "/systemd/inaccessible/fifo";
446 break;
447
448 case S_IFSOCK:
449 node = "/systemd/inaccessible/sock";
450 break;
451 }
452 if (!node)
453 return -EINVAL;
454
455 d = path_join(runtime_dir, node);
456 if (!d)
457 return -ENOMEM;
458
459 if (fallback && access(d, F_OK) < 0) {
460 free(d);
461 d = path_join(runtime_dir, "/systemd/inaccessible/sock");
462 if (!d)
463 return -ENOMEM;
464 }
465
466 *ret = TAKE_PTR(d);
467 return 0;
468 }
469
470 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
471 static char* mount_flags_to_string(long unsigned flags) {
472 char *x;
473 _cleanup_free_ char *y = NULL;
474 long unsigned overflow;
475
476 overflow = flags & ~(MS_RDONLY |
477 MS_NOSUID |
478 MS_NODEV |
479 MS_NOEXEC |
480 MS_SYNCHRONOUS |
481 MS_REMOUNT |
482 MS_MANDLOCK |
483 MS_DIRSYNC |
484 MS_NOATIME |
485 MS_NODIRATIME |
486 MS_BIND |
487 MS_MOVE |
488 MS_REC |
489 MS_SILENT |
490 MS_POSIXACL |
491 MS_UNBINDABLE |
492 MS_PRIVATE |
493 MS_SLAVE |
494 MS_SHARED |
495 MS_RELATIME |
496 MS_KERNMOUNT |
497 MS_I_VERSION |
498 MS_STRICTATIME |
499 MS_LAZYTIME);
500
501 if (flags == 0 || overflow != 0)
502 if (asprintf(&y, "%lx", overflow) < 0)
503 return NULL;
504
505 x = strjoin(FLAG(MS_RDONLY),
506 FLAG(MS_NOSUID),
507 FLAG(MS_NODEV),
508 FLAG(MS_NOEXEC),
509 FLAG(MS_SYNCHRONOUS),
510 FLAG(MS_REMOUNT),
511 FLAG(MS_MANDLOCK),
512 FLAG(MS_DIRSYNC),
513 FLAG(MS_NOATIME),
514 FLAG(MS_NODIRATIME),
515 FLAG(MS_BIND),
516 FLAG(MS_MOVE),
517 FLAG(MS_REC),
518 FLAG(MS_SILENT),
519 FLAG(MS_POSIXACL),
520 FLAG(MS_UNBINDABLE),
521 FLAG(MS_PRIVATE),
522 FLAG(MS_SLAVE),
523 FLAG(MS_SHARED),
524 FLAG(MS_RELATIME),
525 FLAG(MS_KERNMOUNT),
526 FLAG(MS_I_VERSION),
527 FLAG(MS_STRICTATIME),
528 FLAG(MS_LAZYTIME),
529 y);
530 if (!x)
531 return NULL;
532 if (!y)
533 x[strlen(x) - 1] = '\0'; /* truncate the last | */
534 return x;
535 }
536
537 int mount_verbose(
538 int error_log_level,
539 const char *what,
540 const char *where,
541 const char *type,
542 unsigned long flags,
543 const char *options) {
544
545 _cleanup_free_ char *fl = NULL, *o = NULL;
546 unsigned long f;
547 int r;
548
549 r = mount_option_mangle(options, flags, &f, &o);
550 if (r < 0)
551 return log_full_errno(error_log_level, r,
552 "Failed to mangle mount options %s: %m",
553 strempty(options));
554
555 fl = mount_flags_to_string(f);
556
557 if ((f & MS_REMOUNT) && !what && !type)
558 log_debug("Remounting %s (%s \"%s\")...",
559 where, strnull(fl), strempty(o));
560 else if (!what && !type)
561 log_debug("Mounting %s (%s \"%s\")...",
562 where, strnull(fl), strempty(o));
563 else if ((f & MS_BIND) && !type)
564 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
565 what, where, strnull(fl), strempty(o));
566 else if (f & MS_MOVE)
567 log_debug("Moving mount %s → %s (%s \"%s\")...",
568 what, where, strnull(fl), strempty(o));
569 else
570 log_debug("Mounting %s on %s (%s \"%s\")...",
571 strna(type), where, strnull(fl), strempty(o));
572 if (mount(what, where, type, f, o) < 0)
573 return log_full_errno(error_log_level, errno,
574 "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
575 strna(what), strna(type), where, strnull(fl), strempty(o));
576 return 0;
577 }
578
/* Wrapper around umount() that announces the operation at debug level and logs a failure at error
 * level. Returns 0 on success, a negative errno-style value on failure. */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
585
586 int mount_option_mangle(
587 const char *options,
588 unsigned long mount_flags,
589 unsigned long *ret_mount_flags,
590 char **ret_remaining_options) {
591
592 const struct libmnt_optmap *map;
593 _cleanup_free_ char *ret = NULL;
594 const char *p;
595 int r;
596
597 /* This extracts mount flags from the mount options, and store
598 * non-mount-flag options to '*ret_remaining_options'.
599 * E.g.,
600 * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
601 * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
602 * "size=1630748k,mode=700,uid=1000,gid=1000".
603 * See more examples in test-mount-utils.c.
604 *
605 * Note that if 'options' does not contain any non-mount-flag options,
606 * then '*ret_remaining_options' is set to NULL instead of empty string.
607 * Note that this does not check validity of options stored in
608 * '*ret_remaining_options'.
609 * Note that if 'options' is NULL, then this just copies 'mount_flags'
610 * to '*ret_mount_flags'. */
611
612 assert(ret_mount_flags);
613 assert(ret_remaining_options);
614
615 map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
616 if (!map)
617 return -EINVAL;
618
619 p = options;
620 for (;;) {
621 _cleanup_free_ char *word = NULL;
622 const struct libmnt_optmap *ent;
623
624 r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE);
625 if (r < 0)
626 return r;
627 if (r == 0)
628 break;
629
630 for (ent = map; ent->name; ent++) {
631 /* All entries in MNT_LINUX_MAP do not take any argument.
632 * Thus, ent->name does not contain "=" or "[=]". */
633 if (!streq(word, ent->name))
634 continue;
635
636 if (!(ent->mask & MNT_INVERT))
637 mount_flags |= ent->id;
638 else if (mount_flags & ent->id)
639 mount_flags ^= ent->id;
640
641 break;
642 }
643
644 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
645 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
646 return -ENOMEM;
647 }
648
649 *ret_mount_flags = mount_flags;
650 *ret_remaining_options = TAKE_PTR(ret);
651
652 return 0;
653 }